From 8cd4e9de79e283b2b198faa337933af57efb9050 Mon Sep 17 00:00:00 2001 From: ModelHub XC Date: Tue, 5 May 2026 01:09:49 +0800 Subject: [PATCH] =?UTF-8?q?=E5=88=9D=E5=A7=8B=E5=8C=96=E9=A1=B9=E7=9B=AE?= =?UTF-8?q?=EF=BC=8C=E7=94=B1ModelHub=20XC=E7=A4=BE=E5=8C=BA=E6=8F=90?= =?UTF-8?q?=E4=BE=9B=E6=A8=A1=E5=9E=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Model: rbelanec/train_record_42_1776331412 Source: Original Platform --- .gitattributes | 36 + README.md | 81 + all_results.json | 13 + config.json | 39 + eval_results.json | 8 + generation_config.json | 12 + model.safetensors | 3 + special_tokens_map.json | 26 + tokenizer.json | 3 + tokenizer_config.json | 2069 + train.yaml | 55 + train_results.json | 9 + trainer_log.jsonl | 15641 +++++ trainer_state.json | 125183 +++++++++++++++++++++++++++++++++++++ training_args.bin | 3 + training_eval_loss.png | Bin 0 -> 46483 bytes training_loss.png | Bin 0 -> 39037 bytes 17 files changed, 143181 insertions(+) create mode 100644 .gitattributes create mode 100644 README.md create mode 100644 all_results.json create mode 100644 config.json create mode 100644 eval_results.json create mode 100644 generation_config.json create mode 100644 model.safetensors create mode 100644 special_tokens_map.json create mode 100644 tokenizer.json create mode 100644 tokenizer_config.json create mode 100644 train.yaml create mode 100644 train_results.json create mode 100644 trainer_log.jsonl create mode 100644 trainer_state.json create mode 100644 training_args.bin create mode 100644 training_eval_loss.png create mode 100644 training_loss.png diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..52373fe --- /dev/null +++ b/.gitattributes @@ -0,0 +1,36 @@ +*.7z filter=lfs diff=lfs merge=lfs -text +*.arrow filter=lfs diff=lfs merge=lfs -text +*.bin filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.ckpt filter=lfs diff=lfs merge=lfs -text +*.ftz filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.h5 filter=lfs diff=lfs merge=lfs -text +*.joblib filter=lfs diff=lfs merge=lfs -text +*.lfs.* filter=lfs diff=lfs merge=lfs -text +*.mlmodel filter=lfs diff=lfs merge=lfs -text +*.model filter=lfs diff=lfs merge=lfs -text +*.msgpack filter=lfs diff=lfs merge=lfs -text +*.npy filter=lfs diff=lfs merge=lfs -text +*.npz filter=lfs diff=lfs merge=lfs -text +*.onnx filter=lfs diff=lfs merge=lfs -text +*.ot filter=lfs diff=lfs merge=lfs -text +*.parquet filter=lfs diff=lfs merge=lfs -text +*.pb filter=lfs diff=lfs merge=lfs -text +*.pickle filter=lfs diff=lfs merge=lfs -text +*.pkl filter=lfs diff=lfs merge=lfs -text +*.pt filter=lfs diff=lfs merge=lfs -text +*.pth filter=lfs diff=lfs merge=lfs -text +*.rar filter=lfs diff=lfs merge=lfs -text +*.safetensors filter=lfs diff=lfs merge=lfs -text +saved_model/**/* filter=lfs diff=lfs merge=lfs -text +*.tar.* filter=lfs diff=lfs merge=lfs -text +*.tar filter=lfs diff=lfs merge=lfs -text +*.tflite filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.wasm filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text +*.zst filter=lfs diff=lfs merge=lfs -text +*tfevents* filter=lfs diff=lfs merge=lfs -text +tokenizer.json filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md new file mode 100644 index 0000000..31f393f --- /dev/null +++ b/README.md @@ -0,0 +1,81 @@ +--- +library_name: transformers +license: llama3.2 +base_model: meta-llama/Llama-3.2-1B-Instruct +tags: +- peft-factory +- full +- llama-factory +- generated_from_trainer +model-index: +- name: train_record_42_1776331412 + results: [] +--- + + + +# train_record_42_1776331412 + +This model is a fine-tuned version of [meta-llama/Llama-3.2-1B-Instruct](https://huggingface.co/meta-llama/Llama-3.2-1B-Instruct) on the record dataset. +It achieves the following results on the evaluation set: +- Loss: 0.4481 +- Num Input Tokens Seen: 245808128 + +## Model description + +More information needed + +## Intended uses & limitations + +More information needed + +## Training and evaluation data + +More information needed + +## Training procedure + +### Training hyperparameters + +The following hyperparameters were used during training: +- learning_rate: 5e-06 +- train_batch_size: 8 +- eval_batch_size: 8 +- seed: 42 +- optimizer: Use OptimizerNames.ADAMW_TORCH with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments +- lr_scheduler_type: cosine +- lr_scheduler_warmup_ratio: 0.1 +- num_epochs: 5 + +### Training results + +| Training Loss | Epoch | Step | Validation Loss | Input Tokens Seen | +|:-------------:|:------:|:-----:|:---------------:|:-----------------:| +| 0.6094 | 0.2500 | 3906 | 0.5014 | 12292032 | +| 0.4689 | 0.5001 | 7812 | 0.5265 | 24620672 | +| 0.5124 | 0.7501 | 11718 | 0.4985 | 36894016 | +| 0.343 | 1.0002 | 15624 | 0.4854 | 49176512 | +| 0.265 | 1.2502 | 19530 | 0.5116 | 61465280 | +| 0.2897 | 1.5003 | 23436 | 0.4806 | 73739776 | +| 0.2995 | 1.7503 | 27342 | 0.4774 | 86015936 | +| 0.2658 | 2.0004 | 31248 | 0.4481 | 98341056 | +| 0.2663 | 2.2504 | 35154 | 0.5257 | 110649216 | +| 0.1792 | 2.5005 | 39060 | 0.5071 | 122910592 | +| 0.2395 | 2.7505 | 42966 | 0.5056 | 135222656 | +| 0.1496 | 3.0006 | 46872 | 0.5023 | 147516736 | +| 0.1005 | 3.2506 | 50778 | 0.5569 | 159826368 | +| 0.159 | 3.5007 | 54684 | 0.5747 | 172084032 | +| 0.1324 | 3.7507 | 58590 | 0.5466 | 184402752 | +| 0.1773 | 4.0008 | 62496 | 0.5555 | 196687936 | +| 0.0922 | 4.2508 | 66402 | 0.6279 | 209017024 | +| 0.1645 | 4.5009 | 70308 | 0.6087 | 221278272 | +| 0.1252 | 4.7509 | 74214 | 0.6058 | 233564288 | + + +### Framework versions + +- Transformers 4.51.3 +- Pytorch 2.10.0+cu128 +- Datasets 4.0.0 +- Tokenizers 0.21.4 diff --git a/all_results.json b/all_results.json new file mode 100644 index 0000000..6b71066 --- /dev/null +++ b/all_results.json @@ -0,0 +1,13 @@ +{ + "epoch": 5.0, + "eval_loss": 0.4481422007083893, + "eval_runtime": 50.9215, + "eval_samples_per_second": 272.694, + "eval_steps_per_second": 34.092, + "num_input_tokens_seen": 245808128, + "total_flos": 1.43524334436719e+18, + "train_loss": 0.2808395623519733, + "train_runtime": 12345.7888, + "train_samples_per_second": 50.612, + "train_steps_per_second": 6.326 +} \ No newline at end of file diff --git a/config.json b/config.json new file mode 100644 index 0000000..5a2b93f --- /dev/null +++ b/config.json @@ -0,0 +1,39 @@ +{ + "architectures": [ + "LlamaForCausalLM" + ], + "attention_bias": false, + "attention_dropout": 0.0, + "bos_token_id": 128000, + "eos_token_id": [ + 128001, + 128008, + 128009 + ], + "head_dim": 64, + "hidden_act": "silu", + "hidden_size": 2048, + "initializer_range": 0.02, + "intermediate_size": 8192, + "max_position_embeddings": 131072, + "mlp_bias": false, + "model_type": "llama", + "num_attention_heads": 32, + "num_hidden_layers": 16, + "num_key_value_heads": 8, + "pretraining_tp": 1, + "rms_norm_eps": 1e-05, + "rope_scaling": { + "factor": 32.0, + "high_freq_factor": 4.0, + "low_freq_factor": 1.0, + "original_max_position_embeddings": 8192, + "rope_type": "llama3" + }, + "rope_theta": 500000.0, + "tie_word_embeddings": true, + "torch_dtype": "float32", + "transformers_version": "4.51.3", + "use_cache": false, + "vocab_size": 128256 +} diff --git a/eval_results.json b/eval_results.json new file mode 100644 index 0000000..4b860eb --- /dev/null +++ b/eval_results.json @@ -0,0 +1,8 @@ +{ + "epoch": 5.0, + "eval_loss": 0.4481422007083893, + "eval_runtime": 50.9215, + "eval_samples_per_second": 272.694, + "eval_steps_per_second": 34.092, + "num_input_tokens_seen": 245808128 +} \ No newline at end of file diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000..2b8ae57 --- /dev/null +++ b/generation_config.json @@ -0,0 +1,12 @@ +{ + "bos_token_id": 128000, + "do_sample": true, + "eos_token_id": [ + 128001, + 128008, + 128009 + ], + "temperature": 0.6, + "top_p": 0.9, + "transformers_version": "4.51.3" +} diff --git a/model.safetensors b/model.safetensors new file mode 100644 index 0000000..3f39cae --- /dev/null +++ b/model.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe9ddaf8972ad4e41c9c3caea2f14ab808002010efd5ad03fcfc31c519586115 +size 4943274328 diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000..14daf45 --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,26 @@ +{ + "additional_special_tokens": [ + { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } + ], + "bos_token": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": "<|eot_id|>" +} diff --git a/tokenizer.json b/tokenizer.json new file mode 100644 index 0000000..1c1d8d5 --- /dev/null +++ b/tokenizer.json @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6b9e4e7fb171f92fd137b777cc2714bf87d11576700a1dcd7a399e7bbe39537b +size 17209920 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000..ddc3ce0 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,2069 @@ +{ + "added_tokens_decoder": { + "128000": { + "content": "<|begin_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128001": { + "content": "<|end_of_text|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128002": { + "content": "<|reserved_special_token_0|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128003": { + "content": "<|reserved_special_token_1|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128004": { + "content": "<|finetune_right_pad_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128005": { + "content": "<|reserved_special_token_2|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128006": { + "content": "<|start_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128007": { + "content": "<|end_header_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128008": { + "content": "<|eom_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128009": { + "content": "<|eot_id|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128010": { + "content": "<|python_tag|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128011": { + "content": "<|reserved_special_token_3|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128012": { + "content": "<|reserved_special_token_4|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128013": { + "content": "<|reserved_special_token_5|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128014": { + "content": "<|reserved_special_token_6|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128015": { + "content": "<|reserved_special_token_7|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128016": { + "content": "<|reserved_special_token_8|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128017": { + "content": "<|reserved_special_token_9|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128018": { + "content": "<|reserved_special_token_10|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128019": { + "content": "<|reserved_special_token_11|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128020": { + "content": "<|reserved_special_token_12|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128021": { + "content": "<|reserved_special_token_13|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128022": { + "content": "<|reserved_special_token_14|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128023": { + "content": "<|reserved_special_token_15|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128024": { + "content": "<|reserved_special_token_16|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128025": { + "content": "<|reserved_special_token_17|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128026": { + "content": "<|reserved_special_token_18|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128027": { + "content": "<|reserved_special_token_19|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128028": { + "content": "<|reserved_special_token_20|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128029": { + "content": "<|reserved_special_token_21|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128030": { + "content": "<|reserved_special_token_22|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128031": { + "content": "<|reserved_special_token_23|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128032": { + "content": "<|reserved_special_token_24|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128033": { + "content": "<|reserved_special_token_25|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128034": { + "content": "<|reserved_special_token_26|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128035": { + "content": "<|reserved_special_token_27|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128036": { + "content": "<|reserved_special_token_28|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128037": { + "content": "<|reserved_special_token_29|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128038": { + "content": "<|reserved_special_token_30|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128039": { + "content": "<|reserved_special_token_31|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128040": { + "content": "<|reserved_special_token_32|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128041": { + "content": "<|reserved_special_token_33|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128042": { + "content": "<|reserved_special_token_34|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128043": { + "content": "<|reserved_special_token_35|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128044": { + "content": "<|reserved_special_token_36|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128045": { + "content": "<|reserved_special_token_37|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128046": { + "content": "<|reserved_special_token_38|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128047": { + "content": "<|reserved_special_token_39|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128048": { + "content": "<|reserved_special_token_40|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128049": { + "content": "<|reserved_special_token_41|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128050": { + "content": "<|reserved_special_token_42|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128051": { + "content": "<|reserved_special_token_43|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128052": { + "content": "<|reserved_special_token_44|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128053": { + "content": "<|reserved_special_token_45|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128054": { + "content": "<|reserved_special_token_46|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128055": { + "content": "<|reserved_special_token_47|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128056": { + "content": "<|reserved_special_token_48|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128057": { + "content": "<|reserved_special_token_49|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128058": { + "content": "<|reserved_special_token_50|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128059": { + "content": "<|reserved_special_token_51|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128060": { + "content": "<|reserved_special_token_52|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128061": { + "content": "<|reserved_special_token_53|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128062": { + "content": "<|reserved_special_token_54|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128063": { + "content": "<|reserved_special_token_55|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128064": { + "content": "<|reserved_special_token_56|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128065": { + "content": "<|reserved_special_token_57|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128066": { + "content": "<|reserved_special_token_58|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128067": { + "content": "<|reserved_special_token_59|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128068": { + "content": "<|reserved_special_token_60|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128069": { + "content": "<|reserved_special_token_61|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128070": { + "content": "<|reserved_special_token_62|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128071": { + "content": "<|reserved_special_token_63|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128072": { + "content": "<|reserved_special_token_64|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128073": { + "content": "<|reserved_special_token_65|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128074": { + "content": "<|reserved_special_token_66|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128075": { + "content": "<|reserved_special_token_67|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128076": { + "content": "<|reserved_special_token_68|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128077": { + "content": "<|reserved_special_token_69|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128078": { + "content": "<|reserved_special_token_70|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128079": { + "content": "<|reserved_special_token_71|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128080": { + "content": "<|reserved_special_token_72|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128081": { + "content": "<|reserved_special_token_73|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128082": { + "content": "<|reserved_special_token_74|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128083": { + "content": "<|reserved_special_token_75|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128084": { + "content": "<|reserved_special_token_76|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128085": { + "content": "<|reserved_special_token_77|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128086": { + "content": "<|reserved_special_token_78|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128087": { + "content": "<|reserved_special_token_79|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128088": { + "content": "<|reserved_special_token_80|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128089": { + "content": "<|reserved_special_token_81|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128090": { + "content": "<|reserved_special_token_82|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128091": { + "content": "<|reserved_special_token_83|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128092": { + "content": "<|reserved_special_token_84|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128093": { + "content": "<|reserved_special_token_85|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128094": { + "content": "<|reserved_special_token_86|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128095": { + "content": "<|reserved_special_token_87|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128096": { + "content": "<|reserved_special_token_88|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128097": { + "content": "<|reserved_special_token_89|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128098": { + "content": "<|reserved_special_token_90|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128099": { + "content": "<|reserved_special_token_91|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128100": { + "content": "<|reserved_special_token_92|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128101": { + "content": "<|reserved_special_token_93|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128102": { + "content": "<|reserved_special_token_94|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128103": { + "content": "<|reserved_special_token_95|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128104": { + "content": "<|reserved_special_token_96|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128105": { + "content": "<|reserved_special_token_97|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128106": { + "content": "<|reserved_special_token_98|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128107": { + "content": "<|reserved_special_token_99|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128108": { + "content": "<|reserved_special_token_100|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128109": { + "content": "<|reserved_special_token_101|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128110": { + "content": "<|reserved_special_token_102|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128111": { + "content": "<|reserved_special_token_103|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128112": { + "content": "<|reserved_special_token_104|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128113": { + "content": "<|reserved_special_token_105|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128114": { + "content": "<|reserved_special_token_106|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128115": { + "content": "<|reserved_special_token_107|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128116": { + "content": "<|reserved_special_token_108|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128117": { + "content": "<|reserved_special_token_109|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128118": { + "content": "<|reserved_special_token_110|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128119": { + "content": "<|reserved_special_token_111|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128120": { + "content": "<|reserved_special_token_112|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128121": { + "content": "<|reserved_special_token_113|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128122": { + "content": "<|reserved_special_token_114|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128123": { + "content": "<|reserved_special_token_115|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128124": { + "content": "<|reserved_special_token_116|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128125": { + "content": "<|reserved_special_token_117|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128126": { + "content": "<|reserved_special_token_118|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128127": { + "content": "<|reserved_special_token_119|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128128": { + "content": "<|reserved_special_token_120|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128129": { + "content": "<|reserved_special_token_121|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128130": { + "content": "<|reserved_special_token_122|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128131": { + "content": "<|reserved_special_token_123|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128132": { + "content": "<|reserved_special_token_124|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128133": { + "content": "<|reserved_special_token_125|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128134": { + "content": "<|reserved_special_token_126|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128135": { + "content": "<|reserved_special_token_127|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128136": { + "content": "<|reserved_special_token_128|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128137": { + "content": "<|reserved_special_token_129|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128138": { + "content": "<|reserved_special_token_130|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128139": { + "content": "<|reserved_special_token_131|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128140": { + "content": "<|reserved_special_token_132|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128141": { + "content": "<|reserved_special_token_133|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128142": { + "content": "<|reserved_special_token_134|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128143": { + "content": "<|reserved_special_token_135|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128144": { + "content": "<|reserved_special_token_136|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128145": { + "content": "<|reserved_special_token_137|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128146": { + "content": "<|reserved_special_token_138|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128147": { + "content": "<|reserved_special_token_139|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128148": { + "content": "<|reserved_special_token_140|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128149": { + "content": "<|reserved_special_token_141|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128150": { + "content": "<|reserved_special_token_142|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128151": { + "content": "<|reserved_special_token_143|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128152": { + "content": "<|reserved_special_token_144|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128153": { + "content": "<|reserved_special_token_145|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128154": { + "content": "<|reserved_special_token_146|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128155": { + "content": "<|reserved_special_token_147|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128156": { + "content": "<|reserved_special_token_148|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128157": { + "content": "<|reserved_special_token_149|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128158": { + "content": "<|reserved_special_token_150|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128159": { + "content": "<|reserved_special_token_151|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128160": { + "content": "<|reserved_special_token_152|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128161": { + "content": "<|reserved_special_token_153|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128162": { + "content": "<|reserved_special_token_154|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128163": { + "content": "<|reserved_special_token_155|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128164": { + "content": "<|reserved_special_token_156|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128165": { + "content": "<|reserved_special_token_157|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128166": { + "content": "<|reserved_special_token_158|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128167": { + "content": "<|reserved_special_token_159|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128168": { + "content": "<|reserved_special_token_160|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128169": { + "content": "<|reserved_special_token_161|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128170": { + "content": "<|reserved_special_token_162|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128171": { + "content": "<|reserved_special_token_163|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128172": { + "content": "<|reserved_special_token_164|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128173": { + "content": "<|reserved_special_token_165|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128174": { + "content": "<|reserved_special_token_166|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128175": { + "content": "<|reserved_special_token_167|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128176": { + "content": "<|reserved_special_token_168|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128177": { + "content": "<|reserved_special_token_169|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128178": { + "content": "<|reserved_special_token_170|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128179": { + "content": "<|reserved_special_token_171|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128180": { + "content": "<|reserved_special_token_172|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128181": { + "content": "<|reserved_special_token_173|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128182": { + "content": "<|reserved_special_token_174|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128183": { + "content": "<|reserved_special_token_175|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128184": { + "content": "<|reserved_special_token_176|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128185": { + "content": "<|reserved_special_token_177|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128186": { + "content": "<|reserved_special_token_178|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128187": { + "content": "<|reserved_special_token_179|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128188": { + "content": "<|reserved_special_token_180|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128189": { + "content": "<|reserved_special_token_181|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128190": { + "content": "<|reserved_special_token_182|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128191": { + "content": "<|reserved_special_token_183|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128192": { + "content": "<|reserved_special_token_184|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128193": { + "content": "<|reserved_special_token_185|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128194": { + "content": "<|reserved_special_token_186|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128195": { + "content": "<|reserved_special_token_187|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128196": { + "content": "<|reserved_special_token_188|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128197": { + "content": "<|reserved_special_token_189|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128198": { + "content": "<|reserved_special_token_190|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128199": { + "content": "<|reserved_special_token_191|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128200": { + "content": "<|reserved_special_token_192|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128201": { + "content": "<|reserved_special_token_193|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128202": { + "content": "<|reserved_special_token_194|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128203": { + "content": "<|reserved_special_token_195|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128204": { + "content": "<|reserved_special_token_196|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128205": { + "content": "<|reserved_special_token_197|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128206": { + "content": "<|reserved_special_token_198|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128207": { + "content": "<|reserved_special_token_199|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128208": { + "content": "<|reserved_special_token_200|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128209": { + "content": "<|reserved_special_token_201|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128210": { + "content": "<|reserved_special_token_202|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128211": { + "content": "<|reserved_special_token_203|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128212": { + "content": "<|reserved_special_token_204|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128213": { + "content": "<|reserved_special_token_205|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128214": { + "content": "<|reserved_special_token_206|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128215": { + "content": "<|reserved_special_token_207|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128216": { + "content": "<|reserved_special_token_208|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128217": { + "content": "<|reserved_special_token_209|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128218": { + "content": "<|reserved_special_token_210|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128219": { + "content": "<|reserved_special_token_211|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128220": { + "content": "<|reserved_special_token_212|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128221": { + "content": "<|reserved_special_token_213|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128222": { + "content": "<|reserved_special_token_214|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128223": { + "content": "<|reserved_special_token_215|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128224": { + "content": "<|reserved_special_token_216|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128225": { + "content": "<|reserved_special_token_217|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128226": { + "content": "<|reserved_special_token_218|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128227": { + "content": "<|reserved_special_token_219|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128228": { + "content": "<|reserved_special_token_220|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128229": { + "content": "<|reserved_special_token_221|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128230": { + "content": "<|reserved_special_token_222|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128231": { + "content": "<|reserved_special_token_223|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128232": { + "content": "<|reserved_special_token_224|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128233": { + "content": "<|reserved_special_token_225|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128234": { + "content": "<|reserved_special_token_226|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128235": { + "content": "<|reserved_special_token_227|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128236": { + "content": "<|reserved_special_token_228|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128237": { + "content": "<|reserved_special_token_229|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128238": { + "content": "<|reserved_special_token_230|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128239": { + "content": "<|reserved_special_token_231|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128240": { + "content": "<|reserved_special_token_232|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128241": { + "content": "<|reserved_special_token_233|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128242": { + "content": "<|reserved_special_token_234|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128243": { + "content": "<|reserved_special_token_235|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128244": { + "content": "<|reserved_special_token_236|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128245": { + "content": "<|reserved_special_token_237|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128246": { + "content": "<|reserved_special_token_238|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128247": { + "content": "<|reserved_special_token_239|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128248": { + "content": "<|reserved_special_token_240|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128249": { + "content": "<|reserved_special_token_241|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128250": { + "content": "<|reserved_special_token_242|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128251": { + "content": "<|reserved_special_token_243|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128252": { + "content": "<|reserved_special_token_244|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128253": { + "content": "<|reserved_special_token_245|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128254": { + "content": "<|reserved_special_token_246|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "128255": { + "content": "<|reserved_special_token_247|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [ + "<|eom_id|>" + ], + "bos_token": "<|begin_of_text|>", + "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- if strftime_now is defined %}\n {%- set date_string = strftime_now(\"%d %b %Y\") %}\n {%- else %}\n {%- set date_string = \"26 Jul 2024\" %}\n {%- endif %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- System message #}\n{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n{%- if tools is not none %}\n {{- \"Environment: ipython\\n\" }}\n{%- endif %}\n{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n{%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n{%- endif %}\n{{- system_message }}\n{{- \"<|eot_id|>\" }}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' }}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {{- \"<|eot_id|>\" }}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n", + "clean_up_tokenization_spaces": true, + "eos_token": "<|eot_id|>", + "extra_special_tokens": {}, + "model_input_names": [ + "input_ids", + "attention_mask" + ], + "model_max_length": 131072, + "pad_token": "<|eot_id|>", + "padding_side": "right", + "split_special_tokens": false, + "tokenizer_class": "PreTrainedTokenizer" +} diff --git a/train.yaml b/train.yaml new file mode 100644 index 0000000..77b2606 --- /dev/null +++ b/train.yaml @@ -0,0 +1,55 @@ +seed: 42 + +### model +model_name_or_path: meta-llama/Llama-3.2-1B-Instruct +trust_remote_code: true +flash_attn: auto +use_cache: false + +### method +stage: sft +do_train: true +finetuning_type: full + +### dataset +dataset: record +template: llama3 +cutoff_len: 2048 +overwrite_cache: true +preprocessing_num_workers: 4 +dataloader_num_workers: 4 +packing: false + +### output +output_dir: saves_bts_preliminary/base/llama-3.2-1b-instruct/train_record_42_1776331412 +logging_steps: 5 +save_steps: 0.05 +overwrite_output_dir: true +save_only_model: false +plot_loss: true +include_num_input_tokens_seen: true +push_to_hub: true +push_to_hub_organization: rbelanec +load_best_model_at_end: true +save_total_limit: 1 + +### train +per_device_train_batch_size: 8 +learning_rate: 5.0e-6 +num_train_epochs: 5 +weight_decay: 1.0e-5 +lr_scheduler_type: cosine +bf16: true +ddp_timeout: 180000000 +resume_from_checkpoint: null +warmup_ratio: 0.1 +optim: adamw_torch +report_to: +- wandb +run_name: base_llama-3.2-1b-instruct_train_record_42_1776331412 + +### eval +per_device_eval_batch_size: 8 +eval_strategy: steps +eval_steps: 0.05 +val_size: 0.1 \ No newline at end of file diff --git a/train_results.json b/train_results.json new file mode 100644 index 0000000..ecf8bf4 --- /dev/null +++ b/train_results.json @@ -0,0 +1,9 @@ +{ + "epoch": 5.0, + "num_input_tokens_seen": 245808128, + "total_flos": 1.43524334436719e+18, + "train_loss": 0.2808395623519733, + "train_runtime": 12345.7888, + "train_samples_per_second": 50.612, + "train_steps_per_second": 6.326 +} \ No newline at end of file diff --git a/trainer_log.jsonl b/trainer_log.jsonl new file mode 100644 index 0000000..4b64d67 --- /dev/null +++ b/trainer_log.jsonl @@ -0,0 +1,15641 @@ +{"current_steps": 5, "total_steps": 78105, "loss": 2.1551, "lr": 2.560491614389963e-09, "epoch": 0.0003200819409768901, "percentage": 0.01, "elapsed_time": "0:00:01", "remaining_time": "4:41:54", "throughput": 14184.41, "total_tokens": 15360} +{"current_steps": 10, "total_steps": 78105, "loss": 2.3621, "lr": 5.761106132377417e-09, "epoch": 0.0006401638819537802, "percentage": 0.01, "elapsed_time": "0:00:01", "remaining_time": "3:50:57", "throughput": 17528.16, "total_tokens": 31104} +{"current_steps": 15, "total_steps": 78105, "loss": 2.1414, "lr": 8.961720650364872e-09, "epoch": 0.0009602458229306702, "percentage": 0.02, "elapsed_time": "0:00:02", "remaining_time": "3:31:14", "throughput": 18979.51, "total_tokens": 46208} +{"current_steps": 20, "total_steps": 78105, "loss": 2.7749, "lr": 1.2162335168352324e-08, "epoch": 0.0012803277639075604, "percentage": 0.03, "elapsed_time": "0:00:03", "remaining_time": "3:23:47", "throughput": 19945.06, "total_tokens": 62464} +{"current_steps": 25, "total_steps": 78105, "loss": 2.146, "lr": 1.536294968633978e-08, "epoch": 0.0016004097048844504, "percentage": 0.03, "elapsed_time": "0:00:03", "remaining_time": "3:19:50", "throughput": 20603.57, "total_tokens": 79104} +{"current_steps": 30, "total_steps": 78105, "loss": 2.2474, "lr": 1.8563564204327233e-08, "epoch": 0.0019204916458613404, "percentage": 0.04, "elapsed_time": "0:00:04", "remaining_time": "3:16:04", "throughput": 20996.81, "total_tokens": 94912} +{"current_steps": 35, "total_steps": 78105, "loss": 2.3948, "lr": 2.1764178722314687e-08, "epoch": 0.0022405735868382304, "percentage": 0.04, "elapsed_time": "0:00:05", "remaining_time": "3:13:31", "throughput": 21282.46, "total_tokens": 110784} +{"current_steps": 40, "total_steps": 78105, "loss": 2.2191, "lr": 2.496479324030214e-08, "epoch": 0.002560655527815121, "percentage": 0.05, "elapsed_time": "0:00:05", "remaining_time": "3:10:33", "throughput": 21455.77, "total_tokens": 125696} +{"current_steps": 45, "total_steps": 78105, "loss": 2.2197, "lr": 2.8165407758289592e-08, "epoch": 0.002880737468792011, "percentage": 0.06, "elapsed_time": "0:00:06", "remaining_time": "3:08:15", "throughput": 21602.64, "total_tokens": 140672} +{"current_steps": 50, "total_steps": 78105, "loss": 2.1808, "lr": 3.1366022276277046e-08, "epoch": 0.003200819409768901, "percentage": 0.06, "elapsed_time": "0:00:07", "remaining_time": "3:06:24", "throughput": 21698.56, "total_tokens": 155456} +{"current_steps": 55, "total_steps": 78105, "loss": 2.078, "lr": 3.4566636794264506e-08, "epoch": 0.003520901350745791, "percentage": 0.07, "elapsed_time": "0:00:07", "remaining_time": "3:05:24", "throughput": 21790.88, "total_tokens": 170816} +{"current_steps": 60, "total_steps": 78105, "loss": 2.3648, "lr": 3.7767251312251953e-08, "epoch": 0.003840983291722681, "percentage": 0.08, "elapsed_time": "0:00:08", "remaining_time": "3:03:44", "throughput": 21838.24, "total_tokens": 185088} +{"current_steps": 65, "total_steps": 78105, "loss": 2.0772, "lr": 4.096786583023941e-08, "epoch": 0.004161065232699571, "percentage": 0.08, "elapsed_time": "0:00:09", "remaining_time": "3:02:55", "throughput": 21919.25, "total_tokens": 200384} +{"current_steps": 70, "total_steps": 78105, "loss": 2.047, "lr": 4.416848034822686e-08, "epoch": 0.004481147173676461, "percentage": 0.09, "elapsed_time": "0:00:09", "remaining_time": "3:02:20", "throughput": 21982.49, "total_tokens": 215744} +{"current_steps": 75, "total_steps": 78105, "loss": 2.0567, "lr": 4.736909486621432e-08, "epoch": 0.004801229114653352, "percentage": 0.1, "elapsed_time": "0:00:10", "remaining_time": "3:01:32", "throughput": 22006.65, "total_tokens": 230400} +{"current_steps": 80, "total_steps": 78105, "loss": 1.8879, "lr": 5.056970938420177e-08, "epoch": 0.005121311055630242, "percentage": 0.1, "elapsed_time": "0:00:11", "remaining_time": "3:01:24", "throughput": 22096.74, "total_tokens": 246592} +{"current_steps": 85, "total_steps": 78105, "loss": 1.6707, "lr": 5.377032390218922e-08, "epoch": 0.005441392996607132, "percentage": 0.11, "elapsed_time": "0:00:11", "remaining_time": "3:01:04", "throughput": 22156.94, "total_tokens": 262272} +{"current_steps": 90, "total_steps": 78105, "loss": 1.5695, "lr": 5.6970938420176683e-08, "epoch": 0.005761474937584022, "percentage": 0.12, "elapsed_time": "0:00:12", "remaining_time": "3:00:45", "throughput": 22199.97, "total_tokens": 277760} +{"current_steps": 95, "total_steps": 78105, "loss": 1.8715, "lr": 6.017155293816413e-08, "epoch": 0.006081556878560912, "percentage": 0.12, "elapsed_time": "0:00:13", "remaining_time": "3:00:18", "throughput": 22238.3, "total_tokens": 292992} +{"current_steps": 100, "total_steps": 78105, "loss": 1.6241, "lr": 6.337216745615158e-08, "epoch": 0.006401638819537802, "percentage": 0.13, "elapsed_time": "0:00:13", "remaining_time": "2:59:39", "throughput": 22275.8, "total_tokens": 307840} +{"current_steps": 105, "total_steps": 78105, "loss": 1.6729, "lr": 6.657278197413904e-08, "epoch": 0.006721720760514692, "percentage": 0.13, "elapsed_time": "0:00:14", "remaining_time": "2:59:14", "throughput": 22311.85, "total_tokens": 323008} +{"current_steps": 110, "total_steps": 78105, "loss": 1.6035, "lr": 6.977339649212649e-08, "epoch": 0.007041802701491582, "percentage": 0.14, "elapsed_time": "0:00:15", "remaining_time": "2:59:20", "throughput": 22367.37, "total_tokens": 339456} +{"current_steps": 115, "total_steps": 78105, "loss": 1.4905, "lr": 7.297401101011395e-08, "epoch": 0.007361884642468472, "percentage": 0.15, "elapsed_time": "0:00:15", "remaining_time": "2:59:05", "throughput": 22392.76, "total_tokens": 354816} +{"current_steps": 120, "total_steps": 78105, "loss": 1.1193, "lr": 7.61746255281014e-08, "epoch": 0.007681966583445362, "percentage": 0.15, "elapsed_time": "0:00:16", "remaining_time": "2:58:35", "throughput": 22408.08, "total_tokens": 369472} +{"current_steps": 125, "total_steps": 78105, "loss": 1.2304, "lr": 7.937524004608884e-08, "epoch": 0.008002048524422252, "percentage": 0.16, "elapsed_time": "0:00:17", "remaining_time": "2:58:21", "throughput": 22429.16, "total_tokens": 384768} +{"current_steps": 130, "total_steps": 78105, "loss": 1.2864, "lr": 8.257585456407631e-08, "epoch": 0.008322130465399142, "percentage": 0.17, "elapsed_time": "0:00:17", "remaining_time": "2:58:09", "throughput": 22455.13, "total_tokens": 400192} +{"current_steps": 135, "total_steps": 78105, "loss": 1.1549, "lr": 8.577646908206376e-08, "epoch": 0.008642212406376032, "percentage": 0.17, "elapsed_time": "0:00:18", "remaining_time": "2:58:16", "throughput": 22496.2, "total_tokens": 416640} +{"current_steps": 140, "total_steps": 78105, "loss": 1.0669, "lr": 8.89770836000512e-08, "epoch": 0.008962294347352922, "percentage": 0.18, "elapsed_time": "0:00:19", "remaining_time": "2:58:15", "throughput": 22525.89, "total_tokens": 432640} +{"current_steps": 145, "total_steps": 78105, "loss": 1.1356, "lr": 9.217769811803867e-08, "epoch": 0.009282376288329812, "percentage": 0.19, "elapsed_time": "0:00:19", "remaining_time": "2:58:18", "throughput": 22546.13, "total_tokens": 448640} +{"current_steps": 150, "total_steps": 78105, "loss": 1.0674, "lr": 9.537831263602612e-08, "epoch": 0.009602458229306703, "percentage": 0.19, "elapsed_time": "0:00:20", "remaining_time": "2:58:16", "throughput": 22564.66, "total_tokens": 464448} +{"current_steps": 155, "total_steps": 78105, "loss": 1.0259, "lr": 9.857892715401356e-08, "epoch": 0.009922540170283593, "percentage": 0.2, "elapsed_time": "0:00:21", "remaining_time": "2:58:02", "throughput": 22573.14, "total_tokens": 479488} +{"current_steps": 160, "total_steps": 78105, "loss": 0.8523, "lr": 1.0177954167200103e-07, "epoch": 0.010242622111260483, "percentage": 0.2, "elapsed_time": "0:00:21", "remaining_time": "2:57:58", "throughput": 22595.57, "total_tokens": 495296} +{"current_steps": 165, "total_steps": 78105, "loss": 0.8782, "lr": 1.0498015618998849e-07, "epoch": 0.010562704052237373, "percentage": 0.21, "elapsed_time": "0:00:22", "remaining_time": "2:57:38", "throughput": 22608.06, "total_tokens": 510144} +{"current_steps": 170, "total_steps": 78105, "loss": 0.8971, "lr": 1.0818077070797593e-07, "epoch": 0.010882785993214263, "percentage": 0.22, "elapsed_time": "0:00:23", "remaining_time": "2:57:23", "throughput": 22610.26, "total_tokens": 524928} +{"current_steps": 175, "total_steps": 78105, "loss": 0.8068, "lr": 1.1138138522596339e-07, "epoch": 0.011202867934191153, "percentage": 0.22, "elapsed_time": "0:00:23", "remaining_time": "2:57:33", "throughput": 22634.73, "total_tokens": 541504} +{"current_steps": 180, "total_steps": 78105, "loss": 0.7725, "lr": 1.1458199974395085e-07, "epoch": 0.011522949875168043, "percentage": 0.23, "elapsed_time": "0:00:24", "remaining_time": "2:57:13", "throughput": 22640.0, "total_tokens": 556096} +{"current_steps": 185, "total_steps": 78105, "loss": 0.7868, "lr": 1.1778261426193829e-07, "epoch": 0.011843031816144933, "percentage": 0.24, "elapsed_time": "0:00:25", "remaining_time": "2:57:22", "throughput": 22666.58, "total_tokens": 572736} +{"current_steps": 190, "total_steps": 78105, "loss": 0.8952, "lr": 1.2098322877992575e-07, "epoch": 0.012163113757121823, "percentage": 0.24, "elapsed_time": "0:00:25", "remaining_time": "2:57:18", "throughput": 22678.75, "total_tokens": 588352} +{"current_steps": 195, "total_steps": 78105, "loss": 1.0207, "lr": 1.2418384329791322e-07, "epoch": 0.012483195698098713, "percentage": 0.25, "elapsed_time": "0:00:26", "remaining_time": "2:57:09", "throughput": 22685.57, "total_tokens": 603520} +{"current_steps": 200, "total_steps": 78105, "loss": 0.837, "lr": 1.2738445781590066e-07, "epoch": 0.012803277639075603, "percentage": 0.26, "elapsed_time": "0:00:27", "remaining_time": "2:57:08", "throughput": 22699.38, "total_tokens": 619392} +{"current_steps": 205, "total_steps": 78105, "loss": 0.8286, "lr": 1.305850723338881e-07, "epoch": 0.013123359580052493, "percentage": 0.26, "elapsed_time": "0:00:27", "remaining_time": "2:57:08", "throughput": 22720.38, "total_tokens": 635456} +{"current_steps": 210, "total_steps": 78105, "loss": 0.9468, "lr": 1.3378568685187557e-07, "epoch": 0.013443441521029383, "percentage": 0.27, "elapsed_time": "0:00:28", "remaining_time": "2:57:01", "throughput": 22729.5, "total_tokens": 650880} +{"current_steps": 215, "total_steps": 78105, "loss": 0.8357, "lr": 1.36986301369863e-07, "epoch": 0.013763523462006273, "percentage": 0.28, "elapsed_time": "0:00:29", "remaining_time": "2:57:02", "throughput": 22738.07, "total_tokens": 666688} +{"current_steps": 220, "total_steps": 78105, "loss": 0.8286, "lr": 1.4018691588785048e-07, "epoch": 0.014083605402983163, "percentage": 0.28, "elapsed_time": "0:00:29", "remaining_time": "2:56:55", "throughput": 22747.83, "total_tokens": 682112} +{"current_steps": 225, "total_steps": 78105, "loss": 0.8606, "lr": 1.4338753040583795e-07, "epoch": 0.014403687343960053, "percentage": 0.29, "elapsed_time": "0:00:30", "remaining_time": "2:56:50", "throughput": 22760.17, "total_tokens": 697728} +{"current_steps": 230, "total_steps": 78105, "loss": 0.6876, "lr": 1.4658814492382539e-07, "epoch": 0.014723769284936943, "percentage": 0.29, "elapsed_time": "0:00:31", "remaining_time": "2:56:38", "throughput": 22769.44, "total_tokens": 712704} +{"current_steps": 235, "total_steps": 78105, "loss": 1.0125, "lr": 1.4978875944181283e-07, "epoch": 0.015043851225913833, "percentage": 0.3, "elapsed_time": "0:00:32", "remaining_time": "2:56:46", "throughput": 22787.51, "total_tokens": 729408} +{"current_steps": 240, "total_steps": 78105, "loss": 0.8374, "lr": 1.529893739598003e-07, "epoch": 0.015363933166890723, "percentage": 0.31, "elapsed_time": "0:00:32", "remaining_time": "2:56:46", "throughput": 22799.39, "total_tokens": 745344} +{"current_steps": 245, "total_steps": 78105, "loss": 0.7396, "lr": 1.5618998847778773e-07, "epoch": 0.015684015107867613, "percentage": 0.31, "elapsed_time": "0:00:33", "remaining_time": "2:56:57", "throughput": 22828.65, "total_tokens": 762688} +{"current_steps": 250, "total_steps": 78105, "loss": 0.7435, "lr": 1.5939060299577523e-07, "epoch": 0.016004097048844503, "percentage": 0.32, "elapsed_time": "0:00:34", "remaining_time": "2:57:05", "throughput": 22843.38, "total_tokens": 779392} +{"current_steps": 255, "total_steps": 78105, "loss": 0.8331, "lr": 1.6259121751376267e-07, "epoch": 0.016324178989821393, "percentage": 0.33, "elapsed_time": "0:00:34", "remaining_time": "2:56:52", "throughput": 22844.02, "total_tokens": 794112} +{"current_steps": 260, "total_steps": 78105, "loss": 0.8581, "lr": 1.657918320317501e-07, "epoch": 0.016644260930798283, "percentage": 0.33, "elapsed_time": "0:00:35", "remaining_time": "2:56:52", "throughput": 22855.17, "total_tokens": 810112} +{"current_steps": 265, "total_steps": 78105, "loss": 0.8835, "lr": 1.6899244654973758e-07, "epoch": 0.016964342871775173, "percentage": 0.34, "elapsed_time": "0:00:36", "remaining_time": "2:56:46", "throughput": 22860.49, "total_tokens": 825472} +{"current_steps": 270, "total_steps": 78105, "loss": 1.0456, "lr": 1.7219306106772502e-07, "epoch": 0.017284424812752063, "percentage": 0.35, "elapsed_time": "0:00:36", "remaining_time": "2:56:34", "throughput": 22859.36, "total_tokens": 840128} +{"current_steps": 275, "total_steps": 78105, "loss": 0.8342, "lr": 1.7539367558571246e-07, "epoch": 0.017604506753728953, "percentage": 0.35, "elapsed_time": "0:00:37", "remaining_time": "2:56:26", "throughput": 22861.1, "total_tokens": 855104} +{"current_steps": 280, "total_steps": 78105, "loss": 0.8543, "lr": 1.7859429010369995e-07, "epoch": 0.017924588694705843, "percentage": 0.36, "elapsed_time": "0:00:38", "remaining_time": "2:56:25", "throughput": 22866.03, "total_tokens": 870848} +{"current_steps": 285, "total_steps": 78105, "loss": 0.7704, "lr": 1.817949046216874e-07, "epoch": 0.018244670635682733, "percentage": 0.36, "elapsed_time": "0:00:38", "remaining_time": "2:56:16", "throughput": 22868.57, "total_tokens": 885760} +{"current_steps": 290, "total_steps": 78105, "loss": 0.7688, "lr": 1.8499551913967483e-07, "epoch": 0.018564752576659623, "percentage": 0.37, "elapsed_time": "0:00:39", "remaining_time": "2:56:09", "throughput": 22870.87, "total_tokens": 900928} +{"current_steps": 295, "total_steps": 78105, "loss": 0.8229, "lr": 1.881961336576623e-07, "epoch": 0.018884834517636517, "percentage": 0.38, "elapsed_time": "0:00:40", "remaining_time": "2:56:00", "throughput": 22877.37, "total_tokens": 915968} +{"current_steps": 300, "total_steps": 78105, "loss": 1.0619, "lr": 1.9139674817564974e-07, "epoch": 0.019204916458613407, "percentage": 0.38, "elapsed_time": "0:00:40", "remaining_time": "2:56:07", "throughput": 22899.33, "total_tokens": 933056} +{"current_steps": 305, "total_steps": 78105, "loss": 0.7684, "lr": 1.9459736269363718e-07, "epoch": 0.019524998399590297, "percentage": 0.39, "elapsed_time": "0:00:41", "remaining_time": "2:56:03", "throughput": 22901.72, "total_tokens": 948416} +{"current_steps": 310, "total_steps": 78105, "loss": 0.8022, "lr": 1.9779797721162467e-07, "epoch": 0.019845080340567187, "percentage": 0.4, "elapsed_time": "0:00:42", "remaining_time": "2:55:52", "throughput": 22898.9, "total_tokens": 962880} +{"current_steps": 315, "total_steps": 78105, "loss": 0.8321, "lr": 2.0099859172961212e-07, "epoch": 0.020165162281544077, "percentage": 0.4, "elapsed_time": "0:00:42", "remaining_time": "2:56:00", "throughput": 22913.74, "total_tokens": 979904} +{"current_steps": 320, "total_steps": 78105, "loss": 0.8538, "lr": 2.0419920624759956e-07, "epoch": 0.020485244222520967, "percentage": 0.41, "elapsed_time": "0:00:43", "remaining_time": "2:55:56", "throughput": 22914.26, "total_tokens": 995136} +{"current_steps": 325, "total_steps": 78105, "loss": 0.8307, "lr": 2.0739982076558702e-07, "epoch": 0.020805326163497857, "percentage": 0.42, "elapsed_time": "0:00:44", "remaining_time": "2:55:56", "throughput": 22919.62, "total_tokens": 1011008} +{"current_steps": 330, "total_steps": 78105, "loss": 0.8699, "lr": 2.1060043528357446e-07, "epoch": 0.021125408104474747, "percentage": 0.42, "elapsed_time": "0:00:44", "remaining_time": "2:55:50", "throughput": 22913.85, "total_tokens": 1025792} +{"current_steps": 335, "total_steps": 78105, "loss": 0.7236, "lr": 2.138010498015619e-07, "epoch": 0.021445490045451637, "percentage": 0.43, "elapsed_time": "0:00:45", "remaining_time": "2:56:00", "throughput": 22927.32, "total_tokens": 1042944} +{"current_steps": 340, "total_steps": 78105, "loss": 0.8477, "lr": 2.170016643195494e-07, "epoch": 0.021765571986428527, "percentage": 0.44, "elapsed_time": "0:00:46", "remaining_time": "2:55:59", "throughput": 22930.78, "total_tokens": 1058688} +{"current_steps": 345, "total_steps": 78105, "loss": 0.7446, "lr": 2.2020227883753684e-07, "epoch": 0.022085653927405417, "percentage": 0.44, "elapsed_time": "0:00:46", "remaining_time": "2:55:59", "throughput": 22935.52, "total_tokens": 1074560} +{"current_steps": 350, "total_steps": 78105, "loss": 0.731, "lr": 2.2340289335552428e-07, "epoch": 0.022405735868382307, "percentage": 0.45, "elapsed_time": "0:00:47", "remaining_time": "2:55:55", "throughput": 22936.11, "total_tokens": 1089728} +{"current_steps": 355, "total_steps": 78105, "loss": 0.9404, "lr": 2.2660350787351175e-07, "epoch": 0.022725817809359197, "percentage": 0.45, "elapsed_time": "0:00:48", "remaining_time": "2:55:51", "throughput": 22937.5, "total_tokens": 1105024} +{"current_steps": 360, "total_steps": 78105, "loss": 0.8047, "lr": 2.298041223914992e-07, "epoch": 0.023045899750336087, "percentage": 0.46, "elapsed_time": "0:00:48", "remaining_time": "2:55:53", "throughput": 22941.73, "total_tokens": 1121088} +{"current_steps": 365, "total_steps": 78105, "loss": 0.7779, "lr": 2.3300473690948663e-07, "epoch": 0.023365981691312977, "percentage": 0.47, "elapsed_time": "0:00:49", "remaining_time": "2:55:51", "throughput": 22947.97, "total_tokens": 1136896} +{"current_steps": 370, "total_steps": 78105, "loss": 0.7289, "lr": 2.3620535142747412e-07, "epoch": 0.023686063632289867, "percentage": 0.47, "elapsed_time": "0:00:50", "remaining_time": "2:55:55", "throughput": 22955.19, "total_tokens": 1153280} +{"current_steps": 375, "total_steps": 78105, "loss": 0.9225, "lr": 2.3940596594546154e-07, "epoch": 0.024006145573266757, "percentage": 0.48, "elapsed_time": "0:00:50", "remaining_time": "2:55:57", "throughput": 22962.92, "total_tokens": 1169536} +{"current_steps": 380, "total_steps": 78105, "loss": 0.8308, "lr": 2.42606580463449e-07, "epoch": 0.024326227514243647, "percentage": 0.49, "elapsed_time": "0:00:51", "remaining_time": "2:55:55", "throughput": 22963.98, "total_tokens": 1185088} +{"current_steps": 385, "total_steps": 78105, "loss": 0.6761, "lr": 2.4580719498143647e-07, "epoch": 0.024646309455220537, "percentage": 0.49, "elapsed_time": "0:00:52", "remaining_time": "2:55:56", "throughput": 22963.43, "total_tokens": 1200832} +{"current_steps": 390, "total_steps": 78105, "loss": 0.6987, "lr": 2.4900780949942394e-07, "epoch": 0.024966391396197427, "percentage": 0.5, "elapsed_time": "0:00:52", "remaining_time": "2:55:53", "throughput": 22966.84, "total_tokens": 1216320} +{"current_steps": 395, "total_steps": 78105, "loss": 0.793, "lr": 2.5220842401741135e-07, "epoch": 0.025286473337174317, "percentage": 0.51, "elapsed_time": "0:00:53", "remaining_time": "2:55:58", "throughput": 22970.97, "total_tokens": 1232832} +{"current_steps": 400, "total_steps": 78105, "loss": 0.7453, "lr": 2.554090385353988e-07, "epoch": 0.025606555278151207, "percentage": 0.51, "elapsed_time": "0:00:54", "remaining_time": "2:55:56", "throughput": 22973.81, "total_tokens": 1248384} +{"current_steps": 405, "total_steps": 78105, "loss": 0.7423, "lr": 2.586096530533863e-07, "epoch": 0.025926637219128097, "percentage": 0.52, "elapsed_time": "0:00:55", "remaining_time": "2:55:53", "throughput": 22977.85, "total_tokens": 1263936} +{"current_steps": 410, "total_steps": 78105, "loss": 1.2019, "lr": 2.618102675713737e-07, "epoch": 0.026246719160104987, "percentage": 0.52, "elapsed_time": "0:00:56", "remaining_time": "2:57:33", "throughput": 23020.14, "total_tokens": 1294208} +{"current_steps": 415, "total_steps": 78105, "loss": 0.869, "lr": 2.650108820893612e-07, "epoch": 0.026566801101081877, "percentage": 0.53, "elapsed_time": "0:00:56", "remaining_time": "2:57:26", "throughput": 23019.84, "total_tokens": 1309120} +{"current_steps": 420, "total_steps": 78105, "loss": 0.8688, "lr": 2.6821149660734863e-07, "epoch": 0.026886883042058767, "percentage": 0.54, "elapsed_time": "0:00:57", "remaining_time": "2:57:20", "throughput": 23019.66, "total_tokens": 1324224} +{"current_steps": 425, "total_steps": 78105, "loss": 0.6996, "lr": 2.714121111253361e-07, "epoch": 0.027206964983035656, "percentage": 0.54, "elapsed_time": "0:00:58", "remaining_time": "2:57:24", "throughput": 23026.61, "total_tokens": 1341056} +{"current_steps": 430, "total_steps": 78105, "loss": 0.811, "lr": 2.7461272564332357e-07, "epoch": 0.027527046924012546, "percentage": 0.55, "elapsed_time": "0:00:58", "remaining_time": "2:57:21", "throughput": 23027.15, "total_tokens": 1356544} +{"current_steps": 435, "total_steps": 78105, "loss": 0.7142, "lr": 2.77813340161311e-07, "epoch": 0.027847128864989436, "percentage": 0.56, "elapsed_time": "0:00:59", "remaining_time": "2:57:17", "throughput": 23027.11, "total_tokens": 1371840} +{"current_steps": 440, "total_steps": 78105, "loss": 0.7129, "lr": 2.8101395467929845e-07, "epoch": 0.028167210805966326, "percentage": 0.56, "elapsed_time": "0:01:00", "remaining_time": "2:57:10", "throughput": 23026.16, "total_tokens": 1386816} +{"current_steps": 445, "total_steps": 78105, "loss": 0.7842, "lr": 2.842145691972859e-07, "epoch": 0.028487292746943216, "percentage": 0.57, "elapsed_time": "0:01:00", "remaining_time": "2:57:04", "throughput": 23025.99, "total_tokens": 1401792} +{"current_steps": 450, "total_steps": 78105, "loss": 0.6599, "lr": 2.874151837152734e-07, "epoch": 0.028807374687920106, "percentage": 0.58, "elapsed_time": "0:01:01", "remaining_time": "2:56:58", "throughput": 23025.67, "total_tokens": 1416896} +{"current_steps": 455, "total_steps": 78105, "loss": 0.8144, "lr": 2.906157982332608e-07, "epoch": 0.029127456628896996, "percentage": 0.58, "elapsed_time": "0:01:02", "remaining_time": "2:56:58", "throughput": 23025.77, "total_tokens": 1432704} +{"current_steps": 460, "total_steps": 78105, "loss": 0.7013, "lr": 2.9381641275124827e-07, "epoch": 0.029447538569873886, "percentage": 0.59, "elapsed_time": "0:01:02", "remaining_time": "2:56:57", "throughput": 23025.81, "total_tokens": 1448384} +{"current_steps": 465, "total_steps": 78105, "loss": 0.7257, "lr": 2.9701702726923573e-07, "epoch": 0.029767620510850776, "percentage": 0.6, "elapsed_time": "0:01:03", "remaining_time": "2:56:58", "throughput": 23032.6, "total_tokens": 1464832} +{"current_steps": 470, "total_steps": 78105, "loss": 0.6784, "lr": 3.0021764178722315e-07, "epoch": 0.030087702451827666, "percentage": 0.6, "elapsed_time": "0:01:04", "remaining_time": "2:56:51", "throughput": 23028.24, "total_tokens": 1479424} +{"current_steps": 475, "total_steps": 78105, "loss": 0.775, "lr": 3.034182563052106e-07, "epoch": 0.030407784392804556, "percentage": 0.61, "elapsed_time": "0:01:04", "remaining_time": "2:56:46", "throughput": 23025.94, "total_tokens": 1494336} +{"current_steps": 480, "total_steps": 78105, "loss": 0.6793, "lr": 3.066188708231981e-07, "epoch": 0.030727866333781446, "percentage": 0.61, "elapsed_time": "0:01:05", "remaining_time": "2:56:40", "throughput": 23023.81, "total_tokens": 1509184} +{"current_steps": 485, "total_steps": 78105, "loss": 0.8161, "lr": 3.0981948534118555e-07, "epoch": 0.031047948274758336, "percentage": 0.62, "elapsed_time": "0:01:06", "remaining_time": "2:56:42", "throughput": 23027.67, "total_tokens": 1525504} +{"current_steps": 490, "total_steps": 78105, "loss": 0.6406, "lr": 3.13020099859173e-07, "epoch": 0.031368030215735226, "percentage": 0.63, "elapsed_time": "0:01:06", "remaining_time": "2:56:41", "throughput": 23032.22, "total_tokens": 1541504} +{"current_steps": 495, "total_steps": 78105, "loss": 0.6751, "lr": 3.1622071437716043e-07, "epoch": 0.03168811215671212, "percentage": 0.63, "elapsed_time": "0:01:07", "remaining_time": "2:56:39", "throughput": 23034.79, "total_tokens": 1557184} +{"current_steps": 500, "total_steps": 78105, "loss": 0.7808, "lr": 3.194213288951479e-07, "epoch": 0.032008194097689006, "percentage": 0.64, "elapsed_time": "0:01:08", "remaining_time": "2:56:39", "throughput": 23039.69, "total_tokens": 1573440} +{"current_steps": 505, "total_steps": 78105, "loss": 0.8709, "lr": 3.2262194341313536e-07, "epoch": 0.0323282760386659, "percentage": 0.65, "elapsed_time": "0:01:08", "remaining_time": "2:56:35", "throughput": 23040.59, "total_tokens": 1588736} +{"current_steps": 510, "total_steps": 78105, "loss": 0.7501, "lr": 3.258225579311228e-07, "epoch": 0.032648357979642786, "percentage": 0.65, "elapsed_time": "0:01:09", "remaining_time": "2:56:33", "throughput": 23042.11, "total_tokens": 1604352} +{"current_steps": 515, "total_steps": 78105, "loss": 0.7055, "lr": 3.2902317244911025e-07, "epoch": 0.03296843992061968, "percentage": 0.66, "elapsed_time": "0:01:10", "remaining_time": "2:56:25", "throughput": 23039.05, "total_tokens": 1618816} +{"current_steps": 520, "total_steps": 78105, "loss": 0.7891, "lr": 3.322237869670977e-07, "epoch": 0.033288521861596566, "percentage": 0.67, "elapsed_time": "0:01:10", "remaining_time": "2:56:28", "throughput": 23048.55, "total_tokens": 1635648} +{"current_steps": 525, "total_steps": 78105, "loss": 0.7411, "lr": 3.3542440148508513e-07, "epoch": 0.03360860380257346, "percentage": 0.67, "elapsed_time": "0:01:11", "remaining_time": "2:56:27", "throughput": 23048.2, "total_tokens": 1651328} +{"current_steps": 530, "total_steps": 78105, "loss": 0.7586, "lr": 3.386250160030726e-07, "epoch": 0.033928685743550346, "percentage": 0.68, "elapsed_time": "0:01:12", "remaining_time": "2:56:34", "throughput": 23057.26, "total_tokens": 1668928} +{"current_steps": 535, "total_steps": 78105, "loss": 0.6686, "lr": 3.418256305210601e-07, "epoch": 0.03424876768452724, "percentage": 0.68, "elapsed_time": "0:01:13", "remaining_time": "2:56:37", "throughput": 23061.36, "total_tokens": 1685504} +{"current_steps": 540, "total_steps": 78105, "loss": 0.7246, "lr": 3.450262450390475e-07, "epoch": 0.034568849625504126, "percentage": 0.69, "elapsed_time": "0:01:13", "remaining_time": "2:56:38", "throughput": 23065.46, "total_tokens": 1701952} +{"current_steps": 545, "total_steps": 78105, "loss": 0.7636, "lr": 3.48226859557035e-07, "epoch": 0.03488893156648102, "percentage": 0.7, "elapsed_time": "0:01:14", "remaining_time": "2:56:34", "throughput": 23064.27, "total_tokens": 1716992} +{"current_steps": 550, "total_steps": 78105, "loss": 0.642, "lr": 3.5142747407502246e-07, "epoch": 0.035209013507457906, "percentage": 0.7, "elapsed_time": "0:01:15", "remaining_time": "2:56:31", "throughput": 23061.46, "total_tokens": 1732160} +{"current_steps": 555, "total_steps": 78105, "loss": 0.6954, "lr": 3.546280885930099e-07, "epoch": 0.0355290954484348, "percentage": 0.71, "elapsed_time": "0:01:15", "remaining_time": "2:56:32", "throughput": 23063.92, "total_tokens": 1748416} +{"current_steps": 560, "total_steps": 78105, "loss": 0.7845, "lr": 3.5782870311099734e-07, "epoch": 0.035849177389411686, "percentage": 0.72, "elapsed_time": "0:01:16", "remaining_time": "2:56:29", "throughput": 23063.52, "total_tokens": 1763776} +{"current_steps": 565, "total_steps": 78105, "loss": 0.7543, "lr": 3.610293176289848e-07, "epoch": 0.03616925933038858, "percentage": 0.72, "elapsed_time": "0:01:17", "remaining_time": "2:56:30", "throughput": 23068.43, "total_tokens": 1780160} +{"current_steps": 570, "total_steps": 78105, "loss": 0.6669, "lr": 3.642299321469722e-07, "epoch": 0.036489341271365466, "percentage": 0.73, "elapsed_time": "0:01:17", "remaining_time": "2:56:28", "throughput": 23071.32, "total_tokens": 1795968} +{"current_steps": 575, "total_steps": 78105, "loss": 0.667, "lr": 3.674305466649597e-07, "epoch": 0.03680942321234236, "percentage": 0.74, "elapsed_time": "0:01:18", "remaining_time": "2:56:43", "throughput": 23085.69, "total_tokens": 1815424} +{"current_steps": 580, "total_steps": 78105, "loss": 0.8525, "lr": 3.7063116118294716e-07, "epoch": 0.037129505153319246, "percentage": 0.74, "elapsed_time": "0:01:19", "remaining_time": "2:56:45", "throughput": 23088.54, "total_tokens": 1831936} +{"current_steps": 585, "total_steps": 78105, "loss": 0.7031, "lr": 3.7383177570093457e-07, "epoch": 0.03744958709429614, "percentage": 0.75, "elapsed_time": "0:01:20", "remaining_time": "2:56:43", "throughput": 23086.73, "total_tokens": 1847424} +{"current_steps": 590, "total_steps": 78105, "loss": 0.7892, "lr": 3.7703239021892204e-07, "epoch": 0.03776966903527303, "percentage": 0.76, "elapsed_time": "0:01:20", "remaining_time": "2:56:39", "throughput": 23084.32, "total_tokens": 1862400} +{"current_steps": 595, "total_steps": 78105, "loss": 0.7662, "lr": 3.8023300473690956e-07, "epoch": 0.03808975097624992, "percentage": 0.76, "elapsed_time": "0:01:21", "remaining_time": "2:56:32", "throughput": 23082.34, "total_tokens": 1876928} +{"current_steps": 600, "total_steps": 78105, "loss": 0.6976, "lr": 3.834336192548969e-07, "epoch": 0.03840983291722681, "percentage": 0.77, "elapsed_time": "0:01:21", "remaining_time": "2:56:31", "throughput": 23082.17, "total_tokens": 1892608} +{"current_steps": 605, "total_steps": 78105, "loss": 0.7447, "lr": 3.8663423377288444e-07, "epoch": 0.0387299148582037, "percentage": 0.77, "elapsed_time": "0:01:22", "remaining_time": "2:56:35", "throughput": 23088.99, "total_tokens": 1909696} +{"current_steps": 610, "total_steps": 78105, "loss": 0.8291, "lr": 3.898348482908719e-07, "epoch": 0.03904999679918059, "percentage": 0.78, "elapsed_time": "0:01:23", "remaining_time": "2:56:31", "throughput": 23087.8, "total_tokens": 1924864} +{"current_steps": 615, "total_steps": 78105, "loss": 0.5963, "lr": 3.930354628088593e-07, "epoch": 0.03937007874015748, "percentage": 0.79, "elapsed_time": "0:01:24", "remaining_time": "2:56:26", "throughput": 23088.61, "total_tokens": 1939968} +{"current_steps": 620, "total_steps": 78105, "loss": 0.7966, "lr": 3.962360773268468e-07, "epoch": 0.03969016068113437, "percentage": 0.79, "elapsed_time": "0:01:24", "remaining_time": "2:56:23", "throughput": 23087.1, "total_tokens": 1955136} +{"current_steps": 625, "total_steps": 78105, "loss": 0.7001, "lr": 3.9943669184483426e-07, "epoch": 0.04001024262211126, "percentage": 0.8, "elapsed_time": "0:01:25", "remaining_time": "2:56:22", "throughput": 23087.04, "total_tokens": 1970880} +{"current_steps": 630, "total_steps": 78105, "loss": 0.6206, "lr": 4.0263730636282167e-07, "epoch": 0.04033032456308815, "percentage": 0.81, "elapsed_time": "0:01:26", "remaining_time": "2:56:22", "throughput": 23087.89, "total_tokens": 1986752} +{"current_steps": 635, "total_steps": 78105, "loss": 0.6489, "lr": 4.0583792088080914e-07, "epoch": 0.04065040650406504, "percentage": 0.81, "elapsed_time": "0:01:26", "remaining_time": "2:56:18", "throughput": 23086.5, "total_tokens": 2001856} +{"current_steps": 640, "total_steps": 78105, "loss": 0.686, "lr": 4.090385353987966e-07, "epoch": 0.04097048844504193, "percentage": 0.82, "elapsed_time": "0:01:27", "remaining_time": "2:56:26", "throughput": 23095.84, "total_tokens": 2019968} +{"current_steps": 645, "total_steps": 78105, "loss": 0.7926, "lr": 4.12239149916784e-07, "epoch": 0.04129057038601882, "percentage": 0.83, "elapsed_time": "0:01:28", "remaining_time": "2:56:23", "throughput": 23095.83, "total_tokens": 2035328} +{"current_steps": 650, "total_steps": 78105, "loss": 0.6748, "lr": 4.154397644347715e-07, "epoch": 0.04161065232699571, "percentage": 0.83, "elapsed_time": "0:01:28", "remaining_time": "2:56:38", "throughput": 23107.27, "total_tokens": 2055168} +{"current_steps": 655, "total_steps": 78105, "loss": 0.7494, "lr": 4.18640378952759e-07, "epoch": 0.0419307342679726, "percentage": 0.84, "elapsed_time": "0:01:29", "remaining_time": "2:56:40", "throughput": 23110.34, "total_tokens": 2071808} +{"current_steps": 660, "total_steps": 78105, "loss": 0.6756, "lr": 4.2184099347074637e-07, "epoch": 0.04225081620894949, "percentage": 0.85, "elapsed_time": "0:01:30", "remaining_time": "2:56:38", "throughput": 23111.25, "total_tokens": 2087424} +{"current_steps": 665, "total_steps": 78105, "loss": 0.8031, "lr": 4.250416079887339e-07, "epoch": 0.04257089814992638, "percentage": 0.85, "elapsed_time": "0:01:30", "remaining_time": "2:56:34", "throughput": 23110.66, "total_tokens": 2102592} +{"current_steps": 670, "total_steps": 78105, "loss": 0.7141, "lr": 4.2824222250672136e-07, "epoch": 0.04289098009090327, "percentage": 0.86, "elapsed_time": "0:01:31", "remaining_time": "2:56:36", "throughput": 23116.19, "total_tokens": 2119488} +{"current_steps": 675, "total_steps": 78105, "loss": 0.6826, "lr": 4.3144283702470877e-07, "epoch": 0.04321106203188016, "percentage": 0.86, "elapsed_time": "0:01:32", "remaining_time": "2:56:37", "throughput": 23120.49, "total_tokens": 2136000} +{"current_steps": 680, "total_steps": 78105, "loss": 0.8482, "lr": 4.3464345154269624e-07, "epoch": 0.04353114397285705, "percentage": 0.87, "elapsed_time": "0:01:33", "remaining_time": "2:56:38", "throughput": 23124.22, "total_tokens": 2152448} +{"current_steps": 685, "total_steps": 78105, "loss": 0.6599, "lr": 4.378440660606837e-07, "epoch": 0.04385122591383394, "percentage": 0.88, "elapsed_time": "0:01:33", "remaining_time": "2:56:35", "throughput": 23125.32, "total_tokens": 2168000} +{"current_steps": 690, "total_steps": 78105, "loss": 0.6298, "lr": 4.410446805786711e-07, "epoch": 0.04417130785481083, "percentage": 0.88, "elapsed_time": "0:01:34", "remaining_time": "2:56:34", "throughput": 23124.0, "total_tokens": 2183552} +{"current_steps": 695, "total_steps": 78105, "loss": 0.7726, "lr": 4.442452950966586e-07, "epoch": 0.04449138979578772, "percentage": 0.89, "elapsed_time": "0:01:35", "remaining_time": "2:56:32", "throughput": 23126.79, "total_tokens": 2199488} +{"current_steps": 700, "total_steps": 78105, "loss": 0.7076, "lr": 4.4744590961464605e-07, "epoch": 0.04481147173676461, "percentage": 0.9, "elapsed_time": "0:01:35", "remaining_time": "2:56:31", "throughput": 23127.95, "total_tokens": 2215296} +{"current_steps": 705, "total_steps": 78105, "loss": 0.7781, "lr": 4.5064652413263347e-07, "epoch": 0.0451315536777415, "percentage": 0.9, "elapsed_time": "0:01:36", "remaining_time": "2:56:26", "throughput": 23127.28, "total_tokens": 2230016} +{"current_steps": 710, "total_steps": 78105, "loss": 0.7446, "lr": 4.5384713865062093e-07, "epoch": 0.04545163561871839, "percentage": 0.91, "elapsed_time": "0:01:37", "remaining_time": "2:56:22", "throughput": 23125.13, "total_tokens": 2245056} +{"current_steps": 715, "total_steps": 78105, "loss": 0.6779, "lr": 4.5704775316860845e-07, "epoch": 0.04577171755969528, "percentage": 0.92, "elapsed_time": "0:01:37", "remaining_time": "2:56:22", "throughput": 23127.25, "total_tokens": 2261248} +{"current_steps": 720, "total_steps": 78105, "loss": 0.6933, "lr": 4.602483676865958e-07, "epoch": 0.04609179950067217, "percentage": 0.92, "elapsed_time": "0:01:38", "remaining_time": "2:56:25", "throughput": 23130.21, "total_tokens": 2278016} +{"current_steps": 725, "total_steps": 78105, "loss": 0.5873, "lr": 4.6344898220458334e-07, "epoch": 0.04641188144164906, "percentage": 0.93, "elapsed_time": "0:01:39", "remaining_time": "2:56:21", "throughput": 23127.45, "total_tokens": 2292800} +{"current_steps": 730, "total_steps": 78105, "loss": 0.6548, "lr": 4.666495967225708e-07, "epoch": 0.04673196338262595, "percentage": 0.93, "elapsed_time": "0:01:39", "remaining_time": "2:56:18", "throughput": 23127.35, "total_tokens": 2308224} +{"current_steps": 735, "total_steps": 78105, "loss": 0.7381, "lr": 4.698502112405582e-07, "epoch": 0.04705204532360284, "percentage": 0.94, "elapsed_time": "0:01:40", "remaining_time": "2:56:22", "throughput": 23133.76, "total_tokens": 2325760} +{"current_steps": 740, "total_steps": 78105, "loss": 0.748, "lr": 4.730508257585457e-07, "epoch": 0.04737212726457973, "percentage": 0.95, "elapsed_time": "0:01:41", "remaining_time": "2:56:21", "throughput": 23135.66, "total_tokens": 2341632} +{"current_steps": 745, "total_steps": 78105, "loss": 0.7587, "lr": 4.7625144027653315e-07, "epoch": 0.04769220920555662, "percentage": 0.95, "elapsed_time": "0:01:41", "remaining_time": "2:56:20", "throughput": 23137.32, "total_tokens": 2357504} +{"current_steps": 750, "total_steps": 78105, "loss": 0.7956, "lr": 4.794520547945206e-07, "epoch": 0.04801229114653351, "percentage": 0.96, "elapsed_time": "0:01:42", "remaining_time": "2:56:17", "throughput": 23134.37, "total_tokens": 2372608} +{"current_steps": 755, "total_steps": 78105, "loss": 0.6417, "lr": 4.82652669312508e-07, "epoch": 0.0483323730875104, "percentage": 0.97, "elapsed_time": "0:01:43", "remaining_time": "2:56:15", "throughput": 23136.26, "total_tokens": 2388352} +{"current_steps": 760, "total_steps": 78105, "loss": 0.7503, "lr": 4.858532838304955e-07, "epoch": 0.04865245502848729, "percentage": 0.97, "elapsed_time": "0:01:43", "remaining_time": "2:56:16", "throughput": 23137.36, "total_tokens": 2404480} +{"current_steps": 765, "total_steps": 78105, "loss": 0.5662, "lr": 4.89053898348483e-07, "epoch": 0.04897253696946418, "percentage": 0.98, "elapsed_time": "0:01:44", "remaining_time": "2:56:12", "throughput": 23137.04, "total_tokens": 2419648} +{"current_steps": 770, "total_steps": 78105, "loss": 0.747, "lr": 4.922545128664704e-07, "epoch": 0.04929261891044107, "percentage": 0.99, "elapsed_time": "0:01:45", "remaining_time": "2:56:12", "throughput": 23136.79, "total_tokens": 2435584} +{"current_steps": 775, "total_steps": 78105, "loss": 0.6679, "lr": 4.954551273844579e-07, "epoch": 0.04961270085141796, "percentage": 0.99, "elapsed_time": "0:01:45", "remaining_time": "2:56:10", "throughput": 23136.8, "total_tokens": 2451072} +{"current_steps": 780, "total_steps": 78105, "loss": 0.7975, "lr": 4.986557419024453e-07, "epoch": 0.04993278279239485, "percentage": 1.0, "elapsed_time": "0:01:46", "remaining_time": "2:56:12", "throughput": 23141.31, "total_tokens": 2467968} +{"current_steps": 785, "total_steps": 78105, "loss": 0.6755, "lr": 5.018563564204327e-07, "epoch": 0.05025286473337175, "percentage": 1.01, "elapsed_time": "0:01:47", "remaining_time": "2:56:15", "throughput": 23144.91, "total_tokens": 2484928} +{"current_steps": 790, "total_steps": 78105, "loss": 0.6544, "lr": 5.050569709384202e-07, "epoch": 0.05057294667434863, "percentage": 1.01, "elapsed_time": "0:01:48", "remaining_time": "2:56:15", "throughput": 23148.79, "total_tokens": 2501504} +{"current_steps": 795, "total_steps": 78105, "loss": 0.5379, "lr": 5.082575854564077e-07, "epoch": 0.050893028615325527, "percentage": 1.02, "elapsed_time": "0:01:48", "remaining_time": "2:56:18", "throughput": 23155.4, "total_tokens": 2518848} +{"current_steps": 800, "total_steps": 78105, "loss": 0.6548, "lr": 5.114581999743951e-07, "epoch": 0.05121311055630241, "percentage": 1.02, "elapsed_time": "0:01:49", "remaining_time": "2:56:19", "throughput": 23159.58, "total_tokens": 2535680} +{"current_steps": 805, "total_steps": 78105, "loss": 0.7033, "lr": 5.146588144923826e-07, "epoch": 0.051533192497279307, "percentage": 1.03, "elapsed_time": "0:01:50", "remaining_time": "2:56:17", "throughput": 23157.77, "total_tokens": 2550976} +{"current_steps": 810, "total_steps": 78105, "loss": 0.6126, "lr": 5.1785942901037e-07, "epoch": 0.05185327443825619, "percentage": 1.04, "elapsed_time": "0:01:50", "remaining_time": "2:56:15", "throughput": 23159.09, "total_tokens": 2566656} +{"current_steps": 815, "total_steps": 78105, "loss": 0.6814, "lr": 5.210600435283575e-07, "epoch": 0.052173356379233086, "percentage": 1.04, "elapsed_time": "0:01:51", "remaining_time": "2:56:12", "throughput": 23157.0, "total_tokens": 2581568} +{"current_steps": 820, "total_steps": 78105, "loss": 0.6776, "lr": 5.24260658046345e-07, "epoch": 0.05249343832020997, "percentage": 1.05, "elapsed_time": "0:01:52", "remaining_time": "2:56:08", "throughput": 23155.87, "total_tokens": 2596608} +{"current_steps": 825, "total_steps": 78105, "loss": 0.6859, "lr": 5.274612725643324e-07, "epoch": 0.052813520261186866, "percentage": 1.06, "elapsed_time": "0:01:52", "remaining_time": "2:56:06", "throughput": 23155.0, "total_tokens": 2612032} +{"current_steps": 830, "total_steps": 78105, "loss": 0.8132, "lr": 5.306618870823198e-07, "epoch": 0.05313360220216375, "percentage": 1.06, "elapsed_time": "0:01:53", "remaining_time": "2:56:03", "throughput": 23155.46, "total_tokens": 2627264} +{"current_steps": 835, "total_steps": 78105, "loss": 0.6381, "lr": 5.338625016003073e-07, "epoch": 0.053453684143140646, "percentage": 1.07, "elapsed_time": "0:01:54", "remaining_time": "2:56:02", "throughput": 23157.17, "total_tokens": 2643264} +{"current_steps": 840, "total_steps": 78105, "loss": 0.643, "lr": 5.370631161182948e-07, "epoch": 0.05377376608411753, "percentage": 1.08, "elapsed_time": "0:01:54", "remaining_time": "2:56:01", "throughput": 23159.03, "total_tokens": 2659264} +{"current_steps": 845, "total_steps": 78105, "loss": 0.5987, "lr": 5.402637306362822e-07, "epoch": 0.054093848025094426, "percentage": 1.08, "elapsed_time": "0:01:55", "remaining_time": "2:55:57", "throughput": 23157.16, "total_tokens": 2673856} +{"current_steps": 850, "total_steps": 78105, "loss": 0.5327, "lr": 5.434643451542697e-07, "epoch": 0.05441392996607131, "percentage": 1.09, "elapsed_time": "0:01:56", "remaining_time": "2:55:53", "throughput": 23154.0, "total_tokens": 2688448} +{"current_steps": 855, "total_steps": 78105, "loss": 0.6485, "lr": 5.466649596722571e-07, "epoch": 0.054734011907048206, "percentage": 1.09, "elapsed_time": "0:01:56", "remaining_time": "2:55:51", "throughput": 23153.88, "total_tokens": 2703872} +{"current_steps": 860, "total_steps": 78105, "loss": 0.6576, "lr": 5.498655741902445e-07, "epoch": 0.05505409384802509, "percentage": 1.1, "elapsed_time": "0:01:57", "remaining_time": "2:55:48", "throughput": 23152.05, "total_tokens": 2719040} +{"current_steps": 865, "total_steps": 78105, "loss": 0.6156, "lr": 5.530661887082321e-07, "epoch": 0.055374175789001986, "percentage": 1.11, "elapsed_time": "0:01:58", "remaining_time": "2:55:47", "throughput": 23154.8, "total_tokens": 2735168} +{"current_steps": 870, "total_steps": 78105, "loss": 0.5268, "lr": 5.562668032262195e-07, "epoch": 0.05569425772997887, "percentage": 1.11, "elapsed_time": "0:01:58", "remaining_time": "2:55:45", "throughput": 23155.32, "total_tokens": 2750592} +{"current_steps": 875, "total_steps": 78105, "loss": 0.6933, "lr": 5.594674177442069e-07, "epoch": 0.056014339670955766, "percentage": 1.12, "elapsed_time": "0:01:59", "remaining_time": "2:55:46", "throughput": 23158.13, "total_tokens": 2767232} +{"current_steps": 880, "total_steps": 78105, "loss": 0.7287, "lr": 5.626680322621944e-07, "epoch": 0.05633442161193265, "percentage": 1.13, "elapsed_time": "0:02:00", "remaining_time": "2:55:50", "throughput": 23163.16, "total_tokens": 2784768} +{"current_steps": 885, "total_steps": 78105, "loss": 0.6941, "lr": 5.658686467801819e-07, "epoch": 0.056654503552909546, "percentage": 1.13, "elapsed_time": "0:02:00", "remaining_time": "2:55:47", "throughput": 23162.29, "total_tokens": 2799872} +{"current_steps": 890, "total_steps": 78105, "loss": 0.6689, "lr": 5.690692612981693e-07, "epoch": 0.05697458549388643, "percentage": 1.14, "elapsed_time": "0:02:01", "remaining_time": "2:55:47", "throughput": 23163.21, "total_tokens": 2816000} +{"current_steps": 895, "total_steps": 78105, "loss": 0.6694, "lr": 5.722698758161568e-07, "epoch": 0.057294667434863326, "percentage": 1.15, "elapsed_time": "0:02:02", "remaining_time": "2:55:46", "throughput": 23162.91, "total_tokens": 2831744} +{"current_steps": 900, "total_steps": 78105, "loss": 0.7011, "lr": 5.754704903341442e-07, "epoch": 0.05761474937584021, "percentage": 1.15, "elapsed_time": "0:02:02", "remaining_time": "2:55:45", "throughput": 23162.75, "total_tokens": 2847424} +{"current_steps": 905, "total_steps": 78105, "loss": 0.738, "lr": 5.786711048521316e-07, "epoch": 0.057934831316817106, "percentage": 1.16, "elapsed_time": "0:02:03", "remaining_time": "2:55:41", "throughput": 23162.21, "total_tokens": 2862272} +{"current_steps": 910, "total_steps": 78105, "loss": 0.524, "lr": 5.818717193701191e-07, "epoch": 0.05825491325779399, "percentage": 1.17, "elapsed_time": "0:02:04", "remaining_time": "2:55:38", "throughput": 23160.35, "total_tokens": 2877120} +{"current_steps": 915, "total_steps": 78105, "loss": 0.6219, "lr": 5.850723338881066e-07, "epoch": 0.058574995198770886, "percentage": 1.17, "elapsed_time": "0:02:04", "remaining_time": "2:55:40", "throughput": 23165.74, "total_tokens": 2894592} +{"current_steps": 920, "total_steps": 78105, "loss": 0.6786, "lr": 5.88272948406094e-07, "epoch": 0.05889507713974777, "percentage": 1.18, "elapsed_time": "0:02:05", "remaining_time": "2:55:38", "throughput": 23164.94, "total_tokens": 2909888} +{"current_steps": 925, "total_steps": 78105, "loss": 0.7083, "lr": 5.914735629240815e-07, "epoch": 0.059215159080724666, "percentage": 1.18, "elapsed_time": "0:02:06", "remaining_time": "2:55:38", "throughput": 23163.62, "total_tokens": 2925632} +{"current_steps": 930, "total_steps": 78105, "loss": 0.658, "lr": 5.946741774420689e-07, "epoch": 0.05953524102170155, "percentage": 1.19, "elapsed_time": "0:02:06", "remaining_time": "2:55:38", "throughput": 23165.25, "total_tokens": 2941760} +{"current_steps": 935, "total_steps": 78105, "loss": 0.814, "lr": 5.978747919600564e-07, "epoch": 0.059855322962678446, "percentage": 1.2, "elapsed_time": "0:02:07", "remaining_time": "2:55:35", "throughput": 23166.94, "total_tokens": 2957376} +{"current_steps": 940, "total_steps": 78105, "loss": 0.6456, "lr": 6.010754064780439e-07, "epoch": 0.06017540490365533, "percentage": 1.2, "elapsed_time": "0:02:08", "remaining_time": "2:55:34", "throughput": 23166.35, "total_tokens": 2972800} +{"current_steps": 945, "total_steps": 78105, "loss": 0.8423, "lr": 6.042760209960313e-07, "epoch": 0.060495486844632226, "percentage": 1.21, "elapsed_time": "0:02:08", "remaining_time": "2:55:32", "throughput": 23167.2, "total_tokens": 2988480} +{"current_steps": 950, "total_steps": 78105, "loss": 0.7039, "lr": 6.074766355140187e-07, "epoch": 0.06081556878560911, "percentage": 1.22, "elapsed_time": "0:02:09", "remaining_time": "2:55:32", "throughput": 23168.07, "total_tokens": 3004480} +{"current_steps": 955, "total_steps": 78105, "loss": 0.5306, "lr": 6.106772500320062e-07, "epoch": 0.061135650726586006, "percentage": 1.22, "elapsed_time": "0:02:10", "remaining_time": "2:55:30", "throughput": 23169.4, "total_tokens": 3020288} +{"current_steps": 960, "total_steps": 78105, "loss": 0.6458, "lr": 6.138778645499937e-07, "epoch": 0.06145573266756289, "percentage": 1.23, "elapsed_time": "0:02:11", "remaining_time": "2:55:30", "throughput": 23168.09, "total_tokens": 3035968} +{"current_steps": 965, "total_steps": 78105, "loss": 0.8471, "lr": 6.170784790679811e-07, "epoch": 0.061775814608539786, "percentage": 1.24, "elapsed_time": "0:02:11", "remaining_time": "2:55:29", "throughput": 23168.68, "total_tokens": 3051776} +{"current_steps": 970, "total_steps": 78105, "loss": 0.6216, "lr": 6.202790935859686e-07, "epoch": 0.06209589654951667, "percentage": 1.24, "elapsed_time": "0:02:12", "remaining_time": "2:55:26", "throughput": 23166.72, "total_tokens": 3066560} +{"current_steps": 975, "total_steps": 78105, "loss": 0.5871, "lr": 6.23479708103956e-07, "epoch": 0.062415978490493566, "percentage": 1.25, "elapsed_time": "0:02:13", "remaining_time": "2:55:24", "throughput": 23168.79, "total_tokens": 3082496} +{"current_steps": 980, "total_steps": 78105, "loss": 0.66, "lr": 6.266803226219435e-07, "epoch": 0.06273606043147045, "percentage": 1.25, "elapsed_time": "0:02:13", "remaining_time": "2:55:23", "throughput": 23168.02, "total_tokens": 3097856} +{"current_steps": 985, "total_steps": 78105, "loss": 0.537, "lr": 6.298809371399309e-07, "epoch": 0.06305614237244735, "percentage": 1.26, "elapsed_time": "0:02:14", "remaining_time": "2:55:22", "throughput": 23168.32, "total_tokens": 3113664} +{"current_steps": 990, "total_steps": 78105, "loss": 0.6638, "lr": 6.330815516579184e-07, "epoch": 0.06337622431342424, "percentage": 1.27, "elapsed_time": "0:02:15", "remaining_time": "2:55:21", "throughput": 23170.12, "total_tokens": 3129792} +{"current_steps": 995, "total_steps": 78105, "loss": 0.7097, "lr": 6.362821661759058e-07, "epoch": 0.06369630625440113, "percentage": 1.27, "elapsed_time": "0:02:15", "remaining_time": "2:55:19", "throughput": 23169.55, "total_tokens": 3145024} +{"current_steps": 1000, "total_steps": 78105, "loss": 0.7786, "lr": 6.394827806938933e-07, "epoch": 0.06401638819537801, "percentage": 1.28, "elapsed_time": "0:02:16", "remaining_time": "2:55:19", "throughput": 23170.15, "total_tokens": 3161216} +{"current_steps": 1005, "total_steps": 78105, "loss": 0.4931, "lr": 6.426833952118808e-07, "epoch": 0.0643364701363549, "percentage": 1.29, "elapsed_time": "0:02:17", "remaining_time": "2:55:18", "throughput": 23170.22, "total_tokens": 3176960} +{"current_steps": 1010, "total_steps": 78105, "loss": 0.6856, "lr": 6.458840097298682e-07, "epoch": 0.0646565520773318, "percentage": 1.29, "elapsed_time": "0:02:17", "remaining_time": "2:55:18", "throughput": 23171.61, "total_tokens": 3193088} +{"current_steps": 1015, "total_steps": 78105, "loss": 0.7215, "lr": 6.490846242478556e-07, "epoch": 0.0649766340183087, "percentage": 1.3, "elapsed_time": "0:02:18", "remaining_time": "2:55:20", "throughput": 23174.19, "total_tokens": 3210112} +{"current_steps": 1020, "total_steps": 78105, "loss": 0.5505, "lr": 6.52285238765843e-07, "epoch": 0.06529671595928557, "percentage": 1.31, "elapsed_time": "0:02:19", "remaining_time": "2:55:16", "throughput": 23172.79, "total_tokens": 3224768} +{"current_steps": 1025, "total_steps": 78105, "loss": 0.6565, "lr": 6.554858532838305e-07, "epoch": 0.06561679790026247, "percentage": 1.31, "elapsed_time": "0:02:19", "remaining_time": "2:55:14", "throughput": 23172.71, "total_tokens": 3240128} +{"current_steps": 1030, "total_steps": 78105, "loss": 0.6695, "lr": 6.58686467801818e-07, "epoch": 0.06593687984123936, "percentage": 1.32, "elapsed_time": "0:02:20", "remaining_time": "2:55:15", "throughput": 23175.36, "total_tokens": 3256576} +{"current_steps": 1035, "total_steps": 78105, "loss": 0.6371, "lr": 6.618870823198055e-07, "epoch": 0.06625696178221625, "percentage": 1.33, "elapsed_time": "0:02:21", "remaining_time": "2:55:14", "throughput": 23175.24, "total_tokens": 3272384} +{"current_steps": 1040, "total_steps": 78105, "loss": 0.4953, "lr": 6.65087696837793e-07, "epoch": 0.06657704372319313, "percentage": 1.33, "elapsed_time": "0:02:21", "remaining_time": "2:55:14", "throughput": 23176.29, "total_tokens": 3288512} +{"current_steps": 1045, "total_steps": 78105, "loss": 0.5671, "lr": 6.682883113557803e-07, "epoch": 0.06689712566417003, "percentage": 1.34, "elapsed_time": "0:02:22", "remaining_time": "2:55:17", "throughput": 23181.72, "total_tokens": 3306304} +{"current_steps": 1050, "total_steps": 78105, "loss": 0.5265, "lr": 6.714889258737677e-07, "epoch": 0.06721720760514692, "percentage": 1.34, "elapsed_time": "0:02:23", "remaining_time": "2:55:14", "throughput": 23180.38, "total_tokens": 3321344} +{"current_steps": 1055, "total_steps": 78105, "loss": 0.6922, "lr": 6.746895403917553e-07, "epoch": 0.06753728954612381, "percentage": 1.35, "elapsed_time": "0:02:24", "remaining_time": "2:55:17", "throughput": 23183.7, "total_tokens": 3338560} +{"current_steps": 1060, "total_steps": 78105, "loss": 0.5338, "lr": 6.778901549097428e-07, "epoch": 0.06785737148710069, "percentage": 1.36, "elapsed_time": "0:02:24", "remaining_time": "2:55:13", "throughput": 23181.1, "total_tokens": 3353152} +{"current_steps": 1065, "total_steps": 78105, "loss": 0.517, "lr": 6.810907694277303e-07, "epoch": 0.06817745342807759, "percentage": 1.36, "elapsed_time": "0:02:25", "remaining_time": "2:55:14", "throughput": 23182.01, "total_tokens": 3369536} +{"current_steps": 1070, "total_steps": 78105, "loss": 0.6104, "lr": 6.842913839457177e-07, "epoch": 0.06849753536905448, "percentage": 1.37, "elapsed_time": "0:02:26", "remaining_time": "2:55:12", "throughput": 23180.9, "total_tokens": 3384832} +{"current_steps": 1075, "total_steps": 78105, "loss": 0.5292, "lr": 6.874919984637051e-07, "epoch": 0.06881761731003137, "percentage": 1.38, "elapsed_time": "0:02:26", "remaining_time": "2:55:09", "throughput": 23178.49, "total_tokens": 3399424} +{"current_steps": 1080, "total_steps": 78105, "loss": 0.697, "lr": 6.906926129816925e-07, "epoch": 0.06913769925100825, "percentage": 1.38, "elapsed_time": "0:02:27", "remaining_time": "2:55:11", "throughput": 23181.67, "total_tokens": 3416704} +{"current_steps": 1085, "total_steps": 78105, "loss": 0.649, "lr": 6.9389322749968e-07, "epoch": 0.06945778119198515, "percentage": 1.39, "elapsed_time": "0:02:28", "remaining_time": "2:55:08", "throughput": 23181.15, "total_tokens": 3431552} +{"current_steps": 1090, "total_steps": 78105, "loss": 0.72, "lr": 6.970938420176675e-07, "epoch": 0.06977786313296204, "percentage": 1.4, "elapsed_time": "0:02:28", "remaining_time": "2:55:07", "throughput": 23181.29, "total_tokens": 3447488} +{"current_steps": 1095, "total_steps": 78105, "loss": 0.5332, "lr": 7.00294456535655e-07, "epoch": 0.07009794507393893, "percentage": 1.4, "elapsed_time": "0:02:29", "remaining_time": "2:55:07", "throughput": 23181.93, "total_tokens": 3463424} +{"current_steps": 1100, "total_steps": 78105, "loss": 0.6978, "lr": 7.034950710536424e-07, "epoch": 0.07041802701491581, "percentage": 1.41, "elapsed_time": "0:02:30", "remaining_time": "2:55:07", "throughput": 23182.74, "total_tokens": 3479680} +{"current_steps": 1105, "total_steps": 78105, "loss": 0.5592, "lr": 7.066956855716298e-07, "epoch": 0.0707381089558927, "percentage": 1.41, "elapsed_time": "0:02:30", "remaining_time": "2:55:06", "throughput": 23184.14, "total_tokens": 3495552} +{"current_steps": 1110, "total_steps": 78105, "loss": 0.5827, "lr": 7.098963000896172e-07, "epoch": 0.0710581908968696, "percentage": 1.42, "elapsed_time": "0:02:31", "remaining_time": "2:55:04", "throughput": 23183.57, "total_tokens": 3510976} +{"current_steps": 1115, "total_steps": 78105, "loss": 0.6853, "lr": 7.130969146076047e-07, "epoch": 0.0713782728378465, "percentage": 1.43, "elapsed_time": "0:02:32", "remaining_time": "2:55:02", "throughput": 23181.59, "total_tokens": 3526016} +{"current_steps": 1120, "total_steps": 78105, "loss": 0.591, "lr": 7.162975291255922e-07, "epoch": 0.07169835477882337, "percentage": 1.43, "elapsed_time": "0:02:32", "remaining_time": "2:54:58", "throughput": 23180.0, "total_tokens": 3540544} +{"current_steps": 1125, "total_steps": 78105, "loss": 0.618, "lr": 7.194981436435796e-07, "epoch": 0.07201843671980027, "percentage": 1.44, "elapsed_time": "0:02:33", "remaining_time": "2:54:58", "throughput": 23180.36, "total_tokens": 3556416} +{"current_steps": 1130, "total_steps": 78105, "loss": 0.5904, "lr": 7.226987581615671e-07, "epoch": 0.07233851866077716, "percentage": 1.45, "elapsed_time": "0:02:34", "remaining_time": "2:54:56", "throughput": 23181.42, "total_tokens": 3572096} +{"current_steps": 1135, "total_steps": 78105, "loss": 0.5448, "lr": 7.258993726795545e-07, "epoch": 0.07265860060175405, "percentage": 1.45, "elapsed_time": "0:02:34", "remaining_time": "2:54:55", "throughput": 23181.03, "total_tokens": 3587712} +{"current_steps": 1140, "total_steps": 78105, "loss": 0.6193, "lr": 7.290999871975419e-07, "epoch": 0.07297868254273093, "percentage": 1.46, "elapsed_time": "0:02:35", "remaining_time": "2:54:57", "throughput": 23185.21, "total_tokens": 3605056} +{"current_steps": 1145, "total_steps": 78105, "loss": 0.7339, "lr": 7.323006017155294e-07, "epoch": 0.07329876448370783, "percentage": 1.47, "elapsed_time": "0:02:36", "remaining_time": "2:54:57", "throughput": 23186.89, "total_tokens": 3621184} +{"current_steps": 1150, "total_steps": 78105, "loss": 0.6294, "lr": 7.355012162335169e-07, "epoch": 0.07361884642468472, "percentage": 1.47, "elapsed_time": "0:02:36", "remaining_time": "2:54:52", "throughput": 23184.16, "total_tokens": 3635392} +{"current_steps": 1155, "total_steps": 78105, "loss": 0.5553, "lr": 7.387018307515043e-07, "epoch": 0.07393892836566161, "percentage": 1.48, "elapsed_time": "0:02:37", "remaining_time": "2:54:49", "throughput": 23182.96, "total_tokens": 3649984} +{"current_steps": 1160, "total_steps": 78105, "loss": 0.6221, "lr": 7.419024452694919e-07, "epoch": 0.07425901030663849, "percentage": 1.49, "elapsed_time": "0:02:38", "remaining_time": "2:54:48", "throughput": 23184.34, "total_tokens": 3665920} +{"current_steps": 1165, "total_steps": 78105, "loss": 0.4922, "lr": 7.451030597874792e-07, "epoch": 0.07457909224761539, "percentage": 1.49, "elapsed_time": "0:02:38", "remaining_time": "2:54:44", "throughput": 23182.07, "total_tokens": 3680256} +{"current_steps": 1170, "total_steps": 78105, "loss": 0.5405, "lr": 7.483036743054666e-07, "epoch": 0.07489917418859228, "percentage": 1.5, "elapsed_time": "0:02:39", "remaining_time": "2:54:46", "throughput": 23185.45, "total_tokens": 3697536} +{"current_steps": 1175, "total_steps": 78105, "loss": 0.659, "lr": 7.515042888234542e-07, "epoch": 0.07521925612956917, "percentage": 1.5, "elapsed_time": "0:02:40", "remaining_time": "2:54:46", "throughput": 23183.13, "total_tokens": 3713088} +{"current_steps": 1180, "total_steps": 78105, "loss": 0.6654, "lr": 7.547049033414417e-07, "epoch": 0.07553933807054607, "percentage": 1.51, "elapsed_time": "0:02:40", "remaining_time": "2:54:47", "throughput": 23186.41, "total_tokens": 3729920} +{"current_steps": 1185, "total_steps": 78105, "loss": 0.5656, "lr": 7.579055178594291e-07, "epoch": 0.07585942001152295, "percentage": 1.52, "elapsed_time": "0:02:41", "remaining_time": "2:54:45", "throughput": 23187.39, "total_tokens": 3745664} +{"current_steps": 1190, "total_steps": 78105, "loss": 0.5935, "lr": 7.611061323774166e-07, "epoch": 0.07617950195249984, "percentage": 1.52, "elapsed_time": "0:02:42", "remaining_time": "2:54:43", "throughput": 23186.21, "total_tokens": 3760576} +{"current_steps": 1195, "total_steps": 78105, "loss": 0.5309, "lr": 7.64306746895404e-07, "epoch": 0.07649958389347673, "percentage": 1.53, "elapsed_time": "0:02:42", "remaining_time": "2:54:43", "throughput": 23185.83, "total_tokens": 3776576} +{"current_steps": 1200, "total_steps": 78105, "loss": 0.6443, "lr": 7.675073614133914e-07, "epoch": 0.07681966583445363, "percentage": 1.54, "elapsed_time": "0:02:43", "remaining_time": "2:54:42", "throughput": 23186.05, "total_tokens": 3792384} +{"current_steps": 1205, "total_steps": 78105, "loss": 0.6309, "lr": 7.707079759313789e-07, "epoch": 0.0771397477754305, "percentage": 1.54, "elapsed_time": "0:02:44", "remaining_time": "2:54:38", "throughput": 23182.52, "total_tokens": 3806592} +{"current_steps": 1210, "total_steps": 78105, "loss": 0.6041, "lr": 7.739085904493664e-07, "epoch": 0.0774598297164074, "percentage": 1.55, "elapsed_time": "0:02:44", "remaining_time": "2:54:37", "throughput": 23182.65, "total_tokens": 3822080} +{"current_steps": 1215, "total_steps": 78105, "loss": 0.5811, "lr": 7.771092049673538e-07, "epoch": 0.07777991165738429, "percentage": 1.56, "elapsed_time": "0:02:45", "remaining_time": "2:54:35", "throughput": 23181.01, "total_tokens": 3837120} +{"current_steps": 1220, "total_steps": 78105, "loss": 0.7375, "lr": 7.803098194853412e-07, "epoch": 0.07809999359836119, "percentage": 1.56, "elapsed_time": "0:02:46", "remaining_time": "2:54:34", "throughput": 23181.44, "total_tokens": 3852864} +{"current_steps": 1225, "total_steps": 78105, "loss": 0.5147, "lr": 7.835104340033287e-07, "epoch": 0.07842007553933807, "percentage": 1.57, "elapsed_time": "0:02:46", "remaining_time": "2:54:34", "throughput": 23183.18, "total_tokens": 3869184} +{"current_steps": 1230, "total_steps": 78105, "loss": 0.5631, "lr": 7.867110485213161e-07, "epoch": 0.07874015748031496, "percentage": 1.57, "elapsed_time": "0:02:47", "remaining_time": "2:54:33", "throughput": 23184.24, "total_tokens": 3885248} +{"current_steps": 1235, "total_steps": 78105, "loss": 0.4543, "lr": 7.899116630393036e-07, "epoch": 0.07906023942129185, "percentage": 1.58, "elapsed_time": "0:02:48", "remaining_time": "2:54:31", "throughput": 23183.89, "total_tokens": 3900416} +{"current_steps": 1240, "total_steps": 78105, "loss": 0.5628, "lr": 7.931122775572911e-07, "epoch": 0.07938032136226875, "percentage": 1.59, "elapsed_time": "0:02:48", "remaining_time": "2:54:30", "throughput": 23184.08, "total_tokens": 3916096} +{"current_steps": 1245, "total_steps": 78105, "loss": 0.6837, "lr": 7.963128920752785e-07, "epoch": 0.07970040330324563, "percentage": 1.59, "elapsed_time": "0:02:49", "remaining_time": "2:54:32", "throughput": 23187.2, "total_tokens": 3933312} +{"current_steps": 1250, "total_steps": 78105, "loss": 0.6801, "lr": 7.995135065932659e-07, "epoch": 0.08002048524422252, "percentage": 1.6, "elapsed_time": "0:02:50", "remaining_time": "2:54:31", "throughput": 23188.41, "total_tokens": 3949440} +{"current_steps": 1255, "total_steps": 78105, "loss": 0.6986, "lr": 8.027141211112534e-07, "epoch": 0.08034056718519941, "percentage": 1.61, "elapsed_time": "0:02:50", "remaining_time": "2:54:30", "throughput": 23189.01, "total_tokens": 3964992} +{"current_steps": 1260, "total_steps": 78105, "loss": 0.7041, "lr": 8.059147356292408e-07, "epoch": 0.0806606491261763, "percentage": 1.61, "elapsed_time": "0:02:51", "remaining_time": "2:54:30", "throughput": 23191.48, "total_tokens": 3981696} +{"current_steps": 1265, "total_steps": 78105, "loss": 0.6558, "lr": 8.091153501472283e-07, "epoch": 0.08098073106715319, "percentage": 1.62, "elapsed_time": "0:02:52", "remaining_time": "2:54:29", "throughput": 23192.03, "total_tokens": 3997248} +{"current_steps": 1270, "total_steps": 78105, "loss": 0.6925, "lr": 8.123159646652158e-07, "epoch": 0.08130081300813008, "percentage": 1.63, "elapsed_time": "0:02:52", "remaining_time": "2:54:26", "throughput": 23189.79, "total_tokens": 4011648} +{"current_steps": 1275, "total_steps": 78105, "loss": 0.5155, "lr": 8.155165791832032e-07, "epoch": 0.08162089494910697, "percentage": 1.63, "elapsed_time": "0:02:53", "remaining_time": "2:54:26", "throughput": 23191.0, "total_tokens": 4028160} +{"current_steps": 1280, "total_steps": 78105, "loss": 0.6299, "lr": 8.187171937011906e-07, "epoch": 0.08194097689008387, "percentage": 1.64, "elapsed_time": "0:02:54", "remaining_time": "2:54:25", "throughput": 23190.46, "total_tokens": 4043584} +{"current_steps": 1285, "total_steps": 78105, "loss": 0.7085, "lr": 8.219178082191781e-07, "epoch": 0.08226105883106075, "percentage": 1.65, "elapsed_time": "0:02:55", "remaining_time": "2:54:24", "throughput": 23190.67, "total_tokens": 4059456} +{"current_steps": 1290, "total_steps": 78105, "loss": 0.5905, "lr": 8.251184227371655e-07, "epoch": 0.08258114077203764, "percentage": 1.65, "elapsed_time": "0:02:55", "remaining_time": "2:54:25", "throughput": 23192.78, "total_tokens": 4076096} +{"current_steps": 1295, "total_steps": 78105, "loss": 0.6022, "lr": 8.283190372551531e-07, "epoch": 0.08290122271301453, "percentage": 1.66, "elapsed_time": "0:02:56", "remaining_time": "2:54:27", "throughput": 23196.5, "total_tokens": 4093568} +{"current_steps": 1300, "total_steps": 78105, "loss": 0.6345, "lr": 8.315196517731406e-07, "epoch": 0.08322130465399143, "percentage": 1.66, "elapsed_time": "0:02:57", "remaining_time": "2:54:25", "throughput": 23196.57, "total_tokens": 4108864} +{"current_steps": 1305, "total_steps": 78105, "loss": 0.6653, "lr": 8.34720266291128e-07, "epoch": 0.0835413865949683, "percentage": 1.67, "elapsed_time": "0:02:57", "remaining_time": "2:54:23", "throughput": 23195.9, "total_tokens": 4124224} +{"current_steps": 1310, "total_steps": 78105, "loss": 0.5601, "lr": 8.379208808091154e-07, "epoch": 0.0838614685359452, "percentage": 1.68, "elapsed_time": "0:02:58", "remaining_time": "2:54:20", "throughput": 23195.2, "total_tokens": 4139008} +{"current_steps": 1315, "total_steps": 78105, "loss": 0.5444, "lr": 8.411214953271029e-07, "epoch": 0.08418155047692209, "percentage": 1.68, "elapsed_time": "0:02:59", "remaining_time": "2:54:20", "throughput": 23196.14, "total_tokens": 4155008} +{"current_steps": 1320, "total_steps": 78105, "loss": 0.5153, "lr": 8.443221098450903e-07, "epoch": 0.08450163241789899, "percentage": 1.69, "elapsed_time": "0:02:59", "remaining_time": "2:54:21", "throughput": 23200.2, "total_tokens": 4172544} +{"current_steps": 1325, "total_steps": 78105, "loss": 0.7105, "lr": 8.475227243630778e-07, "epoch": 0.08482171435887587, "percentage": 1.7, "elapsed_time": "0:03:00", "remaining_time": "2:54:20", "throughput": 23201.21, "total_tokens": 4188416} +{"current_steps": 1330, "total_steps": 78105, "loss": 0.5091, "lr": 8.507233388810653e-07, "epoch": 0.08514179629985276, "percentage": 1.7, "elapsed_time": "0:03:01", "remaining_time": "2:54:17", "throughput": 23198.64, "total_tokens": 4202560} +{"current_steps": 1335, "total_steps": 78105, "loss": 0.5489, "lr": 8.539239533990527e-07, "epoch": 0.08546187824082965, "percentage": 1.71, "elapsed_time": "0:03:01", "remaining_time": "2:54:18", "throughput": 23200.21, "total_tokens": 4219392} +{"current_steps": 1340, "total_steps": 78105, "loss": 0.5204, "lr": 8.571245679170401e-07, "epoch": 0.08578196018180655, "percentage": 1.72, "elapsed_time": "0:03:02", "remaining_time": "2:54:18", "throughput": 23200.29, "total_tokens": 4235328} +{"current_steps": 1345, "total_steps": 78105, "loss": 0.5803, "lr": 8.603251824350276e-07, "epoch": 0.08610204212278343, "percentage": 1.72, "elapsed_time": "0:03:03", "remaining_time": "2:54:15", "throughput": 23199.72, "total_tokens": 4250368} +{"current_steps": 1350, "total_steps": 78105, "loss": 0.5222, "lr": 8.63525796953015e-07, "epoch": 0.08642212406376032, "percentage": 1.73, "elapsed_time": "0:03:03", "remaining_time": "2:54:14", "throughput": 23199.83, "total_tokens": 4265856} +{"current_steps": 1355, "total_steps": 78105, "loss": 0.6881, "lr": 8.667264114710025e-07, "epoch": 0.08674220600473721, "percentage": 1.73, "elapsed_time": "0:03:04", "remaining_time": "2:54:13", "throughput": 23199.89, "total_tokens": 4281792} +{"current_steps": 1360, "total_steps": 78105, "loss": 0.624, "lr": 8.6992702598899e-07, "epoch": 0.0870622879457141, "percentage": 1.74, "elapsed_time": "0:03:05", "remaining_time": "2:54:12", "throughput": 23199.61, "total_tokens": 4297088} +{"current_steps": 1365, "total_steps": 78105, "loss": 0.549, "lr": 8.731276405069774e-07, "epoch": 0.087382369886691, "percentage": 1.75, "elapsed_time": "0:03:05", "remaining_time": "2:54:10", "throughput": 23198.03, "total_tokens": 4312192} +{"current_steps": 1370, "total_steps": 78105, "loss": 0.5303, "lr": 8.763282550249648e-07, "epoch": 0.08770245182766788, "percentage": 1.75, "elapsed_time": "0:03:06", "remaining_time": "2:54:07", "throughput": 23196.59, "total_tokens": 4326720} +{"current_steps": 1375, "total_steps": 78105, "loss": 0.7837, "lr": 8.795288695429523e-07, "epoch": 0.08802253376864477, "percentage": 1.76, "elapsed_time": "0:03:07", "remaining_time": "2:54:05", "throughput": 23195.09, "total_tokens": 4341760} +{"current_steps": 1380, "total_steps": 78105, "loss": 0.6399, "lr": 8.827294840609397e-07, "epoch": 0.08834261570962167, "percentage": 1.77, "elapsed_time": "0:03:07", "remaining_time": "2:54:05", "throughput": 23195.78, "total_tokens": 4357760} +{"current_steps": 1385, "total_steps": 78105, "loss": 0.6436, "lr": 8.859300985789272e-07, "epoch": 0.08866269765059856, "percentage": 1.77, "elapsed_time": "0:03:08", "remaining_time": "2:54:05", "throughput": 23195.79, "total_tokens": 4373824} +{"current_steps": 1390, "total_steps": 78105, "loss": 0.4859, "lr": 8.891307130969147e-07, "epoch": 0.08898277959157544, "percentage": 1.78, "elapsed_time": "0:03:09", "remaining_time": "2:54:03", "throughput": 23195.15, "total_tokens": 4388992} +{"current_steps": 1395, "total_steps": 78105, "loss": 0.5303, "lr": 8.923313276149021e-07, "epoch": 0.08930286153255233, "percentage": 1.79, "elapsed_time": "0:03:09", "remaining_time": "2:54:01", "throughput": 23194.04, "total_tokens": 4404288} +{"current_steps": 1400, "total_steps": 78105, "loss": 0.5821, "lr": 8.955319421328895e-07, "epoch": 0.08962294347352923, "percentage": 1.79, "elapsed_time": "0:03:10", "remaining_time": "2:54:00", "throughput": 23194.94, "total_tokens": 4419840} +{"current_steps": 1405, "total_steps": 78105, "loss": 0.5764, "lr": 8.98732556650877e-07, "epoch": 0.08994302541450612, "percentage": 1.8, "elapsed_time": "0:03:11", "remaining_time": "2:53:58", "throughput": 23194.76, "total_tokens": 4435200} +{"current_steps": 1410, "total_steps": 78105, "loss": 0.6502, "lr": 9.019331711688644e-07, "epoch": 0.090263107355483, "percentage": 1.81, "elapsed_time": "0:03:11", "remaining_time": "2:53:56", "throughput": 23193.71, "total_tokens": 4450368} +{"current_steps": 1415, "total_steps": 78105, "loss": 0.494, "lr": 9.05133785686852e-07, "epoch": 0.09058318929645989, "percentage": 1.81, "elapsed_time": "0:03:12", "remaining_time": "2:53:55", "throughput": 23194.23, "total_tokens": 4466048} +{"current_steps": 1420, "total_steps": 78105, "loss": 0.5941, "lr": 9.083344002048395e-07, "epoch": 0.09090327123743679, "percentage": 1.82, "elapsed_time": "0:03:13", "remaining_time": "2:53:54", "throughput": 23195.02, "total_tokens": 4481920} +{"current_steps": 1425, "total_steps": 78105, "loss": 0.6146, "lr": 9.115350147228269e-07, "epoch": 0.09122335317841368, "percentage": 1.82, "elapsed_time": "0:03:13", "remaining_time": "2:53:54", "throughput": 23196.69, "total_tokens": 4498112} +{"current_steps": 1430, "total_steps": 78105, "loss": 0.4945, "lr": 9.147356292408143e-07, "epoch": 0.09154343511939056, "percentage": 1.83, "elapsed_time": "0:03:14", "remaining_time": "2:53:56", "throughput": 23199.93, "total_tokens": 4515648} +{"current_steps": 1435, "total_steps": 78105, "loss": 0.6525, "lr": 9.179362437588018e-07, "epoch": 0.09186351706036745, "percentage": 1.84, "elapsed_time": "0:03:15", "remaining_time": "2:53:56", "throughput": 23200.74, "total_tokens": 4531840} +{"current_steps": 1440, "total_steps": 78105, "loss": 0.5896, "lr": 9.211368582767892e-07, "epoch": 0.09218359900134435, "percentage": 1.84, "elapsed_time": "0:03:16", "remaining_time": "2:53:55", "throughput": 23200.92, "total_tokens": 4547456} +{"current_steps": 1445, "total_steps": 78105, "loss": 0.5707, "lr": 9.243374727947767e-07, "epoch": 0.09250368094232124, "percentage": 1.85, "elapsed_time": "0:03:16", "remaining_time": "2:53:54", "throughput": 23201.76, "total_tokens": 4563328} +{"current_steps": 1450, "total_steps": 78105, "loss": 0.5985, "lr": 9.275380873127642e-07, "epoch": 0.09282376288329812, "percentage": 1.86, "elapsed_time": "0:03:17", "remaining_time": "2:53:54", "throughput": 23201.8, "total_tokens": 4579392} +{"current_steps": 1455, "total_steps": 78105, "loss": 0.5964, "lr": 9.307387018307516e-07, "epoch": 0.09314384482427501, "percentage": 1.86, "elapsed_time": "0:03:18", "remaining_time": "2:53:53", "throughput": 23202.8, "total_tokens": 4595584} +{"current_steps": 1460, "total_steps": 78105, "loss": 0.5431, "lr": 9.33939316348739e-07, "epoch": 0.0934639267652519, "percentage": 1.87, "elapsed_time": "0:03:18", "remaining_time": "2:53:51", "throughput": 23200.4, "total_tokens": 4610112} +{"current_steps": 1465, "total_steps": 78105, "loss": 0.6868, "lr": 9.371399308667265e-07, "epoch": 0.0937840087062288, "percentage": 1.88, "elapsed_time": "0:03:19", "remaining_time": "2:53:51", "throughput": 23201.91, "total_tokens": 4626432} +{"current_steps": 1470, "total_steps": 78105, "loss": 0.7079, "lr": 9.403405453847139e-07, "epoch": 0.09410409064720568, "percentage": 1.88, "elapsed_time": "0:03:20", "remaining_time": "2:53:49", "throughput": 23201.6, "total_tokens": 4641792} +{"current_steps": 1475, "total_steps": 78105, "loss": 0.4159, "lr": 9.435411599027014e-07, "epoch": 0.09442417258818257, "percentage": 1.89, "elapsed_time": "0:03:20", "remaining_time": "2:53:48", "throughput": 23200.42, "total_tokens": 4656896} +{"current_steps": 1480, "total_steps": 78105, "loss": 0.6569, "lr": 9.467417744206889e-07, "epoch": 0.09474425452915947, "percentage": 1.89, "elapsed_time": "0:03:21", "remaining_time": "2:53:49", "throughput": 23200.83, "total_tokens": 4673472} +{"current_steps": 1485, "total_steps": 78105, "loss": 0.4387, "lr": 9.499423889386763e-07, "epoch": 0.09506433647013636, "percentage": 1.9, "elapsed_time": "0:03:22", "remaining_time": "2:53:47", "throughput": 23200.31, "total_tokens": 4688896} +{"current_steps": 1490, "total_steps": 78105, "loss": 0.7065, "lr": 9.531430034566637e-07, "epoch": 0.09538441841111324, "percentage": 1.91, "elapsed_time": "0:03:22", "remaining_time": "2:53:47", "throughput": 23199.98, "total_tokens": 4704576} +{"current_steps": 1495, "total_steps": 78105, "loss": 0.5807, "lr": 9.563436179746512e-07, "epoch": 0.09570450035209013, "percentage": 1.91, "elapsed_time": "0:03:23", "remaining_time": "2:53:44", "throughput": 23198.39, "total_tokens": 4719040} +{"current_steps": 1500, "total_steps": 78105, "loss": 0.5473, "lr": 9.595442324926386e-07, "epoch": 0.09602458229306703, "percentage": 1.92, "elapsed_time": "0:03:24", "remaining_time": "2:53:41", "throughput": 23197.57, "total_tokens": 4733696} +{"current_steps": 1505, "total_steps": 78105, "loss": 0.5672, "lr": 9.62744847010626e-07, "epoch": 0.09634466423404392, "percentage": 1.93, "elapsed_time": "0:03:24", "remaining_time": "2:53:39", "throughput": 23197.54, "total_tokens": 4748992} +{"current_steps": 1510, "total_steps": 78105, "loss": 0.703, "lr": 9.659454615286136e-07, "epoch": 0.0966647461750208, "percentage": 1.93, "elapsed_time": "0:03:25", "remaining_time": "2:53:39", "throughput": 23198.4, "total_tokens": 4764992} +{"current_steps": 1515, "total_steps": 78105, "loss": 0.6398, "lr": 9.69146076046601e-07, "epoch": 0.09698482811599769, "percentage": 1.94, "elapsed_time": "0:03:26", "remaining_time": "2:53:37", "throughput": 23197.81, "total_tokens": 4780352} +{"current_steps": 1520, "total_steps": 78105, "loss": 0.5853, "lr": 9.723466905645885e-07, "epoch": 0.09730491005697459, "percentage": 1.95, "elapsed_time": "0:03:26", "remaining_time": "2:53:37", "throughput": 23197.55, "total_tokens": 4796224} +{"current_steps": 1525, "total_steps": 78105, "loss": 0.5472, "lr": 9.75547305082576e-07, "epoch": 0.09762499199795148, "percentage": 1.95, "elapsed_time": "0:03:27", "remaining_time": "2:53:36", "throughput": 23197.2, "total_tokens": 4811840} +{"current_steps": 1530, "total_steps": 78105, "loss": 0.5887, "lr": 9.787479196005634e-07, "epoch": 0.09794507393892836, "percentage": 1.96, "elapsed_time": "0:03:28", "remaining_time": "2:53:34", "throughput": 23195.43, "total_tokens": 4826432} +{"current_steps": 1535, "total_steps": 78105, "loss": 0.4231, "lr": 9.81948534118551e-07, "epoch": 0.09826515587990525, "percentage": 1.97, "elapsed_time": "0:03:28", "remaining_time": "2:53:32", "throughput": 23195.28, "total_tokens": 4841920} +{"current_steps": 1540, "total_steps": 78105, "loss": 0.6139, "lr": 9.851491486365384e-07, "epoch": 0.09858523782088215, "percentage": 1.97, "elapsed_time": "0:03:29", "remaining_time": "2:53:31", "throughput": 23195.72, "total_tokens": 4857536} +{"current_steps": 1545, "total_steps": 78105, "loss": 0.7112, "lr": 9.883497631545258e-07, "epoch": 0.09890531976185904, "percentage": 1.98, "elapsed_time": "0:03:30", "remaining_time": "2:53:30", "throughput": 23196.15, "total_tokens": 4873408} +{"current_steps": 1550, "total_steps": 78105, "loss": 0.6346, "lr": 9.91550377672513e-07, "epoch": 0.09922540170283592, "percentage": 1.98, "elapsed_time": "0:03:30", "remaining_time": "2:53:30", "throughput": 23196.85, "total_tokens": 4889536} +{"current_steps": 1555, "total_steps": 78105, "loss": 0.5943, "lr": 9.947509921905006e-07, "epoch": 0.09954548364381281, "percentage": 1.99, "elapsed_time": "0:03:31", "remaining_time": "2:53:28", "throughput": 23195.86, "total_tokens": 4904448} +{"current_steps": 1560, "total_steps": 78105, "loss": 0.6641, "lr": 9.97951606708488e-07, "epoch": 0.0998655655847897, "percentage": 2.0, "elapsed_time": "0:03:32", "remaining_time": "2:53:27", "throughput": 23194.76, "total_tokens": 4919616} +{"current_steps": 1565, "total_steps": 78105, "loss": 0.569, "lr": 1.0011522212264755e-06, "epoch": 0.1001856475257666, "percentage": 2.0, "elapsed_time": "0:03:32", "remaining_time": "2:53:24", "throughput": 23192.8, "total_tokens": 4934144} +{"current_steps": 1570, "total_steps": 78105, "loss": 0.5557, "lr": 1.004352835744463e-06, "epoch": 0.1005057294667435, "percentage": 2.01, "elapsed_time": "0:03:33", "remaining_time": "2:53:24", "throughput": 23193.26, "total_tokens": 4950272} +{"current_steps": 1575, "total_steps": 78105, "loss": 0.6099, "lr": 1.0075534502624504e-06, "epoch": 0.10082581140772037, "percentage": 2.02, "elapsed_time": "0:03:34", "remaining_time": "2:53:22", "throughput": 23191.28, "total_tokens": 4965056} +{"current_steps": 1580, "total_steps": 78105, "loss": 0.5355, "lr": 1.0107540647804379e-06, "epoch": 0.10114589334869727, "percentage": 2.02, "elapsed_time": "0:03:34", "remaining_time": "2:53:21", "throughput": 23189.53, "total_tokens": 4980160} +{"current_steps": 1585, "total_steps": 78105, "loss": 0.4531, "lr": 1.0139546792984254e-06, "epoch": 0.10146597528967416, "percentage": 2.03, "elapsed_time": "0:03:35", "remaining_time": "2:53:22", "throughput": 23190.8, "total_tokens": 4996992} +{"current_steps": 1590, "total_steps": 78105, "loss": 0.5276, "lr": 1.0171552938164128e-06, "epoch": 0.10178605723065105, "percentage": 2.04, "elapsed_time": "0:03:36", "remaining_time": "2:53:21", "throughput": 23191.17, "total_tokens": 5012608} +{"current_steps": 1595, "total_steps": 78105, "loss": 0.4634, "lr": 1.0203559083344003e-06, "epoch": 0.10210613917162793, "percentage": 2.04, "elapsed_time": "0:03:36", "remaining_time": "2:53:19", "throughput": 23190.57, "total_tokens": 5027840} +{"current_steps": 1600, "total_steps": 78105, "loss": 0.6232, "lr": 1.0235565228523878e-06, "epoch": 0.10242622111260483, "percentage": 2.05, "elapsed_time": "0:03:37", "remaining_time": "2:53:17", "throughput": 23189.85, "total_tokens": 5042752} +{"current_steps": 1605, "total_steps": 78105, "loss": 0.5789, "lr": 1.0267571373703752e-06, "epoch": 0.10274630305358172, "percentage": 2.05, "elapsed_time": "0:03:38", "remaining_time": "2:53:17", "throughput": 23190.39, "total_tokens": 5058624} +{"current_steps": 1610, "total_steps": 78105, "loss": 0.6365, "lr": 1.0299577518883625e-06, "epoch": 0.10306638499455861, "percentage": 2.06, "elapsed_time": "0:03:38", "remaining_time": "2:53:16", "throughput": 23191.95, "total_tokens": 5075008} +{"current_steps": 1615, "total_steps": 78105, "loss": 0.5593, "lr": 1.0331583664063502e-06, "epoch": 0.10338646693553549, "percentage": 2.07, "elapsed_time": "0:03:39", "remaining_time": "2:53:16", "throughput": 23192.51, "total_tokens": 5090880} +{"current_steps": 1620, "total_steps": 78105, "loss": 0.5551, "lr": 1.0363589809243376e-06, "epoch": 0.10370654887651239, "percentage": 2.07, "elapsed_time": "0:03:40", "remaining_time": "2:53:15", "throughput": 23192.4, "total_tokens": 5106816} +{"current_steps": 1625, "total_steps": 78105, "loss": 0.4576, "lr": 1.039559595442325e-06, "epoch": 0.10402663081748928, "percentage": 2.08, "elapsed_time": "0:03:40", "remaining_time": "2:53:17", "throughput": 23194.06, "total_tokens": 5123904} +{"current_steps": 1630, "total_steps": 78105, "loss": 0.5034, "lr": 1.0427602099603126e-06, "epoch": 0.10434671275846617, "percentage": 2.09, "elapsed_time": "0:03:41", "remaining_time": "2:53:17", "throughput": 23194.24, "total_tokens": 5140160} +{"current_steps": 1635, "total_steps": 78105, "loss": 0.4584, "lr": 1.0459608244783e-06, "epoch": 0.10466679469944305, "percentage": 2.09, "elapsed_time": "0:03:42", "remaining_time": "2:53:17", "throughput": 23193.73, "total_tokens": 5156288} +{"current_steps": 1640, "total_steps": 78105, "loss": 0.5812, "lr": 1.0491614389962873e-06, "epoch": 0.10498687664041995, "percentage": 2.1, "elapsed_time": "0:03:42", "remaining_time": "2:53:16", "throughput": 23193.8, "total_tokens": 5171776} +{"current_steps": 1645, "total_steps": 78105, "loss": 0.7, "lr": 1.0523620535142747e-06, "epoch": 0.10530695858139684, "percentage": 2.11, "elapsed_time": "0:03:43", "remaining_time": "2:53:18", "throughput": 23196.43, "total_tokens": 5189504} +{"current_steps": 1650, "total_steps": 78105, "loss": 0.5602, "lr": 1.0555626680322622e-06, "epoch": 0.10562704052237373, "percentage": 2.11, "elapsed_time": "0:03:44", "remaining_time": "2:53:17", "throughput": 23196.81, "total_tokens": 5205056} +{"current_steps": 1655, "total_steps": 78105, "loss": 0.6155, "lr": 1.0587632825502497e-06, "epoch": 0.10594712246335061, "percentage": 2.12, "elapsed_time": "0:03:45", "remaining_time": "2:53:18", "throughput": 23199.82, "total_tokens": 5222656} +{"current_steps": 1660, "total_steps": 78105, "loss": 0.5936, "lr": 1.0619638970682371e-06, "epoch": 0.1062672044043275, "percentage": 2.13, "elapsed_time": "0:03:45", "remaining_time": "2:53:18", "throughput": 23200.34, "total_tokens": 5238848} +{"current_steps": 1665, "total_steps": 78105, "loss": 0.5589, "lr": 1.0651645115862246e-06, "epoch": 0.1065872863453044, "percentage": 2.13, "elapsed_time": "0:03:46", "remaining_time": "2:53:19", "throughput": 23201.28, "total_tokens": 5255296} +{"current_steps": 1670, "total_steps": 78105, "loss": 0.7539, "lr": 1.068365126104212e-06, "epoch": 0.10690736828628129, "percentage": 2.14, "elapsed_time": "0:03:47", "remaining_time": "2:53:17", "throughput": 23200.09, "total_tokens": 5270336} +{"current_steps": 1675, "total_steps": 78105, "loss": 0.5115, "lr": 1.0715657406221996e-06, "epoch": 0.10722745022725817, "percentage": 2.14, "elapsed_time": "0:03:47", "remaining_time": "2:53:17", "throughput": 23200.71, "total_tokens": 5286720} +{"current_steps": 1680, "total_steps": 78105, "loss": 0.621, "lr": 1.074766355140187e-06, "epoch": 0.10754753216823507, "percentage": 2.15, "elapsed_time": "0:03:48", "remaining_time": "2:53:15", "throughput": 23199.22, "total_tokens": 5301632} +{"current_steps": 1685, "total_steps": 78105, "loss": 0.3955, "lr": 1.0779669696581745e-06, "epoch": 0.10786761410921196, "percentage": 2.16, "elapsed_time": "0:03:49", "remaining_time": "2:53:14", "throughput": 23197.87, "total_tokens": 5316544} +{"current_steps": 1690, "total_steps": 78105, "loss": 0.6051, "lr": 1.081167584176162e-06, "epoch": 0.10818769605018885, "percentage": 2.16, "elapsed_time": "0:03:49", "remaining_time": "2:53:13", "throughput": 23197.77, "total_tokens": 5332544} +{"current_steps": 1695, "total_steps": 78105, "loss": 0.4972, "lr": 1.0843681986941494e-06, "epoch": 0.10850777799116573, "percentage": 2.17, "elapsed_time": "0:03:50", "remaining_time": "2:53:12", "throughput": 23197.66, "total_tokens": 5348096} +{"current_steps": 1700, "total_steps": 78105, "loss": 0.6385, "lr": 1.0875688132121367e-06, "epoch": 0.10882785993214263, "percentage": 2.18, "elapsed_time": "0:03:51", "remaining_time": "2:53:10", "throughput": 23195.72, "total_tokens": 5362368} +{"current_steps": 1705, "total_steps": 78105, "loss": 0.5557, "lr": 1.0907694277301241e-06, "epoch": 0.10914794187311952, "percentage": 2.18, "elapsed_time": "0:03:51", "remaining_time": "2:53:09", "throughput": 23196.08, "total_tokens": 5378176} +{"current_steps": 1710, "total_steps": 78105, "loss": 0.5577, "lr": 1.0939700422481116e-06, "epoch": 0.10946802381409641, "percentage": 2.19, "elapsed_time": "0:03:52", "remaining_time": "2:53:09", "throughput": 23197.53, "total_tokens": 5394752} +{"current_steps": 1715, "total_steps": 78105, "loss": 0.6704, "lr": 1.0971706567660993e-06, "epoch": 0.10978810575507329, "percentage": 2.2, "elapsed_time": "0:03:53", "remaining_time": "2:53:10", "throughput": 23200.4, "total_tokens": 5412160} +{"current_steps": 1720, "total_steps": 78105, "loss": 0.5459, "lr": 1.1003712712840868e-06, "epoch": 0.11010818769605019, "percentage": 2.2, "elapsed_time": "0:03:53", "remaining_time": "2:53:09", "throughput": 23200.4, "total_tokens": 5427840} +{"current_steps": 1725, "total_steps": 78105, "loss": 0.4625, "lr": 1.1035718858020742e-06, "epoch": 0.11042826963702708, "percentage": 2.21, "elapsed_time": "0:03:54", "remaining_time": "2:53:09", "throughput": 23201.49, "total_tokens": 5444224} +{"current_steps": 1730, "total_steps": 78105, "loss": 0.6443, "lr": 1.1067725003200615e-06, "epoch": 0.11074835157800397, "percentage": 2.21, "elapsed_time": "0:03:55", "remaining_time": "2:53:08", "throughput": 23201.22, "total_tokens": 5459648} +{"current_steps": 1735, "total_steps": 78105, "loss": 0.7315, "lr": 1.109973114838049e-06, "epoch": 0.11106843351898085, "percentage": 2.22, "elapsed_time": "0:03:55", "remaining_time": "2:53:07", "throughput": 23200.58, "total_tokens": 5475008} +{"current_steps": 1740, "total_steps": 78105, "loss": 0.5923, "lr": 1.1131737293560364e-06, "epoch": 0.11138851545995775, "percentage": 2.23, "elapsed_time": "0:03:56", "remaining_time": "2:53:07", "throughput": 23201.13, "total_tokens": 5491456} +{"current_steps": 1745, "total_steps": 78105, "loss": 0.5267, "lr": 1.1163743438740239e-06, "epoch": 0.11170859740093464, "percentage": 2.23, "elapsed_time": "0:03:57", "remaining_time": "2:53:07", "throughput": 23201.61, "total_tokens": 5507520} +{"current_steps": 1750, "total_steps": 78105, "loss": 0.6325, "lr": 1.1195749583920113e-06, "epoch": 0.11202867934191153, "percentage": 2.24, "elapsed_time": "0:03:58", "remaining_time": "2:53:06", "throughput": 23201.53, "total_tokens": 5523072} +{"current_steps": 1755, "total_steps": 78105, "loss": 0.5338, "lr": 1.1227755729099988e-06, "epoch": 0.11234876128288843, "percentage": 2.25, "elapsed_time": "0:03:58", "remaining_time": "2:53:05", "throughput": 23200.61, "total_tokens": 5538304} +{"current_steps": 1760, "total_steps": 78105, "loss": 0.4734, "lr": 1.1259761874279863e-06, "epoch": 0.1126688432238653, "percentage": 2.25, "elapsed_time": "0:03:59", "remaining_time": "2:53:04", "throughput": 23200.66, "total_tokens": 5553920} +{"current_steps": 1765, "total_steps": 78105, "loss": 0.4812, "lr": 1.1291768019459737e-06, "epoch": 0.1129889251648422, "percentage": 2.26, "elapsed_time": "0:04:00", "remaining_time": "2:53:02", "throughput": 23200.11, "total_tokens": 5569344} +{"current_steps": 1770, "total_steps": 78105, "loss": 0.5413, "lr": 1.1323774164639612e-06, "epoch": 0.11330900710581909, "percentage": 2.27, "elapsed_time": "0:04:00", "remaining_time": "2:53:03", "throughput": 23201.32, "total_tokens": 5585856} +{"current_steps": 1775, "total_steps": 78105, "loss": 0.5719, "lr": 1.1355780309819487e-06, "epoch": 0.11362908904679599, "percentage": 2.27, "elapsed_time": "0:04:01", "remaining_time": "2:53:05", "throughput": 23204.09, "total_tokens": 5603840} +{"current_steps": 1780, "total_steps": 78105, "loss": 0.5216, "lr": 1.1387786454999361e-06, "epoch": 0.11394917098777287, "percentage": 2.28, "elapsed_time": "0:04:02", "remaining_time": "2:53:05", "throughput": 23204.72, "total_tokens": 5620352} +{"current_steps": 1785, "total_steps": 78105, "loss": 0.5749, "lr": 1.1419792600179236e-06, "epoch": 0.11426925292874976, "percentage": 2.29, "elapsed_time": "0:04:02", "remaining_time": "2:53:05", "throughput": 23204.76, "total_tokens": 5636544} +{"current_steps": 1790, "total_steps": 78105, "loss": 0.5669, "lr": 1.1451798745359109e-06, "epoch": 0.11458933486972665, "percentage": 2.29, "elapsed_time": "0:04:03", "remaining_time": "2:53:05", "throughput": 23205.38, "total_tokens": 5652672} +{"current_steps": 1795, "total_steps": 78105, "loss": 0.4952, "lr": 1.1483804890538983e-06, "epoch": 0.11490941681070355, "percentage": 2.3, "elapsed_time": "0:04:04", "remaining_time": "2:53:04", "throughput": 23205.15, "total_tokens": 5668224} +{"current_steps": 1800, "total_steps": 78105, "loss": 0.4601, "lr": 1.1515811035718858e-06, "epoch": 0.11522949875168043, "percentage": 2.3, "elapsed_time": "0:04:04", "remaining_time": "2:53:03", "throughput": 23206.4, "total_tokens": 5684480} +{"current_steps": 1805, "total_steps": 78105, "loss": 0.4491, "lr": 1.1547817180898733e-06, "epoch": 0.11554958069265732, "percentage": 2.31, "elapsed_time": "0:04:05", "remaining_time": "2:53:04", "throughput": 23207.1, "total_tokens": 5700864} +{"current_steps": 1810, "total_steps": 78105, "loss": 0.5962, "lr": 1.1579823326078607e-06, "epoch": 0.11586966263363421, "percentage": 2.32, "elapsed_time": "0:04:06", "remaining_time": "2:53:02", "throughput": 23206.88, "total_tokens": 5716224} +{"current_steps": 1815, "total_steps": 78105, "loss": 0.4912, "lr": 1.1611829471258482e-06, "epoch": 0.1161897445746111, "percentage": 2.32, "elapsed_time": "0:04:07", "remaining_time": "2:53:03", "throughput": 23208.37, "total_tokens": 5733056} +{"current_steps": 1820, "total_steps": 78105, "loss": 0.6931, "lr": 1.1643835616438357e-06, "epoch": 0.11650982651558799, "percentage": 2.33, "elapsed_time": "0:04:07", "remaining_time": "2:53:02", "throughput": 23208.85, "total_tokens": 5749120} +{"current_steps": 1825, "total_steps": 78105, "loss": 0.6725, "lr": 1.1675841761618231e-06, "epoch": 0.11682990845656488, "percentage": 2.34, "elapsed_time": "0:04:08", "remaining_time": "2:53:00", "throughput": 23207.86, "total_tokens": 5763968} +{"current_steps": 1830, "total_steps": 78105, "loss": 0.602, "lr": 1.1707847906798106e-06, "epoch": 0.11714999039754177, "percentage": 2.34, "elapsed_time": "0:04:09", "remaining_time": "2:52:59", "throughput": 23207.86, "total_tokens": 5779520} +{"current_steps": 1835, "total_steps": 78105, "loss": 0.5777, "lr": 1.173985405197798e-06, "epoch": 0.11747007233851867, "percentage": 2.35, "elapsed_time": "0:04:09", "remaining_time": "2:53:03", "throughput": 23211.71, "total_tokens": 5798848} +{"current_steps": 1840, "total_steps": 78105, "loss": 0.4797, "lr": 1.1771860197157855e-06, "epoch": 0.11779015427949555, "percentage": 2.36, "elapsed_time": "0:04:10", "remaining_time": "2:53:02", "throughput": 23211.18, "total_tokens": 5813952} +{"current_steps": 1845, "total_steps": 78105, "loss": 0.427, "lr": 1.180386634233773e-06, "epoch": 0.11811023622047244, "percentage": 2.36, "elapsed_time": "0:04:11", "remaining_time": "2:53:01", "throughput": 23211.55, "total_tokens": 5830016} +{"current_steps": 1850, "total_steps": 78105, "loss": 0.5708, "lr": 1.1835872487517603e-06, "epoch": 0.11843031816144933, "percentage": 2.37, "elapsed_time": "0:04:11", "remaining_time": "2:53:00", "throughput": 23210.77, "total_tokens": 5845312} +{"current_steps": 1855, "total_steps": 78105, "loss": 0.6263, "lr": 1.186787863269748e-06, "epoch": 0.11875040010242623, "percentage": 2.38, "elapsed_time": "0:04:12", "remaining_time": "2:52:59", "throughput": 23210.29, "total_tokens": 5860672} +{"current_steps": 1860, "total_steps": 78105, "loss": 0.5924, "lr": 1.1899884777877354e-06, "epoch": 0.1190704820434031, "percentage": 2.38, "elapsed_time": "0:04:13", "remaining_time": "2:52:57", "throughput": 23209.29, "total_tokens": 5875776} +{"current_steps": 1865, "total_steps": 78105, "loss": 0.3965, "lr": 1.1931890923057229e-06, "epoch": 0.11939056398438, "percentage": 2.39, "elapsed_time": "0:04:13", "remaining_time": "2:52:56", "throughput": 23209.29, "total_tokens": 5891200} +{"current_steps": 1870, "total_steps": 78105, "loss": 0.4818, "lr": 1.1963897068237103e-06, "epoch": 0.11971064592535689, "percentage": 2.39, "elapsed_time": "0:04:14", "remaining_time": "2:52:55", "throughput": 23209.2, "total_tokens": 5907008} +{"current_steps": 1875, "total_steps": 78105, "loss": 0.4941, "lr": 1.1995903213416978e-06, "epoch": 0.12003072786633379, "percentage": 2.4, "elapsed_time": "0:04:15", "remaining_time": "2:52:55", "throughput": 23209.33, "total_tokens": 5923264} +{"current_steps": 1880, "total_steps": 78105, "loss": 0.5121, "lr": 1.202790935859685e-06, "epoch": 0.12035080980731067, "percentage": 2.41, "elapsed_time": "0:04:15", "remaining_time": "2:52:55", "throughput": 23210.3, "total_tokens": 5939648} +{"current_steps": 1885, "total_steps": 78105, "loss": 0.4495, "lr": 1.2059915503776725e-06, "epoch": 0.12067089174828756, "percentage": 2.41, "elapsed_time": "0:04:16", "remaining_time": "2:52:54", "throughput": 23211.28, "total_tokens": 5955520} +{"current_steps": 1890, "total_steps": 78105, "loss": 0.5622, "lr": 1.20919216489566e-06, "epoch": 0.12099097368926445, "percentage": 2.42, "elapsed_time": "0:04:17", "remaining_time": "2:52:53", "throughput": 23211.04, "total_tokens": 5971072} +{"current_steps": 1895, "total_steps": 78105, "loss": 0.605, "lr": 1.2123927794136475e-06, "epoch": 0.12131105563024135, "percentage": 2.43, "elapsed_time": "0:04:17", "remaining_time": "2:52:53", "throughput": 23211.07, "total_tokens": 5987264} +{"current_steps": 1900, "total_steps": 78105, "loss": 0.634, "lr": 1.215593393931635e-06, "epoch": 0.12163113757121823, "percentage": 2.43, "elapsed_time": "0:04:18", "remaining_time": "2:52:52", "throughput": 23210.89, "total_tokens": 6002880} +{"current_steps": 1905, "total_steps": 78105, "loss": 0.5494, "lr": 1.2187940084496224e-06, "epoch": 0.12195121951219512, "percentage": 2.44, "elapsed_time": "0:04:19", "remaining_time": "2:52:53", "throughput": 23212.26, "total_tokens": 6019520} +{"current_steps": 1910, "total_steps": 78105, "loss": 0.6188, "lr": 1.2219946229676099e-06, "epoch": 0.12227130145317201, "percentage": 2.45, "elapsed_time": "0:04:20", "remaining_time": "2:52:52", "throughput": 23213.01, "total_tokens": 6035904} +{"current_steps": 1915, "total_steps": 78105, "loss": 0.607, "lr": 1.2251952374855973e-06, "epoch": 0.1225913833941489, "percentage": 2.45, "elapsed_time": "0:04:20", "remaining_time": "2:52:51", "throughput": 23211.99, "total_tokens": 6050816} +{"current_steps": 1920, "total_steps": 78105, "loss": 0.4801, "lr": 1.2283958520035848e-06, "epoch": 0.12291146533512579, "percentage": 2.46, "elapsed_time": "0:04:21", "remaining_time": "2:52:49", "throughput": 23211.49, "total_tokens": 6065920} +{"current_steps": 1925, "total_steps": 78105, "loss": 0.6155, "lr": 1.2315964665215723e-06, "epoch": 0.12323154727610268, "percentage": 2.46, "elapsed_time": "0:04:22", "remaining_time": "2:52:49", "throughput": 23211.3, "total_tokens": 6081728} +{"current_steps": 1930, "total_steps": 78105, "loss": 0.5382, "lr": 1.2347970810395597e-06, "epoch": 0.12355162921707957, "percentage": 2.47, "elapsed_time": "0:04:22", "remaining_time": "2:52:47", "throughput": 23211.92, "total_tokens": 6097472} +{"current_steps": 1935, "total_steps": 78105, "loss": 0.5551, "lr": 1.2379976955575472e-06, "epoch": 0.12387171115805647, "percentage": 2.48, "elapsed_time": "0:04:23", "remaining_time": "2:52:47", "throughput": 23211.67, "total_tokens": 6113152} +{"current_steps": 1940, "total_steps": 78105, "loss": 0.6146, "lr": 1.2411983100755345e-06, "epoch": 0.12419179309903335, "percentage": 2.48, "elapsed_time": "0:04:24", "remaining_time": "2:52:46", "throughput": 23212.77, "total_tokens": 6129408} +{"current_steps": 1945, "total_steps": 78105, "loss": 0.5587, "lr": 1.244398924593522e-06, "epoch": 0.12451187504001024, "percentage": 2.49, "elapsed_time": "0:04:24", "remaining_time": "2:52:45", "throughput": 23213.04, "total_tokens": 6144896} +{"current_steps": 1950, "total_steps": 78105, "loss": 0.6047, "lr": 1.2475995391115094e-06, "epoch": 0.12483195698098713, "percentage": 2.5, "elapsed_time": "0:04:25", "remaining_time": "2:52:44", "throughput": 23212.56, "total_tokens": 6160256} +{"current_steps": 1955, "total_steps": 78105, "loss": 0.583, "lr": 1.250800153629497e-06, "epoch": 0.12515203892196403, "percentage": 2.5, "elapsed_time": "0:04:26", "remaining_time": "2:52:42", "throughput": 23211.93, "total_tokens": 6175104} +{"current_steps": 1960, "total_steps": 78105, "loss": 0.5475, "lr": 1.2540007681474845e-06, "epoch": 0.1254721208629409, "percentage": 2.51, "elapsed_time": "0:04:26", "remaining_time": "2:52:40", "throughput": 23211.09, "total_tokens": 6190272} +{"current_steps": 1965, "total_steps": 78105, "loss": 0.5469, "lr": 1.257201382665472e-06, "epoch": 0.1257922028039178, "percentage": 2.52, "elapsed_time": "0:04:27", "remaining_time": "2:52:40", "throughput": 23210.72, "total_tokens": 6205952} +{"current_steps": 1970, "total_steps": 78105, "loss": 0.5827, "lr": 1.2604019971834595e-06, "epoch": 0.1261122847448947, "percentage": 2.52, "elapsed_time": "0:04:28", "remaining_time": "2:52:40", "throughput": 23212.04, "total_tokens": 6222592} +{"current_steps": 1975, "total_steps": 78105, "loss": 0.4421, "lr": 1.263602611701447e-06, "epoch": 0.12643236668587157, "percentage": 2.53, "elapsed_time": "0:04:28", "remaining_time": "2:52:38", "throughput": 23211.62, "total_tokens": 6237696} +{"current_steps": 1980, "total_steps": 78105, "loss": 0.4855, "lr": 1.2668032262194344e-06, "epoch": 0.12675244862684848, "percentage": 2.54, "elapsed_time": "0:04:29", "remaining_time": "2:52:38", "throughput": 23212.74, "total_tokens": 6254080} +{"current_steps": 1985, "total_steps": 78105, "loss": 0.5988, "lr": 1.2700038407374215e-06, "epoch": 0.12707253056782536, "percentage": 2.54, "elapsed_time": "0:04:30", "remaining_time": "2:52:38", "throughput": 23212.71, "total_tokens": 6270016} +{"current_steps": 1990, "total_steps": 78105, "loss": 0.5707, "lr": 1.2732044552554091e-06, "epoch": 0.12739261250880227, "percentage": 2.55, "elapsed_time": "0:04:30", "remaining_time": "2:52:36", "throughput": 23212.62, "total_tokens": 6285184} +{"current_steps": 1995, "total_steps": 78105, "loss": 0.5036, "lr": 1.2764050697733966e-06, "epoch": 0.12771269444977915, "percentage": 2.55, "elapsed_time": "0:04:31", "remaining_time": "2:52:35", "throughput": 23212.35, "total_tokens": 6300992} +{"current_steps": 2000, "total_steps": 78105, "loss": 0.5292, "lr": 1.279605684291384e-06, "epoch": 0.12803277639075603, "percentage": 2.56, "elapsed_time": "0:04:32", "remaining_time": "2:52:33", "throughput": 23210.59, "total_tokens": 6315392} +{"current_steps": 2005, "total_steps": 78105, "loss": 0.4883, "lr": 1.2828062988093715e-06, "epoch": 0.12835285833173293, "percentage": 2.57, "elapsed_time": "0:04:32", "remaining_time": "2:52:33", "throughput": 23209.68, "total_tokens": 6331136} +{"current_steps": 2010, "total_steps": 78105, "loss": 0.5797, "lr": 1.286006913327359e-06, "epoch": 0.1286729402727098, "percentage": 2.57, "elapsed_time": "0:04:33", "remaining_time": "2:52:33", "throughput": 23210.18, "total_tokens": 6347584} +{"current_steps": 2015, "total_steps": 78105, "loss": 0.5378, "lr": 1.2892075278453465e-06, "epoch": 0.1289930222136867, "percentage": 2.58, "elapsed_time": "0:04:34", "remaining_time": "2:52:31", "throughput": 23209.61, "total_tokens": 6362560} +{"current_steps": 2020, "total_steps": 78105, "loss": 0.4951, "lr": 1.292408142363334e-06, "epoch": 0.1293131041546636, "percentage": 2.59, "elapsed_time": "0:04:34", "remaining_time": "2:52:30", "throughput": 23210.11, "total_tokens": 6378304} +{"current_steps": 2025, "total_steps": 78105, "loss": 0.5996, "lr": 1.2956087568813214e-06, "epoch": 0.12963318609564048, "percentage": 2.59, "elapsed_time": "0:04:35", "remaining_time": "2:52:29", "throughput": 23209.7, "total_tokens": 6393280} +{"current_steps": 2030, "total_steps": 78105, "loss": 0.5688, "lr": 1.2988093713993089e-06, "epoch": 0.1299532680366174, "percentage": 2.6, "elapsed_time": "0:04:36", "remaining_time": "2:52:28", "throughput": 23210.16, "total_tokens": 6409472} +{"current_steps": 2035, "total_steps": 78105, "loss": 0.7725, "lr": 1.3020099859172963e-06, "epoch": 0.13027334997759427, "percentage": 2.61, "elapsed_time": "0:04:36", "remaining_time": "2:52:27", "throughput": 23210.04, "total_tokens": 6424960} +{"current_steps": 2040, "total_steps": 78105, "loss": 0.5561, "lr": 1.3052106004352838e-06, "epoch": 0.13059343191857115, "percentage": 2.61, "elapsed_time": "0:04:37", "remaining_time": "2:52:26", "throughput": 23209.39, "total_tokens": 6440000} +{"current_steps": 2045, "total_steps": 78105, "loss": 0.4751, "lr": 1.308411214953271e-06, "epoch": 0.13091351385954805, "percentage": 2.62, "elapsed_time": "0:04:38", "remaining_time": "2:52:27", "throughput": 23211.56, "total_tokens": 6457600} +{"current_steps": 2050, "total_steps": 78105, "loss": 0.523, "lr": 1.3116118294712585e-06, "epoch": 0.13123359580052493, "percentage": 2.62, "elapsed_time": "0:04:38", "remaining_time": "2:52:26", "throughput": 23211.47, "total_tokens": 6473088} +{"current_steps": 2055, "total_steps": 78105, "loss": 0.6071, "lr": 1.314812443989246e-06, "epoch": 0.1315536777415018, "percentage": 2.63, "elapsed_time": "0:04:39", "remaining_time": "2:52:24", "throughput": 23210.11, "total_tokens": 6487680} +{"current_steps": 2060, "total_steps": 78105, "loss": 0.4731, "lr": 1.3180130585072335e-06, "epoch": 0.13187375968247872, "percentage": 2.64, "elapsed_time": "0:04:40", "remaining_time": "2:52:23", "throughput": 23210.11, "total_tokens": 6503296} +{"current_steps": 2065, "total_steps": 78105, "loss": 0.5632, "lr": 1.321213673025221e-06, "epoch": 0.1321938416234556, "percentage": 2.64, "elapsed_time": "0:04:40", "remaining_time": "2:52:22", "throughput": 23210.2, "total_tokens": 6518912} +{"current_steps": 2070, "total_steps": 78105, "loss": 0.5936, "lr": 1.3244142875432084e-06, "epoch": 0.1325139235644325, "percentage": 2.65, "elapsed_time": "0:04:41", "remaining_time": "2:52:21", "throughput": 23210.35, "total_tokens": 6534784} +{"current_steps": 2075, "total_steps": 78105, "loss": 0.5515, "lr": 1.3276149020611959e-06, "epoch": 0.13283400550540939, "percentage": 2.66, "elapsed_time": "0:04:42", "remaining_time": "2:52:20", "throughput": 23210.41, "total_tokens": 6550528} +{"current_steps": 2080, "total_steps": 78105, "loss": 0.5215, "lr": 1.3308155165791833e-06, "epoch": 0.13315408744638627, "percentage": 2.66, "elapsed_time": "0:04:42", "remaining_time": "2:52:20", "throughput": 23210.29, "total_tokens": 6566144} +{"current_steps": 2085, "total_steps": 78105, "loss": 0.5965, "lr": 1.3340161310971708e-06, "epoch": 0.13347416938736317, "percentage": 2.67, "elapsed_time": "0:04:43", "remaining_time": "2:52:21", "throughput": 23212.19, "total_tokens": 6583872} +{"current_steps": 2090, "total_steps": 78105, "loss": 0.4697, "lr": 1.3372167456151583e-06, "epoch": 0.13379425132834005, "percentage": 2.68, "elapsed_time": "0:04:44", "remaining_time": "2:52:23", "throughput": 23215.05, "total_tokens": 6602304} +{"current_steps": 2095, "total_steps": 78105, "loss": 0.4969, "lr": 1.3404173601331457e-06, "epoch": 0.13411433326931693, "percentage": 2.68, "elapsed_time": "0:04:45", "remaining_time": "2:52:23", "throughput": 23215.26, "total_tokens": 6618112} +{"current_steps": 2100, "total_steps": 78105, "loss": 0.6301, "lr": 1.3436179746511332e-06, "epoch": 0.13443441521029384, "percentage": 2.69, "elapsed_time": "0:04:45", "remaining_time": "2:52:24", "throughput": 23217.3, "total_tokens": 6635584} +{"current_steps": 2105, "total_steps": 78105, "loss": 0.5273, "lr": 1.3468185891691205e-06, "epoch": 0.13475449715127072, "percentage": 2.7, "elapsed_time": "0:04:46", "remaining_time": "2:52:25", "throughput": 23219.37, "total_tokens": 6653568} +{"current_steps": 2110, "total_steps": 78105, "loss": 0.4814, "lr": 1.350019203687108e-06, "epoch": 0.13507457909224763, "percentage": 2.7, "elapsed_time": "0:04:47", "remaining_time": "2:52:24", "throughput": 23218.67, "total_tokens": 6668864} +{"current_steps": 2115, "total_steps": 78105, "loss": 0.5259, "lr": 1.3532198182050954e-06, "epoch": 0.1353946610332245, "percentage": 2.71, "elapsed_time": "0:04:47", "remaining_time": "2:52:23", "throughput": 23218.36, "total_tokens": 6684416} +{"current_steps": 2120, "total_steps": 78105, "loss": 0.598, "lr": 1.3564204327230829e-06, "epoch": 0.13571474297420139, "percentage": 2.71, "elapsed_time": "0:04:48", "remaining_time": "2:52:22", "throughput": 23217.41, "total_tokens": 6699392} +{"current_steps": 2125, "total_steps": 78105, "loss": 0.5651, "lr": 1.3596210472410703e-06, "epoch": 0.1360348249151783, "percentage": 2.72, "elapsed_time": "0:04:49", "remaining_time": "2:52:20", "throughput": 23216.44, "total_tokens": 6714560} +{"current_steps": 2130, "total_steps": 78105, "loss": 0.5548, "lr": 1.3628216617590578e-06, "epoch": 0.13635490685615517, "percentage": 2.73, "elapsed_time": "0:04:49", "remaining_time": "2:52:19", "throughput": 23216.63, "total_tokens": 6729920} +{"current_steps": 2135, "total_steps": 78105, "loss": 0.5734, "lr": 1.3660222762770453e-06, "epoch": 0.13667498879713205, "percentage": 2.73, "elapsed_time": "0:04:50", "remaining_time": "2:52:17", "throughput": 23215.69, "total_tokens": 6744768} +{"current_steps": 2140, "total_steps": 78105, "loss": 0.4626, "lr": 1.3692228907950327e-06, "epoch": 0.13699507073810896, "percentage": 2.74, "elapsed_time": "0:04:51", "remaining_time": "2:52:16", "throughput": 23214.76, "total_tokens": 6759680} +{"current_steps": 2145, "total_steps": 78105, "loss": 0.5224, "lr": 1.3724235053130202e-06, "epoch": 0.13731515267908584, "percentage": 2.75, "elapsed_time": "0:04:51", "remaining_time": "2:52:14", "throughput": 23214.94, "total_tokens": 6775168} +{"current_steps": 2150, "total_steps": 78105, "loss": 0.5441, "lr": 1.3756241198310077e-06, "epoch": 0.13763523462006275, "percentage": 2.75, "elapsed_time": "0:04:52", "remaining_time": "2:52:14", "throughput": 23215.48, "total_tokens": 6791296} +{"current_steps": 2155, "total_steps": 78105, "loss": 0.446, "lr": 1.3788247343489951e-06, "epoch": 0.13795531656103963, "percentage": 2.76, "elapsed_time": "0:04:53", "remaining_time": "2:52:15", "throughput": 23217.28, "total_tokens": 6808640} +{"current_steps": 2160, "total_steps": 78105, "loss": 0.6584, "lr": 1.3820253488669826e-06, "epoch": 0.1382753985020165, "percentage": 2.77, "elapsed_time": "0:04:53", "remaining_time": "2:52:14", "throughput": 23217.21, "total_tokens": 6824064} +{"current_steps": 2165, "total_steps": 78105, "loss": 0.4944, "lr": 1.3852259633849698e-06, "epoch": 0.1385954804429934, "percentage": 2.77, "elapsed_time": "0:04:54", "remaining_time": "2:52:13", "throughput": 23218.02, "total_tokens": 6839872} +{"current_steps": 2170, "total_steps": 78105, "loss": 0.4855, "lr": 1.3884265779029573e-06, "epoch": 0.1389155623839703, "percentage": 2.78, "elapsed_time": "0:04:55", "remaining_time": "2:52:12", "throughput": 23217.71, "total_tokens": 6855744} +{"current_steps": 2175, "total_steps": 78105, "loss": 0.5269, "lr": 1.3916271924209448e-06, "epoch": 0.1392356443249472, "percentage": 2.78, "elapsed_time": "0:04:55", "remaining_time": "2:52:10", "throughput": 23215.83, "total_tokens": 6869632} +{"current_steps": 2180, "total_steps": 78105, "loss": 0.4592, "lr": 1.3948278069389322e-06, "epoch": 0.13955572626592408, "percentage": 2.79, "elapsed_time": "0:04:56", "remaining_time": "2:52:09", "throughput": 23215.67, "total_tokens": 6885696} +{"current_steps": 2185, "total_steps": 78105, "loss": 0.4235, "lr": 1.3980284214569197e-06, "epoch": 0.13987580820690096, "percentage": 2.8, "elapsed_time": "0:04:57", "remaining_time": "2:52:09", "throughput": 23215.9, "total_tokens": 6901504} +{"current_steps": 2190, "total_steps": 78105, "loss": 0.6028, "lr": 1.4012290359749072e-06, "epoch": 0.14019589014787787, "percentage": 2.8, "elapsed_time": "0:04:57", "remaining_time": "2:52:07", "throughput": 23215.61, "total_tokens": 6916480} +{"current_steps": 2195, "total_steps": 78105, "loss": 0.4976, "lr": 1.4044296504928949e-06, "epoch": 0.14051597208885475, "percentage": 2.81, "elapsed_time": "0:04:58", "remaining_time": "2:52:06", "throughput": 23215.62, "total_tokens": 6932416} +{"current_steps": 2200, "total_steps": 78105, "loss": 0.5292, "lr": 1.4076302650108823e-06, "epoch": 0.14083605402983163, "percentage": 2.82, "elapsed_time": "0:04:59", "remaining_time": "2:52:06", "throughput": 23216.71, "total_tokens": 6948992} +{"current_steps": 2205, "total_steps": 78105, "loss": 0.4567, "lr": 1.4108308795288698e-06, "epoch": 0.14115613597080853, "percentage": 2.82, "elapsed_time": "0:04:59", "remaining_time": "2:52:05", "throughput": 23216.6, "total_tokens": 6964416} +{"current_steps": 2210, "total_steps": 78105, "loss": 0.4673, "lr": 1.4140314940468573e-06, "epoch": 0.1414762179117854, "percentage": 2.83, "elapsed_time": "0:05:00", "remaining_time": "2:52:05", "throughput": 23217.18, "total_tokens": 6980544} +{"current_steps": 2215, "total_steps": 78105, "loss": 0.5736, "lr": 1.4172321085648447e-06, "epoch": 0.14179629985276232, "percentage": 2.84, "elapsed_time": "0:05:01", "remaining_time": "2:52:05", "throughput": 23218.16, "total_tokens": 6997440} +{"current_steps": 2220, "total_steps": 78105, "loss": 0.4089, "lr": 1.4204327230828322e-06, "epoch": 0.1421163817937392, "percentage": 2.84, "elapsed_time": "0:05:02", "remaining_time": "2:52:04", "throughput": 23217.47, "total_tokens": 7012672} +{"current_steps": 2225, "total_steps": 78105, "loss": 0.4398, "lr": 1.4236333376008192e-06, "epoch": 0.14243646373471608, "percentage": 2.85, "elapsed_time": "0:05:02", "remaining_time": "2:52:03", "throughput": 23216.24, "total_tokens": 7027648} +{"current_steps": 2230, "total_steps": 78105, "loss": 0.5415, "lr": 1.426833952118807e-06, "epoch": 0.142756545675693, "percentage": 2.86, "elapsed_time": "0:05:03", "remaining_time": "2:52:02", "throughput": 23216.0, "total_tokens": 7043200} +{"current_steps": 2235, "total_steps": 78105, "loss": 0.7296, "lr": 1.4300345666367944e-06, "epoch": 0.14307662761666987, "percentage": 2.86, "elapsed_time": "0:05:04", "remaining_time": "2:52:01", "throughput": 23215.44, "total_tokens": 7058688} +{"current_steps": 2240, "total_steps": 78105, "loss": 0.589, "lr": 1.4332351811547819e-06, "epoch": 0.14339670955764675, "percentage": 2.87, "elapsed_time": "0:05:04", "remaining_time": "2:52:00", "throughput": 23214.98, "total_tokens": 7074048} +{"current_steps": 2245, "total_steps": 78105, "loss": 0.5317, "lr": 1.4364357956727693e-06, "epoch": 0.14371679149862365, "percentage": 2.87, "elapsed_time": "0:05:05", "remaining_time": "2:51:59", "throughput": 23215.1, "total_tokens": 7089792} +{"current_steps": 2250, "total_steps": 78105, "loss": 0.4828, "lr": 1.4396364101907568e-06, "epoch": 0.14403687343960053, "percentage": 2.88, "elapsed_time": "0:05:06", "remaining_time": "2:51:58", "throughput": 23213.72, "total_tokens": 7104640} +{"current_steps": 2255, "total_steps": 78105, "loss": 0.5703, "lr": 1.4428370247087443e-06, "epoch": 0.14435695538057744, "percentage": 2.89, "elapsed_time": "0:05:06", "remaining_time": "2:51:57", "throughput": 23213.3, "total_tokens": 7120064} +{"current_steps": 2260, "total_steps": 78105, "loss": 0.4994, "lr": 1.4460376392267317e-06, "epoch": 0.14467703732155432, "percentage": 2.89, "elapsed_time": "0:05:07", "remaining_time": "2:51:55", "throughput": 23212.56, "total_tokens": 7135232} +{"current_steps": 2265, "total_steps": 78105, "loss": 0.5625, "lr": 1.4492382537447192e-06, "epoch": 0.1449971192625312, "percentage": 2.9, "elapsed_time": "0:05:08", "remaining_time": "2:51:53", "throughput": 23211.21, "total_tokens": 7149632} +{"current_steps": 2270, "total_steps": 78105, "loss": 0.421, "lr": 1.4524388682627067e-06, "epoch": 0.1453172012035081, "percentage": 2.91, "elapsed_time": "0:05:08", "remaining_time": "2:51:52", "throughput": 23209.98, "total_tokens": 7164352} +{"current_steps": 2275, "total_steps": 78105, "loss": 0.5397, "lr": 1.4556394827806941e-06, "epoch": 0.14563728314448499, "percentage": 2.91, "elapsed_time": "0:05:09", "remaining_time": "2:51:51", "throughput": 23209.22, "total_tokens": 7179776} +{"current_steps": 2280, "total_steps": 78105, "loss": 0.4446, "lr": 1.4588400972986816e-06, "epoch": 0.14595736508546187, "percentage": 2.92, "elapsed_time": "0:05:09", "remaining_time": "2:51:49", "throughput": 23207.64, "total_tokens": 7194240} +{"current_steps": 2285, "total_steps": 78105, "loss": 0.4157, "lr": 1.4620407118166688e-06, "epoch": 0.14627744702643877, "percentage": 2.93, "elapsed_time": "0:05:10", "remaining_time": "2:51:48", "throughput": 23207.26, "total_tokens": 7209472} +{"current_steps": 2290, "total_steps": 78105, "loss": 0.4357, "lr": 1.4652413263346563e-06, "epoch": 0.14659752896741565, "percentage": 2.93, "elapsed_time": "0:05:11", "remaining_time": "2:51:47", "throughput": 23207.33, "total_tokens": 7225088} +{"current_steps": 2295, "total_steps": 78105, "loss": 0.6335, "lr": 1.4684419408526438e-06, "epoch": 0.14691761090839256, "percentage": 2.94, "elapsed_time": "0:05:11", "remaining_time": "2:51:46", "throughput": 23207.45, "total_tokens": 7240704} +{"current_steps": 2300, "total_steps": 78105, "loss": 0.4996, "lr": 1.4716425553706312e-06, "epoch": 0.14723769284936944, "percentage": 2.94, "elapsed_time": "0:05:12", "remaining_time": "2:51:44", "throughput": 23208.03, "total_tokens": 7256000} +{"current_steps": 2305, "total_steps": 78105, "loss": 0.5397, "lr": 1.4748431698886187e-06, "epoch": 0.14755777479034632, "percentage": 2.95, "elapsed_time": "0:05:13", "remaining_time": "2:51:42", "throughput": 23206.68, "total_tokens": 7270336} +{"current_steps": 2310, "total_steps": 78105, "loss": 0.5644, "lr": 1.4780437844066062e-06, "epoch": 0.14787785673132323, "percentage": 2.96, "elapsed_time": "0:05:13", "remaining_time": "2:51:40", "throughput": 23206.44, "total_tokens": 7285440} +{"current_steps": 2315, "total_steps": 78105, "loss": 0.4965, "lr": 1.4812443989245936e-06, "epoch": 0.1481979386723001, "percentage": 2.96, "elapsed_time": "0:05:14", "remaining_time": "2:51:40", "throughput": 23206.05, "total_tokens": 7301120} +{"current_steps": 2320, "total_steps": 78105, "loss": 0.514, "lr": 1.4844450134425811e-06, "epoch": 0.14851802061327699, "percentage": 2.97, "elapsed_time": "0:05:15", "remaining_time": "2:51:39", "throughput": 23205.25, "total_tokens": 7316416} +{"current_steps": 2325, "total_steps": 78105, "loss": 0.5168, "lr": 1.4876456279605686e-06, "epoch": 0.1488381025542539, "percentage": 2.98, "elapsed_time": "0:05:15", "remaining_time": "2:51:38", "throughput": 23205.56, "total_tokens": 7332160} +{"current_steps": 2330, "total_steps": 78105, "loss": 0.6872, "lr": 1.490846242478556e-06, "epoch": 0.14915818449523077, "percentage": 2.98, "elapsed_time": "0:05:16", "remaining_time": "2:51:37", "throughput": 23205.29, "total_tokens": 7347776} +{"current_steps": 2335, "total_steps": 78105, "loss": 0.5837, "lr": 1.4940468569965435e-06, "epoch": 0.14947826643620768, "percentage": 2.99, "elapsed_time": "0:05:17", "remaining_time": "2:51:36", "throughput": 23204.91, "total_tokens": 7363200} +{"current_steps": 2340, "total_steps": 78105, "loss": 0.4732, "lr": 1.497247471514531e-06, "epoch": 0.14979834837718456, "percentage": 3.0, "elapsed_time": "0:05:17", "remaining_time": "2:51:35", "throughput": 23205.08, "total_tokens": 7378752} +{"current_steps": 2345, "total_steps": 78105, "loss": 0.4904, "lr": 1.5004480860325182e-06, "epoch": 0.15011843031816144, "percentage": 3.0, "elapsed_time": "0:05:18", "remaining_time": "2:51:34", "throughput": 23204.73, "total_tokens": 7393984} +{"current_steps": 2350, "total_steps": 78105, "loss": 0.5569, "lr": 1.5036487005505057e-06, "epoch": 0.15043851225913835, "percentage": 3.01, "elapsed_time": "0:05:19", "remaining_time": "2:51:32", "throughput": 23204.15, "total_tokens": 7408832} +{"current_steps": 2355, "total_steps": 78105, "loss": 0.4751, "lr": 1.5068493150684932e-06, "epoch": 0.15075859420011523, "percentage": 3.02, "elapsed_time": "0:05:19", "remaining_time": "2:51:30", "throughput": 23202.61, "total_tokens": 7423040} +{"current_steps": 2360, "total_steps": 78105, "loss": 0.5269, "lr": 1.5100499295864806e-06, "epoch": 0.15107867614109213, "percentage": 3.02, "elapsed_time": "0:05:20", "remaining_time": "2:51:29", "throughput": 23202.17, "total_tokens": 7438464} +{"current_steps": 2365, "total_steps": 78105, "loss": 0.5282, "lr": 1.5132505441044681e-06, "epoch": 0.151398758082069, "percentage": 3.03, "elapsed_time": "0:05:21", "remaining_time": "2:51:28", "throughput": 23201.7, "total_tokens": 7453824} +{"current_steps": 2370, "total_steps": 78105, "loss": 0.6004, "lr": 1.5164511586224556e-06, "epoch": 0.1517188400230459, "percentage": 3.03, "elapsed_time": "0:05:21", "remaining_time": "2:51:27", "throughput": 23201.32, "total_tokens": 7469312} +{"current_steps": 2375, "total_steps": 78105, "loss": 0.6537, "lr": 1.519651773140443e-06, "epoch": 0.1520389219640228, "percentage": 3.04, "elapsed_time": "0:05:22", "remaining_time": "2:51:26", "throughput": 23201.52, "total_tokens": 7485120} +{"current_steps": 2380, "total_steps": 78105, "loss": 0.5134, "lr": 1.5228523876584305e-06, "epoch": 0.15235900390499968, "percentage": 3.05, "elapsed_time": "0:05:23", "remaining_time": "2:51:25", "throughput": 23201.34, "total_tokens": 7500416} +{"current_steps": 2385, "total_steps": 78105, "loss": 0.4294, "lr": 1.526053002176418e-06, "epoch": 0.15267908584597656, "percentage": 3.05, "elapsed_time": "0:05:23", "remaining_time": "2:51:25", "throughput": 23202.0, "total_tokens": 7516672} +{"current_steps": 2390, "total_steps": 78105, "loss": 0.5174, "lr": 1.5292536166944054e-06, "epoch": 0.15299916778695347, "percentage": 3.06, "elapsed_time": "0:05:24", "remaining_time": "2:51:24", "throughput": 23202.59, "total_tokens": 7532800} +{"current_steps": 2395, "total_steps": 78105, "loss": 0.5293, "lr": 1.532454231212393e-06, "epoch": 0.15331924972793035, "percentage": 3.07, "elapsed_time": "0:05:25", "remaining_time": "2:51:23", "throughput": 23201.91, "total_tokens": 7547776} +{"current_steps": 2400, "total_steps": 78105, "loss": 0.6115, "lr": 1.5356548457303804e-06, "epoch": 0.15363933166890725, "percentage": 3.07, "elapsed_time": "0:05:25", "remaining_time": "2:51:21", "throughput": 23200.98, "total_tokens": 7562496} +{"current_steps": 2405, "total_steps": 78105, "loss": 0.5652, "lr": 1.5388554602483676e-06, "epoch": 0.15395941360988413, "percentage": 3.08, "elapsed_time": "0:05:26", "remaining_time": "2:51:20", "throughput": 23200.41, "total_tokens": 7577920} +{"current_steps": 2410, "total_steps": 78105, "loss": 0.5677, "lr": 1.542056074766355e-06, "epoch": 0.154279495550861, "percentage": 3.09, "elapsed_time": "0:05:27", "remaining_time": "2:51:19", "throughput": 23200.33, "total_tokens": 7593216} +{"current_steps": 2415, "total_steps": 78105, "loss": 0.5918, "lr": 1.5452566892843426e-06, "epoch": 0.15459957749183792, "percentage": 3.09, "elapsed_time": "0:05:27", "remaining_time": "2:51:18", "throughput": 23199.35, "total_tokens": 7608192} +{"current_steps": 2420, "total_steps": 78105, "loss": 0.4747, "lr": 1.54845730380233e-06, "epoch": 0.1549196594328148, "percentage": 3.1, "elapsed_time": "0:05:28", "remaining_time": "2:51:18", "throughput": 23199.99, "total_tokens": 7624448} +{"current_steps": 2425, "total_steps": 78105, "loss": 0.4965, "lr": 1.5516579183203175e-06, "epoch": 0.15523974137379168, "percentage": 3.1, "elapsed_time": "0:05:29", "remaining_time": "2:51:17", "throughput": 23200.06, "total_tokens": 7640192} +{"current_steps": 2430, "total_steps": 78105, "loss": 0.5224, "lr": 1.554858532838305e-06, "epoch": 0.15555982331476859, "percentage": 3.11, "elapsed_time": "0:05:29", "remaining_time": "2:51:15", "throughput": 23199.53, "total_tokens": 7654848} +{"current_steps": 2435, "total_steps": 78105, "loss": 0.6756, "lr": 1.5580591473562926e-06, "epoch": 0.15587990525574547, "percentage": 3.12, "elapsed_time": "0:05:30", "remaining_time": "2:51:15", "throughput": 23199.75, "total_tokens": 7670848} +{"current_steps": 2440, "total_steps": 78105, "loss": 0.6278, "lr": 1.5612597618742801e-06, "epoch": 0.15619998719672237, "percentage": 3.12, "elapsed_time": "0:05:31", "remaining_time": "2:51:13", "throughput": 23199.46, "total_tokens": 7686016} +{"current_steps": 2445, "total_steps": 78105, "loss": 0.549, "lr": 1.5644603763922676e-06, "epoch": 0.15652006913769925, "percentage": 3.13, "elapsed_time": "0:05:31", "remaining_time": "2:51:12", "throughput": 23199.79, "total_tokens": 7701760} +{"current_steps": 2450, "total_steps": 78105, "loss": 0.5704, "lr": 1.567660990910255e-06, "epoch": 0.15684015107867613, "percentage": 3.14, "elapsed_time": "0:05:32", "remaining_time": "2:51:12", "throughput": 23199.95, "total_tokens": 7717760} +{"current_steps": 2455, "total_steps": 78105, "loss": 0.3862, "lr": 1.5708616054282425e-06, "epoch": 0.15716023301965304, "percentage": 3.14, "elapsed_time": "0:05:33", "remaining_time": "2:51:11", "throughput": 23199.65, "total_tokens": 7733376} +{"current_steps": 2460, "total_steps": 78105, "loss": 0.5703, "lr": 1.57406221994623e-06, "epoch": 0.15748031496062992, "percentage": 3.15, "elapsed_time": "0:05:34", "remaining_time": "2:51:12", "throughput": 23200.93, "total_tokens": 7750464} +{"current_steps": 2465, "total_steps": 78105, "loss": 0.5254, "lr": 1.577262834464217e-06, "epoch": 0.1578003969016068, "percentage": 3.16, "elapsed_time": "0:05:34", "remaining_time": "2:51:13", "throughput": 23203.58, "total_tokens": 7768576} +{"current_steps": 2470, "total_steps": 78105, "loss": 0.4806, "lr": 1.5804634489822047e-06, "epoch": 0.1581204788425837, "percentage": 3.16, "elapsed_time": "0:05:35", "remaining_time": "2:51:12", "throughput": 23203.92, "total_tokens": 7784256} +{"current_steps": 2475, "total_steps": 78105, "loss": 0.3638, "lr": 1.5836640635001922e-06, "epoch": 0.15844056078356059, "percentage": 3.17, "elapsed_time": "0:05:36", "remaining_time": "2:51:11", "throughput": 23203.47, "total_tokens": 7799232} +{"current_steps": 2480, "total_steps": 78105, "loss": 0.6353, "lr": 1.5868646780181796e-06, "epoch": 0.1587606427245375, "percentage": 3.18, "elapsed_time": "0:05:36", "remaining_time": "2:51:10", "throughput": 23203.87, "total_tokens": 7815040} +{"current_steps": 2485, "total_steps": 78105, "loss": 0.4745, "lr": 1.5900652925361671e-06, "epoch": 0.15908072466551437, "percentage": 3.18, "elapsed_time": "0:05:37", "remaining_time": "2:51:09", "throughput": 23203.67, "total_tokens": 7830400} +{"current_steps": 2490, "total_steps": 78105, "loss": 0.5098, "lr": 1.5932659070541546e-06, "epoch": 0.15940080660649125, "percentage": 3.19, "elapsed_time": "0:05:38", "remaining_time": "2:51:08", "throughput": 23203.63, "total_tokens": 7846336} +{"current_steps": 2495, "total_steps": 78105, "loss": 0.6127, "lr": 1.596466521572142e-06, "epoch": 0.15972088854746816, "percentage": 3.19, "elapsed_time": "0:05:38", "remaining_time": "2:51:08", "throughput": 23204.52, "total_tokens": 7862528} +{"current_steps": 2500, "total_steps": 78105, "loss": 0.4496, "lr": 1.5996671360901295e-06, "epoch": 0.16004097048844504, "percentage": 3.2, "elapsed_time": "0:05:39", "remaining_time": "2:51:09", "throughput": 23205.78, "total_tokens": 7880000} +{"current_steps": 2505, "total_steps": 78105, "loss": 0.4487, "lr": 1.602867750608117e-06, "epoch": 0.16036105242942192, "percentage": 3.21, "elapsed_time": "0:05:40", "remaining_time": "2:51:08", "throughput": 23205.77, "total_tokens": 7895296} +{"current_steps": 2510, "total_steps": 78105, "loss": 0.636, "lr": 1.6060683651261044e-06, "epoch": 0.16068113437039883, "percentage": 3.21, "elapsed_time": "0:05:40", "remaining_time": "2:51:07", "throughput": 23205.79, "total_tokens": 7911104} +{"current_steps": 2515, "total_steps": 78105, "loss": 0.5421, "lr": 1.609268979644092e-06, "epoch": 0.1610012163113757, "percentage": 3.22, "elapsed_time": "0:05:41", "remaining_time": "2:51:05", "throughput": 23205.32, "total_tokens": 7925952} +{"current_steps": 2520, "total_steps": 78105, "loss": 0.6206, "lr": 1.6124695941620794e-06, "epoch": 0.1613212982523526, "percentage": 3.23, "elapsed_time": "0:05:42", "remaining_time": "2:51:04", "throughput": 23204.45, "total_tokens": 7940928} +{"current_steps": 2525, "total_steps": 78105, "loss": 0.5694, "lr": 1.6156702086800666e-06, "epoch": 0.1616413801933295, "percentage": 3.23, "elapsed_time": "0:05:42", "remaining_time": "2:51:02", "throughput": 23203.12, "total_tokens": 7955200} +{"current_steps": 2530, "total_steps": 78105, "loss": 0.6508, "lr": 1.618870823198054e-06, "epoch": 0.16196146213430637, "percentage": 3.24, "elapsed_time": "0:05:43", "remaining_time": "2:51:01", "throughput": 23202.87, "total_tokens": 7970944} +{"current_steps": 2535, "total_steps": 78105, "loss": 0.4962, "lr": 1.6220714377160416e-06, "epoch": 0.16228154407528328, "percentage": 3.25, "elapsed_time": "0:05:44", "remaining_time": "2:51:00", "throughput": 23202.42, "total_tokens": 7985856} +{"current_steps": 2540, "total_steps": 78105, "loss": 0.4368, "lr": 1.625272052234029e-06, "epoch": 0.16260162601626016, "percentage": 3.25, "elapsed_time": "0:05:44", "remaining_time": "2:50:59", "throughput": 23201.46, "total_tokens": 8001088} +{"current_steps": 2545, "total_steps": 78105, "loss": 0.5544, "lr": 1.6284726667520165e-06, "epoch": 0.16292170795723707, "percentage": 3.26, "elapsed_time": "0:05:45", "remaining_time": "2:50:59", "throughput": 23202.06, "total_tokens": 8017856} +{"current_steps": 2550, "total_steps": 78105, "loss": 0.5051, "lr": 1.631673281270004e-06, "epoch": 0.16324178989821395, "percentage": 3.26, "elapsed_time": "0:05:46", "remaining_time": "2:50:59", "throughput": 23202.42, "total_tokens": 8033792} +{"current_steps": 2555, "total_steps": 78105, "loss": 0.5028, "lr": 1.6348738957879914e-06, "epoch": 0.16356187183919083, "percentage": 3.27, "elapsed_time": "0:05:46", "remaining_time": "2:50:57", "throughput": 23200.95, "total_tokens": 8048256} +{"current_steps": 2560, "total_steps": 78105, "loss": 0.5597, "lr": 1.638074510305979e-06, "epoch": 0.16388195378016773, "percentage": 3.28, "elapsed_time": "0:05:47", "remaining_time": "2:50:56", "throughput": 23200.9, "total_tokens": 8063680} +{"current_steps": 2565, "total_steps": 78105, "loss": 0.4872, "lr": 1.6412751248239664e-06, "epoch": 0.1642020357211446, "percentage": 3.28, "elapsed_time": "0:05:48", "remaining_time": "2:50:55", "throughput": 23201.27, "total_tokens": 8079744} +{"current_steps": 2570, "total_steps": 78105, "loss": 0.5346, "lr": 1.6444757393419538e-06, "epoch": 0.1645221176621215, "percentage": 3.29, "elapsed_time": "0:05:48", "remaining_time": "2:50:54", "throughput": 23200.86, "total_tokens": 8094656} +{"current_steps": 2575, "total_steps": 78105, "loss": 0.5457, "lr": 1.6476763538599413e-06, "epoch": 0.1648421996030984, "percentage": 3.3, "elapsed_time": "0:05:49", "remaining_time": "2:50:53", "throughput": 23201.12, "total_tokens": 8110528} +{"current_steps": 2580, "total_steps": 78105, "loss": 0.5589, "lr": 1.6508769683779288e-06, "epoch": 0.16516228154407528, "percentage": 3.3, "elapsed_time": "0:05:50", "remaining_time": "2:50:53", "throughput": 23201.23, "total_tokens": 8126720} +{"current_steps": 2585, "total_steps": 78105, "loss": 0.5366, "lr": 1.654077582895916e-06, "epoch": 0.1654823634850522, "percentage": 3.31, "elapsed_time": "0:05:50", "remaining_time": "2:50:51", "throughput": 23200.33, "total_tokens": 8141312} +{"current_steps": 2590, "total_steps": 78105, "loss": 0.5181, "lr": 1.6572781974139035e-06, "epoch": 0.16580244542602907, "percentage": 3.32, "elapsed_time": "0:05:51", "remaining_time": "2:50:50", "throughput": 23199.87, "total_tokens": 8156736} +{"current_steps": 2595, "total_steps": 78105, "loss": 0.5483, "lr": 1.660478811931891e-06, "epoch": 0.16612252736700595, "percentage": 3.32, "elapsed_time": "0:05:52", "remaining_time": "2:50:50", "throughput": 23200.18, "total_tokens": 8172480} +{"current_steps": 2600, "total_steps": 78105, "loss": 0.6026, "lr": 1.6636794264498784e-06, "epoch": 0.16644260930798285, "percentage": 3.33, "elapsed_time": "0:05:52", "remaining_time": "2:50:49", "throughput": 23200.18, "total_tokens": 8188224} +{"current_steps": 2605, "total_steps": 78105, "loss": 0.4673, "lr": 1.666880040967866e-06, "epoch": 0.16676269124895973, "percentage": 3.34, "elapsed_time": "0:05:53", "remaining_time": "2:50:48", "throughput": 23200.92, "total_tokens": 8204352} +{"current_steps": 2610, "total_steps": 78105, "loss": 0.6196, "lr": 1.6700806554858534e-06, "epoch": 0.1670827731899366, "percentage": 3.34, "elapsed_time": "0:05:54", "remaining_time": "2:50:47", "throughput": 23200.65, "total_tokens": 8219328} +{"current_steps": 2615, "total_steps": 78105, "loss": 0.4395, "lr": 1.6732812700038408e-06, "epoch": 0.16740285513091352, "percentage": 3.35, "elapsed_time": "0:05:54", "remaining_time": "2:50:46", "throughput": 23200.05, "total_tokens": 8234560} +{"current_steps": 2620, "total_steps": 78105, "loss": 0.6626, "lr": 1.6764818845218283e-06, "epoch": 0.1677229370718904, "percentage": 3.35, "elapsed_time": "0:05:55", "remaining_time": "2:50:44", "throughput": 23199.17, "total_tokens": 8249344} +{"current_steps": 2625, "total_steps": 78105, "loss": 0.5272, "lr": 1.6796824990398158e-06, "epoch": 0.1680430190128673, "percentage": 3.36, "elapsed_time": "0:05:56", "remaining_time": "2:50:44", "throughput": 23199.33, "total_tokens": 8265600} +{"current_steps": 2630, "total_steps": 78105, "loss": 0.4894, "lr": 1.6828831135578032e-06, "epoch": 0.16836310095384419, "percentage": 3.37, "elapsed_time": "0:05:56", "remaining_time": "2:50:43", "throughput": 23198.66, "total_tokens": 8280384} +{"current_steps": 2635, "total_steps": 78105, "loss": 0.5581, "lr": 1.6860837280757907e-06, "epoch": 0.16868318289482107, "percentage": 3.37, "elapsed_time": "0:05:57", "remaining_time": "2:50:42", "throughput": 23197.98, "total_tokens": 8295744} +{"current_steps": 2640, "total_steps": 78105, "loss": 0.5212, "lr": 1.6892843425937782e-06, "epoch": 0.16900326483579797, "percentage": 3.38, "elapsed_time": "0:05:58", "remaining_time": "2:50:41", "throughput": 23197.94, "total_tokens": 8311680} +{"current_steps": 2645, "total_steps": 78105, "loss": 0.5057, "lr": 1.6924849571117654e-06, "epoch": 0.16932334677677485, "percentage": 3.39, "elapsed_time": "0:05:58", "remaining_time": "2:50:40", "throughput": 23197.4, "total_tokens": 8326592} +{"current_steps": 2650, "total_steps": 78105, "loss": 0.5038, "lr": 1.6956855716297529e-06, "epoch": 0.16964342871775173, "percentage": 3.39, "elapsed_time": "0:05:59", "remaining_time": "2:50:39", "throughput": 23196.88, "total_tokens": 8341632} +{"current_steps": 2655, "total_steps": 78105, "loss": 0.4613, "lr": 1.6988861861477404e-06, "epoch": 0.16996351065872864, "percentage": 3.4, "elapsed_time": "0:06:00", "remaining_time": "2:50:39", "throughput": 23197.32, "total_tokens": 8358400} +{"current_steps": 2660, "total_steps": 78105, "loss": 0.4923, "lr": 1.7020868006657278e-06, "epoch": 0.17028359259970552, "percentage": 3.41, "elapsed_time": "0:06:00", "remaining_time": "2:50:38", "throughput": 23197.26, "total_tokens": 8374144} +{"current_steps": 2665, "total_steps": 78105, "loss": 0.4093, "lr": 1.7052874151837153e-06, "epoch": 0.17060367454068243, "percentage": 3.41, "elapsed_time": "0:06:01", "remaining_time": "2:50:37", "throughput": 23196.78, "total_tokens": 8389440} +{"current_steps": 2670, "total_steps": 78105, "loss": 0.5168, "lr": 1.7084880297017028e-06, "epoch": 0.1709237564816593, "percentage": 3.42, "elapsed_time": "0:06:02", "remaining_time": "2:50:36", "throughput": 23196.52, "total_tokens": 8404672} +{"current_steps": 2675, "total_steps": 78105, "loss": 0.5451, "lr": 1.7116886442196904e-06, "epoch": 0.17124383842263619, "percentage": 3.42, "elapsed_time": "0:06:02", "remaining_time": "2:50:35", "throughput": 23196.18, "total_tokens": 8420096} +{"current_steps": 2680, "total_steps": 78105, "loss": 0.6514, "lr": 1.714889258737678e-06, "epoch": 0.1715639203636131, "percentage": 3.43, "elapsed_time": "0:06:03", "remaining_time": "2:50:35", "throughput": 23196.43, "total_tokens": 8436288} +{"current_steps": 2685, "total_steps": 78105, "loss": 0.5755, "lr": 1.7180898732556654e-06, "epoch": 0.17188400230458997, "percentage": 3.44, "elapsed_time": "0:06:04", "remaining_time": "2:50:35", "throughput": 23196.71, "total_tokens": 8452672} +{"current_steps": 2690, "total_steps": 78105, "loss": 0.4582, "lr": 1.7212904877736528e-06, "epoch": 0.17220408424556685, "percentage": 3.44, "elapsed_time": "0:06:05", "remaining_time": "2:50:35", "throughput": 23196.88, "total_tokens": 8469120} +{"current_steps": 2695, "total_steps": 78105, "loss": 0.4967, "lr": 1.7244911022916403e-06, "epoch": 0.17252416618654376, "percentage": 3.45, "elapsed_time": "0:06:05", "remaining_time": "2:50:34", "throughput": 23196.52, "total_tokens": 8484736} +{"current_steps": 2700, "total_steps": 78105, "loss": 0.738, "lr": 1.7276917168096278e-06, "epoch": 0.17284424812752064, "percentage": 3.46, "elapsed_time": "0:06:06", "remaining_time": "2:50:34", "throughput": 23196.74, "total_tokens": 8500480} +{"current_steps": 2705, "total_steps": 78105, "loss": 0.3984, "lr": 1.7308923313276148e-06, "epoch": 0.17316433006849755, "percentage": 3.46, "elapsed_time": "0:06:07", "remaining_time": "2:50:32", "throughput": 23195.43, "total_tokens": 8514624} +{"current_steps": 2710, "total_steps": 78105, "loss": 0.4841, "lr": 1.7340929458456025e-06, "epoch": 0.17348441200947443, "percentage": 3.47, "elapsed_time": "0:06:07", "remaining_time": "2:50:31", "throughput": 23194.49, "total_tokens": 8529664} +{"current_steps": 2715, "total_steps": 78105, "loss": 0.6277, "lr": 1.73729356036359e-06, "epoch": 0.1738044939504513, "percentage": 3.48, "elapsed_time": "0:06:08", "remaining_time": "2:50:30", "throughput": 23194.82, "total_tokens": 8545280} +{"current_steps": 2720, "total_steps": 78105, "loss": 0.5041, "lr": 1.7404941748815774e-06, "epoch": 0.1741245758914282, "percentage": 3.48, "elapsed_time": "0:06:09", "remaining_time": "2:50:28", "throughput": 23194.67, "total_tokens": 8560384} +{"current_steps": 2725, "total_steps": 78105, "loss": 0.582, "lr": 1.743694789399565e-06, "epoch": 0.1744446578324051, "percentage": 3.49, "elapsed_time": "0:06:09", "remaining_time": "2:50:28", "throughput": 23195.62, "total_tokens": 8576960} +{"current_steps": 2730, "total_steps": 78105, "loss": 0.4479, "lr": 1.7468954039175524e-06, "epoch": 0.174764739773382, "percentage": 3.5, "elapsed_time": "0:06:10", "remaining_time": "2:50:27", "throughput": 23195.1, "total_tokens": 8592384} +{"current_steps": 2735, "total_steps": 78105, "loss": 0.5252, "lr": 1.7500960184355398e-06, "epoch": 0.17508482171435888, "percentage": 3.5, "elapsed_time": "0:06:11", "remaining_time": "2:50:27", "throughput": 23195.87, "total_tokens": 8609024} +{"current_steps": 2740, "total_steps": 78105, "loss": 0.4628, "lr": 1.7532966329535273e-06, "epoch": 0.17540490365533576, "percentage": 3.51, "elapsed_time": "0:06:11", "remaining_time": "2:50:27", "throughput": 23196.16, "total_tokens": 8624768} +{"current_steps": 2745, "total_steps": 78105, "loss": 0.5124, "lr": 1.7564972474715148e-06, "epoch": 0.17572498559631267, "percentage": 3.51, "elapsed_time": "0:06:12", "remaining_time": "2:50:26", "throughput": 23195.89, "total_tokens": 8640448} +{"current_steps": 2750, "total_steps": 78105, "loss": 0.5235, "lr": 1.7596978619895022e-06, "epoch": 0.17604506753728955, "percentage": 3.52, "elapsed_time": "0:06:13", "remaining_time": "2:50:24", "throughput": 23195.44, "total_tokens": 8655296} +{"current_steps": 2755, "total_steps": 78105, "loss": 0.514, "lr": 1.7628984765074897e-06, "epoch": 0.17636514947826643, "percentage": 3.53, "elapsed_time": "0:06:13", "remaining_time": "2:50:24", "throughput": 23195.93, "total_tokens": 8671872} +{"current_steps": 2760, "total_steps": 78105, "loss": 0.5037, "lr": 1.7660990910254772e-06, "epoch": 0.17668523141924333, "percentage": 3.53, "elapsed_time": "0:06:14", "remaining_time": "2:50:24", "throughput": 23196.55, "total_tokens": 8687680} +{"current_steps": 2765, "total_steps": 78105, "loss": 0.6377, "lr": 1.7692997055434644e-06, "epoch": 0.1770053133602202, "percentage": 3.54, "elapsed_time": "0:06:15", "remaining_time": "2:50:23", "throughput": 23195.96, "total_tokens": 8702912} +{"current_steps": 2770, "total_steps": 78105, "loss": 0.4919, "lr": 1.7725003200614519e-06, "epoch": 0.17732539530119712, "percentage": 3.55, "elapsed_time": "0:06:15", "remaining_time": "2:50:21", "throughput": 23195.93, "total_tokens": 8718144} +{"current_steps": 2775, "total_steps": 78105, "loss": 0.5202, "lr": 1.7757009345794394e-06, "epoch": 0.177645477242174, "percentage": 3.55, "elapsed_time": "0:06:16", "remaining_time": "2:50:20", "throughput": 23195.7, "total_tokens": 8732992} +{"current_steps": 2780, "total_steps": 78105, "loss": 0.5555, "lr": 1.7789015490974268e-06, "epoch": 0.17796555918315088, "percentage": 3.56, "elapsed_time": "0:06:17", "remaining_time": "2:50:19", "throughput": 23195.97, "total_tokens": 8749056} +{"current_steps": 2785, "total_steps": 78105, "loss": 0.5075, "lr": 1.7821021636154143e-06, "epoch": 0.17828564112412779, "percentage": 3.57, "elapsed_time": "0:06:17", "remaining_time": "2:50:19", "throughput": 23196.31, "total_tokens": 8765184} +{"current_steps": 2790, "total_steps": 78105, "loss": 0.4381, "lr": 1.7853027781334018e-06, "epoch": 0.17860572306510467, "percentage": 3.57, "elapsed_time": "0:06:18", "remaining_time": "2:50:18", "throughput": 23196.22, "total_tokens": 8780480} +{"current_steps": 2795, "total_steps": 78105, "loss": 0.5762, "lr": 1.7885033926513892e-06, "epoch": 0.17892580500608155, "percentage": 3.58, "elapsed_time": "0:06:19", "remaining_time": "2:50:17", "throughput": 23196.52, "total_tokens": 8796352} +{"current_steps": 2800, "total_steps": 78105, "loss": 0.4723, "lr": 1.7917040071693767e-06, "epoch": 0.17924588694705845, "percentage": 3.58, "elapsed_time": "0:06:19", "remaining_time": "2:50:15", "throughput": 23195.22, "total_tokens": 8810688} +{"current_steps": 2805, "total_steps": 78105, "loss": 0.4801, "lr": 1.7949046216873642e-06, "epoch": 0.17956596888803533, "percentage": 3.59, "elapsed_time": "0:06:20", "remaining_time": "2:50:15", "throughput": 23195.17, "total_tokens": 8826304} +{"current_steps": 2810, "total_steps": 78105, "loss": 0.4333, "lr": 1.7981052362053516e-06, "epoch": 0.17988605082901224, "percentage": 3.6, "elapsed_time": "0:06:21", "remaining_time": "2:50:13", "throughput": 23194.53, "total_tokens": 8841344} +{"current_steps": 2815, "total_steps": 78105, "loss": 0.5636, "lr": 1.801305850723339e-06, "epoch": 0.18020613276998912, "percentage": 3.6, "elapsed_time": "0:06:21", "remaining_time": "2:50:13", "throughput": 23195.45, "total_tokens": 8857664} +{"current_steps": 2820, "total_steps": 78105, "loss": 0.597, "lr": 1.8045064652413266e-06, "epoch": 0.180526214710966, "percentage": 3.61, "elapsed_time": "0:06:22", "remaining_time": "2:50:12", "throughput": 23195.39, "total_tokens": 8873408} +{"current_steps": 2825, "total_steps": 78105, "loss": 0.4652, "lr": 1.8077070797593138e-06, "epoch": 0.1808462966519429, "percentage": 3.62, "elapsed_time": "0:06:23", "remaining_time": "2:50:12", "throughput": 23195.94, "total_tokens": 8889472} +{"current_steps": 2830, "total_steps": 78105, "loss": 0.6546, "lr": 1.8109076942773013e-06, "epoch": 0.18116637859291979, "percentage": 3.62, "elapsed_time": "0:06:23", "remaining_time": "2:50:11", "throughput": 23195.6, "total_tokens": 8904640} +{"current_steps": 2835, "total_steps": 78105, "loss": 0.4452, "lr": 1.8141083087952887e-06, "epoch": 0.18148646053389667, "percentage": 3.63, "elapsed_time": "0:06:24", "remaining_time": "2:50:09", "throughput": 23195.29, "total_tokens": 8919680} +{"current_steps": 2840, "total_steps": 78105, "loss": 0.4601, "lr": 1.8173089233132762e-06, "epoch": 0.18180654247487357, "percentage": 3.64, "elapsed_time": "0:06:25", "remaining_time": "2:50:09", "throughput": 23195.42, "total_tokens": 8935360} +{"current_steps": 2845, "total_steps": 78105, "loss": 0.4969, "lr": 1.8205095378312637e-06, "epoch": 0.18212662441585045, "percentage": 3.64, "elapsed_time": "0:06:25", "remaining_time": "2:50:08", "throughput": 23196.12, "total_tokens": 8951552} +{"current_steps": 2850, "total_steps": 78105, "loss": 0.6108, "lr": 1.8237101523492512e-06, "epoch": 0.18244670635682736, "percentage": 3.65, "elapsed_time": "0:06:26", "remaining_time": "2:50:08", "throughput": 23197.05, "total_tokens": 8968576} +{"current_steps": 2855, "total_steps": 78105, "loss": 0.4499, "lr": 1.8269107668672386e-06, "epoch": 0.18276678829780424, "percentage": 3.66, "elapsed_time": "0:06:27", "remaining_time": "2:50:07", "throughput": 23196.6, "total_tokens": 8983744} +{"current_steps": 2860, "total_steps": 78105, "loss": 0.4559, "lr": 1.830111381385226e-06, "epoch": 0.18308687023878112, "percentage": 3.66, "elapsed_time": "0:06:27", "remaining_time": "2:50:07", "throughput": 23195.51, "total_tokens": 8999040} +{"current_steps": 2865, "total_steps": 78105, "loss": 0.4791, "lr": 1.8333119959032136e-06, "epoch": 0.18340695217975803, "percentage": 3.67, "elapsed_time": "0:06:28", "remaining_time": "2:50:07", "throughput": 23196.52, "total_tokens": 9015872} +{"current_steps": 2870, "total_steps": 78105, "loss": 0.4226, "lr": 1.836512610421201e-06, "epoch": 0.1837270341207349, "percentage": 3.67, "elapsed_time": "0:06:29", "remaining_time": "2:50:06", "throughput": 23196.89, "total_tokens": 9031744} +{"current_steps": 2875, "total_steps": 78105, "loss": 0.6299, "lr": 1.8397132249391885e-06, "epoch": 0.18404711606171179, "percentage": 3.68, "elapsed_time": "0:06:30", "remaining_time": "2:50:06", "throughput": 23196.94, "total_tokens": 9047872} +{"current_steps": 2880, "total_steps": 78105, "loss": 0.5186, "lr": 1.842913839457176e-06, "epoch": 0.1843671980026887, "percentage": 3.69, "elapsed_time": "0:06:30", "remaining_time": "2:50:05", "throughput": 23196.44, "total_tokens": 9062976} +{"current_steps": 2885, "total_steps": 78105, "loss": 0.6009, "lr": 1.8461144539751632e-06, "epoch": 0.18468727994366557, "percentage": 3.69, "elapsed_time": "0:06:31", "remaining_time": "2:50:03", "throughput": 23196.39, "total_tokens": 9078208} +{"current_steps": 2890, "total_steps": 78105, "loss": 0.442, "lr": 1.8493150684931507e-06, "epoch": 0.18500736188464248, "percentage": 3.7, "elapsed_time": "0:06:32", "remaining_time": "2:50:04", "throughput": 23196.64, "total_tokens": 9094720} +{"current_steps": 2895, "total_steps": 78105, "loss": 0.5871, "lr": 1.8525156830111381e-06, "epoch": 0.18532744382561936, "percentage": 3.71, "elapsed_time": "0:06:32", "remaining_time": "2:50:03", "throughput": 23196.7, "total_tokens": 9110336} +{"current_steps": 2900, "total_steps": 78105, "loss": 0.5919, "lr": 1.8557162975291256e-06, "epoch": 0.18564752576659624, "percentage": 3.71, "elapsed_time": "0:06:33", "remaining_time": "2:50:03", "throughput": 23197.34, "total_tokens": 9126912} +{"current_steps": 2905, "total_steps": 78105, "loss": 0.5381, "lr": 1.858916912047113e-06, "epoch": 0.18596760770757315, "percentage": 3.72, "elapsed_time": "0:06:34", "remaining_time": "2:50:02", "throughput": 23197.44, "total_tokens": 9142400} +{"current_steps": 2910, "total_steps": 78105, "loss": 0.5612, "lr": 1.8621175265651005e-06, "epoch": 0.18628768964855003, "percentage": 3.73, "elapsed_time": "0:06:34", "remaining_time": "2:50:00", "throughput": 23196.74, "total_tokens": 9157184} +{"current_steps": 2915, "total_steps": 78105, "loss": 0.5459, "lr": 1.8653181410830882e-06, "epoch": 0.1866077715895269, "percentage": 3.73, "elapsed_time": "0:06:35", "remaining_time": "2:50:00", "throughput": 23197.13, "total_tokens": 9173696} +{"current_steps": 2920, "total_steps": 78105, "loss": 0.5249, "lr": 1.8685187556010757e-06, "epoch": 0.1869278535305038, "percentage": 3.74, "elapsed_time": "0:06:36", "remaining_time": "2:50:00", "throughput": 23197.44, "total_tokens": 9190080} +{"current_steps": 2925, "total_steps": 78105, "loss": 0.6124, "lr": 1.8717193701190632e-06, "epoch": 0.1872479354714807, "percentage": 3.74, "elapsed_time": "0:06:36", "remaining_time": "2:49:58", "throughput": 23196.1, "total_tokens": 9204352} +{"current_steps": 2930, "total_steps": 78105, "loss": 0.5798, "lr": 1.8749199846370506e-06, "epoch": 0.1875680174124576, "percentage": 3.75, "elapsed_time": "0:06:37", "remaining_time": "2:49:57", "throughput": 23195.61, "total_tokens": 9219200} +{"current_steps": 2935, "total_steps": 78105, "loss": 0.4787, "lr": 1.878120599155038e-06, "epoch": 0.18788809935343448, "percentage": 3.76, "elapsed_time": "0:06:38", "remaining_time": "2:49:55", "throughput": 23194.77, "total_tokens": 9233600} +{"current_steps": 2940, "total_steps": 78105, "loss": 0.4465, "lr": 1.8813212136730256e-06, "epoch": 0.18820818129441136, "percentage": 3.76, "elapsed_time": "0:06:38", "remaining_time": "2:49:55", "throughput": 23195.17, "total_tokens": 9249536} +{"current_steps": 2945, "total_steps": 78105, "loss": 0.6955, "lr": 1.8845218281910126e-06, "epoch": 0.18852826323538827, "percentage": 3.77, "elapsed_time": "0:06:39", "remaining_time": "2:49:53", "throughput": 23194.57, "total_tokens": 9264256} +{"current_steps": 2950, "total_steps": 78105, "loss": 0.4204, "lr": 1.8877224427090003e-06, "epoch": 0.18884834517636515, "percentage": 3.78, "elapsed_time": "0:06:40", "remaining_time": "2:49:52", "throughput": 23194.25, "total_tokens": 9279488} +{"current_steps": 2955, "total_steps": 78105, "loss": 0.5249, "lr": 1.8909230572269877e-06, "epoch": 0.18916842711734205, "percentage": 3.78, "elapsed_time": "0:06:40", "remaining_time": "2:49:51", "throughput": 23193.65, "total_tokens": 9294336} +{"current_steps": 2960, "total_steps": 78105, "loss": 0.5779, "lr": 1.8941236717449752e-06, "epoch": 0.18948850905831893, "percentage": 3.79, "elapsed_time": "0:06:41", "remaining_time": "2:49:49", "throughput": 23193.38, "total_tokens": 9309376} +{"current_steps": 2965, "total_steps": 78105, "loss": 0.4775, "lr": 1.8973242862629627e-06, "epoch": 0.1898085909992958, "percentage": 3.8, "elapsed_time": "0:06:42", "remaining_time": "2:49:48", "throughput": 23192.68, "total_tokens": 9324224} +{"current_steps": 2970, "total_steps": 78105, "loss": 0.4359, "lr": 1.9005249007809502e-06, "epoch": 0.19012867294027272, "percentage": 3.8, "elapsed_time": "0:06:42", "remaining_time": "2:49:47", "throughput": 23192.17, "total_tokens": 9339136} +{"current_steps": 2975, "total_steps": 78105, "loss": 0.4773, "lr": 1.9037255152989376e-06, "epoch": 0.1904487548812496, "percentage": 3.81, "elapsed_time": "0:06:43", "remaining_time": "2:49:46", "throughput": 23192.09, "total_tokens": 9354688} +{"current_steps": 2980, "total_steps": 78105, "loss": 0.5187, "lr": 1.906926129816925e-06, "epoch": 0.19076883682222648, "percentage": 3.82, "elapsed_time": "0:06:44", "remaining_time": "2:49:45", "throughput": 23191.4, "total_tokens": 9369664} +{"current_steps": 2985, "total_steps": 78105, "loss": 0.6318, "lr": 1.9101267443349123e-06, "epoch": 0.19108891876320339, "percentage": 3.82, "elapsed_time": "0:06:44", "remaining_time": "2:49:43", "throughput": 23190.94, "total_tokens": 9384768} +{"current_steps": 2990, "total_steps": 78105, "loss": 0.4131, "lr": 1.9133273588529e-06, "epoch": 0.19140900070418027, "percentage": 3.83, "elapsed_time": "0:06:45", "remaining_time": "2:49:43", "throughput": 23190.97, "total_tokens": 9400320} +{"current_steps": 2995, "total_steps": 78105, "loss": 0.4738, "lr": 1.9165279733708873e-06, "epoch": 0.19172908264515717, "percentage": 3.83, "elapsed_time": "0:06:46", "remaining_time": "2:49:42", "throughput": 23190.47, "total_tokens": 9415872} +{"current_steps": 3000, "total_steps": 78105, "loss": 0.5976, "lr": 1.9197285878888747e-06, "epoch": 0.19204916458613405, "percentage": 3.84, "elapsed_time": "0:06:46", "remaining_time": "2:49:42", "throughput": 23191.45, "total_tokens": 9432704} +{"current_steps": 3005, "total_steps": 78105, "loss": 0.493, "lr": 1.922929202406862e-06, "epoch": 0.19236924652711093, "percentage": 3.85, "elapsed_time": "0:06:47", "remaining_time": "2:49:41", "throughput": 23191.07, "total_tokens": 9448192} +{"current_steps": 3010, "total_steps": 78105, "loss": 0.4355, "lr": 1.9261298169248497e-06, "epoch": 0.19268932846808784, "percentage": 3.85, "elapsed_time": "0:06:48", "remaining_time": "2:49:41", "throughput": 23191.55, "total_tokens": 9464576} +{"current_steps": 3015, "total_steps": 78105, "loss": 0.5541, "lr": 1.929330431442837e-06, "epoch": 0.19300941040906472, "percentage": 3.86, "elapsed_time": "0:06:48", "remaining_time": "2:49:40", "throughput": 23191.27, "total_tokens": 9479936} +{"current_steps": 3020, "total_steps": 78105, "loss": 0.584, "lr": 1.9325310459608246e-06, "epoch": 0.1933294923500416, "percentage": 3.87, "elapsed_time": "0:06:49", "remaining_time": "2:49:41", "throughput": 23192.52, "total_tokens": 9497280} +{"current_steps": 3025, "total_steps": 78105, "loss": 0.4925, "lr": 1.935731660478812e-06, "epoch": 0.1936495742910185, "percentage": 3.87, "elapsed_time": "0:06:50", "remaining_time": "2:49:41", "throughput": 23194.08, "total_tokens": 9514496} +{"current_steps": 3030, "total_steps": 78105, "loss": 0.4291, "lr": 1.9389322749967995e-06, "epoch": 0.19396965623199539, "percentage": 3.88, "elapsed_time": "0:06:50", "remaining_time": "2:49:40", "throughput": 23193.72, "total_tokens": 9529536} +{"current_steps": 3035, "total_steps": 78105, "loss": 0.5895, "lr": 1.942132889514787e-06, "epoch": 0.1942897381729723, "percentage": 3.89, "elapsed_time": "0:06:51", "remaining_time": "2:49:39", "throughput": 23194.09, "total_tokens": 9545472} +{"current_steps": 3040, "total_steps": 78105, "loss": 0.6192, "lr": 1.9453335040327745e-06, "epoch": 0.19460982011394917, "percentage": 3.89, "elapsed_time": "0:06:52", "remaining_time": "2:49:38", "throughput": 23193.83, "total_tokens": 9560960} +{"current_steps": 3045, "total_steps": 78105, "loss": 0.5843, "lr": 1.948534118550762e-06, "epoch": 0.19492990205492605, "percentage": 3.9, "elapsed_time": "0:06:52", "remaining_time": "2:49:37", "throughput": 23193.23, "total_tokens": 9576320} +{"current_steps": 3050, "total_steps": 78105, "loss": 0.5828, "lr": 1.9517347330687494e-06, "epoch": 0.19524998399590296, "percentage": 3.9, "elapsed_time": "0:06:53", "remaining_time": "2:49:37", "throughput": 23192.86, "total_tokens": 9592192} +{"current_steps": 3055, "total_steps": 78105, "loss": 0.6384, "lr": 1.954935347586737e-06, "epoch": 0.19557006593687984, "percentage": 3.91, "elapsed_time": "0:06:54", "remaining_time": "2:49:37", "throughput": 23193.06, "total_tokens": 9608128} +{"current_steps": 3060, "total_steps": 78105, "loss": 0.5492, "lr": 1.9581359621047243e-06, "epoch": 0.19589014787785672, "percentage": 3.92, "elapsed_time": "0:06:54", "remaining_time": "2:49:36", "throughput": 23193.34, "total_tokens": 9623872} +{"current_steps": 3065, "total_steps": 78105, "loss": 0.5739, "lr": 1.9613365766227114e-06, "epoch": 0.19621022981883363, "percentage": 3.92, "elapsed_time": "0:06:55", "remaining_time": "2:49:35", "throughput": 23192.91, "total_tokens": 9639488} +{"current_steps": 3070, "total_steps": 78105, "loss": 0.514, "lr": 1.9645371911406993e-06, "epoch": 0.1965303117598105, "percentage": 3.93, "elapsed_time": "0:06:56", "remaining_time": "2:49:35", "throughput": 23192.99, "total_tokens": 9655680} +{"current_steps": 3075, "total_steps": 78105, "loss": 0.5939, "lr": 1.9677378056586867e-06, "epoch": 0.1968503937007874, "percentage": 3.94, "elapsed_time": "0:06:56", "remaining_time": "2:49:33", "throughput": 23192.45, "total_tokens": 9670400} +{"current_steps": 3080, "total_steps": 78105, "loss": 0.4051, "lr": 1.9709384201766742e-06, "epoch": 0.1971704756417643, "percentage": 3.94, "elapsed_time": "0:06:57", "remaining_time": "2:49:32", "throughput": 23191.84, "total_tokens": 9685504} +{"current_steps": 3085, "total_steps": 78105, "loss": 0.6816, "lr": 1.9741390346946617e-06, "epoch": 0.19749055758274117, "percentage": 3.95, "elapsed_time": "0:06:58", "remaining_time": "2:49:32", "throughput": 23192.25, "total_tokens": 9701952} +{"current_steps": 3090, "total_steps": 78105, "loss": 0.4583, "lr": 1.977339649212649e-06, "epoch": 0.19781063952371808, "percentage": 3.96, "elapsed_time": "0:06:58", "remaining_time": "2:49:31", "throughput": 23191.85, "total_tokens": 9716928} +{"current_steps": 3095, "total_steps": 78105, "loss": 0.6711, "lr": 1.9805402637306366e-06, "epoch": 0.19813072146469496, "percentage": 3.96, "elapsed_time": "0:06:59", "remaining_time": "2:49:30", "throughput": 23191.42, "total_tokens": 9732096} +{"current_steps": 3100, "total_steps": 78105, "loss": 0.5532, "lr": 1.983740878248624e-06, "epoch": 0.19845080340567184, "percentage": 3.97, "elapsed_time": "0:07:00", "remaining_time": "2:49:29", "throughput": 23191.0, "total_tokens": 9747648} +{"current_steps": 3105, "total_steps": 78105, "loss": 0.4788, "lr": 1.9869414927666116e-06, "epoch": 0.19877088534664875, "percentage": 3.98, "elapsed_time": "0:07:00", "remaining_time": "2:49:28", "throughput": 23190.52, "total_tokens": 9763072} +{"current_steps": 3110, "total_steps": 78105, "loss": 0.4999, "lr": 1.990142107284599e-06, "epoch": 0.19909096728762563, "percentage": 3.98, "elapsed_time": "0:07:01", "remaining_time": "2:49:27", "throughput": 23190.1, "total_tokens": 9778176} +{"current_steps": 3115, "total_steps": 78105, "loss": 0.5723, "lr": 1.9933427218025865e-06, "epoch": 0.19941104922860253, "percentage": 3.99, "elapsed_time": "0:07:02", "remaining_time": "2:49:26", "throughput": 23190.13, "total_tokens": 9793536} +{"current_steps": 3120, "total_steps": 78105, "loss": 0.51, "lr": 1.996543336320574e-06, "epoch": 0.1997311311695794, "percentage": 3.99, "elapsed_time": "0:07:02", "remaining_time": "2:49:25", "throughput": 23189.62, "total_tokens": 9808192} +{"current_steps": 3125, "total_steps": 78105, "loss": 0.615, "lr": 1.999743950838561e-06, "epoch": 0.2000512131105563, "percentage": 4.0, "elapsed_time": "0:07:03", "remaining_time": "2:49:24", "throughput": 23189.11, "total_tokens": 9823232} +{"current_steps": 3130, "total_steps": 78105, "loss": 0.6188, "lr": 2.0029445653565485e-06, "epoch": 0.2003712950515332, "percentage": 4.01, "elapsed_time": "0:07:04", "remaining_time": "2:49:23", "throughput": 23189.58, "total_tokens": 9839488} +{"current_steps": 3135, "total_steps": 78105, "loss": 0.4495, "lr": 2.006145179874536e-06, "epoch": 0.20069137699251008, "percentage": 4.01, "elapsed_time": "0:07:05", "remaining_time": "2:49:23", "throughput": 23190.62, "total_tokens": 9856192} +{"current_steps": 3140, "total_steps": 78105, "loss": 0.5418, "lr": 2.0093457943925234e-06, "epoch": 0.201011458933487, "percentage": 4.02, "elapsed_time": "0:07:05", "remaining_time": "2:49:22", "throughput": 23189.92, "total_tokens": 9871552} +{"current_steps": 3145, "total_steps": 78105, "loss": 0.5226, "lr": 2.012546408910511e-06, "epoch": 0.20133154087446387, "percentage": 4.03, "elapsed_time": "0:07:06", "remaining_time": "2:49:21", "throughput": 23189.08, "total_tokens": 9886016} +{"current_steps": 3150, "total_steps": 78105, "loss": 0.6394, "lr": 2.0157470234284983e-06, "epoch": 0.20165162281544075, "percentage": 4.03, "elapsed_time": "0:07:07", "remaining_time": "2:49:21", "throughput": 23190.43, "total_tokens": 9903552} +{"current_steps": 3155, "total_steps": 78105, "loss": 0.3662, "lr": 2.018947637946486e-06, "epoch": 0.20197170475641765, "percentage": 4.04, "elapsed_time": "0:07:07", "remaining_time": "2:49:21", "throughput": 23191.28, "total_tokens": 9920320} +{"current_steps": 3160, "total_steps": 78105, "loss": 0.6472, "lr": 2.0221482524644733e-06, "epoch": 0.20229178669739453, "percentage": 4.05, "elapsed_time": "0:07:08", "remaining_time": "2:49:20", "throughput": 23191.51, "total_tokens": 9935936} +{"current_steps": 3165, "total_steps": 78105, "loss": 0.5605, "lr": 2.0253488669824607e-06, "epoch": 0.2026118686383714, "percentage": 4.05, "elapsed_time": "0:07:09", "remaining_time": "2:49:19", "throughput": 23190.73, "total_tokens": 9950912} +{"current_steps": 3170, "total_steps": 78105, "loss": 0.5868, "lr": 2.028549481500448e-06, "epoch": 0.20293195057934832, "percentage": 4.06, "elapsed_time": "0:07:09", "remaining_time": "2:49:18", "throughput": 23190.24, "total_tokens": 9966080} +{"current_steps": 3175, "total_steps": 78105, "loss": 0.4591, "lr": 2.0317500960184357e-06, "epoch": 0.2032520325203252, "percentage": 4.07, "elapsed_time": "0:07:10", "remaining_time": "2:49:18", "throughput": 23190.4, "total_tokens": 9982080} +{"current_steps": 3180, "total_steps": 78105, "loss": 0.5424, "lr": 2.034950710536423e-06, "epoch": 0.2035721144613021, "percentage": 4.07, "elapsed_time": "0:07:11", "remaining_time": "2:49:16", "throughput": 23189.51, "total_tokens": 9996544} +{"current_steps": 3185, "total_steps": 78105, "loss": 0.546, "lr": 2.0381513250544106e-06, "epoch": 0.20389219640227899, "percentage": 4.08, "elapsed_time": "0:07:11", "remaining_time": "2:49:15", "throughput": 23189.42, "total_tokens": 10011776} +{"current_steps": 3190, "total_steps": 78105, "loss": 0.4516, "lr": 2.041351939572398e-06, "epoch": 0.20421227834325587, "percentage": 4.08, "elapsed_time": "0:07:12", "remaining_time": "2:49:14", "throughput": 23189.2, "total_tokens": 10027264} +{"current_steps": 3195, "total_steps": 78105, "loss": 0.4452, "lr": 2.0445525540903855e-06, "epoch": 0.20453236028423277, "percentage": 4.09, "elapsed_time": "0:07:13", "remaining_time": "2:49:13", "throughput": 23189.03, "total_tokens": 10042432} +{"current_steps": 3200, "total_steps": 78105, "loss": 0.4167, "lr": 2.047753168608373e-06, "epoch": 0.20485244222520965, "percentage": 4.1, "elapsed_time": "0:07:13", "remaining_time": "2:49:12", "throughput": 23188.45, "total_tokens": 10057792} +{"current_steps": 3205, "total_steps": 78105, "loss": 0.4757, "lr": 2.0509537831263605e-06, "epoch": 0.20517252416618653, "percentage": 4.1, "elapsed_time": "0:07:14", "remaining_time": "2:49:12", "throughput": 23190.0, "total_tokens": 10074752} +{"current_steps": 3210, "total_steps": 78105, "loss": 0.6621, "lr": 2.054154397644348e-06, "epoch": 0.20549260610716344, "percentage": 4.11, "elapsed_time": "0:07:15", "remaining_time": "2:49:12", "throughput": 23190.33, "total_tokens": 10090752} +{"current_steps": 3215, "total_steps": 78105, "loss": 0.5276, "lr": 2.0573550121623354e-06, "epoch": 0.20581268804814032, "percentage": 4.12, "elapsed_time": "0:07:15", "remaining_time": "2:49:11", "throughput": 23190.19, "total_tokens": 10106240} +{"current_steps": 3220, "total_steps": 78105, "loss": 0.4633, "lr": 2.060555626680323e-06, "epoch": 0.20613276998911723, "percentage": 4.12, "elapsed_time": "0:07:16", "remaining_time": "2:49:10", "throughput": 23190.1, "total_tokens": 10121856} +{"current_steps": 3225, "total_steps": 78105, "loss": 0.4951, "lr": 2.0637562411983103e-06, "epoch": 0.2064528519300941, "percentage": 4.13, "elapsed_time": "0:07:17", "remaining_time": "2:49:09", "throughput": 23190.13, "total_tokens": 10137408} +{"current_steps": 3230, "total_steps": 78105, "loss": 0.485, "lr": 2.066956855716298e-06, "epoch": 0.20677293387107099, "percentage": 4.14, "elapsed_time": "0:07:17", "remaining_time": "2:49:08", "throughput": 23189.77, "total_tokens": 10152640} +{"current_steps": 3235, "total_steps": 78105, "loss": 0.4234, "lr": 2.0701574702342853e-06, "epoch": 0.2070930158120479, "percentage": 4.14, "elapsed_time": "0:07:18", "remaining_time": "2:49:07", "throughput": 23189.35, "total_tokens": 10167936} +{"current_steps": 3240, "total_steps": 78105, "loss": 0.463, "lr": 2.0733580847522727e-06, "epoch": 0.20741309775302477, "percentage": 4.15, "elapsed_time": "0:07:19", "remaining_time": "2:49:06", "throughput": 23189.36, "total_tokens": 10183360} +{"current_steps": 3245, "total_steps": 78105, "loss": 0.5593, "lr": 2.0765586992702598e-06, "epoch": 0.20773317969400165, "percentage": 4.15, "elapsed_time": "0:07:19", "remaining_time": "2:49:05", "throughput": 23189.19, "total_tokens": 10198208} +{"current_steps": 3250, "total_steps": 78105, "loss": 0.5326, "lr": 2.0797593137882473e-06, "epoch": 0.20805326163497856, "percentage": 4.16, "elapsed_time": "0:07:20", "remaining_time": "2:49:04", "throughput": 23189.02, "total_tokens": 10213568} +{"current_steps": 3255, "total_steps": 78105, "loss": 0.4922, "lr": 2.0829599283062347e-06, "epoch": 0.20837334357595544, "percentage": 4.17, "elapsed_time": "0:07:21", "remaining_time": "2:49:04", "throughput": 23189.78, "total_tokens": 10230080} +{"current_steps": 3260, "total_steps": 78105, "loss": 0.5181, "lr": 2.086160542824222e-06, "epoch": 0.20869342551693235, "percentage": 4.17, "elapsed_time": "0:07:21", "remaining_time": "2:49:04", "throughput": 23190.46, "total_tokens": 10246912} +{"current_steps": 3265, "total_steps": 78105, "loss": 0.538, "lr": 2.0893611573422097e-06, "epoch": 0.20901350745790923, "percentage": 4.18, "elapsed_time": "0:07:22", "remaining_time": "2:49:03", "throughput": 23190.3, "total_tokens": 10262464} +{"current_steps": 3270, "total_steps": 78105, "loss": 0.6577, "lr": 2.092561771860197e-06, "epoch": 0.2093335893988861, "percentage": 4.19, "elapsed_time": "0:07:23", "remaining_time": "2:49:02", "throughput": 23190.09, "total_tokens": 10278016} +{"current_steps": 3275, "total_steps": 78105, "loss": 0.5375, "lr": 2.0957623863781846e-06, "epoch": 0.209653671339863, "percentage": 4.19, "elapsed_time": "0:07:23", "remaining_time": "2:49:02", "throughput": 23190.17, "total_tokens": 10293824} +{"current_steps": 3280, "total_steps": 78105, "loss": 0.5761, "lr": 2.0989630008961725e-06, "epoch": 0.2099737532808399, "percentage": 4.2, "elapsed_time": "0:07:24", "remaining_time": "2:49:01", "throughput": 23190.15, "total_tokens": 10309568} +{"current_steps": 3285, "total_steps": 78105, "loss": 0.439, "lr": 2.10216361541416e-06, "epoch": 0.21029383522181677, "percentage": 4.21, "elapsed_time": "0:07:25", "remaining_time": "2:49:01", "throughput": 23190.45, "total_tokens": 10325696} +{"current_steps": 3290, "total_steps": 78105, "loss": 0.5091, "lr": 2.1053642299321474e-06, "epoch": 0.21061391716279368, "percentage": 4.21, "elapsed_time": "0:07:25", "remaining_time": "2:49:00", "throughput": 23190.88, "total_tokens": 10342016} +{"current_steps": 3295, "total_steps": 78105, "loss": 0.4905, "lr": 2.108564844450135e-06, "epoch": 0.21093399910377056, "percentage": 4.22, "elapsed_time": "0:07:26", "remaining_time": "2:49:00", "throughput": 23190.53, "total_tokens": 10357632} +{"current_steps": 3300, "total_steps": 78105, "loss": 0.5472, "lr": 2.1117654589681223e-06, "epoch": 0.21125408104474747, "percentage": 4.23, "elapsed_time": "0:07:27", "remaining_time": "2:48:59", "throughput": 23190.33, "total_tokens": 10373056} +{"current_steps": 3305, "total_steps": 78105, "loss": 0.7056, "lr": 2.1149660734861094e-06, "epoch": 0.21157416298572435, "percentage": 4.23, "elapsed_time": "0:07:27", "remaining_time": "2:48:59", "throughput": 23190.78, "total_tokens": 10389248} +{"current_steps": 3310, "total_steps": 78105, "loss": 0.4812, "lr": 2.118166688004097e-06, "epoch": 0.21189424492670123, "percentage": 4.24, "elapsed_time": "0:07:28", "remaining_time": "2:48:58", "throughput": 23190.9, "total_tokens": 10404864} +{"current_steps": 3315, "total_steps": 78105, "loss": 0.4748, "lr": 2.1213673025220843e-06, "epoch": 0.21221432686767813, "percentage": 4.24, "elapsed_time": "0:07:29", "remaining_time": "2:48:57", "throughput": 23190.62, "total_tokens": 10420416} +{"current_steps": 3320, "total_steps": 78105, "loss": 0.4937, "lr": 2.124567917040072e-06, "epoch": 0.212534408808655, "percentage": 4.25, "elapsed_time": "0:07:30", "remaining_time": "2:48:56", "throughput": 23189.89, "total_tokens": 10435456} +{"current_steps": 3325, "total_steps": 78105, "loss": 0.5216, "lr": 2.1277685315580593e-06, "epoch": 0.21285449074963192, "percentage": 4.26, "elapsed_time": "0:07:30", "remaining_time": "2:48:56", "throughput": 23190.2, "total_tokens": 10451584} +{"current_steps": 3330, "total_steps": 78105, "loss": 0.3753, "lr": 2.1309691460760467e-06, "epoch": 0.2131745726906088, "percentage": 4.26, "elapsed_time": "0:07:31", "remaining_time": "2:48:55", "throughput": 23190.63, "total_tokens": 10467584} +{"current_steps": 3335, "total_steps": 78105, "loss": 0.5046, "lr": 2.134169760594034e-06, "epoch": 0.21349465463158568, "percentage": 4.27, "elapsed_time": "0:07:32", "remaining_time": "2:48:54", "throughput": 23190.5, "total_tokens": 10483264} +{"current_steps": 3340, "total_steps": 78105, "loss": 0.5896, "lr": 2.1373703751120217e-06, "epoch": 0.21381473657256259, "percentage": 4.28, "elapsed_time": "0:07:32", "remaining_time": "2:48:53", "throughput": 23190.21, "total_tokens": 10498560} +{"current_steps": 3345, "total_steps": 78105, "loss": 0.4651, "lr": 2.140570989630009e-06, "epoch": 0.21413481851353947, "percentage": 4.28, "elapsed_time": "0:07:33", "remaining_time": "2:48:53", "throughput": 23190.0, "total_tokens": 10514176} +{"current_steps": 3350, "total_steps": 78105, "loss": 0.4636, "lr": 2.1437716041479966e-06, "epoch": 0.21445490045451635, "percentage": 4.29, "elapsed_time": "0:07:34", "remaining_time": "2:48:52", "throughput": 23190.25, "total_tokens": 10529792} +{"current_steps": 3355, "total_steps": 78105, "loss": 0.5346, "lr": 2.146972218665984e-06, "epoch": 0.21477498239549325, "percentage": 4.3, "elapsed_time": "0:07:34", "remaining_time": "2:48:51", "throughput": 23190.47, "total_tokens": 10545856} +{"current_steps": 3360, "total_steps": 78105, "loss": 0.4138, "lr": 2.1501728331839715e-06, "epoch": 0.21509506433647013, "percentage": 4.3, "elapsed_time": "0:07:35", "remaining_time": "2:48:51", "throughput": 23190.86, "total_tokens": 10562368} +{"current_steps": 3365, "total_steps": 78105, "loss": 0.6783, "lr": 2.153373447701959e-06, "epoch": 0.21541514627744704, "percentage": 4.31, "elapsed_time": "0:07:36", "remaining_time": "2:48:50", "throughput": 23190.35, "total_tokens": 10577280} +{"current_steps": 3370, "total_steps": 78105, "loss": 0.537, "lr": 2.1565740622199465e-06, "epoch": 0.21573522821842392, "percentage": 4.31, "elapsed_time": "0:07:36", "remaining_time": "2:48:49", "throughput": 23190.24, "total_tokens": 10592384} +{"current_steps": 3375, "total_steps": 78105, "loss": 0.4837, "lr": 2.159774676737934e-06, "epoch": 0.2160553101594008, "percentage": 4.32, "elapsed_time": "0:07:37", "remaining_time": "2:48:49", "throughput": 23190.14, "total_tokens": 10608704} +{"current_steps": 3380, "total_steps": 78105, "loss": 0.4391, "lr": 2.1629752912559214e-06, "epoch": 0.2163753921003777, "percentage": 4.33, "elapsed_time": "0:07:38", "remaining_time": "2:48:49", "throughput": 23190.49, "total_tokens": 10625280} +{"current_steps": 3385, "total_steps": 78105, "loss": 0.5326, "lr": 2.166175905773909e-06, "epoch": 0.21669547404135459, "percentage": 4.33, "elapsed_time": "0:07:38", "remaining_time": "2:48:48", "throughput": 23190.91, "total_tokens": 10641152} +{"current_steps": 3390, "total_steps": 78105, "loss": 0.4427, "lr": 2.1693765202918963e-06, "epoch": 0.21701555598233147, "percentage": 4.34, "elapsed_time": "0:07:39", "remaining_time": "2:48:48", "throughput": 23190.96, "total_tokens": 10657472} +{"current_steps": 3395, "total_steps": 78105, "loss": 0.529, "lr": 2.172577134809884e-06, "epoch": 0.21733563792330837, "percentage": 4.35, "elapsed_time": "0:07:40", "remaining_time": "2:48:47", "throughput": 23191.4, "total_tokens": 10673600} +{"current_steps": 3400, "total_steps": 78105, "loss": 0.54, "lr": 2.1757777493278713e-06, "epoch": 0.21765571986428525, "percentage": 4.35, "elapsed_time": "0:07:40", "remaining_time": "2:48:47", "throughput": 23191.26, "total_tokens": 10689344} +{"current_steps": 3405, "total_steps": 78105, "loss": 0.5031, "lr": 2.1789783638458587e-06, "epoch": 0.21797580180526216, "percentage": 4.36, "elapsed_time": "0:07:41", "remaining_time": "2:48:46", "throughput": 23190.73, "total_tokens": 10704384} +{"current_steps": 3410, "total_steps": 78105, "loss": 0.4983, "lr": 2.182178978363846e-06, "epoch": 0.21829588374623904, "percentage": 4.37, "elapsed_time": "0:07:42", "remaining_time": "2:48:45", "throughput": 23191.29, "total_tokens": 10720512} +{"current_steps": 3415, "total_steps": 78105, "loss": 0.5506, "lr": 2.1853795928818337e-06, "epoch": 0.21861596568721592, "percentage": 4.37, "elapsed_time": "0:07:42", "remaining_time": "2:48:44", "throughput": 23190.89, "total_tokens": 10735744} +{"current_steps": 3420, "total_steps": 78105, "loss": 0.5132, "lr": 2.188580207399821e-06, "epoch": 0.21893604762819283, "percentage": 4.38, "elapsed_time": "0:07:43", "remaining_time": "2:48:43", "throughput": 23190.53, "total_tokens": 10750848} +{"current_steps": 3425, "total_steps": 78105, "loss": 0.5671, "lr": 2.191780821917808e-06, "epoch": 0.2192561295691697, "percentage": 4.39, "elapsed_time": "0:07:44", "remaining_time": "2:48:42", "throughput": 23189.64, "total_tokens": 10765632} +{"current_steps": 3430, "total_steps": 78105, "loss": 0.451, "lr": 2.1949814364357956e-06, "epoch": 0.21957621151014659, "percentage": 4.39, "elapsed_time": "0:07:44", "remaining_time": "2:48:41", "throughput": 23189.01, "total_tokens": 10780928} +{"current_steps": 3435, "total_steps": 78105, "loss": 0.4231, "lr": 2.198182050953783e-06, "epoch": 0.2198962934511235, "percentage": 4.4, "elapsed_time": "0:07:45", "remaining_time": "2:48:41", "throughput": 23189.03, "total_tokens": 10796864} +{"current_steps": 3440, "total_steps": 78105, "loss": 0.4668, "lr": 2.2013826654717706e-06, "epoch": 0.22021637539210037, "percentage": 4.4, "elapsed_time": "0:07:46", "remaining_time": "2:48:40", "throughput": 23189.57, "total_tokens": 10812672} +{"current_steps": 3445, "total_steps": 78105, "loss": 0.4731, "lr": 2.204583279989758e-06, "epoch": 0.22053645733307728, "percentage": 4.41, "elapsed_time": "0:07:46", "remaining_time": "2:48:39", "throughput": 23189.94, "total_tokens": 10828544} +{"current_steps": 3450, "total_steps": 78105, "loss": 0.5998, "lr": 2.2077838945077455e-06, "epoch": 0.22085653927405416, "percentage": 4.42, "elapsed_time": "0:07:47", "remaining_time": "2:48:38", "throughput": 23190.21, "total_tokens": 10844288} +{"current_steps": 3455, "total_steps": 78105, "loss": 0.5434, "lr": 2.210984509025733e-06, "epoch": 0.22117662121503104, "percentage": 4.42, "elapsed_time": "0:07:48", "remaining_time": "2:48:38", "throughput": 23190.37, "total_tokens": 10860160} +{"current_steps": 3460, "total_steps": 78105, "loss": 0.6148, "lr": 2.2141851235437204e-06, "epoch": 0.22149670315600795, "percentage": 4.43, "elapsed_time": "0:07:48", "remaining_time": "2:48:37", "throughput": 23190.27, "total_tokens": 10875968} +{"current_steps": 3465, "total_steps": 78105, "loss": 0.3782, "lr": 2.217385738061708e-06, "epoch": 0.22181678509698483, "percentage": 4.44, "elapsed_time": "0:07:49", "remaining_time": "2:48:37", "throughput": 23190.67, "total_tokens": 10892096} +{"current_steps": 3470, "total_steps": 78105, "loss": 0.441, "lr": 2.2205863525796954e-06, "epoch": 0.2221368670379617, "percentage": 4.44, "elapsed_time": "0:07:50", "remaining_time": "2:48:37", "throughput": 23191.08, "total_tokens": 10908544} +{"current_steps": 3475, "total_steps": 78105, "loss": 0.515, "lr": 2.223786967097683e-06, "epoch": 0.2224569489789386, "percentage": 4.45, "elapsed_time": "0:07:51", "remaining_time": "2:48:36", "throughput": 23191.43, "total_tokens": 10924544} +{"current_steps": 3480, "total_steps": 78105, "loss": 0.4791, "lr": 2.2269875816156703e-06, "epoch": 0.2227770309199155, "percentage": 4.46, "elapsed_time": "0:07:51", "remaining_time": "2:48:36", "throughput": 23192.53, "total_tokens": 10941888} +{"current_steps": 3485, "total_steps": 78105, "loss": 0.4866, "lr": 2.2301881961336578e-06, "epoch": 0.2230971128608924, "percentage": 4.46, "elapsed_time": "0:07:52", "remaining_time": "2:48:36", "throughput": 23193.35, "total_tokens": 10958592} +{"current_steps": 3490, "total_steps": 78105, "loss": 0.664, "lr": 2.2333888106516453e-06, "epoch": 0.22341719480186928, "percentage": 4.47, "elapsed_time": "0:07:53", "remaining_time": "2:48:36", "throughput": 23193.13, "total_tokens": 10974208} +{"current_steps": 3495, "total_steps": 78105, "loss": 0.4307, "lr": 2.2365894251696327e-06, "epoch": 0.22373727674284616, "percentage": 4.47, "elapsed_time": "0:07:53", "remaining_time": "2:48:34", "throughput": 23192.33, "total_tokens": 10988928} +{"current_steps": 3500, "total_steps": 78105, "loss": 0.4749, "lr": 2.23979003968762e-06, "epoch": 0.22405735868382307, "percentage": 4.48, "elapsed_time": "0:07:54", "remaining_time": "2:48:34", "throughput": 23193.52, "total_tokens": 11005952} +{"current_steps": 3505, "total_steps": 78105, "loss": 0.4606, "lr": 2.2429906542056077e-06, "epoch": 0.22437744062479995, "percentage": 4.49, "elapsed_time": "0:07:55", "remaining_time": "2:48:33", "throughput": 23192.77, "total_tokens": 11020608} +{"current_steps": 3510, "total_steps": 78105, "loss": 0.4978, "lr": 2.246191268723595e-06, "epoch": 0.22469752256577685, "percentage": 4.49, "elapsed_time": "0:07:55", "remaining_time": "2:48:33", "throughput": 23193.24, "total_tokens": 11036736} +{"current_steps": 3515, "total_steps": 78105, "loss": 0.3839, "lr": 2.2493918832415826e-06, "epoch": 0.22501760450675373, "percentage": 4.5, "elapsed_time": "0:07:56", "remaining_time": "2:48:32", "throughput": 23193.64, "total_tokens": 11052480} +{"current_steps": 3520, "total_steps": 78105, "loss": 0.5269, "lr": 2.25259249775957e-06, "epoch": 0.2253376864477306, "percentage": 4.51, "elapsed_time": "0:07:57", "remaining_time": "2:48:31", "throughput": 23193.98, "total_tokens": 11068416} +{"current_steps": 3525, "total_steps": 78105, "loss": 0.464, "lr": 2.2557931122775575e-06, "epoch": 0.22565776838870752, "percentage": 4.51, "elapsed_time": "0:07:57", "remaining_time": "2:48:32", "throughput": 23195.77, "total_tokens": 11086400} +{"current_steps": 3530, "total_steps": 78105, "loss": 0.574, "lr": 2.258993726795545e-06, "epoch": 0.2259778503296844, "percentage": 4.52, "elapsed_time": "0:07:58", "remaining_time": "2:48:32", "throughput": 23196.11, "total_tokens": 11102848} +{"current_steps": 3535, "total_steps": 78105, "loss": 0.5355, "lr": 2.2621943413135325e-06, "epoch": 0.22629793227066128, "percentage": 4.53, "elapsed_time": "0:07:59", "remaining_time": "2:48:30", "throughput": 23195.9, "total_tokens": 11118080} +{"current_steps": 3540, "total_steps": 78105, "loss": 0.5444, "lr": 2.26539495583152e-06, "epoch": 0.22661801421163819, "percentage": 4.53, "elapsed_time": "0:07:59", "remaining_time": "2:48:29", "throughput": 23195.46, "total_tokens": 11133120} +{"current_steps": 3545, "total_steps": 78105, "loss": 0.4621, "lr": 2.268595570349507e-06, "epoch": 0.22693809615261507, "percentage": 4.54, "elapsed_time": "0:08:00", "remaining_time": "2:48:30", "throughput": 23196.09, "total_tokens": 11150144} +{"current_steps": 3550, "total_steps": 78105, "loss": 0.5036, "lr": 2.271796184867495e-06, "epoch": 0.22725817809359197, "percentage": 4.55, "elapsed_time": "0:08:01", "remaining_time": "2:48:29", "throughput": 23195.83, "total_tokens": 11165760} +{"current_steps": 3555, "total_steps": 78105, "loss": 0.4894, "lr": 2.2749967993854823e-06, "epoch": 0.22757826003456885, "percentage": 4.55, "elapsed_time": "0:08:02", "remaining_time": "2:48:27", "throughput": 23195.05, "total_tokens": 11180096} +{"current_steps": 3560, "total_steps": 78105, "loss": 0.396, "lr": 2.27819741390347e-06, "epoch": 0.22789834197554573, "percentage": 4.56, "elapsed_time": "0:08:02", "remaining_time": "2:48:27", "throughput": 23194.31, "total_tokens": 11195584} +{"current_steps": 3565, "total_steps": 78105, "loss": 0.4426, "lr": 2.2813980284214573e-06, "epoch": 0.22821842391652264, "percentage": 4.56, "elapsed_time": "0:08:03", "remaining_time": "2:48:26", "throughput": 23194.12, "total_tokens": 11210944} +{"current_steps": 3570, "total_steps": 78105, "loss": 0.428, "lr": 2.2845986429394447e-06, "epoch": 0.22853850585749952, "percentage": 4.57, "elapsed_time": "0:08:04", "remaining_time": "2:48:26", "throughput": 23194.59, "total_tokens": 11227392} +{"current_steps": 3575, "total_steps": 78105, "loss": 0.6618, "lr": 2.287799257457432e-06, "epoch": 0.2288585877984764, "percentage": 4.58, "elapsed_time": "0:08:04", "remaining_time": "2:48:26", "throughput": 23195.34, "total_tokens": 11244416} +{"current_steps": 3580, "total_steps": 78105, "loss": 0.4212, "lr": 2.2909998719754197e-06, "epoch": 0.2291786697394533, "percentage": 4.58, "elapsed_time": "0:08:05", "remaining_time": "2:48:25", "throughput": 23195.58, "total_tokens": 11260672} +{"current_steps": 3585, "total_steps": 78105, "loss": 0.5228, "lr": 2.294200486493407e-06, "epoch": 0.22949875168043019, "percentage": 4.59, "elapsed_time": "0:08:06", "remaining_time": "2:48:25", "throughput": 23196.36, "total_tokens": 11277504} +{"current_steps": 3590, "total_steps": 78105, "loss": 0.5973, "lr": 2.2974011010113946e-06, "epoch": 0.2298188336214071, "percentage": 4.6, "elapsed_time": "0:08:06", "remaining_time": "2:48:25", "throughput": 23196.08, "total_tokens": 11293184} +{"current_steps": 3595, "total_steps": 78105, "loss": 0.3263, "lr": 2.300601715529382e-06, "epoch": 0.23013891556238397, "percentage": 4.6, "elapsed_time": "0:08:07", "remaining_time": "2:48:24", "throughput": 23196.08, "total_tokens": 11308480} +{"current_steps": 3600, "total_steps": 78105, "loss": 0.4499, "lr": 2.3038023300473695e-06, "epoch": 0.23045899750336085, "percentage": 4.61, "elapsed_time": "0:08:08", "remaining_time": "2:48:23", "throughput": 23195.87, "total_tokens": 11323840} +{"current_steps": 3605, "total_steps": 78105, "loss": 0.6712, "lr": 2.3070029445653566e-06, "epoch": 0.23077907944433776, "percentage": 4.62, "elapsed_time": "0:08:08", "remaining_time": "2:48:22", "throughput": 23196.03, "total_tokens": 11339456} +{"current_steps": 3610, "total_steps": 78105, "loss": 0.6056, "lr": 2.310203559083344e-06, "epoch": 0.23109916138531464, "percentage": 4.62, "elapsed_time": "0:08:09", "remaining_time": "2:48:21", "throughput": 23195.61, "total_tokens": 11354880} +{"current_steps": 3615, "total_steps": 78105, "loss": 0.5216, "lr": 2.3134041736013315e-06, "epoch": 0.23141924332629152, "percentage": 4.63, "elapsed_time": "0:08:10", "remaining_time": "2:48:20", "throughput": 23195.37, "total_tokens": 11369984} +{"current_steps": 3620, "total_steps": 78105, "loss": 0.4563, "lr": 2.316604788119319e-06, "epoch": 0.23173932526726843, "percentage": 4.63, "elapsed_time": "0:08:10", "remaining_time": "2:48:19", "throughput": 23194.93, "total_tokens": 11384640} +{"current_steps": 3625, "total_steps": 78105, "loss": 0.4994, "lr": 2.3198054026373064e-06, "epoch": 0.2320594072082453, "percentage": 4.64, "elapsed_time": "0:08:11", "remaining_time": "2:48:18", "throughput": 23195.51, "total_tokens": 11401152} +{"current_steps": 3630, "total_steps": 78105, "loss": 0.4981, "lr": 2.323006017155294e-06, "epoch": 0.2323794891492222, "percentage": 4.65, "elapsed_time": "0:08:12", "remaining_time": "2:48:18", "throughput": 23195.55, "total_tokens": 11416896} +{"current_steps": 3635, "total_steps": 78105, "loss": 0.5016, "lr": 2.3262066316732814e-06, "epoch": 0.2326995710901991, "percentage": 4.65, "elapsed_time": "0:08:12", "remaining_time": "2:48:17", "throughput": 23195.83, "total_tokens": 11432832} +{"current_steps": 3640, "total_steps": 78105, "loss": 0.5421, "lr": 2.329407246191269e-06, "epoch": 0.23301965303117597, "percentage": 4.66, "elapsed_time": "0:08:13", "remaining_time": "2:48:16", "throughput": 23195.65, "total_tokens": 11448320} +{"current_steps": 3645, "total_steps": 78105, "loss": 0.6042, "lr": 2.3326078607092563e-06, "epoch": 0.23333973497215288, "percentage": 4.67, "elapsed_time": "0:08:14", "remaining_time": "2:48:15", "throughput": 23195.16, "total_tokens": 11463488} +{"current_steps": 3650, "total_steps": 78105, "loss": 0.4562, "lr": 2.3358084752272438e-06, "epoch": 0.23365981691312976, "percentage": 4.67, "elapsed_time": "0:08:14", "remaining_time": "2:48:14", "throughput": 23194.94, "total_tokens": 11478592} +{"current_steps": 3655, "total_steps": 78105, "loss": 0.4173, "lr": 2.3390090897452312e-06, "epoch": 0.23397989885410664, "percentage": 4.68, "elapsed_time": "0:08:15", "remaining_time": "2:48:14", "throughput": 23194.77, "total_tokens": 11494720} +{"current_steps": 3660, "total_steps": 78105, "loss": 0.5285, "lr": 2.3422097042632187e-06, "epoch": 0.23429998079508355, "percentage": 4.69, "elapsed_time": "0:08:16", "remaining_time": "2:48:14", "throughput": 23194.91, "total_tokens": 11511232} +{"current_steps": 3665, "total_steps": 78105, "loss": 0.5821, "lr": 2.345410318781206e-06, "epoch": 0.23462006273606043, "percentage": 4.69, "elapsed_time": "0:08:16", "remaining_time": "2:48:13", "throughput": 23194.96, "total_tokens": 11527040} +{"current_steps": 3670, "total_steps": 78105, "loss": 0.5213, "lr": 2.3486109332991936e-06, "epoch": 0.23494014467703733, "percentage": 4.7, "elapsed_time": "0:08:17", "remaining_time": "2:48:12", "throughput": 23194.94, "total_tokens": 11542528} +{"current_steps": 3675, "total_steps": 78105, "loss": 0.4849, "lr": 2.351811547817181e-06, "epoch": 0.2352602266180142, "percentage": 4.71, "elapsed_time": "0:08:18", "remaining_time": "2:48:11", "throughput": 23194.61, "total_tokens": 11557696} +{"current_steps": 3680, "total_steps": 78105, "loss": 0.4685, "lr": 2.3550121623351686e-06, "epoch": 0.2355803085589911, "percentage": 4.71, "elapsed_time": "0:08:19", "remaining_time": "2:48:12", "throughput": 23195.44, "total_tokens": 11574848} +{"current_steps": 3685, "total_steps": 78105, "loss": 0.531, "lr": 2.358212776853156e-06, "epoch": 0.235900390499968, "percentage": 4.72, "elapsed_time": "0:08:19", "remaining_time": "2:48:10", "throughput": 23194.89, "total_tokens": 11589696} +{"current_steps": 3690, "total_steps": 78105, "loss": 0.4164, "lr": 2.3614133913711435e-06, "epoch": 0.23622047244094488, "percentage": 4.72, "elapsed_time": "0:08:20", "remaining_time": "2:48:11", "throughput": 23195.66, "total_tokens": 11606848} +{"current_steps": 3695, "total_steps": 78105, "loss": 0.7326, "lr": 2.364614005889131e-06, "epoch": 0.23654055438192176, "percentage": 4.73, "elapsed_time": "0:08:21", "remaining_time": "2:48:11", "throughput": 23196.72, "total_tokens": 11623744} +{"current_steps": 3700, "total_steps": 78105, "loss": 0.5326, "lr": 2.3678146204071184e-06, "epoch": 0.23686063632289867, "percentage": 4.74, "elapsed_time": "0:08:21", "remaining_time": "2:48:10", "throughput": 23197.16, "total_tokens": 11640256} +{"current_steps": 3705, "total_steps": 78105, "loss": 0.4121, "lr": 2.371015234925106e-06, "epoch": 0.23718071826387555, "percentage": 4.74, "elapsed_time": "0:08:22", "remaining_time": "2:48:10", "throughput": 23197.0, "total_tokens": 11655744} +{"current_steps": 3710, "total_steps": 78105, "loss": 0.3082, "lr": 2.3742158494430934e-06, "epoch": 0.23750080020485245, "percentage": 4.75, "elapsed_time": "0:08:23", "remaining_time": "2:48:09", "throughput": 23197.09, "total_tokens": 11671104} +{"current_steps": 3715, "total_steps": 78105, "loss": 0.4167, "lr": 2.377416463961081e-06, "epoch": 0.23782088214582933, "percentage": 4.76, "elapsed_time": "0:08:23", "remaining_time": "2:48:08", "throughput": 23197.43, "total_tokens": 11686848} +{"current_steps": 3720, "total_steps": 78105, "loss": 0.5506, "lr": 2.3806170784790683e-06, "epoch": 0.2381409640868062, "percentage": 4.76, "elapsed_time": "0:08:24", "remaining_time": "2:48:07", "throughput": 23197.18, "total_tokens": 11701952} +{"current_steps": 3725, "total_steps": 78105, "loss": 0.5445, "lr": 2.3838176929970554e-06, "epoch": 0.23846104602778312, "percentage": 4.77, "elapsed_time": "0:08:25", "remaining_time": "2:48:06", "throughput": 23197.81, "total_tokens": 11718592} +{"current_steps": 3730, "total_steps": 78105, "loss": 0.5111, "lr": 2.387018307515043e-06, "epoch": 0.23878112796876, "percentage": 4.78, "elapsed_time": "0:08:25", "remaining_time": "2:48:06", "throughput": 23197.87, "total_tokens": 11734208} +{"current_steps": 3735, "total_steps": 78105, "loss": 0.4335, "lr": 2.3902189220330303e-06, "epoch": 0.2391012099097369, "percentage": 4.78, "elapsed_time": "0:08:26", "remaining_time": "2:48:04", "throughput": 23197.47, "total_tokens": 11748992} +{"current_steps": 3740, "total_steps": 78105, "loss": 0.4791, "lr": 2.3934195365510178e-06, "epoch": 0.23942129185071379, "percentage": 4.79, "elapsed_time": "0:08:27", "remaining_time": "2:48:04", "throughput": 23197.47, "total_tokens": 11764608} +{"current_steps": 3745, "total_steps": 78105, "loss": 0.5135, "lr": 2.3966201510690052e-06, "epoch": 0.23974137379169067, "percentage": 4.79, "elapsed_time": "0:08:27", "remaining_time": "2:48:03", "throughput": 23197.86, "total_tokens": 11780800} +{"current_steps": 3750, "total_steps": 78105, "loss": 0.5004, "lr": 2.3998207655869927e-06, "epoch": 0.24006145573266757, "percentage": 4.8, "elapsed_time": "0:08:28", "remaining_time": "2:48:02", "throughput": 23197.5, "total_tokens": 11795776} +{"current_steps": 3755, "total_steps": 78105, "loss": 0.5629, "lr": 2.40302138010498e-06, "epoch": 0.24038153767364445, "percentage": 4.81, "elapsed_time": "0:08:29", "remaining_time": "2:48:01", "throughput": 23197.72, "total_tokens": 11811776} +{"current_steps": 3760, "total_steps": 78105, "loss": 0.5444, "lr": 2.406221994622968e-06, "epoch": 0.24070161961462133, "percentage": 4.81, "elapsed_time": "0:08:29", "remaining_time": "2:48:01", "throughput": 23198.07, "total_tokens": 11828224} +{"current_steps": 3765, "total_steps": 78105, "loss": 0.4867, "lr": 2.4094226091409555e-06, "epoch": 0.24102170155559824, "percentage": 4.82, "elapsed_time": "0:08:30", "remaining_time": "2:48:01", "throughput": 23197.63, "total_tokens": 11843904} +{"current_steps": 3770, "total_steps": 78105, "loss": 0.6049, "lr": 2.412623223658943e-06, "epoch": 0.24134178349657512, "percentage": 4.83, "elapsed_time": "0:08:31", "remaining_time": "2:48:00", "throughput": 23197.49, "total_tokens": 11859392} +{"current_steps": 3775, "total_steps": 78105, "loss": 0.5397, "lr": 2.4158238381769305e-06, "epoch": 0.24166186543755203, "percentage": 4.83, "elapsed_time": "0:08:31", "remaining_time": "2:48:00", "throughput": 23198.11, "total_tokens": 11875968} +{"current_steps": 3780, "total_steps": 78105, "loss": 0.4635, "lr": 2.419024452694918e-06, "epoch": 0.2419819473785289, "percentage": 4.84, "elapsed_time": "0:08:32", "remaining_time": "2:47:59", "throughput": 23197.73, "total_tokens": 11891584} +{"current_steps": 3785, "total_steps": 78105, "loss": 0.4962, "lr": 2.422225067212905e-06, "epoch": 0.24230202931950579, "percentage": 4.85, "elapsed_time": "0:08:33", "remaining_time": "2:47:58", "throughput": 23197.52, "total_tokens": 11906944} +{"current_steps": 3790, "total_steps": 78105, "loss": 0.5116, "lr": 2.4254256817308924e-06, "epoch": 0.2426221112604827, "percentage": 4.85, "elapsed_time": "0:08:33", "remaining_time": "2:47:57", "throughput": 23197.42, "total_tokens": 11922368} +{"current_steps": 3795, "total_steps": 78105, "loss": 0.5673, "lr": 2.42862629624888e-06, "epoch": 0.24294219320145957, "percentage": 4.86, "elapsed_time": "0:08:34", "remaining_time": "2:47:57", "throughput": 23197.81, "total_tokens": 11938432} +{"current_steps": 3800, "total_steps": 78105, "loss": 0.5269, "lr": 2.4318269107668674e-06, "epoch": 0.24326227514243645, "percentage": 4.87, "elapsed_time": "0:08:35", "remaining_time": "2:47:57", "throughput": 23199.39, "total_tokens": 11956608} +{"current_steps": 3805, "total_steps": 78105, "loss": 0.4994, "lr": 2.435027525284855e-06, "epoch": 0.24358235708341336, "percentage": 4.87, "elapsed_time": "0:08:36", "remaining_time": "2:47:57", "throughput": 23198.86, "total_tokens": 11972480} +{"current_steps": 3810, "total_steps": 78105, "loss": 0.4374, "lr": 2.4382281398028423e-06, "epoch": 0.24390243902439024, "percentage": 4.88, "elapsed_time": "0:08:36", "remaining_time": "2:47:56", "throughput": 23197.87, "total_tokens": 11987072} +{"current_steps": 3815, "total_steps": 78105, "loss": 0.4759, "lr": 2.4414287543208298e-06, "epoch": 0.24422252096536715, "percentage": 4.88, "elapsed_time": "0:08:37", "remaining_time": "2:47:55", "throughput": 23197.2, "total_tokens": 12002112} +{"current_steps": 3820, "total_steps": 78105, "loss": 0.3848, "lr": 2.4446293688388172e-06, "epoch": 0.24454260290634403, "percentage": 4.89, "elapsed_time": "0:08:38", "remaining_time": "2:47:54", "throughput": 23196.31, "total_tokens": 12016960} +{"current_steps": 3825, "total_steps": 78105, "loss": 0.4593, "lr": 2.4478299833568047e-06, "epoch": 0.2448626848473209, "percentage": 4.9, "elapsed_time": "0:08:38", "remaining_time": "2:47:53", "throughput": 23196.34, "total_tokens": 12032576} +{"current_steps": 3830, "total_steps": 78105, "loss": 0.3892, "lr": 2.451030597874792e-06, "epoch": 0.2451827667882978, "percentage": 4.9, "elapsed_time": "0:08:39", "remaining_time": "2:47:53", "throughput": 23196.58, "total_tokens": 12048768} +{"current_steps": 3835, "total_steps": 78105, "loss": 0.4523, "lr": 2.4542312123927796e-06, "epoch": 0.2455028487292747, "percentage": 4.91, "elapsed_time": "0:08:40", "remaining_time": "2:47:52", "throughput": 23197.29, "total_tokens": 12065088} +{"current_steps": 3840, "total_steps": 78105, "loss": 0.6496, "lr": 2.457431826910767e-06, "epoch": 0.24582293067025157, "percentage": 4.92, "elapsed_time": "0:08:40", "remaining_time": "2:47:51", "throughput": 23197.33, "total_tokens": 12080704} +{"current_steps": 3845, "total_steps": 78105, "loss": 0.4849, "lr": 2.4606324414287546e-06, "epoch": 0.24614301261122848, "percentage": 4.92, "elapsed_time": "0:08:41", "remaining_time": "2:47:51", "throughput": 23197.42, "total_tokens": 12096704} +{"current_steps": 3850, "total_steps": 78105, "loss": 0.5674, "lr": 2.463833055946742e-06, "epoch": 0.24646309455220536, "percentage": 4.93, "elapsed_time": "0:08:42", "remaining_time": "2:47:50", "throughput": 23196.77, "total_tokens": 12111488} +{"current_steps": 3855, "total_steps": 78105, "loss": 0.523, "lr": 2.4670336704647295e-06, "epoch": 0.24678317649318227, "percentage": 4.94, "elapsed_time": "0:08:42", "remaining_time": "2:47:49", "throughput": 23197.76, "total_tokens": 12128192} +{"current_steps": 3860, "total_steps": 78105, "loss": 0.4971, "lr": 2.470234284982717e-06, "epoch": 0.24710325843415915, "percentage": 4.94, "elapsed_time": "0:08:43", "remaining_time": "2:47:49", "throughput": 23197.61, "total_tokens": 12143808} +{"current_steps": 3865, "total_steps": 78105, "loss": 0.5054, "lr": 2.4734348995007044e-06, "epoch": 0.24742334037513602, "percentage": 4.95, "elapsed_time": "0:08:44", "remaining_time": "2:47:48", "throughput": 23197.05, "total_tokens": 12159232} +{"current_steps": 3870, "total_steps": 78105, "loss": 0.4884, "lr": 2.476635514018692e-06, "epoch": 0.24774342231611293, "percentage": 4.95, "elapsed_time": "0:08:44", "remaining_time": "2:47:48", "throughput": 23197.23, "total_tokens": 12175360} +{"current_steps": 3875, "total_steps": 78105, "loss": 0.4883, "lr": 2.4798361285366794e-06, "epoch": 0.2480635042570898, "percentage": 4.96, "elapsed_time": "0:08:45", "remaining_time": "2:47:46", "throughput": 23196.88, "total_tokens": 12190272} +{"current_steps": 3880, "total_steps": 78105, "loss": 0.5172, "lr": 2.483036743054667e-06, "epoch": 0.2483835861980667, "percentage": 4.97, "elapsed_time": "0:08:46", "remaining_time": "2:47:47", "throughput": 23198.02, "total_tokens": 12208448} +{"current_steps": 3885, "total_steps": 78105, "loss": 0.7252, "lr": 2.4862373575726543e-06, "epoch": 0.2487036681390436, "percentage": 4.97, "elapsed_time": "0:08:47", "remaining_time": "2:47:48", "throughput": 23198.68, "total_tokens": 12225984} +{"current_steps": 3890, "total_steps": 78105, "loss": 0.5371, "lr": 2.4894379720906418e-06, "epoch": 0.24902375008002048, "percentage": 4.98, "elapsed_time": "0:08:47", "remaining_time": "2:47:47", "throughput": 23198.17, "total_tokens": 12241408} +{"current_steps": 3895, "total_steps": 78105, "loss": 0.4739, "lr": 2.4926385866086292e-06, "epoch": 0.24934383202099739, "percentage": 4.99, "elapsed_time": "0:08:48", "remaining_time": "2:47:46", "throughput": 23197.33, "total_tokens": 12256064} +{"current_steps": 3900, "total_steps": 78105, "loss": 0.4953, "lr": 2.4958392011266167e-06, "epoch": 0.24966391396197427, "percentage": 4.99, "elapsed_time": "0:08:49", "remaining_time": "2:47:46", "throughput": 23197.78, "total_tokens": 12272832} +{"current_steps": 3905, "total_steps": 78105, "loss": 0.6094, "lr": 2.4990398156446038e-06, "epoch": 0.24998399590295114, "percentage": 5.0, "elapsed_time": "0:08:49", "remaining_time": "2:47:45", "throughput": 23198.11, "total_tokens": 12289024} +{"current_steps": 3906, "total_steps": 78105, "eval_loss": 0.5013530850410461, "epoch": 0.25004801229114654, "percentage": 5.0, "elapsed_time": "0:09:40", "remaining_time": "3:03:56", "throughput": 21157.41, "total_tokens": 12292032} +{"current_steps": 3910, "total_steps": 78105, "loss": 0.4658, "lr": 2.5022404301625912e-06, "epoch": 0.25030407784392805, "percentage": 5.01, "elapsed_time": "0:10:46", "remaining_time": "3:24:31", "throughput": 19025.89, "total_tokens": 12304064} +{"current_steps": 3915, "total_steps": 78105, "loss": 0.5182, "lr": 2.5054410446805787e-06, "epoch": 0.25062415978490493, "percentage": 5.01, "elapsed_time": "0:10:47", "remaining_time": "3:24:28", "throughput": 19030.42, "total_tokens": 12320256} +{"current_steps": 3920, "total_steps": 78105, "loss": 0.5085, "lr": 2.508641659198566e-06, "epoch": 0.2509442417258818, "percentage": 5.02, "elapsed_time": "0:10:48", "remaining_time": "3:24:24", "throughput": 19034.26, "total_tokens": 12335360} +{"current_steps": 3925, "total_steps": 78105, "loss": 0.5237, "lr": 2.5118422737165536e-06, "epoch": 0.2512643236668587, "percentage": 5.03, "elapsed_time": "0:10:48", "remaining_time": "3:24:20", "throughput": 19038.94, "total_tokens": 12351488} +{"current_steps": 3930, "total_steps": 78105, "loss": 0.6624, "lr": 2.515042888234541e-06, "epoch": 0.2515844056078356, "percentage": 5.03, "elapsed_time": "0:10:49", "remaining_time": "3:24:16", "throughput": 19042.34, "total_tokens": 12365952} +{"current_steps": 3935, "total_steps": 78105, "loss": 0.5619, "lr": 2.5182435027525286e-06, "epoch": 0.2519044875488125, "percentage": 5.04, "elapsed_time": "0:10:50", "remaining_time": "3:24:12", "throughput": 19046.81, "total_tokens": 12381376} +{"current_steps": 3940, "total_steps": 78105, "loss": 0.4985, "lr": 2.521444117270516e-06, "epoch": 0.2522245694897894, "percentage": 5.04, "elapsed_time": "0:10:50", "remaining_time": "3:24:08", "throughput": 19050.62, "total_tokens": 12396160} +{"current_steps": 3945, "total_steps": 78105, "loss": 0.5166, "lr": 2.5246447317885035e-06, "epoch": 0.25254465143076626, "percentage": 5.05, "elapsed_time": "0:10:51", "remaining_time": "3:24:04", "throughput": 19054.3, "total_tokens": 12410944} +{"current_steps": 3950, "total_steps": 78105, "loss": 0.3987, "lr": 2.527845346306491e-06, "epoch": 0.25286473337174314, "percentage": 5.06, "elapsed_time": "0:10:52", "remaining_time": "3:24:00", "throughput": 19058.38, "total_tokens": 12426304} +{"current_steps": 3955, "total_steps": 78105, "loss": 0.4964, "lr": 2.5310459608244784e-06, "epoch": 0.2531848153127201, "percentage": 5.06, "elapsed_time": "0:10:52", "remaining_time": "3:23:57", "throughput": 19063.0, "total_tokens": 12442432} +{"current_steps": 3960, "total_steps": 78105, "loss": 0.523, "lr": 2.534246575342466e-06, "epoch": 0.25350489725369696, "percentage": 5.07, "elapsed_time": "0:10:53", "remaining_time": "3:23:53", "throughput": 19067.28, "total_tokens": 12458368} +{"current_steps": 3965, "total_steps": 78105, "loss": 0.4316, "lr": 2.5374471898604538e-06, "epoch": 0.25382497919467384, "percentage": 5.08, "elapsed_time": "0:10:54", "remaining_time": "3:23:50", "throughput": 19071.69, "total_tokens": 12474368} +{"current_steps": 3970, "total_steps": 78105, "loss": 0.5835, "lr": 2.5406478043784412e-06, "epoch": 0.2541450611356507, "percentage": 5.08, "elapsed_time": "0:10:54", "remaining_time": "3:23:46", "throughput": 19075.96, "total_tokens": 12490176} +{"current_steps": 3975, "total_steps": 78105, "loss": 0.6054, "lr": 2.5438484188964287e-06, "epoch": 0.2544651430766276, "percentage": 5.09, "elapsed_time": "0:10:55", "remaining_time": "3:23:44", "throughput": 19081.43, "total_tokens": 12507840} +{"current_steps": 3980, "total_steps": 78105, "loss": 0.4305, "lr": 2.547049033414416e-06, "epoch": 0.25478522501760453, "percentage": 5.1, "elapsed_time": "0:10:56", "remaining_time": "3:23:40", "throughput": 19085.48, "total_tokens": 12523520} +{"current_steps": 3985, "total_steps": 78105, "loss": 0.5948, "lr": 2.5502496479324037e-06, "epoch": 0.2551053069585814, "percentage": 5.1, "elapsed_time": "0:10:56", "remaining_time": "3:23:38", "throughput": 19090.73, "total_tokens": 12540736} +{"current_steps": 3990, "total_steps": 78105, "loss": 0.5259, "lr": 2.553450262450391e-06, "epoch": 0.2554253888995583, "percentage": 5.11, "elapsed_time": "0:10:57", "remaining_time": "3:23:34", "throughput": 19095.07, "total_tokens": 12556352} +{"current_steps": 3995, "total_steps": 78105, "loss": 0.3837, "lr": 2.5566508769683777e-06, "epoch": 0.25574547084053517, "percentage": 5.11, "elapsed_time": "0:10:58", "remaining_time": "3:23:30", "throughput": 19098.36, "total_tokens": 12570688} +{"current_steps": 4000, "total_steps": 78105, "loss": 0.4606, "lr": 2.559851491486365e-06, "epoch": 0.25606555278151205, "percentage": 5.12, "elapsed_time": "0:10:58", "remaining_time": "3:23:26", "throughput": 19102.41, "total_tokens": 12585984} +{"current_steps": 4005, "total_steps": 78105, "loss": 0.5053, "lr": 2.5630521060043527e-06, "epoch": 0.25638563472248893, "percentage": 5.13, "elapsed_time": "0:10:59", "remaining_time": "3:23:22", "throughput": 19106.45, "total_tokens": 12601216} +{"current_steps": 4010, "total_steps": 78105, "loss": 0.4721, "lr": 2.56625272052234e-06, "epoch": 0.25670571666346587, "percentage": 5.13, "elapsed_time": "0:11:00", "remaining_time": "3:23:19", "throughput": 19111.81, "total_tokens": 12618624} +{"current_steps": 4015, "total_steps": 78105, "loss": 0.6074, "lr": 2.5694533350403276e-06, "epoch": 0.25702579860444275, "percentage": 5.14, "elapsed_time": "0:11:00", "remaining_time": "3:23:17", "throughput": 19116.73, "total_tokens": 12635840} +{"current_steps": 4020, "total_steps": 78105, "loss": 0.4088, "lr": 2.572653949558315e-06, "epoch": 0.2573458805454196, "percentage": 5.15, "elapsed_time": "0:11:01", "remaining_time": "3:23:14", "throughput": 19121.05, "total_tokens": 12651904} +{"current_steps": 4025, "total_steps": 78105, "loss": 0.599, "lr": 2.5758545640763025e-06, "epoch": 0.2576659624863965, "percentage": 5.15, "elapsed_time": "0:11:02", "remaining_time": "3:23:10", "throughput": 19124.88, "total_tokens": 12666944} +{"current_steps": 4030, "total_steps": 78105, "loss": 0.4438, "lr": 2.5790551785942904e-06, "epoch": 0.2579860444273734, "percentage": 5.16, "elapsed_time": "0:11:03", "remaining_time": "3:23:06", "throughput": 19128.99, "total_tokens": 12682688} +{"current_steps": 4035, "total_steps": 78105, "loss": 0.5512, "lr": 2.582255793112278e-06, "epoch": 0.2583061263683503, "percentage": 5.17, "elapsed_time": "0:11:03", "remaining_time": "3:23:03", "throughput": 19133.2, "total_tokens": 12698304} +{"current_steps": 4040, "total_steps": 78105, "loss": 0.5183, "lr": 2.5854564076302654e-06, "epoch": 0.2586262083093272, "percentage": 5.17, "elapsed_time": "0:11:04", "remaining_time": "3:22:59", "throughput": 19136.55, "total_tokens": 12712896} +{"current_steps": 4045, "total_steps": 78105, "loss": 0.3824, "lr": 2.588657022148253e-06, "epoch": 0.2589462902503041, "percentage": 5.18, "elapsed_time": "0:11:04", "remaining_time": "3:22:54", "throughput": 19139.92, "total_tokens": 12727488} +{"current_steps": 4050, "total_steps": 78105, "loss": 0.3892, "lr": 2.5918576366662403e-06, "epoch": 0.25926637219128096, "percentage": 5.19, "elapsed_time": "0:11:05", "remaining_time": "3:22:51", "throughput": 19143.74, "total_tokens": 12742720} +{"current_steps": 4055, "total_steps": 78105, "loss": 0.4508, "lr": 2.5950582511842278e-06, "epoch": 0.25958645413225784, "percentage": 5.19, "elapsed_time": "0:11:06", "remaining_time": "3:22:47", "throughput": 19147.74, "total_tokens": 12757760} +{"current_steps": 4060, "total_steps": 78105, "loss": 0.5575, "lr": 2.5982588657022152e-06, "epoch": 0.2599065360732348, "percentage": 5.2, "elapsed_time": "0:11:06", "remaining_time": "3:22:43", "throughput": 19151.84, "total_tokens": 12773312} +{"current_steps": 4065, "total_steps": 78105, "loss": 0.6196, "lr": 2.6014594802202027e-06, "epoch": 0.26022661801421165, "percentage": 5.2, "elapsed_time": "0:11:07", "remaining_time": "3:22:40", "throughput": 19156.93, "total_tokens": 12790336} +{"current_steps": 4070, "total_steps": 78105, "loss": 0.4902, "lr": 2.60466009473819e-06, "epoch": 0.26054669995518853, "percentage": 5.21, "elapsed_time": "0:11:08", "remaining_time": "3:22:37", "throughput": 19160.43, "total_tokens": 12805632} +{"current_steps": 4075, "total_steps": 78105, "loss": 0.5747, "lr": 2.6078607092561776e-06, "epoch": 0.2608667818961654, "percentage": 5.22, "elapsed_time": "0:11:09", "remaining_time": "3:22:33", "throughput": 19164.47, "total_tokens": 12821184} +{"current_steps": 4080, "total_steps": 78105, "loss": 0.5308, "lr": 2.611061323774165e-06, "epoch": 0.2611868638371423, "percentage": 5.22, "elapsed_time": "0:11:09", "remaining_time": "3:22:30", "throughput": 19168.18, "total_tokens": 12836544} +{"current_steps": 4085, "total_steps": 78105, "loss": 0.5001, "lr": 2.6142619382921526e-06, "epoch": 0.26150694577811917, "percentage": 5.23, "elapsed_time": "0:11:10", "remaining_time": "3:22:26", "throughput": 19172.05, "total_tokens": 12852032} +{"current_steps": 4090, "total_steps": 78105, "loss": 0.4556, "lr": 2.61746255281014e-06, "epoch": 0.2618270277190961, "percentage": 5.24, "elapsed_time": "0:11:11", "remaining_time": "3:22:23", "throughput": 19175.87, "total_tokens": 12867456} +{"current_steps": 4095, "total_steps": 78105, "loss": 0.3812, "lr": 2.6206631673281275e-06, "epoch": 0.262147109660073, "percentage": 5.24, "elapsed_time": "0:11:11", "remaining_time": "3:22:20", "throughput": 19181.45, "total_tokens": 12885184} +{"current_steps": 4100, "total_steps": 78105, "loss": 0.4032, "lr": 2.623863781846115e-06, "epoch": 0.26246719160104987, "percentage": 5.25, "elapsed_time": "0:11:12", "remaining_time": "3:22:16", "throughput": 19185.4, "total_tokens": 12900416} +{"current_steps": 4105, "total_steps": 78105, "loss": 0.6657, "lr": 2.6270643963641024e-06, "epoch": 0.26278727354202674, "percentage": 5.26, "elapsed_time": "0:11:13", "remaining_time": "3:22:13", "throughput": 19189.26, "total_tokens": 12915648} +{"current_steps": 4110, "total_steps": 78105, "loss": 0.4769, "lr": 2.63026501088209e-06, "epoch": 0.2631073554830036, "percentage": 5.26, "elapsed_time": "0:11:13", "remaining_time": "3:22:09", "throughput": 19192.8, "total_tokens": 12930368} +{"current_steps": 4115, "total_steps": 78105, "loss": 0.5281, "lr": 2.633465625400077e-06, "epoch": 0.26342743742398056, "percentage": 5.27, "elapsed_time": "0:11:14", "remaining_time": "3:22:05", "throughput": 19196.66, "total_tokens": 12945472} +{"current_steps": 4120, "total_steps": 78105, "loss": 0.5905, "lr": 2.6366662399180644e-06, "epoch": 0.26374751936495744, "percentage": 5.27, "elapsed_time": "0:11:15", "remaining_time": "3:22:02", "throughput": 19200.65, "total_tokens": 12961472} +{"current_steps": 4125, "total_steps": 78105, "loss": 0.6382, "lr": 2.639866854436052e-06, "epoch": 0.2640676013059343, "percentage": 5.28, "elapsed_time": "0:11:15", "remaining_time": "3:21:58", "throughput": 19203.93, "total_tokens": 12975872} +{"current_steps": 4130, "total_steps": 78105, "loss": 0.5455, "lr": 2.6430674689540394e-06, "epoch": 0.2643876832469112, "percentage": 5.29, "elapsed_time": "0:11:16", "remaining_time": "3:21:55", "throughput": 19208.31, "total_tokens": 12992128} +{"current_steps": 4135, "total_steps": 78105, "loss": 0.4478, "lr": 2.646268083472027e-06, "epoch": 0.2647077651878881, "percentage": 5.29, "elapsed_time": "0:11:17", "remaining_time": "3:22:00", "throughput": 19217.74, "total_tokens": 13021184} +{"current_steps": 4140, "total_steps": 78105, "loss": 0.4527, "lr": 2.6494686979900143e-06, "epoch": 0.265027847128865, "percentage": 5.3, "elapsed_time": "0:11:18", "remaining_time": "3:21:57", "throughput": 19222.13, "total_tokens": 13037568} +{"current_steps": 4145, "total_steps": 78105, "loss": 0.5069, "lr": 2.6526693125080018e-06, "epoch": 0.2653479290698419, "percentage": 5.31, "elapsed_time": "0:11:18", "remaining_time": "3:21:54", "throughput": 19226.19, "total_tokens": 13053440} +{"current_steps": 4150, "total_steps": 78105, "loss": 0.6118, "lr": 2.6558699270259892e-06, "epoch": 0.26566801101081877, "percentage": 5.31, "elapsed_time": "0:11:19", "remaining_time": "3:21:51", "throughput": 19230.33, "total_tokens": 13069440} +{"current_steps": 4155, "total_steps": 78105, "loss": 0.5617, "lr": 2.6590705415439767e-06, "epoch": 0.26598809295179565, "percentage": 5.32, "elapsed_time": "0:11:20", "remaining_time": "3:21:47", "throughput": 19233.72, "total_tokens": 13084224} +{"current_steps": 4160, "total_steps": 78105, "loss": 0.7227, "lr": 2.662271156061964e-06, "epoch": 0.26630817489277253, "percentage": 5.33, "elapsed_time": "0:11:20", "remaining_time": "3:21:43", "throughput": 19237.01, "total_tokens": 13098688} +{"current_steps": 4165, "total_steps": 78105, "loss": 0.4641, "lr": 2.6654717705799516e-06, "epoch": 0.26662825683374947, "percentage": 5.33, "elapsed_time": "0:11:21", "remaining_time": "3:21:39", "throughput": 19240.66, "total_tokens": 13114112} +{"current_steps": 4170, "total_steps": 78105, "loss": 0.4726, "lr": 2.668672385097939e-06, "epoch": 0.26694833877472635, "percentage": 5.34, "elapsed_time": "0:11:22", "remaining_time": "3:21:36", "throughput": 19245.0, "total_tokens": 13130240} +{"current_steps": 4175, "total_steps": 78105, "loss": 0.4111, "lr": 2.6718729996159266e-06, "epoch": 0.2672684207157032, "percentage": 5.35, "elapsed_time": "0:11:22", "remaining_time": "3:21:33", "throughput": 19248.71, "total_tokens": 13145536} +{"current_steps": 4180, "total_steps": 78105, "loss": 0.6038, "lr": 2.675073614133914e-06, "epoch": 0.2675885026566801, "percentage": 5.35, "elapsed_time": "0:11:23", "remaining_time": "3:21:29", "throughput": 19252.2, "total_tokens": 13160256} +{"current_steps": 4185, "total_steps": 78105, "loss": 0.5761, "lr": 2.6782742286519015e-06, "epoch": 0.267908584597657, "percentage": 5.36, "elapsed_time": "0:11:24", "remaining_time": "3:21:26", "throughput": 19256.92, "total_tokens": 13177088} +{"current_steps": 4190, "total_steps": 78105, "loss": 0.5292, "lr": 2.681474843169889e-06, "epoch": 0.26822866653863386, "percentage": 5.36, "elapsed_time": "0:11:24", "remaining_time": "3:21:23", "throughput": 19260.92, "total_tokens": 13192768} +{"current_steps": 4195, "total_steps": 78105, "loss": 0.5566, "lr": 2.6846754576878764e-06, "epoch": 0.2685487484796108, "percentage": 5.37, "elapsed_time": "0:11:25", "remaining_time": "3:21:19", "throughput": 19264.62, "total_tokens": 13208192} +{"current_steps": 4200, "total_steps": 78105, "loss": 0.603, "lr": 2.687876072205864e-06, "epoch": 0.2688688304205877, "percentage": 5.38, "elapsed_time": "0:11:26", "remaining_time": "3:21:16", "throughput": 19268.58, "total_tokens": 13223872} +{"current_steps": 4205, "total_steps": 78105, "loss": 0.5049, "lr": 2.6910766867238514e-06, "epoch": 0.26918891236156456, "percentage": 5.38, "elapsed_time": "0:11:26", "remaining_time": "3:21:12", "throughput": 19272.07, "total_tokens": 13239104} +{"current_steps": 4210, "total_steps": 78105, "loss": 0.3918, "lr": 2.694277301241839e-06, "epoch": 0.26950899430254144, "percentage": 5.39, "elapsed_time": "0:11:27", "remaining_time": "3:21:09", "throughput": 19275.94, "total_tokens": 13254464} +{"current_steps": 4215, "total_steps": 78105, "loss": 0.3768, "lr": 2.6974779157598263e-06, "epoch": 0.2698290762435183, "percentage": 5.4, "elapsed_time": "0:11:28", "remaining_time": "3:21:05", "throughput": 19279.69, "total_tokens": 13269824} +{"current_steps": 4220, "total_steps": 78105, "loss": 0.4682, "lr": 2.7006785302778138e-06, "epoch": 0.27014915818449525, "percentage": 5.4, "elapsed_time": "0:11:29", "remaining_time": "3:21:03", "throughput": 19285.38, "total_tokens": 13288448} +{"current_steps": 4225, "total_steps": 78105, "loss": 0.5645, "lr": 2.7038791447958012e-06, "epoch": 0.27046924012547213, "percentage": 5.41, "elapsed_time": "0:11:29", "remaining_time": "3:21:00", "throughput": 19289.27, "total_tokens": 13304320} +{"current_steps": 4230, "total_steps": 78105, "loss": 0.6966, "lr": 2.7070797593137883e-06, "epoch": 0.270789322066449, "percentage": 5.42, "elapsed_time": "0:11:30", "remaining_time": "3:20:58", "throughput": 19294.44, "total_tokens": 13321920} +{"current_steps": 4235, "total_steps": 78105, "loss": 0.4596, "lr": 2.7102803738317757e-06, "epoch": 0.2711094040074259, "percentage": 5.42, "elapsed_time": "0:11:31", "remaining_time": "3:20:55", "throughput": 19298.04, "total_tokens": 13337280} +{"current_steps": 4240, "total_steps": 78105, "loss": 0.4459, "lr": 2.713480988349763e-06, "epoch": 0.27142948594840277, "percentage": 5.43, "elapsed_time": "0:11:31", "remaining_time": "3:20:52", "throughput": 19302.42, "total_tokens": 13354112} +{"current_steps": 4245, "total_steps": 78105, "loss": 0.4562, "lr": 2.7166816028677507e-06, "epoch": 0.2717495678893797, "percentage": 5.43, "elapsed_time": "0:11:32", "remaining_time": "3:20:49", "throughput": 19305.86, "total_tokens": 13369600} +{"current_steps": 4250, "total_steps": 78105, "loss": 0.4506, "lr": 2.719882217385738e-06, "epoch": 0.2720696498303566, "percentage": 5.44, "elapsed_time": "0:11:33", "remaining_time": "3:20:46", "throughput": 19309.58, "total_tokens": 13385536} +{"current_steps": 4255, "total_steps": 78105, "loss": 0.4474, "lr": 2.7230828319037256e-06, "epoch": 0.27238973177133347, "percentage": 5.45, "elapsed_time": "0:11:33", "remaining_time": "3:20:42", "throughput": 19313.14, "total_tokens": 13400832} +{"current_steps": 4260, "total_steps": 78105, "loss": 0.3514, "lr": 2.726283446421713e-06, "epoch": 0.27270981371231034, "percentage": 5.45, "elapsed_time": "0:11:34", "remaining_time": "3:20:39", "throughput": 19316.65, "total_tokens": 13416000} +{"current_steps": 4265, "total_steps": 78105, "loss": 0.5444, "lr": 2.7294840609397005e-06, "epoch": 0.2730298956532872, "percentage": 5.46, "elapsed_time": "0:11:35", "remaining_time": "3:20:35", "throughput": 19320.31, "total_tokens": 13431360} +{"current_steps": 4270, "total_steps": 78105, "loss": 0.5426, "lr": 2.732684675457688e-06, "epoch": 0.2733499775942641, "percentage": 5.47, "elapsed_time": "0:11:35", "remaining_time": "3:20:32", "throughput": 19324.06, "total_tokens": 13446976} +{"current_steps": 4275, "total_steps": 78105, "loss": 0.4505, "lr": 2.7358852899756755e-06, "epoch": 0.27367005953524104, "percentage": 5.47, "elapsed_time": "0:11:36", "remaining_time": "3:20:29", "throughput": 19328.03, "total_tokens": 13463040} +{"current_steps": 4280, "total_steps": 78105, "loss": 0.7821, "lr": 2.739085904493663e-06, "epoch": 0.2739901414762179, "percentage": 5.48, "elapsed_time": "0:11:37", "remaining_time": "3:20:26", "throughput": 19331.7, "total_tokens": 13478656} +{"current_steps": 4285, "total_steps": 78105, "loss": 0.5213, "lr": 2.7422865190116504e-06, "epoch": 0.2743102234171948, "percentage": 5.49, "elapsed_time": "0:11:37", "remaining_time": "3:20:23", "throughput": 19335.72, "total_tokens": 13495296} +{"current_steps": 4290, "total_steps": 78105, "loss": 0.4987, "lr": 2.745487133529638e-06, "epoch": 0.2746303053581717, "percentage": 5.49, "elapsed_time": "0:11:38", "remaining_time": "3:20:20", "throughput": 19339.29, "total_tokens": 13510912} +{"current_steps": 4295, "total_steps": 78105, "loss": 0.4589, "lr": 2.7486877480476253e-06, "epoch": 0.27495038729914856, "percentage": 5.5, "elapsed_time": "0:11:39", "remaining_time": "3:20:17", "throughput": 19342.52, "total_tokens": 13525952} +{"current_steps": 4300, "total_steps": 78105, "loss": 0.5413, "lr": 2.751888362565613e-06, "epoch": 0.2752704692401255, "percentage": 5.51, "elapsed_time": "0:11:40", "remaining_time": "3:20:16", "throughput": 19349.13, "total_tokens": 13546752} +{"current_steps": 4305, "total_steps": 78105, "loss": 0.5601, "lr": 2.7550889770836003e-06, "epoch": 0.2755905511811024, "percentage": 5.51, "elapsed_time": "0:11:40", "remaining_time": "3:20:13", "throughput": 19352.35, "total_tokens": 13561728} +{"current_steps": 4310, "total_steps": 78105, "loss": 0.4906, "lr": 2.7582895916015877e-06, "epoch": 0.27591063312207925, "percentage": 5.52, "elapsed_time": "0:11:41", "remaining_time": "3:20:10", "throughput": 19355.77, "total_tokens": 13577344} +{"current_steps": 4315, "total_steps": 78105, "loss": 0.5657, "lr": 2.761490206119575e-06, "epoch": 0.27623071506305613, "percentage": 5.52, "elapsed_time": "0:11:42", "remaining_time": "3:20:06", "throughput": 19358.75, "total_tokens": 13592256} +{"current_steps": 4320, "total_steps": 78105, "loss": 0.5641, "lr": 2.7646908206375627e-06, "epoch": 0.276550797004033, "percentage": 5.53, "elapsed_time": "0:11:42", "remaining_time": "3:20:04", "throughput": 19362.93, "total_tokens": 13608832} +{"current_steps": 4325, "total_steps": 78105, "loss": 0.5632, "lr": 2.76789143515555e-06, "epoch": 0.27687087894500995, "percentage": 5.54, "elapsed_time": "0:11:43", "remaining_time": "3:20:01", "throughput": 19366.15, "total_tokens": 13624128} +{"current_steps": 4330, "total_steps": 78105, "loss": 0.5651, "lr": 2.7710920496735376e-06, "epoch": 0.2771909608859868, "percentage": 5.54, "elapsed_time": "0:11:44", "remaining_time": "3:19:58", "throughput": 19370.05, "total_tokens": 13640576} +{"current_steps": 4335, "total_steps": 78105, "loss": 0.5601, "lr": 2.774292664191525e-06, "epoch": 0.2775110428269637, "percentage": 5.55, "elapsed_time": "0:11:44", "remaining_time": "3:19:55", "throughput": 19373.26, "total_tokens": 13655872} +{"current_steps": 4340, "total_steps": 78105, "loss": 0.7039, "lr": 2.7774932787095125e-06, "epoch": 0.2778311247679406, "percentage": 5.56, "elapsed_time": "0:11:45", "remaining_time": "3:19:52", "throughput": 19377.2, "total_tokens": 13672384} +{"current_steps": 4345, "total_steps": 78105, "loss": 0.4537, "lr": 2.7806938932275e-06, "epoch": 0.27815120670891746, "percentage": 5.56, "elapsed_time": "0:11:46", "remaining_time": "3:19:49", "throughput": 19380.28, "total_tokens": 13687296} +{"current_steps": 4350, "total_steps": 78105, "loss": 0.5639, "lr": 2.783894507745487e-06, "epoch": 0.2784712886498944, "percentage": 5.57, "elapsed_time": "0:11:46", "remaining_time": "3:19:45", "throughput": 19383.55, "total_tokens": 13702528} +{"current_steps": 4355, "total_steps": 78105, "loss": 0.4969, "lr": 2.7870951222634745e-06, "epoch": 0.2787913705908713, "percentage": 5.58, "elapsed_time": "0:11:47", "remaining_time": "3:19:42", "throughput": 19386.67, "total_tokens": 13717568} +{"current_steps": 4360, "total_steps": 78105, "loss": 0.4192, "lr": 2.790295736781462e-06, "epoch": 0.27911145253184816, "percentage": 5.58, "elapsed_time": "0:11:48", "remaining_time": "3:19:39", "throughput": 19390.39, "total_tokens": 13733568} +{"current_steps": 4365, "total_steps": 78105, "loss": 0.406, "lr": 2.7934963512994495e-06, "epoch": 0.27943153447282504, "percentage": 5.59, "elapsed_time": "0:11:48", "remaining_time": "3:19:36", "throughput": 19393.97, "total_tokens": 13749312} +{"current_steps": 4370, "total_steps": 78105, "loss": 0.5638, "lr": 2.796696965817437e-06, "epoch": 0.2797516164138019, "percentage": 5.6, "elapsed_time": "0:11:49", "remaining_time": "3:19:34", "throughput": 19397.92, "total_tokens": 13765952} +{"current_steps": 4375, "total_steps": 78105, "loss": 0.555, "lr": 2.7998975803354244e-06, "epoch": 0.2800716983547788, "percentage": 5.6, "elapsed_time": "0:11:50", "remaining_time": "3:19:31", "throughput": 19401.09, "total_tokens": 13781440} +{"current_steps": 4380, "total_steps": 78105, "loss": 0.4757, "lr": 2.803098194853412e-06, "epoch": 0.28039178029575573, "percentage": 5.61, "elapsed_time": "0:11:51", "remaining_time": "3:19:28", "throughput": 19404.31, "total_tokens": 13796864} +{"current_steps": 4385, "total_steps": 78105, "loss": 0.5962, "lr": 2.8062988093713993e-06, "epoch": 0.2807118622367326, "percentage": 5.61, "elapsed_time": "0:11:51", "remaining_time": "3:19:25", "throughput": 19407.9, "total_tokens": 13812736} +{"current_steps": 4390, "total_steps": 78105, "loss": 0.4975, "lr": 2.809499423889387e-06, "epoch": 0.2810319441777095, "percentage": 5.62, "elapsed_time": "0:11:52", "remaining_time": "3:19:22", "throughput": 19411.31, "total_tokens": 13828288} +{"current_steps": 4395, "total_steps": 78105, "loss": 0.491, "lr": 2.8127000384073743e-06, "epoch": 0.28135202611868637, "percentage": 5.63, "elapsed_time": "0:11:53", "remaining_time": "3:19:19", "throughput": 19415.82, "total_tokens": 13845568} +{"current_steps": 4400, "total_steps": 78105, "loss": 0.4427, "lr": 2.8159006529253617e-06, "epoch": 0.28167210805966325, "percentage": 5.63, "elapsed_time": "0:11:53", "remaining_time": "3:19:16", "throughput": 19419.04, "total_tokens": 13860608} +{"current_steps": 4405, "total_steps": 78105, "loss": 0.5652, "lr": 2.819101267443349e-06, "epoch": 0.2819921900006402, "percentage": 5.64, "elapsed_time": "0:11:54", "remaining_time": "3:19:13", "throughput": 19422.65, "total_tokens": 13876608} +{"current_steps": 4410, "total_steps": 78105, "loss": 0.5074, "lr": 2.8223018819613367e-06, "epoch": 0.28231227194161707, "percentage": 5.65, "elapsed_time": "0:11:55", "remaining_time": "3:19:11", "throughput": 19426.74, "total_tokens": 13893632} +{"current_steps": 4415, "total_steps": 78105, "loss": 0.4899, "lr": 2.825502496479324e-06, "epoch": 0.28263235388259395, "percentage": 5.65, "elapsed_time": "0:11:55", "remaining_time": "3:19:08", "throughput": 19430.0, "total_tokens": 13909312} +{"current_steps": 4420, "total_steps": 78105, "loss": 0.6169, "lr": 2.8287031109973116e-06, "epoch": 0.2829524358235708, "percentage": 5.66, "elapsed_time": "0:11:56", "remaining_time": "3:19:05", "throughput": 19433.75, "total_tokens": 13925824} +{"current_steps": 4425, "total_steps": 78105, "loss": 0.5673, "lr": 2.831903725515299e-06, "epoch": 0.2832725177645477, "percentage": 5.67, "elapsed_time": "0:11:57", "remaining_time": "3:19:03", "throughput": 19437.81, "total_tokens": 13942336} +{"current_steps": 4430, "total_steps": 78105, "loss": 0.4293, "lr": 2.8351043400332865e-06, "epoch": 0.28359259970552464, "percentage": 5.67, "elapsed_time": "0:11:57", "remaining_time": "3:19:00", "throughput": 19441.04, "total_tokens": 13957760} +{"current_steps": 4435, "total_steps": 78105, "loss": 0.5372, "lr": 2.838304954551274e-06, "epoch": 0.2839126816465015, "percentage": 5.68, "elapsed_time": "0:11:58", "remaining_time": "3:18:56", "throughput": 19444.27, "total_tokens": 13972992} +{"current_steps": 4440, "total_steps": 78105, "loss": 0.4939, "lr": 2.8415055690692615e-06, "epoch": 0.2842327635874784, "percentage": 5.68, "elapsed_time": "0:11:59", "remaining_time": "3:18:54", "throughput": 19447.72, "total_tokens": 13988736} +{"current_steps": 4445, "total_steps": 78105, "loss": 0.6253, "lr": 2.8447061835872494e-06, "epoch": 0.2845528455284553, "percentage": 5.69, "elapsed_time": "0:11:59", "remaining_time": "3:18:50", "throughput": 19450.39, "total_tokens": 14003520} +{"current_steps": 4450, "total_steps": 78105, "loss": 0.5701, "lr": 2.847906798105237e-06, "epoch": 0.28487292746943216, "percentage": 5.7, "elapsed_time": "0:12:00", "remaining_time": "3:18:48", "throughput": 19454.59, "total_tokens": 14020608} +{"current_steps": 4455, "total_steps": 78105, "loss": 0.4201, "lr": 2.8511074126232243e-06, "epoch": 0.28519300941040904, "percentage": 5.7, "elapsed_time": "0:12:01", "remaining_time": "3:18:45", "throughput": 19457.43, "total_tokens": 14035328} +{"current_steps": 4460, "total_steps": 78105, "loss": 0.484, "lr": 2.8543080271412118e-06, "epoch": 0.285513091351386, "percentage": 5.71, "elapsed_time": "0:12:02", "remaining_time": "3:18:42", "throughput": 19460.55, "total_tokens": 14050816} +{"current_steps": 4465, "total_steps": 78105, "loss": 0.3641, "lr": 2.8575086416591992e-06, "epoch": 0.28583317329236285, "percentage": 5.72, "elapsed_time": "0:12:02", "remaining_time": "3:18:39", "throughput": 19464.16, "total_tokens": 14066880} +{"current_steps": 4470, "total_steps": 78105, "loss": 0.5482, "lr": 2.860709256177186e-06, "epoch": 0.28615325523333973, "percentage": 5.72, "elapsed_time": "0:12:03", "remaining_time": "3:18:35", "throughput": 19467.18, "total_tokens": 14081728} +{"current_steps": 4475, "total_steps": 78105, "loss": 0.5251, "lr": 2.8639098706951733e-06, "epoch": 0.2864733371743166, "percentage": 5.73, "elapsed_time": "0:12:04", "remaining_time": "3:18:33", "throughput": 19471.06, "total_tokens": 14097984} +{"current_steps": 4480, "total_steps": 78105, "loss": 0.4527, "lr": 2.8671104852131608e-06, "epoch": 0.2867934191152935, "percentage": 5.74, "elapsed_time": "0:12:04", "remaining_time": "3:18:30", "throughput": 19475.52, "total_tokens": 14115264} +{"current_steps": 4485, "total_steps": 78105, "loss": 0.3157, "lr": 2.8703110997311482e-06, "epoch": 0.2871135010562704, "percentage": 5.74, "elapsed_time": "0:12:05", "remaining_time": "3:18:27", "throughput": 19478.07, "total_tokens": 14129472} +{"current_steps": 4490, "total_steps": 78105, "loss": 0.63, "lr": 2.8735117142491357e-06, "epoch": 0.2874335829972473, "percentage": 5.75, "elapsed_time": "0:12:06", "remaining_time": "3:18:24", "throughput": 19481.39, "total_tokens": 14144896} +{"current_steps": 4495, "total_steps": 78105, "loss": 0.3895, "lr": 2.876712328767123e-06, "epoch": 0.2877536649382242, "percentage": 5.76, "elapsed_time": "0:12:06", "remaining_time": "3:18:20", "throughput": 19484.32, "total_tokens": 14159744} +{"current_steps": 4500, "total_steps": 78105, "loss": 0.4867, "lr": 2.8799129432851106e-06, "epoch": 0.28807374687920106, "percentage": 5.76, "elapsed_time": "0:12:07", "remaining_time": "3:18:18", "throughput": 19488.44, "total_tokens": 14176384} +{"current_steps": 4505, "total_steps": 78105, "loss": 0.5708, "lr": 2.883113557803098e-06, "epoch": 0.28839382882017794, "percentage": 5.77, "elapsed_time": "0:12:08", "remaining_time": "3:18:15", "throughput": 19491.96, "total_tokens": 14192320} +{"current_steps": 4510, "total_steps": 78105, "loss": 0.4018, "lr": 2.886314172321086e-06, "epoch": 0.2887139107611549, "percentage": 5.77, "elapsed_time": "0:12:08", "remaining_time": "3:18:12", "throughput": 19495.33, "total_tokens": 14208192} +{"current_steps": 4515, "total_steps": 78105, "loss": 0.6319, "lr": 2.8895147868390735e-06, "epoch": 0.28903399270213176, "percentage": 5.78, "elapsed_time": "0:12:09", "remaining_time": "3:18:09", "throughput": 19498.3, "total_tokens": 14223104} +{"current_steps": 4520, "total_steps": 78105, "loss": 0.4392, "lr": 2.892715401357061e-06, "epoch": 0.28935407464310864, "percentage": 5.79, "elapsed_time": "0:12:10", "remaining_time": "3:18:06", "throughput": 19501.62, "total_tokens": 14238464} +{"current_steps": 4525, "total_steps": 78105, "loss": 0.4296, "lr": 2.8959160158750484e-06, "epoch": 0.2896741565840855, "percentage": 5.79, "elapsed_time": "0:12:10", "remaining_time": "3:18:03", "throughput": 19505.11, "total_tokens": 14254656} +{"current_steps": 4530, "total_steps": 78105, "loss": 0.4133, "lr": 2.899116630393036e-06, "epoch": 0.2899942385250624, "percentage": 5.8, "elapsed_time": "0:12:11", "remaining_time": "3:18:00", "throughput": 19507.63, "total_tokens": 14268928} +{"current_steps": 4535, "total_steps": 78105, "loss": 0.5015, "lr": 2.9023172449110233e-06, "epoch": 0.29031432046603933, "percentage": 5.81, "elapsed_time": "0:12:12", "remaining_time": "3:17:57", "throughput": 19511.56, "total_tokens": 14285952} +{"current_steps": 4540, "total_steps": 78105, "loss": 0.4114, "lr": 2.905517859429011e-06, "epoch": 0.2906344024070162, "percentage": 5.81, "elapsed_time": "0:12:12", "remaining_time": "3:17:55", "throughput": 19515.07, "total_tokens": 14301760} +{"current_steps": 4545, "total_steps": 78105, "loss": 0.5292, "lr": 2.9087184739469983e-06, "epoch": 0.2909544843479931, "percentage": 5.82, "elapsed_time": "0:12:13", "remaining_time": "3:17:52", "throughput": 19518.7, "total_tokens": 14318208} +{"current_steps": 4550, "total_steps": 78105, "loss": 0.4741, "lr": 2.9119190884649857e-06, "epoch": 0.29127456628896997, "percentage": 5.83, "elapsed_time": "0:12:14", "remaining_time": "3:17:49", "throughput": 19522.15, "total_tokens": 14334144} +{"current_steps": 4555, "total_steps": 78105, "loss": 0.6629, "lr": 2.915119702982973e-06, "epoch": 0.29159464822994685, "percentage": 5.83, "elapsed_time": "0:12:14", "remaining_time": "3:17:46", "throughput": 19524.95, "total_tokens": 14349120} +{"current_steps": 4560, "total_steps": 78105, "loss": 0.5884, "lr": 2.9183203175009607e-06, "epoch": 0.29191473017092373, "percentage": 5.84, "elapsed_time": "0:12:15", "remaining_time": "3:17:44", "throughput": 19528.61, "total_tokens": 14365376} +{"current_steps": 4565, "total_steps": 78105, "loss": 0.5375, "lr": 2.921520932018948e-06, "epoch": 0.29223481211190067, "percentage": 5.84, "elapsed_time": "0:12:16", "remaining_time": "3:17:41", "throughput": 19532.11, "total_tokens": 14381568} +{"current_steps": 4570, "total_steps": 78105, "loss": 0.4778, "lr": 2.9247215465369356e-06, "epoch": 0.29255489405287755, "percentage": 5.85, "elapsed_time": "0:12:16", "remaining_time": "3:17:38", "throughput": 19535.3, "total_tokens": 14397312} +{"current_steps": 4575, "total_steps": 78105, "loss": 0.4894, "lr": 2.927922161054923e-06, "epoch": 0.2928749759938544, "percentage": 5.86, "elapsed_time": "0:12:17", "remaining_time": "3:17:35", "throughput": 19538.62, "total_tokens": 14412928} +{"current_steps": 4580, "total_steps": 78105, "loss": 0.5225, "lr": 2.9311227755729105e-06, "epoch": 0.2931950579348313, "percentage": 5.86, "elapsed_time": "0:12:18", "remaining_time": "3:17:32", "throughput": 19541.47, "total_tokens": 14427968} +{"current_steps": 4585, "total_steps": 78105, "loss": 0.4422, "lr": 2.934323390090898e-06, "epoch": 0.2935151398758082, "percentage": 5.87, "elapsed_time": "0:12:18", "remaining_time": "3:17:29", "throughput": 19544.43, "total_tokens": 14442944} +{"current_steps": 4590, "total_steps": 78105, "loss": 0.5032, "lr": 2.937524004608885e-06, "epoch": 0.2938352218167851, "percentage": 5.88, "elapsed_time": "0:12:19", "remaining_time": "3:17:26", "throughput": 19547.71, "total_tokens": 14458176} +{"current_steps": 4595, "total_steps": 78105, "loss": 0.4244, "lr": 2.9407246191268725e-06, "epoch": 0.294155303757762, "percentage": 5.88, "elapsed_time": "0:12:20", "remaining_time": "3:17:23", "throughput": 19550.8, "total_tokens": 14473600} +{"current_steps": 4600, "total_steps": 78105, "loss": 0.6711, "lr": 2.94392523364486e-06, "epoch": 0.2944753856987389, "percentage": 5.89, "elapsed_time": "0:12:20", "remaining_time": "3:17:20", "throughput": 19553.83, "total_tokens": 14488896} +{"current_steps": 4605, "total_steps": 78105, "loss": 0.6182, "lr": 2.9471258481628475e-06, "epoch": 0.29479546763971576, "percentage": 5.9, "elapsed_time": "0:12:21", "remaining_time": "3:17:16", "throughput": 19556.04, "total_tokens": 14502784} +{"current_steps": 4610, "total_steps": 78105, "loss": 0.4884, "lr": 2.950326462680835e-06, "epoch": 0.29511554958069264, "percentage": 5.9, "elapsed_time": "0:12:22", "remaining_time": "3:17:14", "throughput": 19560.33, "total_tokens": 14520000} +{"current_steps": 4615, "total_steps": 78105, "loss": 0.3833, "lr": 2.9535270771988224e-06, "epoch": 0.2954356315216696, "percentage": 5.91, "elapsed_time": "0:12:23", "remaining_time": "3:17:11", "throughput": 19563.9, "total_tokens": 14536320} +{"current_steps": 4620, "total_steps": 78105, "loss": 0.4793, "lr": 2.95672769171681e-06, "epoch": 0.29575571346264645, "percentage": 5.92, "elapsed_time": "0:12:23", "remaining_time": "3:17:08", "throughput": 19567.14, "total_tokens": 14551808} +{"current_steps": 4625, "total_steps": 78105, "loss": 0.3634, "lr": 2.9599283062347973e-06, "epoch": 0.29607579540362333, "percentage": 5.92, "elapsed_time": "0:12:24", "remaining_time": "3:17:05", "throughput": 19570.14, "total_tokens": 14566976} +{"current_steps": 4630, "total_steps": 78105, "loss": 0.3677, "lr": 2.963128920752785e-06, "epoch": 0.2963958773446002, "percentage": 5.93, "elapsed_time": "0:12:25", "remaining_time": "3:17:03", "throughput": 19573.6, "total_tokens": 14583104} +{"current_steps": 4635, "total_steps": 78105, "loss": 0.4969, "lr": 2.9663295352707723e-06, "epoch": 0.2967159592855771, "percentage": 5.93, "elapsed_time": "0:12:25", "remaining_time": "3:17:00", "throughput": 19576.76, "total_tokens": 14598336} +{"current_steps": 4640, "total_steps": 78105, "loss": 0.5815, "lr": 2.9695301497887597e-06, "epoch": 0.29703604122655397, "percentage": 5.94, "elapsed_time": "0:12:26", "remaining_time": "3:16:57", "throughput": 19580.81, "total_tokens": 14615232} +{"current_steps": 4645, "total_steps": 78105, "loss": 0.4829, "lr": 2.972730764306747e-06, "epoch": 0.2973561231675309, "percentage": 5.95, "elapsed_time": "0:12:27", "remaining_time": "3:16:55", "throughput": 19583.89, "total_tokens": 14630848} +{"current_steps": 4650, "total_steps": 78105, "loss": 0.5286, "lr": 2.9759313788247347e-06, "epoch": 0.2976762051085078, "percentage": 5.95, "elapsed_time": "0:12:27", "remaining_time": "3:16:52", "throughput": 19587.35, "total_tokens": 14647040} +{"current_steps": 4655, "total_steps": 78105, "loss": 0.4687, "lr": 2.979131993342722e-06, "epoch": 0.29799628704948466, "percentage": 5.96, "elapsed_time": "0:12:28", "remaining_time": "3:16:49", "throughput": 19589.99, "total_tokens": 14661888} +{"current_steps": 4660, "total_steps": 78105, "loss": 0.8269, "lr": 2.9823326078607096e-06, "epoch": 0.29831636899046154, "percentage": 5.97, "elapsed_time": "0:12:29", "remaining_time": "3:16:46", "throughput": 19593.21, "total_tokens": 14677696} +{"current_steps": 4665, "total_steps": 78105, "loss": 0.4892, "lr": 2.985533222378697e-06, "epoch": 0.2986364509314384, "percentage": 5.97, "elapsed_time": "0:12:29", "remaining_time": "3:16:43", "throughput": 19596.5, "total_tokens": 14693184} +{"current_steps": 4670, "total_steps": 78105, "loss": 0.3668, "lr": 2.9887338368966845e-06, "epoch": 0.29895653287241536, "percentage": 5.98, "elapsed_time": "0:12:30", "remaining_time": "3:16:40", "throughput": 19599.33, "total_tokens": 14707904} +{"current_steps": 4675, "total_steps": 78105, "loss": 0.4194, "lr": 2.991934451414672e-06, "epoch": 0.29927661481339224, "percentage": 5.99, "elapsed_time": "0:12:31", "remaining_time": "3:16:37", "throughput": 19602.92, "total_tokens": 14724096} +{"current_steps": 4680, "total_steps": 78105, "loss": 0.575, "lr": 2.9951350659326595e-06, "epoch": 0.2995966967543691, "percentage": 5.99, "elapsed_time": "0:12:31", "remaining_time": "3:16:34", "throughput": 19605.53, "total_tokens": 14738752} +{"current_steps": 4685, "total_steps": 78105, "loss": 0.3559, "lr": 2.998335680450647e-06, "epoch": 0.299916778695346, "percentage": 6.0, "elapsed_time": "0:12:32", "remaining_time": "3:16:31", "throughput": 19608.39, "total_tokens": 14753664} +{"current_steps": 4690, "total_steps": 78105, "loss": 0.4628, "lr": 3.0015362949686344e-06, "epoch": 0.3002368606363229, "percentage": 6.0, "elapsed_time": "0:12:33", "remaining_time": "3:16:28", "throughput": 19611.68, "total_tokens": 14769152} +{"current_steps": 4695, "total_steps": 78105, "loss": 0.5057, "lr": 3.004736909486622e-06, "epoch": 0.3005569425772998, "percentage": 6.01, "elapsed_time": "0:12:33", "remaining_time": "3:16:25", "throughput": 19614.48, "total_tokens": 14784128} +{"current_steps": 4700, "total_steps": 78105, "loss": 0.5411, "lr": 3.0079375240046093e-06, "epoch": 0.3008770245182767, "percentage": 6.02, "elapsed_time": "0:12:34", "remaining_time": "3:16:22", "throughput": 19617.94, "total_tokens": 14799936} +{"current_steps": 4705, "total_steps": 78105, "loss": 0.641, "lr": 3.011138138522597e-06, "epoch": 0.30119710645925357, "percentage": 6.02, "elapsed_time": "0:12:35", "remaining_time": "3:16:19", "throughput": 19620.31, "total_tokens": 14814208} +{"current_steps": 4710, "total_steps": 78105, "loss": 0.5401, "lr": 3.014338753040584e-06, "epoch": 0.30151718840023045, "percentage": 6.03, "elapsed_time": "0:12:35", "remaining_time": "3:16:16", "throughput": 19623.24, "total_tokens": 14829504} +{"current_steps": 4715, "total_steps": 78105, "loss": 0.4631, "lr": 3.0175393675585713e-06, "epoch": 0.30183727034120733, "percentage": 6.04, "elapsed_time": "0:12:36", "remaining_time": "3:16:12", "throughput": 19625.88, "total_tokens": 14843968} +{"current_steps": 4720, "total_steps": 78105, "loss": 0.4741, "lr": 3.0207399820765588e-06, "epoch": 0.30215735228218427, "percentage": 6.04, "elapsed_time": "0:12:37", "remaining_time": "3:16:09", "throughput": 19629.42, "total_tokens": 14859840} +{"current_steps": 4725, "total_steps": 78105, "loss": 0.5715, "lr": 3.0239405965945462e-06, "epoch": 0.30247743422316115, "percentage": 6.05, "elapsed_time": "0:12:37", "remaining_time": "3:16:07", "throughput": 19632.33, "total_tokens": 14875648} +{"current_steps": 4730, "total_steps": 78105, "loss": 0.5296, "lr": 3.0271412111125337e-06, "epoch": 0.302797516164138, "percentage": 6.06, "elapsed_time": "0:12:38", "remaining_time": "3:16:04", "throughput": 19634.85, "total_tokens": 14890560} +{"current_steps": 4735, "total_steps": 78105, "loss": 0.5479, "lr": 3.030341825630521e-06, "epoch": 0.3031175981051149, "percentage": 6.06, "elapsed_time": "0:12:39", "remaining_time": "3:16:01", "throughput": 19638.27, "total_tokens": 14906688} +{"current_steps": 4740, "total_steps": 78105, "loss": 0.625, "lr": 3.0335424401485086e-06, "epoch": 0.3034376800460918, "percentage": 6.07, "elapsed_time": "0:12:39", "remaining_time": "3:15:59", "throughput": 19641.46, "total_tokens": 14922496} +{"current_steps": 4745, "total_steps": 78105, "loss": 0.5292, "lr": 3.036743054666496e-06, "epoch": 0.30375776198706866, "percentage": 6.08, "elapsed_time": "0:12:40", "remaining_time": "3:15:56", "throughput": 19644.86, "total_tokens": 14938880} +{"current_steps": 4750, "total_steps": 78105, "loss": 0.5366, "lr": 3.0399436691844836e-06, "epoch": 0.3040778439280456, "percentage": 6.08, "elapsed_time": "0:12:41", "remaining_time": "3:15:53", "throughput": 19647.68, "total_tokens": 14954112} +{"current_steps": 4755, "total_steps": 78105, "loss": 0.4153, "lr": 3.043144283702471e-06, "epoch": 0.3043979258690225, "percentage": 6.09, "elapsed_time": "0:12:41", "remaining_time": "3:15:51", "throughput": 19650.76, "total_tokens": 14969536} +{"current_steps": 4760, "total_steps": 78105, "loss": 0.3946, "lr": 3.0463448982204585e-06, "epoch": 0.30471800780999936, "percentage": 6.09, "elapsed_time": "0:12:42", "remaining_time": "3:15:48", "throughput": 19654.21, "total_tokens": 14985728} +{"current_steps": 4765, "total_steps": 78105, "loss": 0.5285, "lr": 3.049545512738446e-06, "epoch": 0.30503808975097624, "percentage": 6.1, "elapsed_time": "0:12:43", "remaining_time": "3:15:46", "throughput": 19658.77, "total_tokens": 15003904} +{"current_steps": 4770, "total_steps": 78105, "loss": 0.6277, "lr": 3.0527461272564334e-06, "epoch": 0.3053581716919531, "percentage": 6.11, "elapsed_time": "0:12:43", "remaining_time": "3:15:44", "throughput": 19661.78, "total_tokens": 15019328} +{"current_steps": 4775, "total_steps": 78105, "loss": 0.5619, "lr": 3.055946741774421e-06, "epoch": 0.30567825363293005, "percentage": 6.11, "elapsed_time": "0:12:44", "remaining_time": "3:15:40", "throughput": 19664.22, "total_tokens": 15033856} +{"current_steps": 4780, "total_steps": 78105, "loss": 0.4167, "lr": 3.0591473562924084e-06, "epoch": 0.30599833557390693, "percentage": 6.12, "elapsed_time": "0:12:45", "remaining_time": "3:15:39", "throughput": 19668.17, "total_tokens": 15051200} +{"current_steps": 4785, "total_steps": 78105, "loss": 0.4579, "lr": 3.062347970810396e-06, "epoch": 0.3063184175148838, "percentage": 6.13, "elapsed_time": "0:12:45", "remaining_time": "3:15:36", "throughput": 19671.04, "total_tokens": 15066368} +{"current_steps": 4790, "total_steps": 78105, "loss": 0.3633, "lr": 3.0655485853283833e-06, "epoch": 0.3066384994558607, "percentage": 6.13, "elapsed_time": "0:12:46", "remaining_time": "3:15:33", "throughput": 19673.83, "total_tokens": 15081600} +{"current_steps": 4795, "total_steps": 78105, "loss": 0.5975, "lr": 3.0687491998463708e-06, "epoch": 0.30695858139683757, "percentage": 6.14, "elapsed_time": "0:12:47", "remaining_time": "3:15:30", "throughput": 19677.06, "total_tokens": 15097536} +{"current_steps": 4800, "total_steps": 78105, "loss": 0.5337, "lr": 3.0719498143643583e-06, "epoch": 0.3072786633378145, "percentage": 6.15, "elapsed_time": "0:12:47", "remaining_time": "3:15:28", "throughput": 19680.49, "total_tokens": 15113728} +{"current_steps": 4805, "total_steps": 78105, "loss": 0.6239, "lr": 3.0751504288823457e-06, "epoch": 0.3075987452787914, "percentage": 6.15, "elapsed_time": "0:12:48", "remaining_time": "3:15:25", "throughput": 19683.69, "total_tokens": 15130048} +{"current_steps": 4810, "total_steps": 78105, "loss": 0.4298, "lr": 3.078351043400333e-06, "epoch": 0.30791882721976827, "percentage": 6.16, "elapsed_time": "0:12:49", "remaining_time": "3:15:23", "throughput": 19686.74, "total_tokens": 15145600} +{"current_steps": 4815, "total_steps": 78105, "loss": 0.5121, "lr": 3.0815516579183207e-06, "epoch": 0.30823890916074514, "percentage": 6.16, "elapsed_time": "0:12:50", "remaining_time": "3:15:20", "throughput": 19690.43, "total_tokens": 15162368} +{"current_steps": 4820, "total_steps": 78105, "loss": 0.497, "lr": 3.084752272436308e-06, "epoch": 0.308558991101722, "percentage": 6.17, "elapsed_time": "0:12:50", "remaining_time": "3:15:18", "throughput": 19693.73, "total_tokens": 15178368} +{"current_steps": 4825, "total_steps": 78105, "loss": 0.5909, "lr": 3.0879528869542956e-06, "epoch": 0.3088790730426989, "percentage": 6.18, "elapsed_time": "0:12:51", "remaining_time": "3:15:15", "throughput": 19696.61, "total_tokens": 15193408} +{"current_steps": 4830, "total_steps": 78105, "loss": 0.3568, "lr": 3.0911535014722826e-06, "epoch": 0.30919915498367584, "percentage": 6.18, "elapsed_time": "0:12:52", "remaining_time": "3:15:12", "throughput": 19699.48, "total_tokens": 15208640} +{"current_steps": 4835, "total_steps": 78105, "loss": 0.532, "lr": 3.09435411599027e-06, "epoch": 0.3095192369246527, "percentage": 6.19, "elapsed_time": "0:12:52", "remaining_time": "3:15:09", "throughput": 19702.68, "total_tokens": 15224192} +{"current_steps": 4840, "total_steps": 78105, "loss": 0.591, "lr": 3.0975547305082576e-06, "epoch": 0.3098393188656296, "percentage": 6.2, "elapsed_time": "0:12:53", "remaining_time": "3:15:06", "throughput": 19705.59, "total_tokens": 15239616} +{"current_steps": 4845, "total_steps": 78105, "loss": 0.4536, "lr": 3.100755345026245e-06, "epoch": 0.3101594008066065, "percentage": 6.2, "elapsed_time": "0:12:54", "remaining_time": "3:15:04", "throughput": 19708.86, "total_tokens": 15255680} +{"current_steps": 4850, "total_steps": 78105, "loss": 0.5104, "lr": 3.1039559595442325e-06, "epoch": 0.31047948274758336, "percentage": 6.21, "elapsed_time": "0:12:54", "remaining_time": "3:15:01", "throughput": 19711.86, "total_tokens": 15271232} +{"current_steps": 4855, "total_steps": 78105, "loss": 0.4745, "lr": 3.10715657406222e-06, "epoch": 0.3107995646885603, "percentage": 6.22, "elapsed_time": "0:12:55", "remaining_time": "3:14:58", "throughput": 19714.9, "total_tokens": 15287040} +{"current_steps": 4860, "total_steps": 78105, "loss": 0.5914, "lr": 3.1103571885802074e-06, "epoch": 0.31111964662953717, "percentage": 6.22, "elapsed_time": "0:12:56", "remaining_time": "3:14:56", "throughput": 19718.83, "total_tokens": 15304256} +{"current_steps": 4865, "total_steps": 78105, "loss": 0.3885, "lr": 3.113557803098195e-06, "epoch": 0.31143972857051405, "percentage": 6.23, "elapsed_time": "0:12:56", "remaining_time": "3:14:55", "throughput": 19723.02, "total_tokens": 15322112} +{"current_steps": 4870, "total_steps": 78105, "loss": 0.5413, "lr": 3.1167584176161824e-06, "epoch": 0.31175981051149093, "percentage": 6.24, "elapsed_time": "0:12:57", "remaining_time": "3:14:52", "throughput": 19725.83, "total_tokens": 15337216} +{"current_steps": 4875, "total_steps": 78105, "loss": 0.5022, "lr": 3.11995903213417e-06, "epoch": 0.3120798924524678, "percentage": 6.24, "elapsed_time": "0:12:58", "remaining_time": "3:14:50", "throughput": 19729.42, "total_tokens": 15354048} +{"current_steps": 4880, "total_steps": 78105, "loss": 0.4865, "lr": 3.1231596466521573e-06, "epoch": 0.31239997439344475, "percentage": 6.25, "elapsed_time": "0:12:58", "remaining_time": "3:14:47", "throughput": 19732.12, "total_tokens": 15369344} +{"current_steps": 4885, "total_steps": 78105, "loss": 0.5673, "lr": 3.1263602611701448e-06, "epoch": 0.3127200563344216, "percentage": 6.25, "elapsed_time": "0:12:59", "remaining_time": "3:14:45", "throughput": 19735.71, "total_tokens": 15385920} +{"current_steps": 4890, "total_steps": 78105, "loss": 0.4472, "lr": 3.1295608756881322e-06, "epoch": 0.3130401382753985, "percentage": 6.26, "elapsed_time": "0:13:00", "remaining_time": "3:14:42", "throughput": 19738.66, "total_tokens": 15401536} +{"current_steps": 4895, "total_steps": 78105, "loss": 0.5766, "lr": 3.1327614902061197e-06, "epoch": 0.3133602202163754, "percentage": 6.27, "elapsed_time": "0:13:00", "remaining_time": "3:14:39", "throughput": 19741.56, "total_tokens": 15417152} +{"current_steps": 4900, "total_steps": 78105, "loss": 0.6036, "lr": 3.135962104724107e-06, "epoch": 0.31368030215735226, "percentage": 6.27, "elapsed_time": "0:13:01", "remaining_time": "3:14:36", "throughput": 19744.08, "total_tokens": 15431872} +{"current_steps": 4905, "total_steps": 78105, "loss": 0.4521, "lr": 3.1391627192420946e-06, "epoch": 0.3140003840983292, "percentage": 6.28, "elapsed_time": "0:13:02", "remaining_time": "3:14:34", "throughput": 19747.07, "total_tokens": 15447552} +{"current_steps": 4910, "total_steps": 78105, "loss": 0.5343, "lr": 3.142363333760082e-06, "epoch": 0.3143204660393061, "percentage": 6.29, "elapsed_time": "0:13:02", "remaining_time": "3:14:32", "throughput": 19750.49, "total_tokens": 15464384} +{"current_steps": 4915, "total_steps": 78105, "loss": 0.5376, "lr": 3.1455639482780696e-06, "epoch": 0.31464054798028296, "percentage": 6.29, "elapsed_time": "0:13:03", "remaining_time": "3:14:29", "throughput": 19752.99, "total_tokens": 15479808} +{"current_steps": 4920, "total_steps": 78105, "loss": 0.5259, "lr": 3.148764562796057e-06, "epoch": 0.31496062992125984, "percentage": 6.3, "elapsed_time": "0:13:04", "remaining_time": "3:14:27", "throughput": 19756.32, "total_tokens": 15496512} +{"current_steps": 4925, "total_steps": 78105, "loss": 0.4217, "lr": 3.151965177314045e-06, "epoch": 0.3152807118622367, "percentage": 6.31, "elapsed_time": "0:13:05", "remaining_time": "3:14:25", "throughput": 19759.69, "total_tokens": 15512896} +{"current_steps": 4930, "total_steps": 78105, "loss": 0.3742, "lr": 3.1551657918320324e-06, "epoch": 0.3156007938032136, "percentage": 6.31, "elapsed_time": "0:13:05", "remaining_time": "3:14:22", "throughput": 19762.41, "total_tokens": 15528064} +{"current_steps": 4935, "total_steps": 78105, "loss": 0.4034, "lr": 3.15836640635002e-06, "epoch": 0.31592087574419053, "percentage": 6.32, "elapsed_time": "0:13:06", "remaining_time": "3:14:19", "throughput": 19765.21, "total_tokens": 15543424} +{"current_steps": 4940, "total_steps": 78105, "loss": 0.6094, "lr": 3.1615670208680073e-06, "epoch": 0.3162409576851674, "percentage": 6.32, "elapsed_time": "0:13:07", "remaining_time": "3:14:17", "throughput": 19768.55, "total_tokens": 15560192} +{"current_steps": 4945, "total_steps": 78105, "loss": 0.5051, "lr": 3.164767635385995e-06, "epoch": 0.3165610396261443, "percentage": 6.33, "elapsed_time": "0:13:07", "remaining_time": "3:14:15", "throughput": 19771.46, "total_tokens": 15576128} +{"current_steps": 4950, "total_steps": 78105, "loss": 0.7277, "lr": 3.1679682499039814e-06, "epoch": 0.31688112156712117, "percentage": 6.34, "elapsed_time": "0:13:08", "remaining_time": "3:14:13", "throughput": 19774.78, "total_tokens": 15592576} +{"current_steps": 4955, "total_steps": 78105, "loss": 0.4515, "lr": 3.171168864421969e-06, "epoch": 0.31720120350809805, "percentage": 6.34, "elapsed_time": "0:13:09", "remaining_time": "3:14:10", "throughput": 19777.34, "total_tokens": 15607744} +{"current_steps": 4960, "total_steps": 78105, "loss": 0.6054, "lr": 3.1743694789399564e-06, "epoch": 0.317521285449075, "percentage": 6.35, "elapsed_time": "0:13:09", "remaining_time": "3:14:07", "throughput": 19780.23, "total_tokens": 15623360} +{"current_steps": 4965, "total_steps": 78105, "loss": 0.4599, "lr": 3.177570093457944e-06, "epoch": 0.31784136739005187, "percentage": 6.36, "elapsed_time": "0:13:10", "remaining_time": "3:14:05", "throughput": 19783.11, "total_tokens": 15638656} +{"current_steps": 4970, "total_steps": 78105, "loss": 0.5218, "lr": 3.1807707079759313e-06, "epoch": 0.31816144933102875, "percentage": 6.36, "elapsed_time": "0:13:11", "remaining_time": "3:14:02", "throughput": 19786.06, "total_tokens": 15654400} +{"current_steps": 4975, "total_steps": 78105, "loss": 0.4533, "lr": 3.1839713224939188e-06, "epoch": 0.3184815312720056, "percentage": 6.37, "elapsed_time": "0:13:11", "remaining_time": "3:13:59", "throughput": 19788.87, "total_tokens": 15670016} +{"current_steps": 4980, "total_steps": 78105, "loss": 0.5557, "lr": 3.1871719370119062e-06, "epoch": 0.3188016132129825, "percentage": 6.38, "elapsed_time": "0:13:12", "remaining_time": "3:13:57", "throughput": 19792.17, "total_tokens": 15686208} +{"current_steps": 4985, "total_steps": 78105, "loss": 0.4331, "lr": 3.1903725515298937e-06, "epoch": 0.31912169515395944, "percentage": 6.38, "elapsed_time": "0:13:13", "remaining_time": "3:13:55", "throughput": 19795.28, "total_tokens": 15702592} +{"current_steps": 4990, "total_steps": 78105, "loss": 0.5049, "lr": 3.193573166047881e-06, "epoch": 0.3194417770949363, "percentage": 6.39, "elapsed_time": "0:13:13", "remaining_time": "3:13:52", "throughput": 19797.96, "total_tokens": 15717696} +{"current_steps": 4995, "total_steps": 78105, "loss": 0.4435, "lr": 3.196773780565869e-06, "epoch": 0.3197618590359132, "percentage": 6.4, "elapsed_time": "0:13:14", "remaining_time": "3:13:49", "throughput": 19800.76, "total_tokens": 15733184} +{"current_steps": 5000, "total_steps": 78105, "loss": 0.5851, "lr": 3.1999743950838565e-06, "epoch": 0.3200819409768901, "percentage": 6.4, "elapsed_time": "0:13:15", "remaining_time": "3:13:47", "throughput": 19804.0, "total_tokens": 15749888} +{"current_steps": 5005, "total_steps": 78105, "loss": 0.4457, "lr": 3.203175009601844e-06, "epoch": 0.32040202291786696, "percentage": 6.41, "elapsed_time": "0:13:16", "remaining_time": "3:13:46", "throughput": 19808.21, "total_tokens": 15768384} +{"current_steps": 5010, "total_steps": 78105, "loss": 0.3612, "lr": 3.2063756241198314e-06, "epoch": 0.32072210485884384, "percentage": 6.41, "elapsed_time": "0:13:16", "remaining_time": "3:13:44", "throughput": 19811.23, "total_tokens": 15784448} +{"current_steps": 5015, "total_steps": 78105, "loss": 0.4858, "lr": 3.209576238637819e-06, "epoch": 0.3210421867998208, "percentage": 6.42, "elapsed_time": "0:13:17", "remaining_time": "3:13:42", "throughput": 19814.2, "total_tokens": 15800512} +{"current_steps": 5020, "total_steps": 78105, "loss": 0.5056, "lr": 3.2127768531558064e-06, "epoch": 0.32136226874079765, "percentage": 6.43, "elapsed_time": "0:13:18", "remaining_time": "3:13:39", "throughput": 19816.7, "total_tokens": 15815680} +{"current_steps": 5025, "total_steps": 78105, "loss": 0.5082, "lr": 3.215977467673794e-06, "epoch": 0.32168235068177453, "percentage": 6.43, "elapsed_time": "0:13:18", "remaining_time": "3:13:36", "throughput": 19819.08, "total_tokens": 15830528} +{"current_steps": 5030, "total_steps": 78105, "loss": 0.5727, "lr": 3.2191780821917813e-06, "epoch": 0.3220024326227514, "percentage": 6.44, "elapsed_time": "0:13:19", "remaining_time": "3:13:33", "throughput": 19821.51, "total_tokens": 15845568} +{"current_steps": 5035, "total_steps": 78105, "loss": 0.5703, "lr": 3.2223786967097688e-06, "epoch": 0.3223225145637283, "percentage": 6.45, "elapsed_time": "0:13:20", "remaining_time": "3:13:31", "throughput": 19824.66, "total_tokens": 15861632} +{"current_steps": 5040, "total_steps": 78105, "loss": 0.6083, "lr": 3.2255793112277563e-06, "epoch": 0.3226425965047052, "percentage": 6.45, "elapsed_time": "0:13:20", "remaining_time": "3:13:28", "throughput": 19827.62, "total_tokens": 15877632} +{"current_steps": 5045, "total_steps": 78105, "loss": 0.4118, "lr": 3.2287799257457437e-06, "epoch": 0.3229626784456821, "percentage": 6.46, "elapsed_time": "0:13:21", "remaining_time": "3:13:26", "throughput": 19830.47, "total_tokens": 15893184} +{"current_steps": 5050, "total_steps": 78105, "loss": 0.4441, "lr": 3.231980540263731e-06, "epoch": 0.323282760386659, "percentage": 6.47, "elapsed_time": "0:13:22", "remaining_time": "3:13:23", "throughput": 19833.03, "total_tokens": 15908416} +{"current_steps": 5055, "total_steps": 78105, "loss": 0.4126, "lr": 3.2351811547817187e-06, "epoch": 0.32360284232763586, "percentage": 6.47, "elapsed_time": "0:13:22", "remaining_time": "3:13:22", "throughput": 19833.95, "total_tokens": 15924224} +{"current_steps": 5060, "total_steps": 78105, "loss": 0.341, "lr": 3.238381769299706e-06, "epoch": 0.32392292426861274, "percentage": 6.48, "elapsed_time": "0:13:23", "remaining_time": "3:13:19", "throughput": 19836.83, "total_tokens": 15940032} +{"current_steps": 5065, "total_steps": 78105, "loss": 0.5604, "lr": 3.2415823838176936e-06, "epoch": 0.3242430062095897, "percentage": 6.48, "elapsed_time": "0:13:24", "remaining_time": "3:13:17", "throughput": 19839.25, "total_tokens": 15954944} +{"current_steps": 5070, "total_steps": 78105, "loss": 0.5718, "lr": 3.2447829983356806e-06, "epoch": 0.32456308815056656, "percentage": 6.49, "elapsed_time": "0:13:24", "remaining_time": "3:13:14", "throughput": 19842.21, "total_tokens": 15970624} +{"current_steps": 5075, "total_steps": 78105, "loss": 0.5341, "lr": 3.247983612853668e-06, "epoch": 0.32488317009154344, "percentage": 6.5, "elapsed_time": "0:13:25", "remaining_time": "3:13:12", "throughput": 19845.2, "total_tokens": 15986688} +{"current_steps": 5080, "total_steps": 78105, "loss": 0.4263, "lr": 3.2511842273716556e-06, "epoch": 0.3252032520325203, "percentage": 6.5, "elapsed_time": "0:13:26", "remaining_time": "3:13:09", "throughput": 19848.17, "total_tokens": 16002752} +{"current_steps": 5085, "total_steps": 78105, "loss": 0.535, "lr": 3.254384841889643e-06, "epoch": 0.3255233339734972, "percentage": 6.51, "elapsed_time": "0:13:26", "remaining_time": "3:13:07", "throughput": 19850.96, "total_tokens": 16018112} +{"current_steps": 5090, "total_steps": 78105, "loss": 0.481, "lr": 3.2575854564076305e-06, "epoch": 0.32584341591447413, "percentage": 6.52, "elapsed_time": "0:13:27", "remaining_time": "3:13:04", "throughput": 19853.84, "total_tokens": 16033984} +{"current_steps": 5095, "total_steps": 78105, "loss": 0.3992, "lr": 3.260786070925618e-06, "epoch": 0.326163497855451, "percentage": 6.52, "elapsed_time": "0:13:28", "remaining_time": "3:13:02", "throughput": 19856.52, "total_tokens": 16049536} +{"current_steps": 5100, "total_steps": 78105, "loss": 0.4262, "lr": 3.2639866854436054e-06, "epoch": 0.3264835797964279, "percentage": 6.53, "elapsed_time": "0:13:28", "remaining_time": "3:13:00", "throughput": 19859.92, "total_tokens": 16066304} +{"current_steps": 5105, "total_steps": 78105, "loss": 0.5625, "lr": 3.267187299961593e-06, "epoch": 0.32680366173740477, "percentage": 6.54, "elapsed_time": "0:13:29", "remaining_time": "3:12:57", "throughput": 19862.4, "total_tokens": 16081536} +{"current_steps": 5110, "total_steps": 78105, "loss": 0.708, "lr": 3.2703879144795804e-06, "epoch": 0.32712374367838165, "percentage": 6.54, "elapsed_time": "0:13:30", "remaining_time": "3:12:55", "throughput": 19865.09, "total_tokens": 16096896} +{"current_steps": 5115, "total_steps": 78105, "loss": 0.5131, "lr": 3.273588528997568e-06, "epoch": 0.32744382561935853, "percentage": 6.55, "elapsed_time": "0:13:30", "remaining_time": "3:12:52", "throughput": 19868.15, "total_tokens": 16112960} +{"current_steps": 5120, "total_steps": 78105, "loss": 0.467, "lr": 3.2767891435155553e-06, "epoch": 0.32776390756033547, "percentage": 6.56, "elapsed_time": "0:13:31", "remaining_time": "3:12:50", "throughput": 19871.39, "total_tokens": 16129344} +{"current_steps": 5125, "total_steps": 78105, "loss": 0.6375, "lr": 3.2799897580335428e-06, "epoch": 0.32808398950131235, "percentage": 6.56, "elapsed_time": "0:13:32", "remaining_time": "3:12:49", "throughput": 19875.61, "total_tokens": 16147776} +{"current_steps": 5130, "total_steps": 78105, "loss": 0.4597, "lr": 3.2831903725515302e-06, "epoch": 0.3284040714422892, "percentage": 6.57, "elapsed_time": "0:13:33", "remaining_time": "3:12:46", "throughput": 19877.97, "total_tokens": 16162880} +{"current_steps": 5135, "total_steps": 78105, "loss": 0.2809, "lr": 3.2863909870695177e-06, "epoch": 0.3287241533832661, "percentage": 6.57, "elapsed_time": "0:13:33", "remaining_time": "3:12:44", "throughput": 19881.21, "total_tokens": 16179392} +{"current_steps": 5140, "total_steps": 78105, "loss": 0.6244, "lr": 3.289591601587505e-06, "epoch": 0.329044235324243, "percentage": 6.58, "elapsed_time": "0:13:34", "remaining_time": "3:12:41", "throughput": 19883.63, "total_tokens": 16194560} +{"current_steps": 5145, "total_steps": 78105, "loss": 0.5175, "lr": 3.2927922161054926e-06, "epoch": 0.3293643172652199, "percentage": 6.59, "elapsed_time": "0:13:35", "remaining_time": "3:12:39", "throughput": 19886.42, "total_tokens": 16210112} +{"current_steps": 5150, "total_steps": 78105, "loss": 0.5084, "lr": 3.29599283062348e-06, "epoch": 0.3296843992061968, "percentage": 6.59, "elapsed_time": "0:13:35", "remaining_time": "3:12:36", "throughput": 19889.29, "total_tokens": 16225856} +{"current_steps": 5155, "total_steps": 78105, "loss": 0.5103, "lr": 3.2991934451414676e-06, "epoch": 0.3300044811471737, "percentage": 6.6, "elapsed_time": "0:13:36", "remaining_time": "3:12:34", "throughput": 19891.84, "total_tokens": 16241536} +{"current_steps": 5160, "total_steps": 78105, "loss": 0.4566, "lr": 3.302394059659455e-06, "epoch": 0.33032456308815056, "percentage": 6.61, "elapsed_time": "0:13:37", "remaining_time": "3:12:31", "throughput": 19894.15, "total_tokens": 16256384} +{"current_steps": 5165, "total_steps": 78105, "loss": 0.4815, "lr": 3.3055946741774425e-06, "epoch": 0.33064464502912744, "percentage": 6.61, "elapsed_time": "0:13:37", "remaining_time": "3:12:28", "throughput": 19896.58, "total_tokens": 16271360} +{"current_steps": 5170, "total_steps": 78105, "loss": 0.4515, "lr": 3.30879528869543e-06, "epoch": 0.3309647269701044, "percentage": 6.62, "elapsed_time": "0:13:38", "remaining_time": "3:12:26", "throughput": 19899.07, "total_tokens": 16286656} +{"current_steps": 5175, "total_steps": 78105, "loss": 0.4787, "lr": 3.3119959032134174e-06, "epoch": 0.33128480891108125, "percentage": 6.63, "elapsed_time": "0:13:39", "remaining_time": "3:12:24", "throughput": 19901.89, "total_tokens": 16302592} +{"current_steps": 5180, "total_steps": 78105, "loss": 0.6982, "lr": 3.315196517731405e-06, "epoch": 0.33160489085205813, "percentage": 6.63, "elapsed_time": "0:13:39", "remaining_time": "3:12:21", "throughput": 19904.7, "total_tokens": 16318656} +{"current_steps": 5185, "total_steps": 78105, "loss": 0.5162, "lr": 3.3183971322493924e-06, "epoch": 0.331924972793035, "percentage": 6.64, "elapsed_time": "0:13:40", "remaining_time": "3:12:19", "throughput": 19907.43, "total_tokens": 16334016} +{"current_steps": 5190, "total_steps": 78105, "loss": 0.5793, "lr": 3.3215977467673794e-06, "epoch": 0.3322450547340119, "percentage": 6.64, "elapsed_time": "0:13:41", "remaining_time": "3:12:16", "throughput": 19910.33, "total_tokens": 16349888} +{"current_steps": 5195, "total_steps": 78105, "loss": 0.5796, "lr": 3.324798361285367e-06, "epoch": 0.33256513667498877, "percentage": 6.65, "elapsed_time": "0:13:41", "remaining_time": "3:12:14", "throughput": 19912.81, "total_tokens": 16365056} +{"current_steps": 5200, "total_steps": 78105, "loss": 0.4358, "lr": 3.3279989758033544e-06, "epoch": 0.3328852186159657, "percentage": 6.66, "elapsed_time": "0:13:42", "remaining_time": "3:12:11", "throughput": 19915.09, "total_tokens": 16380032} +{"current_steps": 5205, "total_steps": 78105, "loss": 0.6268, "lr": 3.331199590321342e-06, "epoch": 0.3332053005569426, "percentage": 6.66, "elapsed_time": "0:13:43", "remaining_time": "3:12:08", "throughput": 19917.28, "total_tokens": 16394752} +{"current_steps": 5210, "total_steps": 78105, "loss": 0.5224, "lr": 3.3344002048393293e-06, "epoch": 0.33352538249791946, "percentage": 6.67, "elapsed_time": "0:13:43", "remaining_time": "3:12:06", "throughput": 19919.88, "total_tokens": 16410368} +{"current_steps": 5215, "total_steps": 78105, "loss": 0.4788, "lr": 3.3376008193573168e-06, "epoch": 0.33384546443889634, "percentage": 6.68, "elapsed_time": "0:13:44", "remaining_time": "3:12:03", "throughput": 19921.99, "total_tokens": 16425088} +{"current_steps": 5220, "total_steps": 78105, "loss": 0.4521, "lr": 3.3408014338753042e-06, "epoch": 0.3341655463798732, "percentage": 6.68, "elapsed_time": "0:13:45", "remaining_time": "3:12:01", "throughput": 19924.95, "total_tokens": 16441152} +{"current_steps": 5225, "total_steps": 78105, "loss": 0.5389, "lr": 3.3440020483932917e-06, "epoch": 0.33448562832085016, "percentage": 6.69, "elapsed_time": "0:13:45", "remaining_time": "3:11:59", "throughput": 19928.35, "total_tokens": 16458240} +{"current_steps": 5230, "total_steps": 78105, "loss": 0.575, "lr": 3.347202662911279e-06, "epoch": 0.33480571026182704, "percentage": 6.7, "elapsed_time": "0:13:46", "remaining_time": "3:11:57", "throughput": 19931.19, "total_tokens": 16474112} +{"current_steps": 5235, "total_steps": 78105, "loss": 0.4176, "lr": 3.3504032774292666e-06, "epoch": 0.3351257922028039, "percentage": 6.7, "elapsed_time": "0:13:47", "remaining_time": "3:11:55", "throughput": 19934.33, "total_tokens": 16490560} +{"current_steps": 5240, "total_steps": 78105, "loss": 0.7561, "lr": 3.353603891947254e-06, "epoch": 0.3354458741437808, "percentage": 6.71, "elapsed_time": "0:13:47", "remaining_time": "3:11:52", "throughput": 19936.69, "total_tokens": 16505728} +{"current_steps": 5245, "total_steps": 78105, "loss": 0.491, "lr": 3.3568045064652416e-06, "epoch": 0.3357659560847577, "percentage": 6.72, "elapsed_time": "0:13:48", "remaining_time": "3:11:50", "throughput": 19939.65, "total_tokens": 16521856} +{"current_steps": 5250, "total_steps": 78105, "loss": 0.4595, "lr": 3.360005120983229e-06, "epoch": 0.3360860380257346, "percentage": 6.72, "elapsed_time": "0:13:49", "remaining_time": "3:11:47", "throughput": 19942.53, "total_tokens": 16537792} +{"current_steps": 5255, "total_steps": 78105, "loss": 0.4857, "lr": 3.3632057355012165e-06, "epoch": 0.3364061199667115, "percentage": 6.73, "elapsed_time": "0:13:49", "remaining_time": "3:11:45", "throughput": 19945.14, "total_tokens": 16553408} +{"current_steps": 5260, "total_steps": 78105, "loss": 0.4904, "lr": 3.366406350019204e-06, "epoch": 0.33672620190768837, "percentage": 6.73, "elapsed_time": "0:13:50", "remaining_time": "3:11:43", "throughput": 19948.25, "total_tokens": 16570112} +{"current_steps": 5265, "total_steps": 78105, "loss": 0.5798, "lr": 3.3696069645371914e-06, "epoch": 0.33704628384866525, "percentage": 6.74, "elapsed_time": "0:13:51", "remaining_time": "3:11:41", "throughput": 19951.03, "total_tokens": 16586304} +{"current_steps": 5270, "total_steps": 78105, "loss": 0.5247, "lr": 3.372807579055179e-06, "epoch": 0.33736636578964213, "percentage": 6.75, "elapsed_time": "0:13:52", "remaining_time": "3:11:39", "throughput": 19953.31, "total_tokens": 16601920} +{"current_steps": 5275, "total_steps": 78105, "loss": 0.5074, "lr": 3.3760081935731664e-06, "epoch": 0.33768644773061907, "percentage": 6.75, "elapsed_time": "0:13:52", "remaining_time": "3:11:37", "throughput": 19956.33, "total_tokens": 16618240} +{"current_steps": 5280, "total_steps": 78105, "loss": 0.6338, "lr": 3.379208808091154e-06, "epoch": 0.33800652967159595, "percentage": 6.76, "elapsed_time": "0:13:53", "remaining_time": "3:11:34", "throughput": 19958.75, "total_tokens": 16633408} +{"current_steps": 5285, "total_steps": 78105, "loss": 0.4926, "lr": 3.3824094226091413e-06, "epoch": 0.3383266116125728, "percentage": 6.77, "elapsed_time": "0:13:54", "remaining_time": "3:11:32", "throughput": 19961.61, "total_tokens": 16649664} +{"current_steps": 5290, "total_steps": 78105, "loss": 0.4736, "lr": 3.3856100371271288e-06, "epoch": 0.3386466935535497, "percentage": 6.77, "elapsed_time": "0:13:54", "remaining_time": "3:11:30", "throughput": 19963.97, "total_tokens": 16664896} +{"current_steps": 5295, "total_steps": 78105, "loss": 0.5782, "lr": 3.3888106516451162e-06, "epoch": 0.3389667754945266, "percentage": 6.78, "elapsed_time": "0:13:55", "remaining_time": "3:11:27", "throughput": 19966.38, "total_tokens": 16680384} +{"current_steps": 5300, "total_steps": 78105, "loss": 0.4507, "lr": 3.3920112661631037e-06, "epoch": 0.33928685743550346, "percentage": 6.79, "elapsed_time": "0:13:56", "remaining_time": "3:11:25", "throughput": 19969.2, "total_tokens": 16696192} +{"current_steps": 5305, "total_steps": 78105, "loss": 0.63, "lr": 3.395211880681091e-06, "epoch": 0.3396069393764804, "percentage": 6.79, "elapsed_time": "0:13:57", "remaining_time": "3:11:30", "throughput": 19976.64, "total_tokens": 16727104} +{"current_steps": 5310, "total_steps": 78105, "loss": 0.5304, "lr": 3.398412495199078e-06, "epoch": 0.3399270213174573, "percentage": 6.8, "elapsed_time": "0:13:57", "remaining_time": "3:11:27", "throughput": 19978.78, "total_tokens": 16741696} +{"current_steps": 5315, "total_steps": 78105, "loss": 0.589, "lr": 3.4016131097170657e-06, "epoch": 0.34024710325843416, "percentage": 6.8, "elapsed_time": "0:13:58", "remaining_time": "3:11:25", "throughput": 19981.23, "total_tokens": 16757120} +{"current_steps": 5320, "total_steps": 78105, "loss": 0.5482, "lr": 3.404813724235053e-06, "epoch": 0.34056718519941104, "percentage": 6.81, "elapsed_time": "0:13:59", "remaining_time": "3:11:22", "throughput": 19983.48, "total_tokens": 16772416} +{"current_steps": 5325, "total_steps": 78105, "loss": 0.3581, "lr": 3.4080143387530406e-06, "epoch": 0.3408872671403879, "percentage": 6.82, "elapsed_time": "0:13:59", "remaining_time": "3:11:20", "throughput": 19986.34, "total_tokens": 16788352} +{"current_steps": 5330, "total_steps": 78105, "loss": 0.4131, "lr": 3.411214953271028e-06, "epoch": 0.34120734908136485, "percentage": 6.82, "elapsed_time": "0:14:00", "remaining_time": "3:11:18", "throughput": 19988.71, "total_tokens": 16803648} +{"current_steps": 5335, "total_steps": 78105, "loss": 0.5274, "lr": 3.4144155677890155e-06, "epoch": 0.34152743102234173, "percentage": 6.83, "elapsed_time": "0:14:01", "remaining_time": "3:11:15", "throughput": 19991.11, "total_tokens": 16818944} +{"current_steps": 5340, "total_steps": 78105, "loss": 0.4915, "lr": 3.417616182307003e-06, "epoch": 0.3418475129633186, "percentage": 6.84, "elapsed_time": "0:14:01", "remaining_time": "3:11:13", "throughput": 19993.36, "total_tokens": 16833984} +{"current_steps": 5345, "total_steps": 78105, "loss": 0.6687, "lr": 3.4208167968249905e-06, "epoch": 0.3421675949042955, "percentage": 6.84, "elapsed_time": "0:14:02", "remaining_time": "3:11:10", "throughput": 19995.94, "total_tokens": 16849280} +{"current_steps": 5350, "total_steps": 78105, "loss": 0.4588, "lr": 3.424017411342978e-06, "epoch": 0.34248767684527237, "percentage": 6.85, "elapsed_time": "0:14:03", "remaining_time": "3:11:08", "throughput": 19998.72, "total_tokens": 16865664} +{"current_steps": 5355, "total_steps": 78105, "loss": 0.6085, "lr": 3.4272180258609654e-06, "epoch": 0.3428077587862493, "percentage": 6.86, "elapsed_time": "0:14:04", "remaining_time": "3:11:06", "throughput": 20001.15, "total_tokens": 16881536} +{"current_steps": 5360, "total_steps": 78105, "loss": 0.4435, "lr": 3.430418640378953e-06, "epoch": 0.3431278407272262, "percentage": 6.86, "elapsed_time": "0:14:04", "remaining_time": "3:11:04", "throughput": 20004.47, "total_tokens": 16898816} +{"current_steps": 5365, "total_steps": 78105, "loss": 0.5517, "lr": 3.4336192548969403e-06, "epoch": 0.34344792266820307, "percentage": 6.87, "elapsed_time": "0:14:05", "remaining_time": "3:11:02", "throughput": 20007.31, "total_tokens": 16915136} +{"current_steps": 5370, "total_steps": 78105, "loss": 0.59, "lr": 3.436819869414928e-06, "epoch": 0.34376800460917994, "percentage": 6.88, "elapsed_time": "0:14:06", "remaining_time": "3:11:00", "throughput": 20009.89, "total_tokens": 16931200} +{"current_steps": 5375, "total_steps": 78105, "loss": 0.5054, "lr": 3.4400204839329153e-06, "epoch": 0.3440880865501568, "percentage": 6.88, "elapsed_time": "0:14:06", "remaining_time": "3:10:58", "throughput": 20012.26, "total_tokens": 16946816} +{"current_steps": 5380, "total_steps": 78105, "loss": 0.4416, "lr": 3.4432210984509027e-06, "epoch": 0.3444081684911337, "percentage": 6.89, "elapsed_time": "0:14:07", "remaining_time": "3:10:56", "throughput": 20014.89, "total_tokens": 16962880} +{"current_steps": 5385, "total_steps": 78105, "loss": 0.5929, "lr": 3.4464217129688902e-06, "epoch": 0.34472825043211064, "percentage": 6.89, "elapsed_time": "0:14:08", "remaining_time": "3:10:54", "throughput": 20017.19, "total_tokens": 16978240} +{"current_steps": 5390, "total_steps": 78105, "loss": 0.4171, "lr": 3.4496223274868777e-06, "epoch": 0.3450483323730875, "percentage": 6.9, "elapsed_time": "0:14:08", "remaining_time": "3:10:51", "throughput": 20019.69, "total_tokens": 16993728} +{"current_steps": 5395, "total_steps": 78105, "loss": 0.6317, "lr": 3.452822942004865e-06, "epoch": 0.3453684143140644, "percentage": 6.91, "elapsed_time": "0:14:09", "remaining_time": "3:10:49", "throughput": 20022.24, "total_tokens": 17009728} +{"current_steps": 5400, "total_steps": 78105, "loss": 0.4645, "lr": 3.4560235565228526e-06, "epoch": 0.3456884962550413, "percentage": 6.91, "elapsed_time": "0:14:10", "remaining_time": "3:10:46", "throughput": 20024.39, "total_tokens": 17024640} +{"current_steps": 5405, "total_steps": 78105, "loss": 0.4109, "lr": 3.4592241710408405e-06, "epoch": 0.34600857819601816, "percentage": 6.92, "elapsed_time": "0:14:10", "remaining_time": "3:10:44", "throughput": 20026.91, "total_tokens": 17040512} +{"current_steps": 5410, "total_steps": 78105, "loss": 0.5849, "lr": 3.462424785558828e-06, "epoch": 0.3463286601369951, "percentage": 6.93, "elapsed_time": "0:14:11", "remaining_time": "3:10:42", "throughput": 20029.4, "total_tokens": 17056064} +{"current_steps": 5415, "total_steps": 78105, "loss": 0.492, "lr": 3.4656254000768154e-06, "epoch": 0.34664874207797197, "percentage": 6.93, "elapsed_time": "0:14:12", "remaining_time": "3:10:40", "throughput": 20032.3, "total_tokens": 17072448} +{"current_steps": 5420, "total_steps": 78105, "loss": 0.626, "lr": 3.468826014594803e-06, "epoch": 0.34696882401894885, "percentage": 6.94, "elapsed_time": "0:14:12", "remaining_time": "3:10:38", "throughput": 20034.79, "total_tokens": 17088320} +{"current_steps": 5425, "total_steps": 78105, "loss": 0.5002, "lr": 3.4720266291127904e-06, "epoch": 0.34728890595992573, "percentage": 6.95, "elapsed_time": "0:14:13", "remaining_time": "3:10:36", "throughput": 20037.55, "total_tokens": 17104448} +{"current_steps": 5430, "total_steps": 78105, "loss": 0.4201, "lr": 3.475227243630777e-06, "epoch": 0.3476089879009026, "percentage": 6.95, "elapsed_time": "0:14:14", "remaining_time": "3:10:34", "throughput": 20040.99, "total_tokens": 17121856} +{"current_steps": 5435, "total_steps": 78105, "loss": 0.6419, "lr": 3.4784278581487645e-06, "epoch": 0.34792906984187955, "percentage": 6.96, "elapsed_time": "0:14:14", "remaining_time": "3:10:31", "throughput": 20043.07, "total_tokens": 17136512} +{"current_steps": 5440, "total_steps": 78105, "loss": 0.5514, "lr": 3.481628472666752e-06, "epoch": 0.3482491517828564, "percentage": 6.96, "elapsed_time": "0:14:15", "remaining_time": "3:10:29", "throughput": 20045.86, "total_tokens": 17152640} +{"current_steps": 5445, "total_steps": 78105, "loss": 0.3865, "lr": 3.4848290871847394e-06, "epoch": 0.3485692337238333, "percentage": 6.97, "elapsed_time": "0:14:16", "remaining_time": "3:10:27", "throughput": 20047.99, "total_tokens": 17167680} +{"current_steps": 5450, "total_steps": 78105, "loss": 0.7384, "lr": 3.488029701702727e-06, "epoch": 0.3488893156648102, "percentage": 6.98, "elapsed_time": "0:14:17", "remaining_time": "3:10:24", "throughput": 20050.3, "total_tokens": 17183296} +{"current_steps": 5455, "total_steps": 78105, "loss": 0.6669, "lr": 3.4912303162207143e-06, "epoch": 0.34920939760578706, "percentage": 6.98, "elapsed_time": "0:14:17", "remaining_time": "3:10:22", "throughput": 20052.45, "total_tokens": 17198272} +{"current_steps": 5460, "total_steps": 78105, "loss": 0.5908, "lr": 3.494430930738702e-06, "epoch": 0.349529479546764, "percentage": 6.99, "elapsed_time": "0:14:18", "remaining_time": "3:10:19", "throughput": 20054.81, "total_tokens": 17213376} +{"current_steps": 5465, "total_steps": 78105, "loss": 0.5236, "lr": 3.4976315452566893e-06, "epoch": 0.3498495614877409, "percentage": 7.0, "elapsed_time": "0:14:19", "remaining_time": "3:10:17", "throughput": 20057.7, "total_tokens": 17229632} +{"current_steps": 5470, "total_steps": 78105, "loss": 0.6976, "lr": 3.5008321597746767e-06, "epoch": 0.35016964342871776, "percentage": 7.0, "elapsed_time": "0:14:19", "remaining_time": "3:10:15", "throughput": 20060.37, "total_tokens": 17245696} +{"current_steps": 5475, "total_steps": 78105, "loss": 0.4405, "lr": 3.5040327742926646e-06, "epoch": 0.35048972536969464, "percentage": 7.01, "elapsed_time": "0:14:20", "remaining_time": "3:10:13", "throughput": 20063.66, "total_tokens": 17262976} +{"current_steps": 5480, "total_steps": 78105, "loss": 0.48, "lr": 3.507233388810652e-06, "epoch": 0.3508098073106715, "percentage": 7.02, "elapsed_time": "0:14:21", "remaining_time": "3:10:11", "throughput": 20066.0, "total_tokens": 17278336} +{"current_steps": 5485, "total_steps": 78105, "loss": 0.4554, "lr": 3.5104340033286396e-06, "epoch": 0.3511298892516484, "percentage": 7.02, "elapsed_time": "0:14:21", "remaining_time": "3:10:09", "throughput": 20068.15, "total_tokens": 17293248} +{"current_steps": 5490, "total_steps": 78105, "loss": 0.4985, "lr": 3.513634617846627e-06, "epoch": 0.35144997119262533, "percentage": 7.03, "elapsed_time": "0:14:22", "remaining_time": "3:10:06", "throughput": 20070.62, "total_tokens": 17309184} +{"current_steps": 5495, "total_steps": 78105, "loss": 0.527, "lr": 3.5168352323646145e-06, "epoch": 0.3517700531336022, "percentage": 7.04, "elapsed_time": "0:14:23", "remaining_time": "3:10:04", "throughput": 20073.23, "total_tokens": 17325248} +{"current_steps": 5500, "total_steps": 78105, "loss": 0.5258, "lr": 3.520035846882602e-06, "epoch": 0.3520901350745791, "percentage": 7.04, "elapsed_time": "0:14:23", "remaining_time": "3:10:02", "throughput": 20075.6, "total_tokens": 17340736} +{"current_steps": 5505, "total_steps": 78105, "loss": 0.474, "lr": 3.5232364614005894e-06, "epoch": 0.35241021701555597, "percentage": 7.05, "elapsed_time": "0:14:24", "remaining_time": "3:10:00", "throughput": 20077.91, "total_tokens": 17355904} +{"current_steps": 5510, "total_steps": 78105, "loss": 0.5218, "lr": 3.526437075918577e-06, "epoch": 0.35273029895653285, "percentage": 7.05, "elapsed_time": "0:14:25", "remaining_time": "3:09:57", "throughput": 20080.21, "total_tokens": 17371264} +{"current_steps": 5515, "total_steps": 78105, "loss": 0.4929, "lr": 3.5296376904365644e-06, "epoch": 0.3530503808975098, "percentage": 7.06, "elapsed_time": "0:14:25", "remaining_time": "3:09:56", "throughput": 20083.24, "total_tokens": 17388608} +{"current_steps": 5520, "total_steps": 78105, "loss": 0.4981, "lr": 3.532838304954552e-06, "epoch": 0.35337046283848667, "percentage": 7.07, "elapsed_time": "0:14:26", "remaining_time": "3:09:53", "throughput": 20085.53, "total_tokens": 17403712} +{"current_steps": 5525, "total_steps": 78105, "loss": 0.5438, "lr": 3.5360389194725393e-06, "epoch": 0.35369054477946354, "percentage": 7.07, "elapsed_time": "0:14:27", "remaining_time": "3:09:52", "throughput": 20088.84, "total_tokens": 17421056} +{"current_steps": 5530, "total_steps": 78105, "loss": 0.6835, "lr": 3.5392395339905268e-06, "epoch": 0.3540106267204404, "percentage": 7.08, "elapsed_time": "0:14:27", "remaining_time": "3:09:49", "throughput": 20090.96, "total_tokens": 17436096} +{"current_steps": 5535, "total_steps": 78105, "loss": 0.4615, "lr": 3.5424401485085142e-06, "epoch": 0.3543307086614173, "percentage": 7.09, "elapsed_time": "0:14:28", "remaining_time": "3:09:47", "throughput": 20093.62, "total_tokens": 17452480} +{"current_steps": 5540, "total_steps": 78105, "loss": 0.454, "lr": 3.5456407630265017e-06, "epoch": 0.35465079060239424, "percentage": 7.09, "elapsed_time": "0:14:29", "remaining_time": "3:09:45", "throughput": 20095.9, "total_tokens": 17467776} +{"current_steps": 5545, "total_steps": 78105, "loss": 0.5886, "lr": 3.548841377544489e-06, "epoch": 0.3549708725433711, "percentage": 7.1, "elapsed_time": "0:14:29", "remaining_time": "3:09:42", "throughput": 20098.06, "total_tokens": 17482752} +{"current_steps": 5550, "total_steps": 78105, "loss": 0.4031, "lr": 3.552041992062476e-06, "epoch": 0.355290954484348, "percentage": 7.11, "elapsed_time": "0:14:30", "remaining_time": "3:09:40", "throughput": 20100.25, "total_tokens": 17497792} +{"current_steps": 5555, "total_steps": 78105, "loss": 0.6044, "lr": 3.5552426065804637e-06, "epoch": 0.3556110364253249, "percentage": 7.11, "elapsed_time": "0:14:31", "remaining_time": "3:09:37", "throughput": 20102.49, "total_tokens": 17513024} +{"current_steps": 5560, "total_steps": 78105, "loss": 0.4138, "lr": 3.558443221098451e-06, "epoch": 0.35593111836630176, "percentage": 7.12, "elapsed_time": "0:14:31", "remaining_time": "3:09:36", "throughput": 20104.77, "total_tokens": 17529024} +{"current_steps": 5565, "total_steps": 78105, "loss": 0.5137, "lr": 3.5616438356164386e-06, "epoch": 0.35625120030727864, "percentage": 7.13, "elapsed_time": "0:14:32", "remaining_time": "3:09:33", "throughput": 20106.7, "total_tokens": 17543936} +{"current_steps": 5570, "total_steps": 78105, "loss": 0.6699, "lr": 3.564844450134426e-06, "epoch": 0.35657128224825557, "percentage": 7.13, "elapsed_time": "0:14:33", "remaining_time": "3:09:31", "throughput": 20108.88, "total_tokens": 17559168} +{"current_steps": 5575, "total_steps": 78105, "loss": 0.4501, "lr": 3.5680450646524135e-06, "epoch": 0.35689136418923245, "percentage": 7.14, "elapsed_time": "0:14:33", "remaining_time": "3:09:29", "throughput": 20111.32, "total_tokens": 17574912} +{"current_steps": 5580, "total_steps": 78105, "loss": 0.4311, "lr": 3.571245679170401e-06, "epoch": 0.35721144613020933, "percentage": 7.14, "elapsed_time": "0:14:34", "remaining_time": "3:09:26", "throughput": 20113.6, "total_tokens": 17590272} +{"current_steps": 5585, "total_steps": 78105, "loss": 0.5236, "lr": 3.5744462936883885e-06, "epoch": 0.3575315280711862, "percentage": 7.15, "elapsed_time": "0:14:35", "remaining_time": "3:09:24", "throughput": 20115.96, "total_tokens": 17605952} +{"current_steps": 5590, "total_steps": 78105, "loss": 0.4981, "lr": 3.577646908206376e-06, "epoch": 0.3578516100121631, "percentage": 7.16, "elapsed_time": "0:14:35", "remaining_time": "3:09:22", "throughput": 20118.11, "total_tokens": 17621120} +{"current_steps": 5595, "total_steps": 78105, "loss": 0.6862, "lr": 3.5808475227243634e-06, "epoch": 0.35817169195314, "percentage": 7.16, "elapsed_time": "0:14:36", "remaining_time": "3:09:19", "throughput": 20120.21, "total_tokens": 17636096} +{"current_steps": 5600, "total_steps": 78105, "loss": 0.5436, "lr": 3.584048137242351e-06, "epoch": 0.3584917738941169, "percentage": 7.17, "elapsed_time": "0:14:37", "remaining_time": "3:09:17", "throughput": 20123.04, "total_tokens": 17652672} +{"current_steps": 5605, "total_steps": 78105, "loss": 0.6614, "lr": 3.5872487517603383e-06, "epoch": 0.3588118558350938, "percentage": 7.18, "elapsed_time": "0:14:37", "remaining_time": "3:09:15", "throughput": 20125.14, "total_tokens": 17668032} +{"current_steps": 5610, "total_steps": 78105, "loss": 0.5959, "lr": 3.590449366278326e-06, "epoch": 0.35913193777607066, "percentage": 7.18, "elapsed_time": "0:14:38", "remaining_time": "3:09:13", "throughput": 20127.28, "total_tokens": 17683264} +{"current_steps": 5615, "total_steps": 78105, "loss": 0.4085, "lr": 3.5936499807963133e-06, "epoch": 0.35945201971704754, "percentage": 7.19, "elapsed_time": "0:14:39", "remaining_time": "3:09:11", "throughput": 20129.77, "total_tokens": 17699200} +{"current_steps": 5620, "total_steps": 78105, "loss": 0.3946, "lr": 3.5968505953143007e-06, "epoch": 0.3597721016580245, "percentage": 7.2, "elapsed_time": "0:14:39", "remaining_time": "3:09:09", "throughput": 20132.11, "total_tokens": 17714816} +{"current_steps": 5625, "total_steps": 78105, "loss": 0.4728, "lr": 3.6000512098322882e-06, "epoch": 0.36009218359900136, "percentage": 7.2, "elapsed_time": "0:14:40", "remaining_time": "3:09:07", "throughput": 20135.36, "total_tokens": 17732352} +{"current_steps": 5630, "total_steps": 78105, "loss": 0.6305, "lr": 3.6032518243502757e-06, "epoch": 0.36041226553997824, "percentage": 7.21, "elapsed_time": "0:14:41", "remaining_time": "3:09:05", "throughput": 20137.87, "total_tokens": 17748288} +{"current_steps": 5635, "total_steps": 78105, "loss": 0.4213, "lr": 3.606452438868263e-06, "epoch": 0.3607323474809551, "percentage": 7.21, "elapsed_time": "0:14:42", "remaining_time": "3:09:03", "throughput": 20139.87, "total_tokens": 17763520} +{"current_steps": 5640, "total_steps": 78105, "loss": 0.5041, "lr": 3.6096530533862506e-06, "epoch": 0.361052429421932, "percentage": 7.22, "elapsed_time": "0:14:42", "remaining_time": "3:09:01", "throughput": 20142.27, "total_tokens": 17779328} +{"current_steps": 5645, "total_steps": 78105, "loss": 0.4788, "lr": 3.612853667904238e-06, "epoch": 0.3613725113629089, "percentage": 7.23, "elapsed_time": "0:14:43", "remaining_time": "3:08:59", "throughput": 20144.83, "total_tokens": 17795584} +{"current_steps": 5650, "total_steps": 78105, "loss": 0.463, "lr": 3.6160542824222255e-06, "epoch": 0.3616925933038858, "percentage": 7.23, "elapsed_time": "0:14:44", "remaining_time": "3:08:56", "throughput": 20146.7, "total_tokens": 17810368} +{"current_steps": 5655, "total_steps": 78105, "loss": 0.4958, "lr": 3.619254896940213e-06, "epoch": 0.3620126752448627, "percentage": 7.24, "elapsed_time": "0:14:44", "remaining_time": "3:08:54", "throughput": 20149.15, "total_tokens": 17826496} +{"current_steps": 5660, "total_steps": 78105, "loss": 0.5192, "lr": 3.6224555114582005e-06, "epoch": 0.36233275718583957, "percentage": 7.25, "elapsed_time": "0:14:45", "remaining_time": "3:08:52", "throughput": 20151.06, "total_tokens": 17841344} +{"current_steps": 5665, "total_steps": 78105, "loss": 0.5501, "lr": 3.625656125976188e-06, "epoch": 0.36265283912681645, "percentage": 7.25, "elapsed_time": "0:14:46", "remaining_time": "3:08:50", "throughput": 20153.41, "total_tokens": 17857408} +{"current_steps": 5670, "total_steps": 78105, "loss": 0.5163, "lr": 3.628856740494175e-06, "epoch": 0.36297292106779333, "percentage": 7.26, "elapsed_time": "0:14:46", "remaining_time": "3:08:48", "throughput": 20155.78, "total_tokens": 17873280} +{"current_steps": 5675, "total_steps": 78105, "loss": 0.5723, "lr": 3.6320573550121625e-06, "epoch": 0.36329300300877027, "percentage": 7.27, "elapsed_time": "0:14:47", "remaining_time": "3:08:46", "throughput": 20157.89, "total_tokens": 17888448} +{"current_steps": 5680, "total_steps": 78105, "loss": 0.5371, "lr": 3.63525796953015e-06, "epoch": 0.36361308494974715, "percentage": 7.27, "elapsed_time": "0:14:48", "remaining_time": "3:08:44", "throughput": 20160.2, "total_tokens": 17904320} +{"current_steps": 5685, "total_steps": 78105, "loss": 0.3783, "lr": 3.6384585840481374e-06, "epoch": 0.363933166890724, "percentage": 7.28, "elapsed_time": "0:14:48", "remaining_time": "3:08:41", "throughput": 20162.37, "total_tokens": 17919744} +{"current_steps": 5690, "total_steps": 78105, "loss": 0.3927, "lr": 3.641659198566125e-06, "epoch": 0.3642532488317009, "percentage": 7.29, "elapsed_time": "0:14:49", "remaining_time": "3:08:39", "throughput": 20164.49, "total_tokens": 17934656} +{"current_steps": 5695, "total_steps": 78105, "loss": 0.6628, "lr": 3.6448598130841123e-06, "epoch": 0.3645733307726778, "percentage": 7.29, "elapsed_time": "0:14:50", "remaining_time": "3:08:37", "throughput": 20167.06, "total_tokens": 17950976} +{"current_steps": 5700, "total_steps": 78105, "loss": 0.6469, "lr": 3.6480604276021e-06, "epoch": 0.3648934127136547, "percentage": 7.3, "elapsed_time": "0:14:50", "remaining_time": "3:08:35", "throughput": 20169.28, "total_tokens": 17966400} +{"current_steps": 5705, "total_steps": 78105, "loss": 0.6176, "lr": 3.6512610421200873e-06, "epoch": 0.3652134946546316, "percentage": 7.3, "elapsed_time": "0:14:51", "remaining_time": "3:08:33", "throughput": 20172.31, "total_tokens": 17983872} +{"current_steps": 5710, "total_steps": 78105, "loss": 0.548, "lr": 3.6544616566380747e-06, "epoch": 0.3655335765956085, "percentage": 7.31, "elapsed_time": "0:14:52", "remaining_time": "3:08:31", "throughput": 20174.83, "total_tokens": 17999680} +{"current_steps": 5715, "total_steps": 78105, "loss": 0.4054, "lr": 3.657662271156062e-06, "epoch": 0.36585365853658536, "percentage": 7.32, "elapsed_time": "0:14:52", "remaining_time": "3:08:29", "throughput": 20176.91, "total_tokens": 18014784} +{"current_steps": 5720, "total_steps": 78105, "loss": 0.5321, "lr": 3.6608628856740497e-06, "epoch": 0.36617374047756224, "percentage": 7.32, "elapsed_time": "0:14:53", "remaining_time": "3:08:26", "throughput": 20178.94, "total_tokens": 18029888} +{"current_steps": 5725, "total_steps": 78105, "loss": 0.4562, "lr": 3.664063500192037e-06, "epoch": 0.3664938224185392, "percentage": 7.33, "elapsed_time": "0:14:54", "remaining_time": "3:08:24", "throughput": 20181.26, "total_tokens": 18045632} +{"current_steps": 5730, "total_steps": 78105, "loss": 0.6684, "lr": 3.6672641147100246e-06, "epoch": 0.36681390435951605, "percentage": 7.34, "elapsed_time": "0:14:54", "remaining_time": "3:08:22", "throughput": 20183.3, "total_tokens": 18060928} +{"current_steps": 5735, "total_steps": 78105, "loss": 0.4735, "lr": 3.670464729228012e-06, "epoch": 0.36713398630049293, "percentage": 7.34, "elapsed_time": "0:14:55", "remaining_time": "3:08:20", "throughput": 20185.41, "total_tokens": 18076352} +{"current_steps": 5740, "total_steps": 78105, "loss": 0.5248, "lr": 3.6736653437459995e-06, "epoch": 0.3674540682414698, "percentage": 7.35, "elapsed_time": "0:14:56", "remaining_time": "3:08:18", "throughput": 20187.75, "total_tokens": 18092096} +{"current_steps": 5745, "total_steps": 78105, "loss": 0.7352, "lr": 3.676865958263987e-06, "epoch": 0.3677741501824467, "percentage": 7.36, "elapsed_time": "0:14:56", "remaining_time": "3:08:16", "throughput": 20190.03, "total_tokens": 18107648} +{"current_steps": 5750, "total_steps": 78105, "loss": 0.489, "lr": 3.6800665727819745e-06, "epoch": 0.36809423212342357, "percentage": 7.36, "elapsed_time": "0:14:57", "remaining_time": "3:08:14", "throughput": 20192.29, "total_tokens": 18123264} +{"current_steps": 5755, "total_steps": 78105, "loss": 0.6408, "lr": 3.683267187299962e-06, "epoch": 0.3684143140644005, "percentage": 7.37, "elapsed_time": "0:14:58", "remaining_time": "3:08:11", "throughput": 20194.42, "total_tokens": 18138368} +{"current_steps": 5760, "total_steps": 78105, "loss": 0.3899, "lr": 3.6864678018179494e-06, "epoch": 0.3687343960053774, "percentage": 7.37, "elapsed_time": "0:14:58", "remaining_time": "3:08:09", "throughput": 20196.84, "total_tokens": 18154432} +{"current_steps": 5765, "total_steps": 78105, "loss": 0.4416, "lr": 3.689668416335937e-06, "epoch": 0.36905447794635426, "percentage": 7.38, "elapsed_time": "0:14:59", "remaining_time": "3:08:08", "throughput": 20199.56, "total_tokens": 18171200} +{"current_steps": 5770, "total_steps": 78105, "loss": 0.4342, "lr": 3.6928690308539243e-06, "epoch": 0.36937455988733114, "percentage": 7.39, "elapsed_time": "0:15:00", "remaining_time": "3:08:05", "throughput": 20202.15, "total_tokens": 18187008} +{"current_steps": 5775, "total_steps": 78105, "loss": 0.4311, "lr": 3.696069645371912e-06, "epoch": 0.369694641828308, "percentage": 7.39, "elapsed_time": "0:15:00", "remaining_time": "3:08:03", "throughput": 20204.49, "total_tokens": 18202944} +{"current_steps": 5780, "total_steps": 78105, "loss": 0.6343, "lr": 3.6992702598898993e-06, "epoch": 0.37001472376928496, "percentage": 7.4, "elapsed_time": "0:15:01", "remaining_time": "3:08:01", "throughput": 20206.43, "total_tokens": 18217984} +{"current_steps": 5785, "total_steps": 78105, "loss": 0.6752, "lr": 3.7024708744078867e-06, "epoch": 0.37033480571026184, "percentage": 7.41, "elapsed_time": "0:15:02", "remaining_time": "3:07:59", "throughput": 20208.66, "total_tokens": 18233984} +{"current_steps": 5790, "total_steps": 78105, "loss": 0.5054, "lr": 3.7056714889258738e-06, "epoch": 0.3706548876512387, "percentage": 7.41, "elapsed_time": "0:15:02", "remaining_time": "3:07:57", "throughput": 20210.57, "total_tokens": 18249024} +{"current_steps": 5795, "total_steps": 78105, "loss": 0.4128, "lr": 3.7088721034438612e-06, "epoch": 0.3709749695922156, "percentage": 7.42, "elapsed_time": "0:15:03", "remaining_time": "3:07:55", "throughput": 20212.52, "total_tokens": 18264256} +{"current_steps": 5800, "total_steps": 78105, "loss": 0.6297, "lr": 3.7120727179618487e-06, "epoch": 0.3712950515331925, "percentage": 7.43, "elapsed_time": "0:15:04", "remaining_time": "3:07:53", "throughput": 20214.62, "total_tokens": 18279936} +{"current_steps": 5805, "total_steps": 78105, "loss": 0.4761, "lr": 3.715273332479836e-06, "epoch": 0.3716151334741694, "percentage": 7.43, "elapsed_time": "0:15:04", "remaining_time": "3:07:51", "throughput": 20216.98, "total_tokens": 18295488} +{"current_steps": 5810, "total_steps": 78105, "loss": 0.6025, "lr": 3.7184739469978236e-06, "epoch": 0.3719352154151463, "percentage": 7.44, "elapsed_time": "0:15:05", "remaining_time": "3:07:49", "throughput": 20219.3, "total_tokens": 18311360} +{"current_steps": 5815, "total_steps": 78105, "loss": 0.4959, "lr": 3.721674561515811e-06, "epoch": 0.37225529735612317, "percentage": 7.45, "elapsed_time": "0:15:06", "remaining_time": "3:07:46", "throughput": 20221.36, "total_tokens": 18326592} +{"current_steps": 5820, "total_steps": 78105, "loss": 0.3994, "lr": 3.7248751760337986e-06, "epoch": 0.37257537929710005, "percentage": 7.45, "elapsed_time": "0:15:07", "remaining_time": "3:07:45", "throughput": 20224.22, "total_tokens": 18344000} +{"current_steps": 5825, "total_steps": 78105, "loss": 0.6157, "lr": 3.728075790551786e-06, "epoch": 0.37289546123807693, "percentage": 7.46, "elapsed_time": "0:15:07", "remaining_time": "3:07:43", "throughput": 20226.62, "total_tokens": 18359872} +{"current_steps": 5830, "total_steps": 78105, "loss": 0.4682, "lr": 3.7312764050697735e-06, "epoch": 0.3732155431790538, "percentage": 7.46, "elapsed_time": "0:15:08", "remaining_time": "3:07:40", "throughput": 20228.25, "total_tokens": 18374400} +{"current_steps": 5835, "total_steps": 78105, "loss": 0.3252, "lr": 3.734477019587761e-06, "epoch": 0.37353562512003075, "percentage": 7.47, "elapsed_time": "0:15:09", "remaining_time": "3:07:39", "throughput": 20230.71, "total_tokens": 18390656} +{"current_steps": 5840, "total_steps": 78105, "loss": 0.5131, "lr": 3.7376776341057485e-06, "epoch": 0.3738557070610076, "percentage": 7.48, "elapsed_time": "0:15:09", "remaining_time": "3:07:37", "throughput": 20232.75, "total_tokens": 18406400} +{"current_steps": 5845, "total_steps": 78105, "loss": 0.7071, "lr": 3.740878248623736e-06, "epoch": 0.3741757890019845, "percentage": 7.48, "elapsed_time": "0:15:10", "remaining_time": "3:07:35", "throughput": 20235.22, "total_tokens": 18422848} +{"current_steps": 5850, "total_steps": 78105, "loss": 0.6232, "lr": 3.7440788631417234e-06, "epoch": 0.3744958709429614, "percentage": 7.49, "elapsed_time": "0:15:11", "remaining_time": "3:07:33", "throughput": 20237.54, "total_tokens": 18438912} +{"current_steps": 5855, "total_steps": 78105, "loss": 0.5506, "lr": 3.747279477659711e-06, "epoch": 0.37481595288393826, "percentage": 7.5, "elapsed_time": "0:15:11", "remaining_time": "3:07:31", "throughput": 20240.08, "total_tokens": 18455488} +{"current_steps": 5860, "total_steps": 78105, "loss": 0.451, "lr": 3.7504800921776983e-06, "epoch": 0.3751360348249152, "percentage": 7.5, "elapsed_time": "0:15:12", "remaining_time": "3:07:29", "throughput": 20241.64, "total_tokens": 18470080} +{"current_steps": 5865, "total_steps": 78105, "loss": 0.5892, "lr": 3.7536807066956858e-06, "epoch": 0.3754561167658921, "percentage": 7.51, "elapsed_time": "0:15:13", "remaining_time": "3:07:27", "throughput": 20243.24, "total_tokens": 18484480} +{"current_steps": 5870, "total_steps": 78105, "loss": 0.5427, "lr": 3.7568813212136733e-06, "epoch": 0.37577619870686896, "percentage": 7.52, "elapsed_time": "0:15:13", "remaining_time": "3:07:25", "throughput": 20246.03, "total_tokens": 18501312} +{"current_steps": 5875, "total_steps": 78105, "loss": 0.5258, "lr": 3.7600819357316607e-06, "epoch": 0.37609628064784584, "percentage": 7.52, "elapsed_time": "0:15:14", "remaining_time": "3:07:22", "throughput": 20247.8, "total_tokens": 18515968} +{"current_steps": 5880, "total_steps": 78105, "loss": 0.4716, "lr": 3.763282550249648e-06, "epoch": 0.3764163625888227, "percentage": 7.53, "elapsed_time": "0:15:15", "remaining_time": "3:07:20", "throughput": 20249.68, "total_tokens": 18531200} +{"current_steps": 5885, "total_steps": 78105, "loss": 0.4008, "lr": 3.766483164767636e-06, "epoch": 0.37673644452979965, "percentage": 7.53, "elapsed_time": "0:15:15", "remaining_time": "3:07:19", "throughput": 20252.16, "total_tokens": 18547712} +{"current_steps": 5890, "total_steps": 78105, "loss": 0.4438, "lr": 3.7696837792856235e-06, "epoch": 0.37705652647077653, "percentage": 7.54, "elapsed_time": "0:15:16", "remaining_time": "3:07:16", "throughput": 20254.03, "total_tokens": 18562752} +{"current_steps": 5895, "total_steps": 78105, "loss": 0.4763, "lr": 3.772884393803611e-06, "epoch": 0.3773766084117534, "percentage": 7.55, "elapsed_time": "0:15:17", "remaining_time": "3:07:14", "throughput": 20256.07, "total_tokens": 18578368} +{"current_steps": 5900, "total_steps": 78105, "loss": 0.4597, "lr": 3.7760850083215985e-06, "epoch": 0.3776966903527303, "percentage": 7.55, "elapsed_time": "0:15:17", "remaining_time": "3:07:12", "throughput": 20257.94, "total_tokens": 18593408} +{"current_steps": 5905, "total_steps": 78105, "loss": 0.4968, "lr": 3.779285622839586e-06, "epoch": 0.37801677229370717, "percentage": 7.56, "elapsed_time": "0:15:18", "remaining_time": "3:07:10", "throughput": 20259.67, "total_tokens": 18607872} +{"current_steps": 5910, "total_steps": 78105, "loss": 0.414, "lr": 3.7824862373575726e-06, "epoch": 0.3783368542346841, "percentage": 7.57, "elapsed_time": "0:15:19", "remaining_time": "3:07:08", "throughput": 20262.02, "total_tokens": 18624000} +{"current_steps": 5915, "total_steps": 78105, "loss": 0.6451, "lr": 3.78568685187556e-06, "epoch": 0.378656936175661, "percentage": 7.57, "elapsed_time": "0:15:19", "remaining_time": "3:07:06", "throughput": 20264.78, "total_tokens": 18640704} +{"current_steps": 5920, "total_steps": 78105, "loss": 0.501, "lr": 3.7888874663935475e-06, "epoch": 0.37897701811663786, "percentage": 7.58, "elapsed_time": "0:15:20", "remaining_time": "3:07:05", "throughput": 20267.91, "total_tokens": 18658368} +{"current_steps": 5925, "total_steps": 78105, "loss": 0.5381, "lr": 3.792088080911535e-06, "epoch": 0.37929710005761474, "percentage": 7.59, "elapsed_time": "0:15:21", "remaining_time": "3:07:03", "throughput": 20270.81, "total_tokens": 18675584} +{"current_steps": 5930, "total_steps": 78105, "loss": 0.6799, "lr": 3.7952886954295224e-06, "epoch": 0.3796171819985916, "percentage": 7.59, "elapsed_time": "0:15:21", "remaining_time": "3:07:01", "throughput": 20273.23, "total_tokens": 18691904} +{"current_steps": 5935, "total_steps": 78105, "loss": 0.4492, "lr": 3.79848930994751e-06, "epoch": 0.3799372639395685, "percentage": 7.6, "elapsed_time": "0:15:22", "remaining_time": "3:07:00", "throughput": 20275.91, "total_tokens": 18708736} +{"current_steps": 5940, "total_steps": 78105, "loss": 0.4253, "lr": 3.8016899244654974e-06, "epoch": 0.38025734588054544, "percentage": 7.61, "elapsed_time": "0:15:23", "remaining_time": "3:06:58", "throughput": 20277.8, "total_tokens": 18724032} +{"current_steps": 5945, "total_steps": 78105, "loss": 0.5151, "lr": 3.804890538983485e-06, "epoch": 0.3805774278215223, "percentage": 7.61, "elapsed_time": "0:15:24", "remaining_time": "3:06:56", "throughput": 20280.18, "total_tokens": 18740352} +{"current_steps": 5950, "total_steps": 78105, "loss": 0.4548, "lr": 3.8080911535014723e-06, "epoch": 0.3808975097624992, "percentage": 7.62, "elapsed_time": "0:15:24", "remaining_time": "3:06:53", "throughput": 20281.72, "total_tokens": 18754560} +{"current_steps": 5955, "total_steps": 78105, "loss": 0.7133, "lr": 3.81129176801946e-06, "epoch": 0.3812175917034761, "percentage": 7.62, "elapsed_time": "0:15:25", "remaining_time": "3:06:51", "throughput": 20283.59, "total_tokens": 18769280} +{"current_steps": 5960, "total_steps": 78105, "loss": 0.4603, "lr": 3.8144923825374477e-06, "epoch": 0.38153767364445296, "percentage": 7.63, "elapsed_time": "0:15:26", "remaining_time": "3:06:49", "throughput": 20285.55, "total_tokens": 18784512} +{"current_steps": 5965, "total_steps": 78105, "loss": 0.4459, "lr": 3.817692997055435e-06, "epoch": 0.3818577555854299, "percentage": 7.64, "elapsed_time": "0:15:26", "remaining_time": "3:06:47", "throughput": 20287.75, "total_tokens": 18800384} +{"current_steps": 5970, "total_steps": 78105, "loss": 0.4875, "lr": 3.820893611573423e-06, "epoch": 0.38217783752640677, "percentage": 7.64, "elapsed_time": "0:15:27", "remaining_time": "3:06:44", "throughput": 20289.77, "total_tokens": 18815552} +{"current_steps": 5975, "total_steps": 78105, "loss": 0.4904, "lr": 3.82409422609141e-06, "epoch": 0.38249791946738365, "percentage": 7.65, "elapsed_time": "0:15:27", "remaining_time": "3:06:42", "throughput": 20291.48, "total_tokens": 18830080} +{"current_steps": 5980, "total_steps": 78105, "loss": 0.5087, "lr": 3.8272948406093975e-06, "epoch": 0.38281800140836053, "percentage": 7.66, "elapsed_time": "0:15:28", "remaining_time": "3:06:44", "throughput": 20287.83, "total_tokens": 18846272} +{"current_steps": 5985, "total_steps": 78105, "loss": 0.4675, "lr": 3.8304954551273846e-06, "epoch": 0.3831380833493374, "percentage": 7.66, "elapsed_time": "0:15:29", "remaining_time": "3:06:41", "throughput": 20289.49, "total_tokens": 18860928} +{"current_steps": 5990, "total_steps": 78105, "loss": 0.4753, "lr": 3.8336960696453725e-06, "epoch": 0.38345816529031435, "percentage": 7.67, "elapsed_time": "0:15:30", "remaining_time": "3:06:39", "throughput": 20291.63, "total_tokens": 18877120} +{"current_steps": 5995, "total_steps": 78105, "loss": 0.6253, "lr": 3.8368966841633595e-06, "epoch": 0.3837782472312912, "percentage": 7.68, "elapsed_time": "0:15:30", "remaining_time": "3:06:37", "throughput": 20293.57, "total_tokens": 18892224} +{"current_steps": 6000, "total_steps": 78105, "loss": 0.5498, "lr": 3.840097298681347e-06, "epoch": 0.3840983291722681, "percentage": 7.68, "elapsed_time": "0:15:31", "remaining_time": "3:06:35", "throughput": 20295.28, "total_tokens": 18907008} +{"current_steps": 6005, "total_steps": 78105, "loss": 0.3618, "lr": 3.8432979131993344e-06, "epoch": 0.384418411113245, "percentage": 7.69, "elapsed_time": "0:15:32", "remaining_time": "3:06:33", "throughput": 20297.2, "total_tokens": 18922368} +{"current_steps": 6010, "total_steps": 78105, "loss": 0.4821, "lr": 3.846498527717322e-06, "epoch": 0.38473849305422186, "percentage": 7.69, "elapsed_time": "0:15:32", "remaining_time": "3:06:31", "throughput": 20299.05, "total_tokens": 18937536} +{"current_steps": 6015, "total_steps": 78105, "loss": 0.5246, "lr": 3.849699142235309e-06, "epoch": 0.38505857499519874, "percentage": 7.7, "elapsed_time": "0:15:33", "remaining_time": "3:06:29", "throughput": 20301.55, "total_tokens": 18954048} +{"current_steps": 6020, "total_steps": 78105, "loss": 0.5951, "lr": 3.852899756753297e-06, "epoch": 0.3853786569361757, "percentage": 7.71, "elapsed_time": "0:15:34", "remaining_time": "3:06:27", "throughput": 20303.45, "total_tokens": 18969472} +{"current_steps": 6025, "total_steps": 78105, "loss": 0.4919, "lr": 3.856100371271284e-06, "epoch": 0.38569873887715256, "percentage": 7.71, "elapsed_time": "0:15:34", "remaining_time": "3:06:25", "throughput": 20305.06, "total_tokens": 18983872} +{"current_steps": 6030, "total_steps": 78105, "loss": 0.4294, "lr": 3.859300985789271e-06, "epoch": 0.38601882081812944, "percentage": 7.72, "elapsed_time": "0:15:35", "remaining_time": "3:06:23", "throughput": 20306.95, "total_tokens": 18999360} +{"current_steps": 6035, "total_steps": 78105, "loss": 0.5733, "lr": 3.862501600307259e-06, "epoch": 0.3863389027591063, "percentage": 7.73, "elapsed_time": "0:15:36", "remaining_time": "3:06:21", "throughput": 20309.08, "total_tokens": 19015424} +{"current_steps": 6040, "total_steps": 78105, "loss": 0.4235, "lr": 3.865702214825246e-06, "epoch": 0.3866589847000832, "percentage": 7.73, "elapsed_time": "0:15:36", "remaining_time": "3:06:19", "throughput": 20311.28, "total_tokens": 19031616} +{"current_steps": 6045, "total_steps": 78105, "loss": 0.5539, "lr": 3.868902829343234e-06, "epoch": 0.38697906664106013, "percentage": 7.74, "elapsed_time": "0:15:37", "remaining_time": "3:06:17", "throughput": 20313.24, "total_tokens": 19047040} +{"current_steps": 6050, "total_steps": 78105, "loss": 0.551, "lr": 3.872103443861221e-06, "epoch": 0.387299148582037, "percentage": 7.75, "elapsed_time": "0:15:38", "remaining_time": "3:06:15", "throughput": 20315.4, "total_tokens": 19062848} +{"current_steps": 6055, "total_steps": 78105, "loss": 0.7136, "lr": 3.875304058379209e-06, "epoch": 0.3876192305230139, "percentage": 7.75, "elapsed_time": "0:15:39", "remaining_time": "3:06:13", "throughput": 20317.76, "total_tokens": 19078976} +{"current_steps": 6060, "total_steps": 78105, "loss": 0.4144, "lr": 3.878504672897196e-06, "epoch": 0.38793931246399077, "percentage": 7.76, "elapsed_time": "0:15:39", "remaining_time": "3:06:11", "throughput": 20319.45, "total_tokens": 19093632} +{"current_steps": 6065, "total_steps": 78105, "loss": 0.3784, "lr": 3.881705287415184e-06, "epoch": 0.38825939440496765, "percentage": 7.77, "elapsed_time": "0:15:40", "remaining_time": "3:06:09", "throughput": 20321.4, "total_tokens": 19109056} +{"current_steps": 6070, "total_steps": 78105, "loss": 0.3711, "lr": 3.884905901933171e-06, "epoch": 0.3885794763459446, "percentage": 7.77, "elapsed_time": "0:15:41", "remaining_time": "3:06:07", "throughput": 20323.32, "total_tokens": 19124544} +{"current_steps": 6075, "total_steps": 78105, "loss": 0.4527, "lr": 3.888106516451159e-06, "epoch": 0.38889955828692147, "percentage": 7.78, "elapsed_time": "0:15:41", "remaining_time": "3:06:05", "throughput": 20325.46, "total_tokens": 19140480} +{"current_steps": 6080, "total_steps": 78105, "loss": 0.46, "lr": 3.891307130969146e-06, "epoch": 0.38921964022789834, "percentage": 7.78, "elapsed_time": "0:15:42", "remaining_time": "3:06:03", "throughput": 20327.66, "total_tokens": 19156736} +{"current_steps": 6085, "total_steps": 78105, "loss": 0.5563, "lr": 3.894507745487134e-06, "epoch": 0.3895397221688752, "percentage": 7.79, "elapsed_time": "0:15:43", "remaining_time": "3:06:01", "throughput": 20329.37, "total_tokens": 19171520} +{"current_steps": 6090, "total_steps": 78105, "loss": 0.6042, "lr": 3.897708360005121e-06, "epoch": 0.3898598041098521, "percentage": 7.8, "elapsed_time": "0:15:43", "remaining_time": "3:05:59", "throughput": 20331.45, "total_tokens": 19187136} +{"current_steps": 6095, "total_steps": 78105, "loss": 0.5486, "lr": 3.900908974523109e-06, "epoch": 0.39017988605082904, "percentage": 7.8, "elapsed_time": "0:15:44", "remaining_time": "3:05:57", "throughput": 20333.12, "total_tokens": 19201856} +{"current_steps": 6100, "total_steps": 78105, "loss": 0.4568, "lr": 3.904109589041096e-06, "epoch": 0.3904999679918059, "percentage": 7.81, "elapsed_time": "0:15:45", "remaining_time": "3:05:55", "throughput": 20334.72, "total_tokens": 19216704} +{"current_steps": 6105, "total_steps": 78105, "loss": 0.6902, "lr": 3.907310203559084e-06, "epoch": 0.3908200499327828, "percentage": 7.82, "elapsed_time": "0:15:45", "remaining_time": "3:05:53", "throughput": 20337.36, "total_tokens": 19233408} +{"current_steps": 6110, "total_steps": 78105, "loss": 0.4412, "lr": 3.910510818077071e-06, "epoch": 0.3911401318737597, "percentage": 7.82, "elapsed_time": "0:15:46", "remaining_time": "3:05:51", "throughput": 20339.88, "total_tokens": 19250048} +{"current_steps": 6115, "total_steps": 78105, "loss": 0.5017, "lr": 3.913711432595059e-06, "epoch": 0.39146021381473656, "percentage": 7.83, "elapsed_time": "0:15:47", "remaining_time": "3:05:50", "throughput": 20342.31, "total_tokens": 19266560} +{"current_steps": 6120, "total_steps": 78105, "loss": 0.4863, "lr": 3.916912047113046e-06, "epoch": 0.39178029575571344, "percentage": 7.84, "elapsed_time": "0:15:47", "remaining_time": "3:05:47", "throughput": 20344.02, "total_tokens": 19281408} +{"current_steps": 6125, "total_steps": 78105, "loss": 0.4705, "lr": 3.920112661631034e-06, "epoch": 0.39210037769669037, "percentage": 7.84, "elapsed_time": "0:15:48", "remaining_time": "3:05:45", "throughput": 20345.74, "total_tokens": 19296384} +{"current_steps": 6130, "total_steps": 78105, "loss": 0.4233, "lr": 3.9233132761490215e-06, "epoch": 0.39242045963766725, "percentage": 7.85, "elapsed_time": "0:15:49", "remaining_time": "3:05:43", "throughput": 20347.74, "total_tokens": 19312000} +{"current_steps": 6135, "total_steps": 78105, "loss": 0.4467, "lr": 3.926513890667009e-06, "epoch": 0.39274054157864413, "percentage": 7.85, "elapsed_time": "0:15:49", "remaining_time": "3:05:41", "throughput": 20349.71, "total_tokens": 19327488} +{"current_steps": 6140, "total_steps": 78105, "loss": 0.6074, "lr": 3.9297145051849965e-06, "epoch": 0.393060623519621, "percentage": 7.86, "elapsed_time": "0:15:50", "remaining_time": "3:05:39", "throughput": 20351.37, "total_tokens": 19342080} +{"current_steps": 6145, "total_steps": 78105, "loss": 0.4601, "lr": 3.9329151197029835e-06, "epoch": 0.3933807054605979, "percentage": 7.87, "elapsed_time": "0:15:51", "remaining_time": "3:05:37", "throughput": 20353.6, "total_tokens": 19358336} +{"current_steps": 6150, "total_steps": 78105, "loss": 0.6262, "lr": 3.9361157342209706e-06, "epoch": 0.3937007874015748, "percentage": 7.87, "elapsed_time": "0:15:51", "remaining_time": "3:05:35", "throughput": 20355.41, "total_tokens": 19373376} +{"current_steps": 6155, "total_steps": 78105, "loss": 0.5754, "lr": 3.939316348738958e-06, "epoch": 0.3940208693425517, "percentage": 7.88, "elapsed_time": "0:15:52", "remaining_time": "3:05:33", "throughput": 20357.28, "total_tokens": 19388608} +{"current_steps": 6160, "total_steps": 78105, "loss": 0.6771, "lr": 3.9425169632569455e-06, "epoch": 0.3943409512835286, "percentage": 7.89, "elapsed_time": "0:15:53", "remaining_time": "3:05:31", "throughput": 20358.86, "total_tokens": 19403264} +{"current_steps": 6165, "total_steps": 78105, "loss": 0.602, "lr": 3.9457175777749325e-06, "epoch": 0.39466103322450546, "percentage": 7.89, "elapsed_time": "0:15:53", "remaining_time": "3:05:29", "throughput": 20360.7, "total_tokens": 19418816} +{"current_steps": 6170, "total_steps": 78105, "loss": 0.5712, "lr": 3.9489181922929204e-06, "epoch": 0.39498111516548234, "percentage": 7.9, "elapsed_time": "0:15:54", "remaining_time": "3:05:27", "throughput": 20363.04, "total_tokens": 19435392} +{"current_steps": 6175, "total_steps": 78105, "loss": 0.5541, "lr": 3.9521188068109075e-06, "epoch": 0.3953011971064593, "percentage": 7.91, "elapsed_time": "0:15:55", "remaining_time": "3:05:25", "throughput": 20364.66, "total_tokens": 19450048} +{"current_steps": 6180, "total_steps": 78105, "loss": 0.529, "lr": 3.955319421328895e-06, "epoch": 0.39562127904743616, "percentage": 7.91, "elapsed_time": "0:15:55", "remaining_time": "3:05:23", "throughput": 20367.03, "total_tokens": 19466368} +{"current_steps": 6185, "total_steps": 78105, "loss": 0.385, "lr": 3.958520035846882e-06, "epoch": 0.39594136098841304, "percentage": 7.92, "elapsed_time": "0:15:56", "remaining_time": "3:05:22", "throughput": 20369.13, "total_tokens": 19482624} +{"current_steps": 6190, "total_steps": 78105, "loss": 0.5878, "lr": 3.96172065036487e-06, "epoch": 0.3962614429293899, "percentage": 7.93, "elapsed_time": "0:15:57", "remaining_time": "3:05:20", "throughput": 20371.1, "total_tokens": 19498048} +{"current_steps": 6195, "total_steps": 78105, "loss": 0.5465, "lr": 3.964921264882858e-06, "epoch": 0.3965815248703668, "percentage": 7.93, "elapsed_time": "0:15:57", "remaining_time": "3:05:18", "throughput": 20373.57, "total_tokens": 19515264} +{"current_steps": 6200, "total_steps": 78105, "loss": 0.484, "lr": 3.968121879400845e-06, "epoch": 0.3969016068113437, "percentage": 7.94, "elapsed_time": "0:15:58", "remaining_time": "3:05:17", "throughput": 20376.37, "total_tokens": 19533056} +{"current_steps": 6205, "total_steps": 78105, "loss": 0.4691, "lr": 3.971322493918833e-06, "epoch": 0.3972216887523206, "percentage": 7.94, "elapsed_time": "0:15:59", "remaining_time": "3:05:15", "throughput": 20378.18, "total_tokens": 19548480} +{"current_steps": 6210, "total_steps": 78105, "loss": 0.3764, "lr": 3.97452310843682e-06, "epoch": 0.3975417706932975, "percentage": 7.95, "elapsed_time": "0:15:59", "remaining_time": "3:05:13", "throughput": 20379.99, "total_tokens": 19563712} +{"current_steps": 6215, "total_steps": 78105, "loss": 0.4834, "lr": 3.977723722954808e-06, "epoch": 0.39786185263427437, "percentage": 7.96, "elapsed_time": "0:16:00", "remaining_time": "3:05:11", "throughput": 20381.73, "total_tokens": 19578624} +{"current_steps": 6220, "total_steps": 78105, "loss": 0.4655, "lr": 3.980924337472795e-06, "epoch": 0.39818193457525125, "percentage": 7.96, "elapsed_time": "0:16:01", "remaining_time": "3:05:09", "throughput": 20383.3, "total_tokens": 19593408} +{"current_steps": 6225, "total_steps": 78105, "loss": 0.6619, "lr": 3.984124951990783e-06, "epoch": 0.39850201651622813, "percentage": 7.97, "elapsed_time": "0:16:01", "remaining_time": "3:05:07", "throughput": 20385.45, "total_tokens": 19609216} +{"current_steps": 6230, "total_steps": 78105, "loss": 0.578, "lr": 3.98732556650877e-06, "epoch": 0.39882209845720507, "percentage": 7.98, "elapsed_time": "0:16:02", "remaining_time": "3:05:05", "throughput": 20387.27, "total_tokens": 19624896} +{"current_steps": 6235, "total_steps": 78105, "loss": 0.5431, "lr": 3.990526181026758e-06, "epoch": 0.39914218039818194, "percentage": 7.98, "elapsed_time": "0:16:03", "remaining_time": "3:05:03", "throughput": 20389.12, "total_tokens": 19640128} +{"current_steps": 6240, "total_steps": 78105, "loss": 0.3773, "lr": 3.993726795544745e-06, "epoch": 0.3994622623391588, "percentage": 7.99, "elapsed_time": "0:16:03", "remaining_time": "3:05:01", "throughput": 20391.19, "total_tokens": 19655680} +{"current_steps": 6245, "total_steps": 78105, "loss": 0.6672, "lr": 3.996927410062733e-06, "epoch": 0.3997823442801357, "percentage": 8.0, "elapsed_time": "0:16:04", "remaining_time": "3:04:59", "throughput": 20393.41, "total_tokens": 19672192} +{"current_steps": 6250, "total_steps": 78105, "loss": 0.6138, "lr": 4.00012802458072e-06, "epoch": 0.4001024262211126, "percentage": 8.0, "elapsed_time": "0:16:05", "remaining_time": "3:04:58", "throughput": 20395.76, "total_tokens": 19688896} +{"current_steps": 6255, "total_steps": 78105, "loss": 0.4616, "lr": 4.003328639098708e-06, "epoch": 0.4004225081620895, "percentage": 8.01, "elapsed_time": "0:16:06", "remaining_time": "3:04:56", "throughput": 20397.62, "total_tokens": 19704128} +{"current_steps": 6260, "total_steps": 78105, "loss": 0.5449, "lr": 4.006529253616695e-06, "epoch": 0.4007425901030664, "percentage": 8.01, "elapsed_time": "0:16:06", "remaining_time": "3:04:54", "throughput": 20399.6, "total_tokens": 19719744} +{"current_steps": 6265, "total_steps": 78105, "loss": 0.5009, "lr": 4.009729868134683e-06, "epoch": 0.4010626720440433, "percentage": 8.02, "elapsed_time": "0:16:07", "remaining_time": "3:04:52", "throughput": 20401.92, "total_tokens": 19736128} +{"current_steps": 6270, "total_steps": 78105, "loss": 0.5497, "lr": 4.01293048265267e-06, "epoch": 0.40138275398502016, "percentage": 8.03, "elapsed_time": "0:16:08", "remaining_time": "3:04:50", "throughput": 20403.59, "total_tokens": 19751296} +{"current_steps": 6275, "total_steps": 78105, "loss": 0.5978, "lr": 4.016131097170657e-06, "epoch": 0.40170283592599704, "percentage": 8.03, "elapsed_time": "0:16:08", "remaining_time": "3:04:48", "throughput": 20405.07, "total_tokens": 19765888} +{"current_steps": 6280, "total_steps": 78105, "loss": 0.5703, "lr": 4.019331711688645e-06, "epoch": 0.402022917866974, "percentage": 8.04, "elapsed_time": "0:16:09", "remaining_time": "3:04:46", "throughput": 20406.63, "total_tokens": 19780864} +{"current_steps": 6285, "total_steps": 78105, "loss": 0.491, "lr": 4.022532326206632e-06, "epoch": 0.40234299980795085, "percentage": 8.05, "elapsed_time": "0:16:10", "remaining_time": "3:04:44", "throughput": 20408.75, "total_tokens": 19796864} +{"current_steps": 6290, "total_steps": 78105, "loss": 0.5399, "lr": 4.02573294072462e-06, "epoch": 0.40266308174892773, "percentage": 8.05, "elapsed_time": "0:16:10", "remaining_time": "3:04:42", "throughput": 20410.49, "total_tokens": 19811904} +{"current_steps": 6295, "total_steps": 78105, "loss": 0.5094, "lr": 4.028933555242607e-06, "epoch": 0.4029831636899046, "percentage": 8.06, "elapsed_time": "0:16:11", "remaining_time": "3:04:40", "throughput": 20412.33, "total_tokens": 19827456} +{"current_steps": 6300, "total_steps": 78105, "loss": 0.4796, "lr": 4.032134169760595e-06, "epoch": 0.4033032456308815, "percentage": 8.07, "elapsed_time": "0:16:12", "remaining_time": "3:04:38", "throughput": 20414.03, "total_tokens": 19842496} +{"current_steps": 6305, "total_steps": 78105, "loss": 0.5461, "lr": 4.035334784278582e-06, "epoch": 0.40362332757185837, "percentage": 8.07, "elapsed_time": "0:16:12", "remaining_time": "3:04:36", "throughput": 20415.94, "total_tokens": 19858240} +{"current_steps": 6310, "total_steps": 78105, "loss": 0.4699, "lr": 4.0385353987965695e-06, "epoch": 0.4039434095128353, "percentage": 8.08, "elapsed_time": "0:16:13", "remaining_time": "3:04:34", "throughput": 20418.23, "total_tokens": 19874368} +{"current_steps": 6315, "total_steps": 78105, "loss": 0.5295, "lr": 4.0417360133145566e-06, "epoch": 0.4042634914538122, "percentage": 8.09, "elapsed_time": "0:16:14", "remaining_time": "3:04:33", "throughput": 20420.75, "total_tokens": 19891904} +{"current_steps": 6320, "total_steps": 78105, "loss": 0.4789, "lr": 4.0449366278325445e-06, "epoch": 0.40458357339478906, "percentage": 8.09, "elapsed_time": "0:16:14", "remaining_time": "3:04:31", "throughput": 20422.51, "total_tokens": 19907392} +{"current_steps": 6325, "total_steps": 78105, "loss": 0.5229, "lr": 4.0481372423505315e-06, "epoch": 0.40490365533576594, "percentage": 8.1, "elapsed_time": "0:16:15", "remaining_time": "3:04:29", "throughput": 20424.34, "total_tokens": 19922624} +{"current_steps": 6330, "total_steps": 78105, "loss": 0.5257, "lr": 4.051337856868519e-06, "epoch": 0.4052237372767428, "percentage": 8.1, "elapsed_time": "0:16:16", "remaining_time": "3:04:28", "throughput": 20426.66, "total_tokens": 19939328} +{"current_steps": 6335, "total_steps": 78105, "loss": 0.4435, "lr": 4.0545384713865064e-06, "epoch": 0.40554381921771976, "percentage": 8.11, "elapsed_time": "0:16:16", "remaining_time": "3:04:26", "throughput": 20428.7, "total_tokens": 19955392} +{"current_steps": 6340, "total_steps": 78105, "loss": 0.5389, "lr": 4.057739085904494e-06, "epoch": 0.40586390115869664, "percentage": 8.12, "elapsed_time": "0:16:17", "remaining_time": "3:04:24", "throughput": 20430.85, "total_tokens": 19971520} +{"current_steps": 6345, "total_steps": 78105, "loss": 0.621, "lr": 4.060939700422481e-06, "epoch": 0.4061839830996735, "percentage": 8.12, "elapsed_time": "0:16:18", "remaining_time": "3:04:23", "throughput": 20433.24, "total_tokens": 19988352} +{"current_steps": 6350, "total_steps": 78105, "loss": 0.5246, "lr": 4.064140314940469e-06, "epoch": 0.4065040650406504, "percentage": 8.13, "elapsed_time": "0:16:18", "remaining_time": "3:04:22", "throughput": 20435.54, "total_tokens": 20005184} +{"current_steps": 6355, "total_steps": 78105, "loss": 0.4853, "lr": 4.067340929458456e-06, "epoch": 0.4068241469816273, "percentage": 8.14, "elapsed_time": "0:16:19", "remaining_time": "3:04:20", "throughput": 20437.42, "total_tokens": 20020800} +{"current_steps": 6360, "total_steps": 78105, "loss": 0.5291, "lr": 4.070541543976444e-06, "epoch": 0.4071442289226042, "percentage": 8.14, "elapsed_time": "0:16:20", "remaining_time": "3:04:18", "throughput": 20439.31, "total_tokens": 20036416} +{"current_steps": 6365, "total_steps": 78105, "loss": 0.5981, "lr": 4.073742158494431e-06, "epoch": 0.4074643108635811, "percentage": 8.15, "elapsed_time": "0:16:20", "remaining_time": "3:04:16", "throughput": 20441.64, "total_tokens": 20053184} +{"current_steps": 6370, "total_steps": 78105, "loss": 0.4081, "lr": 4.076942773012419e-06, "epoch": 0.40778439280455797, "percentage": 8.16, "elapsed_time": "0:16:21", "remaining_time": "3:04:15", "throughput": 20443.93, "total_tokens": 20069888} +{"current_steps": 6375, "total_steps": 78105, "loss": 0.5855, "lr": 4.080143387530406e-06, "epoch": 0.40810447474553485, "percentage": 8.16, "elapsed_time": "0:16:22", "remaining_time": "3:04:13", "throughput": 20445.83, "total_tokens": 20085760} +{"current_steps": 6380, "total_steps": 78105, "loss": 0.4317, "lr": 4.083344002048394e-06, "epoch": 0.40842455668651173, "percentage": 8.17, "elapsed_time": "0:16:23", "remaining_time": "3:04:11", "throughput": 20447.67, "total_tokens": 20101056} +{"current_steps": 6385, "total_steps": 78105, "loss": 0.5518, "lr": 4.086544616566381e-06, "epoch": 0.4087446386274886, "percentage": 8.17, "elapsed_time": "0:16:23", "remaining_time": "3:04:09", "throughput": 20449.22, "total_tokens": 20116352} +{"current_steps": 6390, "total_steps": 78105, "loss": 0.4846, "lr": 4.089745231084368e-06, "epoch": 0.40906472056846555, "percentage": 8.18, "elapsed_time": "0:16:24", "remaining_time": "3:04:07", "throughput": 20450.93, "total_tokens": 20131712} +{"current_steps": 6395, "total_steps": 78105, "loss": 0.4319, "lr": 4.092945845602356e-06, "epoch": 0.4093848025094424, "percentage": 8.19, "elapsed_time": "0:16:25", "remaining_time": "3:04:05", "throughput": 20452.63, "total_tokens": 20146880} +{"current_steps": 6400, "total_steps": 78105, "loss": 0.3733, "lr": 4.096146460120343e-06, "epoch": 0.4097048844504193, "percentage": 8.19, "elapsed_time": "0:16:25", "remaining_time": "3:04:04", "throughput": 20454.89, "total_tokens": 20163520} +{"current_steps": 6405, "total_steps": 78105, "loss": 0.4392, "lr": 4.099347074638331e-06, "epoch": 0.4100249663913962, "percentage": 8.2, "elapsed_time": "0:16:26", "remaining_time": "3:04:02", "throughput": 20456.26, "total_tokens": 20177984} +{"current_steps": 6410, "total_steps": 78105, "loss": 0.4381, "lr": 4.102547689156318e-06, "epoch": 0.41034504833237306, "percentage": 8.21, "elapsed_time": "0:16:27", "remaining_time": "3:04:00", "throughput": 20458.08, "total_tokens": 20193472} +{"current_steps": 6415, "total_steps": 78105, "loss": 0.5462, "lr": 4.105748303674306e-06, "epoch": 0.41066513027335, "percentage": 8.21, "elapsed_time": "0:16:27", "remaining_time": "3:03:58", "throughput": 20460.68, "total_tokens": 20210624} +{"current_steps": 6420, "total_steps": 78105, "loss": 0.5894, "lr": 4.108948918192293e-06, "epoch": 0.4109852122143269, "percentage": 8.22, "elapsed_time": "0:16:28", "remaining_time": "3:03:57", "throughput": 20462.71, "total_tokens": 20226688} +{"current_steps": 6425, "total_steps": 78105, "loss": 0.4931, "lr": 4.112149532710281e-06, "epoch": 0.41130529415530376, "percentage": 8.23, "elapsed_time": "0:16:29", "remaining_time": "3:03:55", "throughput": 20465.55, "total_tokens": 20244608} +{"current_steps": 6430, "total_steps": 78105, "loss": 0.4032, "lr": 4.115350147228268e-06, "epoch": 0.41162537609628064, "percentage": 8.23, "elapsed_time": "0:16:29", "remaining_time": "3:03:54", "throughput": 20467.67, "total_tokens": 20261184} +{"current_steps": 6435, "total_steps": 78105, "loss": 0.4591, "lr": 4.118550761746256e-06, "epoch": 0.4119454580372575, "percentage": 8.24, "elapsed_time": "0:16:30", "remaining_time": "3:03:52", "throughput": 20469.4, "total_tokens": 20276736} +{"current_steps": 6440, "total_steps": 78105, "loss": 0.5466, "lr": 4.121751376264243e-06, "epoch": 0.41226553997823445, "percentage": 8.25, "elapsed_time": "0:16:31", "remaining_time": "3:03:51", "throughput": 20471.74, "total_tokens": 20293888} +{"current_steps": 6445, "total_steps": 78105, "loss": 0.6605, "lr": 4.124951990782231e-06, "epoch": 0.41258562191921133, "percentage": 8.25, "elapsed_time": "0:16:31", "remaining_time": "3:03:49", "throughput": 20473.59, "total_tokens": 20309696} +{"current_steps": 6450, "total_steps": 78105, "loss": 0.5178, "lr": 4.128152605300218e-06, "epoch": 0.4129057038601882, "percentage": 8.26, "elapsed_time": "0:16:32", "remaining_time": "3:03:47", "throughput": 20475.49, "total_tokens": 20325632} +{"current_steps": 6455, "total_steps": 78105, "loss": 0.4484, "lr": 4.131353219818206e-06, "epoch": 0.4132257858011651, "percentage": 8.26, "elapsed_time": "0:16:33", "remaining_time": "3:03:46", "throughput": 20477.56, "total_tokens": 20341888} +{"current_steps": 6460, "total_steps": 78105, "loss": 0.5741, "lr": 4.134553834336193e-06, "epoch": 0.41354586774214197, "percentage": 8.27, "elapsed_time": "0:16:34", "remaining_time": "3:03:44", "throughput": 20479.93, "total_tokens": 20358656} +{"current_steps": 6465, "total_steps": 78105, "loss": 0.4933, "lr": 4.1377544488541806e-06, "epoch": 0.4138659496831189, "percentage": 8.28, "elapsed_time": "0:16:34", "remaining_time": "3:03:42", "throughput": 20481.21, "total_tokens": 20372864} +{"current_steps": 6470, "total_steps": 78105, "loss": 0.5224, "lr": 4.140955063372168e-06, "epoch": 0.4141860316240958, "percentage": 8.28, "elapsed_time": "0:16:35", "remaining_time": "3:03:40", "throughput": 20482.68, "total_tokens": 20387840} +{"current_steps": 6475, "total_steps": 78105, "loss": 0.4308, "lr": 4.1441556778901555e-06, "epoch": 0.41450611356507266, "percentage": 8.29, "elapsed_time": "0:16:36", "remaining_time": "3:03:39", "throughput": 20484.96, "total_tokens": 20404736} +{"current_steps": 6480, "total_steps": 78105, "loss": 0.6879, "lr": 4.1473562924081426e-06, "epoch": 0.41482619550604954, "percentage": 8.3, "elapsed_time": "0:16:36", "remaining_time": "3:03:38", "throughput": 20487.83, "total_tokens": 20422848} +{"current_steps": 6485, "total_steps": 78105, "loss": 0.5472, "lr": 4.1505569069261304e-06, "epoch": 0.4151462774470264, "percentage": 8.3, "elapsed_time": "0:16:37", "remaining_time": "3:03:36", "throughput": 20489.25, "total_tokens": 20438016} +{"current_steps": 6490, "total_steps": 78105, "loss": 0.6798, "lr": 4.1537575214441175e-06, "epoch": 0.4154663593880033, "percentage": 8.31, "elapsed_time": "0:16:38", "remaining_time": "3:03:34", "throughput": 20491.39, "total_tokens": 20454464} +{"current_steps": 6495, "total_steps": 78105, "loss": 0.4954, "lr": 4.156958135962105e-06, "epoch": 0.41578644132898024, "percentage": 8.32, "elapsed_time": "0:16:38", "remaining_time": "3:03:33", "throughput": 20493.4, "total_tokens": 20470464} +{"current_steps": 6500, "total_steps": 78105, "loss": 0.5747, "lr": 4.160158750480092e-06, "epoch": 0.4161065232699571, "percentage": 8.32, "elapsed_time": "0:16:39", "remaining_time": "3:03:31", "throughput": 20495.14, "total_tokens": 20485696} +{"current_steps": 6505, "total_steps": 78105, "loss": 0.4195, "lr": 4.16335936499808e-06, "epoch": 0.416426605210934, "percentage": 8.33, "elapsed_time": "0:16:40", "remaining_time": "3:03:29", "throughput": 20496.66, "total_tokens": 20500608} +{"current_steps": 6510, "total_steps": 78105, "loss": 0.5438, "lr": 4.166559979516067e-06, "epoch": 0.4167466871519109, "percentage": 8.33, "elapsed_time": "0:16:40", "remaining_time": "3:03:27", "throughput": 20498.28, "total_tokens": 20515776} +{"current_steps": 6515, "total_steps": 78105, "loss": 0.5452, "lr": 4.169760594034054e-06, "epoch": 0.41706676909288776, "percentage": 8.34, "elapsed_time": "0:16:41", "remaining_time": "3:03:25", "throughput": 20500.13, "total_tokens": 20531456} +{"current_steps": 6520, "total_steps": 78105, "loss": 0.5425, "lr": 4.172961208552042e-06, "epoch": 0.4173868510338647, "percentage": 8.35, "elapsed_time": "0:16:42", "remaining_time": "3:03:23", "throughput": 20501.88, "total_tokens": 20547200} +{"current_steps": 6525, "total_steps": 78105, "loss": 0.5122, "lr": 4.176161823070029e-06, "epoch": 0.41770693297484157, "percentage": 8.35, "elapsed_time": "0:16:42", "remaining_time": "3:03:21", "throughput": 20503.52, "total_tokens": 20562560} +{"current_steps": 6530, "total_steps": 78105, "loss": 0.4084, "lr": 4.179362437588017e-06, "epoch": 0.41802701491581845, "percentage": 8.36, "elapsed_time": "0:16:43", "remaining_time": "3:03:19", "throughput": 20504.97, "total_tokens": 20577344} +{"current_steps": 6535, "total_steps": 78105, "loss": 0.4662, "lr": 4.182563052106004e-06, "epoch": 0.41834709685679533, "percentage": 8.37, "elapsed_time": "0:16:44", "remaining_time": "3:03:17", "throughput": 20506.72, "total_tokens": 20593088} +{"current_steps": 6540, "total_steps": 78105, "loss": 0.4794, "lr": 4.185763666623992e-06, "epoch": 0.4186671787977722, "percentage": 8.37, "elapsed_time": "0:16:44", "remaining_time": "3:03:16", "throughput": 20508.98, "total_tokens": 20609792} +{"current_steps": 6545, "total_steps": 78105, "loss": 0.4879, "lr": 4.188964281141979e-06, "epoch": 0.41898726073874915, "percentage": 8.38, "elapsed_time": "0:16:45", "remaining_time": "3:03:14", "throughput": 20510.72, "total_tokens": 20625280} +{"current_steps": 6550, "total_steps": 78105, "loss": 0.5136, "lr": 4.192164895659967e-06, "epoch": 0.419307342679726, "percentage": 8.39, "elapsed_time": "0:16:46", "remaining_time": "3:03:13", "throughput": 20512.98, "total_tokens": 20641984} +{"current_steps": 6555, "total_steps": 78105, "loss": 0.6363, "lr": 4.195365510177954e-06, "epoch": 0.4196274246207029, "percentage": 8.39, "elapsed_time": "0:16:46", "remaining_time": "3:03:11", "throughput": 20514.98, "total_tokens": 20658240} +{"current_steps": 6560, "total_steps": 78105, "loss": 0.5114, "lr": 4.198566124695942e-06, "epoch": 0.4199475065616798, "percentage": 8.4, "elapsed_time": "0:16:47", "remaining_time": "3:03:09", "throughput": 20516.69, "total_tokens": 20673408} +{"current_steps": 6565, "total_steps": 78105, "loss": 0.4934, "lr": 4.201766739213929e-06, "epoch": 0.42026758850265666, "percentage": 8.41, "elapsed_time": "0:16:48", "remaining_time": "3:03:07", "throughput": 20518.45, "total_tokens": 20688960} +{"current_steps": 6570, "total_steps": 78105, "loss": 0.4829, "lr": 4.204967353731917e-06, "epoch": 0.42058767044363354, "percentage": 8.41, "elapsed_time": "0:16:48", "remaining_time": "3:03:06", "throughput": 20520.21, "total_tokens": 20704768} +{"current_steps": 6575, "total_steps": 78105, "loss": 0.4683, "lr": 4.208167968249904e-06, "epoch": 0.4209077523846105, "percentage": 8.42, "elapsed_time": "0:16:49", "remaining_time": "3:03:04", "throughput": 20521.78, "total_tokens": 20719808} +{"current_steps": 6580, "total_steps": 78105, "loss": 0.6623, "lr": 4.211368582767892e-06, "epoch": 0.42122783432558736, "percentage": 8.42, "elapsed_time": "0:16:50", "remaining_time": "3:03:02", "throughput": 20523.67, "total_tokens": 20735680} +{"current_steps": 6585, "total_steps": 78105, "loss": 0.4296, "lr": 4.214569197285879e-06, "epoch": 0.42154791626656424, "percentage": 8.43, "elapsed_time": "0:16:50", "remaining_time": "3:03:00", "throughput": 20525.04, "total_tokens": 20750144} +{"current_steps": 6590, "total_steps": 78105, "loss": 0.473, "lr": 4.217769811803867e-06, "epoch": 0.4218679982075411, "percentage": 8.44, "elapsed_time": "0:16:51", "remaining_time": "3:02:58", "throughput": 20526.92, "total_tokens": 20766016} +{"current_steps": 6595, "total_steps": 78105, "loss": 0.6441, "lr": 4.220970426321854e-06, "epoch": 0.422188080148518, "percentage": 8.44, "elapsed_time": "0:16:52", "remaining_time": "3:02:57", "throughput": 20529.1, "total_tokens": 20782720} +{"current_steps": 6600, "total_steps": 78105, "loss": 0.4494, "lr": 4.224171040839842e-06, "epoch": 0.42250816208949493, "percentage": 8.45, "elapsed_time": "0:16:53", "remaining_time": "3:02:55", "throughput": 20530.83, "total_tokens": 20798208} +{"current_steps": 6605, "total_steps": 78105, "loss": 0.763, "lr": 4.227371655357829e-06, "epoch": 0.4228282440304718, "percentage": 8.46, "elapsed_time": "0:16:53", "remaining_time": "3:02:53", "throughput": 20532.35, "total_tokens": 20813056} +{"current_steps": 6610, "total_steps": 78105, "loss": 0.483, "lr": 4.230572269875817e-06, "epoch": 0.4231483259714487, "percentage": 8.46, "elapsed_time": "0:16:54", "remaining_time": "3:02:51", "throughput": 20534.21, "total_tokens": 20828800} +{"current_steps": 6615, "total_steps": 78105, "loss": 0.5142, "lr": 4.233772884393805e-06, "epoch": 0.42346840791242557, "percentage": 8.47, "elapsed_time": "0:16:55", "remaining_time": "3:02:49", "throughput": 20536.03, "total_tokens": 20844736} +{"current_steps": 6620, "total_steps": 78105, "loss": 0.4001, "lr": 4.236973498911792e-06, "epoch": 0.42378848985340245, "percentage": 8.48, "elapsed_time": "0:16:55", "remaining_time": "3:02:48", "throughput": 20537.94, "total_tokens": 20860672} +{"current_steps": 6625, "total_steps": 78105, "loss": 0.4318, "lr": 4.2401741134297795e-06, "epoch": 0.4241085717943794, "percentage": 8.48, "elapsed_time": "0:16:56", "remaining_time": "3:02:46", "throughput": 20539.93, "total_tokens": 20877184} +{"current_steps": 6630, "total_steps": 78105, "loss": 0.4935, "lr": 4.243374727947766e-06, "epoch": 0.42442865373535626, "percentage": 8.49, "elapsed_time": "0:16:57", "remaining_time": "3:02:45", "throughput": 20542.16, "total_tokens": 20894208} +{"current_steps": 6635, "total_steps": 78105, "loss": 0.5097, "lr": 4.246575342465754e-06, "epoch": 0.42474873567633314, "percentage": 8.49, "elapsed_time": "0:16:57", "remaining_time": "3:02:43", "throughput": 20543.85, "total_tokens": 20909696} +{"current_steps": 6640, "total_steps": 78105, "loss": 0.4985, "lr": 4.249775956983741e-06, "epoch": 0.42506881761731, "percentage": 8.5, "elapsed_time": "0:16:58", "remaining_time": "3:02:41", "throughput": 20545.88, "total_tokens": 20926016} +{"current_steps": 6645, "total_steps": 78105, "loss": 0.4899, "lr": 4.2529765715017285e-06, "epoch": 0.4253888995582869, "percentage": 8.51, "elapsed_time": "0:16:59", "remaining_time": "3:02:40", "throughput": 20547.49, "total_tokens": 20941440} +{"current_steps": 6650, "total_steps": 78105, "loss": 0.5684, "lr": 4.256177186019716e-06, "epoch": 0.42570898149926384, "percentage": 8.51, "elapsed_time": "0:16:59", "remaining_time": "3:02:38", "throughput": 20548.99, "total_tokens": 20956352} +{"current_steps": 6655, "total_steps": 78105, "loss": 0.541, "lr": 4.2593778005377035e-06, "epoch": 0.4260290634402407, "percentage": 8.52, "elapsed_time": "0:17:00", "remaining_time": "3:02:36", "throughput": 20551.18, "total_tokens": 20973056} +{"current_steps": 6660, "total_steps": 78105, "loss": 0.5376, "lr": 4.2625784150556905e-06, "epoch": 0.4263491453812176, "percentage": 8.53, "elapsed_time": "0:17:01", "remaining_time": "3:02:35", "throughput": 20553.02, "total_tokens": 20989248} +{"current_steps": 6665, "total_steps": 78105, "loss": 0.5234, "lr": 4.265779029573678e-06, "epoch": 0.4266692273221945, "percentage": 8.53, "elapsed_time": "0:17:01", "remaining_time": "3:02:33", "throughput": 20554.59, "total_tokens": 21004096} +{"current_steps": 6670, "total_steps": 78105, "loss": 0.5485, "lr": 4.2689796440916655e-06, "epoch": 0.42698930926317136, "percentage": 8.54, "elapsed_time": "0:17:02", "remaining_time": "3:02:31", "throughput": 20556.6, "total_tokens": 21020224} +{"current_steps": 6675, "total_steps": 78105, "loss": 0.4845, "lr": 4.272180258609653e-06, "epoch": 0.42730939120414824, "percentage": 8.55, "elapsed_time": "0:17:03", "remaining_time": "3:02:29", "throughput": 20558.21, "total_tokens": 21035264} +{"current_steps": 6680, "total_steps": 78105, "loss": 0.4489, "lr": 4.275380873127641e-06, "epoch": 0.42762947314512517, "percentage": 8.55, "elapsed_time": "0:17:03", "remaining_time": "3:02:28", "throughput": 20560.22, "total_tokens": 21051904} +{"current_steps": 6685, "total_steps": 78105, "loss": 0.4542, "lr": 4.278581487645628e-06, "epoch": 0.42794955508610205, "percentage": 8.56, "elapsed_time": "0:17:04", "remaining_time": "3:02:26", "throughput": 20562.63, "total_tokens": 21069504} +{"current_steps": 6690, "total_steps": 78105, "loss": 0.569, "lr": 4.281782102163616e-06, "epoch": 0.42826963702707893, "percentage": 8.57, "elapsed_time": "0:17:05", "remaining_time": "3:02:24", "throughput": 20564.13, "total_tokens": 21084288} +{"current_steps": 6695, "total_steps": 78105, "loss": 0.4573, "lr": 4.284982716681603e-06, "epoch": 0.4285897189680558, "percentage": 8.57, "elapsed_time": "0:17:05", "remaining_time": "3:02:23", "throughput": 20566.13, "total_tokens": 21100544} +{"current_steps": 6700, "total_steps": 78105, "loss": 0.52, "lr": 4.288183331199591e-06, "epoch": 0.4289098009090327, "percentage": 8.58, "elapsed_time": "0:17:06", "remaining_time": "3:02:21", "throughput": 20567.93, "total_tokens": 21116608} +{"current_steps": 6705, "total_steps": 78105, "loss": 0.734, "lr": 4.291383945717578e-06, "epoch": 0.4292298828500096, "percentage": 8.58, "elapsed_time": "0:17:07", "remaining_time": "3:02:19", "throughput": 20569.53, "total_tokens": 21131776} +{"current_steps": 6710, "total_steps": 78105, "loss": 0.4443, "lr": 4.294584560235566e-06, "epoch": 0.4295499647909865, "percentage": 8.59, "elapsed_time": "0:17:07", "remaining_time": "3:02:17", "throughput": 20570.75, "total_tokens": 21146368} +{"current_steps": 6715, "total_steps": 78105, "loss": 0.6303, "lr": 4.297785174753553e-06, "epoch": 0.4298700467319634, "percentage": 8.6, "elapsed_time": "0:17:08", "remaining_time": "3:02:16", "throughput": 20572.58, "total_tokens": 21162368} +{"current_steps": 6720, "total_steps": 78105, "loss": 0.5002, "lr": 4.300985789271541e-06, "epoch": 0.43019012867294026, "percentage": 8.6, "elapsed_time": "0:17:09", "remaining_time": "3:02:14", "throughput": 20574.75, "total_tokens": 21179008} +{"current_steps": 6725, "total_steps": 78105, "loss": 0.777, "lr": 4.304186403789528e-06, "epoch": 0.43051021061391714, "percentage": 8.61, "elapsed_time": "0:17:10", "remaining_time": "3:02:13", "throughput": 20576.9, "total_tokens": 21196160} +{"current_steps": 6730, "total_steps": 78105, "loss": 0.5936, "lr": 4.307387018307516e-06, "epoch": 0.4308302925548941, "percentage": 8.62, "elapsed_time": "0:17:10", "remaining_time": "3:02:11", "throughput": 20578.73, "total_tokens": 21212032} +{"current_steps": 6735, "total_steps": 78105, "loss": 0.4063, "lr": 4.310587632825503e-06, "epoch": 0.43115037449587096, "percentage": 8.62, "elapsed_time": "0:17:11", "remaining_time": "3:02:10", "throughput": 20581.5, "total_tokens": 21230272} +{"current_steps": 6740, "total_steps": 78105, "loss": 0.469, "lr": 4.313788247343491e-06, "epoch": 0.43147045643684784, "percentage": 8.63, "elapsed_time": "0:17:12", "remaining_time": "3:02:09", "throughput": 20583.13, "total_tokens": 21245760} +{"current_steps": 6745, "total_steps": 78105, "loss": 0.415, "lr": 4.316988861861478e-06, "epoch": 0.4317905383778247, "percentage": 8.64, "elapsed_time": "0:17:12", "remaining_time": "3:02:07", "throughput": 20584.64, "total_tokens": 21260992} +{"current_steps": 6750, "total_steps": 78105, "loss": 0.3589, "lr": 4.320189476379465e-06, "epoch": 0.4321106203188016, "percentage": 8.64, "elapsed_time": "0:17:13", "remaining_time": "3:02:05", "throughput": 20585.75, "total_tokens": 21275136} +{"current_steps": 6755, "total_steps": 78105, "loss": 0.6596, "lr": 4.323390090897453e-06, "epoch": 0.4324307022597785, "percentage": 8.65, "elapsed_time": "0:17:14", "remaining_time": "3:02:03", "throughput": 20587.39, "total_tokens": 21290560} +{"current_steps": 6760, "total_steps": 78105, "loss": 0.5297, "lr": 4.32659070541544e-06, "epoch": 0.4327507842007554, "percentage": 8.66, "elapsed_time": "0:17:14", "remaining_time": "3:02:01", "throughput": 20588.9, "total_tokens": 21305536} +{"current_steps": 6765, "total_steps": 78105, "loss": 0.4501, "lr": 4.329791319933428e-06, "epoch": 0.4330708661417323, "percentage": 8.66, "elapsed_time": "0:17:15", "remaining_time": "3:01:59", "throughput": 20590.43, "total_tokens": 21320576} +{"current_steps": 6770, "total_steps": 78105, "loss": 0.54, "lr": 4.332991934451415e-06, "epoch": 0.43339094808270917, "percentage": 8.67, "elapsed_time": "0:17:16", "remaining_time": "3:01:57", "throughput": 20591.87, "total_tokens": 21335680} +{"current_steps": 6775, "total_steps": 78105, "loss": 0.4078, "lr": 4.336192548969403e-06, "epoch": 0.43371103002368605, "percentage": 8.67, "elapsed_time": "0:17:16", "remaining_time": "3:01:56", "throughput": 20593.59, "total_tokens": 21351808} +{"current_steps": 6780, "total_steps": 78105, "loss": 0.6034, "lr": 4.33939316348739e-06, "epoch": 0.43403111196466293, "percentage": 8.68, "elapsed_time": "0:17:17", "remaining_time": "3:01:53", "throughput": 20595.03, "total_tokens": 21366464} +{"current_steps": 6785, "total_steps": 78105, "loss": 0.3572, "lr": 4.342593778005378e-06, "epoch": 0.43435119390563987, "percentage": 8.69, "elapsed_time": "0:17:18", "remaining_time": "3:01:52", "throughput": 20597.02, "total_tokens": 21383296} +{"current_steps": 6790, "total_steps": 78105, "loss": 0.5126, "lr": 4.345794392523365e-06, "epoch": 0.43467127584661674, "percentage": 8.69, "elapsed_time": "0:17:18", "remaining_time": "3:01:51", "throughput": 20598.96, "total_tokens": 21399616} +{"current_steps": 6795, "total_steps": 78105, "loss": 0.5213, "lr": 4.3489950070413526e-06, "epoch": 0.4349913577875936, "percentage": 8.7, "elapsed_time": "0:17:19", "remaining_time": "3:01:49", "throughput": 20600.45, "total_tokens": 21414848} +{"current_steps": 6800, "total_steps": 78105, "loss": 0.3968, "lr": 4.35219562155934e-06, "epoch": 0.4353114397285705, "percentage": 8.71, "elapsed_time": "0:17:20", "remaining_time": "3:01:47", "throughput": 20601.99, "total_tokens": 21430144} +{"current_steps": 6805, "total_steps": 78105, "loss": 0.4789, "lr": 4.3553962360773275e-06, "epoch": 0.4356315216695474, "percentage": 8.71, "elapsed_time": "0:17:20", "remaining_time": "3:01:45", "throughput": 20603.4, "total_tokens": 21445248} +{"current_steps": 6810, "total_steps": 78105, "loss": 0.7234, "lr": 4.3585968505953145e-06, "epoch": 0.4359516036105243, "percentage": 8.72, "elapsed_time": "0:17:21", "remaining_time": "3:01:43", "throughput": 20604.94, "total_tokens": 21460544} +{"current_steps": 6815, "total_steps": 78105, "loss": 0.5245, "lr": 4.3617974651133024e-06, "epoch": 0.4362716855515012, "percentage": 8.73, "elapsed_time": "0:17:22", "remaining_time": "3:01:42", "throughput": 20606.69, "total_tokens": 21476224} +{"current_steps": 6820, "total_steps": 78105, "loss": 0.6448, "lr": 4.3649980796312895e-06, "epoch": 0.4365917674924781, "percentage": 8.73, "elapsed_time": "0:17:22", "remaining_time": "3:01:40", "throughput": 20608.43, "total_tokens": 21491712} +{"current_steps": 6825, "total_steps": 78105, "loss": 0.6611, "lr": 4.368198694149277e-06, "epoch": 0.43691184943345496, "percentage": 8.74, "elapsed_time": "0:17:23", "remaining_time": "3:01:39", "throughput": 20610.63, "total_tokens": 21508928} +{"current_steps": 6830, "total_steps": 78105, "loss": 0.5358, "lr": 4.371399308667264e-06, "epoch": 0.43723193137443184, "percentage": 8.74, "elapsed_time": "0:17:24", "remaining_time": "3:01:37", "throughput": 20611.94, "total_tokens": 21523648} +{"current_steps": 6835, "total_steps": 78105, "loss": 0.5569, "lr": 4.374599923185252e-06, "epoch": 0.43755201331540877, "percentage": 8.75, "elapsed_time": "0:17:24", "remaining_time": "3:01:35", "throughput": 20613.55, "total_tokens": 21539264} +{"current_steps": 6840, "total_steps": 78105, "loss": 0.4689, "lr": 4.377800537703239e-06, "epoch": 0.43787209525638565, "percentage": 8.76, "elapsed_time": "0:17:25", "remaining_time": "3:01:34", "throughput": 20615.26, "total_tokens": 21555520} +{"current_steps": 6845, "total_steps": 78105, "loss": 0.6048, "lr": 4.381001152221227e-06, "epoch": 0.43819217719736253, "percentage": 8.76, "elapsed_time": "0:17:26", "remaining_time": "3:01:31", "throughput": 20616.62, "total_tokens": 21570112} +{"current_steps": 6850, "total_steps": 78105, "loss": 0.4128, "lr": 4.384201766739214e-06, "epoch": 0.4385122591383394, "percentage": 8.77, "elapsed_time": "0:17:26", "remaining_time": "3:01:30", "throughput": 20617.89, "total_tokens": 21584768} +{"current_steps": 6855, "total_steps": 78105, "loss": 0.367, "lr": 4.387402381257202e-06, "epoch": 0.4388323410793163, "percentage": 8.78, "elapsed_time": "0:17:27", "remaining_time": "3:01:28", "throughput": 20619.39, "total_tokens": 21599872} +{"current_steps": 6860, "total_steps": 78105, "loss": 0.5798, "lr": 4.390602995775189e-06, "epoch": 0.43915242302029317, "percentage": 8.78, "elapsed_time": "0:17:28", "remaining_time": "3:01:26", "throughput": 20621.08, "total_tokens": 21615808} +{"current_steps": 6865, "total_steps": 78105, "loss": 0.6669, "lr": 4.393803610293177e-06, "epoch": 0.4394725049612701, "percentage": 8.79, "elapsed_time": "0:17:28", "remaining_time": "3:01:24", "throughput": 20622.45, "total_tokens": 21630656} +{"current_steps": 6870, "total_steps": 78105, "loss": 0.5259, "lr": 4.397004224811164e-06, "epoch": 0.439792586902247, "percentage": 8.8, "elapsed_time": "0:17:29", "remaining_time": "3:01:23", "throughput": 20624.35, "total_tokens": 21646976} +{"current_steps": 6875, "total_steps": 78105, "loss": 0.6549, "lr": 4.400204839329151e-06, "epoch": 0.44011266884322386, "percentage": 8.8, "elapsed_time": "0:17:30", "remaining_time": "3:01:22", "throughput": 20626.92, "total_tokens": 21665024} +{"current_steps": 6880, "total_steps": 78105, "loss": 0.5625, "lr": 4.403405453847139e-06, "epoch": 0.44043275078420074, "percentage": 8.81, "elapsed_time": "0:17:30", "remaining_time": "3:01:20", "throughput": 20628.39, "total_tokens": 21680128} +{"current_steps": 6885, "total_steps": 78105, "loss": 0.3949, "lr": 4.406606068365126e-06, "epoch": 0.4407528327251776, "percentage": 8.82, "elapsed_time": "0:17:31", "remaining_time": "3:01:18", "throughput": 20629.96, "total_tokens": 21695808} +{"current_steps": 6890, "total_steps": 78105, "loss": 0.4253, "lr": 4.409806682883114e-06, "epoch": 0.44107291466615456, "percentage": 8.82, "elapsed_time": "0:17:32", "remaining_time": "3:01:17", "throughput": 20631.64, "total_tokens": 21711936} +{"current_steps": 6895, "total_steps": 78105, "loss": 0.5217, "lr": 4.413007297401101e-06, "epoch": 0.44139299660713144, "percentage": 8.83, "elapsed_time": "0:17:33", "remaining_time": "3:01:15", "throughput": 20633.15, "total_tokens": 21727488} +{"current_steps": 6900, "total_steps": 78105, "loss": 0.6068, "lr": 4.416207911919089e-06, "epoch": 0.4417130785481083, "percentage": 8.83, "elapsed_time": "0:17:33", "remaining_time": "3:01:14", "throughput": 20635.61, "total_tokens": 21745280} +{"current_steps": 6905, "total_steps": 78105, "loss": 0.5917, "lr": 4.419408526437076e-06, "epoch": 0.4420331604890852, "percentage": 8.84, "elapsed_time": "0:17:34", "remaining_time": "3:01:12", "throughput": 20636.93, "total_tokens": 21760000} +{"current_steps": 6910, "total_steps": 78105, "loss": 0.4796, "lr": 4.422609140955064e-06, "epoch": 0.4423532424300621, "percentage": 8.85, "elapsed_time": "0:17:35", "remaining_time": "3:01:10", "throughput": 20638.48, "total_tokens": 21775232} +{"current_steps": 6915, "total_steps": 78105, "loss": 0.5064, "lr": 4.425809755473051e-06, "epoch": 0.442673324371039, "percentage": 8.85, "elapsed_time": "0:17:35", "remaining_time": "3:01:08", "throughput": 20640.17, "total_tokens": 21790656} +{"current_steps": 6920, "total_steps": 78105, "loss": 0.5419, "lr": 4.429010369991039e-06, "epoch": 0.4429934063120159, "percentage": 8.86, "elapsed_time": "0:17:36", "remaining_time": "3:01:07", "throughput": 20641.71, "total_tokens": 21806336} +{"current_steps": 6925, "total_steps": 78105, "loss": 0.4058, "lr": 4.432210984509026e-06, "epoch": 0.44331348825299277, "percentage": 8.87, "elapsed_time": "0:17:37", "remaining_time": "3:01:06", "throughput": 20644.06, "total_tokens": 21823744} +{"current_steps": 6930, "total_steps": 78105, "loss": 0.645, "lr": 4.435411599027014e-06, "epoch": 0.44363357019396965, "percentage": 8.87, "elapsed_time": "0:17:37", "remaining_time": "3:01:04", "throughput": 20645.52, "total_tokens": 21839104} +{"current_steps": 6935, "total_steps": 78105, "loss": 0.4493, "lr": 4.438612213545001e-06, "epoch": 0.44395365213494653, "percentage": 8.88, "elapsed_time": "0:17:38", "remaining_time": "3:01:02", "throughput": 20647.09, "total_tokens": 21854528} +{"current_steps": 6940, "total_steps": 78105, "loss": 0.7441, "lr": 4.441812828062989e-06, "epoch": 0.4442737340759234, "percentage": 8.89, "elapsed_time": "0:17:39", "remaining_time": "3:01:01", "throughput": 20648.87, "total_tokens": 21870592} +{"current_steps": 6945, "total_steps": 78105, "loss": 0.5566, "lr": 4.445013442580976e-06, "epoch": 0.44459381601690035, "percentage": 8.89, "elapsed_time": "0:17:39", "remaining_time": "3:00:59", "throughput": 20650.22, "total_tokens": 21885696} +{"current_steps": 6950, "total_steps": 78105, "loss": 0.4665, "lr": 4.448214057098964e-06, "epoch": 0.4449138979578772, "percentage": 8.9, "elapsed_time": "0:17:40", "remaining_time": "3:00:57", "throughput": 20651.8, "total_tokens": 21901248} +{"current_steps": 6955, "total_steps": 78105, "loss": 0.6195, "lr": 4.451414671616951e-06, "epoch": 0.4452339798988541, "percentage": 8.9, "elapsed_time": "0:17:41", "remaining_time": "3:00:55", "throughput": 20653.09, "total_tokens": 21916352} +{"current_steps": 6960, "total_steps": 78105, "loss": 0.4685, "lr": 4.4546152861349385e-06, "epoch": 0.445554061839831, "percentage": 8.91, "elapsed_time": "0:17:41", "remaining_time": "3:00:53", "throughput": 20654.3, "total_tokens": 21931072} +{"current_steps": 6965, "total_steps": 78105, "loss": 0.515, "lr": 4.457815900652926e-06, "epoch": 0.44587414378080786, "percentage": 8.92, "elapsed_time": "0:17:42", "remaining_time": "3:00:52", "throughput": 20655.91, "total_tokens": 21946752} +{"current_steps": 6970, "total_steps": 78105, "loss": 0.5767, "lr": 4.4610165151709135e-06, "epoch": 0.4461942257217848, "percentage": 8.92, "elapsed_time": "0:17:43", "remaining_time": "3:00:50", "throughput": 20657.43, "total_tokens": 21962176} +{"current_steps": 6975, "total_steps": 78105, "loss": 0.4379, "lr": 4.4642171296889005e-06, "epoch": 0.4465143076627617, "percentage": 8.93, "elapsed_time": "0:17:43", "remaining_time": "3:00:49", "throughput": 20659.19, "total_tokens": 21978496} +{"current_steps": 6980, "total_steps": 78105, "loss": 0.6503, "lr": 4.467417744206888e-06, "epoch": 0.44683438960373856, "percentage": 8.94, "elapsed_time": "0:17:44", "remaining_time": "3:00:47", "throughput": 20660.14, "total_tokens": 21992576} +{"current_steps": 6985, "total_steps": 78105, "loss": 0.5465, "lr": 4.4706183587248755e-06, "epoch": 0.44715447154471544, "percentage": 8.94, "elapsed_time": "0:17:45", "remaining_time": "3:00:45", "throughput": 20661.58, "total_tokens": 22008000} +{"current_steps": 6990, "total_steps": 78105, "loss": 0.7151, "lr": 4.4738189732428625e-06, "epoch": 0.4474745534856923, "percentage": 8.95, "elapsed_time": "0:17:45", "remaining_time": "3:00:43", "throughput": 20662.95, "total_tokens": 22022976} +{"current_steps": 6995, "total_steps": 78105, "loss": 0.5282, "lr": 4.47701958776085e-06, "epoch": 0.44779463542666925, "percentage": 8.96, "elapsed_time": "0:17:46", "remaining_time": "3:00:41", "throughput": 20664.46, "total_tokens": 22038144} +{"current_steps": 7000, "total_steps": 78105, "loss": 0.5655, "lr": 4.4802202022788374e-06, "epoch": 0.44811471736764613, "percentage": 8.96, "elapsed_time": "0:17:47", "remaining_time": "3:00:40", "throughput": 20666.19, "total_tokens": 22054016} +{"current_steps": 7005, "total_steps": 78105, "loss": 0.4632, "lr": 4.483420816796825e-06, "epoch": 0.448434799308623, "percentage": 8.97, "elapsed_time": "0:17:47", "remaining_time": "3:00:39", "throughput": 20669.04, "total_tokens": 22073920} +{"current_steps": 7010, "total_steps": 78105, "loss": 0.4887, "lr": 4.486621431314812e-06, "epoch": 0.4487548812495999, "percentage": 8.98, "elapsed_time": "0:17:48", "remaining_time": "3:00:39", "throughput": 20667.87, "total_tokens": 22089728} +{"current_steps": 7015, "total_steps": 78105, "loss": 0.6692, "lr": 4.4898220458328e-06, "epoch": 0.44907496319057677, "percentage": 8.98, "elapsed_time": "0:17:49", "remaining_time": "3:00:38", "throughput": 20669.19, "total_tokens": 22105344} +{"current_steps": 7020, "total_steps": 78105, "loss": 0.5347, "lr": 4.493022660350787e-06, "epoch": 0.4493950451315537, "percentage": 8.99, "elapsed_time": "0:17:50", "remaining_time": "3:00:36", "throughput": 20670.97, "total_tokens": 22121792} +{"current_steps": 7025, "total_steps": 78105, "loss": 0.4648, "lr": 4.496223274868775e-06, "epoch": 0.4497151270725306, "percentage": 8.99, "elapsed_time": "0:17:50", "remaining_time": "3:00:34", "throughput": 20672.01, "total_tokens": 22136128} +{"current_steps": 7030, "total_steps": 78105, "loss": 0.4841, "lr": 4.499423889386762e-06, "epoch": 0.45003520901350746, "percentage": 9.0, "elapsed_time": "0:17:51", "remaining_time": "3:00:33", "throughput": 20673.67, "total_tokens": 22151936} +{"current_steps": 7035, "total_steps": 78105, "loss": 0.3882, "lr": 4.50262450390475e-06, "epoch": 0.45035529095448434, "percentage": 9.01, "elapsed_time": "0:17:52", "remaining_time": "3:00:31", "throughput": 20675.37, "total_tokens": 22168128} +{"current_steps": 7040, "total_steps": 78105, "loss": 0.4943, "lr": 4.505825118422737e-06, "epoch": 0.4506753728954612, "percentage": 9.01, "elapsed_time": "0:17:52", "remaining_time": "3:00:30", "throughput": 20677.12, "total_tokens": 22184512} +{"current_steps": 7045, "total_steps": 78105, "loss": 0.6257, "lr": 4.509025732940725e-06, "epoch": 0.4509954548364381, "percentage": 9.02, "elapsed_time": "0:17:53", "remaining_time": "3:00:28", "throughput": 20678.59, "total_tokens": 22199872} +{"current_steps": 7050, "total_steps": 78105, "loss": 0.5604, "lr": 4.512226347458712e-06, "epoch": 0.45131553677741504, "percentage": 9.03, "elapsed_time": "0:17:54", "remaining_time": "3:00:27", "throughput": 20680.54, "total_tokens": 22216960} +{"current_steps": 7055, "total_steps": 78105, "loss": 0.3448, "lr": 4.5154269619767e-06, "epoch": 0.4516356187183919, "percentage": 9.03, "elapsed_time": "0:17:54", "remaining_time": "3:00:25", "throughput": 20681.71, "total_tokens": 22231488} +{"current_steps": 7060, "total_steps": 78105, "loss": 0.5967, "lr": 4.518627576494687e-06, "epoch": 0.4519557006593688, "percentage": 9.04, "elapsed_time": "0:17:55", "remaining_time": "3:00:23", "throughput": 20683.13, "total_tokens": 22246976} +{"current_steps": 7065, "total_steps": 78105, "loss": 0.4857, "lr": 4.521828191012675e-06, "epoch": 0.4522757826003457, "percentage": 9.05, "elapsed_time": "0:17:56", "remaining_time": "3:00:22", "throughput": 20684.81, "total_tokens": 22263680} +{"current_steps": 7070, "total_steps": 78105, "loss": 0.4252, "lr": 4.525028805530662e-06, "epoch": 0.45259586454132256, "percentage": 9.05, "elapsed_time": "0:17:57", "remaining_time": "3:00:21", "throughput": 20686.39, "total_tokens": 22279552} +{"current_steps": 7075, "total_steps": 78105, "loss": 0.5059, "lr": 4.52822942004865e-06, "epoch": 0.4529159464822995, "percentage": 9.06, "elapsed_time": "0:17:57", "remaining_time": "3:00:19", "throughput": 20688.06, "total_tokens": 22295296} +{"current_steps": 7080, "total_steps": 78105, "loss": 0.5148, "lr": 4.531430034566637e-06, "epoch": 0.45323602842327637, "percentage": 9.06, "elapsed_time": "0:17:58", "remaining_time": "3:00:17", "throughput": 20689.34, "total_tokens": 22310400} +{"current_steps": 7085, "total_steps": 78105, "loss": 0.5518, "lr": 4.534630649084625e-06, "epoch": 0.45355611036425325, "percentage": 9.07, "elapsed_time": "0:17:59", "remaining_time": "3:00:16", "throughput": 20691.28, "total_tokens": 22326848} +{"current_steps": 7090, "total_steps": 78105, "loss": 0.5397, "lr": 4.537831263602613e-06, "epoch": 0.45387619230523013, "percentage": 9.08, "elapsed_time": "0:17:59", "remaining_time": "3:00:14", "throughput": 20692.88, "total_tokens": 22342592} +{"current_steps": 7095, "total_steps": 78105, "loss": 0.5794, "lr": 4.5410318781206e-06, "epoch": 0.454196274246207, "percentage": 9.08, "elapsed_time": "0:18:00", "remaining_time": "3:00:13", "throughput": 20694.71, "total_tokens": 22358912} +{"current_steps": 7100, "total_steps": 78105, "loss": 0.4531, "lr": 4.544232492638588e-06, "epoch": 0.45451635618718395, "percentage": 9.09, "elapsed_time": "0:18:01", "remaining_time": "3:00:11", "throughput": 20696.31, "total_tokens": 22374528} +{"current_steps": 7105, "total_steps": 78105, "loss": 0.4472, "lr": 4.547433107156575e-06, "epoch": 0.4548364381281608, "percentage": 9.1, "elapsed_time": "0:18:01", "remaining_time": "3:00:09", "throughput": 20697.77, "total_tokens": 22389824} +{"current_steps": 7110, "total_steps": 78105, "loss": 0.4667, "lr": 4.550633721674562e-06, "epoch": 0.4551565200691377, "percentage": 9.1, "elapsed_time": "0:18:02", "remaining_time": "3:00:08", "throughput": 20699.23, "total_tokens": 22405376} +{"current_steps": 7115, "total_steps": 78105, "loss": 0.4732, "lr": 4.553834336192549e-06, "epoch": 0.4554766020101146, "percentage": 9.11, "elapsed_time": "0:18:03", "remaining_time": "3:00:06", "throughput": 20700.54, "total_tokens": 22420032} +{"current_steps": 7120, "total_steps": 78105, "loss": 0.5929, "lr": 4.557034950710537e-06, "epoch": 0.45579668395109146, "percentage": 9.12, "elapsed_time": "0:18:03", "remaining_time": "3:00:04", "throughput": 20701.8, "total_tokens": 22434688} +{"current_steps": 7125, "total_steps": 78105, "loss": 0.4998, "lr": 4.560235565228524e-06, "epoch": 0.45611676589206834, "percentage": 9.12, "elapsed_time": "0:18:04", "remaining_time": "3:00:02", "throughput": 20703.57, "total_tokens": 22451072} +{"current_steps": 7130, "total_steps": 78105, "loss": 0.4491, "lr": 4.563436179746512e-06, "epoch": 0.4564368478330453, "percentage": 9.13, "elapsed_time": "0:18:05", "remaining_time": "3:00:01", "throughput": 20705.02, "total_tokens": 22466688} +{"current_steps": 7135, "total_steps": 78105, "loss": 0.5812, "lr": 4.566636794264499e-06, "epoch": 0.45675692977402216, "percentage": 9.14, "elapsed_time": "0:18:05", "remaining_time": "2:59:59", "throughput": 20706.56, "total_tokens": 22482432} +{"current_steps": 7140, "total_steps": 78105, "loss": 0.4904, "lr": 4.5698374087824865e-06, "epoch": 0.45707701171499904, "percentage": 9.14, "elapsed_time": "0:18:06", "remaining_time": "2:59:57", "throughput": 20707.74, "total_tokens": 22496960} +{"current_steps": 7145, "total_steps": 78105, "loss": 0.5087, "lr": 4.5730380233004736e-06, "epoch": 0.4573970936559759, "percentage": 9.15, "elapsed_time": "0:18:07", "remaining_time": "2:59:56", "throughput": 20709.2, "total_tokens": 22512256} +{"current_steps": 7150, "total_steps": 78105, "loss": 0.5976, "lr": 4.5762386378184615e-06, "epoch": 0.4577171755969528, "percentage": 9.15, "elapsed_time": "0:18:07", "remaining_time": "2:59:54", "throughput": 20710.91, "total_tokens": 22528192} +{"current_steps": 7155, "total_steps": 78105, "loss": 0.5641, "lr": 4.579439252336449e-06, "epoch": 0.45803725753792973, "percentage": 9.16, "elapsed_time": "0:18:08", "remaining_time": "2:59:53", "throughput": 20712.94, "total_tokens": 22545408} +{"current_steps": 7160, "total_steps": 78105, "loss": 0.6556, "lr": 4.582639866854436e-06, "epoch": 0.4583573394789066, "percentage": 9.17, "elapsed_time": "0:18:09", "remaining_time": "2:59:51", "throughput": 20714.39, "total_tokens": 22560832} +{"current_steps": 7165, "total_steps": 78105, "loss": 0.494, "lr": 4.585840481372424e-06, "epoch": 0.4586774214198835, "percentage": 9.17, "elapsed_time": "0:18:09", "remaining_time": "2:59:50", "throughput": 20716.44, "total_tokens": 22577728} +{"current_steps": 7170, "total_steps": 78105, "loss": 0.5734, "lr": 4.589041095890411e-06, "epoch": 0.45899750336086037, "percentage": 9.18, "elapsed_time": "0:18:10", "remaining_time": "2:59:49", "throughput": 20718.23, "total_tokens": 22594112} +{"current_steps": 7175, "total_steps": 78105, "loss": 0.5078, "lr": 4.592241710408399e-06, "epoch": 0.45931758530183725, "percentage": 9.19, "elapsed_time": "0:18:11", "remaining_time": "2:59:47", "throughput": 20719.62, "total_tokens": 22609728} +{"current_steps": 7180, "total_steps": 78105, "loss": 0.5578, "lr": 4.595442324926386e-06, "epoch": 0.4596376672428142, "percentage": 9.19, "elapsed_time": "0:18:11", "remaining_time": "2:59:45", "throughput": 20720.95, "total_tokens": 22624704} +{"current_steps": 7185, "total_steps": 78105, "loss": 0.4481, "lr": 4.598642939444374e-06, "epoch": 0.45995774918379106, "percentage": 9.2, "elapsed_time": "0:18:12", "remaining_time": "2:59:44", "throughput": 20722.46, "total_tokens": 22640448} +{"current_steps": 7190, "total_steps": 78105, "loss": 0.3506, "lr": 4.601843553962361e-06, "epoch": 0.46027783112476794, "percentage": 9.21, "elapsed_time": "0:18:13", "remaining_time": "2:59:42", "throughput": 20723.86, "total_tokens": 22655680} +{"current_steps": 7195, "total_steps": 78105, "loss": 0.5025, "lr": 4.605044168480349e-06, "epoch": 0.4605979130657448, "percentage": 9.21, "elapsed_time": "0:18:13", "remaining_time": "2:59:40", "throughput": 20725.82, "total_tokens": 22672128} +{"current_steps": 7200, "total_steps": 78105, "loss": 0.5757, "lr": 4.608244782998336e-06, "epoch": 0.4609179950067217, "percentage": 9.22, "elapsed_time": "0:18:14", "remaining_time": "2:59:39", "throughput": 20727.08, "total_tokens": 22687104} +{"current_steps": 7205, "total_steps": 78105, "loss": 0.5052, "lr": 4.611445397516324e-06, "epoch": 0.4612380769476986, "percentage": 9.22, "elapsed_time": "0:18:15", "remaining_time": "2:59:37", "throughput": 20728.69, "total_tokens": 22702976} +{"current_steps": 7210, "total_steps": 78105, "loss": 0.4947, "lr": 4.614646012034311e-06, "epoch": 0.4615581588886755, "percentage": 9.23, "elapsed_time": "0:18:15", "remaining_time": "2:59:35", "throughput": 20730.17, "total_tokens": 22718464} +{"current_steps": 7215, "total_steps": 78105, "loss": 0.4546, "lr": 4.617846626552299e-06, "epoch": 0.4618782408296524, "percentage": 9.24, "elapsed_time": "0:18:16", "remaining_time": "2:59:34", "throughput": 20731.9, "total_tokens": 22734656} +{"current_steps": 7220, "total_steps": 78105, "loss": 0.449, "lr": 4.621047241070286e-06, "epoch": 0.4621983227706293, "percentage": 9.24, "elapsed_time": "0:18:17", "remaining_time": "2:59:32", "throughput": 20733.43, "total_tokens": 22750016} +{"current_steps": 7225, "total_steps": 78105, "loss": 0.4982, "lr": 4.624247855588273e-06, "epoch": 0.46251840471160616, "percentage": 9.25, "elapsed_time": "0:18:17", "remaining_time": "2:59:31", "throughput": 20734.94, "total_tokens": 22765632} +{"current_steps": 7230, "total_steps": 78105, "loss": 0.5168, "lr": 4.627448470106261e-06, "epoch": 0.46283848665258304, "percentage": 9.26, "elapsed_time": "0:18:18", "remaining_time": "2:59:29", "throughput": 20736.44, "total_tokens": 22781184} +{"current_steps": 7235, "total_steps": 78105, "loss": 0.6873, "lr": 4.630649084624248e-06, "epoch": 0.46315856859355997, "percentage": 9.26, "elapsed_time": "0:18:19", "remaining_time": "2:59:28", "throughput": 20738.0, "total_tokens": 22797248} +{"current_steps": 7240, "total_steps": 78105, "loss": 0.4643, "lr": 4.633849699142236e-06, "epoch": 0.46347865053453685, "percentage": 9.27, "elapsed_time": "0:18:19", "remaining_time": "2:59:26", "throughput": 20739.41, "total_tokens": 22812800} +{"current_steps": 7245, "total_steps": 78105, "loss": 0.5374, "lr": 4.637050313660223e-06, "epoch": 0.46379873247551373, "percentage": 9.28, "elapsed_time": "0:18:20", "remaining_time": "2:59:24", "throughput": 20740.51, "total_tokens": 22827200} +{"current_steps": 7250, "total_steps": 78105, "loss": 0.6252, "lr": 4.640250928178211e-06, "epoch": 0.4641188144164906, "percentage": 9.28, "elapsed_time": "0:18:21", "remaining_time": "2:59:23", "throughput": 20742.12, "total_tokens": 22843264} +{"current_steps": 7255, "total_steps": 78105, "loss": 0.5602, "lr": 4.643451542696198e-06, "epoch": 0.4644388963574675, "percentage": 9.29, "elapsed_time": "0:18:21", "remaining_time": "2:59:21", "throughput": 20743.55, "total_tokens": 22858624} +{"current_steps": 7260, "total_steps": 78105, "loss": 0.5698, "lr": 4.646652157214186e-06, "epoch": 0.4647589782984444, "percentage": 9.3, "elapsed_time": "0:18:22", "remaining_time": "2:59:19", "throughput": 20745.02, "total_tokens": 22873856} +{"current_steps": 7265, "total_steps": 78105, "loss": 0.4438, "lr": 4.649852771732173e-06, "epoch": 0.4650790602394213, "percentage": 9.3, "elapsed_time": "0:18:23", "remaining_time": "2:59:18", "throughput": 20746.49, "total_tokens": 22889600} +{"current_steps": 7270, "total_steps": 78105, "loss": 0.5235, "lr": 4.653053386250161e-06, "epoch": 0.4653991421803982, "percentage": 9.31, "elapsed_time": "0:18:23", "remaining_time": "2:59:16", "throughput": 20747.81, "total_tokens": 22904704} +{"current_steps": 7275, "total_steps": 78105, "loss": 0.4328, "lr": 4.656254000768148e-06, "epoch": 0.46571922412137506, "percentage": 9.31, "elapsed_time": "0:18:24", "remaining_time": "2:59:14", "throughput": 20749.21, "total_tokens": 22920384} +{"current_steps": 7280, "total_steps": 78105, "loss": 0.5346, "lr": 4.659454615286136e-06, "epoch": 0.46603930606235194, "percentage": 9.32, "elapsed_time": "0:18:25", "remaining_time": "2:59:13", "throughput": 20750.97, "total_tokens": 22937024} +{"current_steps": 7285, "total_steps": 78105, "loss": 0.5461, "lr": 4.662655229804123e-06, "epoch": 0.4663593880033289, "percentage": 9.33, "elapsed_time": "0:18:26", "remaining_time": "2:59:12", "throughput": 20753.19, "total_tokens": 22954816} +{"current_steps": 7290, "total_steps": 78105, "loss": 0.3186, "lr": 4.6658558443221105e-06, "epoch": 0.46667946994430576, "percentage": 9.33, "elapsed_time": "0:18:26", "remaining_time": "2:59:10", "throughput": 20754.41, "total_tokens": 22969472} +{"current_steps": 7295, "total_steps": 78105, "loss": 0.493, "lr": 4.669056458840098e-06, "epoch": 0.46699955188528264, "percentage": 9.34, "elapsed_time": "0:18:27", "remaining_time": "2:59:08", "throughput": 20755.61, "total_tokens": 22984192} +{"current_steps": 7300, "total_steps": 78105, "loss": 0.4327, "lr": 4.6722570733580855e-06, "epoch": 0.4673196338262595, "percentage": 9.35, "elapsed_time": "0:18:28", "remaining_time": "2:59:07", "throughput": 20757.09, "total_tokens": 22999680} +{"current_steps": 7305, "total_steps": 78105, "loss": 0.5308, "lr": 4.6754576878760725e-06, "epoch": 0.4676397157672364, "percentage": 9.35, "elapsed_time": "0:18:28", "remaining_time": "2:59:05", "throughput": 20758.33, "total_tokens": 23014720} +{"current_steps": 7310, "total_steps": 78105, "loss": 0.5436, "lr": 4.67865830239406e-06, "epoch": 0.4679597977082133, "percentage": 9.36, "elapsed_time": "0:18:29", "remaining_time": "2:59:03", "throughput": 20759.95, "total_tokens": 23030528} +{"current_steps": 7315, "total_steps": 78105, "loss": 0.5756, "lr": 4.6818589169120474e-06, "epoch": 0.4682798796491902, "percentage": 9.37, "elapsed_time": "0:18:30", "remaining_time": "2:59:02", "throughput": 20761.66, "total_tokens": 23046784} +{"current_steps": 7320, "total_steps": 78105, "loss": 0.4716, "lr": 4.685059531430035e-06, "epoch": 0.4685999615901671, "percentage": 9.37, "elapsed_time": "0:18:30", "remaining_time": "2:59:00", "throughput": 20762.84, "total_tokens": 23061632} +{"current_steps": 7325, "total_steps": 78105, "loss": 0.4359, "lr": 4.688260145948022e-06, "epoch": 0.46892004353114397, "percentage": 9.38, "elapsed_time": "0:18:31", "remaining_time": "2:58:59", "throughput": 20764.49, "total_tokens": 23077888} +{"current_steps": 7330, "total_steps": 78105, "loss": 0.5006, "lr": 4.69146076046601e-06, "epoch": 0.46924012547212085, "percentage": 9.38, "elapsed_time": "0:18:32", "remaining_time": "2:58:57", "throughput": 20765.85, "total_tokens": 23093568} +{"current_steps": 7335, "total_steps": 78105, "loss": 0.6237, "lr": 4.694661374983997e-06, "epoch": 0.46956020741309773, "percentage": 9.39, "elapsed_time": "0:18:32", "remaining_time": "2:58:56", "throughput": 20767.18, "total_tokens": 23108992} +{"current_steps": 7340, "total_steps": 78105, "loss": 0.468, "lr": 4.697861989501985e-06, "epoch": 0.46988028935407467, "percentage": 9.4, "elapsed_time": "0:18:33", "remaining_time": "2:58:54", "throughput": 20768.77, "total_tokens": 23124992} +{"current_steps": 7345, "total_steps": 78105, "loss": 0.4068, "lr": 4.701062604019972e-06, "epoch": 0.47020037129505154, "percentage": 9.4, "elapsed_time": "0:18:34", "remaining_time": "2:58:53", "throughput": 20770.63, "total_tokens": 23141888} +{"current_steps": 7350, "total_steps": 78105, "loss": 0.4744, "lr": 4.704263218537959e-06, "epoch": 0.4705204532360284, "percentage": 9.41, "elapsed_time": "0:18:34", "remaining_time": "2:58:52", "throughput": 20772.2, "total_tokens": 23157888} +{"current_steps": 7355, "total_steps": 78105, "loss": 0.3915, "lr": 4.707463833055947e-06, "epoch": 0.4708405351770053, "percentage": 9.42, "elapsed_time": "0:18:35", "remaining_time": "2:58:50", "throughput": 20773.45, "total_tokens": 23172864} +{"current_steps": 7360, "total_steps": 78105, "loss": 0.558, "lr": 4.710664447573934e-06, "epoch": 0.4711606171179822, "percentage": 9.42, "elapsed_time": "0:18:36", "remaining_time": "2:58:48", "throughput": 20774.99, "total_tokens": 23188352} +{"current_steps": 7365, "total_steps": 78105, "loss": 0.4398, "lr": 4.713865062091922e-06, "epoch": 0.4714806990589591, "percentage": 9.43, "elapsed_time": "0:18:36", "remaining_time": "2:58:47", "throughput": 20776.32, "total_tokens": 23204096} +{"current_steps": 7370, "total_steps": 78105, "loss": 0.4179, "lr": 4.717065676609909e-06, "epoch": 0.471800780999936, "percentage": 9.44, "elapsed_time": "0:18:37", "remaining_time": "2:58:45", "throughput": 20777.51, "total_tokens": 23219072} +{"current_steps": 7375, "total_steps": 78105, "loss": 0.5466, "lr": 4.720266291127897e-06, "epoch": 0.4721208629409129, "percentage": 9.44, "elapsed_time": "0:18:38", "remaining_time": "2:58:43", "throughput": 20779.07, "total_tokens": 23234560} +{"current_steps": 7380, "total_steps": 78105, "loss": 0.4228, "lr": 4.723466905645884e-06, "epoch": 0.47244094488188976, "percentage": 9.45, "elapsed_time": "0:18:38", "remaining_time": "2:58:42", "throughput": 20780.7, "total_tokens": 23250304} +{"current_steps": 7385, "total_steps": 78105, "loss": 0.8351, "lr": 4.726667520163872e-06, "epoch": 0.47276102682286664, "percentage": 9.46, "elapsed_time": "0:18:39", "remaining_time": "2:58:40", "throughput": 20782.02, "total_tokens": 23265664} +{"current_steps": 7390, "total_steps": 78105, "loss": 0.6303, "lr": 4.729868134681859e-06, "epoch": 0.4730811087638435, "percentage": 9.46, "elapsed_time": "0:18:40", "remaining_time": "2:58:39", "throughput": 20783.59, "total_tokens": 23281472} +{"current_steps": 7395, "total_steps": 78105, "loss": 0.5959, "lr": 4.733068749199847e-06, "epoch": 0.47340119070482045, "percentage": 9.47, "elapsed_time": "0:18:40", "remaining_time": "2:58:37", "throughput": 20784.94, "total_tokens": 23297024} +{"current_steps": 7400, "total_steps": 78105, "loss": 0.5036, "lr": 4.736269363717834e-06, "epoch": 0.47372127264579733, "percentage": 9.47, "elapsed_time": "0:18:41", "remaining_time": "2:58:36", "throughput": 20786.49, "total_tokens": 23313344} +{"current_steps": 7405, "total_steps": 78105, "loss": 0.5866, "lr": 4.739469978235822e-06, "epoch": 0.4740413545867742, "percentage": 9.48, "elapsed_time": "0:18:42", "remaining_time": "2:58:34", "throughput": 20787.71, "total_tokens": 23328512} +{"current_steps": 7410, "total_steps": 78105, "loss": 0.544, "lr": 4.742670592753809e-06, "epoch": 0.4743614365277511, "percentage": 9.49, "elapsed_time": "0:18:42", "remaining_time": "2:58:32", "throughput": 20788.48, "total_tokens": 23342400} +{"current_steps": 7415, "total_steps": 78105, "loss": 0.5637, "lr": 4.745871207271797e-06, "epoch": 0.47468151846872797, "percentage": 9.49, "elapsed_time": "0:18:43", "remaining_time": "2:58:31", "throughput": 20790.03, "total_tokens": 23358400} +{"current_steps": 7420, "total_steps": 78105, "loss": 0.5588, "lr": 4.749071821789784e-06, "epoch": 0.4750016004097049, "percentage": 9.5, "elapsed_time": "0:18:44", "remaining_time": "2:58:29", "throughput": 20791.27, "total_tokens": 23373376} +{"current_steps": 7425, "total_steps": 78105, "loss": 0.6253, "lr": 4.752272436307772e-06, "epoch": 0.4753216823506818, "percentage": 9.51, "elapsed_time": "0:18:44", "remaining_time": "2:58:28", "throughput": 20793.22, "total_tokens": 23390400} +{"current_steps": 7430, "total_steps": 78105, "loss": 0.4955, "lr": 4.755473050825759e-06, "epoch": 0.47564176429165866, "percentage": 9.51, "elapsed_time": "0:18:45", "remaining_time": "2:58:26", "throughput": 20794.7, "total_tokens": 23406400} +{"current_steps": 7435, "total_steps": 78105, "loss": 0.3974, "lr": 4.758673665343747e-06, "epoch": 0.47596184623263554, "percentage": 9.52, "elapsed_time": "0:18:46", "remaining_time": "2:58:25", "throughput": 20796.05, "total_tokens": 23421824} +{"current_steps": 7440, "total_steps": 78105, "loss": 0.6372, "lr": 4.761874279861734e-06, "epoch": 0.4762819281736124, "percentage": 9.53, "elapsed_time": "0:18:46", "remaining_time": "2:58:23", "throughput": 20797.86, "total_tokens": 23438272} +{"current_steps": 7445, "total_steps": 78105, "loss": 0.576, "lr": 4.765074894379722e-06, "epoch": 0.47660201011458936, "percentage": 9.53, "elapsed_time": "0:18:47", "remaining_time": "2:58:22", "throughput": 20799.48, "total_tokens": 23454976} +{"current_steps": 7450, "total_steps": 78105, "loss": 0.5894, "lr": 4.768275508897709e-06, "epoch": 0.47692209205556624, "percentage": 9.54, "elapsed_time": "0:18:48", "remaining_time": "2:58:21", "throughput": 20801.22, "total_tokens": 23471168} +{"current_steps": 7455, "total_steps": 78105, "loss": 0.5048, "lr": 4.7714761234156965e-06, "epoch": 0.4772421739965431, "percentage": 9.54, "elapsed_time": "0:18:49", "remaining_time": "2:58:19", "throughput": 20802.68, "total_tokens": 23486720} +{"current_steps": 7460, "total_steps": 78105, "loss": 0.3781, "lr": 4.7746767379336836e-06, "epoch": 0.47756225593752, "percentage": 9.55, "elapsed_time": "0:18:49", "remaining_time": "2:58:18", "throughput": 20804.29, "total_tokens": 23502912} +{"current_steps": 7465, "total_steps": 78105, "loss": 0.4494, "lr": 4.777877352451671e-06, "epoch": 0.4778823378784969, "percentage": 9.56, "elapsed_time": "0:18:50", "remaining_time": "2:58:16", "throughput": 20806.23, "total_tokens": 23519552} +{"current_steps": 7470, "total_steps": 78105, "loss": 0.5935, "lr": 4.7810779669696585e-06, "epoch": 0.4782024198194738, "percentage": 9.56, "elapsed_time": "0:18:51", "remaining_time": "2:58:15", "throughput": 20807.87, "total_tokens": 23535936} +{"current_steps": 7475, "total_steps": 78105, "loss": 0.5767, "lr": 4.7842785814876455e-06, "epoch": 0.4785225017604507, "percentage": 9.57, "elapsed_time": "0:18:51", "remaining_time": "2:58:13", "throughput": 20809.07, "total_tokens": 23551040} +{"current_steps": 7480, "total_steps": 78105, "loss": 0.3451, "lr": 4.7874791960056334e-06, "epoch": 0.47884258370142757, "percentage": 9.58, "elapsed_time": "0:18:52", "remaining_time": "2:58:12", "throughput": 20810.25, "total_tokens": 23566208} +{"current_steps": 7485, "total_steps": 78105, "loss": 0.5582, "lr": 4.7906798105236205e-06, "epoch": 0.47916266564240445, "percentage": 9.58, "elapsed_time": "0:18:53", "remaining_time": "2:58:10", "throughput": 20811.68, "total_tokens": 23581696} +{"current_steps": 7490, "total_steps": 78105, "loss": 0.4705, "lr": 4.793880425041608e-06, "epoch": 0.47948274758338133, "percentage": 9.59, "elapsed_time": "0:18:53", "remaining_time": "2:58:09", "throughput": 20813.07, "total_tokens": 23597248} +{"current_steps": 7495, "total_steps": 78105, "loss": 0.7067, "lr": 4.797081039559595e-06, "epoch": 0.4798028295243582, "percentage": 9.6, "elapsed_time": "0:18:54", "remaining_time": "2:58:07", "throughput": 20814.35, "total_tokens": 23612800} +{"current_steps": 7500, "total_steps": 78105, "loss": 0.5679, "lr": 4.800281654077583e-06, "epoch": 0.48012291146533514, "percentage": 9.6, "elapsed_time": "0:18:55", "remaining_time": "2:58:06", "throughput": 20815.93, "total_tokens": 23629056} +{"current_steps": 7505, "total_steps": 78105, "loss": 0.5621, "lr": 4.80348226859557e-06, "epoch": 0.480442993406312, "percentage": 9.61, "elapsed_time": "0:18:55", "remaining_time": "2:58:04", "throughput": 20817.2, "total_tokens": 23644352} +{"current_steps": 7510, "total_steps": 78105, "loss": 0.5127, "lr": 4.806682883113558e-06, "epoch": 0.4807630753472889, "percentage": 9.62, "elapsed_time": "0:18:56", "remaining_time": "2:58:03", "throughput": 20819.0, "total_tokens": 23661120} +{"current_steps": 7515, "total_steps": 78105, "loss": 0.4116, "lr": 4.809883497631545e-06, "epoch": 0.4810831572882658, "percentage": 9.62, "elapsed_time": "0:18:57", "remaining_time": "2:58:02", "throughput": 20820.6, "total_tokens": 23677632} +{"current_steps": 7520, "total_steps": 78105, "loss": 0.4893, "lr": 4.813084112149533e-06, "epoch": 0.48140323922924266, "percentage": 9.63, "elapsed_time": "0:18:57", "remaining_time": "2:58:00", "throughput": 20822.3, "total_tokens": 23693952} +{"current_steps": 7525, "total_steps": 78105, "loss": 0.4928, "lr": 4.81628472666752e-06, "epoch": 0.4817233211702196, "percentage": 9.63, "elapsed_time": "0:18:58", "remaining_time": "2:57:58", "throughput": 20823.52, "total_tokens": 23708736} +{"current_steps": 7530, "total_steps": 78105, "loss": 0.5116, "lr": 4.819485341185508e-06, "epoch": 0.4820434031111965, "percentage": 9.64, "elapsed_time": "0:18:59", "remaining_time": "2:57:57", "throughput": 20825.13, "total_tokens": 23724864} +{"current_steps": 7535, "total_steps": 78105, "loss": 0.4219, "lr": 4.822685955703495e-06, "epoch": 0.48236348505217336, "percentage": 9.65, "elapsed_time": "0:18:59", "remaining_time": "2:57:55", "throughput": 20826.4, "total_tokens": 23740160} +{"current_steps": 7540, "total_steps": 78105, "loss": 0.6203, "lr": 4.825886570221483e-06, "epoch": 0.48268356699315024, "percentage": 9.65, "elapsed_time": "0:19:00", "remaining_time": "2:57:54", "throughput": 20827.62, "total_tokens": 23755264} +{"current_steps": 7545, "total_steps": 78105, "loss": 0.4138, "lr": 4.82908718473947e-06, "epoch": 0.4830036489341271, "percentage": 9.66, "elapsed_time": "0:19:01", "remaining_time": "2:57:52", "throughput": 20828.62, "total_tokens": 23770112} +{"current_steps": 7550, "total_steps": 78105, "loss": 0.5278, "lr": 4.832287799257458e-06, "epoch": 0.48332373087510405, "percentage": 9.67, "elapsed_time": "0:19:01", "remaining_time": "2:57:50", "throughput": 20830.05, "total_tokens": 23785536} +{"current_steps": 7555, "total_steps": 78105, "loss": 0.4705, "lr": 4.835488413775445e-06, "epoch": 0.48364381281608093, "percentage": 9.67, "elapsed_time": "0:19:02", "remaining_time": "2:57:49", "throughput": 20831.55, "total_tokens": 23801280} +{"current_steps": 7560, "total_steps": 78105, "loss": 0.5496, "lr": 4.838689028293433e-06, "epoch": 0.4839638947570578, "percentage": 9.68, "elapsed_time": "0:19:03", "remaining_time": "2:57:47", "throughput": 20832.75, "total_tokens": 23816448} +{"current_steps": 7565, "total_steps": 78105, "loss": 0.6405, "lr": 4.84188964281142e-06, "epoch": 0.4842839766980347, "percentage": 9.69, "elapsed_time": "0:19:03", "remaining_time": "2:57:46", "throughput": 20834.27, "total_tokens": 23832512} +{"current_steps": 7570, "total_steps": 78105, "loss": 0.4612, "lr": 4.845090257329408e-06, "epoch": 0.48460405863901157, "percentage": 9.69, "elapsed_time": "0:19:04", "remaining_time": "2:57:44", "throughput": 20835.67, "total_tokens": 23848192} +{"current_steps": 7575, "total_steps": 78105, "loss": 0.5829, "lr": 4.848290871847396e-06, "epoch": 0.48492414057998845, "percentage": 9.7, "elapsed_time": "0:19:05", "remaining_time": "2:57:43", "throughput": 20837.07, "total_tokens": 23863808} +{"current_steps": 7580, "total_steps": 78105, "loss": 0.4312, "lr": 4.851491486365383e-06, "epoch": 0.4852442225209654, "percentage": 9.7, "elapsed_time": "0:19:05", "remaining_time": "2:57:42", "throughput": 20838.91, "total_tokens": 23880960} +{"current_steps": 7585, "total_steps": 78105, "loss": 0.4587, "lr": 4.85469210088337e-06, "epoch": 0.48556430446194226, "percentage": 9.71, "elapsed_time": "0:19:06", "remaining_time": "2:57:40", "throughput": 20840.17, "total_tokens": 23896256} +{"current_steps": 7590, "total_steps": 78105, "loss": 0.596, "lr": 4.857892715401357e-06, "epoch": 0.48588438640291914, "percentage": 9.72, "elapsed_time": "0:19:07", "remaining_time": "2:57:39", "throughput": 20841.89, "total_tokens": 23912832} +{"current_steps": 7595, "total_steps": 78105, "loss": 0.5466, "lr": 4.861093329919345e-06, "epoch": 0.486204468343896, "percentage": 9.72, "elapsed_time": "0:19:08", "remaining_time": "2:57:37", "throughput": 20843.42, "total_tokens": 23928768} +{"current_steps": 7600, "total_steps": 78105, "loss": 0.5774, "lr": 4.864293944437332e-06, "epoch": 0.4865245502848729, "percentage": 9.73, "elapsed_time": "0:19:08", "remaining_time": "2:57:36", "throughput": 20845.06, "total_tokens": 23945088} +{"current_steps": 7605, "total_steps": 78105, "loss": 0.5805, "lr": 4.86749455895532e-06, "epoch": 0.48684463222584984, "percentage": 9.74, "elapsed_time": "0:19:09", "remaining_time": "2:57:34", "throughput": 20846.22, "total_tokens": 23960192} +{"current_steps": 7610, "total_steps": 78105, "loss": 0.4313, "lr": 4.870695173473307e-06, "epoch": 0.4871647141668267, "percentage": 9.74, "elapsed_time": "0:19:10", "remaining_time": "2:57:33", "throughput": 20847.98, "total_tokens": 23977152} +{"current_steps": 7615, "total_steps": 78105, "loss": 0.457, "lr": 4.873895787991295e-06, "epoch": 0.4874847961078036, "percentage": 9.75, "elapsed_time": "0:19:10", "remaining_time": "2:57:32", "throughput": 20849.11, "total_tokens": 23992448} +{"current_steps": 7620, "total_steps": 78105, "loss": 0.4882, "lr": 4.877096402509282e-06, "epoch": 0.4878048780487805, "percentage": 9.76, "elapsed_time": "0:19:11", "remaining_time": "2:57:30", "throughput": 20850.5, "total_tokens": 24008384} +{"current_steps": 7625, "total_steps": 78105, "loss": 0.6539, "lr": 4.8802970170272696e-06, "epoch": 0.48812495998975736, "percentage": 9.76, "elapsed_time": "0:19:12", "remaining_time": "2:57:29", "throughput": 20852.36, "total_tokens": 24025728} +{"current_steps": 7630, "total_steps": 78105, "loss": 0.5459, "lr": 4.883497631545257e-06, "epoch": 0.4884450419307343, "percentage": 9.77, "elapsed_time": "0:19:12", "remaining_time": "2:57:28", "throughput": 20853.41, "total_tokens": 24040448} +{"current_steps": 7635, "total_steps": 78105, "loss": 0.5034, "lr": 4.8866982460632445e-06, "epoch": 0.48876512387171117, "percentage": 9.78, "elapsed_time": "0:19:13", "remaining_time": "2:57:26", "throughput": 20854.44, "total_tokens": 24055168} +{"current_steps": 7640, "total_steps": 78105, "loss": 0.6462, "lr": 4.889898860581232e-06, "epoch": 0.48908520581268805, "percentage": 9.78, "elapsed_time": "0:19:14", "remaining_time": "2:57:24", "throughput": 20855.51, "total_tokens": 24070016} +{"current_steps": 7645, "total_steps": 78105, "loss": 0.363, "lr": 4.8930994750992194e-06, "epoch": 0.48940528775366493, "percentage": 9.79, "elapsed_time": "0:19:14", "remaining_time": "2:57:23", "throughput": 20856.58, "total_tokens": 24084800} +{"current_steps": 7650, "total_steps": 78105, "loss": 0.5898, "lr": 4.896300089617207e-06, "epoch": 0.4897253696946418, "percentage": 9.79, "elapsed_time": "0:19:15", "remaining_time": "2:57:21", "throughput": 20857.97, "total_tokens": 24100416} +{"current_steps": 7655, "total_steps": 78105, "loss": 0.42, "lr": 4.899500704135194e-06, "epoch": 0.49004545163561875, "percentage": 9.8, "elapsed_time": "0:19:16", "remaining_time": "2:57:20", "throughput": 20859.6, "total_tokens": 24117120} +{"current_steps": 7660, "total_steps": 78105, "loss": 0.5558, "lr": 4.902701318653182e-06, "epoch": 0.4903655335765956, "percentage": 9.81, "elapsed_time": "0:19:16", "remaining_time": "2:57:18", "throughput": 20860.78, "total_tokens": 24132224} +{"current_steps": 7665, "total_steps": 78105, "loss": 0.472, "lr": 4.905901933171169e-06, "epoch": 0.4906856155175725, "percentage": 9.81, "elapsed_time": "0:19:17", "remaining_time": "2:57:17", "throughput": 20861.98, "total_tokens": 24147712} +{"current_steps": 7670, "total_steps": 78105, "loss": 0.4881, "lr": 4.909102547689157e-06, "epoch": 0.4910056974585494, "percentage": 9.82, "elapsed_time": "0:19:18", "remaining_time": "2:57:15", "throughput": 20863.66, "total_tokens": 24164288} +{"current_steps": 7675, "total_steps": 78105, "loss": 0.6297, "lr": 4.912303162207144e-06, "epoch": 0.49132577939952626, "percentage": 9.83, "elapsed_time": "0:19:19", "remaining_time": "2:57:19", "throughput": 20867.69, "total_tokens": 24193728} +{"current_steps": 7680, "total_steps": 78105, "loss": 0.5525, "lr": 4.915503776725132e-06, "epoch": 0.49164586134050314, "percentage": 9.83, "elapsed_time": "0:19:20", "remaining_time": "2:57:17", "throughput": 20868.9, "total_tokens": 24208832} +{"current_steps": 7685, "total_steps": 78105, "loss": 0.617, "lr": 4.918704391243119e-06, "epoch": 0.4919659432814801, "percentage": 9.84, "elapsed_time": "0:19:20", "remaining_time": "2:57:15", "throughput": 20869.98, "total_tokens": 24223424} +{"current_steps": 7690, "total_steps": 78105, "loss": 0.5242, "lr": 4.921905005761107e-06, "epoch": 0.49228602522245696, "percentage": 9.85, "elapsed_time": "0:19:21", "remaining_time": "2:57:14", "throughput": 20871.21, "total_tokens": 24238976} +{"current_steps": 7695, "total_steps": 78105, "loss": 0.4704, "lr": 4.925105620279094e-06, "epoch": 0.49260610716343384, "percentage": 9.85, "elapsed_time": "0:19:22", "remaining_time": "2:57:12", "throughput": 20872.27, "total_tokens": 24253632} +{"current_steps": 7700, "total_steps": 78105, "loss": 0.4639, "lr": 4.928306234797082e-06, "epoch": 0.4929261891044107, "percentage": 9.86, "elapsed_time": "0:19:22", "remaining_time": "2:57:11", "throughput": 20874.23, "total_tokens": 24270720} +{"current_steps": 7705, "total_steps": 78105, "loss": 0.533, "lr": 4.931506849315069e-06, "epoch": 0.4932462710453876, "percentage": 9.86, "elapsed_time": "0:19:23", "remaining_time": "2:57:09", "throughput": 20875.17, "total_tokens": 24285312} +{"current_steps": 7710, "total_steps": 78105, "loss": 0.5732, "lr": 4.934707463833056e-06, "epoch": 0.49356635298636453, "percentage": 9.87, "elapsed_time": "0:19:24", "remaining_time": "2:57:07", "throughput": 20876.3, "total_tokens": 24300352} +{"current_steps": 7715, "total_steps": 78105, "loss": 0.5093, "lr": 4.937908078351044e-06, "epoch": 0.4938864349273414, "percentage": 9.88, "elapsed_time": "0:19:24", "remaining_time": "2:57:06", "throughput": 20877.88, "total_tokens": 24317056} +{"current_steps": 7720, "total_steps": 78105, "loss": 0.4937, "lr": 4.941108692869031e-06, "epoch": 0.4942065168683183, "percentage": 9.88, "elapsed_time": "0:19:25", "remaining_time": "2:57:05", "throughput": 20879.33, "total_tokens": 24333184} +{"current_steps": 7725, "total_steps": 78105, "loss": 0.6299, "lr": 4.944309307387019e-06, "epoch": 0.49452659880929517, "percentage": 9.89, "elapsed_time": "0:19:26", "remaining_time": "2:57:03", "throughput": 20880.41, "total_tokens": 24348672} +{"current_steps": 7730, "total_steps": 78105, "loss": 0.5236, "lr": 4.947509921905006e-06, "epoch": 0.49484668075027205, "percentage": 9.9, "elapsed_time": "0:19:26", "remaining_time": "2:57:02", "throughput": 20881.44, "total_tokens": 24363648} +{"current_steps": 7735, "total_steps": 78105, "loss": 0.4852, "lr": 4.950710536422994e-06, "epoch": 0.495166762691249, "percentage": 9.9, "elapsed_time": "0:19:27", "remaining_time": "2:57:01", "throughput": 20883.37, "total_tokens": 24380992} +{"current_steps": 7740, "total_steps": 78105, "loss": 0.5179, "lr": 4.953911150940981e-06, "epoch": 0.49548684463222586, "percentage": 9.91, "elapsed_time": "0:19:28", "remaining_time": "2:56:59", "throughput": 20884.4, "total_tokens": 24395776} +{"current_steps": 7745, "total_steps": 78105, "loss": 0.4997, "lr": 4.957111765458969e-06, "epoch": 0.49580692657320274, "percentage": 9.92, "elapsed_time": "0:19:28", "remaining_time": "2:56:58", "throughput": 20885.83, "total_tokens": 24411584} +{"current_steps": 7750, "total_steps": 78105, "loss": 0.7475, "lr": 4.960312379976956e-06, "epoch": 0.4961270085141796, "percentage": 9.92, "elapsed_time": "0:19:29", "remaining_time": "2:56:56", "throughput": 20886.95, "total_tokens": 24426560} +{"current_steps": 7755, "total_steps": 78105, "loss": 0.6749, "lr": 4.963512994494944e-06, "epoch": 0.4964470904551565, "percentage": 9.93, "elapsed_time": "0:19:30", "remaining_time": "2:56:54", "throughput": 20888.2, "total_tokens": 24441856} +{"current_steps": 7760, "total_steps": 78105, "loss": 0.4507, "lr": 4.966713609012931e-06, "epoch": 0.4967671723961334, "percentage": 9.94, "elapsed_time": "0:19:30", "remaining_time": "2:56:53", "throughput": 20889.55, "total_tokens": 24457472} +{"current_steps": 7765, "total_steps": 78105, "loss": 0.6491, "lr": 4.969914223530919e-06, "epoch": 0.4970872543371103, "percentage": 9.94, "elapsed_time": "0:19:31", "remaining_time": "2:56:51", "throughput": 20890.56, "total_tokens": 24471936} +{"current_steps": 7770, "total_steps": 78105, "loss": 0.4794, "lr": 4.973114838048906e-06, "epoch": 0.4974073362780872, "percentage": 9.95, "elapsed_time": "0:19:32", "remaining_time": "2:56:50", "throughput": 20891.95, "total_tokens": 24487616} +{"current_steps": 7775, "total_steps": 78105, "loss": 0.6671, "lr": 4.9763154525668936e-06, "epoch": 0.4977274182190641, "percentage": 9.95, "elapsed_time": "0:19:32", "remaining_time": "2:56:48", "throughput": 20893.35, "total_tokens": 24503936} +{"current_steps": 7780, "total_steps": 78105, "loss": 0.6155, "lr": 4.979516067084881e-06, "epoch": 0.49804750016004096, "percentage": 9.96, "elapsed_time": "0:19:33", "remaining_time": "2:56:47", "throughput": 20894.35, "total_tokens": 24518464} +{"current_steps": 7785, "total_steps": 78105, "loss": 0.3628, "lr": 4.9827166816028685e-06, "epoch": 0.49836758210101784, "percentage": 9.97, "elapsed_time": "0:19:34", "remaining_time": "2:56:45", "throughput": 20895.34, "total_tokens": 24533184} +{"current_steps": 7790, "total_steps": 78105, "loss": 0.5781, "lr": 4.9859172961208556e-06, "epoch": 0.49868766404199477, "percentage": 9.97, "elapsed_time": "0:19:34", "remaining_time": "2:56:43", "throughput": 20896.74, "total_tokens": 24548992} +{"current_steps": 7795, "total_steps": 78105, "loss": 0.5001, "lr": 4.9891179106388434e-06, "epoch": 0.49900774598297165, "percentage": 9.98, "elapsed_time": "0:19:35", "remaining_time": "2:56:43", "throughput": 20898.55, "total_tokens": 24566592} +{"current_steps": 7800, "total_steps": 78105, "loss": 0.6452, "lr": 4.9923185251568305e-06, "epoch": 0.49932782792394853, "percentage": 9.99, "elapsed_time": "0:19:36", "remaining_time": "2:56:41", "throughput": 20900.33, "total_tokens": 24583552} +{"current_steps": 7805, "total_steps": 78105, "loss": 0.5224, "lr": 4.995519139674818e-06, "epoch": 0.4996479098649254, "percentage": 9.99, "elapsed_time": "0:19:36", "remaining_time": "2:56:40", "throughput": 20901.7, "total_tokens": 24599552} +{"current_steps": 7810, "total_steps": 78105, "loss": 0.4689, "lr": 4.998719754192805e-06, "epoch": 0.4999679918059023, "percentage": 10.0, "elapsed_time": "0:19:37", "remaining_time": "2:56:39", "throughput": 20902.86, "total_tokens": 24614912} +{"current_steps": 7812, "total_steps": 78105, "eval_loss": 0.5265359282493591, "epoch": 0.5000960245822931, "percentage": 10.0, "elapsed_time": "0:20:28", "remaining_time": "3:04:17", "throughput": 20034.66, "total_tokens": 24620672} +{"current_steps": 7815, "total_steps": 78105, "loss": 0.4975, "lr": 4.999999977529344e-06, "epoch": 0.5002880737468792, "percentage": 10.01, "elapsed_time": "0:21:19", "remaining_time": "3:11:50", "throughput": 19245.69, "total_tokens": 24629696} +{"current_steps": 7820, "total_steps": 78105, "loss": 0.5334, "lr": 4.99999984020867e-06, "epoch": 0.5006081556878561, "percentage": 10.01, "elapsed_time": "0:21:20", "remaining_time": "3:11:48", "throughput": 19247.95, "total_tokens": 24646208} +{"current_steps": 7825, "total_steps": 78105, "loss": 0.6436, "lr": 4.999999578051025e-06, "epoch": 0.500928237628833, "percentage": 10.02, "elapsed_time": "0:21:21", "remaining_time": "3:11:46", "throughput": 19249.79, "total_tokens": 24661120} +{"current_steps": 7830, "total_steps": 78105, "loss": 0.5722, "lr": 4.999999191056423e-06, "epoch": 0.5012483195698099, "percentage": 10.02, "elapsed_time": "0:21:21", "remaining_time": "3:11:44", "throughput": 19252.47, "total_tokens": 24678336} +{"current_steps": 7835, "total_steps": 78105, "loss": 0.6323, "lr": 4.999998679224884e-06, "epoch": 0.5015684015107867, "percentage": 10.03, "elapsed_time": "0:21:22", "remaining_time": "3:11:42", "throughput": 19254.6, "total_tokens": 24694336} +{"current_steps": 7840, "total_steps": 78105, "loss": 0.4604, "lr": 4.999998042556433e-06, "epoch": 0.5018884834517636, "percentage": 10.04, "elapsed_time": "0:21:23", "remaining_time": "3:11:40", "throughput": 19256.24, "total_tokens": 24708608} +{"current_steps": 7845, "total_steps": 78105, "loss": 0.5815, "lr": 4.999997281051102e-06, "epoch": 0.5022085653927405, "percentage": 10.04, "elapsed_time": "0:21:23", "remaining_time": "3:11:38", "throughput": 19258.35, "total_tokens": 24724672} +{"current_steps": 7850, "total_steps": 78105, "loss": 0.5677, "lr": 4.999996394708928e-06, "epoch": 0.5025286473337174, "percentage": 10.05, "elapsed_time": "0:21:24", "remaining_time": "3:11:36", "throughput": 19260.37, "total_tokens": 24740608} +{"current_steps": 7855, "total_steps": 78105, "loss": 0.5456, "lr": 4.999995383529957e-06, "epoch": 0.5028487292746944, "percentage": 10.06, "elapsed_time": "0:21:25", "remaining_time": "3:11:34", "throughput": 19263.13, "total_tokens": 24758528} +{"current_steps": 7860, "total_steps": 78105, "loss": 0.4849, "lr": 4.9999942475142375e-06, "epoch": 0.5031688112156713, "percentage": 10.06, "elapsed_time": "0:21:25", "remaining_time": "3:11:32", "throughput": 19264.82, "total_tokens": 24772992} +{"current_steps": 7865, "total_steps": 78105, "loss": 0.5574, "lr": 4.999992986661828e-06, "epoch": 0.5034888931566481, "percentage": 10.07, "elapsed_time": "0:21:26", "remaining_time": "3:11:30", "throughput": 19266.67, "total_tokens": 24788160} +{"current_steps": 7870, "total_steps": 78105, "loss": 0.5406, "lr": 4.99999160097279e-06, "epoch": 0.503808975097625, "percentage": 10.08, "elapsed_time": "0:21:27", "remaining_time": "3:11:28", "throughput": 19268.85, "total_tokens": 24804224} +{"current_steps": 7875, "total_steps": 78105, "loss": 0.6051, "lr": 4.999990090447194e-06, "epoch": 0.5041290570386019, "percentage": 10.08, "elapsed_time": "0:21:27", "remaining_time": "3:11:26", "throughput": 19271.21, "total_tokens": 24820928} +{"current_steps": 7880, "total_steps": 78105, "loss": 0.5021, "lr": 4.9999884550851155e-06, "epoch": 0.5044491389795788, "percentage": 10.09, "elapsed_time": "0:21:28", "remaining_time": "3:11:24", "throughput": 19272.97, "total_tokens": 24835776} +{"current_steps": 7885, "total_steps": 78105, "loss": 0.4682, "lr": 4.999986694886634e-06, "epoch": 0.5047692209205557, "percentage": 10.1, "elapsed_time": "0:21:29", "remaining_time": "3:11:22", "throughput": 19275.05, "total_tokens": 24851648} +{"current_steps": 7890, "total_steps": 78105, "loss": 0.5255, "lr": 4.99998480985184e-06, "epoch": 0.5050893028615325, "percentage": 10.1, "elapsed_time": "0:21:29", "remaining_time": "3:11:19", "throughput": 19277.01, "total_tokens": 24866880} +{"current_steps": 7895, "total_steps": 78105, "loss": 0.4494, "lr": 4.9999827999808256e-06, "epoch": 0.5054093848025094, "percentage": 10.11, "elapsed_time": "0:21:30", "remaining_time": "3:11:17", "throughput": 19278.84, "total_tokens": 24881856} +{"current_steps": 7900, "total_steps": 78105, "loss": 0.5709, "lr": 4.999980665273693e-06, "epoch": 0.5057294667434863, "percentage": 10.11, "elapsed_time": "0:21:31", "remaining_time": "3:11:15", "throughput": 19281.31, "total_tokens": 24898560} +{"current_steps": 7905, "total_steps": 78105, "loss": 0.4448, "lr": 4.999978405730548e-06, "epoch": 0.5060495486844632, "percentage": 10.12, "elapsed_time": "0:21:31", "remaining_time": "3:11:13", "throughput": 19283.05, "total_tokens": 24913216} +{"current_steps": 7910, "total_steps": 78105, "loss": 0.6069, "lr": 4.999976021351503e-06, "epoch": 0.5063696306254402, "percentage": 10.13, "elapsed_time": "0:21:32", "remaining_time": "3:11:11", "throughput": 19285.03, "total_tokens": 24928832} +{"current_steps": 7915, "total_steps": 78105, "loss": 0.6528, "lr": 4.999973512136677e-06, "epoch": 0.506689712566417, "percentage": 10.13, "elapsed_time": "0:21:33", "remaining_time": "3:11:09", "throughput": 19287.07, "total_tokens": 24944512} +{"current_steps": 7920, "total_steps": 78105, "loss": 0.6276, "lr": 4.999970878086197e-06, "epoch": 0.5070097945073939, "percentage": 10.14, "elapsed_time": "0:21:33", "remaining_time": "3:11:07", "throughput": 19288.91, "total_tokens": 24959744} +{"current_steps": 7925, "total_steps": 78105, "loss": 0.5793, "lr": 4.999968119200192e-06, "epoch": 0.5073298764483708, "percentage": 10.15, "elapsed_time": "0:21:34", "remaining_time": "3:11:05", "throughput": 19290.94, "total_tokens": 24975488} +{"current_steps": 7930, "total_steps": 78105, "loss": 0.5143, "lr": 4.999965235478801e-06, "epoch": 0.5076499583893477, "percentage": 10.15, "elapsed_time": "0:21:35", "remaining_time": "3:11:02", "throughput": 19292.68, "total_tokens": 24990272} +{"current_steps": 7935, "total_steps": 78105, "loss": 0.3795, "lr": 4.9999622269221685e-06, "epoch": 0.5079700403303246, "percentage": 10.16, "elapsed_time": "0:21:36", "remaining_time": "3:11:00", "throughput": 19294.88, "total_tokens": 25006272} +{"current_steps": 7940, "total_steps": 78105, "loss": 0.4828, "lr": 4.9999590935304435e-06, "epoch": 0.5082901222713014, "percentage": 10.17, "elapsed_time": "0:21:36", "remaining_time": "3:10:58", "throughput": 19297.05, "total_tokens": 25022464} +{"current_steps": 7945, "total_steps": 78105, "loss": 0.4114, "lr": 4.999955835303784e-06, "epoch": 0.5086102042122783, "percentage": 10.17, "elapsed_time": "0:21:37", "remaining_time": "3:10:56", "throughput": 19298.83, "total_tokens": 25037376} +{"current_steps": 7950, "total_steps": 78105, "loss": 0.5628, "lr": 4.999952452242352e-06, "epoch": 0.5089302861532552, "percentage": 10.18, "elapsed_time": "0:21:38", "remaining_time": "3:10:54", "throughput": 19300.96, "total_tokens": 25053440} +{"current_steps": 7955, "total_steps": 78105, "loss": 0.6121, "lr": 4.999948944346315e-06, "epoch": 0.5092503680942321, "percentage": 10.19, "elapsed_time": "0:21:38", "remaining_time": "3:10:52", "throughput": 19302.87, "total_tokens": 25069120} +{"current_steps": 7960, "total_steps": 78105, "loss": 0.4008, "lr": 4.9999453116158505e-06, "epoch": 0.5095704500352091, "percentage": 10.19, "elapsed_time": "0:21:39", "remaining_time": "3:10:50", "throughput": 19304.69, "total_tokens": 25084224} +{"current_steps": 7965, "total_steps": 78105, "loss": 0.6273, "lr": 4.999941554051139e-06, "epoch": 0.509890531976186, "percentage": 10.2, "elapsed_time": "0:21:40", "remaining_time": "3:10:48", "throughput": 19306.51, "total_tokens": 25099520} +{"current_steps": 7970, "total_steps": 78105, "loss": 0.5022, "lr": 4.999937671652367e-06, "epoch": 0.5102106139171628, "percentage": 10.2, "elapsed_time": "0:21:40", "remaining_time": "3:10:46", "throughput": 19308.42, "total_tokens": 25115200} +{"current_steps": 7975, "total_steps": 78105, "loss": 0.4842, "lr": 4.9999336644197306e-06, "epoch": 0.5105306958581397, "percentage": 10.21, "elapsed_time": "0:21:41", "remaining_time": "3:10:44", "throughput": 19310.55, "total_tokens": 25131520} +{"current_steps": 7980, "total_steps": 78105, "loss": 0.6849, "lr": 4.999929532353428e-06, "epoch": 0.5108507777991166, "percentage": 10.22, "elapsed_time": "0:21:42", "remaining_time": "3:10:42", "throughput": 19312.59, "total_tokens": 25147776} +{"current_steps": 7985, "total_steps": 78105, "loss": 0.5415, "lr": 4.999925275453666e-06, "epoch": 0.5111708597400935, "percentage": 10.22, "elapsed_time": "0:21:42", "remaining_time": "3:10:40", "throughput": 19314.33, "total_tokens": 25162816} +{"current_steps": 7990, "total_steps": 78105, "loss": 0.4387, "lr": 4.999920893720659e-06, "epoch": 0.5114909416810703, "percentage": 10.23, "elapsed_time": "0:21:43", "remaining_time": "3:10:38", "throughput": 19316.44, "total_tokens": 25178752} +{"current_steps": 7995, "total_steps": 78105, "loss": 0.5952, "lr": 4.999916387154622e-06, "epoch": 0.5118110236220472, "percentage": 10.24, "elapsed_time": "0:21:44", "remaining_time": "3:10:36", "throughput": 19318.27, "total_tokens": 25194048} +{"current_steps": 8000, "total_steps": 78105, "loss": 0.4733, "lr": 4.999911755755783e-06, "epoch": 0.5121311055630241, "percentage": 10.24, "elapsed_time": "0:21:44", "remaining_time": "3:10:34", "throughput": 19320.18, "total_tokens": 25209216} +{"current_steps": 8005, "total_steps": 78105, "loss": 0.6132, "lr": 4.999906999524373e-06, "epoch": 0.512451187504001, "percentage": 10.25, "elapsed_time": "0:21:45", "remaining_time": "3:10:32", "throughput": 19322.09, "total_tokens": 25224640} +{"current_steps": 8010, "total_steps": 78105, "loss": 0.3993, "lr": 4.999902118460629e-06, "epoch": 0.5127712694449779, "percentage": 10.26, "elapsed_time": "0:21:46", "remaining_time": "3:10:30", "throughput": 19323.87, "total_tokens": 25239808} +{"current_steps": 8015, "total_steps": 78105, "loss": 0.4945, "lr": 4.999897112564795e-06, "epoch": 0.5130913513859549, "percentage": 10.26, "elapsed_time": "0:21:46", "remaining_time": "3:10:28", "throughput": 19326.51, "total_tokens": 25257344} +{"current_steps": 8020, "total_steps": 78105, "loss": 0.4241, "lr": 4.999891981837119e-06, "epoch": 0.5134114333269317, "percentage": 10.27, "elapsed_time": "0:21:47", "remaining_time": "3:10:26", "throughput": 19328.19, "total_tokens": 25272064} +{"current_steps": 8025, "total_steps": 78105, "loss": 0.4864, "lr": 4.99988672627786e-06, "epoch": 0.5137315152679086, "percentage": 10.27, "elapsed_time": "0:21:48", "remaining_time": "3:10:24", "throughput": 19330.28, "total_tokens": 25287936} +{"current_steps": 8030, "total_steps": 78105, "loss": 0.4549, "lr": 4.9998813458872795e-06, "epoch": 0.5140515972088855, "percentage": 10.28, "elapsed_time": "0:21:48", "remaining_time": "3:10:22", "throughput": 19332.12, "total_tokens": 25303232} +{"current_steps": 8035, "total_steps": 78105, "loss": 0.4298, "lr": 4.999875840665646e-06, "epoch": 0.5143716791498624, "percentage": 10.29, "elapsed_time": "0:21:49", "remaining_time": "3:10:19", "throughput": 19333.99, "total_tokens": 25318656} +{"current_steps": 8040, "total_steps": 78105, "loss": 0.486, "lr": 4.9998702106132336e-06, "epoch": 0.5146917610908393, "percentage": 10.29, "elapsed_time": "0:21:50", "remaining_time": "3:10:17", "throughput": 19335.74, "total_tokens": 25333760} +{"current_steps": 8045, "total_steps": 78105, "loss": 0.5555, "lr": 4.999864455730324e-06, "epoch": 0.5150118430318161, "percentage": 10.3, "elapsed_time": "0:21:50", "remaining_time": "3:10:15", "throughput": 19337.84, "total_tokens": 25349824} +{"current_steps": 8050, "total_steps": 78105, "loss": 0.4811, "lr": 4.9998585760172065e-06, "epoch": 0.515331924972793, "percentage": 10.31, "elapsed_time": "0:21:51", "remaining_time": "3:10:13", "throughput": 19339.78, "total_tokens": 25365376} +{"current_steps": 8055, "total_steps": 78105, "loss": 0.5462, "lr": 4.999852571474172e-06, "epoch": 0.5156520069137699, "percentage": 10.31, "elapsed_time": "0:21:52", "remaining_time": "3:10:11", "throughput": 19341.76, "total_tokens": 25380928} +{"current_steps": 8060, "total_steps": 78105, "loss": 0.4673, "lr": 4.999846442101522e-06, "epoch": 0.5159720888547468, "percentage": 10.32, "elapsed_time": "0:21:52", "remaining_time": "3:10:09", "throughput": 19343.63, "total_tokens": 25396352} +{"current_steps": 8065, "total_steps": 78105, "loss": 0.5058, "lr": 4.999840187899561e-06, "epoch": 0.5162921707957238, "percentage": 10.33, "elapsed_time": "0:21:53", "remaining_time": "3:10:07", "throughput": 19345.45, "total_tokens": 25411456} +{"current_steps": 8070, "total_steps": 78105, "loss": 0.4942, "lr": 4.9998338088686024e-06, "epoch": 0.5166122527367006, "percentage": 10.33, "elapsed_time": "0:21:54", "remaining_time": "3:10:05", "throughput": 19347.95, "total_tokens": 25428544} +{"current_steps": 8075, "total_steps": 78105, "loss": 0.5725, "lr": 4.999827305008964e-06, "epoch": 0.5169323346776775, "percentage": 10.34, "elapsed_time": "0:21:54", "remaining_time": "3:10:03", "throughput": 19349.89, "total_tokens": 25443968} +{"current_steps": 8080, "total_steps": 78105, "loss": 0.4588, "lr": 4.999820676320972e-06, "epoch": 0.5172524166186544, "percentage": 10.35, "elapsed_time": "0:21:55", "remaining_time": "3:10:01", "throughput": 19351.68, "total_tokens": 25458880} +{"current_steps": 8085, "total_steps": 78105, "loss": 0.3874, "lr": 4.999813922804956e-06, "epoch": 0.5175724985596313, "percentage": 10.35, "elapsed_time": "0:21:56", "remaining_time": "3:09:59", "throughput": 19353.56, "total_tokens": 25474304} +{"current_steps": 8090, "total_steps": 78105, "loss": 0.6068, "lr": 4.999807044461255e-06, "epoch": 0.5178925805006082, "percentage": 10.36, "elapsed_time": "0:21:56", "remaining_time": "3:09:57", "throughput": 19355.34, "total_tokens": 25489472} +{"current_steps": 8095, "total_steps": 78105, "loss": 0.488, "lr": 4.99980004129021e-06, "epoch": 0.518212662441585, "percentage": 10.36, "elapsed_time": "0:21:57", "remaining_time": "3:09:55", "throughput": 19357.18, "total_tokens": 25504896} +{"current_steps": 8100, "total_steps": 78105, "loss": 0.5093, "lr": 4.999792913292172e-06, "epoch": 0.5185327443825619, "percentage": 10.37, "elapsed_time": "0:21:58", "remaining_time": "3:09:52", "throughput": 19358.77, "total_tokens": 25519360} +{"current_steps": 8105, "total_steps": 78105, "loss": 0.6216, "lr": 4.999785660467496e-06, "epoch": 0.5188528263235388, "percentage": 10.38, "elapsed_time": "0:21:58", "remaining_time": "3:09:51", "throughput": 19360.93, "total_tokens": 25535680} +{"current_steps": 8110, "total_steps": 78105, "loss": 0.5825, "lr": 4.999778282816547e-06, "epoch": 0.5191729082645157, "percentage": 10.38, "elapsed_time": "0:21:59", "remaining_time": "3:09:48", "throughput": 19362.65, "total_tokens": 25550592} +{"current_steps": 8115, "total_steps": 78105, "loss": 0.5011, "lr": 4.99977078033969e-06, "epoch": 0.5194929902054926, "percentage": 10.39, "elapsed_time": "0:22:00", "remaining_time": "3:09:46", "throughput": 19364.38, "total_tokens": 25565696} +{"current_steps": 8120, "total_steps": 78105, "loss": 0.4527, "lr": 4.999763153037302e-06, "epoch": 0.5198130721464695, "percentage": 10.4, "elapsed_time": "0:22:00", "remaining_time": "3:09:44", "throughput": 19366.48, "total_tokens": 25581568} +{"current_steps": 8125, "total_steps": 78105, "loss": 0.6174, "lr": 4.9997554009097625e-06, "epoch": 0.5201331540874464, "percentage": 10.4, "elapsed_time": "0:22:01", "remaining_time": "3:09:42", "throughput": 19368.53, "total_tokens": 25597760} +{"current_steps": 8130, "total_steps": 78105, "loss": 0.534, "lr": 4.999747523957459e-06, "epoch": 0.5204532360284233, "percentage": 10.41, "elapsed_time": "0:22:02", "remaining_time": "3:09:41", "throughput": 19371.09, "total_tokens": 25615424} +{"current_steps": 8135, "total_steps": 78105, "loss": 0.5229, "lr": 4.9997395221807845e-06, "epoch": 0.5207733179694002, "percentage": 10.42, "elapsed_time": "0:22:03", "remaining_time": "3:09:39", "throughput": 19372.97, "total_tokens": 25630720} +{"current_steps": 8140, "total_steps": 78105, "loss": 0.5855, "lr": 4.99973139558014e-06, "epoch": 0.5210933999103771, "percentage": 10.42, "elapsed_time": "0:22:03", "remaining_time": "3:09:38", "throughput": 19375.75, "total_tokens": 25649280} +{"current_steps": 8145, "total_steps": 78105, "loss": 0.6326, "lr": 4.999723144155929e-06, "epoch": 0.5214134818513539, "percentage": 10.43, "elapsed_time": "0:22:04", "remaining_time": "3:09:36", "throughput": 19377.45, "total_tokens": 25664576} +{"current_steps": 8150, "total_steps": 78105, "loss": 0.5045, "lr": 4.999714767908565e-06, "epoch": 0.5217335637923308, "percentage": 10.43, "elapsed_time": "0:22:05", "remaining_time": "3:09:34", "throughput": 19379.43, "total_tokens": 25680384} +{"current_steps": 8155, "total_steps": 78105, "loss": 0.466, "lr": 4.999706266838466e-06, "epoch": 0.5220536457333077, "percentage": 10.44, "elapsed_time": "0:22:05", "remaining_time": "3:09:32", "throughput": 19381.39, "total_tokens": 25696128} +{"current_steps": 8160, "total_steps": 78105, "loss": 0.4224, "lr": 4.9996976409460575e-06, "epoch": 0.5223737276742846, "percentage": 10.45, "elapsed_time": "0:22:06", "remaining_time": "3:09:30", "throughput": 19383.6, "total_tokens": 25712768} +{"current_steps": 8165, "total_steps": 78105, "loss": 0.607, "lr": 4.9996888902317684e-06, "epoch": 0.5226938096152615, "percentage": 10.45, "elapsed_time": "0:22:07", "remaining_time": "3:09:28", "throughput": 19385.28, "total_tokens": 25727616} +{"current_steps": 8170, "total_steps": 78105, "loss": 0.4967, "lr": 4.999680014696037e-06, "epoch": 0.5230138915562383, "percentage": 10.46, "elapsed_time": "0:22:07", "remaining_time": "3:09:26", "throughput": 19387.63, "total_tokens": 25744384} +{"current_steps": 8175, "total_steps": 78105, "loss": 0.434, "lr": 4.999671014339306e-06, "epoch": 0.5233339734972153, "percentage": 10.47, "elapsed_time": "0:22:08", "remaining_time": "3:09:24", "throughput": 19389.64, "total_tokens": 25760640} +{"current_steps": 8180, "total_steps": 78105, "loss": 0.4633, "lr": 4.999661889162024e-06, "epoch": 0.5236540554381922, "percentage": 10.47, "elapsed_time": "0:22:09", "remaining_time": "3:09:22", "throughput": 19391.64, "total_tokens": 25776640} +{"current_steps": 8185, "total_steps": 78105, "loss": 0.4204, "lr": 4.999652639164648e-06, "epoch": 0.5239741373791691, "percentage": 10.48, "elapsed_time": "0:22:09", "remaining_time": "3:09:20", "throughput": 19393.41, "total_tokens": 25791744} +{"current_steps": 8190, "total_steps": 78105, "loss": 0.7351, "lr": 4.99964326434764e-06, "epoch": 0.524294219320146, "percentage": 10.49, "elapsed_time": "0:22:10", "remaining_time": "3:09:19", "throughput": 19395.53, "total_tokens": 25808256} +{"current_steps": 8195, "total_steps": 78105, "loss": 0.4311, "lr": 4.9996337647114675e-06, "epoch": 0.5246143012611229, "percentage": 10.49, "elapsed_time": "0:22:11", "remaining_time": "3:09:17", "throughput": 19397.41, "total_tokens": 25824128} +{"current_steps": 8200, "total_steps": 78105, "loss": 0.4729, "lr": 4.999624140256605e-06, "epoch": 0.5249343832020997, "percentage": 10.5, "elapsed_time": "0:22:12", "remaining_time": "3:09:15", "throughput": 19399.59, "total_tokens": 25840576} +{"current_steps": 8205, "total_steps": 78105, "loss": 0.3891, "lr": 4.999614390983532e-06, "epoch": 0.5252544651430766, "percentage": 10.51, "elapsed_time": "0:22:12", "remaining_time": "3:09:13", "throughput": 19401.47, "total_tokens": 25856256} +{"current_steps": 8210, "total_steps": 78105, "loss": 0.4825, "lr": 4.9996045168927365e-06, "epoch": 0.5255745470840535, "percentage": 10.51, "elapsed_time": "0:22:13", "remaining_time": "3:09:11", "throughput": 19403.22, "total_tokens": 25871424} +{"current_steps": 8215, "total_steps": 78105, "loss": 0.619, "lr": 4.999594517984711e-06, "epoch": 0.5258946290250304, "percentage": 10.52, "elapsed_time": "0:22:14", "remaining_time": "3:09:09", "throughput": 19404.81, "total_tokens": 25886272} +{"current_steps": 8220, "total_steps": 78105, "loss": 0.7025, "lr": 4.999584394259956e-06, "epoch": 0.5262147109660072, "percentage": 10.52, "elapsed_time": "0:22:14", "remaining_time": "3:09:07", "throughput": 19406.69, "total_tokens": 25901952} +{"current_steps": 8225, "total_steps": 78105, "loss": 0.4938, "lr": 4.999574145718977e-06, "epoch": 0.5265347929069842, "percentage": 10.53, "elapsed_time": "0:22:15", "remaining_time": "3:09:05", "throughput": 19408.51, "total_tokens": 25917888} +{"current_steps": 8230, "total_steps": 78105, "loss": 0.4671, "lr": 4.9995637723622834e-06, "epoch": 0.5268548748479611, "percentage": 10.54, "elapsed_time": "0:22:16", "remaining_time": "3:09:03", "throughput": 19410.39, "total_tokens": 25933824} +{"current_steps": 8235, "total_steps": 78105, "loss": 0.5235, "lr": 4.999553274190396e-06, "epoch": 0.527174956788938, "percentage": 10.54, "elapsed_time": "0:22:16", "remaining_time": "3:09:02", "throughput": 19412.8, "total_tokens": 25951360} +{"current_steps": 8240, "total_steps": 78105, "loss": 0.4596, "lr": 4.999542651203837e-06, "epoch": 0.5274950387299149, "percentage": 10.55, "elapsed_time": "0:22:17", "remaining_time": "3:09:00", "throughput": 19414.71, "total_tokens": 25967232} +{"current_steps": 8245, "total_steps": 78105, "loss": 0.5693, "lr": 4.999531903403138e-06, "epoch": 0.5278151206708918, "percentage": 10.56, "elapsed_time": "0:22:18", "remaining_time": "3:08:58", "throughput": 19416.44, "total_tokens": 25982592} +{"current_steps": 8250, "total_steps": 78105, "loss": 0.5243, "lr": 4.9995210307888354e-06, "epoch": 0.5281352026118686, "percentage": 10.56, "elapsed_time": "0:22:18", "remaining_time": "3:08:56", "throughput": 19418.13, "total_tokens": 25997824} +{"current_steps": 8255, "total_steps": 78105, "loss": 0.5829, "lr": 4.999510033361472e-06, "epoch": 0.5284552845528455, "percentage": 10.57, "elapsed_time": "0:22:19", "remaining_time": "3:08:54", "throughput": 19420.05, "total_tokens": 26013632} +{"current_steps": 8260, "total_steps": 78105, "loss": 0.4653, "lr": 4.999498911121596e-06, "epoch": 0.5287753664938224, "percentage": 10.58, "elapsed_time": "0:22:20", "remaining_time": "3:08:52", "throughput": 19421.66, "total_tokens": 26028800} +{"current_steps": 8265, "total_steps": 78105, "loss": 0.455, "lr": 4.999487664069765e-06, "epoch": 0.5290954484347993, "percentage": 10.58, "elapsed_time": "0:22:20", "remaining_time": "3:08:50", "throughput": 19423.41, "total_tokens": 26044672} +{"current_steps": 8270, "total_steps": 78105, "loss": 0.5519, "lr": 4.999476292206539e-06, "epoch": 0.5294155303757762, "percentage": 10.59, "elapsed_time": "0:22:21", "remaining_time": "3:08:48", "throughput": 19425.22, "total_tokens": 26060544} +{"current_steps": 8275, "total_steps": 78105, "loss": 0.5508, "lr": 4.999464795532487e-06, "epoch": 0.529735612316753, "percentage": 10.59, "elapsed_time": "0:22:22", "remaining_time": "3:08:46", "throughput": 19426.98, "total_tokens": 26076160} +{"current_steps": 8280, "total_steps": 78105, "loss": 0.512, "lr": 4.999453174048181e-06, "epoch": 0.53005569425773, "percentage": 10.6, "elapsed_time": "0:22:22", "remaining_time": "3:08:45", "throughput": 19428.96, "total_tokens": 26092352} +{"current_steps": 8285, "total_steps": 78105, "loss": 0.4312, "lr": 4.999441427754204e-06, "epoch": 0.5303757761987069, "percentage": 10.61, "elapsed_time": "0:22:23", "remaining_time": "3:08:43", "throughput": 19430.54, "total_tokens": 26107776} +{"current_steps": 8290, "total_steps": 78105, "loss": 0.4546, "lr": 4.9994295566511405e-06, "epoch": 0.5306958581396838, "percentage": 10.61, "elapsed_time": "0:22:24", "remaining_time": "3:08:41", "throughput": 19432.49, "total_tokens": 26123712} +{"current_steps": 8295, "total_steps": 78105, "loss": 0.534, "lr": 4.9994175607395835e-06, "epoch": 0.5310159400806607, "percentage": 10.62, "elapsed_time": "0:22:25", "remaining_time": "3:08:39", "throughput": 19434.31, "total_tokens": 26139200} +{"current_steps": 8300, "total_steps": 78105, "loss": 0.5576, "lr": 4.999405440020133e-06, "epoch": 0.5313360220216375, "percentage": 10.63, "elapsed_time": "0:22:25", "remaining_time": "3:08:37", "throughput": 19435.74, "total_tokens": 26153600} +{"current_steps": 8305, "total_steps": 78105, "loss": 0.6323, "lr": 4.999393194493395e-06, "epoch": 0.5316561039626144, "percentage": 10.63, "elapsed_time": "0:22:26", "remaining_time": "3:08:35", "throughput": 19437.67, "total_tokens": 26169600} +{"current_steps": 8310, "total_steps": 78105, "loss": 0.4154, "lr": 4.999380824159978e-06, "epoch": 0.5319761859035913, "percentage": 10.64, "elapsed_time": "0:22:27", "remaining_time": "3:08:33", "throughput": 19439.69, "total_tokens": 26185536} +{"current_steps": 8315, "total_steps": 78105, "loss": 0.4978, "lr": 4.999368329020502e-06, "epoch": 0.5322962678445682, "percentage": 10.65, "elapsed_time": "0:22:27", "remaining_time": "3:08:31", "throughput": 19441.52, "total_tokens": 26201536} +{"current_steps": 8320, "total_steps": 78105, "loss": 0.4387, "lr": 4.99935570907559e-06, "epoch": 0.5326163497855451, "percentage": 10.65, "elapsed_time": "0:22:28", "remaining_time": "3:08:29", "throughput": 19443.11, "total_tokens": 26216576} +{"current_steps": 8325, "total_steps": 78105, "loss": 0.3684, "lr": 4.999342964325873e-06, "epoch": 0.5329364317265219, "percentage": 10.66, "elapsed_time": "0:22:29", "remaining_time": "3:08:27", "throughput": 19444.96, "total_tokens": 26232640} +{"current_steps": 8330, "total_steps": 78105, "loss": 0.5969, "lr": 4.999330094771987e-06, "epoch": 0.5332565136674989, "percentage": 10.67, "elapsed_time": "0:22:29", "remaining_time": "3:08:26", "throughput": 19446.72, "total_tokens": 26248448} +{"current_steps": 8335, "total_steps": 78105, "loss": 0.5528, "lr": 4.999317100414575e-06, "epoch": 0.5335765956084758, "percentage": 10.67, "elapsed_time": "0:22:30", "remaining_time": "3:08:24", "throughput": 19448.38, "total_tokens": 26263872} +{"current_steps": 8340, "total_steps": 78105, "loss": 0.594, "lr": 4.999303981254284e-06, "epoch": 0.5338966775494527, "percentage": 10.68, "elapsed_time": "0:22:31", "remaining_time": "3:08:22", "throughput": 19450.33, "total_tokens": 26279872} +{"current_steps": 8345, "total_steps": 78105, "loss": 0.4841, "lr": 4.999290737291772e-06, "epoch": 0.5342167594904296, "percentage": 10.68, "elapsed_time": "0:22:31", "remaining_time": "3:08:20", "throughput": 19452.32, "total_tokens": 26295680} +{"current_steps": 8350, "total_steps": 78105, "loss": 0.5733, "lr": 4.999277368527699e-06, "epoch": 0.5345368414314065, "percentage": 10.69, "elapsed_time": "0:22:32", "remaining_time": "3:08:18", "throughput": 19454.01, "total_tokens": 26311040} +{"current_steps": 8355, "total_steps": 78105, "loss": 0.8566, "lr": 4.9992638749627315e-06, "epoch": 0.5348569233723833, "percentage": 10.7, "elapsed_time": "0:22:33", "remaining_time": "3:08:16", "throughput": 19455.72, "total_tokens": 26326144} +{"current_steps": 8360, "total_steps": 78105, "loss": 0.433, "lr": 4.999250256597544e-06, "epoch": 0.5351770053133602, "percentage": 10.7, "elapsed_time": "0:22:33", "remaining_time": "3:08:14", "throughput": 19457.44, "total_tokens": 26341952} +{"current_steps": 8365, "total_steps": 78105, "loss": 0.3803, "lr": 4.999236513432818e-06, "epoch": 0.5354970872543371, "percentage": 10.71, "elapsed_time": "0:22:34", "remaining_time": "3:08:12", "throughput": 19458.92, "total_tokens": 26356672} +{"current_steps": 8370, "total_steps": 78105, "loss": 0.572, "lr": 4.999222645469239e-06, "epoch": 0.535817169195314, "percentage": 10.72, "elapsed_time": "0:22:35", "remaining_time": "3:08:10", "throughput": 19460.87, "total_tokens": 26373056} +{"current_steps": 8375, "total_steps": 78105, "loss": 0.48, "lr": 4.999208652707497e-06, "epoch": 0.5361372511362908, "percentage": 10.72, "elapsed_time": "0:22:35", "remaining_time": "3:08:08", "throughput": 19462.45, "total_tokens": 26388032} +{"current_steps": 8380, "total_steps": 78105, "loss": 0.5506, "lr": 4.999194535148294e-06, "epoch": 0.5364573330772677, "percentage": 10.73, "elapsed_time": "0:22:36", "remaining_time": "3:08:06", "throughput": 19464.01, "total_tokens": 26402880} +{"current_steps": 8385, "total_steps": 78105, "loss": 0.6615, "lr": 4.999180292792334e-06, "epoch": 0.5367774150182447, "percentage": 10.74, "elapsed_time": "0:22:37", "remaining_time": "3:08:04", "throughput": 19466.0, "total_tokens": 26419328} +{"current_steps": 8390, "total_steps": 78105, "loss": 0.5452, "lr": 4.9991659256403276e-06, "epoch": 0.5370974969592216, "percentage": 10.74, "elapsed_time": "0:22:37", "remaining_time": "3:08:02", "throughput": 19467.62, "total_tokens": 26434112} +{"current_steps": 8395, "total_steps": 78105, "loss": 0.49, "lr": 4.999151433692992e-06, "epoch": 0.5374175789001985, "percentage": 10.75, "elapsed_time": "0:22:38", "remaining_time": "3:08:01", "throughput": 19469.77, "total_tokens": 26450688} +{"current_steps": 8400, "total_steps": 78105, "loss": 0.4958, "lr": 4.999136816951053e-06, "epoch": 0.5377376608411754, "percentage": 10.75, "elapsed_time": "0:22:39", "remaining_time": "3:07:59", "throughput": 19471.57, "total_tokens": 26466368} +{"current_steps": 8405, "total_steps": 78105, "loss": 0.4806, "lr": 4.9991220754152366e-06, "epoch": 0.5380577427821522, "percentage": 10.76, "elapsed_time": "0:22:39", "remaining_time": "3:07:57", "throughput": 19473.57, "total_tokens": 26482624} +{"current_steps": 8410, "total_steps": 78105, "loss": 0.49, "lr": 4.999107209086283e-06, "epoch": 0.5383778247231291, "percentage": 10.77, "elapsed_time": "0:22:40", "remaining_time": "3:07:55", "throughput": 19475.63, "total_tokens": 26499200} +{"current_steps": 8415, "total_steps": 78105, "loss": 0.5364, "lr": 4.9990922179649325e-06, "epoch": 0.538697906664106, "percentage": 10.77, "elapsed_time": "0:22:41", "remaining_time": "3:07:53", "throughput": 19477.2, "total_tokens": 26514048} +{"current_steps": 8420, "total_steps": 78105, "loss": 0.4705, "lr": 4.9990771020519336e-06, "epoch": 0.5390179886050829, "percentage": 10.78, "elapsed_time": "0:22:41", "remaining_time": "3:07:51", "throughput": 19478.89, "total_tokens": 26529216} +{"current_steps": 8425, "total_steps": 78105, "loss": 0.497, "lr": 4.999061861348041e-06, "epoch": 0.5393380705460598, "percentage": 10.79, "elapsed_time": "0:22:42", "remaining_time": "3:07:49", "throughput": 19480.8, "total_tokens": 26545152} +{"current_steps": 8430, "total_steps": 78105, "loss": 0.6447, "lr": 4.999046495854017e-06, "epoch": 0.5396581524870366, "percentage": 10.79, "elapsed_time": "0:22:43", "remaining_time": "3:07:47", "throughput": 19482.55, "total_tokens": 26560512} +{"current_steps": 8435, "total_steps": 78105, "loss": 0.4271, "lr": 4.999031005570628e-06, "epoch": 0.5399782344280136, "percentage": 10.8, "elapsed_time": "0:22:43", "remaining_time": "3:07:45", "throughput": 19484.16, "total_tokens": 26575488} +{"current_steps": 8440, "total_steps": 78105, "loss": 0.4814, "lr": 4.9990153904986475e-06, "epoch": 0.5402983163689905, "percentage": 10.81, "elapsed_time": "0:22:44", "remaining_time": "3:07:43", "throughput": 19486.04, "total_tokens": 26591040} +{"current_steps": 8445, "total_steps": 78105, "loss": 0.4528, "lr": 4.998999650638855e-06, "epoch": 0.5406183983099674, "percentage": 10.81, "elapsed_time": "0:22:45", "remaining_time": "3:07:41", "throughput": 19488.04, "total_tokens": 26606976} +{"current_steps": 8450, "total_steps": 78105, "loss": 0.483, "lr": 4.998983785992036e-06, "epoch": 0.5409384802509443, "percentage": 10.82, "elapsed_time": "0:22:45", "remaining_time": "3:07:40", "throughput": 19490.12, "total_tokens": 26623296} +{"current_steps": 8455, "total_steps": 78105, "loss": 0.4677, "lr": 4.9989677965589846e-06, "epoch": 0.5412585621919211, "percentage": 10.83, "elapsed_time": "0:22:46", "remaining_time": "3:07:38", "throughput": 19491.77, "total_tokens": 26638336} +{"current_steps": 8460, "total_steps": 78105, "loss": 0.3693, "lr": 4.9989516823404985e-06, "epoch": 0.541578644132898, "percentage": 10.83, "elapsed_time": "0:22:47", "remaining_time": "3:07:36", "throughput": 19493.86, "total_tokens": 26654400} +{"current_steps": 8465, "total_steps": 78105, "loss": 0.4609, "lr": 4.9989354433373805e-06, "epoch": 0.5418987260738749, "percentage": 10.84, "elapsed_time": "0:22:47", "remaining_time": "3:07:34", "throughput": 19495.53, "total_tokens": 26669568} +{"current_steps": 8470, "total_steps": 78105, "loss": 0.6815, "lr": 4.9989190795504435e-06, "epoch": 0.5422188080148518, "percentage": 10.84, "elapsed_time": "0:22:48", "remaining_time": "3:07:32", "throughput": 19497.56, "total_tokens": 26686016} +{"current_steps": 8475, "total_steps": 78105, "loss": 0.5297, "lr": 4.998902590980504e-06, "epoch": 0.5425388899558287, "percentage": 10.85, "elapsed_time": "0:22:49", "remaining_time": "3:07:30", "throughput": 19499.34, "total_tokens": 26701504} +{"current_steps": 8480, "total_steps": 78105, "loss": 0.8467, "lr": 4.998885977628386e-06, "epoch": 0.5428589718968055, "percentage": 10.86, "elapsed_time": "0:22:50", "remaining_time": "3:07:28", "throughput": 19501.16, "total_tokens": 26717120} +{"current_steps": 8485, "total_steps": 78105, "loss": 0.6439, "lr": 4.998869239494918e-06, "epoch": 0.5431790538377824, "percentage": 10.86, "elapsed_time": "0:22:50", "remaining_time": "3:07:26", "throughput": 19502.99, "total_tokens": 26732736} +{"current_steps": 8490, "total_steps": 78105, "loss": 0.5143, "lr": 4.998852376580937e-06, "epoch": 0.5434991357787594, "percentage": 10.87, "elapsed_time": "0:22:51", "remaining_time": "3:07:24", "throughput": 19504.42, "total_tokens": 26747392} +{"current_steps": 8495, "total_steps": 78105, "loss": 0.5548, "lr": 4.9988353888872835e-06, "epoch": 0.5438192177197363, "percentage": 10.88, "elapsed_time": "0:22:51", "remaining_time": "3:07:22", "throughput": 19505.94, "total_tokens": 26762112} +{"current_steps": 8500, "total_steps": 78105, "loss": 0.5419, "lr": 4.998818276414807e-06, "epoch": 0.5441392996607132, "percentage": 10.88, "elapsed_time": "0:22:52", "remaining_time": "3:07:20", "throughput": 19507.73, "total_tokens": 26777856} +{"current_steps": 8505, "total_steps": 78105, "loss": 0.5263, "lr": 4.998801039164361e-06, "epoch": 0.54445938160169, "percentage": 10.89, "elapsed_time": "0:22:53", "remaining_time": "3:07:18", "throughput": 19509.71, "total_tokens": 26794048} +{"current_steps": 8510, "total_steps": 78105, "loss": 0.3765, "lr": 4.998783677136808e-06, "epoch": 0.5447794635426669, "percentage": 10.9, "elapsed_time": "0:22:54", "remaining_time": "3:07:16", "throughput": 19511.6, "total_tokens": 26809792} +{"current_steps": 8515, "total_steps": 78105, "loss": 0.5347, "lr": 4.998766190333013e-06, "epoch": 0.5450995454836438, "percentage": 10.9, "elapsed_time": "0:22:54", "remaining_time": "3:07:14", "throughput": 19513.3, "total_tokens": 26825024} +{"current_steps": 8520, "total_steps": 78105, "loss": 0.5222, "lr": 4.998748578753851e-06, "epoch": 0.5454196274246207, "percentage": 10.91, "elapsed_time": "0:22:55", "remaining_time": "3:07:13", "throughput": 19515.52, "total_tokens": 26841536} +{"current_steps": 8525, "total_steps": 78105, "loss": 0.4838, "lr": 4.9987308424002e-06, "epoch": 0.5457397093655976, "percentage": 10.91, "elapsed_time": "0:22:56", "remaining_time": "3:07:11", "throughput": 19517.47, "total_tokens": 26857600} +{"current_steps": 8530, "total_steps": 78105, "loss": 0.4069, "lr": 4.998712981272946e-06, "epoch": 0.5460597913065744, "percentage": 10.92, "elapsed_time": "0:22:56", "remaining_time": "3:07:09", "throughput": 19519.69, "total_tokens": 26874176} +{"current_steps": 8535, "total_steps": 78105, "loss": 0.5914, "lr": 4.9986949953729815e-06, "epoch": 0.5463798732475513, "percentage": 10.93, "elapsed_time": "0:22:57", "remaining_time": "3:07:07", "throughput": 19521.63, "total_tokens": 26890176} +{"current_steps": 8540, "total_steps": 78105, "loss": 0.3921, "lr": 4.998676884701203e-06, "epoch": 0.5466999551885282, "percentage": 10.93, "elapsed_time": "0:22:58", "remaining_time": "3:07:05", "throughput": 19523.32, "total_tokens": 26905728} +{"current_steps": 8545, "total_steps": 78105, "loss": 0.5498, "lr": 4.998658649258517e-06, "epoch": 0.5470200371295052, "percentage": 10.94, "elapsed_time": "0:22:58", "remaining_time": "3:07:04", "throughput": 19525.0, "total_tokens": 26921280} +{"current_steps": 8550, "total_steps": 78105, "loss": 0.5778, "lr": 4.998640289045833e-06, "epoch": 0.5473401190704821, "percentage": 10.95, "elapsed_time": "0:22:59", "remaining_time": "3:07:02", "throughput": 19526.66, "total_tokens": 26936704} +{"current_steps": 8555, "total_steps": 78105, "loss": 0.4431, "lr": 4.998621804064068e-06, "epoch": 0.547660201011459, "percentage": 10.95, "elapsed_time": "0:23:00", "remaining_time": "3:07:00", "throughput": 19528.52, "total_tokens": 26952448} +{"current_steps": 8560, "total_steps": 78105, "loss": 0.4593, "lr": 4.9986031943141444e-06, "epoch": 0.5479802829524358, "percentage": 10.96, "elapsed_time": "0:23:00", "remaining_time": "3:06:58", "throughput": 19530.16, "total_tokens": 26967680} +{"current_steps": 8565, "total_steps": 78105, "loss": 0.4198, "lr": 4.998584459796992e-06, "epoch": 0.5483003648934127, "percentage": 10.97, "elapsed_time": "0:23:01", "remaining_time": "3:06:56", "throughput": 19531.63, "total_tokens": 26982400} +{"current_steps": 8570, "total_steps": 78105, "loss": 0.4486, "lr": 4.998565600513546e-06, "epoch": 0.5486204468343896, "percentage": 10.97, "elapsed_time": "0:23:02", "remaining_time": "3:06:54", "throughput": 19533.58, "total_tokens": 26998400} +{"current_steps": 8575, "total_steps": 78105, "loss": 0.4721, "lr": 4.99854661646475e-06, "epoch": 0.5489405287753665, "percentage": 10.98, "elapsed_time": "0:23:02", "remaining_time": "3:06:53", "throughput": 19535.84, "total_tokens": 27015936} +{"current_steps": 8580, "total_steps": 78105, "loss": 0.3714, "lr": 4.998527507651549e-06, "epoch": 0.5492606107163434, "percentage": 10.99, "elapsed_time": "0:23:03", "remaining_time": "3:06:50", "throughput": 19537.33, "total_tokens": 27030528} +{"current_steps": 8585, "total_steps": 78105, "loss": 0.5324, "lr": 4.9985082740748996e-06, "epoch": 0.5495806926573202, "percentage": 10.99, "elapsed_time": "0:23:04", "remaining_time": "3:06:49", "throughput": 19539.08, "total_tokens": 27046080} +{"current_steps": 8590, "total_steps": 78105, "loss": 0.545, "lr": 4.998488915735761e-06, "epoch": 0.5499007745982971, "percentage": 11.0, "elapsed_time": "0:23:04", "remaining_time": "3:06:47", "throughput": 19540.76, "total_tokens": 27061504} +{"current_steps": 8595, "total_steps": 78105, "loss": 0.4747, "lr": 4.9984694326351e-06, "epoch": 0.5502208565392741, "percentage": 11.0, "elapsed_time": "0:23:05", "remaining_time": "3:06:45", "throughput": 19542.49, "total_tokens": 27077056} +{"current_steps": 8600, "total_steps": 78105, "loss": 0.4366, "lr": 4.998449824773889e-06, "epoch": 0.550540938480251, "percentage": 11.01, "elapsed_time": "0:23:06", "remaining_time": "3:06:43", "throughput": 19545.06, "total_tokens": 27095168} +{"current_steps": 8605, "total_steps": 78105, "loss": 0.4849, "lr": 4.998430092153108e-06, "epoch": 0.5508610204212279, "percentage": 11.02, "elapsed_time": "0:23:06", "remaining_time": "3:06:41", "throughput": 19546.64, "total_tokens": 27110144} +{"current_steps": 8610, "total_steps": 78105, "loss": 0.6933, "lr": 4.9984102347737425e-06, "epoch": 0.5511811023622047, "percentage": 11.02, "elapsed_time": "0:23:07", "remaining_time": "3:06:39", "throughput": 19548.13, "total_tokens": 27124864} +{"current_steps": 8615, "total_steps": 78105, "loss": 0.4974, "lr": 4.998390252636783e-06, "epoch": 0.5515011843031816, "percentage": 11.03, "elapsed_time": "0:23:08", "remaining_time": "3:06:37", "throughput": 19549.76, "total_tokens": 27140160} +{"current_steps": 8620, "total_steps": 78105, "loss": 0.5157, "lr": 4.998370145743229e-06, "epoch": 0.5518212662441585, "percentage": 11.04, "elapsed_time": "0:23:08", "remaining_time": "3:06:36", "throughput": 19551.42, "total_tokens": 27155712} +{"current_steps": 8625, "total_steps": 78105, "loss": 0.5579, "lr": 4.998349914094082e-06, "epoch": 0.5521413481851354, "percentage": 11.04, "elapsed_time": "0:23:09", "remaining_time": "3:06:34", "throughput": 19553.31, "total_tokens": 27171520} +{"current_steps": 8630, "total_steps": 78105, "loss": 0.4922, "lr": 4.998329557690354e-06, "epoch": 0.5524614301261123, "percentage": 11.05, "elapsed_time": "0:23:10", "remaining_time": "3:06:32", "throughput": 19554.99, "total_tokens": 27186752} +{"current_steps": 8635, "total_steps": 78105, "loss": 0.4771, "lr": 4.998309076533062e-06, "epoch": 0.5527815120670891, "percentage": 11.06, "elapsed_time": "0:23:10", "remaining_time": "3:06:30", "throughput": 19556.55, "total_tokens": 27201664} +{"current_steps": 8640, "total_steps": 78105, "loss": 0.6043, "lr": 4.998288470623228e-06, "epoch": 0.553101594008066, "percentage": 11.06, "elapsed_time": "0:23:11", "remaining_time": "3:06:28", "throughput": 19558.78, "total_tokens": 27218880} +{"current_steps": 8645, "total_steps": 78105, "loss": 0.4912, "lr": 4.9982677399618805e-06, "epoch": 0.5534216759490429, "percentage": 11.07, "elapsed_time": "0:23:12", "remaining_time": "3:06:27", "throughput": 19560.82, "total_tokens": 27235648} +{"current_steps": 8650, "total_steps": 78105, "loss": 0.4957, "lr": 4.998246884550056e-06, "epoch": 0.5537417578900199, "percentage": 11.07, "elapsed_time": "0:23:13", "remaining_time": "3:06:25", "throughput": 19562.43, "total_tokens": 27250880} +{"current_steps": 8655, "total_steps": 78105, "loss": 0.3812, "lr": 4.998225904388793e-06, "epoch": 0.5540618398309968, "percentage": 11.08, "elapsed_time": "0:23:13", "remaining_time": "3:06:23", "throughput": 19564.27, "total_tokens": 27266880} +{"current_steps": 8660, "total_steps": 78105, "loss": 0.6054, "lr": 4.998204799479142e-06, "epoch": 0.5543819217719737, "percentage": 11.09, "elapsed_time": "0:23:14", "remaining_time": "3:06:21", "throughput": 19566.09, "total_tokens": 27282816} +{"current_steps": 8665, "total_steps": 78105, "loss": 0.5391, "lr": 4.998183569822156e-06, "epoch": 0.5547020037129505, "percentage": 11.09, "elapsed_time": "0:23:15", "remaining_time": "3:06:19", "throughput": 19567.82, "total_tokens": 27298496} +{"current_steps": 8670, "total_steps": 78105, "loss": 0.4191, "lr": 4.998162215418896e-06, "epoch": 0.5550220856539274, "percentage": 11.1, "elapsed_time": "0:23:15", "remaining_time": "3:06:17", "throughput": 19569.48, "total_tokens": 27313856} +{"current_steps": 8675, "total_steps": 78105, "loss": 0.3481, "lr": 4.998140736270426e-06, "epoch": 0.5553421675949043, "percentage": 11.11, "elapsed_time": "0:23:16", "remaining_time": "3:06:15", "throughput": 19570.97, "total_tokens": 27328448} +{"current_steps": 8680, "total_steps": 78105, "loss": 0.4723, "lr": 4.998119132377821e-06, "epoch": 0.5556622495358812, "percentage": 11.11, "elapsed_time": "0:23:17", "remaining_time": "3:06:13", "throughput": 19572.66, "total_tokens": 27343872} +{"current_steps": 8685, "total_steps": 78105, "loss": 0.6518, "lr": 4.998097403742158e-06, "epoch": 0.555982331476858, "percentage": 11.12, "elapsed_time": "0:23:17", "remaining_time": "3:06:11", "throughput": 19574.17, "total_tokens": 27358720} +{"current_steps": 8690, "total_steps": 78105, "loss": 0.4923, "lr": 4.998075550364523e-06, "epoch": 0.5563024134178349, "percentage": 11.13, "elapsed_time": "0:23:18", "remaining_time": "3:06:09", "throughput": 19575.58, "total_tokens": 27373312} +{"current_steps": 8695, "total_steps": 78105, "loss": 0.438, "lr": 4.998053572246007e-06, "epoch": 0.5566224953588118, "percentage": 11.13, "elapsed_time": "0:23:19", "remaining_time": "3:06:07", "throughput": 19577.19, "total_tokens": 27388544} +{"current_steps": 8700, "total_steps": 78105, "loss": 0.4999, "lr": 4.998031469387709e-06, "epoch": 0.5569425772997888, "percentage": 11.14, "elapsed_time": "0:23:19", "remaining_time": "3:06:06", "throughput": 19579.19, "total_tokens": 27405120} +{"current_steps": 8705, "total_steps": 78105, "loss": 0.4881, "lr": 4.998009241790729e-06, "epoch": 0.5572626592407657, "percentage": 11.15, "elapsed_time": "0:23:20", "remaining_time": "3:06:04", "throughput": 19580.8, "total_tokens": 27420544} +{"current_steps": 8710, "total_steps": 78105, "loss": 0.4088, "lr": 4.99798688945618e-06, "epoch": 0.5575827411817426, "percentage": 11.15, "elapsed_time": "0:23:21", "remaining_time": "3:06:02", "throughput": 19582.41, "total_tokens": 27435456} +{"current_steps": 8715, "total_steps": 78105, "loss": 0.3932, "lr": 4.9979644123851776e-06, "epoch": 0.5579028231227194, "percentage": 11.16, "elapsed_time": "0:23:21", "remaining_time": "3:06:00", "throughput": 19584.38, "total_tokens": 27452032} +{"current_steps": 8720, "total_steps": 78105, "loss": 0.6223, "lr": 4.997941810578843e-06, "epoch": 0.5582229050636963, "percentage": 11.16, "elapsed_time": "0:23:22", "remaining_time": "3:05:58", "throughput": 19586.15, "total_tokens": 27467520} +{"current_steps": 8725, "total_steps": 78105, "loss": 0.3775, "lr": 4.9979190840383065e-06, "epoch": 0.5585429870046732, "percentage": 11.17, "elapsed_time": "0:23:23", "remaining_time": "3:05:56", "throughput": 19587.78, "total_tokens": 27482816} +{"current_steps": 8730, "total_steps": 78105, "loss": 0.4589, "lr": 4.997896232764702e-06, "epoch": 0.5588630689456501, "percentage": 11.18, "elapsed_time": "0:23:23", "remaining_time": "3:05:54", "throughput": 19589.27, "total_tokens": 27497600} +{"current_steps": 8735, "total_steps": 78105, "loss": 0.4818, "lr": 4.997873256759171e-06, "epoch": 0.559183150886627, "percentage": 11.18, "elapsed_time": "0:23:24", "remaining_time": "3:05:53", "throughput": 19590.95, "total_tokens": 27513344} +{"current_steps": 8740, "total_steps": 78105, "loss": 0.4708, "lr": 4.997850156022859e-06, "epoch": 0.5595032328276038, "percentage": 11.19, "elapsed_time": "0:23:25", "remaining_time": "3:05:51", "throughput": 19593.09, "total_tokens": 27530304} +{"current_steps": 8745, "total_steps": 78105, "loss": 0.5141, "lr": 4.997826930556922e-06, "epoch": 0.5598233147685807, "percentage": 11.2, "elapsed_time": "0:23:25", "remaining_time": "3:05:49", "throughput": 19594.66, "total_tokens": 27545664} +{"current_steps": 8750, "total_steps": 78105, "loss": 0.3639, "lr": 4.997803580362519e-06, "epoch": 0.5601433967095576, "percentage": 11.2, "elapsed_time": "0:23:26", "remaining_time": "3:05:47", "throughput": 19596.23, "total_tokens": 27560704} +{"current_steps": 8755, "total_steps": 78105, "loss": 0.6498, "lr": 4.997780105440816e-06, "epoch": 0.5604634786505346, "percentage": 11.21, "elapsed_time": "0:23:27", "remaining_time": "3:05:46", "throughput": 19598.17, "total_tokens": 27577472} +{"current_steps": 8760, "total_steps": 78105, "loss": 0.4584, "lr": 4.9977565057929845e-06, "epoch": 0.5607835605915115, "percentage": 11.22, "elapsed_time": "0:23:27", "remaining_time": "3:05:44", "throughput": 19599.89, "total_tokens": 27592960} +{"current_steps": 8765, "total_steps": 78105, "loss": 0.4441, "lr": 4.9977327814202036e-06, "epoch": 0.5611036425324883, "percentage": 11.22, "elapsed_time": "0:23:28", "remaining_time": "3:05:42", "throughput": 19602.14, "total_tokens": 27610304} +{"current_steps": 8770, "total_steps": 78105, "loss": 0.7925, "lr": 4.9977089323236575e-06, "epoch": 0.5614237244734652, "percentage": 11.23, "elapsed_time": "0:23:29", "remaining_time": "3:05:41", "throughput": 19604.59, "total_tokens": 27628288} +{"current_steps": 8775, "total_steps": 78105, "loss": 0.5821, "lr": 4.997684958504537e-06, "epoch": 0.5617438064144421, "percentage": 11.23, "elapsed_time": "0:23:29", "remaining_time": "3:05:39", "throughput": 19606.33, "total_tokens": 27643904} +{"current_steps": 8780, "total_steps": 78105, "loss": 0.5352, "lr": 4.99766085996404e-06, "epoch": 0.562063888355419, "percentage": 11.24, "elapsed_time": "0:23:30", "remaining_time": "3:05:38", "throughput": 19608.22, "total_tokens": 27660224} +{"current_steps": 8785, "total_steps": 78105, "loss": 0.4474, "lr": 4.997636636703371e-06, "epoch": 0.5623839702963959, "percentage": 11.25, "elapsed_time": "0:23:31", "remaining_time": "3:05:36", "throughput": 19609.77, "total_tokens": 27675136} +{"current_steps": 8790, "total_steps": 78105, "loss": 0.4169, "lr": 4.997612288723736e-06, "epoch": 0.5627040522373727, "percentage": 11.25, "elapsed_time": "0:23:31", "remaining_time": "3:05:34", "throughput": 19611.32, "total_tokens": 27690176} +{"current_steps": 8795, "total_steps": 78105, "loss": 0.5149, "lr": 4.997587816026355e-06, "epoch": 0.5630241341783496, "percentage": 11.26, "elapsed_time": "0:23:32", "remaining_time": "3:05:32", "throughput": 19612.67, "total_tokens": 27704512} +{"current_steps": 8800, "total_steps": 78105, "loss": 0.6121, "lr": 4.997563218612446e-06, "epoch": 0.5633442161193265, "percentage": 11.27, "elapsed_time": "0:23:33", "remaining_time": "3:05:30", "throughput": 19614.48, "total_tokens": 27720448} +{"current_steps": 8805, "total_steps": 78105, "loss": 0.6136, "lr": 4.99753849648324e-06, "epoch": 0.5636642980603035, "percentage": 11.27, "elapsed_time": "0:23:33", "remaining_time": "3:05:28", "throughput": 19616.2, "total_tokens": 27735808} +{"current_steps": 8810, "total_steps": 78105, "loss": 0.4028, "lr": 4.9975136496399705e-06, "epoch": 0.5639843800012804, "percentage": 11.28, "elapsed_time": "0:23:34", "remaining_time": "3:05:26", "throughput": 19617.62, "total_tokens": 27750464} +{"current_steps": 8815, "total_steps": 78105, "loss": 0.4814, "lr": 4.997488678083879e-06, "epoch": 0.5643044619422573, "percentage": 11.29, "elapsed_time": "0:23:35", "remaining_time": "3:05:24", "throughput": 19618.83, "total_tokens": 27764352} +{"current_steps": 8820, "total_steps": 78105, "loss": 0.4174, "lr": 4.997463581816211e-06, "epoch": 0.5646245438832341, "percentage": 11.29, "elapsed_time": "0:23:35", "remaining_time": "3:05:22", "throughput": 19620.39, "total_tokens": 27779840} +{"current_steps": 8825, "total_steps": 78105, "loss": 0.3541, "lr": 4.997438360838221e-06, "epoch": 0.564944625824211, "percentage": 11.3, "elapsed_time": "0:23:36", "remaining_time": "3:05:20", "throughput": 19622.34, "total_tokens": 27796352} +{"current_steps": 8830, "total_steps": 78105, "loss": 0.5327, "lr": 4.9974130151511675e-06, "epoch": 0.5652647077651879, "percentage": 11.31, "elapsed_time": "0:23:37", "remaining_time": "3:05:18", "throughput": 19623.66, "total_tokens": 27810624} +{"current_steps": 8835, "total_steps": 78105, "loss": 0.4414, "lr": 4.997387544756317e-06, "epoch": 0.5655847897061648, "percentage": 11.31, "elapsed_time": "0:23:37", "remaining_time": "3:05:16", "throughput": 19625.09, "total_tokens": 27825152} +{"current_steps": 8840, "total_steps": 78105, "loss": 0.6232, "lr": 4.997361949654941e-06, "epoch": 0.5659048716471416, "percentage": 11.32, "elapsed_time": "0:23:38", "remaining_time": "3:05:14", "throughput": 19626.74, "total_tokens": 27840384} +{"current_steps": 8845, "total_steps": 78105, "loss": 0.4049, "lr": 4.9973362298483175e-06, "epoch": 0.5662249535881185, "percentage": 11.32, "elapsed_time": "0:23:39", "remaining_time": "3:05:12", "throughput": 19628.55, "total_tokens": 27856640} +{"current_steps": 8850, "total_steps": 78105, "loss": 0.3466, "lr": 4.99731038533773e-06, "epoch": 0.5665450355290954, "percentage": 11.33, "elapsed_time": "0:23:39", "remaining_time": "3:05:11", "throughput": 19630.38, "total_tokens": 27872768} +{"current_steps": 8855, "total_steps": 78105, "loss": 0.597, "lr": 4.997284416124471e-06, "epoch": 0.5668651174700723, "percentage": 11.34, "elapsed_time": "0:23:40", "remaining_time": "3:05:09", "throughput": 19631.76, "total_tokens": 27887488} +{"current_steps": 8860, "total_steps": 78105, "loss": 0.4015, "lr": 4.997258322209836e-06, "epoch": 0.5671851994110493, "percentage": 11.34, "elapsed_time": "0:23:41", "remaining_time": "3:05:07", "throughput": 19633.25, "total_tokens": 27902720} +{"current_steps": 8865, "total_steps": 78105, "loss": 0.4278, "lr": 4.997232103595127e-06, "epoch": 0.5675052813520262, "percentage": 11.35, "elapsed_time": "0:23:41", "remaining_time": "3:05:05", "throughput": 19634.87, "total_tokens": 27918080} +{"current_steps": 8870, "total_steps": 78105, "loss": 0.5455, "lr": 4.997205760281656e-06, "epoch": 0.567825363293003, "percentage": 11.36, "elapsed_time": "0:23:42", "remaining_time": "3:05:03", "throughput": 19636.7, "total_tokens": 27933760} +{"current_steps": 8875, "total_steps": 78105, "loss": 0.5835, "lr": 4.997179292270736e-06, "epoch": 0.5681454452339799, "percentage": 11.36, "elapsed_time": "0:23:43", "remaining_time": "3:05:01", "throughput": 19638.22, "total_tokens": 27948992} +{"current_steps": 8880, "total_steps": 78105, "loss": 0.5552, "lr": 4.997152699563689e-06, "epoch": 0.5684655271749568, "percentage": 11.37, "elapsed_time": "0:23:43", "remaining_time": "3:05:00", "throughput": 19640.46, "total_tokens": 27966848} +{"current_steps": 8885, "total_steps": 78105, "loss": 0.4773, "lr": 4.997125982161845e-06, "epoch": 0.5687856091159337, "percentage": 11.38, "elapsed_time": "0:23:44", "remaining_time": "3:04:58", "throughput": 19641.83, "total_tokens": 27981696} +{"current_steps": 8890, "total_steps": 78105, "loss": 0.4981, "lr": 4.997099140066535e-06, "epoch": 0.5691056910569106, "percentage": 11.38, "elapsed_time": "0:23:45", "remaining_time": "3:04:56", "throughput": 19643.5, "total_tokens": 27997376} +{"current_steps": 8895, "total_steps": 78105, "loss": 0.4624, "lr": 4.9970721732791005e-06, "epoch": 0.5694257729978874, "percentage": 11.39, "elapsed_time": "0:23:45", "remaining_time": "3:04:55", "throughput": 19645.46, "total_tokens": 28014016} +{"current_steps": 8900, "total_steps": 78105, "loss": 0.5496, "lr": 4.99704508180089e-06, "epoch": 0.5697458549388643, "percentage": 11.39, "elapsed_time": "0:23:46", "remaining_time": "3:04:53", "throughput": 19647.3, "total_tokens": 28030400} +{"current_steps": 8905, "total_steps": 78105, "loss": 0.456, "lr": 4.997017865633253e-06, "epoch": 0.5700659368798412, "percentage": 11.4, "elapsed_time": "0:23:47", "remaining_time": "3:04:52", "throughput": 19649.39, "total_tokens": 28047104} +{"current_steps": 8910, "total_steps": 78105, "loss": 0.5433, "lr": 4.996990524777552e-06, "epoch": 0.5703860188208181, "percentage": 11.41, "elapsed_time": "0:23:48", "remaining_time": "3:04:50", "throughput": 19651.24, "total_tokens": 28063168} +{"current_steps": 8915, "total_steps": 78105, "loss": 0.593, "lr": 4.99696305923515e-06, "epoch": 0.5707061007617951, "percentage": 11.41, "elapsed_time": "0:23:48", "remaining_time": "3:04:48", "throughput": 19652.93, "total_tokens": 28079104} +{"current_steps": 8920, "total_steps": 78105, "loss": 0.5787, "lr": 4.996935469007418e-06, "epoch": 0.571026182702772, "percentage": 11.42, "elapsed_time": "0:23:49", "remaining_time": "3:04:46", "throughput": 19654.62, "total_tokens": 28094720} +{"current_steps": 8925, "total_steps": 78105, "loss": 0.4108, "lr": 4.996907754095735e-06, "epoch": 0.5713462646437488, "percentage": 11.43, "elapsed_time": "0:23:50", "remaining_time": "3:04:45", "throughput": 19656.42, "total_tokens": 28110848} +{"current_steps": 8930, "total_steps": 78105, "loss": 0.5035, "lr": 4.996879914501486e-06, "epoch": 0.5716663465847257, "percentage": 11.43, "elapsed_time": "0:23:50", "remaining_time": "3:04:43", "throughput": 19658.24, "total_tokens": 28127040} +{"current_steps": 8935, "total_steps": 78105, "loss": 0.4915, "lr": 4.996851950226059e-06, "epoch": 0.5719864285257026, "percentage": 11.44, "elapsed_time": "0:23:51", "remaining_time": "3:04:41", "throughput": 19660.18, "total_tokens": 28143424} +{"current_steps": 8940, "total_steps": 78105, "loss": 0.7354, "lr": 4.996823861270851e-06, "epoch": 0.5723065104666795, "percentage": 11.45, "elapsed_time": "0:23:52", "remaining_time": "3:04:39", "throughput": 19661.69, "total_tokens": 28158528} +{"current_steps": 8945, "total_steps": 78105, "loss": 0.5087, "lr": 4.9967956476372645e-06, "epoch": 0.5726265924076563, "percentage": 11.45, "elapsed_time": "0:23:52", "remaining_time": "3:04:38", "throughput": 19663.26, "total_tokens": 28173888} +{"current_steps": 8950, "total_steps": 78105, "loss": 0.5592, "lr": 4.99676730932671e-06, "epoch": 0.5729466743486332, "percentage": 11.46, "elapsed_time": "0:23:53", "remaining_time": "3:04:36", "throughput": 19665.53, "total_tokens": 28191872} +{"current_steps": 8955, "total_steps": 78105, "loss": 0.6421, "lr": 4.996738846340601e-06, "epoch": 0.5732667562896101, "percentage": 11.47, "elapsed_time": "0:23:54", "remaining_time": "3:04:34", "throughput": 19667.06, "total_tokens": 28206912} +{"current_steps": 8960, "total_steps": 78105, "loss": 0.618, "lr": 4.996710258680358e-06, "epoch": 0.573586838230587, "percentage": 11.47, "elapsed_time": "0:23:54", "remaining_time": "3:04:33", "throughput": 19668.7, "total_tokens": 28222336} +{"current_steps": 8965, "total_steps": 78105, "loss": 0.4461, "lr": 4.99668154634741e-06, "epoch": 0.573906920171564, "percentage": 11.48, "elapsed_time": "0:23:55", "remaining_time": "3:04:31", "throughput": 19670.35, "total_tokens": 28238144} +{"current_steps": 8970, "total_steps": 78105, "loss": 0.3794, "lr": 4.996652709343191e-06, "epoch": 0.5742270021125409, "percentage": 11.48, "elapsed_time": "0:23:56", "remaining_time": "3:04:29", "throughput": 19671.87, "total_tokens": 28253376} +{"current_steps": 8975, "total_steps": 78105, "loss": 0.5317, "lr": 4.9966237476691395e-06, "epoch": 0.5745470840535177, "percentage": 11.49, "elapsed_time": "0:23:56", "remaining_time": "3:04:27", "throughput": 19673.69, "total_tokens": 28269248} +{"current_steps": 8980, "total_steps": 78105, "loss": 0.4716, "lr": 4.996594661326702e-06, "epoch": 0.5748671659944946, "percentage": 11.5, "elapsed_time": "0:23:57", "remaining_time": "3:04:26", "throughput": 19675.53, "total_tokens": 28285440} +{"current_steps": 8985, "total_steps": 78105, "loss": 0.3745, "lr": 4.996565450317333e-06, "epoch": 0.5751872479354715, "percentage": 11.5, "elapsed_time": "0:23:58", "remaining_time": "3:04:24", "throughput": 19677.04, "total_tokens": 28300736} +{"current_steps": 8990, "total_steps": 78105, "loss": 0.4753, "lr": 4.996536114642489e-06, "epoch": 0.5755073298764484, "percentage": 11.51, "elapsed_time": "0:23:58", "remaining_time": "3:04:22", "throughput": 19678.51, "total_tokens": 28315712} +{"current_steps": 8995, "total_steps": 78105, "loss": 0.5826, "lr": 4.996506654303634e-06, "epoch": 0.5758274118174252, "percentage": 11.52, "elapsed_time": "0:23:59", "remaining_time": "3:04:20", "throughput": 19680.27, "total_tokens": 28332032} +{"current_steps": 9000, "total_steps": 78105, "loss": 0.5248, "lr": 4.996477069302243e-06, "epoch": 0.5761474937584021, "percentage": 11.52, "elapsed_time": "0:24:00", "remaining_time": "3:04:19", "throughput": 19681.97, "total_tokens": 28347968} +{"current_steps": 9005, "total_steps": 78105, "loss": 0.4764, "lr": 4.996447359639789e-06, "epoch": 0.576467575699379, "percentage": 11.53, "elapsed_time": "0:24:00", "remaining_time": "3:04:17", "throughput": 19683.44, "total_tokens": 28362944} +{"current_steps": 9010, "total_steps": 78105, "loss": 0.4941, "lr": 4.996417525317757e-06, "epoch": 0.5767876576403559, "percentage": 11.54, "elapsed_time": "0:24:01", "remaining_time": "3:04:15", "throughput": 19685.11, "total_tokens": 28378624} +{"current_steps": 9015, "total_steps": 78105, "loss": 0.5076, "lr": 4.9963875663376385e-06, "epoch": 0.5771077395813328, "percentage": 11.54, "elapsed_time": "0:24:02", "remaining_time": "3:04:13", "throughput": 19686.67, "total_tokens": 28394048} +{"current_steps": 9020, "total_steps": 78105, "loss": 0.537, "lr": 4.996357482700927e-06, "epoch": 0.5774278215223098, "percentage": 11.55, "elapsed_time": "0:24:02", "remaining_time": "3:04:11", "throughput": 19688.21, "total_tokens": 28409664} +{"current_steps": 9025, "total_steps": 78105, "loss": 0.4628, "lr": 4.9963272744091275e-06, "epoch": 0.5777479034632866, "percentage": 11.55, "elapsed_time": "0:24:03", "remaining_time": "3:04:10", "throughput": 19689.66, "total_tokens": 28424768} +{"current_steps": 9030, "total_steps": 78105, "loss": 0.4314, "lr": 4.996296941463745e-06, "epoch": 0.5780679854042635, "percentage": 11.56, "elapsed_time": "0:24:04", "remaining_time": "3:04:08", "throughput": 19690.97, "total_tokens": 28439424} +{"current_steps": 9035, "total_steps": 78105, "loss": 0.6351, "lr": 4.9962664838662974e-06, "epoch": 0.5783880673452404, "percentage": 11.57, "elapsed_time": "0:24:04", "remaining_time": "3:04:06", "throughput": 19692.92, "total_tokens": 28456064} +{"current_steps": 9040, "total_steps": 78105, "loss": 0.5251, "lr": 4.996235901618304e-06, "epoch": 0.5787081492862173, "percentage": 11.57, "elapsed_time": "0:24:05", "remaining_time": "3:04:05", "throughput": 19694.85, "total_tokens": 28472768} +{"current_steps": 9045, "total_steps": 78105, "loss": 0.4512, "lr": 4.996205194721291e-06, "epoch": 0.5790282312271942, "percentage": 11.58, "elapsed_time": "0:24:06", "remaining_time": "3:04:03", "throughput": 19696.47, "total_tokens": 28488704} +{"current_steps": 9050, "total_steps": 78105, "loss": 0.5944, "lr": 4.9961743631767935e-06, "epoch": 0.579348313168171, "percentage": 11.59, "elapsed_time": "0:24:07", "remaining_time": "3:04:01", "throughput": 19697.91, "total_tokens": 28503808} +{"current_steps": 9055, "total_steps": 78105, "loss": 0.5982, "lr": 4.99614340698635e-06, "epoch": 0.5796683951091479, "percentage": 11.59, "elapsed_time": "0:24:07", "remaining_time": "3:03:59", "throughput": 19699.78, "total_tokens": 28520320} +{"current_steps": 9060, "total_steps": 78105, "loss": 0.5961, "lr": 4.9961123261515074e-06, "epoch": 0.5799884770501248, "percentage": 11.6, "elapsed_time": "0:24:08", "remaining_time": "3:03:58", "throughput": 19701.35, "total_tokens": 28535680} +{"current_steps": 9065, "total_steps": 78105, "loss": 0.4588, "lr": 4.996081120673817e-06, "epoch": 0.5803085589911017, "percentage": 11.61, "elapsed_time": "0:24:09", "remaining_time": "3:03:56", "throughput": 19702.96, "total_tokens": 28551488} +{"current_steps": 9070, "total_steps": 78105, "loss": 0.5326, "lr": 4.996049790554837e-06, "epoch": 0.5806286409320787, "percentage": 11.61, "elapsed_time": "0:24:09", "remaining_time": "3:03:54", "throughput": 19704.57, "total_tokens": 28567040} +{"current_steps": 9075, "total_steps": 78105, "loss": 0.4566, "lr": 4.996018335796131e-06, "epoch": 0.5809487228730555, "percentage": 11.62, "elapsed_time": "0:24:10", "remaining_time": "3:03:52", "throughput": 19705.75, "total_tokens": 28581568} +{"current_steps": 9080, "total_steps": 78105, "loss": 0.5191, "lr": 4.995986756399272e-06, "epoch": 0.5812688048140324, "percentage": 11.63, "elapsed_time": "0:24:11", "remaining_time": "3:03:50", "throughput": 19707.4, "total_tokens": 28596992} +{"current_steps": 9085, "total_steps": 78105, "loss": 0.7651, "lr": 4.995955052365834e-06, "epoch": 0.5815888867550093, "percentage": 11.63, "elapsed_time": "0:24:11", "remaining_time": "3:03:49", "throughput": 19709.43, "total_tokens": 28613952} +{"current_steps": 9090, "total_steps": 78105, "loss": 0.6473, "lr": 4.995923223697403e-06, "epoch": 0.5819089686959862, "percentage": 11.64, "elapsed_time": "0:24:12", "remaining_time": "3:03:47", "throughput": 19710.84, "total_tokens": 28628736} +{"current_steps": 9095, "total_steps": 78105, "loss": 0.5631, "lr": 4.995891270395566e-06, "epoch": 0.5822290506369631, "percentage": 11.64, "elapsed_time": "0:24:13", "remaining_time": "3:03:45", "throughput": 19712.56, "total_tokens": 28644928} +{"current_steps": 9100, "total_steps": 78105, "loss": 0.4256, "lr": 4.99585919246192e-06, "epoch": 0.5825491325779399, "percentage": 11.65, "elapsed_time": "0:24:13", "remaining_time": "3:03:44", "throughput": 19714.28, "total_tokens": 28661184} +{"current_steps": 9105, "total_steps": 78105, "loss": 0.3767, "lr": 4.995826989898066e-06, "epoch": 0.5828692145189168, "percentage": 11.66, "elapsed_time": "0:24:14", "remaining_time": "3:03:42", "throughput": 19715.64, "total_tokens": 28675776} +{"current_steps": 9110, "total_steps": 78105, "loss": 0.49, "lr": 4.995794662705613e-06, "epoch": 0.5831892964598937, "percentage": 11.66, "elapsed_time": "0:24:15", "remaining_time": "3:03:40", "throughput": 19717.38, "total_tokens": 28692160} +{"current_steps": 9115, "total_steps": 78105, "loss": 0.361, "lr": 4.995762210886175e-06, "epoch": 0.5835093784008706, "percentage": 11.67, "elapsed_time": "0:24:15", "remaining_time": "3:03:38", "throughput": 19718.81, "total_tokens": 28707328} +{"current_steps": 9120, "total_steps": 78105, "loss": 0.4557, "lr": 4.995729634441371e-06, "epoch": 0.5838294603418475, "percentage": 11.68, "elapsed_time": "0:24:16", "remaining_time": "3:03:37", "throughput": 19720.28, "total_tokens": 28722624} +{"current_steps": 9125, "total_steps": 78105, "loss": 0.4721, "lr": 4.995696933372829e-06, "epoch": 0.5841495422828245, "percentage": 11.68, "elapsed_time": "0:24:17", "remaining_time": "3:03:35", "throughput": 19721.72, "total_tokens": 28737856} +{"current_steps": 9130, "total_steps": 78105, "loss": 0.5019, "lr": 4.995664107682182e-06, "epoch": 0.5844696242238013, "percentage": 11.69, "elapsed_time": "0:24:17", "remaining_time": "3:03:33", "throughput": 19723.09, "total_tokens": 28752448} +{"current_steps": 9135, "total_steps": 78105, "loss": 0.4909, "lr": 4.995631157371069e-06, "epoch": 0.5847897061647782, "percentage": 11.7, "elapsed_time": "0:24:18", "remaining_time": "3:03:31", "throughput": 19724.6, "total_tokens": 28767616} +{"current_steps": 9140, "total_steps": 78105, "loss": 0.5935, "lr": 4.995598082441136e-06, "epoch": 0.5851097881057551, "percentage": 11.7, "elapsed_time": "0:24:19", "remaining_time": "3:03:29", "throughput": 19725.92, "total_tokens": 28782400} +{"current_steps": 9145, "total_steps": 78105, "loss": 0.6664, "lr": 4.995564882894033e-06, "epoch": 0.585429870046732, "percentage": 11.71, "elapsed_time": "0:24:19", "remaining_time": "3:03:27", "throughput": 19727.57, "total_tokens": 28798336} +{"current_steps": 9150, "total_steps": 78105, "loss": 0.4209, "lr": 4.995531558731419e-06, "epoch": 0.5857499519877089, "percentage": 11.71, "elapsed_time": "0:24:20", "remaining_time": "3:03:26", "throughput": 19729.26, "total_tokens": 28814336} +{"current_steps": 9155, "total_steps": 78105, "loss": 0.7192, "lr": 4.995498109954957e-06, "epoch": 0.5860700339286857, "percentage": 11.72, "elapsed_time": "0:24:21", "remaining_time": "3:03:24", "throughput": 19731.14, "total_tokens": 28830656} +{"current_steps": 9160, "total_steps": 78105, "loss": 0.6507, "lr": 4.995464536566319e-06, "epoch": 0.5863901158696626, "percentage": 11.73, "elapsed_time": "0:24:21", "remaining_time": "3:03:23", "throughput": 19732.76, "total_tokens": 28846656} +{"current_steps": 9165, "total_steps": 78105, "loss": 0.5404, "lr": 4.99543083856718e-06, "epoch": 0.5867101978106395, "percentage": 11.73, "elapsed_time": "0:24:22", "remaining_time": "3:03:21", "throughput": 19734.56, "total_tokens": 28862848} +{"current_steps": 9170, "total_steps": 78105, "loss": 0.5889, "lr": 4.995397015959223e-06, "epoch": 0.5870302797516164, "percentage": 11.74, "elapsed_time": "0:24:23", "remaining_time": "3:03:19", "throughput": 19736.12, "total_tokens": 28878400} +{"current_steps": 9175, "total_steps": 78105, "loss": 0.7242, "lr": 4.995363068744137e-06, "epoch": 0.5873503616925934, "percentage": 11.75, "elapsed_time": "0:24:23", "remaining_time": "3:03:17", "throughput": 19737.56, "total_tokens": 28893568} +{"current_steps": 9180, "total_steps": 78105, "loss": 0.6035, "lr": 4.9953289969236174e-06, "epoch": 0.5876704436335702, "percentage": 11.75, "elapsed_time": "0:24:24", "remaining_time": "3:03:16", "throughput": 19739.33, "total_tokens": 28909824} +{"current_steps": 9185, "total_steps": 78105, "loss": 0.4631, "lr": 4.995294800499366e-06, "epoch": 0.5879905255745471, "percentage": 11.76, "elapsed_time": "0:24:25", "remaining_time": "3:03:14", "throughput": 19740.94, "total_tokens": 28925568} +{"current_steps": 9190, "total_steps": 78105, "loss": 0.4697, "lr": 4.995260479473089e-06, "epoch": 0.588310607515524, "percentage": 11.77, "elapsed_time": "0:24:25", "remaining_time": "3:03:12", "throughput": 19742.44, "total_tokens": 28941056} +{"current_steps": 9195, "total_steps": 78105, "loss": 0.5873, "lr": 4.995226033846501e-06, "epoch": 0.5886306894565009, "percentage": 11.77, "elapsed_time": "0:24:26", "remaining_time": "3:03:10", "throughput": 19743.63, "total_tokens": 28955456} +{"current_steps": 9200, "total_steps": 78105, "loss": 0.4355, "lr": 4.9951914636213225e-06, "epoch": 0.5889507713974778, "percentage": 11.78, "elapsed_time": "0:24:27", "remaining_time": "3:03:09", "throughput": 19745.16, "total_tokens": 28971136} +{"current_steps": 9205, "total_steps": 78105, "loss": 0.5336, "lr": 4.995156768799279e-06, "epoch": 0.5892708533384546, "percentage": 11.79, "elapsed_time": "0:24:27", "remaining_time": "3:03:07", "throughput": 19747.17, "total_tokens": 28988480} +{"current_steps": 9210, "total_steps": 78105, "loss": 0.4822, "lr": 4.995121949382103e-06, "epoch": 0.5895909352794315, "percentage": 11.79, "elapsed_time": "0:24:28", "remaining_time": "3:03:06", "throughput": 19748.84, "total_tokens": 29004544} +{"current_steps": 9215, "total_steps": 78105, "loss": 0.5036, "lr": 4.995087005371534e-06, "epoch": 0.5899110172204084, "percentage": 11.8, "elapsed_time": "0:24:29", "remaining_time": "3:03:04", "throughput": 19750.17, "total_tokens": 29019328} +{"current_steps": 9220, "total_steps": 78105, "loss": 0.5889, "lr": 4.995051936769316e-06, "epoch": 0.5902310991613853, "percentage": 11.8, "elapsed_time": "0:24:29", "remaining_time": "3:03:02", "throughput": 19751.67, "total_tokens": 29034752} +{"current_steps": 9225, "total_steps": 78105, "loss": 0.6516, "lr": 4.9950167435772e-06, "epoch": 0.5905511811023622, "percentage": 11.81, "elapsed_time": "0:24:30", "remaining_time": "3:03:01", "throughput": 19753.26, "total_tokens": 29050816} +{"current_steps": 9230, "total_steps": 78105, "loss": 0.3739, "lr": 4.994981425796945e-06, "epoch": 0.5908712630433391, "percentage": 11.82, "elapsed_time": "0:24:31", "remaining_time": "3:02:59", "throughput": 19754.68, "total_tokens": 29065920} +{"current_steps": 9235, "total_steps": 78105, "loss": 0.6625, "lr": 4.994945983430313e-06, "epoch": 0.591191344984316, "percentage": 11.82, "elapsed_time": "0:24:31", "remaining_time": "3:02:57", "throughput": 19756.17, "total_tokens": 29080960} +{"current_steps": 9240, "total_steps": 78105, "loss": 0.611, "lr": 4.994910416479074e-06, "epoch": 0.5915114269252929, "percentage": 11.83, "elapsed_time": "0:24:32", "remaining_time": "3:02:55", "throughput": 19757.79, "total_tokens": 29096768} +{"current_steps": 9245, "total_steps": 78105, "loss": 0.6683, "lr": 4.994874724945005e-06, "epoch": 0.5918315088662698, "percentage": 11.84, "elapsed_time": "0:24:33", "remaining_time": "3:02:54", "throughput": 19759.4, "total_tokens": 29112448} +{"current_steps": 9250, "total_steps": 78105, "loss": 0.6542, "lr": 4.994838908829887e-06, "epoch": 0.5921515908072467, "percentage": 11.84, "elapsed_time": "0:24:34", "remaining_time": "3:02:52", "throughput": 19760.83, "total_tokens": 29127808} +{"current_steps": 9255, "total_steps": 78105, "loss": 0.4267, "lr": 4.994802968135509e-06, "epoch": 0.5924716727482235, "percentage": 11.85, "elapsed_time": "0:24:34", "remaining_time": "3:02:50", "throughput": 19762.21, "total_tokens": 29142912} +{"current_steps": 9260, "total_steps": 78105, "loss": 0.4546, "lr": 4.994766902863666e-06, "epoch": 0.5927917546892004, "percentage": 11.86, "elapsed_time": "0:24:35", "remaining_time": "3:02:48", "throughput": 19763.83, "total_tokens": 29159168} +{"current_steps": 9265, "total_steps": 78105, "loss": 0.5397, "lr": 4.9947307130161586e-06, "epoch": 0.5931118366301773, "percentage": 11.86, "elapsed_time": "0:24:36", "remaining_time": "3:02:47", "throughput": 19765.58, "total_tokens": 29175488} +{"current_steps": 9270, "total_steps": 78105, "loss": 0.5568, "lr": 4.994694398594794e-06, "epoch": 0.5934319185711542, "percentage": 11.87, "elapsed_time": "0:24:36", "remaining_time": "3:02:45", "throughput": 19767.11, "total_tokens": 29191104} +{"current_steps": 9275, "total_steps": 78105, "loss": 0.5317, "lr": 4.994657959601385e-06, "epoch": 0.5937520005121311, "percentage": 11.88, "elapsed_time": "0:24:37", "remaining_time": "3:02:43", "throughput": 19768.51, "total_tokens": 29206208} +{"current_steps": 9280, "total_steps": 78105, "loss": 0.5157, "lr": 4.994621396037752e-06, "epoch": 0.5940720824531079, "percentage": 11.88, "elapsed_time": "0:24:38", "remaining_time": "3:02:42", "throughput": 19769.84, "total_tokens": 29221312} +{"current_steps": 9285, "total_steps": 78105, "loss": 0.5074, "lr": 4.994584707905721e-06, "epoch": 0.5943921643940849, "percentage": 11.89, "elapsed_time": "0:24:38", "remaining_time": "3:02:40", "throughput": 19771.35, "total_tokens": 29236800} +{"current_steps": 9290, "total_steps": 78105, "loss": 0.6807, "lr": 4.994547895207123e-06, "epoch": 0.5947122463350618, "percentage": 11.89, "elapsed_time": "0:24:39", "remaining_time": "3:02:38", "throughput": 19772.98, "total_tokens": 29252480} +{"current_steps": 9295, "total_steps": 78105, "loss": 0.6824, "lr": 4.994510957943797e-06, "epoch": 0.5950323282760387, "percentage": 11.9, "elapsed_time": "0:24:40", "remaining_time": "3:02:36", "throughput": 19774.25, "total_tokens": 29266944} +{"current_steps": 9300, "total_steps": 78105, "loss": 0.501, "lr": 4.994473896117587e-06, "epoch": 0.5953524102170156, "percentage": 11.91, "elapsed_time": "0:24:40", "remaining_time": "3:02:35", "throughput": 19775.77, "total_tokens": 29282816} +{"current_steps": 9305, "total_steps": 78105, "loss": 0.3954, "lr": 4.994436709730344e-06, "epoch": 0.5956724921579925, "percentage": 11.91, "elapsed_time": "0:24:41", "remaining_time": "3:02:33", "throughput": 19777.14, "total_tokens": 29297984} +{"current_steps": 9310, "total_steps": 78105, "loss": 0.4635, "lr": 4.9943993987839245e-06, "epoch": 0.5959925740989693, "percentage": 11.92, "elapsed_time": "0:24:42", "remaining_time": "3:02:31", "throughput": 19778.76, "total_tokens": 29313664} +{"current_steps": 9315, "total_steps": 78105, "loss": 0.5479, "lr": 4.994361963280192e-06, "epoch": 0.5963126560399462, "percentage": 11.93, "elapsed_time": "0:24:42", "remaining_time": "3:02:29", "throughput": 19780.38, "total_tokens": 29329472} +{"current_steps": 9320, "total_steps": 78105, "loss": 0.5641, "lr": 4.994324403221017e-06, "epoch": 0.5966327379809231, "percentage": 11.93, "elapsed_time": "0:24:43", "remaining_time": "3:02:28", "throughput": 19782.28, "total_tokens": 29346048} +{"current_steps": 9325, "total_steps": 78105, "loss": 0.4047, "lr": 4.994286718608272e-06, "epoch": 0.5969528199219, "percentage": 11.94, "elapsed_time": "0:24:44", "remaining_time": "3:02:26", "throughput": 19783.73, "total_tokens": 29361664} +{"current_steps": 9330, "total_steps": 78105, "loss": 0.5308, "lr": 4.994248909443841e-06, "epoch": 0.5972729018628768, "percentage": 11.95, "elapsed_time": "0:24:44", "remaining_time": "3:02:25", "throughput": 19785.07, "total_tokens": 29376896} +{"current_steps": 9335, "total_steps": 78105, "loss": 0.54, "lr": 4.994210975729611e-06, "epoch": 0.5975929838038538, "percentage": 11.95, "elapsed_time": "0:24:45", "remaining_time": "3:02:23", "throughput": 19786.57, "total_tokens": 29392640} +{"current_steps": 9340, "total_steps": 78105, "loss": 0.5542, "lr": 4.994172917467478e-06, "epoch": 0.5979130657448307, "percentage": 11.96, "elapsed_time": "0:24:46", "remaining_time": "3:02:21", "throughput": 19787.87, "total_tokens": 29407808} +{"current_steps": 9345, "total_steps": 78105, "loss": 0.317, "lr": 4.994134734659341e-06, "epoch": 0.5982331476858076, "percentage": 11.96, "elapsed_time": "0:24:46", "remaining_time": "3:02:19", "throughput": 19789.1, "total_tokens": 29422528} +{"current_steps": 9350, "total_steps": 78105, "loss": 0.5401, "lr": 4.994096427307105e-06, "epoch": 0.5985532296267845, "percentage": 11.97, "elapsed_time": "0:24:47", "remaining_time": "3:02:18", "throughput": 19790.62, "total_tokens": 29438272} +{"current_steps": 9355, "total_steps": 78105, "loss": 0.6292, "lr": 4.994057995412687e-06, "epoch": 0.5988733115677614, "percentage": 11.98, "elapsed_time": "0:24:48", "remaining_time": "3:02:16", "throughput": 19792.01, "total_tokens": 29453376} +{"current_steps": 9360, "total_steps": 78105, "loss": 0.5249, "lr": 4.994019438978002e-06, "epoch": 0.5991933935087382, "percentage": 11.98, "elapsed_time": "0:24:48", "remaining_time": "3:02:14", "throughput": 19793.68, "total_tokens": 29469248} +{"current_steps": 9365, "total_steps": 78105, "loss": 0.4756, "lr": 4.993980758004979e-06, "epoch": 0.5995134754497151, "percentage": 11.99, "elapsed_time": "0:24:49", "remaining_time": "3:02:13", "throughput": 19795.43, "total_tokens": 29485888} +{"current_steps": 9370, "total_steps": 78105, "loss": 0.5408, "lr": 4.993941952495546e-06, "epoch": 0.599833557390692, "percentage": 12.0, "elapsed_time": "0:24:50", "remaining_time": "3:02:11", "throughput": 19796.72, "total_tokens": 29500864} +{"current_steps": 9375, "total_steps": 78105, "loss": 0.3278, "lr": 4.993903022451643e-06, "epoch": 0.6001536393316689, "percentage": 12.0, "elapsed_time": "0:24:50", "remaining_time": "3:02:09", "throughput": 19798.22, "total_tokens": 29516288} +{"current_steps": 9380, "total_steps": 78105, "loss": 0.4373, "lr": 4.993863967875213e-06, "epoch": 0.6004737212726458, "percentage": 12.01, "elapsed_time": "0:24:51", "remaining_time": "3:02:07", "throughput": 19799.39, "total_tokens": 29530880} +{"current_steps": 9385, "total_steps": 78105, "loss": 0.4586, "lr": 4.993824788768207e-06, "epoch": 0.6007938032136226, "percentage": 12.02, "elapsed_time": "0:24:52", "remaining_time": "3:02:06", "throughput": 19801.27, "total_tokens": 29547840} +{"current_steps": 9390, "total_steps": 78105, "loss": 0.4992, "lr": 4.99378548513258e-06, "epoch": 0.6011138851545996, "percentage": 12.02, "elapsed_time": "0:24:52", "remaining_time": "3:02:04", "throughput": 19802.93, "total_tokens": 29564096} +{"current_steps": 9395, "total_steps": 78105, "loss": 0.6268, "lr": 4.993746056970297e-06, "epoch": 0.6014339670955765, "percentage": 12.03, "elapsed_time": "0:24:53", "remaining_time": "3:02:03", "throughput": 19804.36, "total_tokens": 29579520} +{"current_steps": 9400, "total_steps": 78105, "loss": 0.5077, "lr": 4.993706504283324e-06, "epoch": 0.6017540490365534, "percentage": 12.04, "elapsed_time": "0:24:54", "remaining_time": "3:02:01", "throughput": 19805.64, "total_tokens": 29594048} +{"current_steps": 9405, "total_steps": 78105, "loss": 0.4874, "lr": 4.993666827073639e-06, "epoch": 0.6020741309775303, "percentage": 12.04, "elapsed_time": "0:24:54", "remaining_time": "3:01:59", "throughput": 19807.34, "total_tokens": 29610368} +{"current_steps": 9410, "total_steps": 78105, "loss": 0.5101, "lr": 4.99362702534322e-06, "epoch": 0.6023942129185071, "percentage": 12.05, "elapsed_time": "0:24:55", "remaining_time": "3:01:58", "throughput": 19808.79, "total_tokens": 29625792} +{"current_steps": 9415, "total_steps": 78105, "loss": 0.4477, "lr": 4.993587099094057e-06, "epoch": 0.602714294859484, "percentage": 12.05, "elapsed_time": "0:24:56", "remaining_time": "3:01:56", "throughput": 19810.44, "total_tokens": 29642240} +{"current_steps": 9420, "total_steps": 78105, "loss": 0.484, "lr": 4.993547048328143e-06, "epoch": 0.6030343768004609, "percentage": 12.06, "elapsed_time": "0:24:56", "remaining_time": "3:01:55", "throughput": 19812.01, "total_tokens": 29658176} +{"current_steps": 9425, "total_steps": 78105, "loss": 0.4888, "lr": 4.9935068730474774e-06, "epoch": 0.6033544587414378, "percentage": 12.07, "elapsed_time": "0:24:57", "remaining_time": "3:01:53", "throughput": 19814.01, "total_tokens": 29675392} +{"current_steps": 9430, "total_steps": 78105, "loss": 0.6311, "lr": 4.993466573254067e-06, "epoch": 0.6036745406824147, "percentage": 12.07, "elapsed_time": "0:24:58", "remaining_time": "3:01:51", "throughput": 19815.32, "total_tokens": 29690048} +{"current_steps": 9435, "total_steps": 78105, "loss": 0.4616, "lr": 4.993426148949924e-06, "epoch": 0.6039946226233915, "percentage": 12.08, "elapsed_time": "0:24:58", "remaining_time": "3:01:49", "throughput": 19816.71, "total_tokens": 29705088} +{"current_steps": 9440, "total_steps": 78105, "loss": 0.5847, "lr": 4.993385600137066e-06, "epoch": 0.6043147045643685, "percentage": 12.09, "elapsed_time": "0:24:59", "remaining_time": "3:01:48", "throughput": 19818.18, "total_tokens": 29720576} +{"current_steps": 9445, "total_steps": 78105, "loss": 0.5055, "lr": 4.99334492681752e-06, "epoch": 0.6046347865053454, "percentage": 12.09, "elapsed_time": "0:25:00", "remaining_time": "3:01:46", "throughput": 19819.89, "total_tokens": 29736896} +{"current_steps": 9450, "total_steps": 78105, "loss": 0.4512, "lr": 4.9933041289933145e-06, "epoch": 0.6049548684463223, "percentage": 12.1, "elapsed_time": "0:25:01", "remaining_time": "3:01:45", "throughput": 19821.41, "total_tokens": 29752768} +{"current_steps": 9455, "total_steps": 78105, "loss": 0.5214, "lr": 4.993263206666489e-06, "epoch": 0.6052749503872992, "percentage": 12.11, "elapsed_time": "0:25:01", "remaining_time": "3:01:43", "throughput": 19822.97, "total_tokens": 29768640} +{"current_steps": 9460, "total_steps": 78105, "loss": 0.4837, "lr": 4.993222159839086e-06, "epoch": 0.605595032328276, "percentage": 12.11, "elapsed_time": "0:25:02", "remaining_time": "3:01:42", "throughput": 19824.9, "total_tokens": 29786240} +{"current_steps": 9465, "total_steps": 78105, "loss": 0.6349, "lr": 4.9931809885131545e-06, "epoch": 0.6059151142692529, "percentage": 12.12, "elapsed_time": "0:25:03", "remaining_time": "3:01:40", "throughput": 19826.44, "total_tokens": 29802176} +{"current_steps": 9470, "total_steps": 78105, "loss": 0.4686, "lr": 4.99313969269075e-06, "epoch": 0.6062351962102298, "percentage": 12.12, "elapsed_time": "0:25:03", "remaining_time": "3:01:39", "throughput": 19827.89, "total_tokens": 29817600} +{"current_steps": 9475, "total_steps": 78105, "loss": 0.5119, "lr": 4.993098272373937e-06, "epoch": 0.6065552781512067, "percentage": 12.13, "elapsed_time": "0:25:04", "remaining_time": "3:01:37", "throughput": 19829.34, "total_tokens": 29832960} +{"current_steps": 9480, "total_steps": 78105, "loss": 0.3367, "lr": 4.993056727564782e-06, "epoch": 0.6068753600921836, "percentage": 12.14, "elapsed_time": "0:25:05", "remaining_time": "3:01:35", "throughput": 19830.73, "total_tokens": 29848448} +{"current_steps": 9485, "total_steps": 78105, "loss": 0.4875, "lr": 4.99301505826536e-06, "epoch": 0.6071954420331604, "percentage": 12.14, "elapsed_time": "0:25:05", "remaining_time": "3:01:33", "throughput": 19832.07, "total_tokens": 29863296} +{"current_steps": 9490, "total_steps": 78105, "loss": 0.4788, "lr": 4.992973264477752e-06, "epoch": 0.6075155239741373, "percentage": 12.15, "elapsed_time": "0:25:06", "remaining_time": "3:01:32", "throughput": 19833.88, "total_tokens": 29880128} +{"current_steps": 9495, "total_steps": 78105, "loss": 0.4658, "lr": 4.9929313462040435e-06, "epoch": 0.6078356059151143, "percentage": 12.16, "elapsed_time": "0:25:07", "remaining_time": "3:01:30", "throughput": 19835.34, "total_tokens": 29895296} +{"current_steps": 9500, "total_steps": 78105, "loss": 0.5559, "lr": 4.992889303446329e-06, "epoch": 0.6081556878560912, "percentage": 12.16, "elapsed_time": "0:25:07", "remaining_time": "3:01:29", "throughput": 19836.94, "total_tokens": 29911296} +{"current_steps": 9505, "total_steps": 78105, "loss": 0.598, "lr": 4.992847136206708e-06, "epoch": 0.6084757697970681, "percentage": 12.17, "elapsed_time": "0:25:08", "remaining_time": "3:01:27", "throughput": 19838.5, "total_tokens": 29926912} +{"current_steps": 9510, "total_steps": 78105, "loss": 0.6065, "lr": 4.9928048444872854e-06, "epoch": 0.608795851738045, "percentage": 12.18, "elapsed_time": "0:25:09", "remaining_time": "3:01:25", "throughput": 19839.96, "total_tokens": 29942272} +{"current_steps": 9515, "total_steps": 78105, "loss": 0.5619, "lr": 4.992762428290174e-06, "epoch": 0.6091159336790218, "percentage": 12.18, "elapsed_time": "0:25:09", "remaining_time": "3:01:24", "throughput": 19841.35, "total_tokens": 29958016} +{"current_steps": 9520, "total_steps": 78105, "loss": 0.595, "lr": 4.992719887617491e-06, "epoch": 0.6094360156199987, "percentage": 12.19, "elapsed_time": "0:25:10", "remaining_time": "3:01:22", "throughput": 19843.12, "total_tokens": 29974720} +{"current_steps": 9525, "total_steps": 78105, "loss": 0.5034, "lr": 4.992677222471361e-06, "epoch": 0.6097560975609756, "percentage": 12.2, "elapsed_time": "0:25:11", "remaining_time": "3:01:21", "throughput": 19844.92, "total_tokens": 29991680} +{"current_steps": 9530, "total_steps": 78105, "loss": 0.3773, "lr": 4.992634432853914e-06, "epoch": 0.6100761795019525, "percentage": 12.2, "elapsed_time": "0:25:11", "remaining_time": "3:01:19", "throughput": 19846.39, "total_tokens": 30007168} +{"current_steps": 9535, "total_steps": 78105, "loss": 0.4996, "lr": 4.992591518767288e-06, "epoch": 0.6103962614429294, "percentage": 12.21, "elapsed_time": "0:25:12", "remaining_time": "3:01:18", "throughput": 19847.79, "total_tokens": 30022656} +{"current_steps": 9540, "total_steps": 78105, "loss": 0.4819, "lr": 4.992548480213624e-06, "epoch": 0.6107163433839062, "percentage": 12.21, "elapsed_time": "0:25:13", "remaining_time": "3:01:16", "throughput": 19849.26, "total_tokens": 30038144} +{"current_steps": 9545, "total_steps": 78105, "loss": 0.4784, "lr": 4.992505317195072e-06, "epoch": 0.6110364253248832, "percentage": 12.22, "elapsed_time": "0:25:14", "remaining_time": "3:01:15", "throughput": 19851.24, "total_tokens": 30055488} +{"current_steps": 9550, "total_steps": 78105, "loss": 0.4976, "lr": 4.992462029713789e-06, "epoch": 0.6113565072658601, "percentage": 12.23, "elapsed_time": "0:25:14", "remaining_time": "3:01:13", "throughput": 19852.75, "total_tokens": 30071040} +{"current_steps": 9555, "total_steps": 78105, "loss": 0.4546, "lr": 4.992418617771933e-06, "epoch": 0.611676589206837, "percentage": 12.23, "elapsed_time": "0:25:15", "remaining_time": "3:01:12", "throughput": 19854.56, "total_tokens": 30088000} +{"current_steps": 9560, "total_steps": 78105, "loss": 0.5951, "lr": 4.992375081371675e-06, "epoch": 0.6119966711478139, "percentage": 12.24, "elapsed_time": "0:25:16", "remaining_time": "3:01:10", "throughput": 19855.83, "total_tokens": 30103040} +{"current_steps": 9565, "total_steps": 78105, "loss": 0.5744, "lr": 4.992331420515187e-06, "epoch": 0.6123167530887907, "percentage": 12.25, "elapsed_time": "0:25:16", "remaining_time": "3:01:08", "throughput": 19857.15, "total_tokens": 30117760} +{"current_steps": 9570, "total_steps": 78105, "loss": 0.572, "lr": 4.992287635204651e-06, "epoch": 0.6126368350297676, "percentage": 12.25, "elapsed_time": "0:25:17", "remaining_time": "3:01:06", "throughput": 19858.79, "total_tokens": 30133888} +{"current_steps": 9575, "total_steps": 78105, "loss": 0.5694, "lr": 4.9922437254422515e-06, "epoch": 0.6129569169707445, "percentage": 12.26, "elapsed_time": "0:25:18", "remaining_time": "3:01:05", "throughput": 19860.16, "total_tokens": 30149312} +{"current_steps": 9580, "total_steps": 78105, "loss": 0.4877, "lr": 4.992199691230183e-06, "epoch": 0.6132769989117214, "percentage": 12.27, "elapsed_time": "0:25:18", "remaining_time": "3:01:03", "throughput": 19861.46, "total_tokens": 30163840} +{"current_steps": 9585, "total_steps": 78105, "loss": 0.6384, "lr": 4.992155532570641e-06, "epoch": 0.6135970808526983, "percentage": 12.27, "elapsed_time": "0:25:19", "remaining_time": "3:01:01", "throughput": 19863.28, "total_tokens": 30180544} +{"current_steps": 9590, "total_steps": 78105, "loss": 0.4069, "lr": 4.992111249465836e-06, "epoch": 0.6139171627936751, "percentage": 12.28, "elapsed_time": "0:25:20", "remaining_time": "3:01:00", "throughput": 19864.94, "total_tokens": 30197120} +{"current_steps": 9595, "total_steps": 78105, "loss": 0.5439, "lr": 4.992066841917974e-06, "epoch": 0.614237244734652, "percentage": 12.28, "elapsed_time": "0:25:20", "remaining_time": "3:00:58", "throughput": 19866.52, "total_tokens": 30213376} +{"current_steps": 9600, "total_steps": 78105, "loss": 0.5553, "lr": 4.992022309929275e-06, "epoch": 0.614557326675629, "percentage": 12.29, "elapsed_time": "0:25:21", "remaining_time": "3:00:57", "throughput": 19868.1, "total_tokens": 30229120} +{"current_steps": 9605, "total_steps": 78105, "loss": 0.5806, "lr": 4.991977653501963e-06, "epoch": 0.6148774086166059, "percentage": 12.3, "elapsed_time": "0:25:22", "remaining_time": "3:00:55", "throughput": 19869.55, "total_tokens": 30244544} +{"current_steps": 9610, "total_steps": 78105, "loss": 0.5126, "lr": 4.991932872638267e-06, "epoch": 0.6151974905575828, "percentage": 12.3, "elapsed_time": "0:25:22", "remaining_time": "3:00:54", "throughput": 19871.21, "total_tokens": 30260864} +{"current_steps": 9615, "total_steps": 78105, "loss": 0.5995, "lr": 4.991887967340422e-06, "epoch": 0.6155175724985597, "percentage": 12.31, "elapsed_time": "0:25:23", "remaining_time": "3:00:52", "throughput": 19872.4, "total_tokens": 30275456} +{"current_steps": 9620, "total_steps": 78105, "loss": 0.6657, "lr": 4.991842937610673e-06, "epoch": 0.6158376544395365, "percentage": 12.32, "elapsed_time": "0:25:24", "remaining_time": "3:00:50", "throughput": 19873.87, "total_tokens": 30291008} +{"current_steps": 9625, "total_steps": 78105, "loss": 0.588, "lr": 4.991797783451267e-06, "epoch": 0.6161577363805134, "percentage": 12.32, "elapsed_time": "0:25:24", "remaining_time": "3:00:48", "throughput": 19875.42, "total_tokens": 30306816} +{"current_steps": 9630, "total_steps": 78105, "loss": 0.4988, "lr": 4.991752504864459e-06, "epoch": 0.6164778183214903, "percentage": 12.33, "elapsed_time": "0:25:25", "remaining_time": "3:00:47", "throughput": 19876.77, "total_tokens": 30322240} +{"current_steps": 9635, "total_steps": 78105, "loss": 0.4139, "lr": 4.99170710185251e-06, "epoch": 0.6167979002624672, "percentage": 12.34, "elapsed_time": "0:25:26", "remaining_time": "3:00:45", "throughput": 19878.24, "total_tokens": 30338048} +{"current_steps": 9640, "total_steps": 78105, "loss": 0.4087, "lr": 4.991661574417687e-06, "epoch": 0.617117982203444, "percentage": 12.34, "elapsed_time": "0:25:26", "remaining_time": "3:00:44", "throughput": 19879.8, "total_tokens": 30353984} +{"current_steps": 9645, "total_steps": 78105, "loss": 0.6605, "lr": 4.991615922562264e-06, "epoch": 0.6174380641444209, "percentage": 12.35, "elapsed_time": "0:25:27", "remaining_time": "3:00:42", "throughput": 19881.3, "total_tokens": 30370048} +{"current_steps": 9650, "total_steps": 78105, "loss": 0.4528, "lr": 4.9915701462885194e-06, "epoch": 0.6177581460853978, "percentage": 12.36, "elapsed_time": "0:25:28", "remaining_time": "3:00:40", "throughput": 19882.65, "total_tokens": 30385344} +{"current_steps": 9655, "total_steps": 78105, "loss": 0.578, "lr": 4.99152424559874e-06, "epoch": 0.6180782280263748, "percentage": 12.36, "elapsed_time": "0:25:28", "remaining_time": "3:00:39", "throughput": 19883.98, "total_tokens": 30400576} +{"current_steps": 9660, "total_steps": 78105, "loss": 0.4926, "lr": 4.991478220495218e-06, "epoch": 0.6183983099673517, "percentage": 12.37, "elapsed_time": "0:25:29", "remaining_time": "3:00:37", "throughput": 19885.32, "total_tokens": 30415424} +{"current_steps": 9665, "total_steps": 78105, "loss": 0.5363, "lr": 4.991432070980251e-06, "epoch": 0.6187183919083286, "percentage": 12.37, "elapsed_time": "0:25:30", "remaining_time": "3:00:35", "throughput": 19886.68, "total_tokens": 30430592} +{"current_steps": 9670, "total_steps": 78105, "loss": 0.5223, "lr": 4.991385797056144e-06, "epoch": 0.6190384738493054, "percentage": 12.38, "elapsed_time": "0:25:30", "remaining_time": "3:00:34", "throughput": 19888.04, "total_tokens": 30445952} +{"current_steps": 9675, "total_steps": 78105, "loss": 0.4267, "lr": 4.9913393987252075e-06, "epoch": 0.6193585557902823, "percentage": 12.39, "elapsed_time": "0:25:31", "remaining_time": "3:00:32", "throughput": 19889.52, "total_tokens": 30461568} +{"current_steps": 9680, "total_steps": 78105, "loss": 0.4994, "lr": 4.9912928759897585e-06, "epoch": 0.6196786377312592, "percentage": 12.39, "elapsed_time": "0:25:32", "remaining_time": "3:00:30", "throughput": 19890.82, "total_tokens": 30476672} +{"current_steps": 9685, "total_steps": 78105, "loss": 0.446, "lr": 4.991246228852119e-06, "epoch": 0.6199987196722361, "percentage": 12.4, "elapsed_time": "0:25:32", "remaining_time": "3:00:29", "throughput": 19892.29, "total_tokens": 30492672} +{"current_steps": 9690, "total_steps": 78105, "loss": 0.5466, "lr": 4.991199457314621e-06, "epoch": 0.620318801613213, "percentage": 12.41, "elapsed_time": "0:25:33", "remaining_time": "3:00:27", "throughput": 19893.53, "total_tokens": 30507712} +{"current_steps": 9695, "total_steps": 78105, "loss": 0.7476, "lr": 4.991152561379596e-06, "epoch": 0.6206388835541898, "percentage": 12.41, "elapsed_time": "0:25:34", "remaining_time": "3:00:25", "throughput": 19895.2, "total_tokens": 30524032} +{"current_steps": 9700, "total_steps": 78105, "loss": 0.4074, "lr": 4.9911055410493905e-06, "epoch": 0.6209589654951667, "percentage": 12.42, "elapsed_time": "0:25:34", "remaining_time": "3:00:24", "throughput": 19896.61, "total_tokens": 30539776} +{"current_steps": 9705, "total_steps": 78105, "loss": 0.4596, "lr": 4.991058396326348e-06, "epoch": 0.6212790474361437, "percentage": 12.43, "elapsed_time": "0:25:35", "remaining_time": "3:00:22", "throughput": 19897.88, "total_tokens": 30554752} +{"current_steps": 9710, "total_steps": 78105, "loss": 0.4597, "lr": 4.991011127212826e-06, "epoch": 0.6215991293771206, "percentage": 12.43, "elapsed_time": "0:25:36", "remaining_time": "3:00:21", "throughput": 19899.74, "total_tokens": 30571968} +{"current_steps": 9715, "total_steps": 78105, "loss": 0.5344, "lr": 4.990963733711183e-06, "epoch": 0.6219192113180975, "percentage": 12.44, "elapsed_time": "0:25:36", "remaining_time": "3:00:19", "throughput": 19901.06, "total_tokens": 30587264} +{"current_steps": 9720, "total_steps": 78105, "loss": 0.6401, "lr": 4.990916215823785e-06, "epoch": 0.6222392932590743, "percentage": 12.44, "elapsed_time": "0:25:37", "remaining_time": "3:00:18", "throughput": 19902.53, "total_tokens": 30603200} +{"current_steps": 9725, "total_steps": 78105, "loss": 0.4959, "lr": 4.990868573553007e-06, "epoch": 0.6225593752000512, "percentage": 12.45, "elapsed_time": "0:25:38", "remaining_time": "3:00:16", "throughput": 19903.68, "total_tokens": 30617856} +{"current_steps": 9730, "total_steps": 78105, "loss": 0.6265, "lr": 4.990820806901227e-06, "epoch": 0.6228794571410281, "percentage": 12.46, "elapsed_time": "0:25:38", "remaining_time": "3:00:14", "throughput": 19904.81, "total_tokens": 30632448} +{"current_steps": 9735, "total_steps": 78105, "loss": 0.5136, "lr": 4.990772915870829e-06, "epoch": 0.623199539082005, "percentage": 12.46, "elapsed_time": "0:25:39", "remaining_time": "3:00:12", "throughput": 19906.12, "total_tokens": 30647680} +{"current_steps": 9740, "total_steps": 78105, "loss": 0.6384, "lr": 4.9907249004642076e-06, "epoch": 0.6235196210229819, "percentage": 12.47, "elapsed_time": "0:25:40", "remaining_time": "3:00:11", "throughput": 19907.58, "total_tokens": 30663616} +{"current_steps": 9745, "total_steps": 78105, "loss": 0.6294, "lr": 4.990676760683757e-06, "epoch": 0.6238397029639587, "percentage": 12.48, "elapsed_time": "0:25:41", "remaining_time": "3:00:10", "throughput": 19909.9, "total_tokens": 30682496} +{"current_steps": 9750, "total_steps": 78105, "loss": 0.5362, "lr": 4.990628496531883e-06, "epoch": 0.6241597849049356, "percentage": 12.48, "elapsed_time": "0:25:41", "remaining_time": "3:00:08", "throughput": 19911.18, "total_tokens": 30697664} +{"current_steps": 9755, "total_steps": 78105, "loss": 0.5101, "lr": 4.990580108010994e-06, "epoch": 0.6244798668459125, "percentage": 12.49, "elapsed_time": "0:25:42", "remaining_time": "3:00:07", "throughput": 19912.62, "total_tokens": 30713408} +{"current_steps": 9760, "total_steps": 78105, "loss": 0.4221, "lr": 4.990531595123509e-06, "epoch": 0.6247999487868895, "percentage": 12.5, "elapsed_time": "0:25:43", "remaining_time": "3:00:05", "throughput": 19914.47, "total_tokens": 30730496} +{"current_steps": 9765, "total_steps": 78105, "loss": 0.4812, "lr": 4.9904829578718474e-06, "epoch": 0.6251200307278664, "percentage": 12.5, "elapsed_time": "0:25:43", "remaining_time": "3:00:04", "throughput": 19915.81, "total_tokens": 30745664} +{"current_steps": 9770, "total_steps": 78105, "loss": 0.4585, "lr": 4.9904341962584396e-06, "epoch": 0.6254401126688433, "percentage": 12.51, "elapsed_time": "0:25:44", "remaining_time": "3:00:02", "throughput": 19917.18, "total_tokens": 30760960} +{"current_steps": 9775, "total_steps": 78105, "loss": 0.5699, "lr": 4.99038531028572e-06, "epoch": 0.6257601946098201, "percentage": 12.52, "elapsed_time": "0:25:45", "remaining_time": "3:00:00", "throughput": 19918.32, "total_tokens": 30775872} +{"current_steps": 9780, "total_steps": 78105, "loss": 0.4519, "lr": 4.990336299956131e-06, "epoch": 0.626080276550797, "percentage": 12.52, "elapsed_time": "0:25:45", "remaining_time": "2:59:59", "throughput": 19919.77, "total_tokens": 30791424} +{"current_steps": 9785, "total_steps": 78105, "loss": 0.5148, "lr": 4.990287165272119e-06, "epoch": 0.6264003584917739, "percentage": 12.53, "elapsed_time": "0:25:46", "remaining_time": "2:59:57", "throughput": 19921.26, "total_tokens": 30807680} +{"current_steps": 9790, "total_steps": 78105, "loss": 0.5699, "lr": 4.990237906236136e-06, "epoch": 0.6267204404327508, "percentage": 12.53, "elapsed_time": "0:25:47", "remaining_time": "2:59:56", "throughput": 19922.74, "total_tokens": 30823680} +{"current_steps": 9795, "total_steps": 78105, "loss": 0.6064, "lr": 4.9901885228506444e-06, "epoch": 0.6270405223737276, "percentage": 12.54, "elapsed_time": "0:25:47", "remaining_time": "2:59:54", "throughput": 19924.1, "total_tokens": 30839360} +{"current_steps": 9800, "total_steps": 78105, "loss": 0.4785, "lr": 4.990139015118108e-06, "epoch": 0.6273606043147045, "percentage": 12.55, "elapsed_time": "0:25:48", "remaining_time": "2:59:52", "throughput": 19925.44, "total_tokens": 30854784} +{"current_steps": 9805, "total_steps": 78105, "loss": 0.5806, "lr": 4.990089383041e-06, "epoch": 0.6276806862556814, "percentage": 12.55, "elapsed_time": "0:25:49", "remaining_time": "2:59:51", "throughput": 19926.87, "total_tokens": 30870592} +{"current_steps": 9810, "total_steps": 78105, "loss": 0.5823, "lr": 4.9900396266217985e-06, "epoch": 0.6280007681966584, "percentage": 12.56, "elapsed_time": "0:25:49", "remaining_time": "2:59:49", "throughput": 19928.2, "total_tokens": 30885952} +{"current_steps": 9815, "total_steps": 78105, "loss": 0.4227, "lr": 4.9899897458629886e-06, "epoch": 0.6283208501376353, "percentage": 12.57, "elapsed_time": "0:25:50", "remaining_time": "2:59:48", "throughput": 19929.56, "total_tokens": 30902080} +{"current_steps": 9820, "total_steps": 78105, "loss": 0.519, "lr": 4.989939740767061e-06, "epoch": 0.6286409320786122, "percentage": 12.57, "elapsed_time": "0:25:51", "remaining_time": "2:59:47", "throughput": 19931.36, "total_tokens": 30919232} +{"current_steps": 9825, "total_steps": 78105, "loss": 0.4413, "lr": 4.989889611336512e-06, "epoch": 0.628961014019589, "percentage": 12.58, "elapsed_time": "0:25:52", "remaining_time": "2:59:45", "throughput": 19933.19, "total_tokens": 30936448} +{"current_steps": 9830, "total_steps": 78105, "loss": 0.3904, "lr": 4.989839357573845e-06, "epoch": 0.6292810959605659, "percentage": 12.59, "elapsed_time": "0:25:52", "remaining_time": "2:59:44", "throughput": 19934.64, "total_tokens": 30952256} +{"current_steps": 9835, "total_steps": 78105, "loss": 0.527, "lr": 4.98978897948157e-06, "epoch": 0.6296011779015428, "percentage": 12.59, "elapsed_time": "0:25:53", "remaining_time": "2:59:42", "throughput": 19935.86, "total_tokens": 30967360} +{"current_steps": 9840, "total_steps": 78105, "loss": 0.5358, "lr": 4.989738477062201e-06, "epoch": 0.6299212598425197, "percentage": 12.6, "elapsed_time": "0:25:54", "remaining_time": "2:59:40", "throughput": 19936.92, "total_tokens": 30982016} +{"current_steps": 9845, "total_steps": 78105, "loss": 0.4595, "lr": 4.989687850318262e-06, "epoch": 0.6302413417834966, "percentage": 12.6, "elapsed_time": "0:25:54", "remaining_time": "2:59:39", "throughput": 19938.39, "total_tokens": 30998080} +{"current_steps": 9850, "total_steps": 78105, "loss": 0.4813, "lr": 4.98963709925228e-06, "epoch": 0.6305614237244734, "percentage": 12.61, "elapsed_time": "0:25:55", "remaining_time": "2:59:37", "throughput": 19939.62, "total_tokens": 31013184} +{"current_steps": 9855, "total_steps": 78105, "loss": 0.5403, "lr": 4.98958622386679e-06, "epoch": 0.6308815056654503, "percentage": 12.62, "elapsed_time": "0:25:56", "remaining_time": "2:59:36", "throughput": 19941.02, "total_tokens": 31028800} +{"current_steps": 9860, "total_steps": 78105, "loss": 0.5813, "lr": 4.989535224164331e-06, "epoch": 0.6312015876064272, "percentage": 12.62, "elapsed_time": "0:25:56", "remaining_time": "2:59:34", "throughput": 19942.53, "total_tokens": 31044736} +{"current_steps": 9865, "total_steps": 78105, "loss": 0.4064, "lr": 4.98948410014745e-06, "epoch": 0.6315216695474042, "percentage": 12.63, "elapsed_time": "0:25:57", "remaining_time": "2:59:32", "throughput": 19943.84, "total_tokens": 31059968} +{"current_steps": 9870, "total_steps": 78105, "loss": 0.4707, "lr": 4.989432851818701e-06, "epoch": 0.6318417514883811, "percentage": 12.64, "elapsed_time": "0:25:58", "remaining_time": "2:59:31", "throughput": 19945.03, "total_tokens": 31074880} +{"current_steps": 9875, "total_steps": 78105, "loss": 0.4296, "lr": 4.989381479180643e-06, "epoch": 0.6321618334293579, "percentage": 12.64, "elapsed_time": "0:25:58", "remaining_time": "2:59:29", "throughput": 19946.41, "total_tokens": 31090432} +{"current_steps": 9880, "total_steps": 78105, "loss": 0.6233, "lr": 4.989329982235839e-06, "epoch": 0.6324819153703348, "percentage": 12.65, "elapsed_time": "0:25:59", "remaining_time": "2:59:28", "throughput": 19948.01, "total_tokens": 31107264} +{"current_steps": 9885, "total_steps": 78105, "loss": 0.4683, "lr": 4.989278360986864e-06, "epoch": 0.6328019973113117, "percentage": 12.66, "elapsed_time": "0:26:00", "remaining_time": "2:59:27", "throughput": 19949.68, "total_tokens": 31124096} +{"current_steps": 9890, "total_steps": 78105, "loss": 0.4671, "lr": 4.9892266154362915e-06, "epoch": 0.6331220792522886, "percentage": 12.66, "elapsed_time": "0:26:00", "remaining_time": "2:59:25", "throughput": 19950.87, "total_tokens": 31138752} +{"current_steps": 9895, "total_steps": 78105, "loss": 0.6549, "lr": 4.989174745586709e-06, "epoch": 0.6334421611932655, "percentage": 12.67, "elapsed_time": "0:26:01", "remaining_time": "2:59:23", "throughput": 19952.16, "total_tokens": 31154048} +{"current_steps": 9900, "total_steps": 78105, "loss": 0.7086, "lr": 4.989122751440706e-06, "epoch": 0.6337622431342423, "percentage": 12.68, "elapsed_time": "0:26:02", "remaining_time": "2:59:22", "throughput": 19953.79, "total_tokens": 31170304} +{"current_steps": 9905, "total_steps": 78105, "loss": 0.4503, "lr": 4.989070633000877e-06, "epoch": 0.6340823250752192, "percentage": 12.68, "elapsed_time": "0:26:02", "remaining_time": "2:59:20", "throughput": 19955.26, "total_tokens": 31186368} +{"current_steps": 9910, "total_steps": 78105, "loss": 0.5373, "lr": 4.989018390269827e-06, "epoch": 0.6344024070161961, "percentage": 12.69, "elapsed_time": "0:26:03", "remaining_time": "2:59:18", "throughput": 19956.61, "total_tokens": 31201664} +{"current_steps": 9915, "total_steps": 78105, "loss": 0.6288, "lr": 4.988966023250162e-06, "epoch": 0.634722488957173, "percentage": 12.69, "elapsed_time": "0:26:04", "remaining_time": "2:59:17", "throughput": 19957.87, "total_tokens": 31217216} +{"current_steps": 9920, "total_steps": 78105, "loss": 0.4829, "lr": 4.988913531944498e-06, "epoch": 0.63504257089815, "percentage": 12.7, "elapsed_time": "0:26:04", "remaining_time": "2:59:16", "throughput": 19959.46, "total_tokens": 31233728} +{"current_steps": 9925, "total_steps": 78105, "loss": 0.5572, "lr": 4.988860916355458e-06, "epoch": 0.6353626528391269, "percentage": 12.71, "elapsed_time": "0:26:05", "remaining_time": "2:59:14", "throughput": 19960.89, "total_tokens": 31249664} +{"current_steps": 9930, "total_steps": 78105, "loss": 0.4233, "lr": 4.988808176485668e-06, "epoch": 0.6356827347801037, "percentage": 12.71, "elapsed_time": "0:26:06", "remaining_time": "2:59:13", "throughput": 19962.29, "total_tokens": 31265408} +{"current_steps": 9935, "total_steps": 78105, "loss": 0.3763, "lr": 4.98875531233776e-06, "epoch": 0.6360028167210806, "percentage": 12.72, "elapsed_time": "0:26:06", "remaining_time": "2:59:11", "throughput": 19963.66, "total_tokens": 31281088} +{"current_steps": 9940, "total_steps": 78105, "loss": 0.5096, "lr": 4.9887023239143766e-06, "epoch": 0.6363228986620575, "percentage": 12.73, "elapsed_time": "0:26:07", "remaining_time": "2:59:09", "throughput": 19964.95, "total_tokens": 31296512} +{"current_steps": 9945, "total_steps": 78105, "loss": 0.5026, "lr": 4.988649211218161e-06, "epoch": 0.6366429806030344, "percentage": 12.73, "elapsed_time": "0:26:08", "remaining_time": "2:59:08", "throughput": 19966.38, "total_tokens": 31312000} +{"current_steps": 9950, "total_steps": 78105, "loss": 0.4883, "lr": 4.988595974251768e-06, "epoch": 0.6369630625440112, "percentage": 12.74, "elapsed_time": "0:26:08", "remaining_time": "2:59:06", "throughput": 19967.62, "total_tokens": 31327488} +{"current_steps": 9955, "total_steps": 78105, "loss": 0.4126, "lr": 4.988542613017853e-06, "epoch": 0.6372831444849881, "percentage": 12.75, "elapsed_time": "0:26:09", "remaining_time": "2:59:05", "throughput": 19969.19, "total_tokens": 31343936} +{"current_steps": 9960, "total_steps": 78105, "loss": 0.4868, "lr": 4.988489127519084e-06, "epoch": 0.637603226425965, "percentage": 12.75, "elapsed_time": "0:26:10", "remaining_time": "2:59:03", "throughput": 19970.45, "total_tokens": 31359232} +{"current_steps": 9965, "total_steps": 78105, "loss": 0.4822, "lr": 4.988435517758129e-06, "epoch": 0.6379233083669419, "percentage": 12.76, "elapsed_time": "0:26:10", "remaining_time": "2:59:02", "throughput": 19972.02, "total_tokens": 31375616} +{"current_steps": 9970, "total_steps": 78105, "loss": 0.5156, "lr": 4.988381783737666e-06, "epoch": 0.6382433903079189, "percentage": 12.76, "elapsed_time": "0:26:11", "remaining_time": "2:59:00", "throughput": 19973.5, "total_tokens": 31391680} +{"current_steps": 9975, "total_steps": 78105, "loss": 0.503, "lr": 4.988327925460378e-06, "epoch": 0.6385634722488958, "percentage": 12.77, "elapsed_time": "0:26:12", "remaining_time": "2:58:59", "throughput": 19974.91, "total_tokens": 31407424} +{"current_steps": 9980, "total_steps": 78105, "loss": 0.4856, "lr": 4.988273942928955e-06, "epoch": 0.6388835541898726, "percentage": 12.78, "elapsed_time": "0:26:13", "remaining_time": "2:58:57", "throughput": 19976.23, "total_tokens": 31422912} +{"current_steps": 9985, "total_steps": 78105, "loss": 0.5529, "lr": 4.988219836146092e-06, "epoch": 0.6392036361308495, "percentage": 12.78, "elapsed_time": "0:26:13", "remaining_time": "2:58:56", "throughput": 19977.74, "total_tokens": 31438912} +{"current_steps": 9990, "total_steps": 78105, "loss": 0.404, "lr": 4.988165605114492e-06, "epoch": 0.6395237180718264, "percentage": 12.79, "elapsed_time": "0:26:14", "remaining_time": "2:58:54", "throughput": 19979.31, "total_tokens": 31455168} +{"current_steps": 9995, "total_steps": 78105, "loss": 0.4475, "lr": 4.988111249836861e-06, "epoch": 0.6398438000128033, "percentage": 12.8, "elapsed_time": "0:26:15", "remaining_time": "2:58:53", "throughput": 19980.68, "total_tokens": 31470592} +{"current_steps": 10000, "total_steps": 78105, "loss": 0.4538, "lr": 4.988056770315915e-06, "epoch": 0.6401638819537802, "percentage": 12.8, "elapsed_time": "0:26:15", "remaining_time": "2:58:51", "throughput": 19981.77, "total_tokens": 31485632} +{"current_steps": 10005, "total_steps": 78105, "loss": 0.4255, "lr": 4.988002166554373e-06, "epoch": 0.640483963894757, "percentage": 12.81, "elapsed_time": "0:26:16", "remaining_time": "2:58:49", "throughput": 19982.87, "total_tokens": 31500352} +{"current_steps": 10010, "total_steps": 78105, "loss": 0.5204, "lr": 4.987947438554963e-06, "epoch": 0.6408040458357339, "percentage": 12.82, "elapsed_time": "0:26:17", "remaining_time": "2:58:48", "throughput": 19984.37, "total_tokens": 31516928} +{"current_steps": 10015, "total_steps": 78105, "loss": 0.4247, "lr": 4.987892586320417e-06, "epoch": 0.6411241277767108, "percentage": 12.82, "elapsed_time": "0:26:17", "remaining_time": "2:58:46", "throughput": 19985.61, "total_tokens": 31532608} +{"current_steps": 10020, "total_steps": 78105, "loss": 0.6965, "lr": 4.987837609853474e-06, "epoch": 0.6414442097176877, "percentage": 12.83, "elapsed_time": "0:26:18", "remaining_time": "2:58:45", "throughput": 19987.14, "total_tokens": 31548608} +{"current_steps": 10025, "total_steps": 78105, "loss": 0.6434, "lr": 4.98778250915688e-06, "epoch": 0.6417642916586647, "percentage": 12.84, "elapsed_time": "0:26:19", "remaining_time": "2:58:43", "throughput": 19988.47, "total_tokens": 31564224} +{"current_steps": 10030, "total_steps": 78105, "loss": 0.5233, "lr": 4.9877272842333855e-06, "epoch": 0.6420843735996415, "percentage": 12.84, "elapsed_time": "0:26:19", "remaining_time": "2:58:42", "throughput": 19989.64, "total_tokens": 31579200} +{"current_steps": 10035, "total_steps": 78105, "loss": 0.5963, "lr": 4.987671935085749e-06, "epoch": 0.6424044555406184, "percentage": 12.85, "elapsed_time": "0:26:20", "remaining_time": "2:58:40", "throughput": 19990.97, "total_tokens": 31594816} +{"current_steps": 10040, "total_steps": 78105, "loss": 0.4608, "lr": 4.987616461716732e-06, "epoch": 0.6427245374815953, "percentage": 12.85, "elapsed_time": "0:26:21", "remaining_time": "2:58:39", "throughput": 19992.35, "total_tokens": 31610432} +{"current_steps": 10045, "total_steps": 78105, "loss": 0.5298, "lr": 4.987560864129109e-06, "epoch": 0.6430446194225722, "percentage": 12.86, "elapsed_time": "0:26:21", "remaining_time": "2:58:37", "throughput": 19994.11, "total_tokens": 31627520} +{"current_steps": 10050, "total_steps": 78105, "loss": 0.4993, "lr": 4.9875051423256515e-06, "epoch": 0.6433647013635491, "percentage": 12.87, "elapsed_time": "0:26:22", "remaining_time": "2:58:36", "throughput": 19995.76, "total_tokens": 31644352} +{"current_steps": 10055, "total_steps": 78105, "loss": 0.6124, "lr": 4.987449296309145e-06, "epoch": 0.6436847833045259, "percentage": 12.87, "elapsed_time": "0:26:23", "remaining_time": "2:58:35", "throughput": 19997.02, "total_tokens": 31660160} +{"current_steps": 10060, "total_steps": 78105, "loss": 0.597, "lr": 4.987393326082377e-06, "epoch": 0.6440048652455028, "percentage": 12.88, "elapsed_time": "0:26:23", "remaining_time": "2:58:33", "throughput": 19998.54, "total_tokens": 31676352} +{"current_steps": 10065, "total_steps": 78105, "loss": 0.4995, "lr": 4.987337231648143e-06, "epoch": 0.6443249471864797, "percentage": 12.89, "elapsed_time": "0:26:24", "remaining_time": "2:58:32", "throughput": 19999.98, "total_tokens": 31692160} +{"current_steps": 10070, "total_steps": 78105, "loss": 0.475, "lr": 4.987281013009244e-06, "epoch": 0.6446450291274566, "percentage": 12.89, "elapsed_time": "0:26:25", "remaining_time": "2:58:30", "throughput": 20001.29, "total_tokens": 31707712} +{"current_steps": 10075, "total_steps": 78105, "loss": 0.385, "lr": 4.987224670168487e-06, "epoch": 0.6449651110684336, "percentage": 12.9, "elapsed_time": "0:26:25", "remaining_time": "2:58:29", "throughput": 20002.81, "total_tokens": 31723776} +{"current_steps": 10080, "total_steps": 78105, "loss": 0.5022, "lr": 4.987168203128685e-06, "epoch": 0.6452851930094105, "percentage": 12.91, "elapsed_time": "0:26:26", "remaining_time": "2:58:27", "throughput": 20004.51, "total_tokens": 31740672} +{"current_steps": 10085, "total_steps": 78105, "loss": 0.3749, "lr": 4.987111611892658e-06, "epoch": 0.6456052749503873, "percentage": 12.91, "elapsed_time": "0:26:27", "remaining_time": "2:58:26", "throughput": 20005.73, "total_tokens": 31755648} +{"current_steps": 10090, "total_steps": 78105, "loss": 0.6307, "lr": 4.987054896463233e-06, "epoch": 0.6459253568913642, "percentage": 12.92, "elapsed_time": "0:26:27", "remaining_time": "2:58:24", "throughput": 20006.98, "total_tokens": 31770624} +{"current_steps": 10095, "total_steps": 78105, "loss": 0.5458, "lr": 4.9869980568432395e-06, "epoch": 0.6462454388323411, "percentage": 12.92, "elapsed_time": "0:26:28", "remaining_time": "2:58:22", "throughput": 20008.41, "total_tokens": 31786816} +{"current_steps": 10100, "total_steps": 78105, "loss": 0.4628, "lr": 4.986941093035519e-06, "epoch": 0.646565520773318, "percentage": 12.93, "elapsed_time": "0:26:29", "remaining_time": "2:58:21", "throughput": 20009.62, "total_tokens": 31801792} +{"current_steps": 10105, "total_steps": 78105, "loss": 0.5302, "lr": 4.986884005042913e-06, "epoch": 0.6468856027142948, "percentage": 12.94, "elapsed_time": "0:26:30", "remaining_time": "2:58:19", "throughput": 20011.33, "total_tokens": 31818944} +{"current_steps": 10110, "total_steps": 78105, "loss": 0.5139, "lr": 4.986826792868274e-06, "epoch": 0.6472056846552717, "percentage": 12.94, "elapsed_time": "0:26:30", "remaining_time": "2:58:18", "throughput": 20012.55, "total_tokens": 31834176} +{"current_steps": 10115, "total_steps": 78105, "loss": 0.4676, "lr": 4.986769456514459e-06, "epoch": 0.6475257665962486, "percentage": 12.95, "elapsed_time": "0:26:31", "remaining_time": "2:58:16", "throughput": 20013.97, "total_tokens": 31850176} +{"current_steps": 10120, "total_steps": 78105, "loss": 0.5846, "lr": 4.986711995984329e-06, "epoch": 0.6478458485372255, "percentage": 12.96, "elapsed_time": "0:26:32", "remaining_time": "2:58:15", "throughput": 20015.36, "total_tokens": 31865600} +{"current_steps": 10125, "total_steps": 78105, "loss": 0.5411, "lr": 4.986654411280756e-06, "epoch": 0.6481659304782024, "percentage": 12.96, "elapsed_time": "0:26:32", "remaining_time": "2:58:13", "throughput": 20016.52, "total_tokens": 31880512} +{"current_steps": 10130, "total_steps": 78105, "loss": 0.5552, "lr": 4.986596702406613e-06, "epoch": 0.6484860124191794, "percentage": 12.97, "elapsed_time": "0:26:33", "remaining_time": "2:58:12", "throughput": 20017.91, "total_tokens": 31896512} +{"current_steps": 10135, "total_steps": 78105, "loss": 0.5344, "lr": 4.986538869364784e-06, "epoch": 0.6488060943601562, "percentage": 12.98, "elapsed_time": "0:26:34", "remaining_time": "2:58:10", "throughput": 20019.34, "total_tokens": 31912768} +{"current_steps": 10140, "total_steps": 78105, "loss": 0.5518, "lr": 4.986480912158156e-06, "epoch": 0.6491261763011331, "percentage": 12.98, "elapsed_time": "0:26:34", "remaining_time": "2:58:09", "throughput": 20020.43, "total_tokens": 31927488} +{"current_steps": 10145, "total_steps": 78105, "loss": 0.448, "lr": 4.986422830789622e-06, "epoch": 0.64944625824211, "percentage": 12.99, "elapsed_time": "0:26:35", "remaining_time": "2:58:07", "throughput": 20021.69, "total_tokens": 31942784} +{"current_steps": 10150, "total_steps": 78105, "loss": 0.4547, "lr": 4.986364625262083e-06, "epoch": 0.6497663401830869, "percentage": 13.0, "elapsed_time": "0:26:36", "remaining_time": "2:58:05", "throughput": 20023.02, "total_tokens": 31958528} +{"current_steps": 10155, "total_steps": 78105, "loss": 0.4755, "lr": 4.986306295578446e-06, "epoch": 0.6500864221240638, "percentage": 13.0, "elapsed_time": "0:26:36", "remaining_time": "2:58:04", "throughput": 20024.25, "total_tokens": 31974016} +{"current_steps": 10160, "total_steps": 78105, "loss": 0.4424, "lr": 4.986247841741624e-06, "epoch": 0.6504065040650406, "percentage": 13.01, "elapsed_time": "0:26:37", "remaining_time": "2:58:03", "throughput": 20025.86, "total_tokens": 31990720} +{"current_steps": 10165, "total_steps": 78105, "loss": 0.4666, "lr": 4.986189263754534e-06, "epoch": 0.6507265860060175, "percentage": 13.01, "elapsed_time": "0:26:38", "remaining_time": "2:58:01", "throughput": 20027.46, "total_tokens": 32007168} +{"current_steps": 10170, "total_steps": 78105, "loss": 0.4939, "lr": 4.9861305616201024e-06, "epoch": 0.6510466679469944, "percentage": 13.02, "elapsed_time": "0:26:38", "remaining_time": "2:58:00", "throughput": 20028.66, "total_tokens": 32022208} +{"current_steps": 10175, "total_steps": 78105, "loss": 0.6421, "lr": 4.986071735341261e-06, "epoch": 0.6513667498879713, "percentage": 13.03, "elapsed_time": "0:26:39", "remaining_time": "2:57:58", "throughput": 20030.05, "total_tokens": 32037760} +{"current_steps": 10180, "total_steps": 78105, "loss": 0.4327, "lr": 4.986012784920946e-06, "epoch": 0.6516868318289483, "percentage": 13.03, "elapsed_time": "0:26:40", "remaining_time": "2:57:56", "throughput": 20031.27, "total_tokens": 32052800} +{"current_steps": 10185, "total_steps": 78105, "loss": 0.6118, "lr": 4.9859537103621026e-06, "epoch": 0.6520069137699251, "percentage": 13.04, "elapsed_time": "0:26:40", "remaining_time": "2:57:55", "throughput": 20032.52, "total_tokens": 32068288} +{"current_steps": 10190, "total_steps": 78105, "loss": 0.4767, "lr": 4.985894511667679e-06, "epoch": 0.652326995710902, "percentage": 13.05, "elapsed_time": "0:26:41", "remaining_time": "2:57:53", "throughput": 20033.59, "total_tokens": 32083200} +{"current_steps": 10195, "total_steps": 78105, "loss": 0.4958, "lr": 4.985835188840632e-06, "epoch": 0.6526470776518789, "percentage": 13.05, "elapsed_time": "0:26:42", "remaining_time": "2:57:51", "throughput": 20034.88, "total_tokens": 32098432} +{"current_steps": 10200, "total_steps": 78105, "loss": 0.4854, "lr": 4.985775741883924e-06, "epoch": 0.6529671595928558, "percentage": 13.06, "elapsed_time": "0:26:42", "remaining_time": "2:57:50", "throughput": 20036.46, "total_tokens": 32114944} +{"current_steps": 10205, "total_steps": 78105, "loss": 0.404, "lr": 4.985716170800524e-06, "epoch": 0.6532872415338327, "percentage": 13.07, "elapsed_time": "0:26:43", "remaining_time": "2:57:49", "throughput": 20038.05, "total_tokens": 32131520} +{"current_steps": 10210, "total_steps": 78105, "loss": 0.4456, "lr": 4.985656475593405e-06, "epoch": 0.6536073234748095, "percentage": 13.07, "elapsed_time": "0:26:44", "remaining_time": "2:57:47", "throughput": 20039.39, "total_tokens": 32147008} +{"current_steps": 10215, "total_steps": 78105, "loss": 0.5132, "lr": 4.98559665626555e-06, "epoch": 0.6539274054157864, "percentage": 13.08, "elapsed_time": "0:26:44", "remaining_time": "2:57:46", "throughput": 20040.66, "total_tokens": 32162560} +{"current_steps": 10220, "total_steps": 78105, "loss": 0.4752, "lr": 4.985536712819944e-06, "epoch": 0.6542474873567633, "percentage": 13.08, "elapsed_time": "0:26:45", "remaining_time": "2:57:44", "throughput": 20041.66, "total_tokens": 32177024} +{"current_steps": 10225, "total_steps": 78105, "loss": 0.5257, "lr": 4.9854766452595815e-06, "epoch": 0.6545675692977402, "percentage": 13.09, "elapsed_time": "0:26:46", "remaining_time": "2:57:42", "throughput": 20043.02, "total_tokens": 32193024} +{"current_steps": 10230, "total_steps": 78105, "loss": 0.4498, "lr": 4.985416453587462e-06, "epoch": 0.6548876512387171, "percentage": 13.1, "elapsed_time": "0:26:46", "remaining_time": "2:57:41", "throughput": 20044.33, "total_tokens": 32208896} +{"current_steps": 10235, "total_steps": 78105, "loss": 0.4368, "lr": 4.985356137806591e-06, "epoch": 0.655207733179694, "percentage": 13.1, "elapsed_time": "0:26:47", "remaining_time": "2:57:40", "throughput": 20045.77, "total_tokens": 32225280} +{"current_steps": 10240, "total_steps": 78105, "loss": 0.4657, "lr": 4.98529569791998e-06, "epoch": 0.6555278151206709, "percentage": 13.11, "elapsed_time": "0:26:48", "remaining_time": "2:57:38", "throughput": 20047.0, "total_tokens": 32240768} +{"current_steps": 10245, "total_steps": 78105, "loss": 0.5121, "lr": 4.985235133930647e-06, "epoch": 0.6558478970616478, "percentage": 13.12, "elapsed_time": "0:26:48", "remaining_time": "2:57:37", "throughput": 20048.21, "total_tokens": 32255872} +{"current_steps": 10250, "total_steps": 78105, "loss": 0.5042, "lr": 4.9851744458416175e-06, "epoch": 0.6561679790026247, "percentage": 13.12, "elapsed_time": "0:26:49", "remaining_time": "2:57:35", "throughput": 20049.76, "total_tokens": 32272512} +{"current_steps": 10255, "total_steps": 78105, "loss": 0.5399, "lr": 4.98511363365592e-06, "epoch": 0.6564880609436016, "percentage": 13.13, "elapsed_time": "0:26:50", "remaining_time": "2:57:33", "throughput": 20050.62, "total_tokens": 32286272} +{"current_steps": 10260, "total_steps": 78105, "loss": 0.5322, "lr": 4.985052697376592e-06, "epoch": 0.6568081428845784, "percentage": 13.14, "elapsed_time": "0:26:50", "remaining_time": "2:57:32", "throughput": 20051.98, "total_tokens": 32301952} +{"current_steps": 10265, "total_steps": 78105, "loss": 0.6091, "lr": 4.984991637006677e-06, "epoch": 0.6571282248255553, "percentage": 13.14, "elapsed_time": "0:26:51", "remaining_time": "2:57:30", "throughput": 20053.37, "total_tokens": 32318208} +{"current_steps": 10270, "total_steps": 78105, "loss": 0.5593, "lr": 4.984930452549224e-06, "epoch": 0.6574483067665322, "percentage": 13.15, "elapsed_time": "0:26:52", "remaining_time": "2:57:29", "throughput": 20054.57, "total_tokens": 32333504} +{"current_steps": 10275, "total_steps": 78105, "loss": 0.5094, "lr": 4.984869144007286e-06, "epoch": 0.6577683887075091, "percentage": 13.16, "elapsed_time": "0:26:52", "remaining_time": "2:57:28", "throughput": 20056.22, "total_tokens": 32350208} +{"current_steps": 10280, "total_steps": 78105, "loss": 0.5346, "lr": 4.984807711383928e-06, "epoch": 0.658088470648486, "percentage": 13.16, "elapsed_time": "0:26:53", "remaining_time": "2:57:26", "throughput": 20057.45, "total_tokens": 32365376} +{"current_steps": 10285, "total_steps": 78105, "loss": 0.4705, "lr": 4.984746154682215e-06, "epoch": 0.6584085525894628, "percentage": 13.17, "elapsed_time": "0:26:54", "remaining_time": "2:57:24", "throughput": 20058.84, "total_tokens": 32381248} +{"current_steps": 10290, "total_steps": 78105, "loss": 0.5344, "lr": 4.984684473905221e-06, "epoch": 0.6587286345304398, "percentage": 13.17, "elapsed_time": "0:26:54", "remaining_time": "2:57:23", "throughput": 20059.89, "total_tokens": 32395968} +{"current_steps": 10295, "total_steps": 78105, "loss": 0.459, "lr": 4.9846226690560275e-06, "epoch": 0.6590487164714167, "percentage": 13.18, "elapsed_time": "0:26:55", "remaining_time": "2:57:21", "throughput": 20061.22, "total_tokens": 32411456} +{"current_steps": 10300, "total_steps": 78105, "loss": 0.4806, "lr": 4.984560740137719e-06, "epoch": 0.6593687984123936, "percentage": 13.19, "elapsed_time": "0:26:56", "remaining_time": "2:57:20", "throughput": 20062.51, "total_tokens": 32426880} +{"current_steps": 10305, "total_steps": 78105, "loss": 0.5004, "lr": 4.98449868715339e-06, "epoch": 0.6596888803533705, "percentage": 13.19, "elapsed_time": "0:26:56", "remaining_time": "2:57:18", "throughput": 20063.84, "total_tokens": 32442368} +{"current_steps": 10310, "total_steps": 78105, "loss": 0.5125, "lr": 4.984436510106137e-06, "epoch": 0.6600089622943474, "percentage": 13.2, "elapsed_time": "0:26:57", "remaining_time": "2:57:16", "throughput": 20065.09, "total_tokens": 32457920} +{"current_steps": 10315, "total_steps": 78105, "loss": 0.4846, "lr": 4.984374208999066e-06, "epoch": 0.6603290442353242, "percentage": 13.21, "elapsed_time": "0:26:58", "remaining_time": "2:57:15", "throughput": 20066.45, "total_tokens": 32473536} +{"current_steps": 10320, "total_steps": 78105, "loss": 0.4919, "lr": 4.984311783835287e-06, "epoch": 0.6606491261763011, "percentage": 13.21, "elapsed_time": "0:26:59", "remaining_time": "2:57:14", "throughput": 20067.95, "total_tokens": 32490240} +{"current_steps": 10325, "total_steps": 78105, "loss": 0.4298, "lr": 4.984249234617919e-06, "epoch": 0.660969208117278, "percentage": 13.22, "elapsed_time": "0:26:59", "remaining_time": "2:57:12", "throughput": 20069.16, "total_tokens": 32505728} +{"current_steps": 10330, "total_steps": 78105, "loss": 0.5663, "lr": 4.984186561350083e-06, "epoch": 0.6612892900582549, "percentage": 13.23, "elapsed_time": "0:27:00", "remaining_time": "2:57:11", "throughput": 20070.23, "total_tokens": 32520960} +{"current_steps": 10335, "total_steps": 78105, "loss": 0.5126, "lr": 4.98412376403491e-06, "epoch": 0.6616093719992318, "percentage": 13.23, "elapsed_time": "0:27:01", "remaining_time": "2:57:09", "throughput": 20071.38, "total_tokens": 32535872} +{"current_steps": 10340, "total_steps": 78105, "loss": 0.4722, "lr": 4.984060842675536e-06, "epoch": 0.6619294539402087, "percentage": 13.24, "elapsed_time": "0:27:01", "remaining_time": "2:57:08", "throughput": 20072.87, "total_tokens": 32552448} +{"current_steps": 10345, "total_steps": 78105, "loss": 0.4801, "lr": 4.983997797275103e-06, "epoch": 0.6622495358811856, "percentage": 13.24, "elapsed_time": "0:27:02", "remaining_time": "2:57:06", "throughput": 20073.92, "total_tokens": 32567232} +{"current_steps": 10350, "total_steps": 78105, "loss": 0.4348, "lr": 4.983934627836758e-06, "epoch": 0.6625696178221625, "percentage": 13.25, "elapsed_time": "0:27:03", "remaining_time": "2:57:05", "throughput": 20075.41, "total_tokens": 32583360} +{"current_steps": 10355, "total_steps": 78105, "loss": 0.4362, "lr": 4.983871334363656e-06, "epoch": 0.6628896997631394, "percentage": 13.26, "elapsed_time": "0:27:03", "remaining_time": "2:57:03", "throughput": 20076.68, "total_tokens": 32598656} +{"current_steps": 10360, "total_steps": 78105, "loss": 0.4203, "lr": 4.9838079168589574e-06, "epoch": 0.6632097817041163, "percentage": 13.26, "elapsed_time": "0:27:04", "remaining_time": "2:57:01", "throughput": 20077.9, "total_tokens": 32614144} +{"current_steps": 10365, "total_steps": 78105, "loss": 0.5201, "lr": 4.98374437532583e-06, "epoch": 0.6635298636450931, "percentage": 13.27, "elapsed_time": "0:27:05", "remaining_time": "2:57:00", "throughput": 20079.03, "total_tokens": 32629120} +{"current_steps": 10370, "total_steps": 78105, "loss": 0.6147, "lr": 4.9836807097674445e-06, "epoch": 0.66384994558607, "percentage": 13.28, "elapsed_time": "0:27:05", "remaining_time": "2:56:59", "throughput": 20080.62, "total_tokens": 32645952} +{"current_steps": 10375, "total_steps": 78105, "loss": 0.6087, "lr": 4.983616920186982e-06, "epoch": 0.6641700275270469, "percentage": 13.28, "elapsed_time": "0:27:06", "remaining_time": "2:56:57", "throughput": 20081.97, "total_tokens": 32661696} +{"current_steps": 10380, "total_steps": 78105, "loss": 0.5026, "lr": 4.983553006587627e-06, "epoch": 0.6644901094680238, "percentage": 13.29, "elapsed_time": "0:27:07", "remaining_time": "2:56:56", "throughput": 20083.39, "total_tokens": 32677760} +{"current_steps": 10385, "total_steps": 78105, "loss": 0.4457, "lr": 4.983488968972572e-06, "epoch": 0.6648101914090007, "percentage": 13.3, "elapsed_time": "0:27:07", "remaining_time": "2:56:54", "throughput": 20084.7, "total_tokens": 32693568} +{"current_steps": 10390, "total_steps": 78105, "loss": 0.4913, "lr": 4.983424807345013e-06, "epoch": 0.6651302733499775, "percentage": 13.3, "elapsed_time": "0:27:08", "remaining_time": "2:56:53", "throughput": 20085.93, "total_tokens": 32708736} +{"current_steps": 10395, "total_steps": 78105, "loss": 0.4128, "lr": 4.983360521708156e-06, "epoch": 0.6654503552909545, "percentage": 13.31, "elapsed_time": "0:27:09", "remaining_time": "2:56:51", "throughput": 20087.46, "total_tokens": 32725440} +{"current_steps": 10400, "total_steps": 78105, "loss": 0.4205, "lr": 4.9832961120652085e-06, "epoch": 0.6657704372319314, "percentage": 13.32, "elapsed_time": "0:27:09", "remaining_time": "2:56:50", "throughput": 20088.66, "total_tokens": 32740608} +{"current_steps": 10405, "total_steps": 78105, "loss": 0.6104, "lr": 4.983231578419389e-06, "epoch": 0.6660905191729083, "percentage": 13.32, "elapsed_time": "0:27:10", "remaining_time": "2:56:48", "throughput": 20089.93, "total_tokens": 32756416} +{"current_steps": 10410, "total_steps": 78105, "loss": 0.3898, "lr": 4.983166920773919e-06, "epoch": 0.6664106011138852, "percentage": 13.33, "elapsed_time": "0:27:11", "remaining_time": "2:56:47", "throughput": 20091.26, "total_tokens": 32771904} +{"current_steps": 10415, "total_steps": 78105, "loss": 0.6124, "lr": 4.983102139132027e-06, "epoch": 0.666730683054862, "percentage": 13.33, "elapsed_time": "0:27:11", "remaining_time": "2:56:45", "throughput": 20092.54, "total_tokens": 32787392} +{"current_steps": 10420, "total_steps": 78105, "loss": 0.4642, "lr": 4.983037233496948e-06, "epoch": 0.6670507649958389, "percentage": 13.34, "elapsed_time": "0:27:12", "remaining_time": "2:56:44", "throughput": 20093.9, "total_tokens": 32803136} +{"current_steps": 10425, "total_steps": 78105, "loss": 0.4817, "lr": 4.982972203871924e-06, "epoch": 0.6673708469368158, "percentage": 13.35, "elapsed_time": "0:27:13", "remaining_time": "2:56:42", "throughput": 20095.09, "total_tokens": 32818624} +{"current_steps": 10430, "total_steps": 78105, "loss": 0.4955, "lr": 4.9829070502602e-06, "epoch": 0.6676909288777927, "percentage": 13.35, "elapsed_time": "0:27:13", "remaining_time": "2:56:41", "throughput": 20096.36, "total_tokens": 32833920} +{"current_steps": 10435, "total_steps": 78105, "loss": 0.4401, "lr": 4.982841772665033e-06, "epoch": 0.6680110108187696, "percentage": 13.36, "elapsed_time": "0:27:14", "remaining_time": "2:56:39", "throughput": 20097.43, "total_tokens": 32848512} +{"current_steps": 10440, "total_steps": 78105, "loss": 0.4935, "lr": 4.9827763710896795e-06, "epoch": 0.6683310927597464, "percentage": 13.37, "elapsed_time": "0:27:15", "remaining_time": "2:56:38", "throughput": 20098.96, "total_tokens": 32865088} +{"current_steps": 10445, "total_steps": 78105, "loss": 0.6524, "lr": 4.982710845537406e-06, "epoch": 0.6686511747007234, "percentage": 13.37, "elapsed_time": "0:27:15", "remaining_time": "2:56:36", "throughput": 20100.58, "total_tokens": 32882048} +{"current_steps": 10450, "total_steps": 78105, "loss": 0.4655, "lr": 4.982645196011485e-06, "epoch": 0.6689712566417003, "percentage": 13.38, "elapsed_time": "0:27:16", "remaining_time": "2:56:35", "throughput": 20101.97, "total_tokens": 32898624} +{"current_steps": 10455, "total_steps": 78105, "loss": 0.6318, "lr": 4.9825794225151956e-06, "epoch": 0.6692913385826772, "percentage": 13.39, "elapsed_time": "0:27:17", "remaining_time": "2:56:34", "throughput": 20103.16, "total_tokens": 32914176} +{"current_steps": 10460, "total_steps": 78105, "loss": 0.4735, "lr": 4.98251352505182e-06, "epoch": 0.6696114205236541, "percentage": 13.39, "elapsed_time": "0:27:17", "remaining_time": "2:56:32", "throughput": 20104.74, "total_tokens": 32931136} +{"current_steps": 10465, "total_steps": 78105, "loss": 0.4761, "lr": 4.98244750362465e-06, "epoch": 0.669931502464631, "percentage": 13.4, "elapsed_time": "0:27:18", "remaining_time": "2:56:31", "throughput": 20106.17, "total_tokens": 32947648} +{"current_steps": 10470, "total_steps": 78105, "loss": 0.4036, "lr": 4.982381358236983e-06, "epoch": 0.6702515844056078, "percentage": 13.41, "elapsed_time": "0:27:19", "remaining_time": "2:56:30", "throughput": 20107.62, "total_tokens": 32964224} +{"current_steps": 10475, "total_steps": 78105, "loss": 0.6034, "lr": 4.98231508889212e-06, "epoch": 0.6705716663465847, "percentage": 13.41, "elapsed_time": "0:27:20", "remaining_time": "2:56:29", "throughput": 20109.45, "total_tokens": 32982080} +{"current_steps": 10480, "total_steps": 78105, "loss": 0.6726, "lr": 4.9822486955933725e-06, "epoch": 0.6708917482875616, "percentage": 13.42, "elapsed_time": "0:27:20", "remaining_time": "2:56:27", "throughput": 20110.63, "total_tokens": 32997440} +{"current_steps": 10485, "total_steps": 78105, "loss": 0.5825, "lr": 4.982182178344054e-06, "epoch": 0.6712118302285385, "percentage": 13.42, "elapsed_time": "0:27:21", "remaining_time": "2:56:26", "throughput": 20111.9, "total_tokens": 33013056} +{"current_steps": 10490, "total_steps": 78105, "loss": 0.5856, "lr": 4.982115537147487e-06, "epoch": 0.6715319121695154, "percentage": 13.43, "elapsed_time": "0:27:22", "remaining_time": "2:56:24", "throughput": 20113.03, "total_tokens": 33028416} +{"current_steps": 10495, "total_steps": 78105, "loss": 0.5513, "lr": 4.982048772006998e-06, "epoch": 0.6718519941104922, "percentage": 13.44, "elapsed_time": "0:27:22", "remaining_time": "2:56:22", "throughput": 20114.02, "total_tokens": 33042880} +{"current_steps": 10500, "total_steps": 78105, "loss": 0.408, "lr": 4.981981882925924e-06, "epoch": 0.6721720760514692, "percentage": 13.44, "elapsed_time": "0:27:23", "remaining_time": "2:56:21", "throughput": 20115.19, "total_tokens": 33058240} +{"current_steps": 10505, "total_steps": 78105, "loss": 0.5072, "lr": 4.981914869907601e-06, "epoch": 0.6724921579924461, "percentage": 13.45, "elapsed_time": "0:27:24", "remaining_time": "2:56:19", "throughput": 20116.46, "total_tokens": 33073920} +{"current_steps": 10510, "total_steps": 78105, "loss": 0.4809, "lr": 4.981847732955378e-06, "epoch": 0.672812239933423, "percentage": 13.46, "elapsed_time": "0:27:24", "remaining_time": "2:56:18", "throughput": 20117.65, "total_tokens": 33089344} +{"current_steps": 10515, "total_steps": 78105, "loss": 0.477, "lr": 4.9817804720726074e-06, "epoch": 0.6731323218743999, "percentage": 13.46, "elapsed_time": "0:27:25", "remaining_time": "2:56:16", "throughput": 20118.74, "total_tokens": 33104384} +{"current_steps": 10520, "total_steps": 78105, "loss": 0.5688, "lr": 4.981713087262646e-06, "epoch": 0.6734524038153767, "percentage": 13.47, "elapsed_time": "0:27:26", "remaining_time": "2:56:15", "throughput": 20119.98, "total_tokens": 33120064} +{"current_steps": 10525, "total_steps": 78105, "loss": 0.5787, "lr": 4.98164557852886e-06, "epoch": 0.6737724857563536, "percentage": 13.48, "elapsed_time": "0:27:26", "remaining_time": "2:56:14", "throughput": 20121.27, "total_tokens": 33136128} +{"current_steps": 10530, "total_steps": 78105, "loss": 0.4445, "lr": 4.981577945874621e-06, "epoch": 0.6740925676973305, "percentage": 13.48, "elapsed_time": "0:27:27", "remaining_time": "2:56:13", "throughput": 20123.19, "total_tokens": 33154816} +{"current_steps": 10535, "total_steps": 78105, "loss": 0.6317, "lr": 4.981510189303305e-06, "epoch": 0.6744126496383074, "percentage": 13.49, "elapsed_time": "0:27:28", "remaining_time": "2:56:11", "throughput": 20124.35, "total_tokens": 33169920} +{"current_steps": 10540, "total_steps": 78105, "loss": 0.7115, "lr": 4.981442308818295e-06, "epoch": 0.6747327315792843, "percentage": 13.49, "elapsed_time": "0:27:28", "remaining_time": "2:56:10", "throughput": 20126.13, "total_tokens": 33187712} +{"current_steps": 10545, "total_steps": 78105, "loss": 0.6101, "lr": 4.981374304422982e-06, "epoch": 0.6750528135202611, "percentage": 13.5, "elapsed_time": "0:27:29", "remaining_time": "2:56:09", "throughput": 20127.24, "total_tokens": 33202880} +{"current_steps": 10550, "total_steps": 78105, "loss": 0.4095, "lr": 4.981306176120761e-06, "epoch": 0.6753728954612381, "percentage": 13.51, "elapsed_time": "0:27:30", "remaining_time": "2:56:07", "throughput": 20128.62, "total_tokens": 33218944} +{"current_steps": 10555, "total_steps": 78105, "loss": 0.5573, "lr": 4.981237923915036e-06, "epoch": 0.675692977402215, "percentage": 13.51, "elapsed_time": "0:27:31", "remaining_time": "2:56:06", "throughput": 20129.85, "total_tokens": 33234688} +{"current_steps": 10560, "total_steps": 78105, "loss": 0.4964, "lr": 4.981169547809212e-06, "epoch": 0.6760130593431919, "percentage": 13.52, "elapsed_time": "0:27:31", "remaining_time": "2:56:04", "throughput": 20131.04, "total_tokens": 33249920} +{"current_steps": 10565, "total_steps": 78105, "loss": 0.4489, "lr": 4.981101047806704e-06, "epoch": 0.6763331412841688, "percentage": 13.53, "elapsed_time": "0:27:32", "remaining_time": "2:56:03", "throughput": 20132.35, "total_tokens": 33265792} +{"current_steps": 10570, "total_steps": 78105, "loss": 0.5544, "lr": 4.981032423910934e-06, "epoch": 0.6766532232251457, "percentage": 13.53, "elapsed_time": "0:27:33", "remaining_time": "2:56:01", "throughput": 20133.56, "total_tokens": 33280832} +{"current_steps": 10575, "total_steps": 78105, "loss": 0.4573, "lr": 4.980963676125328e-06, "epoch": 0.6769733051661225, "percentage": 13.54, "elapsed_time": "0:27:33", "remaining_time": "2:56:00", "throughput": 20134.88, "total_tokens": 33296576} +{"current_steps": 10580, "total_steps": 78105, "loss": 0.4879, "lr": 4.980894804453319e-06, "epoch": 0.6772933871070994, "percentage": 13.55, "elapsed_time": "0:27:34", "remaining_time": "2:55:59", "throughput": 20137.09, "total_tokens": 33316672} +{"current_steps": 10585, "total_steps": 78105, "loss": 0.5271, "lr": 4.980825808898346e-06, "epoch": 0.6776134690480763, "percentage": 13.55, "elapsed_time": "0:27:35", "remaining_time": "2:55:57", "throughput": 20138.12, "total_tokens": 33331456} +{"current_steps": 10590, "total_steps": 78105, "loss": 0.4008, "lr": 4.980756689463854e-06, "epoch": 0.6779335509890532, "percentage": 13.56, "elapsed_time": "0:27:35", "remaining_time": "2:55:56", "throughput": 20139.26, "total_tokens": 33346752} +{"current_steps": 10595, "total_steps": 78105, "loss": 0.5284, "lr": 4.980687446153294e-06, "epoch": 0.67825363293003, "percentage": 13.57, "elapsed_time": "0:27:36", "remaining_time": "2:55:54", "throughput": 20140.49, "total_tokens": 33362688} +{"current_steps": 10600, "total_steps": 78105, "loss": 0.4703, "lr": 4.980618078970125e-06, "epoch": 0.6785737148710069, "percentage": 13.57, "elapsed_time": "0:27:37", "remaining_time": "2:55:53", "throughput": 20141.51, "total_tokens": 33377600} +{"current_steps": 10605, "total_steps": 78105, "loss": 0.6389, "lr": 4.98054858791781e-06, "epoch": 0.6788937968119839, "percentage": 13.58, "elapsed_time": "0:27:37", "remaining_time": "2:55:51", "throughput": 20142.69, "total_tokens": 33393280} +{"current_steps": 10610, "total_steps": 78105, "loss": 0.5023, "lr": 4.98047897299982e-06, "epoch": 0.6792138787529608, "percentage": 13.58, "elapsed_time": "0:27:38", "remaining_time": "2:55:50", "throughput": 20144.25, "total_tokens": 33410112} +{"current_steps": 10615, "total_steps": 78105, "loss": 0.494, "lr": 4.98040923421963e-06, "epoch": 0.6795339606939377, "percentage": 13.59, "elapsed_time": "0:27:39", "remaining_time": "2:55:49", "throughput": 20145.75, "total_tokens": 33426624} +{"current_steps": 10620, "total_steps": 78105, "loss": 0.5107, "lr": 4.980339371580723e-06, "epoch": 0.6798540426349146, "percentage": 13.6, "elapsed_time": "0:27:39", "remaining_time": "2:55:47", "throughput": 20147.03, "total_tokens": 33442240} +{"current_steps": 10625, "total_steps": 78105, "loss": 0.6039, "lr": 4.980269385086587e-06, "epoch": 0.6801741245758914, "percentage": 13.6, "elapsed_time": "0:27:40", "remaining_time": "2:55:46", "throughput": 20148.36, "total_tokens": 33458432} +{"current_steps": 10630, "total_steps": 78105, "loss": 0.6173, "lr": 4.9801992747407175e-06, "epoch": 0.6804942065168683, "percentage": 13.61, "elapsed_time": "0:27:41", "remaining_time": "2:55:45", "throughput": 20149.58, "total_tokens": 33473856} +{"current_steps": 10635, "total_steps": 78105, "loss": 0.5634, "lr": 4.980129040546615e-06, "epoch": 0.6808142884578452, "percentage": 13.62, "elapsed_time": "0:27:41", "remaining_time": "2:55:43", "throughput": 20150.73, "total_tokens": 33489216} +{"current_steps": 10640, "total_steps": 78105, "loss": 0.627, "lr": 4.980058682507787e-06, "epoch": 0.6811343703988221, "percentage": 13.62, "elapsed_time": "0:27:42", "remaining_time": "2:55:42", "throughput": 20152.07, "total_tokens": 33505280} +{"current_steps": 10645, "total_steps": 78105, "loss": 0.4155, "lr": 4.9799882006277464e-06, "epoch": 0.681454452339799, "percentage": 13.63, "elapsed_time": "0:27:43", "remaining_time": "2:55:40", "throughput": 20153.2, "total_tokens": 33520512} +{"current_steps": 10650, "total_steps": 78105, "loss": 0.5237, "lr": 4.979917594910012e-06, "epoch": 0.6817745342807758, "percentage": 13.64, "elapsed_time": "0:27:43", "remaining_time": "2:55:39", "throughput": 20154.68, "total_tokens": 33537344} +{"current_steps": 10655, "total_steps": 78105, "loss": 0.4356, "lr": 4.979846865358111e-06, "epoch": 0.6820946162217527, "percentage": 13.64, "elapsed_time": "0:27:44", "remaining_time": "2:55:37", "throughput": 20155.7, "total_tokens": 33552128} +{"current_steps": 10660, "total_steps": 78105, "loss": 0.425, "lr": 4.979776011975575e-06, "epoch": 0.6824146981627297, "percentage": 13.65, "elapsed_time": "0:27:45", "remaining_time": "2:55:36", "throughput": 20156.76, "total_tokens": 33566784} +{"current_steps": 10665, "total_steps": 78105, "loss": 0.4357, "lr": 4.979705034765942e-06, "epoch": 0.6827347801037066, "percentage": 13.65, "elapsed_time": "0:27:45", "remaining_time": "2:55:34", "throughput": 20157.97, "total_tokens": 33582080} +{"current_steps": 10670, "total_steps": 78105, "loss": 0.5631, "lr": 4.9796339337327545e-06, "epoch": 0.6830548620446835, "percentage": 13.66, "elapsed_time": "0:27:46", "remaining_time": "2:55:32", "throughput": 20159.0, "total_tokens": 33596864} +{"current_steps": 10675, "total_steps": 78105, "loss": 0.4965, "lr": 4.979562708879565e-06, "epoch": 0.6833749439856603, "percentage": 13.67, "elapsed_time": "0:27:47", "remaining_time": "2:55:31", "throughput": 20160.31, "total_tokens": 33613056} +{"current_steps": 10680, "total_steps": 78105, "loss": 0.582, "lr": 4.979491360209929e-06, "epoch": 0.6836950259266372, "percentage": 13.67, "elapsed_time": "0:27:47", "remaining_time": "2:55:30", "throughput": 20161.4, "total_tokens": 33628160} +{"current_steps": 10685, "total_steps": 78105, "loss": 0.4595, "lr": 4.97941988772741e-06, "epoch": 0.6840151078676141, "percentage": 13.68, "elapsed_time": "0:27:48", "remaining_time": "2:55:28", "throughput": 20163.01, "total_tokens": 33645184} +{"current_steps": 10690, "total_steps": 78105, "loss": 0.5501, "lr": 4.979348291435577e-06, "epoch": 0.684335189808591, "percentage": 13.69, "elapsed_time": "0:27:49", "remaining_time": "2:55:27", "throughput": 20164.17, "total_tokens": 33660800} +{"current_steps": 10695, "total_steps": 78105, "loss": 0.4411, "lr": 4.979276571338004e-06, "epoch": 0.6846552717495679, "percentage": 13.69, "elapsed_time": "0:27:50", "remaining_time": "2:55:26", "throughput": 20165.5, "total_tokens": 33676736} +{"current_steps": 10700, "total_steps": 78105, "loss": 0.6194, "lr": 4.979204727438274e-06, "epoch": 0.6849753536905447, "percentage": 13.7, "elapsed_time": "0:27:50", "remaining_time": "2:55:24", "throughput": 20166.79, "total_tokens": 33692928} +{"current_steps": 10705, "total_steps": 78105, "loss": 0.5011, "lr": 4.979132759739973e-06, "epoch": 0.6852954356315216, "percentage": 13.71, "elapsed_time": "0:27:51", "remaining_time": "2:55:23", "throughput": 20167.98, "total_tokens": 33708352} +{"current_steps": 10710, "total_steps": 78105, "loss": 0.5775, "lr": 4.979060668246696e-06, "epoch": 0.6856155175724986, "percentage": 13.71, "elapsed_time": "0:27:52", "remaining_time": "2:55:21", "throughput": 20169.21, "total_tokens": 33723968} +{"current_steps": 10715, "total_steps": 78105, "loss": 0.5769, "lr": 4.9789884529620415e-06, "epoch": 0.6859355995134755, "percentage": 13.72, "elapsed_time": "0:27:52", "remaining_time": "2:55:20", "throughput": 20170.34, "total_tokens": 33739200} +{"current_steps": 10720, "total_steps": 78105, "loss": 0.5149, "lr": 4.978916113889616e-06, "epoch": 0.6862556814544524, "percentage": 13.73, "elapsed_time": "0:27:53", "remaining_time": "2:55:18", "throughput": 20171.57, "total_tokens": 33754944} +{"current_steps": 10725, "total_steps": 78105, "loss": 0.4469, "lr": 4.978843651033032e-06, "epoch": 0.6865757633954293, "percentage": 13.73, "elapsed_time": "0:27:54", "remaining_time": "2:55:17", "throughput": 20172.54, "total_tokens": 33769472} +{"current_steps": 10730, "total_steps": 78105, "loss": 0.55, "lr": 4.978771064395909e-06, "epoch": 0.6868958453364061, "percentage": 13.74, "elapsed_time": "0:27:54", "remaining_time": "2:55:15", "throughput": 20173.69, "total_tokens": 33784896} +{"current_steps": 10735, "total_steps": 78105, "loss": 0.4415, "lr": 4.978698353981869e-06, "epoch": 0.687215927277383, "percentage": 13.74, "elapsed_time": "0:27:55", "remaining_time": "2:55:14", "throughput": 20174.79, "total_tokens": 33800320} +{"current_steps": 10740, "total_steps": 78105, "loss": 0.5544, "lr": 4.978625519794546e-06, "epoch": 0.6875360092183599, "percentage": 13.75, "elapsed_time": "0:27:56", "remaining_time": "2:55:12", "throughput": 20176.25, "total_tokens": 33816896} +{"current_steps": 10745, "total_steps": 78105, "loss": 0.6134, "lr": 4.9785525618375735e-06, "epoch": 0.6878560911593368, "percentage": 13.76, "elapsed_time": "0:27:56", "remaining_time": "2:55:11", "throughput": 20177.64, "total_tokens": 33833344} +{"current_steps": 10750, "total_steps": 78105, "loss": 0.5297, "lr": 4.978479480114598e-06, "epoch": 0.6881761731003136, "percentage": 13.76, "elapsed_time": "0:27:57", "remaining_time": "2:55:10", "throughput": 20179.05, "total_tokens": 33849856} +{"current_steps": 10755, "total_steps": 78105, "loss": 0.5039, "lr": 4.978406274629265e-06, "epoch": 0.6884962550412905, "percentage": 13.77, "elapsed_time": "0:27:58", "remaining_time": "2:55:08", "throughput": 20180.24, "total_tokens": 33865344} +{"current_steps": 10760, "total_steps": 78105, "loss": 0.5573, "lr": 4.978332945385234e-06, "epoch": 0.6888163369822674, "percentage": 13.78, "elapsed_time": "0:27:58", "remaining_time": "2:55:07", "throughput": 20181.74, "total_tokens": 33881792} +{"current_steps": 10765, "total_steps": 78105, "loss": 0.6204, "lr": 4.978259492386164e-06, "epoch": 0.6891364189232444, "percentage": 13.78, "elapsed_time": "0:27:59", "remaining_time": "2:55:06", "throughput": 20183.19, "total_tokens": 33898368} +{"current_steps": 10770, "total_steps": 78105, "loss": 0.5, "lr": 4.978185915635725e-06, "epoch": 0.6894565008642213, "percentage": 13.79, "elapsed_time": "0:28:00", "remaining_time": "2:55:04", "throughput": 20184.47, "total_tokens": 33914368} +{"current_steps": 10775, "total_steps": 78105, "loss": 0.4288, "lr": 4.978112215137588e-06, "epoch": 0.6897765828051982, "percentage": 13.8, "elapsed_time": "0:28:00", "remaining_time": "2:55:03", "throughput": 20185.61, "total_tokens": 33929728} +{"current_steps": 10780, "total_steps": 78105, "loss": 0.4813, "lr": 4.978038390895437e-06, "epoch": 0.690096664746175, "percentage": 13.8, "elapsed_time": "0:28:01", "remaining_time": "2:55:01", "throughput": 20186.63, "total_tokens": 33944704} +{"current_steps": 10785, "total_steps": 78105, "loss": 0.7686, "lr": 4.977964442912955e-06, "epoch": 0.6904167466871519, "percentage": 13.81, "elapsed_time": "0:28:02", "remaining_time": "2:55:00", "throughput": 20187.81, "total_tokens": 33960128} +{"current_steps": 10790, "total_steps": 78105, "loss": 0.6656, "lr": 4.977890371193836e-06, "epoch": 0.6907368286281288, "percentage": 13.81, "elapsed_time": "0:28:02", "remaining_time": "2:54:58", "throughput": 20188.79, "total_tokens": 33975104} +{"current_steps": 10795, "total_steps": 78105, "loss": 0.5917, "lr": 4.977816175741779e-06, "epoch": 0.6910569105691057, "percentage": 13.82, "elapsed_time": "0:28:03", "remaining_time": "2:54:57", "throughput": 20190.01, "total_tokens": 33990720} +{"current_steps": 10800, "total_steps": 78105, "loss": 0.5768, "lr": 4.9777418565604885e-06, "epoch": 0.6913769925100826, "percentage": 13.83, "elapsed_time": "0:28:04", "remaining_time": "2:54:55", "throughput": 20191.14, "total_tokens": 34006272} +{"current_steps": 10805, "total_steps": 78105, "loss": 0.4049, "lr": 4.977667413653676e-06, "epoch": 0.6916970744510594, "percentage": 13.83, "elapsed_time": "0:28:04", "remaining_time": "2:54:54", "throughput": 20192.46, "total_tokens": 34022400} +{"current_steps": 10810, "total_steps": 78105, "loss": 0.4865, "lr": 4.977592847025058e-06, "epoch": 0.6920171563920363, "percentage": 13.84, "elapsed_time": "0:28:05", "remaining_time": "2:54:53", "throughput": 20193.82, "total_tokens": 34038720} +{"current_steps": 10815, "total_steps": 78105, "loss": 0.5691, "lr": 4.9775181566783595e-06, "epoch": 0.6923372383330133, "percentage": 13.85, "elapsed_time": "0:28:06", "remaining_time": "2:54:51", "throughput": 20194.94, "total_tokens": 34053760} +{"current_steps": 10820, "total_steps": 78105, "loss": 0.396, "lr": 4.977443342617308e-06, "epoch": 0.6926573202739902, "percentage": 13.85, "elapsed_time": "0:28:06", "remaining_time": "2:54:50", "throughput": 20196.19, "total_tokens": 34069568} +{"current_steps": 10825, "total_steps": 78105, "loss": 0.4147, "lr": 4.977368404845641e-06, "epoch": 0.6929774022149671, "percentage": 13.86, "elapsed_time": "0:28:07", "remaining_time": "2:54:49", "throughput": 20197.76, "total_tokens": 34086912} +{"current_steps": 10830, "total_steps": 78105, "loss": 0.4735, "lr": 4.9772933433671006e-06, "epoch": 0.6932974841559439, "percentage": 13.87, "elapsed_time": "0:28:08", "remaining_time": "2:54:47", "throughput": 20198.79, "total_tokens": 34101696} +{"current_steps": 10835, "total_steps": 78105, "loss": 0.4479, "lr": 4.977218158185434e-06, "epoch": 0.6936175660969208, "percentage": 13.87, "elapsed_time": "0:28:08", "remaining_time": "2:54:45", "throughput": 20199.65, "total_tokens": 34115904} +{"current_steps": 10840, "total_steps": 78105, "loss": 0.6332, "lr": 4.977142849304396e-06, "epoch": 0.6939376480378977, "percentage": 13.88, "elapsed_time": "0:28:09", "remaining_time": "2:54:44", "throughput": 20200.83, "total_tokens": 34131328} +{"current_steps": 10845, "total_steps": 78105, "loss": 0.5561, "lr": 4.977067416727747e-06, "epoch": 0.6942577299788746, "percentage": 13.89, "elapsed_time": "0:28:10", "remaining_time": "2:54:43", "throughput": 20202.34, "total_tokens": 34148544} +{"current_steps": 10850, "total_steps": 78105, "loss": 0.5371, "lr": 4.976991860459254e-06, "epoch": 0.6945778119198515, "percentage": 13.89, "elapsed_time": "0:28:10", "remaining_time": "2:54:41", "throughput": 20203.43, "total_tokens": 34163520} +{"current_steps": 10855, "total_steps": 78105, "loss": 0.4885, "lr": 4.976916180502689e-06, "epoch": 0.6948978938608283, "percentage": 13.9, "elapsed_time": "0:28:11", "remaining_time": "2:54:40", "throughput": 20204.5, "total_tokens": 34178368} +{"current_steps": 10860, "total_steps": 78105, "loss": 0.6393, "lr": 4.976840376861832e-06, "epoch": 0.6952179758018052, "percentage": 13.9, "elapsed_time": "0:28:12", "remaining_time": "2:54:38", "throughput": 20205.69, "total_tokens": 34193920} +{"current_steps": 10865, "total_steps": 78105, "loss": 0.6128, "lr": 4.976764449540468e-06, "epoch": 0.6955380577427821, "percentage": 13.91, "elapsed_time": "0:28:12", "remaining_time": "2:54:37", "throughput": 20207.04, "total_tokens": 34210176} +{"current_steps": 10870, "total_steps": 78105, "loss": 0.5699, "lr": 4.976688398542389e-06, "epoch": 0.6958581396837591, "percentage": 13.92, "elapsed_time": "0:28:13", "remaining_time": "2:54:36", "throughput": 20208.6, "total_tokens": 34227328} +{"current_steps": 10875, "total_steps": 78105, "loss": 0.4597, "lr": 4.976612223871391e-06, "epoch": 0.696178221624736, "percentage": 13.92, "elapsed_time": "0:28:14", "remaining_time": "2:54:34", "throughput": 20209.46, "total_tokens": 34241984} +{"current_steps": 10880, "total_steps": 78105, "loss": 0.4676, "lr": 4.976535925531279e-06, "epoch": 0.6964983035657129, "percentage": 13.93, "elapsed_time": "0:28:15", "remaining_time": "2:54:33", "throughput": 20210.4, "total_tokens": 34256896} +{"current_steps": 10885, "total_steps": 78105, "loss": 0.5493, "lr": 4.9764595035258625e-06, "epoch": 0.6968183855066897, "percentage": 13.94, "elapsed_time": "0:28:15", "remaining_time": "2:54:31", "throughput": 20211.71, "total_tokens": 34273792} +{"current_steps": 10890, "total_steps": 78105, "loss": 0.5085, "lr": 4.9763829578589586e-06, "epoch": 0.6971384674476666, "percentage": 13.94, "elapsed_time": "0:28:16", "remaining_time": "2:54:30", "throughput": 20213.07, "total_tokens": 34290752} +{"current_steps": 10895, "total_steps": 78105, "loss": 0.5571, "lr": 4.976306288534388e-06, "epoch": 0.6974585493886435, "percentage": 13.95, "elapsed_time": "0:28:17", "remaining_time": "2:54:29", "throughput": 20214.67, "total_tokens": 34308416} +{"current_steps": 10900, "total_steps": 78105, "loss": 0.7076, "lr": 4.976229495555981e-06, "epoch": 0.6977786313296204, "percentage": 13.96, "elapsed_time": "0:28:17", "remaining_time": "2:54:28", "throughput": 20215.75, "total_tokens": 34323776} +{"current_steps": 10905, "total_steps": 78105, "loss": 0.4328, "lr": 4.97615257892757e-06, "epoch": 0.6980987132705972, "percentage": 13.96, "elapsed_time": "0:28:18", "remaining_time": "2:54:26", "throughput": 20216.78, "total_tokens": 34338944} +{"current_steps": 10910, "total_steps": 78105, "loss": 0.4736, "lr": 4.976075538652998e-06, "epoch": 0.6984187952115741, "percentage": 13.97, "elapsed_time": "0:28:19", "remaining_time": "2:54:25", "throughput": 20217.9, "total_tokens": 34354432} +{"current_steps": 10915, "total_steps": 78105, "loss": 0.4492, "lr": 4.97599837473611e-06, "epoch": 0.698738877152551, "percentage": 13.97, "elapsed_time": "0:28:19", "remaining_time": "2:54:24", "throughput": 20219.04, "total_tokens": 34370432} +{"current_steps": 10920, "total_steps": 78105, "loss": 0.5545, "lr": 4.975921087180761e-06, "epoch": 0.699058959093528, "percentage": 13.98, "elapsed_time": "0:28:20", "remaining_time": "2:54:22", "throughput": 20220.21, "total_tokens": 34386496} +{"current_steps": 10925, "total_steps": 78105, "loss": 0.5357, "lr": 4.975843675990809e-06, "epoch": 0.6993790410345049, "percentage": 13.99, "elapsed_time": "0:28:21", "remaining_time": "2:54:21", "throughput": 20221.52, "total_tokens": 34402560} +{"current_steps": 10930, "total_steps": 78105, "loss": 0.4802, "lr": 4.97576614117012e-06, "epoch": 0.6996991229754818, "percentage": 13.99, "elapsed_time": "0:28:21", "remaining_time": "2:54:20", "throughput": 20222.74, "total_tokens": 34418496} +{"current_steps": 10935, "total_steps": 78105, "loss": 0.494, "lr": 4.9756884827225665e-06, "epoch": 0.7000192049164586, "percentage": 14.0, "elapsed_time": "0:28:22", "remaining_time": "2:54:18", "throughput": 20223.96, "total_tokens": 34434752} +{"current_steps": 10940, "total_steps": 78105, "loss": 0.5727, "lr": 4.9756107006520245e-06, "epoch": 0.7003392868574355, "percentage": 14.01, "elapsed_time": "0:28:23", "remaining_time": "2:54:17", "throughput": 20225.08, "total_tokens": 34450176} +{"current_steps": 10945, "total_steps": 78105, "loss": 0.5875, "lr": 4.9755327949623796e-06, "epoch": 0.7006593687984124, "percentage": 14.01, "elapsed_time": "0:28:24", "remaining_time": "2:54:15", "throughput": 20226.27, "total_tokens": 34465600} +{"current_steps": 10950, "total_steps": 78105, "loss": 0.4471, "lr": 4.975454765657522e-06, "epoch": 0.7009794507393893, "percentage": 14.02, "elapsed_time": "0:28:24", "remaining_time": "2:54:14", "throughput": 20227.46, "total_tokens": 34481600} +{"current_steps": 10955, "total_steps": 78105, "loss": 0.4347, "lr": 4.975376612741347e-06, "epoch": 0.7012995326803662, "percentage": 14.03, "elapsed_time": "0:28:25", "remaining_time": "2:54:13", "throughput": 20229.02, "total_tokens": 34498752} +{"current_steps": 10960, "total_steps": 78105, "loss": 0.5944, "lr": 4.975298336217757e-06, "epoch": 0.701619614621343, "percentage": 14.03, "elapsed_time": "0:28:26", "remaining_time": "2:54:12", "throughput": 20230.2, "total_tokens": 34514240} +{"current_steps": 10965, "total_steps": 78105, "loss": 0.5639, "lr": 4.975219936090664e-06, "epoch": 0.7019396965623199, "percentage": 14.04, "elapsed_time": "0:28:26", "remaining_time": "2:54:10", "throughput": 20231.11, "total_tokens": 34528704} +{"current_steps": 10970, "total_steps": 78105, "loss": 0.5451, "lr": 4.9751414123639776e-06, "epoch": 0.7022597785032968, "percentage": 14.05, "elapsed_time": "0:28:27", "remaining_time": "2:54:09", "throughput": 20232.38, "total_tokens": 34544704} +{"current_steps": 10975, "total_steps": 78105, "loss": 0.5939, "lr": 4.975062765041623e-06, "epoch": 0.7025798604442738, "percentage": 14.05, "elapsed_time": "0:28:28", "remaining_time": "2:54:07", "throughput": 20233.39, "total_tokens": 34560000} +{"current_steps": 10980, "total_steps": 78105, "loss": 0.4441, "lr": 4.974983994127526e-06, "epoch": 0.7028999423852507, "percentage": 14.06, "elapsed_time": "0:28:28", "remaining_time": "2:54:06", "throughput": 20234.52, "total_tokens": 34575296} +{"current_steps": 10985, "total_steps": 78105, "loss": 0.493, "lr": 4.974905099625619e-06, "epoch": 0.7032200243262275, "percentage": 14.06, "elapsed_time": "0:28:29", "remaining_time": "2:54:04", "throughput": 20235.54, "total_tokens": 34590528} +{"current_steps": 10990, "total_steps": 78105, "loss": 0.5594, "lr": 4.974826081539843e-06, "epoch": 0.7035401062672044, "percentage": 14.07, "elapsed_time": "0:28:30", "remaining_time": "2:54:03", "throughput": 20236.53, "total_tokens": 34605312} +{"current_steps": 10995, "total_steps": 78105, "loss": 0.5058, "lr": 4.974746939874144e-06, "epoch": 0.7038601882081813, "percentage": 14.08, "elapsed_time": "0:28:30", "remaining_time": "2:54:01", "throughput": 20237.65, "total_tokens": 34620736} +{"current_steps": 11000, "total_steps": 78105, "loss": 0.5211, "lr": 4.974667674632474e-06, "epoch": 0.7041802701491582, "percentage": 14.08, "elapsed_time": "0:28:31", "remaining_time": "2:54:00", "throughput": 20238.88, "total_tokens": 34636736} +{"current_steps": 11005, "total_steps": 78105, "loss": 0.5566, "lr": 4.974588285818789e-06, "epoch": 0.7045003520901351, "percentage": 14.09, "elapsed_time": "0:28:32", "remaining_time": "2:53:58", "throughput": 20239.91, "total_tokens": 34651776} +{"current_steps": 11010, "total_steps": 78105, "loss": 0.4289, "lr": 4.9745087734370545e-06, "epoch": 0.7048204340311119, "percentage": 14.1, "elapsed_time": "0:28:32", "remaining_time": "2:53:57", "throughput": 20240.8, "total_tokens": 34666176} +{"current_steps": 11015, "total_steps": 78105, "loss": 0.3934, "lr": 4.974429137491242e-06, "epoch": 0.7051405159720888, "percentage": 14.1, "elapsed_time": "0:28:33", "remaining_time": "2:53:55", "throughput": 20242.09, "total_tokens": 34682112} +{"current_steps": 11020, "total_steps": 78105, "loss": 0.5075, "lr": 4.974349377985327e-06, "epoch": 0.7054605979130657, "percentage": 14.11, "elapsed_time": "0:28:34", "remaining_time": "2:53:54", "throughput": 20243.09, "total_tokens": 34697024} +{"current_steps": 11025, "total_steps": 78105, "loss": 0.5785, "lr": 4.974269494923293e-06, "epoch": 0.7057806798540426, "percentage": 14.12, "elapsed_time": "0:28:34", "remaining_time": "2:53:52", "throughput": 20244.23, "total_tokens": 34712704} +{"current_steps": 11030, "total_steps": 78105, "loss": 0.3854, "lr": 4.974189488309128e-06, "epoch": 0.7061007617950196, "percentage": 14.12, "elapsed_time": "0:28:35", "remaining_time": "2:53:51", "throughput": 20245.33, "total_tokens": 34728256} +{"current_steps": 11035, "total_steps": 78105, "loss": 0.4843, "lr": 4.974109358146827e-06, "epoch": 0.7064208437359965, "percentage": 14.13, "elapsed_time": "0:28:36", "remaining_time": "2:53:50", "throughput": 20246.61, "total_tokens": 34744512} +{"current_steps": 11040, "total_steps": 78105, "loss": 0.5917, "lr": 4.974029104440392e-06, "epoch": 0.7067409256769733, "percentage": 14.13, "elapsed_time": "0:28:36", "remaining_time": "2:53:48", "throughput": 20247.5, "total_tokens": 34758912} +{"current_steps": 11045, "total_steps": 78105, "loss": 0.4637, "lr": 4.97394872719383e-06, "epoch": 0.7070610076179502, "percentage": 14.14, "elapsed_time": "0:28:37", "remaining_time": "2:53:47", "throughput": 20248.78, "total_tokens": 34774848} +{"current_steps": 11050, "total_steps": 78105, "loss": 0.5963, "lr": 4.973868226411156e-06, "epoch": 0.7073810895589271, "percentage": 14.15, "elapsed_time": "0:28:38", "remaining_time": "2:53:45", "throughput": 20249.87, "total_tokens": 34790016} +{"current_steps": 11055, "total_steps": 78105, "loss": 0.5213, "lr": 4.9737876020963875e-06, "epoch": 0.707701171499904, "percentage": 14.15, "elapsed_time": "0:28:38", "remaining_time": "2:53:44", "throughput": 20250.9, "total_tokens": 34804992} +{"current_steps": 11060, "total_steps": 78105, "loss": 0.3757, "lr": 4.9737068542535525e-06, "epoch": 0.7080212534408808, "percentage": 14.16, "elapsed_time": "0:28:39", "remaining_time": "2:53:42", "throughput": 20252.0, "total_tokens": 34820288} +{"current_steps": 11065, "total_steps": 78105, "loss": 0.4366, "lr": 4.9736259828866814e-06, "epoch": 0.7083413353818577, "percentage": 14.17, "elapsed_time": "0:28:40", "remaining_time": "2:53:41", "throughput": 20253.19, "total_tokens": 34836096} +{"current_steps": 11070, "total_steps": 78105, "loss": 0.6283, "lr": 4.9735449879998145e-06, "epoch": 0.7086614173228346, "percentage": 14.17, "elapsed_time": "0:28:40", "remaining_time": "2:53:39", "throughput": 20254.32, "total_tokens": 34851584} +{"current_steps": 11075, "total_steps": 78105, "loss": 0.4686, "lr": 4.973463869596994e-06, "epoch": 0.7089814992638115, "percentage": 14.18, "elapsed_time": "0:28:41", "remaining_time": "2:53:38", "throughput": 20255.66, "total_tokens": 34868032} +{"current_steps": 11080, "total_steps": 78105, "loss": 0.5039, "lr": 4.973382627682273e-06, "epoch": 0.7093015812047885, "percentage": 14.19, "elapsed_time": "0:28:42", "remaining_time": "2:53:37", "throughput": 20256.87, "total_tokens": 34884032} +{"current_steps": 11085, "total_steps": 78105, "loss": 0.4791, "lr": 4.973301262259705e-06, "epoch": 0.7096216631457654, "percentage": 14.19, "elapsed_time": "0:28:42", "remaining_time": "2:53:35", "throughput": 20257.92, "total_tokens": 34899456} +{"current_steps": 11090, "total_steps": 78105, "loss": 0.4094, "lr": 4.973219773333356e-06, "epoch": 0.7099417450867422, "percentage": 14.2, "elapsed_time": "0:28:43", "remaining_time": "2:53:34", "throughput": 20259.18, "total_tokens": 34915456} +{"current_steps": 11095, "total_steps": 78105, "loss": 0.4623, "lr": 4.9731381609072945e-06, "epoch": 0.7102618270277191, "percentage": 14.21, "elapsed_time": "0:28:44", "remaining_time": "2:53:33", "throughput": 20260.36, "total_tokens": 34931328} +{"current_steps": 11100, "total_steps": 78105, "loss": 0.5094, "lr": 4.973056424985595e-06, "epoch": 0.710581908968696, "percentage": 14.21, "elapsed_time": "0:28:44", "remaining_time": "2:53:31", "throughput": 20261.4, "total_tokens": 34946816} +{"current_steps": 11105, "total_steps": 78105, "loss": 0.4377, "lr": 4.97297456557234e-06, "epoch": 0.7109019909096729, "percentage": 14.22, "elapsed_time": "0:28:45", "remaining_time": "2:53:30", "throughput": 20263.01, "total_tokens": 34964544} +{"current_steps": 11110, "total_steps": 78105, "loss": 0.4891, "lr": 4.972892582671615e-06, "epoch": 0.7112220728506498, "percentage": 14.22, "elapsed_time": "0:28:46", "remaining_time": "2:53:29", "throughput": 20264.6, "total_tokens": 34982208} +{"current_steps": 11115, "total_steps": 78105, "loss": 0.6064, "lr": 4.972810476287516e-06, "epoch": 0.7115421547916266, "percentage": 14.23, "elapsed_time": "0:28:47", "remaining_time": "2:53:28", "throughput": 20266.36, "total_tokens": 35000576} +{"current_steps": 11120, "total_steps": 78105, "loss": 0.3861, "lr": 4.9727282464241425e-06, "epoch": 0.7118622367326035, "percentage": 14.24, "elapsed_time": "0:28:47", "remaining_time": "2:53:27", "throughput": 20267.21, "total_tokens": 35014912} +{"current_steps": 11125, "total_steps": 78105, "loss": 0.4301, "lr": 4.9726458930855994e-06, "epoch": 0.7121823186735804, "percentage": 14.24, "elapsed_time": "0:28:48", "remaining_time": "2:53:25", "throughput": 20268.14, "total_tokens": 35029632} +{"current_steps": 11130, "total_steps": 78105, "loss": 0.487, "lr": 4.9725634162760005e-06, "epoch": 0.7125024006145573, "percentage": 14.25, "elapsed_time": "0:28:48", "remaining_time": "2:53:24", "throughput": 20269.21, "total_tokens": 35044992} +{"current_steps": 11135, "total_steps": 78105, "loss": 0.6388, "lr": 4.972480815999464e-06, "epoch": 0.7128224825555343, "percentage": 14.26, "elapsed_time": "0:28:49", "remaining_time": "2:53:22", "throughput": 20270.28, "total_tokens": 35060544} +{"current_steps": 11140, "total_steps": 78105, "loss": 0.5645, "lr": 4.972398092260115e-06, "epoch": 0.7131425644965111, "percentage": 14.26, "elapsed_time": "0:28:50", "remaining_time": "2:53:21", "throughput": 20271.26, "total_tokens": 35075648} +{"current_steps": 11145, "total_steps": 78105, "loss": 0.4202, "lr": 4.9723152450620815e-06, "epoch": 0.713462646437488, "percentage": 14.27, "elapsed_time": "0:28:50", "remaining_time": "2:53:19", "throughput": 20272.3, "total_tokens": 35090880} +{"current_steps": 11150, "total_steps": 78105, "loss": 0.6373, "lr": 4.9722322744095036e-06, "epoch": 0.7137827283784649, "percentage": 14.28, "elapsed_time": "0:28:51", "remaining_time": "2:53:18", "throughput": 20273.4, "total_tokens": 35106432} +{"current_steps": 11155, "total_steps": 78105, "loss": 0.5008, "lr": 4.972149180306524e-06, "epoch": 0.7141028103194418, "percentage": 14.28, "elapsed_time": "0:28:52", "remaining_time": "2:53:16", "throughput": 20274.48, "total_tokens": 35121664} +{"current_steps": 11160, "total_steps": 78105, "loss": 0.6063, "lr": 4.97206596275729e-06, "epoch": 0.7144228922604187, "percentage": 14.29, "elapsed_time": "0:28:52", "remaining_time": "2:53:15", "throughput": 20275.54, "total_tokens": 35137152} +{"current_steps": 11165, "total_steps": 78105, "loss": 0.5116, "lr": 4.971982621765959e-06, "epoch": 0.7147429742013955, "percentage": 14.29, "elapsed_time": "0:28:53", "remaining_time": "2:53:14", "throughput": 20276.73, "total_tokens": 35153088} +{"current_steps": 11170, "total_steps": 78105, "loss": 0.4851, "lr": 4.971899157336691e-06, "epoch": 0.7150630561423724, "percentage": 14.3, "elapsed_time": "0:28:54", "remaining_time": "2:53:12", "throughput": 20277.72, "total_tokens": 35168192} +{"current_steps": 11175, "total_steps": 78105, "loss": 0.5449, "lr": 4.971815569473656e-06, "epoch": 0.7153831380833493, "percentage": 14.31, "elapsed_time": "0:28:55", "remaining_time": "2:53:11", "throughput": 20278.91, "total_tokens": 35184192} +{"current_steps": 11180, "total_steps": 78105, "loss": 0.5011, "lr": 4.971731858181026e-06, "epoch": 0.7157032200243262, "percentage": 14.31, "elapsed_time": "0:28:55", "remaining_time": "2:53:10", "throughput": 20280.14, "total_tokens": 35200256} +{"current_steps": 11185, "total_steps": 78105, "loss": 0.4837, "lr": 4.9716480234629825e-06, "epoch": 0.7160233019653032, "percentage": 14.32, "elapsed_time": "0:28:56", "remaining_time": "2:53:08", "throughput": 20281.04, "total_tokens": 35214720} +{"current_steps": 11190, "total_steps": 78105, "loss": 0.4837, "lr": 4.97156406532371e-06, "epoch": 0.71634338390628, "percentage": 14.33, "elapsed_time": "0:28:57", "remaining_time": "2:53:07", "throughput": 20282.18, "total_tokens": 35230720} +{"current_steps": 11195, "total_steps": 78105, "loss": 0.4826, "lr": 4.971479983767403e-06, "epoch": 0.7166634658472569, "percentage": 14.33, "elapsed_time": "0:28:57", "remaining_time": "2:53:05", "throughput": 20283.3, "total_tokens": 35246400} +{"current_steps": 11200, "total_steps": 78105, "loss": 0.4933, "lr": 4.971395778798258e-06, "epoch": 0.7169835477882338, "percentage": 14.34, "elapsed_time": "0:28:58", "remaining_time": "2:53:04", "throughput": 20284.58, "total_tokens": 35262784} +{"current_steps": 11205, "total_steps": 78105, "loss": 0.4372, "lr": 4.9713114504204815e-06, "epoch": 0.7173036297292107, "percentage": 14.35, "elapsed_time": "0:28:59", "remaining_time": "2:53:03", "throughput": 20285.58, "total_tokens": 35277632} +{"current_steps": 11210, "total_steps": 78105, "loss": 0.4651, "lr": 4.971226998638284e-06, "epoch": 0.7176237116701876, "percentage": 14.35, "elapsed_time": "0:28:59", "remaining_time": "2:53:01", "throughput": 20286.55, "total_tokens": 35292544} +{"current_steps": 11215, "total_steps": 78105, "loss": 0.4884, "lr": 4.971142423455882e-06, "epoch": 0.7179437936111644, "percentage": 14.36, "elapsed_time": "0:29:00", "remaining_time": "2:53:00", "throughput": 20287.77, "total_tokens": 35308352} +{"current_steps": 11220, "total_steps": 78105, "loss": 0.5077, "lr": 4.971057724877499e-06, "epoch": 0.7182638755521413, "percentage": 14.37, "elapsed_time": "0:29:01", "remaining_time": "2:52:58", "throughput": 20288.93, "total_tokens": 35323904} +{"current_steps": 11225, "total_steps": 78105, "loss": 0.5591, "lr": 4.970972902907365e-06, "epoch": 0.7185839574931182, "percentage": 14.37, "elapsed_time": "0:29:01", "remaining_time": "2:52:57", "throughput": 20290.18, "total_tokens": 35340224} +{"current_steps": 11230, "total_steps": 78105, "loss": 0.5306, "lr": 4.970887957549715e-06, "epoch": 0.7189040394340951, "percentage": 14.38, "elapsed_time": "0:29:02", "remaining_time": "2:52:56", "throughput": 20291.25, "total_tokens": 35355776} +{"current_steps": 11235, "total_steps": 78105, "loss": 0.5181, "lr": 4.97080288880879e-06, "epoch": 0.719224121375072, "percentage": 14.38, "elapsed_time": "0:29:03", "remaining_time": "2:52:54", "throughput": 20292.28, "total_tokens": 35371072} +{"current_steps": 11240, "total_steps": 78105, "loss": 0.4511, "lr": 4.970717696688839e-06, "epoch": 0.719544203316049, "percentage": 14.39, "elapsed_time": "0:29:03", "remaining_time": "2:52:53", "throughput": 20293.53, "total_tokens": 35387520} +{"current_steps": 11245, "total_steps": 78105, "loss": 0.56, "lr": 4.970632381194116e-06, "epoch": 0.7198642852570258, "percentage": 14.4, "elapsed_time": "0:29:04", "remaining_time": "2:52:52", "throughput": 20294.81, "total_tokens": 35403840} +{"current_steps": 11250, "total_steps": 78105, "loss": 0.4955, "lr": 4.970546942328881e-06, "epoch": 0.7201843671980027, "percentage": 14.4, "elapsed_time": "0:29:05", "remaining_time": "2:52:50", "throughput": 20296.01, "total_tokens": 35420032} +{"current_steps": 11255, "total_steps": 78105, "loss": 0.412, "lr": 4.9704613800974e-06, "epoch": 0.7205044491389796, "percentage": 14.41, "elapsed_time": "0:29:05", "remaining_time": "2:52:49", "throughput": 20297.22, "total_tokens": 35436032} +{"current_steps": 11260, "total_steps": 78105, "loss": 0.4155, "lr": 4.9703756945039465e-06, "epoch": 0.7208245310799565, "percentage": 14.42, "elapsed_time": "0:29:06", "remaining_time": "2:52:48", "throughput": 20298.15, "total_tokens": 35451136} +{"current_steps": 11265, "total_steps": 78105, "loss": 0.5428, "lr": 4.970289885552797e-06, "epoch": 0.7211446130209334, "percentage": 14.42, "elapsed_time": "0:29:07", "remaining_time": "2:52:46", "throughput": 20299.25, "total_tokens": 35466816} +{"current_steps": 11270, "total_steps": 78105, "loss": 0.4465, "lr": 4.97020395324824e-06, "epoch": 0.7214646949619102, "percentage": 14.43, "elapsed_time": "0:29:07", "remaining_time": "2:52:45", "throughput": 20300.64, "total_tokens": 35483456} +{"current_steps": 11275, "total_steps": 78105, "loss": 0.6662, "lr": 4.970117897594562e-06, "epoch": 0.7217847769028871, "percentage": 14.44, "elapsed_time": "0:29:08", "remaining_time": "2:52:44", "throughput": 20301.92, "total_tokens": 35499584} +{"current_steps": 11280, "total_steps": 78105, "loss": 0.577, "lr": 4.970031718596065e-06, "epoch": 0.722104858843864, "percentage": 14.44, "elapsed_time": "0:29:09", "remaining_time": "2:52:42", "throughput": 20303.03, "total_tokens": 35515328} +{"current_steps": 11285, "total_steps": 78105, "loss": 0.5868, "lr": 4.969945416257048e-06, "epoch": 0.7224249407848409, "percentage": 14.45, "elapsed_time": "0:29:09", "remaining_time": "2:52:41", "throughput": 20303.88, "total_tokens": 35530048} +{"current_steps": 11290, "total_steps": 78105, "loss": 0.4074, "lr": 4.969858990581823e-06, "epoch": 0.7227450227258178, "percentage": 14.45, "elapsed_time": "0:29:10", "remaining_time": "2:52:40", "throughput": 20305.07, "total_tokens": 35545792} +{"current_steps": 11295, "total_steps": 78105, "loss": 0.4847, "lr": 4.969772441574705e-06, "epoch": 0.7230651046667947, "percentage": 14.46, "elapsed_time": "0:29:11", "remaining_time": "2:52:38", "throughput": 20306.22, "total_tokens": 35561344} +{"current_steps": 11300, "total_steps": 78105, "loss": 0.4858, "lr": 4.9696857692400165e-06, "epoch": 0.7233851866077716, "percentage": 14.47, "elapsed_time": "0:29:11", "remaining_time": "2:52:37", "throughput": 20307.37, "total_tokens": 35577024} +{"current_steps": 11305, "total_steps": 78105, "loss": 0.5038, "lr": 4.969598973582084e-06, "epoch": 0.7237052685487485, "percentage": 14.47, "elapsed_time": "0:29:12", "remaining_time": "2:52:35", "throughput": 20308.36, "total_tokens": 35592192} +{"current_steps": 11310, "total_steps": 78105, "loss": 0.4985, "lr": 4.969512054605243e-06, "epoch": 0.7240253504897254, "percentage": 14.48, "elapsed_time": "0:29:13", "remaining_time": "2:52:34", "throughput": 20309.54, "total_tokens": 35607872} +{"current_steps": 11315, "total_steps": 78105, "loss": 0.469, "lr": 4.969425012313833e-06, "epoch": 0.7243454324307023, "percentage": 14.49, "elapsed_time": "0:29:13", "remaining_time": "2:52:33", "throughput": 20310.77, "total_tokens": 35623872} +{"current_steps": 11320, "total_steps": 78105, "loss": 0.4882, "lr": 4.969337846712201e-06, "epoch": 0.7246655143716791, "percentage": 14.49, "elapsed_time": "0:29:14", "remaining_time": "2:52:31", "throughput": 20311.61, "total_tokens": 35638720} +{"current_steps": 11325, "total_steps": 78105, "loss": 0.6997, "lr": 4.9692505578046975e-06, "epoch": 0.724985596312656, "percentage": 14.5, "elapsed_time": "0:29:15", "remaining_time": "2:52:30", "throughput": 20313.07, "total_tokens": 35655680} +{"current_steps": 11330, "total_steps": 78105, "loss": 0.4621, "lr": 4.9691631455956855e-06, "epoch": 0.7253056782536329, "percentage": 14.51, "elapsed_time": "0:29:15", "remaining_time": "2:52:29", "throughput": 20314.14, "total_tokens": 35671168} +{"current_steps": 11335, "total_steps": 78105, "loss": 0.4241, "lr": 4.969075610089526e-06, "epoch": 0.7256257601946098, "percentage": 14.51, "elapsed_time": "0:29:16", "remaining_time": "2:52:27", "throughput": 20315.29, "total_tokens": 35686848} +{"current_steps": 11340, "total_steps": 78105, "loss": 0.401, "lr": 4.968987951290592e-06, "epoch": 0.7259458421355867, "percentage": 14.52, "elapsed_time": "0:29:17", "remaining_time": "2:52:26", "throughput": 20316.47, "total_tokens": 35702720} +{"current_steps": 11345, "total_steps": 78105, "loss": 0.6738, "lr": 4.968900169203259e-06, "epoch": 0.7262659240765637, "percentage": 14.53, "elapsed_time": "0:29:18", "remaining_time": "2:52:25", "throughput": 20318.14, "total_tokens": 35720640} +{"current_steps": 11350, "total_steps": 78105, "loss": 0.4632, "lr": 4.968812263831913e-06, "epoch": 0.7265860060175405, "percentage": 14.53, "elapsed_time": "0:29:18", "remaining_time": "2:52:23", "throughput": 20319.02, "total_tokens": 35735424} +{"current_steps": 11355, "total_steps": 78105, "loss": 0.5915, "lr": 4.968724235180942e-06, "epoch": 0.7269060879585174, "percentage": 14.54, "elapsed_time": "0:29:19", "remaining_time": "2:52:22", "throughput": 20320.16, "total_tokens": 35751168} +{"current_steps": 11360, "total_steps": 78105, "loss": 0.5772, "lr": 4.968636083254741e-06, "epoch": 0.7272261698994943, "percentage": 14.54, "elapsed_time": "0:29:20", "remaining_time": "2:52:21", "throughput": 20321.23, "total_tokens": 35767168} +{"current_steps": 11365, "total_steps": 78105, "loss": 0.6086, "lr": 4.968547808057713e-06, "epoch": 0.7275462518404712, "percentage": 14.55, "elapsed_time": "0:29:20", "remaining_time": "2:52:19", "throughput": 20322.25, "total_tokens": 35782464} +{"current_steps": 11370, "total_steps": 78105, "loss": 0.5673, "lr": 4.968459409594266e-06, "epoch": 0.727866333781448, "percentage": 14.56, "elapsed_time": "0:29:21", "remaining_time": "2:52:18", "throughput": 20323.36, "total_tokens": 35798592} +{"current_steps": 11375, "total_steps": 78105, "loss": 0.5758, "lr": 4.968370887868813e-06, "epoch": 0.7281864157224249, "percentage": 14.56, "elapsed_time": "0:29:22", "remaining_time": "2:52:17", "throughput": 20324.58, "total_tokens": 35814720} +{"current_steps": 11380, "total_steps": 78105, "loss": 0.3895, "lr": 4.968282242885776e-06, "epoch": 0.7285064976634018, "percentage": 14.57, "elapsed_time": "0:29:22", "remaining_time": "2:52:15", "throughput": 20325.58, "total_tokens": 35830016} +{"current_steps": 11385, "total_steps": 78105, "loss": 0.45, "lr": 4.968193474649581e-06, "epoch": 0.7288265796043787, "percentage": 14.58, "elapsed_time": "0:29:23", "remaining_time": "2:52:14", "throughput": 20326.62, "total_tokens": 35845632} +{"current_steps": 11390, "total_steps": 78105, "loss": 0.5233, "lr": 4.968104583164659e-06, "epoch": 0.7291466615453556, "percentage": 14.58, "elapsed_time": "0:29:24", "remaining_time": "2:52:13", "throughput": 20327.71, "total_tokens": 35861184} +{"current_steps": 11395, "total_steps": 78105, "loss": 0.687, "lr": 4.968015568435451e-06, "epoch": 0.7294667434863324, "percentage": 14.59, "elapsed_time": "0:29:24", "remaining_time": "2:52:11", "throughput": 20328.94, "total_tokens": 35876864} +{"current_steps": 11400, "total_steps": 78105, "loss": 0.4534, "lr": 4.967926430466401e-06, "epoch": 0.7297868254273094, "percentage": 14.6, "elapsed_time": "0:29:25", "remaining_time": "2:52:10", "throughput": 20330.32, "total_tokens": 35893568} +{"current_steps": 11405, "total_steps": 78105, "loss": 0.4689, "lr": 4.967837169261959e-06, "epoch": 0.7301069073682863, "percentage": 14.6, "elapsed_time": "0:29:26", "remaining_time": "2:52:09", "throughput": 20331.47, "total_tokens": 35909696} +{"current_steps": 11410, "total_steps": 78105, "loss": 0.4996, "lr": 4.9677477848265835e-06, "epoch": 0.7304269893092632, "percentage": 14.61, "elapsed_time": "0:29:26", "remaining_time": "2:52:07", "throughput": 20332.51, "total_tokens": 35925120} +{"current_steps": 11415, "total_steps": 78105, "loss": 0.597, "lr": 4.967658277164738e-06, "epoch": 0.7307470712502401, "percentage": 14.61, "elapsed_time": "0:29:27", "remaining_time": "2:52:06", "throughput": 20333.66, "total_tokens": 35940800} +{"current_steps": 11420, "total_steps": 78105, "loss": 0.455, "lr": 4.967568646280891e-06, "epoch": 0.731067153191217, "percentage": 14.62, "elapsed_time": "0:29:28", "remaining_time": "2:52:05", "throughput": 20334.69, "total_tokens": 35955968} +{"current_steps": 11425, "total_steps": 78105, "loss": 0.4821, "lr": 4.9674788921795205e-06, "epoch": 0.7313872351321938, "percentage": 14.63, "elapsed_time": "0:29:28", "remaining_time": "2:52:03", "throughput": 20335.87, "total_tokens": 35972096} +{"current_steps": 11430, "total_steps": 78105, "loss": 0.4694, "lr": 4.9673890148651055e-06, "epoch": 0.7317073170731707, "percentage": 14.63, "elapsed_time": "0:29:29", "remaining_time": "2:52:02", "throughput": 20336.77, "total_tokens": 35986624} +{"current_steps": 11435, "total_steps": 78105, "loss": 0.4582, "lr": 4.967299014342136e-06, "epoch": 0.7320273990141476, "percentage": 14.64, "elapsed_time": "0:29:30", "remaining_time": "2:52:01", "throughput": 20338.03, "total_tokens": 36003008} +{"current_steps": 11440, "total_steps": 78105, "loss": 0.6141, "lr": 4.967208890615105e-06, "epoch": 0.7323474809551245, "percentage": 14.65, "elapsed_time": "0:29:30", "remaining_time": "2:51:59", "throughput": 20338.74, "total_tokens": 36017152} +{"current_steps": 11445, "total_steps": 78105, "loss": 0.36, "lr": 4.967118643688513e-06, "epoch": 0.7326675628961014, "percentage": 14.65, "elapsed_time": "0:29:31", "remaining_time": "2:51:57", "throughput": 20339.63, "total_tokens": 36031936} +{"current_steps": 11450, "total_steps": 78105, "loss": 0.5462, "lr": 4.967028273566866e-06, "epoch": 0.7329876448370783, "percentage": 14.66, "elapsed_time": "0:29:32", "remaining_time": "2:51:56", "throughput": 20340.83, "total_tokens": 36048064} +{"current_steps": 11455, "total_steps": 78105, "loss": 0.8065, "lr": 4.966937780254679e-06, "epoch": 0.7333077267780552, "percentage": 14.67, "elapsed_time": "0:29:32", "remaining_time": "2:51:55", "throughput": 20342.05, "total_tokens": 36064256} +{"current_steps": 11460, "total_steps": 78105, "loss": 0.8037, "lr": 4.966847163756468e-06, "epoch": 0.7336278087190321, "percentage": 14.67, "elapsed_time": "0:29:33", "remaining_time": "2:51:54", "throughput": 20342.99, "total_tokens": 36079424} +{"current_steps": 11465, "total_steps": 78105, "loss": 0.4751, "lr": 4.96675642407676e-06, "epoch": 0.733947890660009, "percentage": 14.68, "elapsed_time": "0:29:34", "remaining_time": "2:51:52", "throughput": 20344.17, "total_tokens": 36095424} +{"current_steps": 11470, "total_steps": 78105, "loss": 0.4355, "lr": 4.966665561220083e-06, "epoch": 0.7342679726009859, "percentage": 14.69, "elapsed_time": "0:29:34", "remaining_time": "2:51:51", "throughput": 20345.33, "total_tokens": 36111360} +{"current_steps": 11475, "total_steps": 78105, "loss": 0.4282, "lr": 4.966574575190978e-06, "epoch": 0.7345880545419627, "percentage": 14.69, "elapsed_time": "0:29:35", "remaining_time": "2:51:50", "throughput": 20346.43, "total_tokens": 36127232} +{"current_steps": 11480, "total_steps": 78105, "loss": 0.4187, "lr": 4.966483465993987e-06, "epoch": 0.7349081364829396, "percentage": 14.7, "elapsed_time": "0:29:36", "remaining_time": "2:51:48", "throughput": 20347.52, "total_tokens": 36143168} +{"current_steps": 11485, "total_steps": 78105, "loss": 0.3868, "lr": 4.966392233633658e-06, "epoch": 0.7352282184239165, "percentage": 14.7, "elapsed_time": "0:29:36", "remaining_time": "2:51:47", "throughput": 20348.56, "total_tokens": 36158272} +{"current_steps": 11490, "total_steps": 78105, "loss": 0.4495, "lr": 4.966300878114548e-06, "epoch": 0.7355483003648934, "percentage": 14.71, "elapsed_time": "0:29:37", "remaining_time": "2:51:45", "throughput": 20349.46, "total_tokens": 36173056} +{"current_steps": 11495, "total_steps": 78105, "loss": 0.4979, "lr": 4.9662093994412185e-06, "epoch": 0.7358683823058703, "percentage": 14.72, "elapsed_time": "0:29:38", "remaining_time": "2:51:44", "throughput": 20350.37, "total_tokens": 36187904} +{"current_steps": 11500, "total_steps": 78105, "loss": 0.5348, "lr": 4.966117797618238e-06, "epoch": 0.7361884642468471, "percentage": 14.72, "elapsed_time": "0:29:38", "remaining_time": "2:51:43", "throughput": 20351.91, "total_tokens": 36205632} +{"current_steps": 11505, "total_steps": 78105, "loss": 0.4951, "lr": 4.96602607265018e-06, "epoch": 0.7365085461878241, "percentage": 14.73, "elapsed_time": "0:29:39", "remaining_time": "2:51:41", "throughput": 20352.78, "total_tokens": 36220160} +{"current_steps": 11510, "total_steps": 78105, "loss": 0.4787, "lr": 4.965934224541626e-06, "epoch": 0.736828628128801, "percentage": 14.74, "elapsed_time": "0:29:40", "remaining_time": "2:51:40", "throughput": 20354.06, "total_tokens": 36236416} +{"current_steps": 11515, "total_steps": 78105, "loss": 0.5312, "lr": 4.965842253297159e-06, "epoch": 0.7371487100697779, "percentage": 14.74, "elapsed_time": "0:29:40", "remaining_time": "2:51:39", "throughput": 20355.01, "total_tokens": 36251584} +{"current_steps": 11520, "total_steps": 78105, "loss": 0.4703, "lr": 4.965750158921376e-06, "epoch": 0.7374687920107548, "percentage": 14.75, "elapsed_time": "0:29:41", "remaining_time": "2:51:37", "throughput": 20355.75, "total_tokens": 36265856} +{"current_steps": 11525, "total_steps": 78105, "loss": 0.5367, "lr": 4.965657941418873e-06, "epoch": 0.7377888739517316, "percentage": 14.76, "elapsed_time": "0:29:42", "remaining_time": "2:51:36", "throughput": 20357.39, "total_tokens": 36283904} +{"current_steps": 11530, "total_steps": 78105, "loss": 0.5535, "lr": 4.965565600794256e-06, "epoch": 0.7381089558927085, "percentage": 14.76, "elapsed_time": "0:29:42", "remaining_time": "2:51:35", "throughput": 20358.18, "total_tokens": 36298432} +{"current_steps": 11535, "total_steps": 78105, "loss": 0.3427, "lr": 4.965473137052135e-06, "epoch": 0.7384290378336854, "percentage": 14.77, "elapsed_time": "0:29:43", "remaining_time": "2:51:33", "throughput": 20359.23, "total_tokens": 36313728} +{"current_steps": 11540, "total_steps": 78105, "loss": 0.4738, "lr": 4.965380550197129e-06, "epoch": 0.7387491197746623, "percentage": 14.77, "elapsed_time": "0:29:44", "remaining_time": "2:51:32", "throughput": 20360.22, "total_tokens": 36329088} +{"current_steps": 11545, "total_steps": 78105, "loss": 0.52, "lr": 4.965287840233859e-06, "epoch": 0.7390692017156392, "percentage": 14.78, "elapsed_time": "0:29:44", "remaining_time": "2:51:30", "throughput": 20361.39, "total_tokens": 36345024} +{"current_steps": 11550, "total_steps": 78105, "loss": 0.4575, "lr": 4.965195007166955e-06, "epoch": 0.739389283656616, "percentage": 14.79, "elapsed_time": "0:29:45", "remaining_time": "2:51:29", "throughput": 20362.81, "total_tokens": 36361792} +{"current_steps": 11555, "total_steps": 78105, "loss": 0.4301, "lr": 4.965102051001054e-06, "epoch": 0.739709365597593, "percentage": 14.79, "elapsed_time": "0:29:46", "remaining_time": "2:51:28", "throughput": 20363.84, "total_tokens": 36377152} +{"current_steps": 11560, "total_steps": 78105, "loss": 0.509, "lr": 4.965008971740796e-06, "epoch": 0.7400294475385699, "percentage": 14.8, "elapsed_time": "0:29:47", "remaining_time": "2:51:27", "throughput": 20365.14, "total_tokens": 36394048} +{"current_steps": 11565, "total_steps": 78105, "loss": 0.3984, "lr": 4.96491576939083e-06, "epoch": 0.7403495294795468, "percentage": 14.81, "elapsed_time": "0:29:47", "remaining_time": "2:51:25", "throughput": 20366.03, "total_tokens": 36409152} +{"current_steps": 11570, "total_steps": 78105, "loss": 0.5386, "lr": 4.9648224439558094e-06, "epoch": 0.7406696114205237, "percentage": 14.81, "elapsed_time": "0:29:48", "remaining_time": "2:51:24", "throughput": 20367.57, "total_tokens": 36427072} +{"current_steps": 11575, "total_steps": 78105, "loss": 0.5373, "lr": 4.964728995440396e-06, "epoch": 0.7409896933615006, "percentage": 14.82, "elapsed_time": "0:29:49", "remaining_time": "2:51:23", "throughput": 20368.87, "total_tokens": 36444288} +{"current_steps": 11580, "total_steps": 78105, "loss": 0.6004, "lr": 4.964635423849253e-06, "epoch": 0.7413097753024774, "percentage": 14.83, "elapsed_time": "0:29:49", "remaining_time": "2:51:22", "throughput": 20369.98, "total_tokens": 36460096} +{"current_steps": 11585, "total_steps": 78105, "loss": 0.4622, "lr": 4.964541729187056e-06, "epoch": 0.7416298572434543, "percentage": 14.83, "elapsed_time": "0:29:50", "remaining_time": "2:51:21", "throughput": 20370.96, "total_tokens": 36475264} +{"current_steps": 11590, "total_steps": 78105, "loss": 0.5301, "lr": 4.964447911458483e-06, "epoch": 0.7419499391844312, "percentage": 14.84, "elapsed_time": "0:29:51", "remaining_time": "2:51:19", "throughput": 20372.12, "total_tokens": 36491264} +{"current_steps": 11595, "total_steps": 78105, "loss": 0.4956, "lr": 4.964353970668217e-06, "epoch": 0.7422700211254081, "percentage": 14.85, "elapsed_time": "0:29:51", "remaining_time": "2:51:18", "throughput": 20373.03, "total_tokens": 36506368} +{"current_steps": 11600, "total_steps": 78105, "loss": 0.488, "lr": 4.96425990682095e-06, "epoch": 0.742590103066385, "percentage": 14.85, "elapsed_time": "0:29:52", "remaining_time": "2:51:17", "throughput": 20374.3, "total_tokens": 36522432} +{"current_steps": 11605, "total_steps": 78105, "loss": 0.5611, "lr": 4.96416571992138e-06, "epoch": 0.7429101850073618, "percentage": 14.86, "elapsed_time": "0:29:53", "remaining_time": "2:51:15", "throughput": 20375.61, "total_tokens": 36539008} +{"current_steps": 11610, "total_steps": 78105, "loss": 0.4755, "lr": 4.9640714099742086e-06, "epoch": 0.7432302669483388, "percentage": 14.86, "elapsed_time": "0:29:53", "remaining_time": "2:51:14", "throughput": 20376.6, "total_tokens": 36554240} +{"current_steps": 11615, "total_steps": 78105, "loss": 0.3484, "lr": 4.963976976984146e-06, "epoch": 0.7435503488893157, "percentage": 14.87, "elapsed_time": "0:29:54", "remaining_time": "2:51:13", "throughput": 20377.62, "total_tokens": 36569920} +{"current_steps": 11620, "total_steps": 78105, "loss": 0.6534, "lr": 4.963882420955907e-06, "epoch": 0.7438704308302926, "percentage": 14.88, "elapsed_time": "0:29:55", "remaining_time": "2:51:11", "throughput": 20378.7, "total_tokens": 36585792} +{"current_steps": 11625, "total_steps": 78105, "loss": 0.4732, "lr": 4.9637877418942145e-06, "epoch": 0.7441905127712695, "percentage": 14.88, "elapsed_time": "0:29:55", "remaining_time": "2:51:10", "throughput": 20379.72, "total_tokens": 36601280} +{"current_steps": 11630, "total_steps": 78105, "loss": 0.7017, "lr": 4.963692939803796e-06, "epoch": 0.7445105947122463, "percentage": 14.89, "elapsed_time": "0:29:56", "remaining_time": "2:51:09", "throughput": 20380.8, "total_tokens": 36617152} +{"current_steps": 11635, "total_steps": 78105, "loss": 0.553, "lr": 4.963598014689385e-06, "epoch": 0.7448306766532232, "percentage": 14.9, "elapsed_time": "0:29:57", "remaining_time": "2:51:07", "throughput": 20381.73, "total_tokens": 36632512} +{"current_steps": 11640, "total_steps": 78105, "loss": 0.4394, "lr": 4.963502966555721e-06, "epoch": 0.7451507585942001, "percentage": 14.9, "elapsed_time": "0:29:57", "remaining_time": "2:51:06", "throughput": 20382.74, "total_tokens": 36647744} +{"current_steps": 11645, "total_steps": 78105, "loss": 0.5441, "lr": 4.96340779540755e-06, "epoch": 0.745470840535177, "percentage": 14.91, "elapsed_time": "0:29:58", "remaining_time": "2:51:05", "throughput": 20383.72, "total_tokens": 36663360} +{"current_steps": 11650, "total_steps": 78105, "loss": 0.5831, "lr": 4.9633125012496264e-06, "epoch": 0.7457909224761539, "percentage": 14.92, "elapsed_time": "0:29:59", "remaining_time": "2:51:03", "throughput": 20384.67, "total_tokens": 36678656} +{"current_steps": 11655, "total_steps": 78105, "loss": 0.6294, "lr": 4.963217084086707e-06, "epoch": 0.7461110044171307, "percentage": 14.92, "elapsed_time": "0:29:59", "remaining_time": "2:51:02", "throughput": 20385.71, "total_tokens": 36693952} +{"current_steps": 11660, "total_steps": 78105, "loss": 0.4533, "lr": 4.963121543923557e-06, "epoch": 0.7464310863581076, "percentage": 14.93, "elapsed_time": "0:30:00", "remaining_time": "2:51:01", "throughput": 20386.81, "total_tokens": 36709888} +{"current_steps": 11665, "total_steps": 78105, "loss": 0.4112, "lr": 4.963025880764947e-06, "epoch": 0.7467511682990846, "percentage": 14.94, "elapsed_time": "0:30:01", "remaining_time": "2:50:59", "throughput": 20387.83, "total_tokens": 36725504} +{"current_steps": 11670, "total_steps": 78105, "loss": 0.4568, "lr": 4.962930094615654e-06, "epoch": 0.7470712502400615, "percentage": 14.94, "elapsed_time": "0:30:02", "remaining_time": "2:50:58", "throughput": 20388.98, "total_tokens": 36741376} +{"current_steps": 11675, "total_steps": 78105, "loss": 0.5701, "lr": 4.962834185480461e-06, "epoch": 0.7473913321810384, "percentage": 14.95, "elapsed_time": "0:30:02", "remaining_time": "2:50:57", "throughput": 20390.0, "total_tokens": 36756864} +{"current_steps": 11680, "total_steps": 78105, "loss": 0.598, "lr": 4.962738153364156e-06, "epoch": 0.7477114141220152, "percentage": 14.95, "elapsed_time": "0:30:03", "remaining_time": "2:50:55", "throughput": 20391.07, "total_tokens": 36772608} +{"current_steps": 11685, "total_steps": 78105, "loss": 0.432, "lr": 4.9626419982715366e-06, "epoch": 0.7480314960629921, "percentage": 14.96, "elapsed_time": "0:30:04", "remaining_time": "2:50:54", "throughput": 20392.13, "total_tokens": 36788352} +{"current_steps": 11690, "total_steps": 78105, "loss": 0.5105, "lr": 4.962545720207404e-06, "epoch": 0.748351578003969, "percentage": 14.97, "elapsed_time": "0:30:04", "remaining_time": "2:50:53", "throughput": 20393.17, "total_tokens": 36804096} +{"current_steps": 11695, "total_steps": 78105, "loss": 0.4935, "lr": 4.962449319176564e-06, "epoch": 0.7486716599449459, "percentage": 14.97, "elapsed_time": "0:30:05", "remaining_time": "2:50:51", "throughput": 20394.25, "total_tokens": 36819776} +{"current_steps": 11700, "total_steps": 78105, "loss": 0.7523, "lr": 4.962352795183832e-06, "epoch": 0.7489917418859228, "percentage": 14.98, "elapsed_time": "0:30:06", "remaining_time": "2:50:50", "throughput": 20395.27, "total_tokens": 36835072} +{"current_steps": 11705, "total_steps": 78105, "loss": 0.4038, "lr": 4.962256148234027e-06, "epoch": 0.7493118238268996, "percentage": 14.99, "elapsed_time": "0:30:06", "remaining_time": "2:50:49", "throughput": 20396.32, "total_tokens": 36850816} +{"current_steps": 11710, "total_steps": 78105, "loss": 0.5105, "lr": 4.9621593783319754e-06, "epoch": 0.7496319057678765, "percentage": 14.99, "elapsed_time": "0:30:07", "remaining_time": "2:50:47", "throughput": 20397.31, "total_tokens": 36866368} +{"current_steps": 11715, "total_steps": 78105, "loss": 0.5124, "lr": 4.9620624854825094e-06, "epoch": 0.7499519877088535, "percentage": 15.0, "elapsed_time": "0:30:08", "remaining_time": "2:50:47", "throughput": 20399.02, "total_tokens": 36885376} +{"current_steps": 11718, "total_steps": 78105, "eval_loss": 0.49851447343826294, "epoch": 0.7501440368734396, "percentage": 15.0, "elapsed_time": "0:30:59", "remaining_time": "2:55:35", "throughput": 19840.19, "total_tokens": 36894016} +{"current_steps": 11720, "total_steps": 78105, "loss": 0.4566, "lr": 4.9619654696904675e-06, "epoch": 0.7502720696498304, "percentage": 15.01, "elapsed_time": "0:31:32", "remaining_time": "2:58:40", "throughput": 19496.53, "total_tokens": 36900288} +{"current_steps": 11725, "total_steps": 78105, "loss": 0.5913, "lr": 4.961868330960693e-06, "epoch": 0.7505921515908073, "percentage": 15.01, "elapsed_time": "0:31:33", "remaining_time": "2:58:38", "throughput": 19497.93, "total_tokens": 36916224} +{"current_steps": 11730, "total_steps": 78105, "loss": 0.4233, "lr": 4.961771069298038e-06, "epoch": 0.7509122335317842, "percentage": 15.02, "elapsed_time": "0:31:34", "remaining_time": "2:58:37", "throughput": 19499.21, "total_tokens": 36931648} +{"current_steps": 11735, "total_steps": 78105, "loss": 0.5752, "lr": 4.961673684707359e-06, "epoch": 0.751232315472761, "percentage": 15.02, "elapsed_time": "0:31:34", "remaining_time": "2:58:35", "throughput": 19500.2, "total_tokens": 36945856} +{"current_steps": 11740, "total_steps": 78105, "loss": 0.5424, "lr": 4.961576177193519e-06, "epoch": 0.7515523974137379, "percentage": 15.03, "elapsed_time": "0:31:35", "remaining_time": "2:58:34", "throughput": 19501.56, "total_tokens": 36961792} +{"current_steps": 11745, "total_steps": 78105, "loss": 0.4873, "lr": 4.961478546761386e-06, "epoch": 0.7518724793547148, "percentage": 15.04, "elapsed_time": "0:31:35", "remaining_time": "2:58:32", "throughput": 19502.81, "total_tokens": 36977152} +{"current_steps": 11750, "total_steps": 78105, "loss": 0.5303, "lr": 4.961380793415835e-06, "epoch": 0.7521925612956917, "percentage": 15.04, "elapsed_time": "0:31:36", "remaining_time": "2:58:30", "throughput": 19503.94, "total_tokens": 36991936} +{"current_steps": 11755, "total_steps": 78105, "loss": 0.605, "lr": 4.9612829171617494e-06, "epoch": 0.7525126432366686, "percentage": 15.05, "elapsed_time": "0:31:37", "remaining_time": "2:58:29", "throughput": 19505.08, "total_tokens": 37006784} +{"current_steps": 11760, "total_steps": 78105, "loss": 0.4821, "lr": 4.961184918004015e-06, "epoch": 0.7528327251776454, "percentage": 15.06, "elapsed_time": "0:31:37", "remaining_time": "2:58:27", "throughput": 19506.37, "total_tokens": 37022208} +{"current_steps": 11765, "total_steps": 78105, "loss": 0.4572, "lr": 4.961086795947525e-06, "epoch": 0.7531528071186223, "percentage": 15.06, "elapsed_time": "0:31:38", "remaining_time": "2:58:26", "throughput": 19508.06, "total_tokens": 37039488} +{"current_steps": 11770, "total_steps": 78105, "loss": 0.5268, "lr": 4.96098855099718e-06, "epoch": 0.7534728890595993, "percentage": 15.07, "elapsed_time": "0:31:39", "remaining_time": "2:58:24", "throughput": 19509.6, "total_tokens": 37056064} +{"current_steps": 11775, "total_steps": 78105, "loss": 0.3497, "lr": 4.960890183157886e-06, "epoch": 0.7537929710005762, "percentage": 15.08, "elapsed_time": "0:31:40", "remaining_time": "2:58:23", "throughput": 19511.04, "total_tokens": 37072256} +{"current_steps": 11780, "total_steps": 78105, "loss": 0.6945, "lr": 4.960791692434554e-06, "epoch": 0.7541130529415531, "percentage": 15.08, "elapsed_time": "0:31:40", "remaining_time": "2:58:21", "throughput": 19512.17, "total_tokens": 37087360} +{"current_steps": 11785, "total_steps": 78105, "loss": 0.4554, "lr": 4.960693078832103e-06, "epoch": 0.7544331348825299, "percentage": 15.09, "elapsed_time": "0:31:41", "remaining_time": "2:58:20", "throughput": 19513.26, "total_tokens": 37102144} +{"current_steps": 11790, "total_steps": 78105, "loss": 0.4506, "lr": 4.960594342355457e-06, "epoch": 0.7547532168235068, "percentage": 15.1, "elapsed_time": "0:31:42", "remaining_time": "2:58:18", "throughput": 19514.52, "total_tokens": 37117568} +{"current_steps": 11795, "total_steps": 78105, "loss": 0.5544, "lr": 4.960495483009546e-06, "epoch": 0.7550732987644837, "percentage": 15.1, "elapsed_time": "0:31:42", "remaining_time": "2:58:16", "throughput": 19515.71, "total_tokens": 37132800} +{"current_steps": 11800, "total_steps": 78105, "loss": 0.7038, "lr": 4.960396500799307e-06, "epoch": 0.7553933807054606, "percentage": 15.11, "elapsed_time": "0:31:43", "remaining_time": "2:58:15", "throughput": 19516.79, "total_tokens": 37147776} +{"current_steps": 11805, "total_steps": 78105, "loss": 0.4506, "lr": 4.960297395729683e-06, "epoch": 0.7557134626464375, "percentage": 15.11, "elapsed_time": "0:31:44", "remaining_time": "2:58:13", "throughput": 19518.26, "total_tokens": 37163904} +{"current_steps": 11810, "total_steps": 78105, "loss": 0.5447, "lr": 4.960198167805621e-06, "epoch": 0.7560335445874143, "percentage": 15.12, "elapsed_time": "0:31:44", "remaining_time": "2:58:12", "throughput": 19519.76, "total_tokens": 37180416} +{"current_steps": 11815, "total_steps": 78105, "loss": 0.4104, "lr": 4.960098817032078e-06, "epoch": 0.7563536265283912, "percentage": 15.13, "elapsed_time": "0:31:45", "remaining_time": "2:58:10", "throughput": 19521.12, "total_tokens": 37196480} +{"current_steps": 11820, "total_steps": 78105, "loss": 0.5251, "lr": 4.959999343414015e-06, "epoch": 0.7566737084693682, "percentage": 15.13, "elapsed_time": "0:31:46", "remaining_time": "2:58:09", "throughput": 19522.33, "total_tokens": 37211648} +{"current_steps": 11825, "total_steps": 78105, "loss": 0.4863, "lr": 4.959899746956397e-06, "epoch": 0.7569937904103451, "percentage": 15.14, "elapsed_time": "0:31:46", "remaining_time": "2:58:07", "throughput": 19523.62, "total_tokens": 37227072} +{"current_steps": 11830, "total_steps": 78105, "loss": 0.4256, "lr": 4.9598000276642e-06, "epoch": 0.757313872351322, "percentage": 15.15, "elapsed_time": "0:31:47", "remaining_time": "2:58:06", "throughput": 19525.22, "total_tokens": 37243968} +{"current_steps": 11835, "total_steps": 78105, "loss": 0.5758, "lr": 4.959700185542401e-06, "epoch": 0.7576339542922989, "percentage": 15.15, "elapsed_time": "0:31:48", "remaining_time": "2:58:04", "throughput": 19526.4, "total_tokens": 37259200} +{"current_steps": 11840, "total_steps": 78105, "loss": 0.3839, "lr": 4.959600220595988e-06, "epoch": 0.7579540362332757, "percentage": 15.16, "elapsed_time": "0:31:48", "remaining_time": "2:58:03", "throughput": 19527.75, "total_tokens": 37275072} +{"current_steps": 11845, "total_steps": 78105, "loss": 0.4547, "lr": 4.959500132829951e-06, "epoch": 0.7582741181742526, "percentage": 15.17, "elapsed_time": "0:31:49", "remaining_time": "2:58:01", "throughput": 19529.18, "total_tokens": 37291392} +{"current_steps": 11850, "total_steps": 78105, "loss": 0.4876, "lr": 4.959399922249289e-06, "epoch": 0.7585942001152295, "percentage": 15.17, "elapsed_time": "0:31:50", "remaining_time": "2:58:00", "throughput": 19530.63, "total_tokens": 37307648} +{"current_steps": 11855, "total_steps": 78105, "loss": 0.3464, "lr": 4.959299588859005e-06, "epoch": 0.7589142820562064, "percentage": 15.18, "elapsed_time": "0:31:50", "remaining_time": "2:57:58", "throughput": 19531.86, "total_tokens": 37323136} +{"current_steps": 11860, "total_steps": 78105, "loss": 0.4314, "lr": 4.959199132664109e-06, "epoch": 0.7592343639971832, "percentage": 15.18, "elapsed_time": "0:31:51", "remaining_time": "2:57:57", "throughput": 19533.1, "total_tokens": 37338496} +{"current_steps": 11865, "total_steps": 78105, "loss": 0.445, "lr": 4.959098553669619e-06, "epoch": 0.7595544459381601, "percentage": 15.19, "elapsed_time": "0:31:52", "remaining_time": "2:57:56", "throughput": 19534.88, "total_tokens": 37356800} +{"current_steps": 11870, "total_steps": 78105, "loss": 0.4906, "lr": 4.958997851880555e-06, "epoch": 0.759874527879137, "percentage": 15.2, "elapsed_time": "0:31:53", "remaining_time": "2:57:54", "throughput": 19536.18, "total_tokens": 37372800} +{"current_steps": 11875, "total_steps": 78105, "loss": 0.4387, "lr": 4.958897027301947e-06, "epoch": 0.760194609820114, "percentage": 15.2, "elapsed_time": "0:31:53", "remaining_time": "2:57:53", "throughput": 19537.47, "total_tokens": 37388608} +{"current_steps": 11880, "total_steps": 78105, "loss": 0.5326, "lr": 4.95879607993883e-06, "epoch": 0.7605146917610909, "percentage": 15.21, "elapsed_time": "0:31:54", "remaining_time": "2:57:51", "throughput": 19538.97, "total_tokens": 37405184} +{"current_steps": 11885, "total_steps": 78105, "loss": 0.3786, "lr": 4.958695009796244e-06, "epoch": 0.7608347737020678, "percentage": 15.22, "elapsed_time": "0:31:55", "remaining_time": "2:57:50", "throughput": 19540.08, "total_tokens": 37420096} +{"current_steps": 11890, "total_steps": 78105, "loss": 0.6187, "lr": 4.958593816879236e-06, "epoch": 0.7611548556430446, "percentage": 15.22, "elapsed_time": "0:31:55", "remaining_time": "2:57:48", "throughput": 19541.02, "total_tokens": 37434368} +{"current_steps": 11895, "total_steps": 78105, "loss": 0.4273, "lr": 4.958492501192859e-06, "epoch": 0.7614749375840215, "percentage": 15.23, "elapsed_time": "0:31:56", "remaining_time": "2:57:46", "throughput": 19542.05, "total_tokens": 37449344} +{"current_steps": 11900, "total_steps": 78105, "loss": 0.3901, "lr": 4.958391062742173e-06, "epoch": 0.7617950195249984, "percentage": 15.24, "elapsed_time": "0:31:57", "remaining_time": "2:57:45", "throughput": 19543.21, "total_tokens": 37464448} +{"current_steps": 11905, "total_steps": 78105, "loss": 0.4547, "lr": 4.958289501532242e-06, "epoch": 0.7621151014659753, "percentage": 15.24, "elapsed_time": "0:31:57", "remaining_time": "2:57:43", "throughput": 19544.86, "total_tokens": 37481856} +{"current_steps": 11910, "total_steps": 78105, "loss": 0.463, "lr": 4.958187817568138e-06, "epoch": 0.7624351834069522, "percentage": 15.25, "elapsed_time": "0:31:58", "remaining_time": "2:57:42", "throughput": 19546.14, "total_tokens": 37497856} +{"current_steps": 11915, "total_steps": 78105, "loss": 0.5234, "lr": 4.958086010854938e-06, "epoch": 0.762755265347929, "percentage": 15.26, "elapsed_time": "0:31:59", "remaining_time": "2:57:41", "throughput": 19547.36, "total_tokens": 37513600} +{"current_steps": 11920, "total_steps": 78105, "loss": 0.5496, "lr": 4.957984081397728e-06, "epoch": 0.7630753472889059, "percentage": 15.26, "elapsed_time": "0:31:59", "remaining_time": "2:57:39", "throughput": 19548.61, "total_tokens": 37529408} +{"current_steps": 11925, "total_steps": 78105, "loss": 0.4377, "lr": 4.957882029201595e-06, "epoch": 0.7633954292298829, "percentage": 15.27, "elapsed_time": "0:32:00", "remaining_time": "2:57:38", "throughput": 19550.11, "total_tokens": 37545984} +{"current_steps": 11930, "total_steps": 78105, "loss": 0.5404, "lr": 4.957779854271636e-06, "epoch": 0.7637155111708598, "percentage": 15.27, "elapsed_time": "0:32:01", "remaining_time": "2:57:36", "throughput": 19551.42, "total_tokens": 37561856} +{"current_steps": 11935, "total_steps": 78105, "loss": 0.4171, "lr": 4.957677556612953e-06, "epoch": 0.7640355931118367, "percentage": 15.28, "elapsed_time": "0:32:01", "remaining_time": "2:57:35", "throughput": 19552.88, "total_tokens": 37578624} +{"current_steps": 11940, "total_steps": 78105, "loss": 0.6082, "lr": 4.957575136230655e-06, "epoch": 0.7643556750528135, "percentage": 15.29, "elapsed_time": "0:32:02", "remaining_time": "2:57:33", "throughput": 19554.06, "total_tokens": 37593920} +{"current_steps": 11945, "total_steps": 78105, "loss": 0.8119, "lr": 4.9574725931298565e-06, "epoch": 0.7646757569937904, "percentage": 15.29, "elapsed_time": "0:32:03", "remaining_time": "2:57:32", "throughput": 19555.26, "total_tokens": 37609664} +{"current_steps": 11950, "total_steps": 78105, "loss": 0.6045, "lr": 4.957369927315676e-06, "epoch": 0.7649958389347673, "percentage": 15.3, "elapsed_time": "0:32:03", "remaining_time": "2:57:30", "throughput": 19556.58, "total_tokens": 37626048} +{"current_steps": 11955, "total_steps": 78105, "loss": 0.6714, "lr": 4.957267138793241e-06, "epoch": 0.7653159208757442, "percentage": 15.31, "elapsed_time": "0:32:04", "remaining_time": "2:57:29", "throughput": 19557.84, "total_tokens": 37641792} +{"current_steps": 11960, "total_steps": 78105, "loss": 0.5358, "lr": 4.957164227567685e-06, "epoch": 0.7656360028167211, "percentage": 15.31, "elapsed_time": "0:32:05", "remaining_time": "2:57:27", "throughput": 19558.85, "total_tokens": 37656512} +{"current_steps": 11965, "total_steps": 78105, "loss": 0.4715, "lr": 4.957061193644147e-06, "epoch": 0.7659560847576979, "percentage": 15.32, "elapsed_time": "0:32:05", "remaining_time": "2:57:26", "throughput": 19560.0, "total_tokens": 37672000} +{"current_steps": 11970, "total_steps": 78105, "loss": 0.4264, "lr": 4.95695803702777e-06, "epoch": 0.7662761666986748, "percentage": 15.33, "elapsed_time": "0:32:06", "remaining_time": "2:57:24", "throughput": 19561.18, "total_tokens": 37687232} +{"current_steps": 11975, "total_steps": 78105, "loss": 0.4438, "lr": 4.956854757723708e-06, "epoch": 0.7665962486396517, "percentage": 15.33, "elapsed_time": "0:32:07", "remaining_time": "2:57:23", "throughput": 19562.38, "total_tokens": 37702656} +{"current_steps": 11980, "total_steps": 78105, "loss": 0.5411, "lr": 4.956751355737116e-06, "epoch": 0.7669163305806287, "percentage": 15.34, "elapsed_time": "0:32:07", "remaining_time": "2:57:21", "throughput": 19563.54, "total_tokens": 37718080} +{"current_steps": 11985, "total_steps": 78105, "loss": 0.3796, "lr": 4.956647831073158e-06, "epoch": 0.7672364125216056, "percentage": 15.34, "elapsed_time": "0:32:08", "remaining_time": "2:57:20", "throughput": 19565.16, "total_tokens": 37735680} +{"current_steps": 11990, "total_steps": 78105, "loss": 0.4634, "lr": 4.956544183737003e-06, "epoch": 0.7675564944625825, "percentage": 15.35, "elapsed_time": "0:32:09", "remaining_time": "2:57:18", "throughput": 19566.13, "total_tokens": 37749952} +{"current_steps": 11995, "total_steps": 78105, "loss": 0.7163, "lr": 4.956440413733828e-06, "epoch": 0.7678765764035593, "percentage": 15.36, "elapsed_time": "0:32:10", "remaining_time": "2:57:21", "throughput": 19559.58, "total_tokens": 37765312} +{"current_steps": 12000, "total_steps": 78105, "loss": 0.5274, "lr": 4.956336521068814e-06, "epoch": 0.7681966583445362, "percentage": 15.36, "elapsed_time": "0:32:11", "remaining_time": "2:57:19", "throughput": 19560.65, "total_tokens": 37780160} +{"current_steps": 12005, "total_steps": 78105, "loss": 0.3818, "lr": 4.956232505747148e-06, "epoch": 0.7685167402855131, "percentage": 15.37, "elapsed_time": "0:32:12", "remaining_time": "2:57:18", "throughput": 19561.98, "total_tokens": 37796352} +{"current_steps": 12010, "total_steps": 78105, "loss": 0.4416, "lr": 4.9561283677740254e-06, "epoch": 0.76883682222649, "percentage": 15.38, "elapsed_time": "0:32:12", "remaining_time": "2:57:17", "throughput": 19563.45, "total_tokens": 37812864} +{"current_steps": 12015, "total_steps": 78105, "loss": 0.5501, "lr": 4.956024107154645e-06, "epoch": 0.7691569041674668, "percentage": 15.38, "elapsed_time": "0:32:13", "remaining_time": "2:57:15", "throughput": 19564.54, "total_tokens": 37827840} +{"current_steps": 12020, "total_steps": 78105, "loss": 0.5076, "lr": 4.9559197238942135e-06, "epoch": 0.7694769861084437, "percentage": 15.39, "elapsed_time": "0:32:14", "remaining_time": "2:57:14", "throughput": 19566.59, "total_tokens": 37847040} +{"current_steps": 12025, "total_steps": 78105, "loss": 0.4629, "lr": 4.955815217997944e-06, "epoch": 0.7697970680494206, "percentage": 15.4, "elapsed_time": "0:32:14", "remaining_time": "2:57:12", "throughput": 19567.82, "total_tokens": 37862464} +{"current_steps": 12030, "total_steps": 78105, "loss": 0.5349, "lr": 4.9557105894710545e-06, "epoch": 0.7701171499903975, "percentage": 15.4, "elapsed_time": "0:32:15", "remaining_time": "2:57:11", "throughput": 19568.85, "total_tokens": 37877760} +{"current_steps": 12035, "total_steps": 78105, "loss": 0.4056, "lr": 4.955605838318769e-06, "epoch": 0.7704372319313745, "percentage": 15.41, "elapsed_time": "0:32:16", "remaining_time": "2:57:09", "throughput": 19569.95, "total_tokens": 37892800} +{"current_steps": 12040, "total_steps": 78105, "loss": 0.7183, "lr": 4.955500964546319e-06, "epoch": 0.7707573138723514, "percentage": 15.42, "elapsed_time": "0:32:16", "remaining_time": "2:57:08", "throughput": 19571.36, "total_tokens": 37909056} +{"current_steps": 12045, "total_steps": 78105, "loss": 0.4416, "lr": 4.9553959681589404e-06, "epoch": 0.7710773958133282, "percentage": 15.42, "elapsed_time": "0:32:17", "remaining_time": "2:57:06", "throughput": 19572.83, "total_tokens": 37925376} +{"current_steps": 12050, "total_steps": 78105, "loss": 0.494, "lr": 4.955290849161878e-06, "epoch": 0.7713974777543051, "percentage": 15.43, "elapsed_time": "0:32:18", "remaining_time": "2:57:05", "throughput": 19574.23, "total_tokens": 37941632} +{"current_steps": 12055, "total_steps": 78105, "loss": 0.4919, "lr": 4.955185607560379e-06, "epoch": 0.771717559695282, "percentage": 15.43, "elapsed_time": "0:32:19", "remaining_time": "2:57:03", "throughput": 19575.42, "total_tokens": 37957056} +{"current_steps": 12060, "total_steps": 78105, "loss": 0.5699, "lr": 4.955080243359699e-06, "epoch": 0.7720376416362589, "percentage": 15.44, "elapsed_time": "0:32:19", "remaining_time": "2:57:02", "throughput": 19576.7, "total_tokens": 37973184} +{"current_steps": 12065, "total_steps": 78105, "loss": 0.4621, "lr": 4.954974756565101e-06, "epoch": 0.7723577235772358, "percentage": 15.45, "elapsed_time": "0:32:20", "remaining_time": "2:57:00", "throughput": 19577.82, "total_tokens": 37988288} +{"current_steps": 12070, "total_steps": 78105, "loss": 0.5975, "lr": 4.95486914718185e-06, "epoch": 0.7726778055182126, "percentage": 15.45, "elapsed_time": "0:32:21", "remaining_time": "2:56:59", "throughput": 19579.07, "total_tokens": 38004224} +{"current_steps": 12075, "total_steps": 78105, "loss": 0.6549, "lr": 4.954763415215221e-06, "epoch": 0.7729978874591895, "percentage": 15.46, "elapsed_time": "0:32:21", "remaining_time": "2:56:57", "throughput": 19580.28, "total_tokens": 38019456} +{"current_steps": 12080, "total_steps": 78105, "loss": 0.584, "lr": 4.954657560670494e-06, "epoch": 0.7733179694001664, "percentage": 15.47, "elapsed_time": "0:32:22", "remaining_time": "2:56:56", "throughput": 19581.49, "total_tokens": 38035264} +{"current_steps": 12085, "total_steps": 78105, "loss": 0.522, "lr": 4.9545515835529535e-06, "epoch": 0.7736380513411434, "percentage": 15.47, "elapsed_time": "0:32:23", "remaining_time": "2:56:55", "throughput": 19582.81, "total_tokens": 38051072} +{"current_steps": 12090, "total_steps": 78105, "loss": 0.5556, "lr": 4.954445483867892e-06, "epoch": 0.7739581332821203, "percentage": 15.48, "elapsed_time": "0:32:23", "remaining_time": "2:56:53", "throughput": 19583.97, "total_tokens": 38066688} +{"current_steps": 12095, "total_steps": 78105, "loss": 0.4577, "lr": 4.9543392616206085e-06, "epoch": 0.7742782152230971, "percentage": 15.49, "elapsed_time": "0:32:24", "remaining_time": "2:56:51", "throughput": 19585.18, "total_tokens": 38082048} +{"current_steps": 12100, "total_steps": 78105, "loss": 0.4157, "lr": 4.954232916816406e-06, "epoch": 0.774598297164074, "percentage": 15.49, "elapsed_time": "0:32:25", "remaining_time": "2:56:50", "throughput": 19586.33, "total_tokens": 38097344} +{"current_steps": 12105, "total_steps": 78105, "loss": 0.6133, "lr": 4.954126449460596e-06, "epoch": 0.7749183791050509, "percentage": 15.5, "elapsed_time": "0:32:25", "remaining_time": "2:56:48", "throughput": 19587.39, "total_tokens": 38112320} +{"current_steps": 12110, "total_steps": 78105, "loss": 0.4779, "lr": 4.954019859558493e-06, "epoch": 0.7752384610460278, "percentage": 15.5, "elapsed_time": "0:32:26", "remaining_time": "2:56:47", "throughput": 19588.65, "total_tokens": 38128000} +{"current_steps": 12115, "total_steps": 78105, "loss": 0.4682, "lr": 4.953913147115422e-06, "epoch": 0.7755585429870047, "percentage": 15.51, "elapsed_time": "0:32:27", "remaining_time": "2:56:45", "throughput": 19589.76, "total_tokens": 38143168} +{"current_steps": 12120, "total_steps": 78105, "loss": 0.3558, "lr": 4.953806312136709e-06, "epoch": 0.7758786249279815, "percentage": 15.52, "elapsed_time": "0:32:27", "remaining_time": "2:56:44", "throughput": 19590.77, "total_tokens": 38157824} +{"current_steps": 12125, "total_steps": 78105, "loss": 0.4056, "lr": 4.953699354627692e-06, "epoch": 0.7761987068689584, "percentage": 15.52, "elapsed_time": "0:32:28", "remaining_time": "2:56:42", "throughput": 19591.91, "total_tokens": 38172864} +{"current_steps": 12130, "total_steps": 78105, "loss": 0.6804, "lr": 4.953592274593709e-06, "epoch": 0.7765187888099353, "percentage": 15.53, "elapsed_time": "0:32:29", "remaining_time": "2:56:40", "throughput": 19592.89, "total_tokens": 38187264} +{"current_steps": 12135, "total_steps": 78105, "loss": 0.4035, "lr": 4.953485072040108e-06, "epoch": 0.7768388707509122, "percentage": 15.54, "elapsed_time": "0:32:29", "remaining_time": "2:56:39", "throughput": 19594.5, "total_tokens": 38204288} +{"current_steps": 12140, "total_steps": 78105, "loss": 0.3822, "lr": 4.9533777469722425e-06, "epoch": 0.7771589526918892, "percentage": 15.54, "elapsed_time": "0:32:30", "remaining_time": "2:56:37", "throughput": 19595.57, "total_tokens": 38219264} +{"current_steps": 12145, "total_steps": 78105, "loss": 0.6172, "lr": 4.953270299395472e-06, "epoch": 0.777479034632866, "percentage": 15.55, "elapsed_time": "0:32:31", "remaining_time": "2:56:36", "throughput": 19596.74, "total_tokens": 38234496} +{"current_steps": 12150, "total_steps": 78105, "loss": 0.3488, "lr": 4.953162729315161e-06, "epoch": 0.7777991165738429, "percentage": 15.56, "elapsed_time": "0:32:31", "remaining_time": "2:56:34", "throughput": 19598.17, "total_tokens": 38251072} +{"current_steps": 12155, "total_steps": 78105, "loss": 0.5489, "lr": 4.953055036736681e-06, "epoch": 0.7781191985148198, "percentage": 15.56, "elapsed_time": "0:32:32", "remaining_time": "2:56:33", "throughput": 19599.35, "total_tokens": 38266560} +{"current_steps": 12160, "total_steps": 78105, "loss": 0.4762, "lr": 4.952947221665409e-06, "epoch": 0.7784392804557967, "percentage": 15.57, "elapsed_time": "0:32:33", "remaining_time": "2:56:32", "throughput": 19600.79, "total_tokens": 38282944} +{"current_steps": 12165, "total_steps": 78105, "loss": 0.4529, "lr": 4.952839284106731e-06, "epoch": 0.7787593623967736, "percentage": 15.58, "elapsed_time": "0:32:33", "remaining_time": "2:56:30", "throughput": 19602.21, "total_tokens": 38299264} +{"current_steps": 12170, "total_steps": 78105, "loss": 0.5542, "lr": 4.952731224066036e-06, "epoch": 0.7790794443377504, "percentage": 15.58, "elapsed_time": "0:32:34", "remaining_time": "2:56:29", "throughput": 19603.36, "total_tokens": 38314368} +{"current_steps": 12175, "total_steps": 78105, "loss": 0.5084, "lr": 4.952623041548719e-06, "epoch": 0.7793995262787273, "percentage": 15.59, "elapsed_time": "0:32:35", "remaining_time": "2:56:27", "throughput": 19604.52, "total_tokens": 38329664} +{"current_steps": 12180, "total_steps": 78105, "loss": 0.4042, "lr": 4.952514736560182e-06, "epoch": 0.7797196082197042, "percentage": 15.59, "elapsed_time": "0:32:35", "remaining_time": "2:56:26", "throughput": 19605.73, "total_tokens": 38345408} +{"current_steps": 12185, "total_steps": 78105, "loss": 0.4182, "lr": 4.9524063091058335e-06, "epoch": 0.7800396901606811, "percentage": 15.6, "elapsed_time": "0:32:36", "remaining_time": "2:56:24", "throughput": 19607.32, "total_tokens": 38362688} +{"current_steps": 12190, "total_steps": 78105, "loss": 0.3383, "lr": 4.952297759191089e-06, "epoch": 0.7803597721016581, "percentage": 15.61, "elapsed_time": "0:32:37", "remaining_time": "2:56:23", "throughput": 19608.4, "total_tokens": 38377600} +{"current_steps": 12195, "total_steps": 78105, "loss": 0.5621, "lr": 4.952189086821367e-06, "epoch": 0.780679854042635, "percentage": 15.61, "elapsed_time": "0:32:37", "remaining_time": "2:56:21", "throughput": 19609.49, "total_tokens": 38392640} +{"current_steps": 12200, "total_steps": 78105, "loss": 0.6388, "lr": 4.952080292002096e-06, "epoch": 0.7809999359836118, "percentage": 15.62, "elapsed_time": "0:32:38", "remaining_time": "2:56:20", "throughput": 19610.68, "total_tokens": 38408448} +{"current_steps": 12205, "total_steps": 78105, "loss": 0.5865, "lr": 4.9519713747387075e-06, "epoch": 0.7813200179245887, "percentage": 15.63, "elapsed_time": "0:32:39", "remaining_time": "2:56:18", "throughput": 19611.95, "total_tokens": 38424512} +{"current_steps": 12210, "total_steps": 78105, "loss": 0.6003, "lr": 4.95186233503664e-06, "epoch": 0.7816400998655656, "percentage": 15.63, "elapsed_time": "0:32:39", "remaining_time": "2:56:17", "throughput": 19612.98, "total_tokens": 38439296} +{"current_steps": 12215, "total_steps": 78105, "loss": 0.3559, "lr": 4.951753172901339e-06, "epoch": 0.7819601818065425, "percentage": 15.64, "elapsed_time": "0:32:40", "remaining_time": "2:56:15", "throughput": 19614.31, "total_tokens": 38455424} +{"current_steps": 12220, "total_steps": 78105, "loss": 0.5281, "lr": 4.951643888338256e-06, "epoch": 0.7822802637475194, "percentage": 15.65, "elapsed_time": "0:32:41", "remaining_time": "2:56:14", "throughput": 19615.45, "total_tokens": 38470976} +{"current_steps": 12225, "total_steps": 78105, "loss": 0.663, "lr": 4.951534481352845e-06, "epoch": 0.7826003456884962, "percentage": 15.65, "elapsed_time": "0:32:41", "remaining_time": "2:56:12", "throughput": 19616.57, "total_tokens": 38486016} +{"current_steps": 12230, "total_steps": 78105, "loss": 0.4021, "lr": 4.951424951950574e-06, "epoch": 0.7829204276294731, "percentage": 15.66, "elapsed_time": "0:32:42", "remaining_time": "2:56:11", "throughput": 19617.69, "total_tokens": 38501248} +{"current_steps": 12235, "total_steps": 78105, "loss": 0.4555, "lr": 4.951315300136909e-06, "epoch": 0.78324050957045, "percentage": 15.66, "elapsed_time": "0:32:43", "remaining_time": "2:56:09", "throughput": 19618.9, "total_tokens": 38516992} +{"current_steps": 12240, "total_steps": 78105, "loss": 0.4991, "lr": 4.951205525917326e-06, "epoch": 0.7835605915114269, "percentage": 15.67, "elapsed_time": "0:32:43", "remaining_time": "2:56:07", "throughput": 19619.89, "total_tokens": 38531456} +{"current_steps": 12245, "total_steps": 78105, "loss": 0.4745, "lr": 4.951095629297308e-06, "epoch": 0.7838806734524039, "percentage": 15.68, "elapsed_time": "0:32:44", "remaining_time": "2:56:06", "throughput": 19621.09, "total_tokens": 38546880} +{"current_steps": 12250, "total_steps": 78105, "loss": 0.4795, "lr": 4.9509856102823404e-06, "epoch": 0.7842007553933807, "percentage": 15.68, "elapsed_time": "0:32:45", "remaining_time": "2:56:04", "throughput": 19622.19, "total_tokens": 38562176} +{"current_steps": 12255, "total_steps": 78105, "loss": 0.53, "lr": 4.950875468877918e-06, "epoch": 0.7845208373343576, "percentage": 15.69, "elapsed_time": "0:32:45", "remaining_time": "2:56:03", "throughput": 19623.36, "total_tokens": 38577472} +{"current_steps": 12260, "total_steps": 78105, "loss": 0.4954, "lr": 4.9507652050895415e-06, "epoch": 0.7848409192753345, "percentage": 15.7, "elapsed_time": "0:32:46", "remaining_time": "2:56:01", "throughput": 19624.52, "total_tokens": 38593088} +{"current_steps": 12265, "total_steps": 78105, "loss": 0.6472, "lr": 4.950654818922716e-06, "epoch": 0.7851610012163114, "percentage": 15.7, "elapsed_time": "0:32:47", "remaining_time": "2:56:00", "throughput": 19625.58, "total_tokens": 38607936} +{"current_steps": 12270, "total_steps": 78105, "loss": 0.3883, "lr": 4.950544310382954e-06, "epoch": 0.7854810831572883, "percentage": 15.71, "elapsed_time": "0:32:47", "remaining_time": "2:55:59", "throughput": 19627.05, "total_tokens": 38625024} +{"current_steps": 12275, "total_steps": 78105, "loss": 0.4866, "lr": 4.950433679475774e-06, "epoch": 0.7858011650982651, "percentage": 15.72, "elapsed_time": "0:32:48", "remaining_time": "2:55:57", "throughput": 19628.49, "total_tokens": 38641792} +{"current_steps": 12280, "total_steps": 78105, "loss": 0.5882, "lr": 4.950322926206699e-06, "epoch": 0.786121247039242, "percentage": 15.72, "elapsed_time": "0:32:49", "remaining_time": "2:55:56", "throughput": 19629.62, "total_tokens": 38656896} +{"current_steps": 12285, "total_steps": 78105, "loss": 0.4555, "lr": 4.950212050581261e-06, "epoch": 0.7864413289802189, "percentage": 15.73, "elapsed_time": "0:32:49", "remaining_time": "2:55:54", "throughput": 19630.91, "total_tokens": 38672448} +{"current_steps": 12290, "total_steps": 78105, "loss": 0.4815, "lr": 4.950101052604995e-06, "epoch": 0.7867614109211958, "percentage": 15.74, "elapsed_time": "0:32:50", "remaining_time": "2:55:53", "throughput": 19632.24, "total_tokens": 38688576} +{"current_steps": 12295, "total_steps": 78105, "loss": 0.6147, "lr": 4.9499899322834455e-06, "epoch": 0.7870814928621728, "percentage": 15.74, "elapsed_time": "0:32:51", "remaining_time": "2:55:51", "throughput": 19633.43, "total_tokens": 38704064} +{"current_steps": 12300, "total_steps": 78105, "loss": 0.4661, "lr": 4.949878689622161e-06, "epoch": 0.7874015748031497, "percentage": 15.75, "elapsed_time": "0:32:51", "remaining_time": "2:55:50", "throughput": 19634.47, "total_tokens": 38718976} +{"current_steps": 12305, "total_steps": 78105, "loss": 0.5098, "lr": 4.949767324626694e-06, "epoch": 0.7877216567441265, "percentage": 15.75, "elapsed_time": "0:32:52", "remaining_time": "2:55:48", "throughput": 19635.59, "total_tokens": 38734272} +{"current_steps": 12310, "total_steps": 78105, "loss": 0.4012, "lr": 4.949655837302608e-06, "epoch": 0.7880417386851034, "percentage": 15.76, "elapsed_time": "0:32:53", "remaining_time": "2:55:47", "throughput": 19636.71, "total_tokens": 38749504} +{"current_steps": 12315, "total_steps": 78105, "loss": 0.6175, "lr": 4.949544227655469e-06, "epoch": 0.7883618206260803, "percentage": 15.77, "elapsed_time": "0:32:53", "remaining_time": "2:55:45", "throughput": 19637.78, "total_tokens": 38764352} +{"current_steps": 12320, "total_steps": 78105, "loss": 0.4962, "lr": 4.9494324956908505e-06, "epoch": 0.7886819025670572, "percentage": 15.77, "elapsed_time": "0:32:54", "remaining_time": "2:55:43", "throughput": 19639.05, "total_tokens": 38780096} +{"current_steps": 12325, "total_steps": 78105, "loss": 0.4824, "lr": 4.949320641414332e-06, "epoch": 0.789001984508034, "percentage": 15.78, "elapsed_time": "0:32:55", "remaining_time": "2:55:42", "throughput": 19640.24, "total_tokens": 38795712} +{"current_steps": 12330, "total_steps": 78105, "loss": 0.4731, "lr": 4.9492086648314984e-06, "epoch": 0.7893220664490109, "percentage": 15.79, "elapsed_time": "0:32:55", "remaining_time": "2:55:41", "throughput": 19641.47, "total_tokens": 38811328} +{"current_steps": 12335, "total_steps": 78105, "loss": 0.4184, "lr": 4.949096565947942e-06, "epoch": 0.7896421483899878, "percentage": 15.79, "elapsed_time": "0:32:56", "remaining_time": "2:55:39", "throughput": 19642.58, "total_tokens": 38826240} +{"current_steps": 12340, "total_steps": 78105, "loss": 0.4814, "lr": 4.94898434476926e-06, "epoch": 0.7899622303309647, "percentage": 15.8, "elapsed_time": "0:32:57", "remaining_time": "2:55:38", "throughput": 19643.91, "total_tokens": 38842624} +{"current_steps": 12345, "total_steps": 78105, "loss": 0.394, "lr": 4.9488720013010556e-06, "epoch": 0.7902823122719416, "percentage": 15.81, "elapsed_time": "0:32:57", "remaining_time": "2:55:36", "throughput": 19644.92, "total_tokens": 38857472} +{"current_steps": 12350, "total_steps": 78105, "loss": 0.586, "lr": 4.948759535548939e-06, "epoch": 0.7906023942129186, "percentage": 15.81, "elapsed_time": "0:32:58", "remaining_time": "2:55:34", "throughput": 19645.95, "total_tokens": 38872256} +{"current_steps": 12355, "total_steps": 78105, "loss": 0.5738, "lr": 4.948646947518527e-06, "epoch": 0.7909224761538954, "percentage": 15.82, "elapsed_time": "0:32:59", "remaining_time": "2:55:33", "throughput": 19647.64, "total_tokens": 38890048} +{"current_steps": 12360, "total_steps": 78105, "loss": 0.3512, "lr": 4.948534237215441e-06, "epoch": 0.7912425580948723, "percentage": 15.82, "elapsed_time": "0:33:00", "remaining_time": "2:55:32", "throughput": 19648.88, "total_tokens": 38905664} +{"current_steps": 12365, "total_steps": 78105, "loss": 0.4868, "lr": 4.948421404645308e-06, "epoch": 0.7915626400358492, "percentage": 15.83, "elapsed_time": "0:33:00", "remaining_time": "2:55:31", "throughput": 19651.01, "total_tokens": 38926144} +{"current_steps": 12370, "total_steps": 78105, "loss": 0.62, "lr": 4.948308449813764e-06, "epoch": 0.7918827219768261, "percentage": 15.84, "elapsed_time": "0:33:01", "remaining_time": "2:55:30", "throughput": 19652.34, "total_tokens": 38942720} +{"current_steps": 12375, "total_steps": 78105, "loss": 0.4897, "lr": 4.948195372726449e-06, "epoch": 0.792202803917803, "percentage": 15.84, "elapsed_time": "0:33:02", "remaining_time": "2:55:28", "throughput": 19653.48, "total_tokens": 38958144} +{"current_steps": 12380, "total_steps": 78105, "loss": 0.3435, "lr": 4.948082173389011e-06, "epoch": 0.7925228858587798, "percentage": 15.85, "elapsed_time": "0:33:02", "remaining_time": "2:55:27", "throughput": 19654.66, "total_tokens": 38973376} +{"current_steps": 12385, "total_steps": 78105, "loss": 0.4778, "lr": 4.947968851807098e-06, "epoch": 0.7928429677997567, "percentage": 15.86, "elapsed_time": "0:33:03", "remaining_time": "2:55:25", "throughput": 19655.98, "total_tokens": 38989504} +{"current_steps": 12390, "total_steps": 78105, "loss": 0.5404, "lr": 4.947855407986373e-06, "epoch": 0.7931630497407336, "percentage": 15.86, "elapsed_time": "0:33:04", "remaining_time": "2:55:24", "throughput": 19657.22, "total_tokens": 39005056} +{"current_steps": 12395, "total_steps": 78105, "loss": 0.5248, "lr": 4.947741841932499e-06, "epoch": 0.7934831316817105, "percentage": 15.87, "elapsed_time": "0:33:04", "remaining_time": "2:55:22", "throughput": 19658.41, "total_tokens": 39020608} +{"current_steps": 12400, "total_steps": 78105, "loss": 0.6509, "lr": 4.947628153651147e-06, "epoch": 0.7938032136226874, "percentage": 15.88, "elapsed_time": "0:33:05", "remaining_time": "2:55:21", "throughput": 19659.52, "total_tokens": 39035712} +{"current_steps": 12405, "total_steps": 78105, "loss": 0.4714, "lr": 4.947514343147995e-06, "epoch": 0.7941232955636643, "percentage": 15.88, "elapsed_time": "0:33:06", "remaining_time": "2:55:19", "throughput": 19660.61, "total_tokens": 39050880} +{"current_steps": 12410, "total_steps": 78105, "loss": 0.5796, "lr": 4.947400410428725e-06, "epoch": 0.7944433775046412, "percentage": 15.89, "elapsed_time": "0:33:06", "remaining_time": "2:55:18", "throughput": 19661.74, "total_tokens": 39065792} +{"current_steps": 12415, "total_steps": 78105, "loss": 0.4938, "lr": 4.947286355499026e-06, "epoch": 0.7947634594456181, "percentage": 15.9, "elapsed_time": "0:33:07", "remaining_time": "2:55:16", "throughput": 19662.9, "total_tokens": 39080960} +{"current_steps": 12420, "total_steps": 78105, "loss": 0.3604, "lr": 4.947172178364595e-06, "epoch": 0.795083541386595, "percentage": 15.9, "elapsed_time": "0:33:08", "remaining_time": "2:55:15", "throughput": 19664.2, "total_tokens": 39097216} +{"current_steps": 12425, "total_steps": 78105, "loss": 0.321, "lr": 4.947057879031131e-06, "epoch": 0.7954036233275719, "percentage": 15.91, "elapsed_time": "0:33:08", "remaining_time": "2:55:13", "throughput": 19665.48, "total_tokens": 39113152} +{"current_steps": 12430, "total_steps": 78105, "loss": 0.618, "lr": 4.946943457504343e-06, "epoch": 0.7957237052685487, "percentage": 15.91, "elapsed_time": "0:33:09", "remaining_time": "2:55:12", "throughput": 19666.97, "total_tokens": 39130176} +{"current_steps": 12435, "total_steps": 78105, "loss": 0.3819, "lr": 4.946828913789945e-06, "epoch": 0.7960437872095256, "percentage": 15.92, "elapsed_time": "0:33:10", "remaining_time": "2:55:10", "throughput": 19668.15, "total_tokens": 39145792} +{"current_steps": 12440, "total_steps": 78105, "loss": 0.593, "lr": 4.9467142478936555e-06, "epoch": 0.7963638691505025, "percentage": 15.93, "elapsed_time": "0:33:10", "remaining_time": "2:55:09", "throughput": 19669.32, "total_tokens": 39161280} +{"current_steps": 12445, "total_steps": 78105, "loss": 0.3727, "lr": 4.946599459821202e-06, "epoch": 0.7966839510914794, "percentage": 15.93, "elapsed_time": "0:33:11", "remaining_time": "2:55:07", "throughput": 19670.41, "total_tokens": 39176512} +{"current_steps": 12450, "total_steps": 78105, "loss": 0.5835, "lr": 4.946484549578314e-06, "epoch": 0.7970040330324563, "percentage": 15.94, "elapsed_time": "0:33:12", "remaining_time": "2:55:06", "throughput": 19671.51, "total_tokens": 39191808} +{"current_steps": 12455, "total_steps": 78105, "loss": 0.3811, "lr": 4.946369517170731e-06, "epoch": 0.7973241149734333, "percentage": 15.95, "elapsed_time": "0:33:13", "remaining_time": "2:55:05", "throughput": 19672.92, "total_tokens": 39208640} +{"current_steps": 12460, "total_steps": 78105, "loss": 0.4684, "lr": 4.946254362604198e-06, "epoch": 0.7976441969144101, "percentage": 15.95, "elapsed_time": "0:33:13", "remaining_time": "2:55:03", "throughput": 19673.9, "total_tokens": 39223232} +{"current_steps": 12465, "total_steps": 78105, "loss": 0.4334, "lr": 4.9461390858844635e-06, "epoch": 0.797964278855387, "percentage": 15.96, "elapsed_time": "0:33:14", "remaining_time": "2:55:02", "throughput": 19675.0, "total_tokens": 39238656} +{"current_steps": 12470, "total_steps": 78105, "loss": 0.553, "lr": 4.946023687017285e-06, "epoch": 0.7982843607963639, "percentage": 15.97, "elapsed_time": "0:33:15", "remaining_time": "2:55:00", "throughput": 19676.52, "total_tokens": 39255872} +{"current_steps": 12475, "total_steps": 78105, "loss": 0.4521, "lr": 4.945908166008424e-06, "epoch": 0.7986044427373408, "percentage": 15.97, "elapsed_time": "0:33:15", "remaining_time": "2:54:59", "throughput": 19677.48, "total_tokens": 39270784} +{"current_steps": 12480, "total_steps": 78105, "loss": 0.4391, "lr": 4.945792522863649e-06, "epoch": 0.7989245246783176, "percentage": 15.98, "elapsed_time": "0:33:16", "remaining_time": "2:54:57", "throughput": 19678.67, "total_tokens": 39286080} +{"current_steps": 12485, "total_steps": 78105, "loss": 0.5249, "lr": 4.945676757588736e-06, "epoch": 0.7992446066192945, "percentage": 15.98, "elapsed_time": "0:33:17", "remaining_time": "2:54:56", "throughput": 19679.82, "total_tokens": 39301312} +{"current_steps": 12490, "total_steps": 78105, "loss": 0.4352, "lr": 4.945560870189465e-06, "epoch": 0.7995646885602714, "percentage": 15.99, "elapsed_time": "0:33:17", "remaining_time": "2:54:54", "throughput": 19680.94, "total_tokens": 39316736} +{"current_steps": 12495, "total_steps": 78105, "loss": 0.4878, "lr": 4.945444860671622e-06, "epoch": 0.7998847705012483, "percentage": 16.0, "elapsed_time": "0:33:18", "remaining_time": "2:54:53", "throughput": 19681.93, "total_tokens": 39331264} +{"current_steps": 12500, "total_steps": 78105, "loss": 0.4433, "lr": 4.945328729041e-06, "epoch": 0.8002048524422252, "percentage": 16.0, "elapsed_time": "0:33:18", "remaining_time": "2:54:51", "throughput": 19682.93, "total_tokens": 39345856} +{"current_steps": 12505, "total_steps": 78105, "loss": 0.5399, "lr": 4.945212475303399e-06, "epoch": 0.800524934383202, "percentage": 16.01, "elapsed_time": "0:33:19", "remaining_time": "2:54:49", "throughput": 19684.13, "total_tokens": 39361280} +{"current_steps": 12510, "total_steps": 78105, "loss": 0.4515, "lr": 4.9450960994646245e-06, "epoch": 0.800845016324179, "percentage": 16.02, "elapsed_time": "0:33:20", "remaining_time": "2:54:48", "throughput": 19685.08, "total_tokens": 39375872} +{"current_steps": 12515, "total_steps": 78105, "loss": 0.3918, "lr": 4.944979601530486e-06, "epoch": 0.8011650982651559, "percentage": 16.02, "elapsed_time": "0:33:20", "remaining_time": "2:54:47", "throughput": 19686.43, "total_tokens": 39392320} +{"current_steps": 12520, "total_steps": 78105, "loss": 0.4591, "lr": 4.944862981506802e-06, "epoch": 0.8014851802061328, "percentage": 16.03, "elapsed_time": "0:33:21", "remaining_time": "2:54:45", "throughput": 19687.56, "total_tokens": 39407680} +{"current_steps": 12525, "total_steps": 78105, "loss": 0.4409, "lr": 4.944746239399395e-06, "epoch": 0.8018052621471097, "percentage": 16.04, "elapsed_time": "0:33:22", "remaining_time": "2:54:44", "throughput": 19688.95, "total_tokens": 39424512} +{"current_steps": 12530, "total_steps": 78105, "loss": 0.505, "lr": 4.944629375214095e-06, "epoch": 0.8021253440880866, "percentage": 16.04, "elapsed_time": "0:33:23", "remaining_time": "2:54:42", "throughput": 19689.88, "total_tokens": 39439296} +{"current_steps": 12535, "total_steps": 78105, "loss": 0.577, "lr": 4.944512388956738e-06, "epoch": 0.8024454260290634, "percentage": 16.05, "elapsed_time": "0:33:23", "remaining_time": "2:54:41", "throughput": 19691.4, "total_tokens": 39456576} +{"current_steps": 12540, "total_steps": 78105, "loss": 0.3687, "lr": 4.944395280633165e-06, "epoch": 0.8027655079700403, "percentage": 16.06, "elapsed_time": "0:33:24", "remaining_time": "2:54:40", "throughput": 19692.74, "total_tokens": 39472512} +{"current_steps": 12545, "total_steps": 78105, "loss": 0.4122, "lr": 4.944278050249224e-06, "epoch": 0.8030855899110172, "percentage": 16.06, "elapsed_time": "0:33:25", "remaining_time": "2:54:38", "throughput": 19693.9, "total_tokens": 39488192} +{"current_steps": 12550, "total_steps": 78105, "loss": 0.5375, "lr": 4.944160697810769e-06, "epoch": 0.8034056718519941, "percentage": 16.07, "elapsed_time": "0:33:25", "remaining_time": "2:54:37", "throughput": 19694.93, "total_tokens": 39503552} +{"current_steps": 12555, "total_steps": 78105, "loss": 0.5382, "lr": 4.94404322332366e-06, "epoch": 0.803725753792971, "percentage": 16.07, "elapsed_time": "0:33:26", "remaining_time": "2:54:35", "throughput": 19696.21, "total_tokens": 39519744} +{"current_steps": 12560, "total_steps": 78105, "loss": 0.6759, "lr": 4.943925626793764e-06, "epoch": 0.804045835733948, "percentage": 16.08, "elapsed_time": "0:33:27", "remaining_time": "2:54:34", "throughput": 19697.32, "total_tokens": 39535232} +{"current_steps": 12565, "total_steps": 78105, "loss": 0.5241, "lr": 4.943807908226951e-06, "epoch": 0.8043659176749248, "percentage": 16.09, "elapsed_time": "0:33:27", "remaining_time": "2:54:32", "throughput": 19698.26, "total_tokens": 39549568} +{"current_steps": 12570, "total_steps": 78105, "loss": 0.4066, "lr": 4.9436900676291e-06, "epoch": 0.8046859996159017, "percentage": 16.09, "elapsed_time": "0:33:28", "remaining_time": "2:54:31", "throughput": 19699.65, "total_tokens": 39566016} +{"current_steps": 12575, "total_steps": 78105, "loss": 0.4023, "lr": 4.943572105006097e-06, "epoch": 0.8050060815568786, "percentage": 16.1, "elapsed_time": "0:33:29", "remaining_time": "2:54:29", "throughput": 19700.85, "total_tokens": 39581760} +{"current_steps": 12580, "total_steps": 78105, "loss": 0.5066, "lr": 4.94345402036383e-06, "epoch": 0.8053261634978555, "percentage": 16.11, "elapsed_time": "0:33:29", "remaining_time": "2:54:28", "throughput": 19702.09, "total_tokens": 39597888} +{"current_steps": 12585, "total_steps": 78105, "loss": 0.5907, "lr": 4.9433358137081974e-06, "epoch": 0.8056462454388323, "percentage": 16.11, "elapsed_time": "0:33:30", "remaining_time": "2:54:27", "throughput": 19703.24, "total_tokens": 39613568} +{"current_steps": 12590, "total_steps": 78105, "loss": 0.4664, "lr": 4.943217485045101e-06, "epoch": 0.8059663273798092, "percentage": 16.12, "elapsed_time": "0:33:31", "remaining_time": "2:54:25", "throughput": 19704.22, "total_tokens": 39628096} +{"current_steps": 12595, "total_steps": 78105, "loss": 0.3563, "lr": 4.94309903438045e-06, "epoch": 0.8062864093207861, "percentage": 16.13, "elapsed_time": "0:33:31", "remaining_time": "2:54:23", "throughput": 19705.11, "total_tokens": 39642560} +{"current_steps": 12600, "total_steps": 78105, "loss": 0.4185, "lr": 4.9429804617201585e-06, "epoch": 0.806606491261763, "percentage": 16.13, "elapsed_time": "0:33:32", "remaining_time": "2:54:22", "throughput": 19706.29, "total_tokens": 39658112} +{"current_steps": 12605, "total_steps": 78105, "loss": 0.5237, "lr": 4.9428617670701475e-06, "epoch": 0.8069265732027399, "percentage": 16.14, "elapsed_time": "0:33:33", "remaining_time": "2:54:20", "throughput": 19707.57, "total_tokens": 39674112} +{"current_steps": 12610, "total_steps": 78105, "loss": 0.4363, "lr": 4.942742950436344e-06, "epoch": 0.8072466551437167, "percentage": 16.14, "elapsed_time": "0:33:33", "remaining_time": "2:54:19", "throughput": 19708.93, "total_tokens": 39690432} +{"current_steps": 12615, "total_steps": 78105, "loss": 0.4683, "lr": 4.942624011824683e-06, "epoch": 0.8075667370846937, "percentage": 16.15, "elapsed_time": "0:33:34", "remaining_time": "2:54:18", "throughput": 19710.04, "total_tokens": 39705792} +{"current_steps": 12620, "total_steps": 78105, "loss": 0.5312, "lr": 4.942504951241101e-06, "epoch": 0.8078868190256706, "percentage": 16.16, "elapsed_time": "0:33:35", "remaining_time": "2:54:16", "throughput": 19711.27, "total_tokens": 39722048} +{"current_steps": 12625, "total_steps": 78105, "loss": 0.4972, "lr": 4.9423857686915435e-06, "epoch": 0.8082069009666475, "percentage": 16.16, "elapsed_time": "0:33:35", "remaining_time": "2:54:15", "throughput": 19712.56, "total_tokens": 39738304} +{"current_steps": 12630, "total_steps": 78105, "loss": 0.4825, "lr": 4.9422664641819634e-06, "epoch": 0.8085269829076244, "percentage": 16.17, "elapsed_time": "0:33:36", "remaining_time": "2:54:13", "throughput": 19713.69, "total_tokens": 39753728} +{"current_steps": 12635, "total_steps": 78105, "loss": 0.4945, "lr": 4.942147037718317e-06, "epoch": 0.8088470648486012, "percentage": 16.18, "elapsed_time": "0:33:37", "remaining_time": "2:54:12", "throughput": 19714.84, "total_tokens": 39769600} +{"current_steps": 12640, "total_steps": 78105, "loss": 0.4703, "lr": 4.942027489306569e-06, "epoch": 0.8091671467895781, "percentage": 16.18, "elapsed_time": "0:33:37", "remaining_time": "2:54:10", "throughput": 19715.76, "total_tokens": 39784000} +{"current_steps": 12645, "total_steps": 78105, "loss": 0.4305, "lr": 4.941907818952687e-06, "epoch": 0.809487228730555, "percentage": 16.19, "elapsed_time": "0:33:38", "remaining_time": "2:54:09", "throughput": 19717.02, "total_tokens": 39800320} +{"current_steps": 12650, "total_steps": 78105, "loss": 0.4837, "lr": 4.941788026662649e-06, "epoch": 0.8098073106715319, "percentage": 16.2, "elapsed_time": "0:33:39", "remaining_time": "2:54:08", "throughput": 19718.07, "total_tokens": 39815360} +{"current_steps": 12655, "total_steps": 78105, "loss": 0.4893, "lr": 4.941668112442436e-06, "epoch": 0.8101273926125088, "percentage": 16.2, "elapsed_time": "0:33:39", "remaining_time": "2:54:06", "throughput": 19718.98, "total_tokens": 39829952} +{"current_steps": 12660, "total_steps": 78105, "loss": 0.473, "lr": 4.941548076298035e-06, "epoch": 0.8104474745534856, "percentage": 16.21, "elapsed_time": "0:33:40", "remaining_time": "2:54:05", "throughput": 19720.1, "total_tokens": 39845376} +{"current_steps": 12665, "total_steps": 78105, "loss": 0.5856, "lr": 4.94142791823544e-06, "epoch": 0.8107675564944626, "percentage": 16.22, "elapsed_time": "0:33:41", "remaining_time": "2:54:03", "throughput": 19721.41, "total_tokens": 39861696} +{"current_steps": 12670, "total_steps": 78105, "loss": 0.48, "lr": 4.941307638260653e-06, "epoch": 0.8110876384354395, "percentage": 16.22, "elapsed_time": "0:33:41", "remaining_time": "2:54:02", "throughput": 19722.61, "total_tokens": 39877440} +{"current_steps": 12675, "total_steps": 78105, "loss": 0.4398, "lr": 4.941187236379678e-06, "epoch": 0.8114077203764164, "percentage": 16.23, "elapsed_time": "0:33:42", "remaining_time": "2:54:00", "throughput": 19723.86, "total_tokens": 39893760} +{"current_steps": 12680, "total_steps": 78105, "loss": 0.3593, "lr": 4.941066712598528e-06, "epoch": 0.8117278023173933, "percentage": 16.23, "elapsed_time": "0:33:43", "remaining_time": "2:53:59", "throughput": 19725.12, "total_tokens": 39909568} +{"current_steps": 12685, "total_steps": 78105, "loss": 0.5778, "lr": 4.940946066923222e-06, "epoch": 0.8120478842583702, "percentage": 16.24, "elapsed_time": "0:33:43", "remaining_time": "2:53:57", "throughput": 19725.99, "total_tokens": 39923648} +{"current_steps": 12690, "total_steps": 78105, "loss": 0.5954, "lr": 4.940825299359784e-06, "epoch": 0.812367966199347, "percentage": 16.25, "elapsed_time": "0:33:44", "remaining_time": "2:53:56", "throughput": 19727.02, "total_tokens": 39939008} +{"current_steps": 12695, "total_steps": 78105, "loss": 0.5824, "lr": 4.9407044099142435e-06, "epoch": 0.8126880481403239, "percentage": 16.25, "elapsed_time": "0:33:45", "remaining_time": "2:53:55", "throughput": 19728.21, "total_tokens": 39955072} +{"current_steps": 12700, "total_steps": 78105, "loss": 0.5653, "lr": 4.940583398592639e-06, "epoch": 0.8130081300813008, "percentage": 16.26, "elapsed_time": "0:33:45", "remaining_time": "2:53:53", "throughput": 19729.44, "total_tokens": 39970880} +{"current_steps": 12705, "total_steps": 78105, "loss": 0.5909, "lr": 4.940462265401012e-06, "epoch": 0.8133282120222777, "percentage": 16.27, "elapsed_time": "0:33:46", "remaining_time": "2:53:52", "throughput": 19730.52, "total_tokens": 39986240} +{"current_steps": 12710, "total_steps": 78105, "loss": 0.4893, "lr": 4.940341010345409e-06, "epoch": 0.8136482939632546, "percentage": 16.27, "elapsed_time": "0:33:47", "remaining_time": "2:53:50", "throughput": 19731.7, "total_tokens": 40001856} +{"current_steps": 12715, "total_steps": 78105, "loss": 0.5328, "lr": 4.94021963343189e-06, "epoch": 0.8139683759042314, "percentage": 16.28, "elapsed_time": "0:33:47", "remaining_time": "2:53:49", "throughput": 19732.98, "total_tokens": 40018112} +{"current_steps": 12720, "total_steps": 78105, "loss": 0.4697, "lr": 4.940098134666512e-06, "epoch": 0.8142884578452084, "percentage": 16.29, "elapsed_time": "0:33:48", "remaining_time": "2:53:47", "throughput": 19734.03, "total_tokens": 40033664} +{"current_steps": 12725, "total_steps": 78105, "loss": 0.4335, "lr": 4.939976514055344e-06, "epoch": 0.8146085397861853, "percentage": 16.29, "elapsed_time": "0:33:49", "remaining_time": "2:53:46", "throughput": 19735.07, "total_tokens": 40048768} +{"current_steps": 12730, "total_steps": 78105, "loss": 0.4576, "lr": 4.939854771604457e-06, "epoch": 0.8149286217271622, "percentage": 16.3, "elapsed_time": "0:33:49", "remaining_time": "2:53:44", "throughput": 19735.98, "total_tokens": 40063232} +{"current_steps": 12735, "total_steps": 78105, "loss": 0.5915, "lr": 4.939732907319933e-06, "epoch": 0.8152487036681391, "percentage": 16.3, "elapsed_time": "0:33:50", "remaining_time": "2:53:43", "throughput": 19737.2, "total_tokens": 40079296} +{"current_steps": 12740, "total_steps": 78105, "loss": 0.57, "lr": 4.9396109212078545e-06, "epoch": 0.8155687856091159, "percentage": 16.31, "elapsed_time": "0:33:51", "remaining_time": "2:53:42", "throughput": 19738.39, "total_tokens": 40094976} +{"current_steps": 12745, "total_steps": 78105, "loss": 0.4082, "lr": 4.939488813274315e-06, "epoch": 0.8158888675500928, "percentage": 16.32, "elapsed_time": "0:33:51", "remaining_time": "2:53:40", "throughput": 19739.53, "total_tokens": 40110464} +{"current_steps": 12750, "total_steps": 78105, "loss": 0.6115, "lr": 4.93936658352541e-06, "epoch": 0.8162089494910697, "percentage": 16.32, "elapsed_time": "0:33:52", "remaining_time": "2:53:39", "throughput": 19740.59, "total_tokens": 40125568} +{"current_steps": 12755, "total_steps": 78105, "loss": 0.4509, "lr": 4.939244231967244e-06, "epoch": 0.8165290314320466, "percentage": 16.33, "elapsed_time": "0:33:53", "remaining_time": "2:53:37", "throughput": 19741.83, "total_tokens": 40141440} +{"current_steps": 12760, "total_steps": 78105, "loss": 0.5462, "lr": 4.939121758605927e-06, "epoch": 0.8168491133730235, "percentage": 16.34, "elapsed_time": "0:33:53", "remaining_time": "2:53:36", "throughput": 19742.79, "total_tokens": 40156416} +{"current_steps": 12765, "total_steps": 78105, "loss": 0.5609, "lr": 4.938999163447574e-06, "epoch": 0.8171691953140003, "percentage": 16.34, "elapsed_time": "0:33:54", "remaining_time": "2:53:34", "throughput": 19744.07, "total_tokens": 40172928} +{"current_steps": 12770, "total_steps": 78105, "loss": 0.3699, "lr": 4.9388764464983075e-06, "epoch": 0.8174892772549772, "percentage": 16.35, "elapsed_time": "0:33:55", "remaining_time": "2:53:33", "throughput": 19745.16, "total_tokens": 40188096} +{"current_steps": 12775, "total_steps": 78105, "loss": 0.5429, "lr": 4.938753607764255e-06, "epoch": 0.8178093591959542, "percentage": 16.36, "elapsed_time": "0:33:56", "remaining_time": "2:53:32", "throughput": 19746.35, "total_tokens": 40204032} +{"current_steps": 12780, "total_steps": 78105, "loss": 0.4195, "lr": 4.93863064725155e-06, "epoch": 0.8181294411369311, "percentage": 16.36, "elapsed_time": "0:33:56", "remaining_time": "2:53:30", "throughput": 19747.39, "total_tokens": 40218944} +{"current_steps": 12785, "total_steps": 78105, "loss": 0.7037, "lr": 4.9385075649663334e-06, "epoch": 0.818449523077908, "percentage": 16.37, "elapsed_time": "0:33:57", "remaining_time": "2:53:29", "throughput": 19748.71, "total_tokens": 40235456} +{"current_steps": 12790, "total_steps": 78105, "loss": 0.5339, "lr": 4.93838436091475e-06, "epoch": 0.8187696050188848, "percentage": 16.38, "elapsed_time": "0:33:58", "remaining_time": "2:53:27", "throughput": 19749.62, "total_tokens": 40250176} +{"current_steps": 12795, "total_steps": 78105, "loss": 0.4056, "lr": 4.9382610351029535e-06, "epoch": 0.8190896869598617, "percentage": 16.38, "elapsed_time": "0:33:58", "remaining_time": "2:53:26", "throughput": 19750.68, "total_tokens": 40265472} +{"current_steps": 12800, "total_steps": 78105, "loss": 0.4793, "lr": 4.9381375875371005e-06, "epoch": 0.8194097689008386, "percentage": 16.39, "elapsed_time": "0:33:59", "remaining_time": "2:53:24", "throughput": 19751.92, "total_tokens": 40281728} +{"current_steps": 12805, "total_steps": 78105, "loss": 0.4221, "lr": 4.938014018223356e-06, "epoch": 0.8197298508418155, "percentage": 16.39, "elapsed_time": "0:34:00", "remaining_time": "2:53:24", "throughput": 19750.58, "total_tokens": 40296768} +{"current_steps": 12810, "total_steps": 78105, "loss": 0.6157, "lr": 4.937890327167891e-06, "epoch": 0.8200499327827924, "percentage": 16.4, "elapsed_time": "0:34:00", "remaining_time": "2:53:23", "throughput": 19751.46, "total_tokens": 40311488} +{"current_steps": 12815, "total_steps": 78105, "loss": 0.3821, "lr": 4.937766514376882e-06, "epoch": 0.8203700147237692, "percentage": 16.41, "elapsed_time": "0:34:01", "remaining_time": "2:53:21", "throughput": 19752.58, "total_tokens": 40327232} +{"current_steps": 12820, "total_steps": 78105, "loss": 0.6138, "lr": 4.937642579856511e-06, "epoch": 0.8206900966647461, "percentage": 16.41, "elapsed_time": "0:34:02", "remaining_time": "2:53:20", "throughput": 19753.89, "total_tokens": 40344064} +{"current_steps": 12825, "total_steps": 78105, "loss": 0.5867, "lr": 4.9375185236129665e-06, "epoch": 0.8210101786057231, "percentage": 16.42, "elapsed_time": "0:34:02", "remaining_time": "2:53:18", "throughput": 19754.89, "total_tokens": 40359040} +{"current_steps": 12830, "total_steps": 78105, "loss": 0.4853, "lr": 4.937394345652444e-06, "epoch": 0.8213302605467, "percentage": 16.43, "elapsed_time": "0:34:03", "remaining_time": "2:53:17", "throughput": 19756.33, "total_tokens": 40376384} +{"current_steps": 12835, "total_steps": 78105, "loss": 0.701, "lr": 4.937270045981143e-06, "epoch": 0.8216503424876769, "percentage": 16.43, "elapsed_time": "0:34:04", "remaining_time": "2:53:16", "throughput": 19757.43, "total_tokens": 40391936} +{"current_steps": 12840, "total_steps": 78105, "loss": 0.3718, "lr": 4.937145624605272e-06, "epoch": 0.8219704244286538, "percentage": 16.44, "elapsed_time": "0:34:05", "remaining_time": "2:53:15", "throughput": 19758.87, "total_tokens": 40408832} +{"current_steps": 12845, "total_steps": 78105, "loss": 0.5003, "lr": 4.937021081531042e-06, "epoch": 0.8222905063696306, "percentage": 16.45, "elapsed_time": "0:34:05", "remaining_time": "2:53:13", "throughput": 19760.16, "total_tokens": 40425280} +{"current_steps": 12850, "total_steps": 78105, "loss": 0.4998, "lr": 4.936896416764674e-06, "epoch": 0.8226105883106075, "percentage": 16.45, "elapsed_time": "0:34:06", "remaining_time": "2:53:12", "throughput": 19761.24, "total_tokens": 40440832} +{"current_steps": 12855, "total_steps": 78105, "loss": 0.3566, "lr": 4.936771630312392e-06, "epoch": 0.8229306702515844, "percentage": 16.46, "elapsed_time": "0:34:07", "remaining_time": "2:53:11", "throughput": 19762.78, "total_tokens": 40458624} +{"current_steps": 12860, "total_steps": 78105, "loss": 0.5574, "lr": 4.936646722180428e-06, "epoch": 0.8232507521925613, "percentage": 16.47, "elapsed_time": "0:34:07", "remaining_time": "2:53:09", "throughput": 19764.04, "total_tokens": 40474688} +{"current_steps": 12865, "total_steps": 78105, "loss": 0.4617, "lr": 4.936521692375018e-06, "epoch": 0.8235708341335382, "percentage": 16.47, "elapsed_time": "0:34:08", "remaining_time": "2:53:08", "throughput": 19765.3, "total_tokens": 40490816} +{"current_steps": 12870, "total_steps": 78105, "loss": 0.6127, "lr": 4.9363965409024065e-06, "epoch": 0.823890916074515, "percentage": 16.48, "elapsed_time": "0:34:09", "remaining_time": "2:53:07", "throughput": 19766.34, "total_tokens": 40506112} +{"current_steps": 12875, "total_steps": 78105, "loss": 0.5227, "lr": 4.936271267768842e-06, "epoch": 0.8242109980154919, "percentage": 16.48, "elapsed_time": "0:34:09", "remaining_time": "2:53:05", "throughput": 19767.37, "total_tokens": 40521344} +{"current_steps": 12880, "total_steps": 78105, "loss": 0.3535, "lr": 4.936145872980581e-06, "epoch": 0.8245310799564689, "percentage": 16.49, "elapsed_time": "0:34:10", "remaining_time": "2:53:04", "throughput": 19768.56, "total_tokens": 40537024} +{"current_steps": 12885, "total_steps": 78105, "loss": 0.5369, "lr": 4.9360203565438845e-06, "epoch": 0.8248511618974458, "percentage": 16.5, "elapsed_time": "0:34:11", "remaining_time": "2:53:02", "throughput": 19769.69, "total_tokens": 40552640} +{"current_steps": 12890, "total_steps": 78105, "loss": 0.4837, "lr": 4.9358947184650195e-06, "epoch": 0.8251712438384227, "percentage": 16.5, "elapsed_time": "0:34:11", "remaining_time": "2:53:01", "throughput": 19770.75, "total_tokens": 40568000} +{"current_steps": 12895, "total_steps": 78105, "loss": 0.4927, "lr": 4.935768958750261e-06, "epoch": 0.8254913257793995, "percentage": 16.51, "elapsed_time": "0:34:12", "remaining_time": "2:53:00", "throughput": 19772.11, "total_tokens": 40584960} +{"current_steps": 12900, "total_steps": 78105, "loss": 0.5667, "lr": 4.935643077405888e-06, "epoch": 0.8258114077203764, "percentage": 16.52, "elapsed_time": "0:34:13", "remaining_time": "2:52:58", "throughput": 19773.2, "total_tokens": 40600704} +{"current_steps": 12905, "total_steps": 78105, "loss": 0.4383, "lr": 4.9355170744381866e-06, "epoch": 0.8261314896613533, "percentage": 16.52, "elapsed_time": "0:34:13", "remaining_time": "2:52:57", "throughput": 19774.25, "total_tokens": 40615872} +{"current_steps": 12910, "total_steps": 78105, "loss": 0.5866, "lr": 4.9353909498534495e-06, "epoch": 0.8264515716023302, "percentage": 16.53, "elapsed_time": "0:34:14", "remaining_time": "2:52:56", "throughput": 19775.72, "total_tokens": 40632640} +{"current_steps": 12915, "total_steps": 78105, "loss": 0.4792, "lr": 4.935264703657972e-06, "epoch": 0.8267716535433071, "percentage": 16.54, "elapsed_time": "0:34:15", "remaining_time": "2:52:54", "throughput": 19776.79, "total_tokens": 40648064} +{"current_steps": 12920, "total_steps": 78105, "loss": 0.5778, "lr": 4.9351383358580615e-06, "epoch": 0.8270917354842839, "percentage": 16.54, "elapsed_time": "0:34:16", "remaining_time": "2:52:53", "throughput": 19778.16, "total_tokens": 40664704} +{"current_steps": 12925, "total_steps": 78105, "loss": 0.6154, "lr": 4.935011846460026e-06, "epoch": 0.8274118174252608, "percentage": 16.55, "elapsed_time": "0:34:16", "remaining_time": "2:52:52", "throughput": 19779.37, "total_tokens": 40681024} +{"current_steps": 12930, "total_steps": 78105, "loss": 0.5939, "lr": 4.934885235470183e-06, "epoch": 0.8277318993662378, "percentage": 16.55, "elapsed_time": "0:34:17", "remaining_time": "2:52:50", "throughput": 19780.34, "total_tokens": 40695936} +{"current_steps": 12935, "total_steps": 78105, "loss": 0.4226, "lr": 4.934758502894853e-06, "epoch": 0.8280519813072147, "percentage": 16.56, "elapsed_time": "0:34:18", "remaining_time": "2:52:49", "throughput": 19781.81, "total_tokens": 40713472} +{"current_steps": 12940, "total_steps": 78105, "loss": 0.508, "lr": 4.934631648740367e-06, "epoch": 0.8283720632481916, "percentage": 16.57, "elapsed_time": "0:34:18", "remaining_time": "2:52:48", "throughput": 19782.9, "total_tokens": 40729344} +{"current_steps": 12945, "total_steps": 78105, "loss": 0.4391, "lr": 4.934504673013057e-06, "epoch": 0.8286921451891684, "percentage": 16.57, "elapsed_time": "0:34:19", "remaining_time": "2:52:46", "throughput": 19784.15, "total_tokens": 40745856} +{"current_steps": 12950, "total_steps": 78105, "loss": 0.4285, "lr": 4.934377575719265e-06, "epoch": 0.8290122271301453, "percentage": 16.58, "elapsed_time": "0:34:20", "remaining_time": "2:52:45", "throughput": 19785.36, "total_tokens": 40761920} +{"current_steps": 12955, "total_steps": 78105, "loss": 0.3555, "lr": 4.934250356865337e-06, "epoch": 0.8293323090711222, "percentage": 16.59, "elapsed_time": "0:34:20", "remaining_time": "2:52:43", "throughput": 19786.38, "total_tokens": 40776960} +{"current_steps": 12960, "total_steps": 78105, "loss": 0.5046, "lr": 4.934123016457625e-06, "epoch": 0.8296523910120991, "percentage": 16.59, "elapsed_time": "0:34:21", "remaining_time": "2:52:42", "throughput": 19787.39, "total_tokens": 40792192} +{"current_steps": 12965, "total_steps": 78105, "loss": 0.5857, "lr": 4.933995554502489e-06, "epoch": 0.829972472953076, "percentage": 16.6, "elapsed_time": "0:34:22", "remaining_time": "2:52:41", "throughput": 19788.56, "total_tokens": 40808256} +{"current_steps": 12970, "total_steps": 78105, "loss": 0.5344, "lr": 4.933867971006294e-06, "epoch": 0.8302925548940528, "percentage": 16.61, "elapsed_time": "0:34:22", "remaining_time": "2:52:39", "throughput": 19789.57, "total_tokens": 40823424} +{"current_steps": 12975, "total_steps": 78105, "loss": 0.4892, "lr": 4.933740265975409e-06, "epoch": 0.8306126368350297, "percentage": 16.61, "elapsed_time": "0:34:23", "remaining_time": "2:52:38", "throughput": 19791.05, "total_tokens": 40840960} +{"current_steps": 12980, "total_steps": 78105, "loss": 0.5096, "lr": 4.933612439416212e-06, "epoch": 0.8309327187760066, "percentage": 16.62, "elapsed_time": "0:34:24", "remaining_time": "2:52:37", "throughput": 19792.32, "total_tokens": 40857536} +{"current_steps": 12985, "total_steps": 78105, "loss": 0.4609, "lr": 4.933484491335086e-06, "epoch": 0.8312528007169836, "percentage": 16.63, "elapsed_time": "0:34:25", "remaining_time": "2:52:36", "throughput": 19793.68, "total_tokens": 40874240} +{"current_steps": 12990, "total_steps": 78105, "loss": 0.5848, "lr": 4.933356421738421e-06, "epoch": 0.8315728826579605, "percentage": 16.63, "elapsed_time": "0:34:25", "remaining_time": "2:52:34", "throughput": 19794.54, "total_tokens": 40888960} +{"current_steps": 12995, "total_steps": 78105, "loss": 0.6522, "lr": 4.9332282306326105e-06, "epoch": 0.8318929645989374, "percentage": 16.64, "elapsed_time": "0:34:26", "remaining_time": "2:52:33", "throughput": 19795.59, "total_tokens": 40904512} +{"current_steps": 13000, "total_steps": 78105, "loss": 0.4739, "lr": 4.933099918024057e-06, "epoch": 0.8322130465399142, "percentage": 16.64, "elapsed_time": "0:34:27", "remaining_time": "2:52:32", "throughput": 19797.03, "total_tokens": 40921856} +{"current_steps": 13005, "total_steps": 78105, "loss": 0.472, "lr": 4.932971483919166e-06, "epoch": 0.8325331284808911, "percentage": 16.65, "elapsed_time": "0:34:27", "remaining_time": "2:52:30", "throughput": 19798.4, "total_tokens": 40938752} +{"current_steps": 13010, "total_steps": 78105, "loss": 0.4399, "lr": 4.932842928324353e-06, "epoch": 0.832853210421868, "percentage": 16.66, "elapsed_time": "0:34:28", "remaining_time": "2:52:29", "throughput": 19799.4, "total_tokens": 40954048} +{"current_steps": 13015, "total_steps": 78105, "loss": 0.511, "lr": 4.9327142512460355e-06, "epoch": 0.8331732923628449, "percentage": 16.66, "elapsed_time": "0:34:29", "remaining_time": "2:52:28", "throughput": 19801.09, "total_tokens": 40972672} +{"current_steps": 13020, "total_steps": 78105, "loss": 0.3619, "lr": 4.932585452690641e-06, "epoch": 0.8334933743038218, "percentage": 16.67, "elapsed_time": "0:34:29", "remaining_time": "2:52:26", "throughput": 19802.08, "total_tokens": 40987648} +{"current_steps": 13025, "total_steps": 78105, "loss": 0.4629, "lr": 4.932456532664599e-06, "epoch": 0.8338134562447986, "percentage": 16.68, "elapsed_time": "0:34:30", "remaining_time": "2:52:25", "throughput": 19803.26, "total_tokens": 41003328} +{"current_steps": 13030, "total_steps": 78105, "loss": 0.4555, "lr": 4.932327491174348e-06, "epoch": 0.8341335381857755, "percentage": 16.68, "elapsed_time": "0:34:31", "remaining_time": "2:52:24", "throughput": 19804.3, "total_tokens": 41018624} +{"current_steps": 13035, "total_steps": 78105, "loss": 0.4254, "lr": 4.932198328226332e-06, "epoch": 0.8344536201267524, "percentage": 16.69, "elapsed_time": "0:34:31", "remaining_time": "2:52:22", "throughput": 19805.45, "total_tokens": 41034624} +{"current_steps": 13040, "total_steps": 78105, "loss": 0.554, "lr": 4.932069043827e-06, "epoch": 0.8347737020677294, "percentage": 16.7, "elapsed_time": "0:34:32", "remaining_time": "2:52:21", "throughput": 19806.46, "total_tokens": 41049664} +{"current_steps": 13045, "total_steps": 78105, "loss": 0.5138, "lr": 4.931939637982809e-06, "epoch": 0.8350937840087063, "percentage": 16.7, "elapsed_time": "0:34:33", "remaining_time": "2:52:19", "throughput": 19807.78, "total_tokens": 41066368} +{"current_steps": 13050, "total_steps": 78105, "loss": 0.4383, "lr": 4.931810110700219e-06, "epoch": 0.8354138659496831, "percentage": 16.71, "elapsed_time": "0:34:33", "remaining_time": "2:52:18", "throughput": 19808.87, "total_tokens": 41082048} +{"current_steps": 13055, "total_steps": 78105, "loss": 0.5354, "lr": 4.931680461985699e-06, "epoch": 0.83573394789066, "percentage": 16.71, "elapsed_time": "0:34:34", "remaining_time": "2:52:17", "throughput": 19810.02, "total_tokens": 41098048} +{"current_steps": 13060, "total_steps": 78105, "loss": 0.4701, "lr": 4.931550691845724e-06, "epoch": 0.8360540298316369, "percentage": 16.72, "elapsed_time": "0:34:35", "remaining_time": "2:52:15", "throughput": 19810.92, "total_tokens": 41112768} +{"current_steps": 13065, "total_steps": 78105, "loss": 0.5058, "lr": 4.9314208002867715e-06, "epoch": 0.8363741117726138, "percentage": 16.73, "elapsed_time": "0:34:35", "remaining_time": "2:52:14", "throughput": 19811.9, "total_tokens": 41127488} +{"current_steps": 13070, "total_steps": 78105, "loss": 0.3496, "lr": 4.9312907873153295e-06, "epoch": 0.8366941937135907, "percentage": 16.73, "elapsed_time": "0:34:36", "remaining_time": "2:52:12", "throughput": 19812.79, "total_tokens": 41142272} +{"current_steps": 13075, "total_steps": 78105, "loss": 0.3812, "lr": 4.931160652937889e-06, "epoch": 0.8370142756545675, "percentage": 16.74, "elapsed_time": "0:34:37", "remaining_time": "2:52:11", "throughput": 19813.86, "total_tokens": 41157952} +{"current_steps": 13080, "total_steps": 78105, "loss": 0.6256, "lr": 4.93103039716095e-06, "epoch": 0.8373343575955444, "percentage": 16.75, "elapsed_time": "0:34:37", "remaining_time": "2:52:09", "throughput": 19814.78, "total_tokens": 41172992} +{"current_steps": 13085, "total_steps": 78105, "loss": 0.5281, "lr": 4.930900019991016e-06, "epoch": 0.8376544395365213, "percentage": 16.75, "elapsed_time": "0:34:38", "remaining_time": "2:52:08", "throughput": 19815.63, "total_tokens": 41187776} +{"current_steps": 13090, "total_steps": 78105, "loss": 0.4531, "lr": 4.930769521434597e-06, "epoch": 0.8379745214774983, "percentage": 16.76, "elapsed_time": "0:34:39", "remaining_time": "2:52:07", "throughput": 19816.98, "total_tokens": 41204416} +{"current_steps": 13095, "total_steps": 78105, "loss": 0.5544, "lr": 4.930638901498208e-06, "epoch": 0.8382946034184752, "percentage": 16.77, "elapsed_time": "0:34:39", "remaining_time": "2:52:05", "throughput": 19818.16, "total_tokens": 41220032} +{"current_steps": 13100, "total_steps": 78105, "loss": 0.5094, "lr": 4.930508160188374e-06, "epoch": 0.838614685359452, "percentage": 16.77, "elapsed_time": "0:34:40", "remaining_time": "2:52:04", "throughput": 19819.27, "total_tokens": 41235776} +{"current_steps": 13105, "total_steps": 78105, "loss": 0.4823, "lr": 4.930377297511623e-06, "epoch": 0.8389347673004289, "percentage": 16.78, "elapsed_time": "0:34:41", "remaining_time": "2:52:03", "throughput": 19820.53, "total_tokens": 41252160} +{"current_steps": 13110, "total_steps": 78105, "loss": 0.4357, "lr": 4.930246313474488e-06, "epoch": 0.8392548492414058, "percentage": 16.79, "elapsed_time": "0:34:41", "remaining_time": "2:52:01", "throughput": 19821.9, "total_tokens": 41269056} +{"current_steps": 13115, "total_steps": 78105, "loss": 0.4334, "lr": 4.930115208083512e-06, "epoch": 0.8395749311823827, "percentage": 16.79, "elapsed_time": "0:34:42", "remaining_time": "2:52:00", "throughput": 19823.14, "total_tokens": 41285312} +{"current_steps": 13120, "total_steps": 78105, "loss": 0.4662, "lr": 4.92998398134524e-06, "epoch": 0.8398950131233596, "percentage": 16.8, "elapsed_time": "0:34:43", "remaining_time": "2:51:59", "throughput": 19824.25, "total_tokens": 41300992} +{"current_steps": 13125, "total_steps": 78105, "loss": 0.3655, "lr": 4.9298526332662255e-06, "epoch": 0.8402150950643364, "percentage": 16.8, "elapsed_time": "0:34:44", "remaining_time": "2:51:57", "throughput": 19825.61, "total_tokens": 41317952} +{"current_steps": 13130, "total_steps": 78105, "loss": 0.4765, "lr": 4.929721163853028e-06, "epoch": 0.8405351770053133, "percentage": 16.81, "elapsed_time": "0:34:44", "remaining_time": "2:51:56", "throughput": 19826.64, "total_tokens": 41333440} +{"current_steps": 13135, "total_steps": 78105, "loss": 0.5691, "lr": 4.929589573112209e-06, "epoch": 0.8408552589462902, "percentage": 16.82, "elapsed_time": "0:34:45", "remaining_time": "2:51:55", "throughput": 19827.78, "total_tokens": 41349312} +{"current_steps": 13140, "total_steps": 78105, "loss": 0.4671, "lr": 4.929457861050344e-06, "epoch": 0.8411753408872671, "percentage": 16.82, "elapsed_time": "0:34:46", "remaining_time": "2:51:53", "throughput": 19828.68, "total_tokens": 41364288} +{"current_steps": 13145, "total_steps": 78105, "loss": 0.4784, "lr": 4.929326027674007e-06, "epoch": 0.8414954228282441, "percentage": 16.83, "elapsed_time": "0:34:46", "remaining_time": "2:51:52", "throughput": 19829.7, "total_tokens": 41379904} +{"current_steps": 13150, "total_steps": 78105, "loss": 0.4815, "lr": 4.929194072989783e-06, "epoch": 0.841815504769221, "percentage": 16.84, "elapsed_time": "0:34:47", "remaining_time": "2:51:50", "throughput": 19830.6, "total_tokens": 41394432} +{"current_steps": 13155, "total_steps": 78105, "loss": 0.405, "lr": 4.92906199700426e-06, "epoch": 0.8421355867101978, "percentage": 16.84, "elapsed_time": "0:34:48", "remaining_time": "2:51:49", "throughput": 19831.6, "total_tokens": 41409984} +{"current_steps": 13160, "total_steps": 78105, "loss": 0.5116, "lr": 4.9289297997240325e-06, "epoch": 0.8424556686511747, "percentage": 16.85, "elapsed_time": "0:34:48", "remaining_time": "2:51:48", "throughput": 19832.67, "total_tokens": 41425984} +{"current_steps": 13165, "total_steps": 78105, "loss": 0.5143, "lr": 4.928797481155704e-06, "epoch": 0.8427757505921516, "percentage": 16.86, "elapsed_time": "0:34:49", "remaining_time": "2:51:46", "throughput": 19833.71, "total_tokens": 41441920} +{"current_steps": 13170, "total_steps": 78105, "loss": 0.43, "lr": 4.928665041305879e-06, "epoch": 0.8430958325331285, "percentage": 16.86, "elapsed_time": "0:34:50", "remaining_time": "2:51:45", "throughput": 19834.82, "total_tokens": 41457664} +{"current_steps": 13175, "total_steps": 78105, "loss": 0.4053, "lr": 4.928532480181173e-06, "epoch": 0.8434159144741054, "percentage": 16.87, "elapsed_time": "0:34:50", "remaining_time": "2:51:44", "throughput": 19835.79, "total_tokens": 41472832} +{"current_steps": 13180, "total_steps": 78105, "loss": 0.4232, "lr": 4.928399797788205e-06, "epoch": 0.8437359964150822, "percentage": 16.87, "elapsed_time": "0:34:51", "remaining_time": "2:51:42", "throughput": 19837.31, "total_tokens": 41490368} +{"current_steps": 13185, "total_steps": 78105, "loss": 0.5054, "lr": 4.9282669941336e-06, "epoch": 0.8440560783560591, "percentage": 16.88, "elapsed_time": "0:34:52", "remaining_time": "2:51:41", "throughput": 19838.52, "total_tokens": 41506624} +{"current_steps": 13190, "total_steps": 78105, "loss": 0.3315, "lr": 4.9281340692239895e-06, "epoch": 0.844376160297036, "percentage": 16.89, "elapsed_time": "0:34:52", "remaining_time": "2:51:40", "throughput": 19839.79, "total_tokens": 41523264} +{"current_steps": 13195, "total_steps": 78105, "loss": 0.4889, "lr": 4.928001023066011e-06, "epoch": 0.844696242238013, "percentage": 16.89, "elapsed_time": "0:34:53", "remaining_time": "2:51:39", "throughput": 19840.81, "total_tokens": 41538944} +{"current_steps": 13200, "total_steps": 78105, "loss": 0.4197, "lr": 4.92786785566631e-06, "epoch": 0.8450163241789899, "percentage": 16.9, "elapsed_time": "0:34:54", "remaining_time": "2:51:37", "throughput": 19841.98, "total_tokens": 41554880} +{"current_steps": 13205, "total_steps": 78105, "loss": 0.6719, "lr": 4.927734567031533e-06, "epoch": 0.8453364061199667, "percentage": 16.91, "elapsed_time": "0:34:54", "remaining_time": "2:51:36", "throughput": 19842.83, "total_tokens": 41569280} +{"current_steps": 13210, "total_steps": 78105, "loss": 0.3513, "lr": 4.9276011571683375e-06, "epoch": 0.8456564880609436, "percentage": 16.91, "elapsed_time": "0:34:55", "remaining_time": "2:51:34", "throughput": 19843.82, "total_tokens": 41584576} +{"current_steps": 13215, "total_steps": 78105, "loss": 0.4335, "lr": 4.927467626083385e-06, "epoch": 0.8459765700019205, "percentage": 16.92, "elapsed_time": "0:34:56", "remaining_time": "2:51:33", "throughput": 19844.84, "total_tokens": 41600000} +{"current_steps": 13220, "total_steps": 78105, "loss": 0.477, "lr": 4.927333973783344e-06, "epoch": 0.8462966519428974, "percentage": 16.93, "elapsed_time": "0:34:56", "remaining_time": "2:51:31", "throughput": 19845.78, "total_tokens": 41615040} +{"current_steps": 13225, "total_steps": 78105, "loss": 0.4326, "lr": 4.9272002002748875e-06, "epoch": 0.8466167338838743, "percentage": 16.93, "elapsed_time": "0:34:57", "remaining_time": "2:51:30", "throughput": 19846.8, "total_tokens": 41630208} +{"current_steps": 13230, "total_steps": 78105, "loss": 0.5416, "lr": 4.927066305564696e-06, "epoch": 0.8469368158248511, "percentage": 16.94, "elapsed_time": "0:34:58", "remaining_time": "2:51:28", "throughput": 19847.83, "total_tokens": 41645440} +{"current_steps": 13235, "total_steps": 78105, "loss": 0.4848, "lr": 4.926932289659456e-06, "epoch": 0.847256897765828, "percentage": 16.95, "elapsed_time": "0:34:58", "remaining_time": "2:51:27", "throughput": 19848.91, "total_tokens": 41661184} +{"current_steps": 13240, "total_steps": 78105, "loss": 0.5978, "lr": 4.926798152565858e-06, "epoch": 0.8475769797068049, "percentage": 16.95, "elapsed_time": "0:34:59", "remaining_time": "2:51:26", "throughput": 19849.89, "total_tokens": 41676224} +{"current_steps": 13245, "total_steps": 78105, "loss": 0.4259, "lr": 4.926663894290601e-06, "epoch": 0.8478970616477818, "percentage": 16.96, "elapsed_time": "0:35:00", "remaining_time": "2:51:25", "throughput": 19849.43, "total_tokens": 41690816} +{"current_steps": 13250, "total_steps": 78105, "loss": 0.4539, "lr": 4.926529514840389e-06, "epoch": 0.8482171435887588, "percentage": 16.96, "elapsed_time": "0:35:01", "remaining_time": "2:51:23", "throughput": 19850.55, "total_tokens": 41706688} +{"current_steps": 13255, "total_steps": 78105, "loss": 0.4773, "lr": 4.926395014221933e-06, "epoch": 0.8485372255297357, "percentage": 16.97, "elapsed_time": "0:35:01", "remaining_time": "2:51:22", "throughput": 19851.59, "total_tokens": 41721920} +{"current_steps": 13260, "total_steps": 78105, "loss": 0.482, "lr": 4.926260392441948e-06, "epoch": 0.8488573074707125, "percentage": 16.98, "elapsed_time": "0:35:02", "remaining_time": "2:51:21", "throughput": 19852.79, "total_tokens": 41738112} +{"current_steps": 13265, "total_steps": 78105, "loss": 0.4383, "lr": 4.926125649507157e-06, "epoch": 0.8491773894116894, "percentage": 16.98, "elapsed_time": "0:35:03", "remaining_time": "2:51:19", "throughput": 19853.85, "total_tokens": 41753792} +{"current_steps": 13270, "total_steps": 78105, "loss": 0.3857, "lr": 4.92599078542429e-06, "epoch": 0.8494974713526663, "percentage": 16.99, "elapsed_time": "0:35:03", "remaining_time": "2:51:18", "throughput": 19855.39, "total_tokens": 41771328} +{"current_steps": 13275, "total_steps": 78105, "loss": 0.5227, "lr": 4.925855800200078e-06, "epoch": 0.8498175532936432, "percentage": 17.0, "elapsed_time": "0:35:04", "remaining_time": "2:51:17", "throughput": 19856.68, "total_tokens": 41787712} +{"current_steps": 13280, "total_steps": 78105, "loss": 0.3605, "lr": 4.925720693841263e-06, "epoch": 0.85013763523462, "percentage": 17.0, "elapsed_time": "0:35:05", "remaining_time": "2:51:16", "throughput": 19857.63, "total_tokens": 41803072} +{"current_steps": 13285, "total_steps": 78105, "loss": 0.4722, "lr": 4.925585466354592e-06, "epoch": 0.8504577171755969, "percentage": 17.01, "elapsed_time": "0:35:05", "remaining_time": "2:51:14", "throughput": 19858.74, "total_tokens": 41818688} +{"current_steps": 13290, "total_steps": 78105, "loss": 0.4006, "lr": 4.925450117746817e-06, "epoch": 0.8507777991165738, "percentage": 17.02, "elapsed_time": "0:35:06", "remaining_time": "2:51:13", "throughput": 19859.73, "total_tokens": 41833792} +{"current_steps": 13295, "total_steps": 78105, "loss": 0.5135, "lr": 4.925314648024697e-06, "epoch": 0.8510978810575507, "percentage": 17.02, "elapsed_time": "0:35:07", "remaining_time": "2:51:11", "throughput": 19861.11, "total_tokens": 41850880} +{"current_steps": 13300, "total_steps": 78105, "loss": 0.3769, "lr": 4.925179057194997e-06, "epoch": 0.8514179629985277, "percentage": 17.03, "elapsed_time": "0:35:07", "remaining_time": "2:51:10", "throughput": 19862.13, "total_tokens": 41866560} +{"current_steps": 13305, "total_steps": 78105, "loss": 0.5361, "lr": 4.925043345264486e-06, "epoch": 0.8517380449395046, "percentage": 17.03, "elapsed_time": "0:35:08", "remaining_time": "2:51:09", "throughput": 19863.2, "total_tokens": 41882240} +{"current_steps": 13310, "total_steps": 78105, "loss": 0.6143, "lr": 4.9249075122399435e-06, "epoch": 0.8520581268804814, "percentage": 17.04, "elapsed_time": "0:35:09", "remaining_time": "2:51:08", "throughput": 19864.49, "total_tokens": 41898880} +{"current_steps": 13315, "total_steps": 78105, "loss": 0.3941, "lr": 4.9247715581281495e-06, "epoch": 0.8523782088214583, "percentage": 17.05, "elapsed_time": "0:35:09", "remaining_time": "2:51:06", "throughput": 19865.84, "total_tokens": 41915968} +{"current_steps": 13320, "total_steps": 78105, "loss": 0.4157, "lr": 4.924635482935895e-06, "epoch": 0.8526982907624352, "percentage": 17.05, "elapsed_time": "0:35:10", "remaining_time": "2:51:05", "throughput": 19866.81, "total_tokens": 41930816} +{"current_steps": 13325, "total_steps": 78105, "loss": 0.6425, "lr": 4.924499286669974e-06, "epoch": 0.8530183727034121, "percentage": 17.06, "elapsed_time": "0:35:11", "remaining_time": "2:51:04", "throughput": 19868.02, "total_tokens": 41947200} +{"current_steps": 13330, "total_steps": 78105, "loss": 0.3953, "lr": 4.9243629693371865e-06, "epoch": 0.853338454644389, "percentage": 17.07, "elapsed_time": "0:35:11", "remaining_time": "2:51:02", "throughput": 19868.96, "total_tokens": 41962240} +{"current_steps": 13335, "total_steps": 78105, "loss": 0.6252, "lr": 4.924226530944341e-06, "epoch": 0.8536585365853658, "percentage": 17.07, "elapsed_time": "0:35:12", "remaining_time": "2:51:01", "throughput": 19870.16, "total_tokens": 41978752} +{"current_steps": 13340, "total_steps": 78105, "loss": 0.4485, "lr": 4.924089971498249e-06, "epoch": 0.8539786185263427, "percentage": 17.08, "elapsed_time": "0:35:13", "remaining_time": "2:51:00", "throughput": 19871.21, "total_tokens": 41994112} +{"current_steps": 13345, "total_steps": 78105, "loss": 0.4313, "lr": 4.923953291005731e-06, "epoch": 0.8542987004673196, "percentage": 17.09, "elapsed_time": "0:35:14", "remaining_time": "2:50:58", "throughput": 19872.36, "total_tokens": 42010432} +{"current_steps": 13350, "total_steps": 78105, "loss": 0.4504, "lr": 4.923816489473612e-06, "epoch": 0.8546187824082965, "percentage": 17.09, "elapsed_time": "0:35:14", "remaining_time": "2:50:57", "throughput": 19873.37, "total_tokens": 42025984} +{"current_steps": 13355, "total_steps": 78105, "loss": 0.4746, "lr": 4.923679566908722e-06, "epoch": 0.8549388643492735, "percentage": 17.1, "elapsed_time": "0:35:15", "remaining_time": "2:50:55", "throughput": 19874.3, "total_tokens": 42040832} +{"current_steps": 13360, "total_steps": 78105, "loss": 0.5126, "lr": 4.923542523317899e-06, "epoch": 0.8552589462902503, "percentage": 17.11, "elapsed_time": "0:35:16", "remaining_time": "2:50:54", "throughput": 19875.57, "total_tokens": 42057536} +{"current_steps": 13365, "total_steps": 78105, "loss": 0.4442, "lr": 4.923405358707986e-06, "epoch": 0.8555790282312272, "percentage": 17.11, "elapsed_time": "0:35:16", "remaining_time": "2:50:53", "throughput": 19876.65, "total_tokens": 42073152} +{"current_steps": 13370, "total_steps": 78105, "loss": 0.4596, "lr": 4.923268073085833e-06, "epoch": 0.8558991101722041, "percentage": 17.12, "elapsed_time": "0:35:17", "remaining_time": "2:50:51", "throughput": 19877.67, "total_tokens": 42088512} +{"current_steps": 13375, "total_steps": 78105, "loss": 0.507, "lr": 4.923130666458294e-06, "epoch": 0.856219192113181, "percentage": 17.12, "elapsed_time": "0:35:18", "remaining_time": "2:50:50", "throughput": 19878.59, "total_tokens": 42103552} +{"current_steps": 13380, "total_steps": 78105, "loss": 0.4841, "lr": 4.922993138832232e-06, "epoch": 0.8565392740541579, "percentage": 17.13, "elapsed_time": "0:35:18", "remaining_time": "2:50:49", "throughput": 19879.81, "total_tokens": 42119872} +{"current_steps": 13385, "total_steps": 78105, "loss": 0.5768, "lr": 4.922855490214514e-06, "epoch": 0.8568593559951347, "percentage": 17.14, "elapsed_time": "0:35:19", "remaining_time": "2:50:48", "throughput": 19881.15, "total_tokens": 42136832} +{"current_steps": 13390, "total_steps": 78105, "loss": 0.4577, "lr": 4.922717720612012e-06, "epoch": 0.8571794379361116, "percentage": 17.14, "elapsed_time": "0:35:20", "remaining_time": "2:50:46", "throughput": 19882.26, "total_tokens": 42152896} +{"current_steps": 13395, "total_steps": 78105, "loss": 0.6398, "lr": 4.922579830031606e-06, "epoch": 0.8574995198770885, "percentage": 17.15, "elapsed_time": "0:35:20", "remaining_time": "2:50:45", "throughput": 19883.17, "total_tokens": 42168064} +{"current_steps": 13400, "total_steps": 78105, "loss": 0.4283, "lr": 4.922441818480183e-06, "epoch": 0.8578196018180654, "percentage": 17.16, "elapsed_time": "0:35:21", "remaining_time": "2:50:43", "throughput": 19884.05, "total_tokens": 42182784} +{"current_steps": 13405, "total_steps": 78105, "loss": 0.4372, "lr": 4.922303685964634e-06, "epoch": 0.8581396837590423, "percentage": 17.16, "elapsed_time": "0:35:22", "remaining_time": "2:50:42", "throughput": 19885.3, "total_tokens": 42199744} +{"current_steps": 13410, "total_steps": 78105, "loss": 0.3618, "lr": 4.922165432491855e-06, "epoch": 0.8584597657000193, "percentage": 17.17, "elapsed_time": "0:35:22", "remaining_time": "2:50:41", "throughput": 19886.3, "total_tokens": 42215040} +{"current_steps": 13415, "total_steps": 78105, "loss": 0.4587, "lr": 4.922027058068752e-06, "epoch": 0.8587798476409961, "percentage": 17.18, "elapsed_time": "0:35:23", "remaining_time": "2:50:39", "throughput": 19887.2, "total_tokens": 42230144} +{"current_steps": 13420, "total_steps": 78105, "loss": 0.5418, "lr": 4.9218885627022336e-06, "epoch": 0.859099929581973, "percentage": 17.18, "elapsed_time": "0:35:24", "remaining_time": "2:50:38", "throughput": 19888.32, "total_tokens": 42246528} +{"current_steps": 13425, "total_steps": 78105, "loss": 0.5169, "lr": 4.921749946399215e-06, "epoch": 0.8594200115229499, "percentage": 17.19, "elapsed_time": "0:35:24", "remaining_time": "2:50:37", "throughput": 19889.51, "total_tokens": 42263168} +{"current_steps": 13430, "total_steps": 78105, "loss": 0.5094, "lr": 4.921611209166618e-06, "epoch": 0.8597400934639268, "percentage": 17.19, "elapsed_time": "0:35:25", "remaining_time": "2:50:36", "throughput": 19890.56, "total_tokens": 42278912} +{"current_steps": 13435, "total_steps": 78105, "loss": 0.495, "lr": 4.921472351011372e-06, "epoch": 0.8600601754049036, "percentage": 17.2, "elapsed_time": "0:35:26", "remaining_time": "2:50:34", "throughput": 19891.69, "total_tokens": 42294656} +{"current_steps": 13440, "total_steps": 78105, "loss": 0.7637, "lr": 4.92133337194041e-06, "epoch": 0.8603802573458805, "percentage": 17.21, "elapsed_time": "0:35:26", "remaining_time": "2:50:33", "throughput": 19892.93, "total_tokens": 42311552} +{"current_steps": 13445, "total_steps": 78105, "loss": 0.5743, "lr": 4.9211942719606705e-06, "epoch": 0.8607003392868574, "percentage": 17.21, "elapsed_time": "0:35:27", "remaining_time": "2:50:32", "throughput": 19894.08, "total_tokens": 42327552} +{"current_steps": 13450, "total_steps": 78105, "loss": 0.4584, "lr": 4.921055051079101e-06, "epoch": 0.8610204212278343, "percentage": 17.22, "elapsed_time": "0:35:28", "remaining_time": "2:50:31", "throughput": 19895.14, "total_tokens": 42343360} +{"current_steps": 13455, "total_steps": 78105, "loss": 0.5159, "lr": 4.920915709302653e-06, "epoch": 0.8613405031688112, "percentage": 17.23, "elapsed_time": "0:35:29", "remaining_time": "2:50:29", "throughput": 19896.42, "total_tokens": 42360064} +{"current_steps": 13460, "total_steps": 78105, "loss": 0.4687, "lr": 4.920776246638285e-06, "epoch": 0.8616605851097882, "percentage": 17.23, "elapsed_time": "0:35:29", "remaining_time": "2:50:28", "throughput": 19897.27, "total_tokens": 42374976} +{"current_steps": 13465, "total_steps": 78105, "loss": 0.4043, "lr": 4.920636663092961e-06, "epoch": 0.861980667050765, "percentage": 17.24, "elapsed_time": "0:35:30", "remaining_time": "2:50:27", "throughput": 19898.56, "total_tokens": 42391616} +{"current_steps": 13470, "total_steps": 78105, "loss": 0.4318, "lr": 4.9204969586736495e-06, "epoch": 0.8623007489917419, "percentage": 17.25, "elapsed_time": "0:35:31", "remaining_time": "2:50:25", "throughput": 19899.43, "total_tokens": 42406528} +{"current_steps": 13475, "total_steps": 78105, "loss": 0.5153, "lr": 4.9203571333873284e-06, "epoch": 0.8626208309327188, "percentage": 17.25, "elapsed_time": "0:35:31", "remaining_time": "2:50:24", "throughput": 19900.93, "total_tokens": 42424320} +{"current_steps": 13480, "total_steps": 78105, "loss": 0.611, "lr": 4.92021718724098e-06, "epoch": 0.8629409128736957, "percentage": 17.26, "elapsed_time": "0:35:32", "remaining_time": "2:50:23", "throughput": 19902.19, "total_tokens": 42440960} +{"current_steps": 13485, "total_steps": 78105, "loss": 0.3195, "lr": 4.92007712024159e-06, "epoch": 0.8632609948146726, "percentage": 17.27, "elapsed_time": "0:35:33", "remaining_time": "2:50:22", "throughput": 19903.2, "total_tokens": 42456448} +{"current_steps": 13490, "total_steps": 78105, "loss": 0.4619, "lr": 4.9199369323961554e-06, "epoch": 0.8635810767556494, "percentage": 17.27, "elapsed_time": "0:35:33", "remaining_time": "2:50:20", "throughput": 19904.28, "total_tokens": 42472448} +{"current_steps": 13495, "total_steps": 78105, "loss": 0.6213, "lr": 4.919796623711675e-06, "epoch": 0.8639011586966263, "percentage": 17.28, "elapsed_time": "0:35:34", "remaining_time": "2:50:19", "throughput": 19905.25, "total_tokens": 42487808} +{"current_steps": 13500, "total_steps": 78105, "loss": 0.4896, "lr": 4.919656194195156e-06, "epoch": 0.8642212406376032, "percentage": 17.28, "elapsed_time": "0:35:35", "remaining_time": "2:50:18", "throughput": 19906.35, "total_tokens": 42503744} +{"current_steps": 13505, "total_steps": 78105, "loss": 0.6244, "lr": 4.9195156438536095e-06, "epoch": 0.8645413225785801, "percentage": 17.29, "elapsed_time": "0:35:35", "remaining_time": "2:50:16", "throughput": 19907.73, "total_tokens": 42520768} +{"current_steps": 13510, "total_steps": 78105, "loss": 0.5117, "lr": 4.9193749726940555e-06, "epoch": 0.864861404519557, "percentage": 17.3, "elapsed_time": "0:35:36", "remaining_time": "2:50:15", "throughput": 19908.93, "total_tokens": 42537408} +{"current_steps": 13515, "total_steps": 78105, "loss": 0.4307, "lr": 4.919234180723517e-06, "epoch": 0.8651814864605339, "percentage": 17.3, "elapsed_time": "0:35:37", "remaining_time": "2:50:14", "throughput": 19910.09, "total_tokens": 42553728} +{"current_steps": 13520, "total_steps": 78105, "loss": 0.4898, "lr": 4.9190932679490245e-06, "epoch": 0.8655015684015108, "percentage": 17.31, "elapsed_time": "0:35:37", "remaining_time": "2:50:13", "throughput": 19911.04, "total_tokens": 42568896} +{"current_steps": 13525, "total_steps": 78105, "loss": 0.4423, "lr": 4.918952234377615e-06, "epoch": 0.8658216503424877, "percentage": 17.32, "elapsed_time": "0:35:38", "remaining_time": "2:50:11", "throughput": 19911.95, "total_tokens": 42584000} +{"current_steps": 13530, "total_steps": 78105, "loss": 0.4808, "lr": 4.9188110800163306e-06, "epoch": 0.8661417322834646, "percentage": 17.32, "elapsed_time": "0:35:39", "remaining_time": "2:50:10", "throughput": 19913.06, "total_tokens": 42600192} +{"current_steps": 13535, "total_steps": 78105, "loss": 0.3951, "lr": 4.9186698048722205e-06, "epoch": 0.8664618142244415, "percentage": 17.33, "elapsed_time": "0:35:39", "remaining_time": "2:50:08", "throughput": 19913.81, "total_tokens": 42614656} +{"current_steps": 13540, "total_steps": 78105, "loss": 0.4872, "lr": 4.918528408952338e-06, "epoch": 0.8667818961654183, "percentage": 17.34, "elapsed_time": "0:35:40", "remaining_time": "2:50:07", "throughput": 19914.69, "total_tokens": 42629504} +{"current_steps": 13545, "total_steps": 78105, "loss": 0.5433, "lr": 4.9183868922637446e-06, "epoch": 0.8671019781063952, "percentage": 17.34, "elapsed_time": "0:35:41", "remaining_time": "2:50:05", "throughput": 19915.54, "total_tokens": 42644224} +{"current_steps": 13550, "total_steps": 78105, "loss": 0.359, "lr": 4.918245254813507e-06, "epoch": 0.8674220600473721, "percentage": 17.35, "elapsed_time": "0:35:41", "remaining_time": "2:50:04", "throughput": 19916.49, "total_tokens": 42659584} +{"current_steps": 13555, "total_steps": 78105, "loss": 0.4165, "lr": 4.918103496608698e-06, "epoch": 0.867742141988349, "percentage": 17.35, "elapsed_time": "0:35:42", "remaining_time": "2:50:03", "throughput": 19917.65, "total_tokens": 42675776} +{"current_steps": 13560, "total_steps": 78105, "loss": 0.5199, "lr": 4.917961617656395e-06, "epoch": 0.8680622239293259, "percentage": 17.36, "elapsed_time": "0:35:43", "remaining_time": "2:50:02", "throughput": 19919.01, "total_tokens": 42693184} +{"current_steps": 13565, "total_steps": 78105, "loss": 0.5467, "lr": 4.917819617963684e-06, "epoch": 0.8683823058703029, "percentage": 17.37, "elapsed_time": "0:35:44", "remaining_time": "2:50:01", "throughput": 19920.44, "total_tokens": 42710784} +{"current_steps": 13570, "total_steps": 78105, "loss": 0.5478, "lr": 4.917677497537655e-06, "epoch": 0.8687023878112797, "percentage": 17.37, "elapsed_time": "0:35:44", "remaining_time": "2:49:59", "throughput": 19921.6, "total_tokens": 42727040} +{"current_steps": 13575, "total_steps": 78105, "loss": 0.4198, "lr": 4.917535256385405e-06, "epoch": 0.8690224697522566, "percentage": 17.38, "elapsed_time": "0:35:45", "remaining_time": "2:49:58", "throughput": 19922.55, "total_tokens": 42742208} +{"current_steps": 13580, "total_steps": 78105, "loss": 0.464, "lr": 4.917392894514037e-06, "epoch": 0.8693425516932335, "percentage": 17.39, "elapsed_time": "0:35:46", "remaining_time": "2:49:57", "throughput": 19923.71, "total_tokens": 42758464} +{"current_steps": 13585, "total_steps": 78105, "loss": 0.5945, "lr": 4.917250411930659e-06, "epoch": 0.8696626336342104, "percentage": 17.39, "elapsed_time": "0:35:46", "remaining_time": "2:49:55", "throughput": 19924.73, "total_tokens": 42774016} +{"current_steps": 13590, "total_steps": 78105, "loss": 0.4659, "lr": 4.917107808642387e-06, "epoch": 0.8699827155751872, "percentage": 17.4, "elapsed_time": "0:35:47", "remaining_time": "2:49:54", "throughput": 19925.7, "total_tokens": 42789248} +{"current_steps": 13595, "total_steps": 78105, "loss": 0.4435, "lr": 4.916965084656341e-06, "epoch": 0.8703027975161641, "percentage": 17.41, "elapsed_time": "0:35:48", "remaining_time": "2:49:53", "throughput": 19926.68, "total_tokens": 42804608} +{"current_steps": 13600, "total_steps": 78105, "loss": 0.5812, "lr": 4.916822239979649e-06, "epoch": 0.870622879457141, "percentage": 17.41, "elapsed_time": "0:35:48", "remaining_time": "2:49:51", "throughput": 19927.55, "total_tokens": 42819584} +{"current_steps": 13605, "total_steps": 78105, "loss": 0.4933, "lr": 4.916679274619442e-06, "epoch": 0.8709429613981179, "percentage": 17.42, "elapsed_time": "0:35:49", "remaining_time": "2:49:50", "throughput": 19928.53, "total_tokens": 42835200} +{"current_steps": 13610, "total_steps": 78105, "loss": 0.4593, "lr": 4.91653618858286e-06, "epoch": 0.8712630433390948, "percentage": 17.43, "elapsed_time": "0:35:50", "remaining_time": "2:49:49", "throughput": 19929.86, "total_tokens": 42852672} +{"current_steps": 13615, "total_steps": 78105, "loss": 0.3882, "lr": 4.916392981877048e-06, "epoch": 0.8715831252800716, "percentage": 17.43, "elapsed_time": "0:35:50", "remaining_time": "2:49:47", "throughput": 19930.95, "total_tokens": 42868672} +{"current_steps": 13620, "total_steps": 78105, "loss": 0.5423, "lr": 4.916249654509159e-06, "epoch": 0.8719032072210486, "percentage": 17.44, "elapsed_time": "0:35:51", "remaining_time": "2:49:46", "throughput": 19931.92, "total_tokens": 42884096} +{"current_steps": 13625, "total_steps": 78105, "loss": 0.5687, "lr": 4.916106206486346e-06, "epoch": 0.8722232891620255, "percentage": 17.44, "elapsed_time": "0:35:52", "remaining_time": "2:49:45", "throughput": 19932.81, "total_tokens": 42898752} +{"current_steps": 13630, "total_steps": 78105, "loss": 0.632, "lr": 4.915962637815774e-06, "epoch": 0.8725433711030024, "percentage": 17.45, "elapsed_time": "0:35:52", "remaining_time": "2:49:43", "throughput": 19933.9, "total_tokens": 42914688} +{"current_steps": 13635, "total_steps": 78105, "loss": 0.4514, "lr": 4.915818948504614e-06, "epoch": 0.8728634530439793, "percentage": 17.46, "elapsed_time": "0:35:53", "remaining_time": "2:49:42", "throughput": 19935.06, "total_tokens": 42930688} +{"current_steps": 13640, "total_steps": 78105, "loss": 0.5396, "lr": 4.9156751385600375e-06, "epoch": 0.8731835349849562, "percentage": 17.46, "elapsed_time": "0:35:54", "remaining_time": "2:49:41", "throughput": 19936.24, "total_tokens": 42947008} +{"current_steps": 13645, "total_steps": 78105, "loss": 0.6046, "lr": 4.915531207989228e-06, "epoch": 0.873503616925933, "percentage": 17.47, "elapsed_time": "0:35:54", "remaining_time": "2:49:39", "throughput": 19937.25, "total_tokens": 42962816} +{"current_steps": 13650, "total_steps": 78105, "loss": 0.4804, "lr": 4.915387156799371e-06, "epoch": 0.8738236988669099, "percentage": 17.48, "elapsed_time": "0:35:55", "remaining_time": "2:49:38", "throughput": 19938.06, "total_tokens": 42977664} +{"current_steps": 13655, "total_steps": 78105, "loss": 0.5684, "lr": 4.9152429849976625e-06, "epoch": 0.8741437808078868, "percentage": 17.48, "elapsed_time": "0:35:56", "remaining_time": "2:49:37", "throughput": 19939.02, "total_tokens": 42993472} +{"current_steps": 13660, "total_steps": 78105, "loss": 0.581, "lr": 4.915098692591299e-06, "epoch": 0.8744638627488637, "percentage": 17.49, "elapsed_time": "0:35:56", "remaining_time": "2:49:36", "throughput": 19940.33, "total_tokens": 43010688} +{"current_steps": 13665, "total_steps": 78105, "loss": 0.4333, "lr": 4.914954279587486e-06, "epoch": 0.8747839446898406, "percentage": 17.5, "elapsed_time": "0:35:57", "remaining_time": "2:49:34", "throughput": 19941.55, "total_tokens": 43027392} +{"current_steps": 13670, "total_steps": 78105, "loss": 0.5035, "lr": 4.914809745993437e-06, "epoch": 0.8751040266308175, "percentage": 17.5, "elapsed_time": "0:35:58", "remaining_time": "2:49:33", "throughput": 19942.52, "total_tokens": 43043008} +{"current_steps": 13675, "total_steps": 78105, "loss": 0.5147, "lr": 4.9146650918163655e-06, "epoch": 0.8754241085717944, "percentage": 17.51, "elapsed_time": "0:35:59", "remaining_time": "2:49:32", "throughput": 19943.6, "total_tokens": 43059072} +{"current_steps": 13680, "total_steps": 78105, "loss": 0.4494, "lr": 4.914520317063498e-06, "epoch": 0.8757441905127713, "percentage": 17.51, "elapsed_time": "0:35:59", "remaining_time": "2:49:31", "throughput": 19944.6, "total_tokens": 43074624} +{"current_steps": 13685, "total_steps": 78105, "loss": 0.5185, "lr": 4.914375421742062e-06, "epoch": 0.8760642724537482, "percentage": 17.52, "elapsed_time": "0:36:00", "remaining_time": "2:49:29", "throughput": 19945.47, "total_tokens": 43089536} +{"current_steps": 13690, "total_steps": 78105, "loss": 0.5262, "lr": 4.914230405859294e-06, "epoch": 0.8763843543947251, "percentage": 17.53, "elapsed_time": "0:36:01", "remaining_time": "2:49:28", "throughput": 19946.33, "total_tokens": 43104512} +{"current_steps": 13695, "total_steps": 78105, "loss": 0.4122, "lr": 4.914085269422435e-06, "epoch": 0.8767044363357019, "percentage": 17.53, "elapsed_time": "0:36:01", "remaining_time": "2:49:26", "throughput": 19947.42, "total_tokens": 43120640} +{"current_steps": 13700, "total_steps": 78105, "loss": 0.5458, "lr": 4.913940012438732e-06, "epoch": 0.8770245182766788, "percentage": 17.54, "elapsed_time": "0:36:02", "remaining_time": "2:49:25", "throughput": 19948.41, "total_tokens": 43136384} +{"current_steps": 13705, "total_steps": 78105, "loss": 0.4664, "lr": 4.9137946349154386e-06, "epoch": 0.8773446002176557, "percentage": 17.55, "elapsed_time": "0:36:03", "remaining_time": "2:49:24", "throughput": 19949.65, "total_tokens": 43153216} +{"current_steps": 13710, "total_steps": 78105, "loss": 0.4954, "lr": 4.913649136859814e-06, "epoch": 0.8776646821586326, "percentage": 17.55, "elapsed_time": "0:36:03", "remaining_time": "2:49:23", "throughput": 19950.44, "total_tokens": 43167936} +{"current_steps": 13715, "total_steps": 78105, "loss": 0.4044, "lr": 4.9135035182791235e-06, "epoch": 0.8779847640996095, "percentage": 17.56, "elapsed_time": "0:36:04", "remaining_time": "2:49:21", "throughput": 19951.41, "total_tokens": 43183872} +{"current_steps": 13720, "total_steps": 78105, "loss": 0.6712, "lr": 4.913357779180639e-06, "epoch": 0.8783048460405863, "percentage": 17.57, "elapsed_time": "0:36:05", "remaining_time": "2:49:20", "throughput": 19952.45, "total_tokens": 43200256} +{"current_steps": 13725, "total_steps": 78105, "loss": 0.5041, "lr": 4.9132119195716375e-06, "epoch": 0.8786249279815633, "percentage": 17.57, "elapsed_time": "0:36:05", "remaining_time": "2:49:19", "throughput": 19953.73, "total_tokens": 43217600} +{"current_steps": 13730, "total_steps": 78105, "loss": 0.4631, "lr": 4.9130659394594025e-06, "epoch": 0.8789450099225402, "percentage": 17.58, "elapsed_time": "0:36:06", "remaining_time": "2:49:18", "throughput": 19954.75, "total_tokens": 43233344} +{"current_steps": 13735, "total_steps": 78105, "loss": 0.7038, "lr": 4.912919838851224e-06, "epoch": 0.8792650918635171, "percentage": 17.59, "elapsed_time": "0:36:07", "remaining_time": "2:49:17", "throughput": 19955.72, "total_tokens": 43249280} +{"current_steps": 13740, "total_steps": 78105, "loss": 0.508, "lr": 4.912773617754398e-06, "epoch": 0.879585173804494, "percentage": 17.59, "elapsed_time": "0:36:07", "remaining_time": "2:49:15", "throughput": 19956.73, "total_tokens": 43265024} +{"current_steps": 13745, "total_steps": 78105, "loss": 0.5256, "lr": 4.912627276176224e-06, "epoch": 0.8799052557454708, "percentage": 17.6, "elapsed_time": "0:36:08", "remaining_time": "2:49:14", "throughput": 19957.69, "total_tokens": 43280576} +{"current_steps": 13750, "total_steps": 78105, "loss": 0.451, "lr": 4.912480814124011e-06, "epoch": 0.8802253376864477, "percentage": 17.6, "elapsed_time": "0:36:09", "remaining_time": "2:49:13", "throughput": 19958.6, "total_tokens": 43296064} +{"current_steps": 13755, "total_steps": 78105, "loss": 0.5008, "lr": 4.912334231605073e-06, "epoch": 0.8805454196274246, "percentage": 17.61, "elapsed_time": "0:36:09", "remaining_time": "2:49:11", "throughput": 19959.55, "total_tokens": 43311552} +{"current_steps": 13760, "total_steps": 78105, "loss": 0.5462, "lr": 4.912187528626729e-06, "epoch": 0.8808655015684015, "percentage": 17.62, "elapsed_time": "0:36:10", "remaining_time": "2:49:10", "throughput": 19960.43, "total_tokens": 43326272} +{"current_steps": 13765, "total_steps": 78105, "loss": 0.452, "lr": 4.912040705196303e-06, "epoch": 0.8811855835093784, "percentage": 17.62, "elapsed_time": "0:36:11", "remaining_time": "2:49:09", "throughput": 19961.54, "total_tokens": 43342592} +{"current_steps": 13770, "total_steps": 78105, "loss": 0.3922, "lr": 4.911893761321129e-06, "epoch": 0.8815056654503552, "percentage": 17.63, "elapsed_time": "0:36:11", "remaining_time": "2:49:07", "throughput": 19962.47, "total_tokens": 43357888} +{"current_steps": 13775, "total_steps": 78105, "loss": 0.3391, "lr": 4.911746697008543e-06, "epoch": 0.8818257473913321, "percentage": 17.64, "elapsed_time": "0:36:12", "remaining_time": "2:49:06", "throughput": 19963.56, "total_tokens": 43374272} +{"current_steps": 13780, "total_steps": 78105, "loss": 0.3914, "lr": 4.91159951226589e-06, "epoch": 0.8821458293323091, "percentage": 17.64, "elapsed_time": "0:36:13", "remaining_time": "2:49:05", "throughput": 19965.14, "total_tokens": 43393280} +{"current_steps": 13785, "total_steps": 78105, "loss": 0.4902, "lr": 4.9114522071005185e-06, "epoch": 0.882465911273286, "percentage": 17.65, "elapsed_time": "0:36:14", "remaining_time": "2:49:04", "throughput": 19966.33, "total_tokens": 43409600} +{"current_steps": 13790, "total_steps": 78105, "loss": 0.5714, "lr": 4.911304781519785e-06, "epoch": 0.8827859932142629, "percentage": 17.66, "elapsed_time": "0:36:14", "remaining_time": "2:49:02", "throughput": 19967.21, "total_tokens": 43424384} +{"current_steps": 13795, "total_steps": 78105, "loss": 0.4055, "lr": 4.911157235531051e-06, "epoch": 0.8831060751552398, "percentage": 17.66, "elapsed_time": "0:36:15", "remaining_time": "2:49:01", "throughput": 19968.19, "total_tokens": 43440128} +{"current_steps": 13800, "total_steps": 78105, "loss": 0.578, "lr": 4.911009569141683e-06, "epoch": 0.8834261570962166, "percentage": 17.67, "elapsed_time": "0:36:16", "remaining_time": "2:49:00", "throughput": 19969.07, "total_tokens": 43455168} +{"current_steps": 13805, "total_steps": 78105, "loss": 0.4172, "lr": 4.910861782359057e-06, "epoch": 0.8837462390371935, "percentage": 17.67, "elapsed_time": "0:36:16", "remaining_time": "2:48:59", "throughput": 19970.33, "total_tokens": 43472064} +{"current_steps": 13810, "total_steps": 78105, "loss": 0.6041, "lr": 4.910713875190549e-06, "epoch": 0.8840663209781704, "percentage": 17.68, "elapsed_time": "0:36:17", "remaining_time": "2:48:57", "throughput": 19971.36, "total_tokens": 43488000} +{"current_steps": 13815, "total_steps": 78105, "loss": 0.4479, "lr": 4.910565847643549e-06, "epoch": 0.8843864029191473, "percentage": 17.69, "elapsed_time": "0:36:18", "remaining_time": "2:48:56", "throughput": 19972.37, "total_tokens": 43503488} +{"current_steps": 13820, "total_steps": 78105, "loss": 0.5161, "lr": 4.910417699725446e-06, "epoch": 0.8847064848601242, "percentage": 17.69, "elapsed_time": "0:36:18", "remaining_time": "2:48:55", "throughput": 19973.26, "total_tokens": 43518336} +{"current_steps": 13825, "total_steps": 78105, "loss": 0.42, "lr": 4.910269431443639e-06, "epoch": 0.885026566801101, "percentage": 17.7, "elapsed_time": "0:36:19", "remaining_time": "2:48:53", "throughput": 19974.04, "total_tokens": 43532800} +{"current_steps": 13830, "total_steps": 78105, "loss": 0.3951, "lr": 4.9101210428055306e-06, "epoch": 0.885346648742078, "percentage": 17.71, "elapsed_time": "0:36:20", "remaining_time": "2:48:52", "throughput": 19974.93, "total_tokens": 43547648} +{"current_steps": 13835, "total_steps": 78105, "loss": 0.4051, "lr": 4.909972533818531e-06, "epoch": 0.8856667306830549, "percentage": 17.71, "elapsed_time": "0:36:20", "remaining_time": "2:48:50", "throughput": 19975.78, "total_tokens": 43562688} +{"current_steps": 13840, "total_steps": 78105, "loss": 0.4491, "lr": 4.909823904490057e-06, "epoch": 0.8859868126240318, "percentage": 17.72, "elapsed_time": "0:36:21", "remaining_time": "2:48:49", "throughput": 19976.78, "total_tokens": 43578176} +{"current_steps": 13845, "total_steps": 78105, "loss": 0.4316, "lr": 4.909675154827529e-06, "epoch": 0.8863068945650087, "percentage": 17.73, "elapsed_time": "0:36:22", "remaining_time": "2:48:48", "throughput": 19977.83, "total_tokens": 43593920} +{"current_steps": 13850, "total_steps": 78105, "loss": 0.5023, "lr": 4.909526284838375e-06, "epoch": 0.8866269765059855, "percentage": 17.73, "elapsed_time": "0:36:22", "remaining_time": "2:48:46", "throughput": 19979.09, "total_tokens": 43610944} +{"current_steps": 13855, "total_steps": 78105, "loss": 0.5231, "lr": 4.90937729453003e-06, "epoch": 0.8869470584469624, "percentage": 17.74, "elapsed_time": "0:36:23", "remaining_time": "2:48:45", "throughput": 19980.15, "total_tokens": 43627008} +{"current_steps": 13860, "total_steps": 78105, "loss": 0.6215, "lr": 4.909228183909932e-06, "epoch": 0.8872671403879393, "percentage": 17.75, "elapsed_time": "0:36:24", "remaining_time": "2:48:44", "throughput": 19981.16, "total_tokens": 43642752} +{"current_steps": 13865, "total_steps": 78105, "loss": 0.416, "lr": 4.909078952985529e-06, "epoch": 0.8875872223289162, "percentage": 17.75, "elapsed_time": "0:36:24", "remaining_time": "2:48:43", "throughput": 19982.13, "total_tokens": 43658496} +{"current_steps": 13870, "total_steps": 78105, "loss": 0.5573, "lr": 4.90892960176427e-06, "epoch": 0.8879073042698931, "percentage": 17.76, "elapsed_time": "0:36:25", "remaining_time": "2:48:41", "throughput": 19983.2, "total_tokens": 43674304} +{"current_steps": 13875, "total_steps": 78105, "loss": 0.446, "lr": 4.908780130253615e-06, "epoch": 0.8882273862108699, "percentage": 17.76, "elapsed_time": "0:36:26", "remaining_time": "2:48:40", "throughput": 19984.16, "total_tokens": 43689536} +{"current_steps": 13880, "total_steps": 78105, "loss": 0.4856, "lr": 4.908630538461027e-06, "epoch": 0.8885474681518468, "percentage": 17.77, "elapsed_time": "0:36:26", "remaining_time": "2:48:39", "throughput": 19985.34, "total_tokens": 43706496} +{"current_steps": 13885, "total_steps": 78105, "loss": 0.378, "lr": 4.908480826393976e-06, "epoch": 0.8888675500928238, "percentage": 17.78, "elapsed_time": "0:36:27", "remaining_time": "2:48:37", "throughput": 19986.17, "total_tokens": 43721408} +{"current_steps": 13890, "total_steps": 78105, "loss": 0.5503, "lr": 4.908330994059939e-06, "epoch": 0.8891876320338007, "percentage": 17.78, "elapsed_time": "0:36:28", "remaining_time": "2:48:36", "throughput": 19987.26, "total_tokens": 43737536} +{"current_steps": 13895, "total_steps": 78105, "loss": 0.5514, "lr": 4.908181041466396e-06, "epoch": 0.8895077139747776, "percentage": 17.79, "elapsed_time": "0:36:28", "remaining_time": "2:48:35", "throughput": 19988.17, "total_tokens": 43752640} +{"current_steps": 13900, "total_steps": 78105, "loss": 0.5013, "lr": 4.9080309686208344e-06, "epoch": 0.8898277959157544, "percentage": 17.8, "elapsed_time": "0:36:29", "remaining_time": "2:48:33", "throughput": 19989.14, "total_tokens": 43768384} +{"current_steps": 13905, "total_steps": 78105, "loss": 0.3755, "lr": 4.90788077553075e-06, "epoch": 0.8901478778567313, "percentage": 17.8, "elapsed_time": "0:36:30", "remaining_time": "2:48:32", "throughput": 19990.18, "total_tokens": 43784320} +{"current_steps": 13910, "total_steps": 78105, "loss": 0.5875, "lr": 4.907730462203642e-06, "epoch": 0.8904679597977082, "percentage": 17.81, "elapsed_time": "0:36:30", "remaining_time": "2:48:31", "throughput": 19991.05, "total_tokens": 43799232} +{"current_steps": 13915, "total_steps": 78105, "loss": 0.5377, "lr": 4.907580028647016e-06, "epoch": 0.8907880417386851, "percentage": 17.82, "elapsed_time": "0:36:31", "remaining_time": "2:48:29", "throughput": 19992.18, "total_tokens": 43815360} +{"current_steps": 13920, "total_steps": 78105, "loss": 0.4908, "lr": 4.907429474868384e-06, "epoch": 0.891108123679662, "percentage": 17.82, "elapsed_time": "0:36:32", "remaining_time": "2:48:28", "throughput": 19992.99, "total_tokens": 43830336} +{"current_steps": 13925, "total_steps": 78105, "loss": 0.484, "lr": 4.9072788008752635e-06, "epoch": 0.8914282056206388, "percentage": 17.83, "elapsed_time": "0:36:32", "remaining_time": "2:48:27", "throughput": 19994.08, "total_tokens": 43846656} +{"current_steps": 13930, "total_steps": 78105, "loss": 0.4801, "lr": 4.9071280066751794e-06, "epoch": 0.8917482875616157, "percentage": 17.83, "elapsed_time": "0:36:33", "remaining_time": "2:48:26", "throughput": 19995.12, "total_tokens": 43862720} +{"current_steps": 13935, "total_steps": 78105, "loss": 0.4507, "lr": 4.906977092275661e-06, "epoch": 0.8920683695025927, "percentage": 17.84, "elapsed_time": "0:36:34", "remaining_time": "2:48:24", "throughput": 19995.81, "total_tokens": 43876800} +{"current_steps": 13940, "total_steps": 78105, "loss": 0.3272, "lr": 4.9068260576842435e-06, "epoch": 0.8923884514435696, "percentage": 17.85, "elapsed_time": "0:36:34", "remaining_time": "2:48:23", "throughput": 19996.74, "total_tokens": 43892160} +{"current_steps": 13945, "total_steps": 78105, "loss": 0.4564, "lr": 4.90667490290847e-06, "epoch": 0.8927085333845465, "percentage": 17.85, "elapsed_time": "0:36:35", "remaining_time": "2:48:22", "throughput": 19997.83, "total_tokens": 43908416} +{"current_steps": 13950, "total_steps": 78105, "loss": 0.4872, "lr": 4.906523627955887e-06, "epoch": 0.8930286153255234, "percentage": 17.86, "elapsed_time": "0:36:36", "remaining_time": "2:48:20", "throughput": 19998.71, "total_tokens": 43923712} +{"current_steps": 13955, "total_steps": 78105, "loss": 0.5561, "lr": 4.90637223283405e-06, "epoch": 0.8933486972665002, "percentage": 17.87, "elapsed_time": "0:36:37", "remaining_time": "2:48:19", "throughput": 19999.68, "total_tokens": 43939520} +{"current_steps": 13960, "total_steps": 78105, "loss": 0.5394, "lr": 4.9062207175505174e-06, "epoch": 0.8936687792074771, "percentage": 17.87, "elapsed_time": "0:36:37", "remaining_time": "2:48:18", "throughput": 20000.55, "total_tokens": 43954688} +{"current_steps": 13965, "total_steps": 78105, "loss": 0.4483, "lr": 4.906069082112856e-06, "epoch": 0.893988861148454, "percentage": 17.88, "elapsed_time": "0:36:38", "remaining_time": "2:48:16", "throughput": 20001.41, "total_tokens": 43969600} +{"current_steps": 13970, "total_steps": 78105, "loss": 0.4856, "lr": 4.905917326528638e-06, "epoch": 0.8943089430894309, "percentage": 17.89, "elapsed_time": "0:36:39", "remaining_time": "2:48:15", "throughput": 20002.43, "total_tokens": 43985472} +{"current_steps": 13975, "total_steps": 78105, "loss": 0.5622, "lr": 4.905765450805439e-06, "epoch": 0.8946290250304078, "percentage": 17.89, "elapsed_time": "0:36:39", "remaining_time": "2:48:14", "throughput": 20003.37, "total_tokens": 44000768} +{"current_steps": 13980, "total_steps": 78105, "loss": 0.689, "lr": 4.905613454950846e-06, "epoch": 0.8949491069713846, "percentage": 17.9, "elapsed_time": "0:36:40", "remaining_time": "2:48:12", "throughput": 20004.39, "total_tokens": 44017088} +{"current_steps": 13985, "total_steps": 78105, "loss": 0.493, "lr": 4.905461338972447e-06, "epoch": 0.8952691889123615, "percentage": 17.91, "elapsed_time": "0:36:41", "remaining_time": "2:48:11", "throughput": 20005.16, "total_tokens": 44031488} +{"current_steps": 13990, "total_steps": 78105, "loss": 0.4007, "lr": 4.9053091028778375e-06, "epoch": 0.8955892708533385, "percentage": 17.91, "elapsed_time": "0:36:41", "remaining_time": "2:48:10", "throughput": 20006.14, "total_tokens": 44047296} +{"current_steps": 13995, "total_steps": 78105, "loss": 0.4141, "lr": 4.905156746674622e-06, "epoch": 0.8959093527943154, "percentage": 17.92, "elapsed_time": "0:36:42", "remaining_time": "2:48:08", "throughput": 20007.23, "total_tokens": 44063744} +{"current_steps": 14000, "total_steps": 78105, "loss": 0.5588, "lr": 4.905004270370405e-06, "epoch": 0.8962294347352923, "percentage": 17.92, "elapsed_time": "0:36:43", "remaining_time": "2:48:11", "throughput": 20001.2, "total_tokens": 44079168} +{"current_steps": 14005, "total_steps": 78105, "loss": 0.4681, "lr": 4.904851673972803e-06, "epoch": 0.8965495166762691, "percentage": 17.93, "elapsed_time": "0:36:44", "remaining_time": "2:48:09", "throughput": 20001.98, "total_tokens": 44093824} +{"current_steps": 14010, "total_steps": 78105, "loss": 0.6702, "lr": 4.904698957489434e-06, "epoch": 0.896869598617246, "percentage": 17.94, "elapsed_time": "0:36:45", "remaining_time": "2:48:08", "throughput": 20003.24, "total_tokens": 44111296} +{"current_steps": 14015, "total_steps": 78105, "loss": 0.5934, "lr": 4.904546120927925e-06, "epoch": 0.8971896805582229, "percentage": 17.94, "elapsed_time": "0:36:45", "remaining_time": "2:48:07", "throughput": 20004.49, "total_tokens": 44128000} +{"current_steps": 14020, "total_steps": 78105, "loss": 0.438, "lr": 4.904393164295908e-06, "epoch": 0.8975097624991998, "percentage": 17.95, "elapsed_time": "0:36:46", "remaining_time": "2:48:06", "throughput": 20005.42, "total_tokens": 44143488} +{"current_steps": 14025, "total_steps": 78105, "loss": 0.4309, "lr": 4.90424008760102e-06, "epoch": 0.8978298444401767, "percentage": 17.96, "elapsed_time": "0:36:47", "remaining_time": "2:48:04", "throughput": 20006.41, "total_tokens": 44158976} +{"current_steps": 14030, "total_steps": 78105, "loss": 0.6214, "lr": 4.904086890850905e-06, "epoch": 0.8981499263811535, "percentage": 17.96, "elapsed_time": "0:36:47", "remaining_time": "2:48:03", "throughput": 20007.66, "total_tokens": 44175808} +{"current_steps": 14035, "total_steps": 78105, "loss": 0.4426, "lr": 4.903933574053213e-06, "epoch": 0.8984700083221304, "percentage": 17.97, "elapsed_time": "0:36:48", "remaining_time": "2:48:02", "throughput": 20008.45, "total_tokens": 44190336} +{"current_steps": 14040, "total_steps": 78105, "loss": 0.4952, "lr": 4.9037801372156e-06, "epoch": 0.8987900902631074, "percentage": 17.98, "elapsed_time": "0:36:49", "remaining_time": "2:48:02", "throughput": 20006.55, "total_tokens": 44205696} +{"current_steps": 14045, "total_steps": 78105, "loss": 0.5449, "lr": 4.903626580345729e-06, "epoch": 0.8991101722040843, "percentage": 17.98, "elapsed_time": "0:36:50", "remaining_time": "2:48:01", "throughput": 20007.65, "total_tokens": 44222016} +{"current_steps": 14050, "total_steps": 78105, "loss": 0.494, "lr": 4.9034729034512655e-06, "epoch": 0.8994302541450612, "percentage": 17.99, "elapsed_time": "0:36:50", "remaining_time": "2:47:59", "throughput": 20008.79, "total_tokens": 44238592} +{"current_steps": 14055, "total_steps": 78105, "loss": 0.3467, "lr": 4.903319106539884e-06, "epoch": 0.899750336086038, "percentage": 18.0, "elapsed_time": "0:36:51", "remaining_time": "2:47:58", "throughput": 20009.71, "total_tokens": 44254016} +{"current_steps": 14060, "total_steps": 78105, "loss": 0.5265, "lr": 4.9031651896192655e-06, "epoch": 0.9000704180270149, "percentage": 18.0, "elapsed_time": "0:36:52", "remaining_time": "2:47:57", "throughput": 20010.68, "total_tokens": 44269376} +{"current_steps": 14065, "total_steps": 78105, "loss": 0.6117, "lr": 4.903011152697095e-06, "epoch": 0.9003904999679918, "percentage": 18.01, "elapsed_time": "0:36:52", "remaining_time": "2:47:55", "throughput": 20011.55, "total_tokens": 44284672} +{"current_steps": 14070, "total_steps": 78105, "loss": 0.5628, "lr": 4.902856995781064e-06, "epoch": 0.9007105819089687, "percentage": 18.01, "elapsed_time": "0:36:53", "remaining_time": "2:47:54", "throughput": 20012.55, "total_tokens": 44300224} +{"current_steps": 14075, "total_steps": 78105, "loss": 0.6203, "lr": 4.902702718878871e-06, "epoch": 0.9010306638499456, "percentage": 18.02, "elapsed_time": "0:36:54", "remaining_time": "2:47:53", "throughput": 20013.45, "total_tokens": 44315264} +{"current_steps": 14080, "total_steps": 78105, "loss": 0.5193, "lr": 4.902548321998219e-06, "epoch": 0.9013507457909224, "percentage": 18.03, "elapsed_time": "0:36:54", "remaining_time": "2:47:51", "throughput": 20014.34, "total_tokens": 44330176} +{"current_steps": 14085, "total_steps": 78105, "loss": 0.5402, "lr": 4.902393805146819e-06, "epoch": 0.9016708277318993, "percentage": 18.03, "elapsed_time": "0:36:55", "remaining_time": "2:47:50", "throughput": 20015.2, "total_tokens": 44345152} +{"current_steps": 14090, "total_steps": 78105, "loss": 0.485, "lr": 4.902239168332386e-06, "epoch": 0.9019909096728762, "percentage": 18.04, "elapsed_time": "0:36:56", "remaining_time": "2:47:49", "throughput": 20016.19, "total_tokens": 44361152} +{"current_steps": 14095, "total_steps": 78105, "loss": 0.5202, "lr": 4.902084411562641e-06, "epoch": 0.9023109916138532, "percentage": 18.05, "elapsed_time": "0:36:56", "remaining_time": "2:47:47", "throughput": 20017.1, "total_tokens": 44376128} +{"current_steps": 14100, "total_steps": 78105, "loss": 0.4646, "lr": 4.901929534845313e-06, "epoch": 0.9026310735548301, "percentage": 18.05, "elapsed_time": "0:36:57", "remaining_time": "2:47:46", "throughput": 20018.21, "total_tokens": 44392192} +{"current_steps": 14105, "total_steps": 78105, "loss": 0.6618, "lr": 4.9017745381881345e-06, "epoch": 0.902951155495807, "percentage": 18.06, "elapsed_time": "0:36:58", "remaining_time": "2:47:45", "throughput": 20019.2, "total_tokens": 44407680} +{"current_steps": 14110, "total_steps": 78105, "loss": 0.5255, "lr": 4.901619421598847e-06, "epoch": 0.9032712374367838, "percentage": 18.07, "elapsed_time": "0:36:58", "remaining_time": "2:47:43", "throughput": 20020.37, "total_tokens": 44424384} +{"current_steps": 14115, "total_steps": 78105, "loss": 0.485, "lr": 4.901464185085194e-06, "epoch": 0.9035913193777607, "percentage": 18.07, "elapsed_time": "0:36:59", "remaining_time": "2:47:42", "throughput": 20021.27, "total_tokens": 44439744} +{"current_steps": 14120, "total_steps": 78105, "loss": 0.515, "lr": 4.9013088286549295e-06, "epoch": 0.9039114013187376, "percentage": 18.08, "elapsed_time": "0:37:00", "remaining_time": "2:47:41", "throughput": 20022.07, "total_tokens": 44454272} +{"current_steps": 14125, "total_steps": 78105, "loss": 0.4794, "lr": 4.901153352315809e-06, "epoch": 0.9042314832597145, "percentage": 18.08, "elapsed_time": "0:37:00", "remaining_time": "2:47:39", "throughput": 20022.89, "total_tokens": 44468992} +{"current_steps": 14130, "total_steps": 78105, "loss": 0.4356, "lr": 4.900997756075599e-06, "epoch": 0.9045515652006914, "percentage": 18.09, "elapsed_time": "0:37:01", "remaining_time": "2:47:38", "throughput": 20023.91, "total_tokens": 44484864} +{"current_steps": 14135, "total_steps": 78105, "loss": 0.4222, "lr": 4.900842039942065e-06, "epoch": 0.9048716471416682, "percentage": 18.1, "elapsed_time": "0:37:02", "remaining_time": "2:47:37", "throughput": 20024.79, "total_tokens": 44499968} +{"current_steps": 14140, "total_steps": 78105, "loss": 0.5553, "lr": 4.900686203922986e-06, "epoch": 0.9051917290826451, "percentage": 18.1, "elapsed_time": "0:37:02", "remaining_time": "2:47:36", "throughput": 20026.17, "total_tokens": 44517952} +{"current_steps": 14145, "total_steps": 78105, "loss": 0.399, "lr": 4.900530248026143e-06, "epoch": 0.905511811023622, "percentage": 18.11, "elapsed_time": "0:37:03", "remaining_time": "2:47:34", "throughput": 20027.06, "total_tokens": 44532928} +{"current_steps": 14150, "total_steps": 78105, "loss": 0.4205, "lr": 4.900374172259324e-06, "epoch": 0.905831892964599, "percentage": 18.12, "elapsed_time": "0:37:04", "remaining_time": "2:47:33", "throughput": 20027.97, "total_tokens": 44548288} +{"current_steps": 14155, "total_steps": 78105, "loss": 0.4555, "lr": 4.900217976630321e-06, "epoch": 0.9061519749055759, "percentage": 18.12, "elapsed_time": "0:37:04", "remaining_time": "2:47:32", "throughput": 20028.89, "total_tokens": 44563712} +{"current_steps": 14160, "total_steps": 78105, "loss": 0.4886, "lr": 4.9000616611469344e-06, "epoch": 0.9064720568465527, "percentage": 18.13, "elapsed_time": "0:37:05", "remaining_time": "2:47:30", "throughput": 20029.83, "total_tokens": 44579264} +{"current_steps": 14165, "total_steps": 78105, "loss": 0.3468, "lr": 4.89990522581697e-06, "epoch": 0.9067921387875296, "percentage": 18.14, "elapsed_time": "0:37:06", "remaining_time": "2:47:29", "throughput": 20030.69, "total_tokens": 44594176} +{"current_steps": 14170, "total_steps": 78105, "loss": 0.516, "lr": 4.89974867064824e-06, "epoch": 0.9071122207285065, "percentage": 18.14, "elapsed_time": "0:37:06", "remaining_time": "2:47:28", "throughput": 20031.65, "total_tokens": 44609664} +{"current_steps": 14175, "total_steps": 78105, "loss": 0.5396, "lr": 4.899591995648561e-06, "epoch": 0.9074323026694834, "percentage": 18.15, "elapsed_time": "0:37:07", "remaining_time": "2:47:26", "throughput": 20032.93, "total_tokens": 44626944} +{"current_steps": 14180, "total_steps": 78105, "loss": 0.3364, "lr": 4.899435200825756e-06, "epoch": 0.9077523846104603, "percentage": 18.16, "elapsed_time": "0:37:08", "remaining_time": "2:47:25", "throughput": 20034.08, "total_tokens": 44643520} +{"current_steps": 14185, "total_steps": 78105, "loss": 0.5203, "lr": 4.899278286187656e-06, "epoch": 0.9080724665514371, "percentage": 18.16, "elapsed_time": "0:37:09", "remaining_time": "2:47:24", "throughput": 20034.81, "total_tokens": 44657984} +{"current_steps": 14190, "total_steps": 78105, "loss": 0.5656, "lr": 4.899121251742095e-06, "epoch": 0.908392548492414, "percentage": 18.17, "elapsed_time": "0:37:09", "remaining_time": "2:47:22", "throughput": 20035.7, "total_tokens": 44673024} +{"current_steps": 14195, "total_steps": 78105, "loss": 0.5394, "lr": 4.898964097496917e-06, "epoch": 0.9087126304333909, "percentage": 18.17, "elapsed_time": "0:37:10", "remaining_time": "2:47:21", "throughput": 20036.39, "total_tokens": 44687424} +{"current_steps": 14200, "total_steps": 78105, "loss": 0.4064, "lr": 4.898806823459966e-06, "epoch": 0.9090327123743679, "percentage": 18.18, "elapsed_time": "0:37:10", "remaining_time": "2:47:20", "throughput": 20037.35, "total_tokens": 44702976} +{"current_steps": 14205, "total_steps": 78105, "loss": 0.5029, "lr": 4.898649429639097e-06, "epoch": 0.9093527943153448, "percentage": 18.19, "elapsed_time": "0:37:11", "remaining_time": "2:47:18", "throughput": 20038.26, "total_tokens": 44718144} +{"current_steps": 14210, "total_steps": 78105, "loss": 0.4381, "lr": 4.898491916042171e-06, "epoch": 0.9096728762563216, "percentage": 18.19, "elapsed_time": "0:37:12", "remaining_time": "2:47:17", "throughput": 20039.47, "total_tokens": 44734912} +{"current_steps": 14215, "total_steps": 78105, "loss": 0.4233, "lr": 4.898334282677051e-06, "epoch": 0.9099929581972985, "percentage": 18.2, "elapsed_time": "0:37:12", "remaining_time": "2:47:16", "throughput": 20040.34, "total_tokens": 44749888} +{"current_steps": 14220, "total_steps": 78105, "loss": 0.5408, "lr": 4.898176529551609e-06, "epoch": 0.9103130401382754, "percentage": 18.21, "elapsed_time": "0:37:13", "remaining_time": "2:47:14", "throughput": 20041.1, "total_tokens": 44764544} +{"current_steps": 14225, "total_steps": 78105, "loss": 0.3792, "lr": 4.898018656673724e-06, "epoch": 0.9106331220792523, "percentage": 18.21, "elapsed_time": "0:37:14", "remaining_time": "2:47:13", "throughput": 20042.2, "total_tokens": 44780992} +{"current_steps": 14230, "total_steps": 78105, "loss": 0.589, "lr": 4.8978606640512774e-06, "epoch": 0.9109532040202292, "percentage": 18.22, "elapsed_time": "0:37:15", "remaining_time": "2:47:12", "throughput": 20043.1, "total_tokens": 44796672} +{"current_steps": 14235, "total_steps": 78105, "loss": 0.5243, "lr": 4.897702551692159e-06, "epoch": 0.911273285961206, "percentage": 18.23, "elapsed_time": "0:37:15", "remaining_time": "2:47:11", "throughput": 20044.06, "total_tokens": 44812224} +{"current_steps": 14240, "total_steps": 78105, "loss": 0.4864, "lr": 4.897544319604265e-06, "epoch": 0.9115933679021829, "percentage": 18.23, "elapsed_time": "0:37:16", "remaining_time": "2:47:09", "throughput": 20044.89, "total_tokens": 44827136} +{"current_steps": 14245, "total_steps": 78105, "loss": 0.5151, "lr": 4.897385967795496e-06, "epoch": 0.9119134498431598, "percentage": 18.24, "elapsed_time": "0:37:16", "remaining_time": "2:47:08", "throughput": 20045.72, "total_tokens": 44842176} +{"current_steps": 14250, "total_steps": 78105, "loss": 0.6346, "lr": 4.897227496273759e-06, "epoch": 0.9122335317841367, "percentage": 18.24, "elapsed_time": "0:37:17", "remaining_time": "2:47:07", "throughput": 20046.8, "total_tokens": 44858880} +{"current_steps": 14255, "total_steps": 78105, "loss": 0.562, "lr": 4.897068905046967e-06, "epoch": 0.9125536137251137, "percentage": 18.25, "elapsed_time": "0:37:18", "remaining_time": "2:47:06", "throughput": 20047.96, "total_tokens": 44875328} +{"current_steps": 14260, "total_steps": 78105, "loss": 0.4998, "lr": 4.896910194123041e-06, "epoch": 0.9128736956660906, "percentage": 18.26, "elapsed_time": "0:37:19", "remaining_time": "2:47:04", "throughput": 20049.15, "total_tokens": 44892032} +{"current_steps": 14265, "total_steps": 78105, "loss": 0.4398, "lr": 4.896751363509904e-06, "epoch": 0.9131937776070674, "percentage": 18.26, "elapsed_time": "0:37:19", "remaining_time": "2:47:03", "throughput": 20050.04, "total_tokens": 44907328} +{"current_steps": 14270, "total_steps": 78105, "loss": 0.377, "lr": 4.896592413215489e-06, "epoch": 0.9135138595480443, "percentage": 18.27, "elapsed_time": "0:37:20", "remaining_time": "2:47:02", "throughput": 20050.9, "total_tokens": 44922560} +{"current_steps": 14275, "total_steps": 78105, "loss": 0.5012, "lr": 4.8964333432477315e-06, "epoch": 0.9138339414890212, "percentage": 18.28, "elapsed_time": "0:37:21", "remaining_time": "2:47:00", "throughput": 20051.73, "total_tokens": 44937728} +{"current_steps": 14280, "total_steps": 78105, "loss": 0.5723, "lr": 4.8962741536145755e-06, "epoch": 0.9141540234299981, "percentage": 18.28, "elapsed_time": "0:37:21", "remaining_time": "2:46:59", "throughput": 20052.83, "total_tokens": 44954560} +{"current_steps": 14285, "total_steps": 78105, "loss": 0.5088, "lr": 4.89611484432397e-06, "epoch": 0.914474105370975, "percentage": 18.29, "elapsed_time": "0:37:22", "remaining_time": "2:46:58", "throughput": 20054.0, "total_tokens": 44971520} +{"current_steps": 14290, "total_steps": 78105, "loss": 0.6032, "lr": 4.89595541538387e-06, "epoch": 0.9147941873119518, "percentage": 18.3, "elapsed_time": "0:37:23", "remaining_time": "2:46:57", "throughput": 20054.97, "total_tokens": 44987392} +{"current_steps": 14295, "total_steps": 78105, "loss": 0.4007, "lr": 4.8957958668022374e-06, "epoch": 0.9151142692529287, "percentage": 18.3, "elapsed_time": "0:37:23", "remaining_time": "2:46:56", "throughput": 20055.89, "total_tokens": 45002688} +{"current_steps": 14300, "total_steps": 78105, "loss": 0.4718, "lr": 4.8956361985870374e-06, "epoch": 0.9154343511939056, "percentage": 18.31, "elapsed_time": "0:37:24", "remaining_time": "2:46:54", "throughput": 20056.74, "total_tokens": 45017792} +{"current_steps": 14305, "total_steps": 78105, "loss": 0.5366, "lr": 4.895476410746245e-06, "epoch": 0.9157544331348826, "percentage": 18.32, "elapsed_time": "0:37:25", "remaining_time": "2:46:53", "throughput": 20057.52, "total_tokens": 45032640} +{"current_steps": 14310, "total_steps": 78105, "loss": 0.4542, "lr": 4.895316503287837e-06, "epoch": 0.9160745150758595, "percentage": 18.32, "elapsed_time": "0:37:25", "remaining_time": "2:46:52", "throughput": 20058.5, "total_tokens": 45048256} +{"current_steps": 14315, "total_steps": 78105, "loss": 0.536, "lr": 4.8951564762198e-06, "epoch": 0.9163945970168363, "percentage": 18.33, "elapsed_time": "0:37:26", "remaining_time": "2:46:50", "throughput": 20059.53, "total_tokens": 45064192} +{"current_steps": 14320, "total_steps": 78105, "loss": 0.447, "lr": 4.894996329550125e-06, "epoch": 0.9167146789578132, "percentage": 18.33, "elapsed_time": "0:37:27", "remaining_time": "2:46:49", "throughput": 20060.35, "total_tokens": 45079488} +{"current_steps": 14325, "total_steps": 78105, "loss": 0.5554, "lr": 4.894836063286809e-06, "epoch": 0.9170347608987901, "percentage": 18.34, "elapsed_time": "0:37:27", "remaining_time": "2:46:48", "throughput": 20061.32, "total_tokens": 45095616} +{"current_steps": 14330, "total_steps": 78105, "loss": 0.5817, "lr": 4.894675677437853e-06, "epoch": 0.917354842839767, "percentage": 18.35, "elapsed_time": "0:37:28", "remaining_time": "2:46:47", "throughput": 20062.18, "total_tokens": 45110592} +{"current_steps": 14335, "total_steps": 78105, "loss": 0.5129, "lr": 4.894515172011268e-06, "epoch": 0.9176749247807439, "percentage": 18.35, "elapsed_time": "0:37:29", "remaining_time": "2:46:45", "throughput": 20063.26, "total_tokens": 45127168} +{"current_steps": 14340, "total_steps": 78105, "loss": 0.4606, "lr": 4.894354547015067e-06, "epoch": 0.9179950067217207, "percentage": 18.36, "elapsed_time": "0:37:29", "remaining_time": "2:46:44", "throughput": 20064.11, "total_tokens": 45142208} +{"current_steps": 14345, "total_steps": 78105, "loss": 0.5549, "lr": 4.894193802457272e-06, "epoch": 0.9183150886626976, "percentage": 18.37, "elapsed_time": "0:37:30", "remaining_time": "2:46:43", "throughput": 20064.95, "total_tokens": 45157184} +{"current_steps": 14350, "total_steps": 78105, "loss": 0.5885, "lr": 4.8940329383459095e-06, "epoch": 0.9186351706036745, "percentage": 18.37, "elapsed_time": "0:37:31", "remaining_time": "2:46:42", "throughput": 20066.24, "total_tokens": 45174464} +{"current_steps": 14355, "total_steps": 78105, "loss": 0.4462, "lr": 4.8938719546890126e-06, "epoch": 0.9189552525446514, "percentage": 18.38, "elapsed_time": "0:37:31", "remaining_time": "2:46:40", "throughput": 20066.96, "total_tokens": 45188992} +{"current_steps": 14360, "total_steps": 78105, "loss": 0.6097, "lr": 4.893710851494619e-06, "epoch": 0.9192753344856284, "percentage": 18.39, "elapsed_time": "0:37:32", "remaining_time": "2:46:39", "throughput": 20067.77, "total_tokens": 45204032} +{"current_steps": 14365, "total_steps": 78105, "loss": 0.6409, "lr": 4.8935496287707736e-06, "epoch": 0.9195954164266052, "percentage": 18.39, "elapsed_time": "0:37:33", "remaining_time": "2:46:38", "throughput": 20068.59, "total_tokens": 45219328} +{"current_steps": 14370, "total_steps": 78105, "loss": 0.5678, "lr": 4.893388286525528e-06, "epoch": 0.9199154983675821, "percentage": 18.4, "elapsed_time": "0:37:33", "remaining_time": "2:46:36", "throughput": 20069.66, "total_tokens": 45235584} +{"current_steps": 14375, "total_steps": 78105, "loss": 0.4068, "lr": 4.8932268247669366e-06, "epoch": 0.920235580308559, "percentage": 18.4, "elapsed_time": "0:37:34", "remaining_time": "2:46:35", "throughput": 20070.53, "total_tokens": 45250880} +{"current_steps": 14380, "total_steps": 78105, "loss": 0.549, "lr": 4.893065243503065e-06, "epoch": 0.9205556622495359, "percentage": 18.41, "elapsed_time": "0:37:35", "remaining_time": "2:46:34", "throughput": 20071.52, "total_tokens": 45266752} +{"current_steps": 14385, "total_steps": 78105, "loss": 0.4711, "lr": 4.892903542741979e-06, "epoch": 0.9208757441905128, "percentage": 18.42, "elapsed_time": "0:37:35", "remaining_time": "2:46:32", "throughput": 20072.47, "total_tokens": 45282496} +{"current_steps": 14390, "total_steps": 78105, "loss": 0.4715, "lr": 4.892741722491755e-06, "epoch": 0.9211958261314896, "percentage": 18.42, "elapsed_time": "0:37:36", "remaining_time": "2:46:31", "throughput": 20073.18, "total_tokens": 45297024} +{"current_steps": 14395, "total_steps": 78105, "loss": 0.4262, "lr": 4.892579782760472e-06, "epoch": 0.9215159080724665, "percentage": 18.43, "elapsed_time": "0:37:37", "remaining_time": "2:46:30", "throughput": 20074.36, "total_tokens": 45314176} +{"current_steps": 14400, "total_steps": 78105, "loss": 0.4715, "lr": 4.8924177235562186e-06, "epoch": 0.9218359900134434, "percentage": 18.44, "elapsed_time": "0:37:37", "remaining_time": "2:46:29", "throughput": 20075.19, "total_tokens": 45329344} +{"current_steps": 14405, "total_steps": 78105, "loss": 0.4856, "lr": 4.892255544887084e-06, "epoch": 0.9221560719544203, "percentage": 18.44, "elapsed_time": "0:37:38", "remaining_time": "2:46:27", "throughput": 20076.07, "total_tokens": 45344384} +{"current_steps": 14410, "total_steps": 78105, "loss": 0.4845, "lr": 4.8920932467611705e-06, "epoch": 0.9224761538953972, "percentage": 18.45, "elapsed_time": "0:37:39", "remaining_time": "2:46:26", "throughput": 20076.97, "total_tokens": 45360192} +{"current_steps": 14415, "total_steps": 78105, "loss": 0.455, "lr": 4.891930829186579e-06, "epoch": 0.9227962358363742, "percentage": 18.46, "elapsed_time": "0:37:39", "remaining_time": "2:46:25", "throughput": 20077.9, "total_tokens": 45376000} +{"current_steps": 14420, "total_steps": 78105, "loss": 0.6976, "lr": 4.89176829217142e-06, "epoch": 0.923116317777351, "percentage": 18.46, "elapsed_time": "0:37:40", "remaining_time": "2:46:23", "throughput": 20078.61, "total_tokens": 45390400} +{"current_steps": 14425, "total_steps": 78105, "loss": 0.597, "lr": 4.891605635723812e-06, "epoch": 0.9234363997183279, "percentage": 18.47, "elapsed_time": "0:37:41", "remaining_time": "2:46:22", "throughput": 20079.68, "total_tokens": 45406976} +{"current_steps": 14430, "total_steps": 78105, "loss": 0.4891, "lr": 4.891442859851876e-06, "epoch": 0.9237564816593048, "percentage": 18.48, "elapsed_time": "0:37:41", "remaining_time": "2:46:21", "throughput": 20080.52, "total_tokens": 45422080} +{"current_steps": 14435, "total_steps": 78105, "loss": 0.4595, "lr": 4.891279964563739e-06, "epoch": 0.9240765636002817, "percentage": 18.48, "elapsed_time": "0:37:42", "remaining_time": "2:46:20", "throughput": 20081.77, "total_tokens": 45439296} +{"current_steps": 14440, "total_steps": 78105, "loss": 0.3903, "lr": 4.891116949867537e-06, "epoch": 0.9243966455412586, "percentage": 18.49, "elapsed_time": "0:37:43", "remaining_time": "2:46:19", "throughput": 20082.62, "total_tokens": 45454208} +{"current_steps": 14445, "total_steps": 78105, "loss": 0.4345, "lr": 4.89095381577141e-06, "epoch": 0.9247167274822354, "percentage": 18.49, "elapsed_time": "0:37:44", "remaining_time": "2:46:17", "throughput": 20083.63, "total_tokens": 45470400} +{"current_steps": 14450, "total_steps": 78105, "loss": 0.5197, "lr": 4.890790562283503e-06, "epoch": 0.9250368094232123, "percentage": 18.5, "elapsed_time": "0:37:44", "remaining_time": "2:46:16", "throughput": 20084.6, "total_tokens": 45486528} +{"current_steps": 14455, "total_steps": 78105, "loss": 0.4466, "lr": 4.8906271894119685e-06, "epoch": 0.9253568913641892, "percentage": 18.51, "elapsed_time": "0:37:45", "remaining_time": "2:46:15", "throughput": 20085.44, "total_tokens": 45501440} +{"current_steps": 14460, "total_steps": 78105, "loss": 0.4898, "lr": 4.890463697164965e-06, "epoch": 0.9256769733051661, "percentage": 18.51, "elapsed_time": "0:37:46", "remaining_time": "2:46:14", "throughput": 20086.49, "total_tokens": 45517760} +{"current_steps": 14465, "total_steps": 78105, "loss": 0.4846, "lr": 4.890300085550654e-06, "epoch": 0.9259970552461431, "percentage": 18.52, "elapsed_time": "0:37:46", "remaining_time": "2:46:12", "throughput": 20087.31, "total_tokens": 45533056} +{"current_steps": 14470, "total_steps": 78105, "loss": 0.477, "lr": 4.890136354577209e-06, "epoch": 0.9263171371871199, "percentage": 18.53, "elapsed_time": "0:37:47", "remaining_time": "2:46:11", "throughput": 20088.3, "total_tokens": 45549248} +{"current_steps": 14475, "total_steps": 78105, "loss": 0.3856, "lr": 4.889972504252804e-06, "epoch": 0.9266372191280968, "percentage": 18.53, "elapsed_time": "0:37:48", "remaining_time": "2:46:10", "throughput": 20089.44, "total_tokens": 45565760} +{"current_steps": 14480, "total_steps": 78105, "loss": 0.519, "lr": 4.889808534585621e-06, "epoch": 0.9269573010690737, "percentage": 18.54, "elapsed_time": "0:37:48", "remaining_time": "2:46:09", "throughput": 20090.37, "total_tokens": 45581568} +{"current_steps": 14485, "total_steps": 78105, "loss": 0.6125, "lr": 4.889644445583848e-06, "epoch": 0.9272773830100506, "percentage": 18.55, "elapsed_time": "0:37:49", "remaining_time": "2:46:07", "throughput": 20091.28, "total_tokens": 45596928} +{"current_steps": 14490, "total_steps": 78105, "loss": 0.486, "lr": 4.889480237255678e-06, "epoch": 0.9275974649510275, "percentage": 18.55, "elapsed_time": "0:37:50", "remaining_time": "2:46:06", "throughput": 20092.25, "total_tokens": 45612800} +{"current_steps": 14495, "total_steps": 78105, "loss": 0.5381, "lr": 4.889315909609311e-06, "epoch": 0.9279175468920043, "percentage": 18.56, "elapsed_time": "0:37:50", "remaining_time": "2:46:05", "throughput": 20093.06, "total_tokens": 45627584} +{"current_steps": 14500, "total_steps": 78105, "loss": 0.4355, "lr": 4.889151462652955e-06, "epoch": 0.9282376288329812, "percentage": 18.56, "elapsed_time": "0:37:51", "remaining_time": "2:46:04", "throughput": 20094.06, "total_tokens": 45643840} +{"current_steps": 14505, "total_steps": 78105, "loss": 0.4318, "lr": 4.888986896394817e-06, "epoch": 0.9285577107739581, "percentage": 18.57, "elapsed_time": "0:37:52", "remaining_time": "2:46:02", "throughput": 20094.94, "total_tokens": 45659072} +{"current_steps": 14510, "total_steps": 78105, "loss": 0.7709, "lr": 4.888822210843119e-06, "epoch": 0.928877792714935, "percentage": 18.58, "elapsed_time": "0:37:52", "remaining_time": "2:46:01", "throughput": 20095.99, "total_tokens": 45675328} +{"current_steps": 14515, "total_steps": 78105, "loss": 0.5242, "lr": 4.888657406006082e-06, "epoch": 0.9291978746559119, "percentage": 18.58, "elapsed_time": "0:37:53", "remaining_time": "2:46:00", "throughput": 20096.88, "total_tokens": 45690816} +{"current_steps": 14520, "total_steps": 78105, "loss": 0.3906, "lr": 4.888492481891937e-06, "epoch": 0.9295179565968889, "percentage": 18.59, "elapsed_time": "0:37:54", "remaining_time": "2:45:59", "throughput": 20097.75, "total_tokens": 45706432} +{"current_steps": 14525, "total_steps": 78105, "loss": 0.4153, "lr": 4.888327438508918e-06, "epoch": 0.9298380385378657, "percentage": 18.6, "elapsed_time": "0:37:54", "remaining_time": "2:45:57", "throughput": 20098.65, "total_tokens": 45721920} +{"current_steps": 14530, "total_steps": 78105, "loss": 0.402, "lr": 4.888162275865267e-06, "epoch": 0.9301581204788426, "percentage": 18.6, "elapsed_time": "0:37:55", "remaining_time": "2:45:56", "throughput": 20099.66, "total_tokens": 45738048} +{"current_steps": 14535, "total_steps": 78105, "loss": 0.3587, "lr": 4.887996993969231e-06, "epoch": 0.9304782024198195, "percentage": 18.61, "elapsed_time": "0:37:56", "remaining_time": "2:45:55", "throughput": 20100.63, "total_tokens": 45753856} +{"current_steps": 14540, "total_steps": 78105, "loss": 0.5039, "lr": 4.887831592829064e-06, "epoch": 0.9307982843607964, "percentage": 18.62, "elapsed_time": "0:37:56", "remaining_time": "2:45:53", "throughput": 20101.43, "total_tokens": 45768704} +{"current_steps": 14545, "total_steps": 78105, "loss": 0.3943, "lr": 4.887666072453026e-06, "epoch": 0.9311183663017732, "percentage": 18.62, "elapsed_time": "0:37:57", "remaining_time": "2:45:52", "throughput": 20102.32, "total_tokens": 45783936} +{"current_steps": 14550, "total_steps": 78105, "loss": 0.6048, "lr": 4.887500432849379e-06, "epoch": 0.9314384482427501, "percentage": 18.63, "elapsed_time": "0:37:58", "remaining_time": "2:45:51", "throughput": 20103.34, "total_tokens": 45800320} +{"current_steps": 14555, "total_steps": 78105, "loss": 0.4135, "lr": 4.887334674026399e-06, "epoch": 0.931758530183727, "percentage": 18.64, "elapsed_time": "0:37:58", "remaining_time": "2:45:50", "throughput": 20104.35, "total_tokens": 45816512} +{"current_steps": 14560, "total_steps": 78105, "loss": 0.4462, "lr": 4.887168795992359e-06, "epoch": 0.9320786121247039, "percentage": 18.64, "elapsed_time": "0:37:59", "remaining_time": "2:45:48", "throughput": 20105.25, "total_tokens": 45831936} +{"current_steps": 14565, "total_steps": 78105, "loss": 0.503, "lr": 4.887002798755544e-06, "epoch": 0.9323986940656808, "percentage": 18.65, "elapsed_time": "0:38:00", "remaining_time": "2:45:47", "throughput": 20106.01, "total_tokens": 45846784} +{"current_steps": 14570, "total_steps": 78105, "loss": 0.5613, "lr": 4.886836682324243e-06, "epoch": 0.9327187760066578, "percentage": 18.65, "elapsed_time": "0:38:00", "remaining_time": "2:45:46", "throughput": 20106.84, "total_tokens": 45862080} +{"current_steps": 14575, "total_steps": 78105, "loss": 0.5538, "lr": 4.886670446706751e-06, "epoch": 0.9330388579476346, "percentage": 18.66, "elapsed_time": "0:38:01", "remaining_time": "2:45:45", "throughput": 20107.83, "total_tokens": 45878528} +{"current_steps": 14580, "total_steps": 78105, "loss": 0.5014, "lr": 4.886504091911367e-06, "epoch": 0.9333589398886115, "percentage": 18.67, "elapsed_time": "0:38:02", "remaining_time": "2:45:43", "throughput": 20108.73, "total_tokens": 45894016} +{"current_steps": 14585, "total_steps": 78105, "loss": 0.5506, "lr": 4.886337617946401e-06, "epoch": 0.9336790218295884, "percentage": 18.67, "elapsed_time": "0:38:02", "remaining_time": "2:45:42", "throughput": 20109.56, "total_tokens": 45909504} +{"current_steps": 14590, "total_steps": 78105, "loss": 0.5049, "lr": 4.886171024820163e-06, "epoch": 0.9339991037705653, "percentage": 18.68, "elapsed_time": "0:38:03", "remaining_time": "2:45:41", "throughput": 20110.55, "total_tokens": 45925376} +{"current_steps": 14595, "total_steps": 78105, "loss": 0.6034, "lr": 4.886004312540974e-06, "epoch": 0.9343191857115422, "percentage": 18.69, "elapsed_time": "0:38:04", "remaining_time": "2:45:40", "throughput": 20111.27, "total_tokens": 45940224} +{"current_steps": 14600, "total_steps": 78105, "loss": 0.4263, "lr": 4.885837481117158e-06, "epoch": 0.934639267652519, "percentage": 18.69, "elapsed_time": "0:38:04", "remaining_time": "2:45:38", "throughput": 20112.05, "total_tokens": 45955072} +{"current_steps": 14605, "total_steps": 78105, "loss": 0.4194, "lr": 4.885670530557046e-06, "epoch": 0.9349593495934959, "percentage": 18.7, "elapsed_time": "0:38:05", "remaining_time": "2:45:37", "throughput": 20112.89, "total_tokens": 45970240} +{"current_steps": 14610, "total_steps": 78105, "loss": 0.8094, "lr": 4.885503460868973e-06, "epoch": 0.9352794315344728, "percentage": 18.71, "elapsed_time": "0:38:06", "remaining_time": "2:45:36", "throughput": 20113.79, "total_tokens": 45985856} +{"current_steps": 14615, "total_steps": 78105, "loss": 0.4448, "lr": 4.8853362720612844e-06, "epoch": 0.9355995134754497, "percentage": 18.71, "elapsed_time": "0:38:06", "remaining_time": "2:45:34", "throughput": 20114.5, "total_tokens": 46000256} +{"current_steps": 14620, "total_steps": 78105, "loss": 0.4281, "lr": 4.885168964142326e-06, "epoch": 0.9359195954164266, "percentage": 18.72, "elapsed_time": "0:38:07", "remaining_time": "2:45:33", "throughput": 20115.37, "total_tokens": 46015616} +{"current_steps": 14625, "total_steps": 78105, "loss": 0.4017, "lr": 4.885001537120454e-06, "epoch": 0.9362396773574035, "percentage": 18.72, "elapsed_time": "0:38:08", "remaining_time": "2:45:32", "throughput": 20116.2, "total_tokens": 46031040} +{"current_steps": 14630, "total_steps": 78105, "loss": 0.4725, "lr": 4.884833991004027e-06, "epoch": 0.9365597592983804, "percentage": 18.73, "elapsed_time": "0:38:08", "remaining_time": "2:45:30", "throughput": 20117.07, "total_tokens": 46046016} +{"current_steps": 14635, "total_steps": 78105, "loss": 0.4231, "lr": 4.884666325801414e-06, "epoch": 0.9368798412393573, "percentage": 18.74, "elapsed_time": "0:38:09", "remaining_time": "2:45:29", "throughput": 20118.15, "total_tokens": 46062528} +{"current_steps": 14640, "total_steps": 78105, "loss": 0.4505, "lr": 4.884498541520985e-06, "epoch": 0.9371999231803342, "percentage": 18.74, "elapsed_time": "0:38:10", "remaining_time": "2:45:28", "throughput": 20119.1, "total_tokens": 46078144} +{"current_steps": 14645, "total_steps": 78105, "loss": 0.6019, "lr": 4.88433063817112e-06, "epoch": 0.9375200051213111, "percentage": 18.75, "elapsed_time": "0:38:11", "remaining_time": "2:45:27", "throughput": 20120.55, "total_tokens": 46096896} +{"current_steps": 14650, "total_steps": 78105, "loss": 0.4503, "lr": 4.884162615760202e-06, "epoch": 0.9378400870622879, "percentage": 18.76, "elapsed_time": "0:38:11", "remaining_time": "2:45:26", "throughput": 20121.4, "total_tokens": 46111808} +{"current_steps": 14655, "total_steps": 78105, "loss": 0.4946, "lr": 4.883994474296622e-06, "epoch": 0.9381601690032648, "percentage": 18.76, "elapsed_time": "0:38:12", "remaining_time": "2:45:24", "throughput": 20122.42, "total_tokens": 46127936} +{"current_steps": 14660, "total_steps": 78105, "loss": 0.4874, "lr": 4.883826213788775e-06, "epoch": 0.9384802509442417, "percentage": 18.77, "elapsed_time": "0:38:13", "remaining_time": "2:45:23", "throughput": 20123.21, "total_tokens": 46142848} +{"current_steps": 14665, "total_steps": 78105, "loss": 0.7442, "lr": 4.883657834245065e-06, "epoch": 0.9388003328852186, "percentage": 18.78, "elapsed_time": "0:38:13", "remaining_time": "2:45:22", "throughput": 20124.11, "total_tokens": 46158848} +{"current_steps": 14670, "total_steps": 78105, "loss": 0.5611, "lr": 4.883489335673898e-06, "epoch": 0.9391204148261955, "percentage": 18.78, "elapsed_time": "0:38:14", "remaining_time": "2:45:21", "throughput": 20125.05, "total_tokens": 46174912} +{"current_steps": 14675, "total_steps": 78105, "loss": 0.4503, "lr": 4.883320718083688e-06, "epoch": 0.9394404967671725, "percentage": 18.79, "elapsed_time": "0:38:15", "remaining_time": "2:45:19", "throughput": 20125.74, "total_tokens": 46189248} +{"current_steps": 14680, "total_steps": 78105, "loss": 0.4028, "lr": 4.8831519814828565e-06, "epoch": 0.9397605787081493, "percentage": 18.8, "elapsed_time": "0:38:15", "remaining_time": "2:45:18", "throughput": 20126.7, "total_tokens": 46204928} +{"current_steps": 14685, "total_steps": 78105, "loss": 0.4413, "lr": 4.8829831258798285e-06, "epoch": 0.9400806606491262, "percentage": 18.8, "elapsed_time": "0:38:16", "remaining_time": "2:45:17", "throughput": 20127.5, "total_tokens": 46220160} +{"current_steps": 14690, "total_steps": 78105, "loss": 0.5157, "lr": 4.8828141512830355e-06, "epoch": 0.9404007425901031, "percentage": 18.81, "elapsed_time": "0:38:17", "remaining_time": "2:45:16", "throughput": 20128.43, "total_tokens": 46235584} +{"current_steps": 14695, "total_steps": 78105, "loss": 0.3311, "lr": 4.8826450577009154e-06, "epoch": 0.94072082453108, "percentage": 18.81, "elapsed_time": "0:38:17", "remaining_time": "2:45:14", "throughput": 20129.41, "total_tokens": 46251904} +{"current_steps": 14700, "total_steps": 78105, "loss": 0.498, "lr": 4.882475845141912e-06, "epoch": 0.9410409064720568, "percentage": 18.82, "elapsed_time": "0:38:18", "remaining_time": "2:45:13", "throughput": 20130.33, "total_tokens": 46268032} +{"current_steps": 14705, "total_steps": 78105, "loss": 0.4591, "lr": 4.882306513614474e-06, "epoch": 0.9413609884130337, "percentage": 18.83, "elapsed_time": "0:38:19", "remaining_time": "2:45:12", "throughput": 20131.19, "total_tokens": 46283392} +{"current_steps": 14710, "total_steps": 78105, "loss": 0.4144, "lr": 4.882137063127059e-06, "epoch": 0.9416810703540106, "percentage": 18.83, "elapsed_time": "0:38:19", "remaining_time": "2:45:11", "throughput": 20132.01, "total_tokens": 46298752} +{"current_steps": 14715, "total_steps": 78105, "loss": 0.3803, "lr": 4.881967493688126e-06, "epoch": 0.9420011522949875, "percentage": 18.84, "elapsed_time": "0:38:20", "remaining_time": "2:45:09", "throughput": 20132.96, "total_tokens": 46314624} +{"current_steps": 14720, "total_steps": 78105, "loss": 0.4852, "lr": 4.881797805306144e-06, "epoch": 0.9423212342359644, "percentage": 18.85, "elapsed_time": "0:38:21", "remaining_time": "2:45:08", "throughput": 20134.1, "total_tokens": 46331712} +{"current_steps": 14725, "total_steps": 78105, "loss": 0.5661, "lr": 4.881627997989587e-06, "epoch": 0.9426413161769412, "percentage": 18.85, "elapsed_time": "0:38:21", "remaining_time": "2:45:07", "throughput": 20134.92, "total_tokens": 46347200} +{"current_steps": 14730, "total_steps": 78105, "loss": 0.4971, "lr": 4.881458071746932e-06, "epoch": 0.9429613981179182, "percentage": 18.86, "elapsed_time": "0:38:22", "remaining_time": "2:45:06", "throughput": 20135.92, "total_tokens": 46363008} +{"current_steps": 14735, "total_steps": 78105, "loss": 0.4977, "lr": 4.881288026586665e-06, "epoch": 0.9432814800588951, "percentage": 18.87, "elapsed_time": "0:38:23", "remaining_time": "2:45:05", "throughput": 20136.85, "total_tokens": 46378816} +{"current_steps": 14740, "total_steps": 78105, "loss": 0.3572, "lr": 4.881117862517278e-06, "epoch": 0.943601561999872, "percentage": 18.87, "elapsed_time": "0:38:23", "remaining_time": "2:45:04", "throughput": 20137.81, "total_tokens": 46395200} +{"current_steps": 14745, "total_steps": 78105, "loss": 0.4358, "lr": 4.880947579547268e-06, "epoch": 0.9439216439408489, "percentage": 18.88, "elapsed_time": "0:38:24", "remaining_time": "2:45:02", "throughput": 20138.87, "total_tokens": 46411840} +{"current_steps": 14750, "total_steps": 78105, "loss": 0.6732, "lr": 4.880777177685138e-06, "epoch": 0.9442417258818258, "percentage": 18.88, "elapsed_time": "0:38:25", "remaining_time": "2:45:01", "throughput": 20139.74, "total_tokens": 46427776} +{"current_steps": 14755, "total_steps": 78105, "loss": 0.58, "lr": 4.880606656939397e-06, "epoch": 0.9445618078228026, "percentage": 18.89, "elapsed_time": "0:38:25", "remaining_time": "2:45:00", "throughput": 20140.88, "total_tokens": 46444736} +{"current_steps": 14760, "total_steps": 78105, "loss": 0.5092, "lr": 4.880436017318559e-06, "epoch": 0.9448818897637795, "percentage": 18.9, "elapsed_time": "0:38:26", "remaining_time": "2:44:59", "throughput": 20141.84, "total_tokens": 46460672} +{"current_steps": 14765, "total_steps": 78105, "loss": 0.4399, "lr": 4.8802652588311465e-06, "epoch": 0.9452019717047564, "percentage": 18.9, "elapsed_time": "0:38:27", "remaining_time": "2:44:58", "throughput": 20142.75, "total_tokens": 46476736} +{"current_steps": 14770, "total_steps": 78105, "loss": 0.5088, "lr": 4.8800943814856835e-06, "epoch": 0.9455220536457333, "percentage": 18.91, "elapsed_time": "0:38:28", "remaining_time": "2:44:57", "throughput": 20143.68, "total_tokens": 46492416} +{"current_steps": 14775, "total_steps": 78105, "loss": 0.3932, "lr": 4.879923385290706e-06, "epoch": 0.9458421355867102, "percentage": 18.92, "elapsed_time": "0:38:28", "remaining_time": "2:44:55", "throughput": 20144.45, "total_tokens": 46507264} +{"current_steps": 14780, "total_steps": 78105, "loss": 0.4766, "lr": 4.879752270254751e-06, "epoch": 0.946162217527687, "percentage": 18.92, "elapsed_time": "0:38:29", "remaining_time": "2:44:54", "throughput": 20145.57, "total_tokens": 46524224} +{"current_steps": 14785, "total_steps": 78105, "loss": 0.6264, "lr": 4.879581036386363e-06, "epoch": 0.946482299468664, "percentage": 18.93, "elapsed_time": "0:38:30", "remaining_time": "2:44:53", "throughput": 20146.38, "total_tokens": 46539456} +{"current_steps": 14790, "total_steps": 78105, "loss": 0.4624, "lr": 4.8794096836940926e-06, "epoch": 0.9468023814096409, "percentage": 18.94, "elapsed_time": "0:38:30", "remaining_time": "2:44:52", "throughput": 20147.24, "total_tokens": 46555136} +{"current_steps": 14795, "total_steps": 78105, "loss": 0.4513, "lr": 4.879238212186498e-06, "epoch": 0.9471224633506178, "percentage": 18.94, "elapsed_time": "0:38:31", "remaining_time": "2:44:50", "throughput": 20148.09, "total_tokens": 46570432} +{"current_steps": 14800, "total_steps": 78105, "loss": 0.4042, "lr": 4.8790666218721385e-06, "epoch": 0.9474425452915947, "percentage": 18.95, "elapsed_time": "0:38:32", "remaining_time": "2:44:49", "throughput": 20149.03, "total_tokens": 46586304} +{"current_steps": 14805, "total_steps": 78105, "loss": 0.4828, "lr": 4.878894912759584e-06, "epoch": 0.9477626272325715, "percentage": 18.96, "elapsed_time": "0:38:32", "remaining_time": "2:44:48", "throughput": 20150.02, "total_tokens": 46602432} +{"current_steps": 14810, "total_steps": 78105, "loss": 0.4343, "lr": 4.87872308485741e-06, "epoch": 0.9480827091735484, "percentage": 18.96, "elapsed_time": "0:38:33", "remaining_time": "2:44:47", "throughput": 20150.75, "total_tokens": 46617472} +{"current_steps": 14815, "total_steps": 78105, "loss": 0.5112, "lr": 4.878551138174195e-06, "epoch": 0.9484027911145253, "percentage": 18.97, "elapsed_time": "0:38:34", "remaining_time": "2:44:45", "throughput": 20151.59, "total_tokens": 46632896} +{"current_steps": 14820, "total_steps": 78105, "loss": 0.3885, "lr": 4.8783790727185246e-06, "epoch": 0.9487228730555022, "percentage": 18.97, "elapsed_time": "0:38:34", "remaining_time": "2:44:44", "throughput": 20152.59, "total_tokens": 46649408} +{"current_steps": 14825, "total_steps": 78105, "loss": 0.4176, "lr": 4.878206888498993e-06, "epoch": 0.9490429549964791, "percentage": 18.98, "elapsed_time": "0:38:35", "remaining_time": "2:44:43", "throughput": 20153.5, "total_tokens": 46665344} +{"current_steps": 14830, "total_steps": 78105, "loss": 0.5009, "lr": 4.878034585524196e-06, "epoch": 0.9493630369374559, "percentage": 18.99, "elapsed_time": "0:38:36", "remaining_time": "2:44:42", "throughput": 20154.23, "total_tokens": 46680704} +{"current_steps": 14835, "total_steps": 78105, "loss": 0.5474, "lr": 4.877862163802739e-06, "epoch": 0.9496831188784329, "percentage": 18.99, "elapsed_time": "0:38:36", "remaining_time": "2:44:41", "throughput": 20155.01, "total_tokens": 46695936} +{"current_steps": 14840, "total_steps": 78105, "loss": 0.4303, "lr": 4.877689623343232e-06, "epoch": 0.9500032008194098, "percentage": 19.0, "elapsed_time": "0:38:37", "remaining_time": "2:44:39", "throughput": 20155.9, "total_tokens": 46711680} +{"current_steps": 14845, "total_steps": 78105, "loss": 0.4389, "lr": 4.87751696415429e-06, "epoch": 0.9503232827603867, "percentage": 19.01, "elapsed_time": "0:38:38", "remaining_time": "2:44:38", "throughput": 20156.7, "total_tokens": 46727040} +{"current_steps": 14850, "total_steps": 78105, "loss": 0.5531, "lr": 4.877344186244534e-06, "epoch": 0.9506433647013636, "percentage": 19.01, "elapsed_time": "0:38:38", "remaining_time": "2:44:37", "throughput": 20157.55, "total_tokens": 46742720} +{"current_steps": 14855, "total_steps": 78105, "loss": 0.462, "lr": 4.877171289622593e-06, "epoch": 0.9509634466423404, "percentage": 19.02, "elapsed_time": "0:38:39", "remaining_time": "2:44:36", "throughput": 20158.37, "total_tokens": 46758336} +{"current_steps": 14860, "total_steps": 78105, "loss": 0.5603, "lr": 4.8769982742971e-06, "epoch": 0.9512835285833173, "percentage": 19.03, "elapsed_time": "0:38:40", "remaining_time": "2:44:34", "throughput": 20159.15, "total_tokens": 46773312} +{"current_steps": 14865, "total_steps": 78105, "loss": 0.3841, "lr": 4.876825140276694e-06, "epoch": 0.9516036105242942, "percentage": 19.03, "elapsed_time": "0:38:40", "remaining_time": "2:44:33", "throughput": 20159.85, "total_tokens": 46787968} +{"current_steps": 14870, "total_steps": 78105, "loss": 0.3905, "lr": 4.876651887570022e-06, "epoch": 0.9519236924652711, "percentage": 19.04, "elapsed_time": "0:38:41", "remaining_time": "2:44:32", "throughput": 20160.74, "total_tokens": 46803712} +{"current_steps": 14875, "total_steps": 78105, "loss": 0.4292, "lr": 4.876478516185733e-06, "epoch": 0.952243774406248, "percentage": 19.04, "elapsed_time": "0:38:42", "remaining_time": "2:44:31", "throughput": 20161.85, "total_tokens": 46820288} +{"current_steps": 14880, "total_steps": 78105, "loss": 0.4137, "lr": 4.8763050261324866e-06, "epoch": 0.9525638563472248, "percentage": 19.05, "elapsed_time": "0:38:42", "remaining_time": "2:44:29", "throughput": 20162.69, "total_tokens": 46835904} +{"current_steps": 14885, "total_steps": 78105, "loss": 0.5489, "lr": 4.876131417418945e-06, "epoch": 0.9528839382882017, "percentage": 19.06, "elapsed_time": "0:38:43", "remaining_time": "2:44:28", "throughput": 20163.47, "total_tokens": 46851136} +{"current_steps": 14890, "total_steps": 78105, "loss": 0.5037, "lr": 4.8759576900537765e-06, "epoch": 0.9532040202291787, "percentage": 19.06, "elapsed_time": "0:38:44", "remaining_time": "2:44:27", "throughput": 20164.45, "total_tokens": 46867456} +{"current_steps": 14895, "total_steps": 78105, "loss": 0.4738, "lr": 4.875783844045656e-06, "epoch": 0.9535241021701556, "percentage": 19.07, "elapsed_time": "0:38:44", "remaining_time": "2:44:26", "throughput": 20165.25, "total_tokens": 46882816} +{"current_steps": 14900, "total_steps": 78105, "loss": 0.5471, "lr": 4.875609879403267e-06, "epoch": 0.9538441841111325, "percentage": 19.08, "elapsed_time": "0:38:45", "remaining_time": "2:44:25", "throughput": 20166.16, "total_tokens": 46898624} +{"current_steps": 14905, "total_steps": 78105, "loss": 0.5049, "lr": 4.875435796135294e-06, "epoch": 0.9541642660521094, "percentage": 19.08, "elapsed_time": "0:38:46", "remaining_time": "2:44:23", "throughput": 20167.03, "total_tokens": 46914304} +{"current_steps": 14910, "total_steps": 78105, "loss": 0.535, "lr": 4.875261594250431e-06, "epoch": 0.9544843479930862, "percentage": 19.09, "elapsed_time": "0:38:46", "remaining_time": "2:44:22", "throughput": 20167.97, "total_tokens": 46930368} +{"current_steps": 14915, "total_steps": 78105, "loss": 0.4771, "lr": 4.875087273757375e-06, "epoch": 0.9548044299340631, "percentage": 19.1, "elapsed_time": "0:38:47", "remaining_time": "2:44:21", "throughput": 20168.83, "total_tokens": 46945792} +{"current_steps": 14920, "total_steps": 78105, "loss": 0.4557, "lr": 4.874912834664833e-06, "epoch": 0.95512451187504, "percentage": 19.1, "elapsed_time": "0:38:48", "remaining_time": "2:44:20", "throughput": 20169.8, "total_tokens": 46962048} +{"current_steps": 14925, "total_steps": 78105, "loss": 0.5768, "lr": 4.874738276981515e-06, "epoch": 0.9554445938160169, "percentage": 19.11, "elapsed_time": "0:38:48", "remaining_time": "2:44:18", "throughput": 20170.58, "total_tokens": 46976768} +{"current_steps": 14930, "total_steps": 78105, "loss": 0.5047, "lr": 4.874563600716137e-06, "epoch": 0.9557646757569938, "percentage": 19.12, "elapsed_time": "0:38:49", "remaining_time": "2:44:17", "throughput": 20171.33, "total_tokens": 46991424} +{"current_steps": 14935, "total_steps": 78105, "loss": 0.5943, "lr": 4.8743888058774205e-06, "epoch": 0.9560847576979706, "percentage": 19.12, "elapsed_time": "0:38:50", "remaining_time": "2:44:16", "throughput": 20172.14, "total_tokens": 47006656} +{"current_steps": 14940, "total_steps": 78105, "loss": 0.4124, "lr": 4.874213892474094e-06, "epoch": 0.9564048396389476, "percentage": 19.13, "elapsed_time": "0:38:50", "remaining_time": "2:44:14", "throughput": 20172.94, "total_tokens": 47021824} +{"current_steps": 14945, "total_steps": 78105, "loss": 0.5393, "lr": 4.874038860514895e-06, "epoch": 0.9567249215799245, "percentage": 19.13, "elapsed_time": "0:38:51", "remaining_time": "2:44:13", "throughput": 20173.87, "total_tokens": 47037440} +{"current_steps": 14950, "total_steps": 78105, "loss": 0.4314, "lr": 4.873863710008559e-06, "epoch": 0.9570450035209014, "percentage": 19.14, "elapsed_time": "0:38:52", "remaining_time": "2:44:12", "throughput": 20174.78, "total_tokens": 47053760} +{"current_steps": 14955, "total_steps": 78105, "loss": 0.4475, "lr": 4.873688440963835e-06, "epoch": 0.9573650854618783, "percentage": 19.15, "elapsed_time": "0:38:52", "remaining_time": "2:44:11", "throughput": 20175.55, "total_tokens": 47068928} +{"current_steps": 14960, "total_steps": 78105, "loss": 0.4712, "lr": 4.873513053389475e-06, "epoch": 0.9576851674028551, "percentage": 19.15, "elapsed_time": "0:38:53", "remaining_time": "2:44:10", "throughput": 20176.45, "total_tokens": 47084672} +{"current_steps": 14965, "total_steps": 78105, "loss": 0.5139, "lr": 4.873337547294236e-06, "epoch": 0.958005249343832, "percentage": 19.16, "elapsed_time": "0:38:54", "remaining_time": "2:44:09", "throughput": 20177.52, "total_tokens": 47101632} +{"current_steps": 14970, "total_steps": 78105, "loss": 0.4392, "lr": 4.873161922686882e-06, "epoch": 0.9583253312848089, "percentage": 19.17, "elapsed_time": "0:38:55", "remaining_time": "2:44:07", "throughput": 20178.25, "total_tokens": 47116480} +{"current_steps": 14975, "total_steps": 78105, "loss": 0.6716, "lr": 4.872986179576182e-06, "epoch": 0.9586454132257858, "percentage": 19.17, "elapsed_time": "0:38:55", "remaining_time": "2:44:06", "throughput": 20179.29, "total_tokens": 47132992} +{"current_steps": 14980, "total_steps": 78105, "loss": 0.4487, "lr": 4.872810317970914e-06, "epoch": 0.9589654951667627, "percentage": 19.18, "elapsed_time": "0:38:56", "remaining_time": "2:44:05", "throughput": 20180.27, "total_tokens": 47149056} +{"current_steps": 14985, "total_steps": 78105, "loss": 0.408, "lr": 4.872634337879858e-06, "epoch": 0.9592855771077395, "percentage": 19.19, "elapsed_time": "0:38:57", "remaining_time": "2:44:04", "throughput": 20181.17, "total_tokens": 47164864} +{"current_steps": 14990, "total_steps": 78105, "loss": 0.5247, "lr": 4.8724582393118015e-06, "epoch": 0.9596056590487164, "percentage": 19.19, "elapsed_time": "0:38:57", "remaining_time": "2:44:03", "throughput": 20181.96, "total_tokens": 47180544} +{"current_steps": 14995, "total_steps": 78105, "loss": 0.4967, "lr": 4.872282022275538e-06, "epoch": 0.9599257409896934, "percentage": 19.2, "elapsed_time": "0:38:58", "remaining_time": "2:44:01", "throughput": 20182.9, "total_tokens": 47196608} +{"current_steps": 15000, "total_steps": 78105, "loss": 0.4217, "lr": 4.8721056867798675e-06, "epoch": 0.9602458229306703, "percentage": 19.2, "elapsed_time": "0:38:59", "remaining_time": "2:44:00", "throughput": 20183.89, "total_tokens": 47213504} +{"current_steps": 15005, "total_steps": 78105, "loss": 0.3406, "lr": 4.871929232833595e-06, "epoch": 0.9605659048716472, "percentage": 19.21, "elapsed_time": "0:38:59", "remaining_time": "2:43:59", "throughput": 20184.67, "total_tokens": 47228288} +{"current_steps": 15010, "total_steps": 78105, "loss": 0.5522, "lr": 4.871752660445531e-06, "epoch": 0.960885986812624, "percentage": 19.22, "elapsed_time": "0:39:00", "remaining_time": "2:43:58", "throughput": 20185.5, "total_tokens": 47243712} +{"current_steps": 15015, "total_steps": 78105, "loss": 0.6114, "lr": 4.871575969624493e-06, "epoch": 0.9612060687536009, "percentage": 19.22, "elapsed_time": "0:39:01", "remaining_time": "2:43:57", "throughput": 20186.47, "total_tokens": 47259904} +{"current_steps": 15020, "total_steps": 78105, "loss": 0.574, "lr": 4.871399160379305e-06, "epoch": 0.9615261506945778, "percentage": 19.23, "elapsed_time": "0:39:01", "remaining_time": "2:43:55", "throughput": 20187.21, "total_tokens": 47275072} +{"current_steps": 15025, "total_steps": 78105, "loss": 0.5103, "lr": 4.871222232718795e-06, "epoch": 0.9618462326355547, "percentage": 19.24, "elapsed_time": "0:39:02", "remaining_time": "2:43:54", "throughput": 20188.06, "total_tokens": 47290688} +{"current_steps": 15030, "total_steps": 78105, "loss": 0.4575, "lr": 4.871045186651797e-06, "epoch": 0.9621663145765316, "percentage": 19.24, "elapsed_time": "0:39:03", "remaining_time": "2:43:53", "throughput": 20188.96, "total_tokens": 47306496} +{"current_steps": 15035, "total_steps": 78105, "loss": 0.3715, "lr": 4.870868022187153e-06, "epoch": 0.9624863965175084, "percentage": 19.25, "elapsed_time": "0:39:03", "remaining_time": "2:43:52", "throughput": 20189.71, "total_tokens": 47321280} +{"current_steps": 15040, "total_steps": 78105, "loss": 0.5453, "lr": 4.87069073933371e-06, "epoch": 0.9628064784584853, "percentage": 19.26, "elapsed_time": "0:39:04", "remaining_time": "2:43:50", "throughput": 20190.69, "total_tokens": 47337536} +{"current_steps": 15045, "total_steps": 78105, "loss": 0.447, "lr": 4.87051333810032e-06, "epoch": 0.9631265603994623, "percentage": 19.26, "elapsed_time": "0:39:05", "remaining_time": "2:43:49", "throughput": 20191.46, "total_tokens": 47353024} +{"current_steps": 15050, "total_steps": 78105, "loss": 0.4071, "lr": 4.87033581849584e-06, "epoch": 0.9634466423404392, "percentage": 19.27, "elapsed_time": "0:39:05", "remaining_time": "2:43:48", "throughput": 20192.35, "total_tokens": 47369088} +{"current_steps": 15055, "total_steps": 78105, "loss": 0.7118, "lr": 4.870158180529138e-06, "epoch": 0.9637667242814161, "percentage": 19.28, "elapsed_time": "0:39:06", "remaining_time": "2:43:47", "throughput": 20193.15, "total_tokens": 47384320} +{"current_steps": 15060, "total_steps": 78105, "loss": 0.5317, "lr": 4.869980424209081e-06, "epoch": 0.964086806222393, "percentage": 19.28, "elapsed_time": "0:39:07", "remaining_time": "2:43:46", "throughput": 20194.13, "total_tokens": 47400896} +{"current_steps": 15065, "total_steps": 78105, "loss": 0.385, "lr": 4.869802549544546e-06, "epoch": 0.9644068881633698, "percentage": 19.29, "elapsed_time": "0:39:07", "remaining_time": "2:43:45", "throughput": 20195.06, "total_tokens": 47416896} +{"current_steps": 15070, "total_steps": 78105, "loss": 0.732, "lr": 4.8696245565444165e-06, "epoch": 0.9647269701043467, "percentage": 19.29, "elapsed_time": "0:39:08", "remaining_time": "2:43:43", "throughput": 20195.93, "total_tokens": 47432320} +{"current_steps": 15075, "total_steps": 78105, "loss": 0.4796, "lr": 4.869446445217578e-06, "epoch": 0.9650470520453236, "percentage": 19.3, "elapsed_time": "0:39:09", "remaining_time": "2:43:42", "throughput": 20196.9, "total_tokens": 47448320} +{"current_steps": 15080, "total_steps": 78105, "loss": 0.5214, "lr": 4.869268215572928e-06, "epoch": 0.9653671339863005, "percentage": 19.31, "elapsed_time": "0:39:09", "remaining_time": "2:43:41", "throughput": 20197.64, "total_tokens": 47463488} +{"current_steps": 15085, "total_steps": 78105, "loss": 0.3804, "lr": 4.8690898676193635e-06, "epoch": 0.9656872159272774, "percentage": 19.31, "elapsed_time": "0:39:10", "remaining_time": "2:43:40", "throughput": 20198.48, "total_tokens": 47479104} +{"current_steps": 15090, "total_steps": 78105, "loss": 0.3869, "lr": 4.8689114013657914e-06, "epoch": 0.9660072978682542, "percentage": 19.32, "elapsed_time": "0:39:11", "remaining_time": "2:43:39", "throughput": 20199.65, "total_tokens": 47496448} +{"current_steps": 15095, "total_steps": 78105, "loss": 0.5534, "lr": 4.868732816821122e-06, "epoch": 0.9663273798092311, "percentage": 19.33, "elapsed_time": "0:39:12", "remaining_time": "2:43:37", "throughput": 20200.49, "total_tokens": 47511936} +{"current_steps": 15100, "total_steps": 78105, "loss": 0.5165, "lr": 4.8685541139942745e-06, "epoch": 0.9666474617502081, "percentage": 19.33, "elapsed_time": "0:39:12", "remaining_time": "2:43:36", "throughput": 20201.47, "total_tokens": 47528320} +{"current_steps": 15105, "total_steps": 78105, "loss": 0.4627, "lr": 4.868375292894173e-06, "epoch": 0.966967543691185, "percentage": 19.34, "elapsed_time": "0:39:13", "remaining_time": "2:43:35", "throughput": 20202.26, "total_tokens": 47543296} +{"current_steps": 15110, "total_steps": 78105, "loss": 0.4229, "lr": 4.868196353529745e-06, "epoch": 0.9672876256321619, "percentage": 19.35, "elapsed_time": "0:39:14", "remaining_time": "2:43:34", "throughput": 20203.06, "total_tokens": 47558656} +{"current_steps": 15115, "total_steps": 78105, "loss": 0.5188, "lr": 4.868017295909926e-06, "epoch": 0.9676077075731387, "percentage": 19.35, "elapsed_time": "0:39:14", "remaining_time": "2:43:33", "throughput": 20203.98, "total_tokens": 47574720} +{"current_steps": 15120, "total_steps": 78105, "loss": 0.4448, "lr": 4.867838120043659e-06, "epoch": 0.9679277895141156, "percentage": 19.36, "elapsed_time": "0:39:15", "remaining_time": "2:43:31", "throughput": 20204.81, "total_tokens": 47590272} +{"current_steps": 15125, "total_steps": 78105, "loss": 0.4908, "lr": 4.867658825939889e-06, "epoch": 0.9682478714550925, "percentage": 19.36, "elapsed_time": "0:39:16", "remaining_time": "2:43:30", "throughput": 20205.59, "total_tokens": 47605696} +{"current_steps": 15130, "total_steps": 78105, "loss": 0.4739, "lr": 4.86747941360757e-06, "epoch": 0.9685679533960694, "percentage": 19.37, "elapsed_time": "0:39:16", "remaining_time": "2:43:29", "throughput": 20206.62, "total_tokens": 47621760} +{"current_steps": 15135, "total_steps": 78105, "loss": 0.3881, "lr": 4.8672998830556616e-06, "epoch": 0.9688880353370463, "percentage": 19.38, "elapsed_time": "0:39:17", "remaining_time": "2:43:28", "throughput": 20207.83, "total_tokens": 47639296} +{"current_steps": 15140, "total_steps": 78105, "loss": 0.2948, "lr": 4.8671202342931275e-06, "epoch": 0.9692081172780231, "percentage": 19.38, "elapsed_time": "0:39:18", "remaining_time": "2:43:27", "throughput": 20208.63, "total_tokens": 47654656} +{"current_steps": 15145, "total_steps": 78105, "loss": 0.5355, "lr": 4.866940467328938e-06, "epoch": 0.969528199219, "percentage": 19.39, "elapsed_time": "0:39:18", "remaining_time": "2:43:26", "throughput": 20209.61, "total_tokens": 47671168} +{"current_steps": 15150, "total_steps": 78105, "loss": 0.468, "lr": 4.8667605821720714e-06, "epoch": 0.9698482811599769, "percentage": 19.4, "elapsed_time": "0:39:19", "remaining_time": "2:43:24", "throughput": 20210.38, "total_tokens": 47686400} +{"current_steps": 15155, "total_steps": 78105, "loss": 0.4837, "lr": 4.866580578831509e-06, "epoch": 0.9701683631009539, "percentage": 19.4, "elapsed_time": "0:39:20", "remaining_time": "2:43:23", "throughput": 20211.16, "total_tokens": 47701760} +{"current_steps": 15160, "total_steps": 78105, "loss": 0.5242, "lr": 4.86640045731624e-06, "epoch": 0.9704884450419308, "percentage": 19.41, "elapsed_time": "0:39:20", "remaining_time": "2:43:22", "throughput": 20211.95, "total_tokens": 47717248} +{"current_steps": 15165, "total_steps": 78105, "loss": 0.5109, "lr": 4.866220217635258e-06, "epoch": 0.9708085269829076, "percentage": 19.42, "elapsed_time": "0:39:21", "remaining_time": "2:43:21", "throughput": 20213.05, "total_tokens": 47734336} +{"current_steps": 15170, "total_steps": 78105, "loss": 0.5269, "lr": 4.866039859797563e-06, "epoch": 0.9711286089238845, "percentage": 19.42, "elapsed_time": "0:39:22", "remaining_time": "2:43:20", "throughput": 20213.96, "total_tokens": 47750464} +{"current_steps": 15175, "total_steps": 78105, "loss": 0.8597, "lr": 4.865859383812162e-06, "epoch": 0.9714486908648614, "percentage": 19.43, "elapsed_time": "0:39:22", "remaining_time": "2:43:18", "throughput": 20214.76, "total_tokens": 47765824} +{"current_steps": 15180, "total_steps": 78105, "loss": 0.434, "lr": 4.865678789688067e-06, "epoch": 0.9717687728058383, "percentage": 19.44, "elapsed_time": "0:39:23", "remaining_time": "2:43:17", "throughput": 20215.62, "total_tokens": 47781760} +{"current_steps": 15185, "total_steps": 78105, "loss": 0.4185, "lr": 4.865498077434295e-06, "epoch": 0.9720888547468152, "percentage": 19.44, "elapsed_time": "0:39:24", "remaining_time": "2:43:16", "throughput": 20216.43, "total_tokens": 47797312} +{"current_steps": 15190, "total_steps": 78105, "loss": 0.4009, "lr": 4.865317247059871e-06, "epoch": 0.972408936687792, "percentage": 19.45, "elapsed_time": "0:39:24", "remaining_time": "2:43:15", "throughput": 20217.43, "total_tokens": 47813440} +{"current_steps": 15195, "total_steps": 78105, "loss": 0.4948, "lr": 4.865136298573824e-06, "epoch": 0.9727290186287689, "percentage": 19.45, "elapsed_time": "0:39:25", "remaining_time": "2:43:14", "throughput": 20218.33, "total_tokens": 47829440} +{"current_steps": 15200, "total_steps": 78105, "loss": 0.5519, "lr": 4.86495523198519e-06, "epoch": 0.9730491005697458, "percentage": 19.46, "elapsed_time": "0:39:26", "remaining_time": "2:43:12", "throughput": 20219.05, "total_tokens": 47844608} +{"current_steps": 15205, "total_steps": 78105, "loss": 0.465, "lr": 4.86477404730301e-06, "epoch": 0.9733691825107228, "percentage": 19.47, "elapsed_time": "0:39:26", "remaining_time": "2:43:11", "throughput": 20219.88, "total_tokens": 47860160} +{"current_steps": 15210, "total_steps": 78105, "loss": 0.4305, "lr": 4.864592744536332e-06, "epoch": 0.9736892644516997, "percentage": 19.47, "elapsed_time": "0:39:27", "remaining_time": "2:43:10", "throughput": 20220.68, "total_tokens": 47875648} +{"current_steps": 15215, "total_steps": 78105, "loss": 0.4308, "lr": 4.864411323694208e-06, "epoch": 0.9740093463926766, "percentage": 19.48, "elapsed_time": "0:39:28", "remaining_time": "2:43:09", "throughput": 20221.74, "total_tokens": 47892736} +{"current_steps": 15220, "total_steps": 78105, "loss": 0.5133, "lr": 4.8642297847857e-06, "epoch": 0.9743294283336534, "percentage": 19.49, "elapsed_time": "0:39:29", "remaining_time": "2:43:08", "throughput": 20222.57, "total_tokens": 47908992} +{"current_steps": 15225, "total_steps": 78105, "loss": 0.5394, "lr": 4.8640481278198704e-06, "epoch": 0.9746495102746303, "percentage": 19.49, "elapsed_time": "0:39:29", "remaining_time": "2:43:07", "throughput": 20223.42, "total_tokens": 47924736} +{"current_steps": 15230, "total_steps": 78105, "loss": 0.5153, "lr": 4.863866352805791e-06, "epoch": 0.9749695922156072, "percentage": 19.5, "elapsed_time": "0:39:30", "remaining_time": "2:43:06", "throughput": 20224.31, "total_tokens": 47941056} +{"current_steps": 15235, "total_steps": 78105, "loss": 0.476, "lr": 4.863684459752539e-06, "epoch": 0.9752896741565841, "percentage": 19.51, "elapsed_time": "0:39:31", "remaining_time": "2:43:05", "throughput": 20225.28, "total_tokens": 47957824} +{"current_steps": 15240, "total_steps": 78105, "loss": 0.7246, "lr": 4.863502448669197e-06, "epoch": 0.975609756097561, "percentage": 19.51, "elapsed_time": "0:39:31", "remaining_time": "2:43:03", "throughput": 20225.98, "total_tokens": 47973056} +{"current_steps": 15245, "total_steps": 78105, "loss": 0.5526, "lr": 4.863320319564854e-06, "epoch": 0.9759298380385378, "percentage": 19.52, "elapsed_time": "0:39:32", "remaining_time": "2:43:02", "throughput": 20226.87, "total_tokens": 47988928} +{"current_steps": 15250, "total_steps": 78105, "loss": 0.5295, "lr": 4.863138072448604e-06, "epoch": 0.9762499199795147, "percentage": 19.52, "elapsed_time": "0:39:33", "remaining_time": "2:43:01", "throughput": 20227.58, "total_tokens": 48004032} +{"current_steps": 15255, "total_steps": 78105, "loss": 0.5599, "lr": 4.8629557073295486e-06, "epoch": 0.9765700019204916, "percentage": 19.53, "elapsed_time": "0:39:33", "remaining_time": "2:43:00", "throughput": 20228.54, "total_tokens": 48020800} +{"current_steps": 15260, "total_steps": 78105, "loss": 0.6066, "lr": 4.862773224216793e-06, "epoch": 0.9768900838614686, "percentage": 19.54, "elapsed_time": "0:39:34", "remaining_time": "2:42:59", "throughput": 20229.49, "total_tokens": 48036800} +{"current_steps": 15265, "total_steps": 78105, "loss": 0.3402, "lr": 4.86259062311945e-06, "epoch": 0.9772101658024455, "percentage": 19.54, "elapsed_time": "0:39:35", "remaining_time": "2:42:57", "throughput": 20230.13, "total_tokens": 48051264} +{"current_steps": 15270, "total_steps": 78105, "loss": 0.3295, "lr": 4.862407904046637e-06, "epoch": 0.9775302477434223, "percentage": 19.55, "elapsed_time": "0:39:35", "remaining_time": "2:42:56", "throughput": 20230.87, "total_tokens": 48066176} +{"current_steps": 15275, "total_steps": 78105, "loss": 0.3461, "lr": 4.862225067007479e-06, "epoch": 0.9778503296843992, "percentage": 19.56, "elapsed_time": "0:39:36", "remaining_time": "2:42:55", "throughput": 20231.73, "total_tokens": 48081984} +{"current_steps": 15280, "total_steps": 78105, "loss": 0.569, "lr": 4.862042112011105e-06, "epoch": 0.9781704116253761, "percentage": 19.56, "elapsed_time": "0:39:37", "remaining_time": "2:42:54", "throughput": 20232.42, "total_tokens": 48096896} +{"current_steps": 15285, "total_steps": 78105, "loss": 0.396, "lr": 4.861859039066652e-06, "epoch": 0.978490493566353, "percentage": 19.57, "elapsed_time": "0:39:37", "remaining_time": "2:42:52", "throughput": 20233.19, "total_tokens": 48112128} +{"current_steps": 15290, "total_steps": 78105, "loss": 0.449, "lr": 4.861675848183261e-06, "epoch": 0.9788105755073299, "percentage": 19.58, "elapsed_time": "0:39:38", "remaining_time": "2:42:51", "throughput": 20234.03, "total_tokens": 48127616} +{"current_steps": 15295, "total_steps": 78105, "loss": 0.5732, "lr": 4.861492539370079e-06, "epoch": 0.9791306574483067, "percentage": 19.58, "elapsed_time": "0:39:39", "remaining_time": "2:42:50", "throughput": 20234.7, "total_tokens": 48142144} +{"current_steps": 15300, "total_steps": 78105, "loss": 0.4656, "lr": 4.861309112636261e-06, "epoch": 0.9794507393892836, "percentage": 19.59, "elapsed_time": "0:39:39", "remaining_time": "2:42:49", "throughput": 20235.59, "total_tokens": 48158272} +{"current_steps": 15305, "total_steps": 78105, "loss": 0.5226, "lr": 4.861125567990965e-06, "epoch": 0.9797708213302605, "percentage": 19.6, "elapsed_time": "0:39:40", "remaining_time": "2:42:47", "throughput": 20236.29, "total_tokens": 48173120} +{"current_steps": 15310, "total_steps": 78105, "loss": 0.4565, "lr": 4.860941905443357e-06, "epoch": 0.9800909032712375, "percentage": 19.6, "elapsed_time": "0:39:41", "remaining_time": "2:42:46", "throughput": 20237.07, "total_tokens": 48188672} +{"current_steps": 15315, "total_steps": 78105, "loss": 0.4428, "lr": 4.860758125002608e-06, "epoch": 0.9804109852122144, "percentage": 19.61, "elapsed_time": "0:39:41", "remaining_time": "2:42:45", "throughput": 20237.9, "total_tokens": 48204480} +{"current_steps": 15320, "total_steps": 78105, "loss": 0.4556, "lr": 4.860574226677894e-06, "epoch": 0.9807310671531912, "percentage": 19.61, "elapsed_time": "0:39:42", "remaining_time": "2:42:44", "throughput": 20238.67, "total_tokens": 48219584} +{"current_steps": 15325, "total_steps": 78105, "loss": 0.39, "lr": 4.860390210478401e-06, "epoch": 0.9810511490941681, "percentage": 19.62, "elapsed_time": "0:39:43", "remaining_time": "2:42:43", "throughput": 20239.65, "total_tokens": 48235904} +{"current_steps": 15330, "total_steps": 78105, "loss": 0.4555, "lr": 4.860206076413314e-06, "epoch": 0.981371231035145, "percentage": 19.63, "elapsed_time": "0:39:43", "remaining_time": "2:42:42", "throughput": 20240.76, "total_tokens": 48252992} +{"current_steps": 15335, "total_steps": 78105, "loss": 0.47, "lr": 4.860021824491829e-06, "epoch": 0.9816913129761219, "percentage": 19.63, "elapsed_time": "0:39:44", "remaining_time": "2:42:40", "throughput": 20241.44, "total_tokens": 48267840} +{"current_steps": 15340, "total_steps": 78105, "loss": 0.6372, "lr": 4.859837454723149e-06, "epoch": 0.9820113949170988, "percentage": 19.64, "elapsed_time": "0:39:45", "remaining_time": "2:42:39", "throughput": 20242.14, "total_tokens": 48282688} +{"current_steps": 15345, "total_steps": 78105, "loss": 0.6663, "lr": 4.859652967116477e-06, "epoch": 0.9823314768580756, "percentage": 19.65, "elapsed_time": "0:39:45", "remaining_time": "2:42:38", "throughput": 20243.31, "total_tokens": 48300096} +{"current_steps": 15350, "total_steps": 78105, "loss": 0.5205, "lr": 4.859468361681027e-06, "epoch": 0.9826515587990525, "percentage": 19.65, "elapsed_time": "0:39:46", "remaining_time": "2:42:37", "throughput": 20244.05, "total_tokens": 48315136} +{"current_steps": 15355, "total_steps": 78105, "loss": 0.4813, "lr": 4.859283638426017e-06, "epoch": 0.9829716407400294, "percentage": 19.66, "elapsed_time": "0:39:47", "remaining_time": "2:42:35", "throughput": 20244.81, "total_tokens": 48330240} +{"current_steps": 15360, "total_steps": 78105, "loss": 0.3946, "lr": 4.859098797360672e-06, "epoch": 0.9832917226810063, "percentage": 19.67, "elapsed_time": "0:39:47", "remaining_time": "2:42:34", "throughput": 20245.45, "total_tokens": 48345280} +{"current_steps": 15365, "total_steps": 78105, "loss": 0.4358, "lr": 4.858913838494221e-06, "epoch": 0.9836118046219833, "percentage": 19.67, "elapsed_time": "0:39:48", "remaining_time": "2:42:33", "throughput": 20246.39, "total_tokens": 48361792} +{"current_steps": 15370, "total_steps": 78105, "loss": 0.4608, "lr": 4.858728761835898e-06, "epoch": 0.9839318865629602, "percentage": 19.68, "elapsed_time": "0:39:49", "remaining_time": "2:42:32", "throughput": 20247.13, "total_tokens": 48377408} +{"current_steps": 15375, "total_steps": 78105, "loss": 0.4886, "lr": 4.858543567394949e-06, "epoch": 0.984251968503937, "percentage": 19.69, "elapsed_time": "0:39:50", "remaining_time": "2:42:31", "throughput": 20247.92, "total_tokens": 48392896} +{"current_steps": 15380, "total_steps": 78105, "loss": 0.7478, "lr": 4.8583582551806186e-06, "epoch": 0.9845720504449139, "percentage": 19.69, "elapsed_time": "0:39:50", "remaining_time": "2:42:29", "throughput": 20248.54, "total_tokens": 48407552} +{"current_steps": 15385, "total_steps": 78105, "loss": 0.3833, "lr": 4.858172825202161e-06, "epoch": 0.9848921323858908, "percentage": 19.7, "elapsed_time": "0:39:51", "remaining_time": "2:42:28", "throughput": 20249.53, "total_tokens": 48424320} +{"current_steps": 15390, "total_steps": 78105, "loss": 0.6789, "lr": 4.857987277468836e-06, "epoch": 0.9852122143268677, "percentage": 19.7, "elapsed_time": "0:39:52", "remaining_time": "2:42:28", "throughput": 20250.64, "total_tokens": 48441984} +{"current_steps": 15395, "total_steps": 78105, "loss": 0.5358, "lr": 4.857801611989909e-06, "epoch": 0.9855322962678446, "percentage": 19.71, "elapsed_time": "0:39:52", "remaining_time": "2:42:26", "throughput": 20251.31, "total_tokens": 48456832} +{"current_steps": 15400, "total_steps": 78105, "loss": 0.399, "lr": 4.857615828774651e-06, "epoch": 0.9858523782088214, "percentage": 19.72, "elapsed_time": "0:39:53", "remaining_time": "2:42:25", "throughput": 20252.05, "total_tokens": 48472512} +{"current_steps": 15405, "total_steps": 78105, "loss": 0.3159, "lr": 4.857429927832339e-06, "epoch": 0.9861724601497983, "percentage": 19.72, "elapsed_time": "0:39:54", "remaining_time": "2:42:24", "throughput": 20252.91, "total_tokens": 48488832} +{"current_steps": 15410, "total_steps": 78105, "loss": 0.524, "lr": 4.857243909172255e-06, "epoch": 0.9864925420907752, "percentage": 19.73, "elapsed_time": "0:39:54", "remaining_time": "2:42:23", "throughput": 20253.62, "total_tokens": 48503936} +{"current_steps": 15415, "total_steps": 78105, "loss": 0.5741, "lr": 4.857057772803688e-06, "epoch": 0.9868126240317522, "percentage": 19.74, "elapsed_time": "0:39:55", "remaining_time": "2:42:21", "throughput": 20254.39, "total_tokens": 48519040} +{"current_steps": 15420, "total_steps": 78105, "loss": 0.6914, "lr": 4.856871518735935e-06, "epoch": 0.9871327059727291, "percentage": 19.74, "elapsed_time": "0:39:56", "remaining_time": "2:42:20", "throughput": 20255.31, "total_tokens": 48535040} +{"current_steps": 15425, "total_steps": 78105, "loss": 0.425, "lr": 4.856685146978294e-06, "epoch": 0.9874527879137059, "percentage": 19.75, "elapsed_time": "0:39:56", "remaining_time": "2:42:19", "throughput": 20256.3, "total_tokens": 48551808} +{"current_steps": 15430, "total_steps": 78105, "loss": 0.5954, "lr": 4.856498657540072e-06, "epoch": 0.9877728698546828, "percentage": 19.76, "elapsed_time": "0:39:57", "remaining_time": "2:42:18", "throughput": 20257.03, "total_tokens": 48566592} +{"current_steps": 15435, "total_steps": 78105, "loss": 0.3705, "lr": 4.856312050430582e-06, "epoch": 0.9880929517956597, "percentage": 19.76, "elapsed_time": "0:39:58", "remaining_time": "2:42:17", "throughput": 20257.95, "total_tokens": 48582720} +{"current_steps": 15440, "total_steps": 78105, "loss": 0.5104, "lr": 4.856125325659143e-06, "epoch": 0.9884130337366366, "percentage": 19.77, "elapsed_time": "0:39:58", "remaining_time": "2:42:16", "throughput": 20258.88, "total_tokens": 48599104} +{"current_steps": 15445, "total_steps": 78105, "loss": 0.4091, "lr": 4.855938483235076e-06, "epoch": 0.9887331156776135, "percentage": 19.77, "elapsed_time": "0:39:59", "remaining_time": "2:42:15", "throughput": 20259.77, "total_tokens": 48615040} +{"current_steps": 15450, "total_steps": 78105, "loss": 0.4871, "lr": 4.855751523167713e-06, "epoch": 0.9890531976185903, "percentage": 19.78, "elapsed_time": "0:40:00", "remaining_time": "2:42:13", "throughput": 20260.64, "total_tokens": 48630976} +{"current_steps": 15455, "total_steps": 78105, "loss": 0.4397, "lr": 4.85556444546639e-06, "epoch": 0.9893732795595672, "percentage": 19.79, "elapsed_time": "0:40:00", "remaining_time": "2:42:12", "throughput": 20261.44, "total_tokens": 48646080} +{"current_steps": 15460, "total_steps": 78105, "loss": 0.4126, "lr": 4.855377250140449e-06, "epoch": 0.9896933615005441, "percentage": 19.79, "elapsed_time": "0:40:01", "remaining_time": "2:42:11", "throughput": 20262.37, "total_tokens": 48662528} +{"current_steps": 15465, "total_steps": 78105, "loss": 0.4326, "lr": 4.855189937199235e-06, "epoch": 0.990013443441521, "percentage": 19.8, "elapsed_time": "0:40:02", "remaining_time": "2:42:10", "throughput": 20263.47, "total_tokens": 48680000} +{"current_steps": 15470, "total_steps": 78105, "loss": 0.4272, "lr": 4.855002506652104e-06, "epoch": 0.990333525382498, "percentage": 19.81, "elapsed_time": "0:40:03", "remaining_time": "2:42:09", "throughput": 20264.39, "total_tokens": 48696256} +{"current_steps": 15475, "total_steps": 78105, "loss": 0.3104, "lr": 4.854814958508416e-06, "epoch": 0.9906536073234748, "percentage": 19.81, "elapsed_time": "0:40:03", "remaining_time": "2:42:08", "throughput": 20265.09, "total_tokens": 48711168} +{"current_steps": 15480, "total_steps": 78105, "loss": 0.8846, "lr": 4.854627292777533e-06, "epoch": 0.9909736892644517, "percentage": 19.82, "elapsed_time": "0:40:04", "remaining_time": "2:42:07", "throughput": 20265.91, "total_tokens": 48726848} +{"current_steps": 15485, "total_steps": 78105, "loss": 0.5502, "lr": 4.854439509468829e-06, "epoch": 0.9912937712054286, "percentage": 19.83, "elapsed_time": "0:40:05", "remaining_time": "2:42:05", "throughput": 20266.84, "total_tokens": 48743232} +{"current_steps": 15490, "total_steps": 78105, "loss": 0.5215, "lr": 4.854251608591679e-06, "epoch": 0.9916138531464055, "percentage": 19.83, "elapsed_time": "0:40:05", "remaining_time": "2:42:04", "throughput": 20267.52, "total_tokens": 48758080} +{"current_steps": 15495, "total_steps": 78105, "loss": 0.4002, "lr": 4.854063590155467e-06, "epoch": 0.9919339350873824, "percentage": 19.84, "elapsed_time": "0:40:06", "remaining_time": "2:42:03", "throughput": 20268.22, "total_tokens": 48773120} +{"current_steps": 15500, "total_steps": 78105, "loss": 0.5051, "lr": 4.8538754541695806e-06, "epoch": 0.9922540170283592, "percentage": 19.85, "elapsed_time": "0:40:07", "remaining_time": "2:42:02", "throughput": 20269.03, "total_tokens": 48789056} +{"current_steps": 15505, "total_steps": 78105, "loss": 0.5882, "lr": 4.8536872006434155e-06, "epoch": 0.9925740989693361, "percentage": 19.85, "elapsed_time": "0:40:07", "remaining_time": "2:42:01", "throughput": 20269.72, "total_tokens": 48804288} +{"current_steps": 15510, "total_steps": 78105, "loss": 0.4437, "lr": 4.853498829586371e-06, "epoch": 0.992894180910313, "percentage": 19.86, "elapsed_time": "0:40:08", "remaining_time": "2:41:59", "throughput": 20270.33, "total_tokens": 48818816} +{"current_steps": 15515, "total_steps": 78105, "loss": 0.5251, "lr": 4.8533103410078544e-06, "epoch": 0.9932142628512899, "percentage": 19.86, "elapsed_time": "0:40:09", "remaining_time": "2:41:58", "throughput": 20271.31, "total_tokens": 48835520} +{"current_steps": 15520, "total_steps": 78105, "loss": 0.3338, "lr": 4.853121734917276e-06, "epoch": 0.9935343447922668, "percentage": 19.87, "elapsed_time": "0:40:09", "remaining_time": "2:41:57", "throughput": 20272.17, "total_tokens": 48851328} +{"current_steps": 15525, "total_steps": 78105, "loss": 0.4314, "lr": 4.852933011324057e-06, "epoch": 0.9938544267332438, "percentage": 19.88, "elapsed_time": "0:40:10", "remaining_time": "2:41:56", "throughput": 20272.87, "total_tokens": 48866304} +{"current_steps": 15530, "total_steps": 78105, "loss": 0.4162, "lr": 4.852744170237619e-06, "epoch": 0.9941745086742206, "percentage": 19.88, "elapsed_time": "0:40:11", "remaining_time": "2:41:55", "throughput": 20273.82, "total_tokens": 48882752} +{"current_steps": 15535, "total_steps": 78105, "loss": 0.5135, "lr": 4.852555211667391e-06, "epoch": 0.9944945906151975, "percentage": 19.89, "elapsed_time": "0:40:11", "remaining_time": "2:41:53", "throughput": 20274.6, "total_tokens": 48898304} +{"current_steps": 15540, "total_steps": 78105, "loss": 0.6086, "lr": 4.852366135622811e-06, "epoch": 0.9948146725561744, "percentage": 19.9, "elapsed_time": "0:40:12", "remaining_time": "2:41:52", "throughput": 20275.45, "total_tokens": 48914048} +{"current_steps": 15545, "total_steps": 78105, "loss": 0.4028, "lr": 4.852176942113318e-06, "epoch": 0.9951347544971513, "percentage": 19.9, "elapsed_time": "0:40:13", "remaining_time": "2:41:51", "throughput": 20276.3, "total_tokens": 48930176} +{"current_steps": 15550, "total_steps": 78105, "loss": 0.5957, "lr": 4.851987631148361e-06, "epoch": 0.9954548364381282, "percentage": 19.91, "elapsed_time": "0:40:13", "remaining_time": "2:41:50", "throughput": 20277.09, "total_tokens": 48945920} +{"current_steps": 15555, "total_steps": 78105, "loss": 0.5758, "lr": 4.851798202737393e-06, "epoch": 0.995774918379105, "percentage": 19.92, "elapsed_time": "0:40:14", "remaining_time": "2:41:49", "throughput": 20277.83, "total_tokens": 48960832} +{"current_steps": 15560, "total_steps": 78105, "loss": 0.3928, "lr": 4.851608656889874e-06, "epoch": 0.9960950003200819, "percentage": 19.92, "elapsed_time": "0:40:15", "remaining_time": "2:41:48", "throughput": 20278.7, "total_tokens": 48977280} +{"current_steps": 15565, "total_steps": 78105, "loss": 0.3878, "lr": 4.851418993615266e-06, "epoch": 0.9964150822610588, "percentage": 19.93, "elapsed_time": "0:40:15", "remaining_time": "2:41:46", "throughput": 20279.35, "total_tokens": 48992512} +{"current_steps": 15570, "total_steps": 78105, "loss": 0.4631, "lr": 4.851229212923042e-06, "epoch": 0.9967351642020357, "percentage": 19.93, "elapsed_time": "0:40:16", "remaining_time": "2:41:45", "throughput": 20280.16, "total_tokens": 49008128} +{"current_steps": 15575, "total_steps": 78105, "loss": 0.4514, "lr": 4.851039314822678e-06, "epoch": 0.9970552461430127, "percentage": 19.94, "elapsed_time": "0:40:17", "remaining_time": "2:41:44", "throughput": 20281.09, "total_tokens": 49024512} +{"current_steps": 15580, "total_steps": 78105, "loss": 0.3798, "lr": 4.850849299323657e-06, "epoch": 0.9973753280839895, "percentage": 19.95, "elapsed_time": "0:40:17", "remaining_time": "2:41:43", "throughput": 20281.87, "total_tokens": 49040128} +{"current_steps": 15585, "total_steps": 78105, "loss": 0.4323, "lr": 4.8506591664354676e-06, "epoch": 0.9976954100249664, "percentage": 19.95, "elapsed_time": "0:40:18", "remaining_time": "2:41:42", "throughput": 20282.63, "total_tokens": 49055360} +{"current_steps": 15590, "total_steps": 78105, "loss": 0.4633, "lr": 4.850468916167603e-06, "epoch": 0.9980154919659433, "percentage": 19.96, "elapsed_time": "0:40:19", "remaining_time": "2:41:41", "throughput": 20283.48, "total_tokens": 49071360} +{"current_steps": 15595, "total_steps": 78105, "loss": 0.3504, "lr": 4.850278548529563e-06, "epoch": 0.9983355739069202, "percentage": 19.97, "elapsed_time": "0:40:19", "remaining_time": "2:41:39", "throughput": 20284.14, "total_tokens": 49086144} +{"current_steps": 15600, "total_steps": 78105, "loss": 0.5339, "lr": 4.850088063530856e-06, "epoch": 0.9986556558478971, "percentage": 19.97, "elapsed_time": "0:40:20", "remaining_time": "2:41:38", "throughput": 20284.87, "total_tokens": 49101312} +{"current_steps": 15605, "total_steps": 78105, "loss": 0.3432, "lr": 4.849897461180991e-06, "epoch": 0.9989757377888739, "percentage": 19.98, "elapsed_time": "0:40:21", "remaining_time": "2:41:37", "throughput": 20285.61, "total_tokens": 49116672} +{"current_steps": 15610, "total_steps": 78105, "loss": 0.3068, "lr": 4.849706741489489e-06, "epoch": 0.9992958197298508, "percentage": 19.99, "elapsed_time": "0:40:21", "remaining_time": "2:41:36", "throughput": 20286.45, "total_tokens": 49132288} +{"current_steps": 15615, "total_steps": 78105, "loss": 0.5035, "lr": 4.84951590446587e-06, "epoch": 0.9996159016708277, "percentage": 19.99, "elapsed_time": "0:40:22", "remaining_time": "2:41:35", "throughput": 20287.21, "total_tokens": 49148096} +{"current_steps": 15620, "total_steps": 78105, "loss": 0.343, "lr": 4.849324950119665e-06, "epoch": 0.9999359836118046, "percentage": 20.0, "elapsed_time": "0:40:23", "remaining_time": "2:41:33", "throughput": 20288.09, "total_tokens": 49163840} +{"current_steps": 15624, "total_steps": 78105, "eval_loss": 0.4853726923465729, "epoch": 1.0001920491645861, "percentage": 20.0, "elapsed_time": "0:41:14", "remaining_time": "2:44:57", "throughput": 19870.23, "total_tokens": 49176512} +{"current_steps": 15625, "total_steps": 78105, "loss": 0.3136, "lr": 4.849133878460409e-06, "epoch": 1.0002560655527815, "percentage": 20.01, "elapsed_time": "0:41:50", "remaining_time": "2:47:18", "throughput": 19591.19, "total_tokens": 49180096} +{"current_steps": 15630, "total_steps": 78105, "loss": 0.267, "lr": 4.848942689497643e-06, "epoch": 1.0005761474937584, "percentage": 20.01, "elapsed_time": "0:41:50", "remaining_time": "2:47:16", "throughput": 19592.11, "total_tokens": 49195648} +{"current_steps": 15635, "total_steps": 78105, "loss": 0.4077, "lr": 4.848751383240915e-06, "epoch": 1.0008962294347352, "percentage": 20.02, "elapsed_time": "0:41:51", "remaining_time": "2:47:15", "throughput": 19593.27, "total_tokens": 49212544} +{"current_steps": 15640, "total_steps": 78105, "loss": 0.3645, "lr": 4.848559959699778e-06, "epoch": 1.0012163113757122, "percentage": 20.02, "elapsed_time": "0:41:52", "remaining_time": "2:47:14", "throughput": 19594.29, "total_tokens": 49228672} +{"current_steps": 15645, "total_steps": 78105, "loss": 0.3051, "lr": 4.848368418883788e-06, "epoch": 1.001536393316689, "percentage": 20.03, "elapsed_time": "0:41:53", "remaining_time": "2:47:12", "throughput": 19595.01, "total_tokens": 49243072} +{"current_steps": 15650, "total_steps": 78105, "loss": 0.3817, "lr": 4.848176760802512e-06, "epoch": 1.001856475257666, "percentage": 20.04, "elapsed_time": "0:41:53", "remaining_time": "2:47:11", "throughput": 19596.08, "total_tokens": 49259520} +{"current_steps": 15655, "total_steps": 78105, "loss": 0.2813, "lr": 4.84798498546552e-06, "epoch": 1.002176557198643, "percentage": 20.04, "elapsed_time": "0:41:54", "remaining_time": "2:47:10", "throughput": 19596.96, "total_tokens": 49274752} +{"current_steps": 15660, "total_steps": 78105, "loss": 0.3692, "lr": 4.847793092882388e-06, "epoch": 1.0024966391396197, "percentage": 20.05, "elapsed_time": "0:41:55", "remaining_time": "2:47:08", "throughput": 19597.8, "total_tokens": 49289728} +{"current_steps": 15665, "total_steps": 78105, "loss": 0.4069, "lr": 4.847601083062699e-06, "epoch": 1.0028167210805967, "percentage": 20.06, "elapsed_time": "0:41:55", "remaining_time": "2:47:07", "throughput": 19598.72, "total_tokens": 49304960} +{"current_steps": 15670, "total_steps": 78105, "loss": 0.4501, "lr": 4.847408956016039e-06, "epoch": 1.0031368030215735, "percentage": 20.06, "elapsed_time": "0:41:56", "remaining_time": "2:47:06", "throughput": 19599.81, "total_tokens": 49321152} +{"current_steps": 15675, "total_steps": 78105, "loss": 0.2486, "lr": 4.847216711752003e-06, "epoch": 1.0034568849625505, "percentage": 20.07, "elapsed_time": "0:41:57", "remaining_time": "2:47:05", "throughput": 19600.92, "total_tokens": 49337984} +{"current_steps": 15680, "total_steps": 78105, "loss": 0.2594, "lr": 4.847024350280191e-06, "epoch": 1.0037769669035272, "percentage": 20.08, "elapsed_time": "0:41:57", "remaining_time": "2:47:03", "throughput": 19601.73, "total_tokens": 49352768} +{"current_steps": 15685, "total_steps": 78105, "loss": 0.2567, "lr": 4.846831871610207e-06, "epoch": 1.0040970488445042, "percentage": 20.08, "elapsed_time": "0:41:58", "remaining_time": "2:47:02", "throughput": 19602.79, "total_tokens": 49368832} +{"current_steps": 15690, "total_steps": 78105, "loss": 0.3306, "lr": 4.846639275751664e-06, "epoch": 1.004417130785481, "percentage": 20.09, "elapsed_time": "0:41:59", "remaining_time": "2:47:01", "throughput": 19603.77, "total_tokens": 49384640} +{"current_steps": 15695, "total_steps": 78105, "loss": 0.3599, "lr": 4.84644656271418e-06, "epoch": 1.004737212726458, "percentage": 20.09, "elapsed_time": "0:41:59", "remaining_time": "2:46:59", "throughput": 19604.68, "total_tokens": 49399936} +{"current_steps": 15700, "total_steps": 78105, "loss": 0.2855, "lr": 4.846253732507376e-06, "epoch": 1.0050572946674348, "percentage": 20.1, "elapsed_time": "0:42:00", "remaining_time": "2:46:58", "throughput": 19605.61, "total_tokens": 49415680} +{"current_steps": 15705, "total_steps": 78105, "loss": 0.3478, "lr": 4.846060785140882e-06, "epoch": 1.0053773766084118, "percentage": 20.11, "elapsed_time": "0:42:01", "remaining_time": "2:46:57", "throughput": 19606.44, "total_tokens": 49430592} +{"current_steps": 15710, "total_steps": 78105, "loss": 0.3396, "lr": 4.845867720624332e-06, "epoch": 1.0056974585493887, "percentage": 20.11, "elapsed_time": "0:42:01", "remaining_time": "2:46:55", "throughput": 19607.28, "total_tokens": 49445312} +{"current_steps": 15715, "total_steps": 78105, "loss": 0.3172, "lr": 4.845674538967367e-06, "epoch": 1.0060175404903655, "percentage": 20.12, "elapsed_time": "0:42:02", "remaining_time": "2:46:54", "throughput": 19608.16, "total_tokens": 49460352} +{"current_steps": 15720, "total_steps": 78105, "loss": 0.2856, "lr": 4.8454812401796355e-06, "epoch": 1.0063376224313425, "percentage": 20.13, "elapsed_time": "0:42:03", "remaining_time": "2:46:53", "throughput": 19609.17, "total_tokens": 49476352} +{"current_steps": 15725, "total_steps": 78105, "loss": 0.2585, "lr": 4.845287824270787e-06, "epoch": 1.0066577043723193, "percentage": 20.13, "elapsed_time": "0:42:03", "remaining_time": "2:46:51", "throughput": 19610.13, "total_tokens": 49492096} +{"current_steps": 15730, "total_steps": 78105, "loss": 0.2834, "lr": 4.8450942912504805e-06, "epoch": 1.0069777863132963, "percentage": 20.14, "elapsed_time": "0:42:04", "remaining_time": "2:46:50", "throughput": 19610.97, "total_tokens": 49506944} +{"current_steps": 15735, "total_steps": 78105, "loss": 0.3926, "lr": 4.844900641128381e-06, "epoch": 1.007297868254273, "percentage": 20.15, "elapsed_time": "0:42:05", "remaining_time": "2:46:49", "throughput": 19612.14, "total_tokens": 49523904} +{"current_steps": 15740, "total_steps": 78105, "loss": 0.4609, "lr": 4.844706873914156e-06, "epoch": 1.00761795019525, "percentage": 20.15, "elapsed_time": "0:42:05", "remaining_time": "2:46:47", "throughput": 19612.92, "total_tokens": 49538624} +{"current_steps": 15745, "total_steps": 78105, "loss": 0.4485, "lr": 4.844512989617484e-06, "epoch": 1.0079380321362268, "percentage": 20.16, "elapsed_time": "0:42:06", "remaining_time": "2:46:46", "throughput": 19613.93, "total_tokens": 49554816} +{"current_steps": 15750, "total_steps": 78105, "loss": 0.2031, "lr": 4.844318988248045e-06, "epoch": 1.0082581140772038, "percentage": 20.17, "elapsed_time": "0:42:07", "remaining_time": "2:46:45", "throughput": 19614.92, "total_tokens": 49570496} +{"current_steps": 15755, "total_steps": 78105, "loss": 0.5065, "lr": 4.844124869815528e-06, "epoch": 1.0085781960181806, "percentage": 20.17, "elapsed_time": "0:42:07", "remaining_time": "2:46:43", "throughput": 19615.94, "total_tokens": 49586432} +{"current_steps": 15760, "total_steps": 78105, "loss": 0.3074, "lr": 4.843930634329623e-06, "epoch": 1.0088982779591575, "percentage": 20.18, "elapsed_time": "0:42:08", "remaining_time": "2:46:42", "throughput": 19617.07, "total_tokens": 49603264} +{"current_steps": 15765, "total_steps": 78105, "loss": 0.2569, "lr": 4.8437362818000325e-06, "epoch": 1.0092183599001345, "percentage": 20.18, "elapsed_time": "0:42:09", "remaining_time": "2:46:41", "throughput": 19617.96, "total_tokens": 49619008} +{"current_steps": 15770, "total_steps": 78105, "loss": 0.3842, "lr": 4.843541812236461e-06, "epoch": 1.0095384418411113, "percentage": 20.19, "elapsed_time": "0:42:09", "remaining_time": "2:46:40", "throughput": 19619.05, "total_tokens": 49635456} +{"current_steps": 15775, "total_steps": 78105, "loss": 0.379, "lr": 4.843347225648617e-06, "epoch": 1.0098585237820883, "percentage": 20.2, "elapsed_time": "0:42:10", "remaining_time": "2:46:39", "throughput": 19620.11, "total_tokens": 49651776} +{"current_steps": 15780, "total_steps": 78105, "loss": 0.3251, "lr": 4.843152522046219e-06, "epoch": 1.010178605723065, "percentage": 20.2, "elapsed_time": "0:42:11", "remaining_time": "2:46:37", "throughput": 19621.07, "total_tokens": 49667264} +{"current_steps": 15785, "total_steps": 78105, "loss": 0.2912, "lr": 4.84295770143899e-06, "epoch": 1.010498687664042, "percentage": 20.21, "elapsed_time": "0:42:12", "remaining_time": "2:46:36", "throughput": 19622.04, "total_tokens": 49683392} +{"current_steps": 15790, "total_steps": 78105, "loss": 0.2347, "lr": 4.842762763836656e-06, "epoch": 1.0108187696050188, "percentage": 20.22, "elapsed_time": "0:42:12", "remaining_time": "2:46:35", "throughput": 19622.99, "total_tokens": 49698560} +{"current_steps": 15795, "total_steps": 78105, "loss": 0.4577, "lr": 4.842567709248953e-06, "epoch": 1.0111388515459958, "percentage": 20.22, "elapsed_time": "0:42:13", "remaining_time": "2:46:33", "throughput": 19623.91, "total_tokens": 49713984} +{"current_steps": 15800, "total_steps": 78105, "loss": 0.2336, "lr": 4.842372537685621e-06, "epoch": 1.0114589334869726, "percentage": 20.23, "elapsed_time": "0:42:14", "remaining_time": "2:46:32", "throughput": 19624.89, "total_tokens": 49729920} +{"current_steps": 15805, "total_steps": 78105, "loss": 0.2783, "lr": 4.842177249156405e-06, "epoch": 1.0117790154279496, "percentage": 20.24, "elapsed_time": "0:42:14", "remaining_time": "2:46:31", "throughput": 19626.12, "total_tokens": 49747008} +{"current_steps": 15810, "total_steps": 78105, "loss": 0.3888, "lr": 4.8419818436710575e-06, "epoch": 1.0120990973689263, "percentage": 20.24, "elapsed_time": "0:42:15", "remaining_time": "2:46:30", "throughput": 19627.29, "total_tokens": 49763584} +{"current_steps": 15815, "total_steps": 78105, "loss": 0.4723, "lr": 4.841786321239336e-06, "epoch": 1.0124191793099033, "percentage": 20.25, "elapsed_time": "0:42:16", "remaining_time": "2:46:28", "throughput": 19628.12, "total_tokens": 49778368} +{"current_steps": 15820, "total_steps": 78105, "loss": 0.325, "lr": 4.841590681871002e-06, "epoch": 1.0127392612508803, "percentage": 20.25, "elapsed_time": "0:42:16", "remaining_time": "2:46:27", "throughput": 19629.12, "total_tokens": 49794368} +{"current_steps": 15825, "total_steps": 78105, "loss": 0.3768, "lr": 4.8413949255758274e-06, "epoch": 1.013059343191857, "percentage": 20.26, "elapsed_time": "0:42:17", "remaining_time": "2:46:26", "throughput": 19630.17, "total_tokens": 49810688} +{"current_steps": 15830, "total_steps": 78105, "loss": 0.4759, "lr": 4.841199052363586e-06, "epoch": 1.013379425132834, "percentage": 20.27, "elapsed_time": "0:42:18", "remaining_time": "2:46:24", "throughput": 19631.02, "total_tokens": 49825920} +{"current_steps": 15835, "total_steps": 78105, "loss": 0.5284, "lr": 4.841003062244059e-06, "epoch": 1.0136995070738108, "percentage": 20.27, "elapsed_time": "0:42:18", "remaining_time": "2:46:23", "throughput": 19632.02, "total_tokens": 49841856} +{"current_steps": 15840, "total_steps": 78105, "loss": 0.3258, "lr": 4.840806955227033e-06, "epoch": 1.0140195890147878, "percentage": 20.28, "elapsed_time": "0:42:19", "remaining_time": "2:46:22", "throughput": 19632.94, "total_tokens": 49857088} +{"current_steps": 15845, "total_steps": 78105, "loss": 0.3578, "lr": 4.8406107313223e-06, "epoch": 1.0143396709557646, "percentage": 20.29, "elapsed_time": "0:42:20", "remaining_time": "2:46:21", "throughput": 19633.87, "total_tokens": 49872832} +{"current_steps": 15850, "total_steps": 78105, "loss": 0.3216, "lr": 4.840414390539659e-06, "epoch": 1.0146597528967416, "percentage": 20.29, "elapsed_time": "0:42:20", "remaining_time": "2:46:19", "throughput": 19634.68, "total_tokens": 49887936} +{"current_steps": 15855, "total_steps": 78105, "loss": 0.2599, "lr": 4.840217932888915e-06, "epoch": 1.0149798348377184, "percentage": 20.3, "elapsed_time": "0:42:21", "remaining_time": "2:46:18", "throughput": 19635.44, "total_tokens": 49902656} +{"current_steps": 15860, "total_steps": 78105, "loss": 0.3158, "lr": 4.840021358379876e-06, "epoch": 1.0152999167786954, "percentage": 20.31, "elapsed_time": "0:42:22", "remaining_time": "2:46:17", "throughput": 19636.4, "total_tokens": 49918464} +{"current_steps": 15865, "total_steps": 78105, "loss": 0.2552, "lr": 4.83982466702236e-06, "epoch": 1.0156199987196723, "percentage": 20.31, "elapsed_time": "0:42:22", "remaining_time": "2:46:15", "throughput": 19637.23, "total_tokens": 49933696} +{"current_steps": 15870, "total_steps": 78105, "loss": 0.3292, "lr": 4.839627858826186e-06, "epoch": 1.0159400806606491, "percentage": 20.32, "elapsed_time": "0:42:23", "remaining_time": "2:46:14", "throughput": 19638.22, "total_tokens": 49949760} +{"current_steps": 15875, "total_steps": 78105, "loss": 0.3322, "lr": 4.839430933801185e-06, "epoch": 1.016260162601626, "percentage": 20.33, "elapsed_time": "0:42:24", "remaining_time": "2:46:13", "throughput": 19639.24, "total_tokens": 49966080} +{"current_steps": 15880, "total_steps": 78105, "loss": 0.2365, "lr": 4.839233891957188e-06, "epoch": 1.0165802445426029, "percentage": 20.33, "elapsed_time": "0:42:24", "remaining_time": "2:46:11", "throughput": 19640.0, "total_tokens": 49980928} +{"current_steps": 15885, "total_steps": 78105, "loss": 0.3634, "lr": 4.839036733304036e-06, "epoch": 1.0169003264835799, "percentage": 20.34, "elapsed_time": "0:42:25", "remaining_time": "2:46:10", "throughput": 19640.87, "total_tokens": 49996160} +{"current_steps": 15890, "total_steps": 78105, "loss": 0.2996, "lr": 4.838839457851573e-06, "epoch": 1.0172204084245566, "percentage": 20.34, "elapsed_time": "0:42:26", "remaining_time": "2:46:09", "throughput": 19641.66, "total_tokens": 50011008} +{"current_steps": 15895, "total_steps": 78105, "loss": 0.2945, "lr": 4.83864206560965e-06, "epoch": 1.0175404903655336, "percentage": 20.35, "elapsed_time": "0:42:26", "remaining_time": "2:46:07", "throughput": 19642.5, "total_tokens": 50026304} +{"current_steps": 15900, "total_steps": 78105, "loss": 0.2465, "lr": 4.838444556588123e-06, "epoch": 1.0178605723065104, "percentage": 20.36, "elapsed_time": "0:42:27", "remaining_time": "2:46:06", "throughput": 19643.46, "total_tokens": 50041856} +{"current_steps": 15905, "total_steps": 78105, "loss": 0.4178, "lr": 4.838246930796856e-06, "epoch": 1.0181806542474874, "percentage": 20.36, "elapsed_time": "0:42:28", "remaining_time": "2:46:05", "throughput": 19644.28, "total_tokens": 50057152} +{"current_steps": 15910, "total_steps": 78105, "loss": 0.2823, "lr": 4.838049188245717e-06, "epoch": 1.0185007361884642, "percentage": 20.37, "elapsed_time": "0:42:28", "remaining_time": "2:46:04", "throughput": 19645.64, "total_tokens": 50075392} +{"current_steps": 15915, "total_steps": 78105, "loss": 0.3396, "lr": 4.8378513289445785e-06, "epoch": 1.0188208181294411, "percentage": 20.38, "elapsed_time": "0:42:29", "remaining_time": "2:46:02", "throughput": 19646.53, "total_tokens": 50090816} +{"current_steps": 15920, "total_steps": 78105, "loss": 0.2388, "lr": 4.837653352903324e-06, "epoch": 1.0191409000704181, "percentage": 20.38, "elapsed_time": "0:42:30", "remaining_time": "2:46:01", "throughput": 19647.37, "total_tokens": 50106112} +{"current_steps": 15925, "total_steps": 78105, "loss": 0.4008, "lr": 4.837455260131836e-06, "epoch": 1.019460982011395, "percentage": 20.39, "elapsed_time": "0:42:30", "remaining_time": "2:46:00", "throughput": 19648.3, "total_tokens": 50121664} +{"current_steps": 15930, "total_steps": 78105, "loss": 0.3061, "lr": 4.837257050640009e-06, "epoch": 1.019781063952372, "percentage": 20.4, "elapsed_time": "0:42:31", "remaining_time": "2:45:59", "throughput": 19649.34, "total_tokens": 50137856} +{"current_steps": 15935, "total_steps": 78105, "loss": 0.295, "lr": 4.837058724437738e-06, "epoch": 1.0201011458933487, "percentage": 20.4, "elapsed_time": "0:42:32", "remaining_time": "2:45:57", "throughput": 19650.32, "total_tokens": 50153984} +{"current_steps": 15940, "total_steps": 78105, "loss": 0.3458, "lr": 4.836860281534928e-06, "epoch": 1.0204212278343257, "percentage": 20.41, "elapsed_time": "0:42:33", "remaining_time": "2:45:56", "throughput": 19651.28, "total_tokens": 50170112} +{"current_steps": 15945, "total_steps": 78105, "loss": 0.2094, "lr": 4.836661721941488e-06, "epoch": 1.0207413097753024, "percentage": 20.41, "elapsed_time": "0:42:33", "remaining_time": "2:45:55", "throughput": 19652.12, "total_tokens": 50185216} +{"current_steps": 15950, "total_steps": 78105, "loss": 0.3635, "lr": 4.836463045667333e-06, "epoch": 1.0210613917162794, "percentage": 20.42, "elapsed_time": "0:42:34", "remaining_time": "2:45:54", "throughput": 19653.39, "total_tokens": 50203200} +{"current_steps": 15955, "total_steps": 78105, "loss": 0.3291, "lr": 4.836264252722384e-06, "epoch": 1.0213814736572562, "percentage": 20.43, "elapsed_time": "0:42:35", "remaining_time": "2:45:52", "throughput": 19654.17, "total_tokens": 50218304} +{"current_steps": 15960, "total_steps": 78105, "loss": 0.3045, "lr": 4.8360653431165665e-06, "epoch": 1.0217015555982332, "percentage": 20.43, "elapsed_time": "0:42:35", "remaining_time": "2:45:51", "throughput": 19655.08, "total_tokens": 50234048} +{"current_steps": 15965, "total_steps": 78105, "loss": 0.289, "lr": 4.835866316859814e-06, "epoch": 1.02202163753921, "percentage": 20.44, "elapsed_time": "0:42:36", "remaining_time": "2:45:50", "throughput": 19656.0, "total_tokens": 50250112} +{"current_steps": 15970, "total_steps": 78105, "loss": 0.1814, "lr": 4.835667173962065e-06, "epoch": 1.022341719480187, "percentage": 20.45, "elapsed_time": "0:42:37", "remaining_time": "2:45:49", "throughput": 19657.0, "total_tokens": 50266752} +{"current_steps": 15975, "total_steps": 78105, "loss": 0.3621, "lr": 4.835467914433262e-06, "epoch": 1.022661801421164, "percentage": 20.45, "elapsed_time": "0:42:37", "remaining_time": "2:45:48", "throughput": 19657.79, "total_tokens": 50281792} +{"current_steps": 15980, "total_steps": 78105, "loss": 0.3635, "lr": 4.835268538283359e-06, "epoch": 1.0229818833621407, "percentage": 20.46, "elapsed_time": "0:42:38", "remaining_time": "2:45:46", "throughput": 19658.61, "total_tokens": 50296896} +{"current_steps": 15985, "total_steps": 78105, "loss": 0.2286, "lr": 4.835069045522307e-06, "epoch": 1.0233019653031177, "percentage": 20.47, "elapsed_time": "0:42:39", "remaining_time": "2:45:45", "throughput": 19659.63, "total_tokens": 50313280} +{"current_steps": 15990, "total_steps": 78105, "loss": 0.3084, "lr": 4.834869436160071e-06, "epoch": 1.0236220472440944, "percentage": 20.47, "elapsed_time": "0:42:39", "remaining_time": "2:45:44", "throughput": 19660.52, "total_tokens": 50329088} +{"current_steps": 15995, "total_steps": 78105, "loss": 0.4138, "lr": 4.834669710206617e-06, "epoch": 1.0239421291850714, "percentage": 20.48, "elapsed_time": "0:42:40", "remaining_time": "2:45:43", "throughput": 19661.54, "total_tokens": 50345472} +{"current_steps": 16000, "total_steps": 78105, "loss": 0.2707, "lr": 4.834469867671918e-06, "epoch": 1.0242622111260482, "percentage": 20.49, "elapsed_time": "0:42:41", "remaining_time": "2:45:41", "throughput": 19662.3, "total_tokens": 50360128} +{"current_steps": 16005, "total_steps": 78105, "loss": 0.3587, "lr": 4.8342699085659545e-06, "epoch": 1.0245822930670252, "percentage": 20.49, "elapsed_time": "0:42:41", "remaining_time": "2:45:40", "throughput": 19663.14, "total_tokens": 50375424} +{"current_steps": 16010, "total_steps": 78105, "loss": 0.3576, "lr": 4.83406983289871e-06, "epoch": 1.024902375008002, "percentage": 20.5, "elapsed_time": "0:42:42", "remaining_time": "2:45:39", "throughput": 19664.03, "total_tokens": 50390656} +{"current_steps": 16015, "total_steps": 78105, "loss": 0.268, "lr": 4.8338696406801755e-06, "epoch": 1.025222456948979, "percentage": 20.5, "elapsed_time": "0:42:43", "remaining_time": "2:45:37", "throughput": 19665.08, "total_tokens": 50407296} +{"current_steps": 16020, "total_steps": 78105, "loss": 0.3322, "lr": 4.8336693319203485e-06, "epoch": 1.0255425388899557, "percentage": 20.51, "elapsed_time": "0:42:43", "remaining_time": "2:45:36", "throughput": 19665.98, "total_tokens": 50422912} +{"current_steps": 16025, "total_steps": 78105, "loss": 0.3111, "lr": 4.83346890662923e-06, "epoch": 1.0258626208309327, "percentage": 20.52, "elapsed_time": "0:42:44", "remaining_time": "2:45:35", "throughput": 19666.84, "total_tokens": 50438208} +{"current_steps": 16030, "total_steps": 78105, "loss": 0.3233, "lr": 4.833268364816829e-06, "epoch": 1.0261827027719097, "percentage": 20.52, "elapsed_time": "0:42:45", "remaining_time": "2:45:33", "throughput": 19667.73, "total_tokens": 50453760} +{"current_steps": 16035, "total_steps": 78105, "loss": 0.296, "lr": 4.833067706493159e-06, "epoch": 1.0265027847128865, "percentage": 20.53, "elapsed_time": "0:42:45", "remaining_time": "2:45:32", "throughput": 19668.66, "total_tokens": 50469504} +{"current_steps": 16040, "total_steps": 78105, "loss": 0.31, "lr": 4.832866931668242e-06, "epoch": 1.0268228666538635, "percentage": 20.54, "elapsed_time": "0:42:46", "remaining_time": "2:45:31", "throughput": 19669.64, "total_tokens": 50485568} +{"current_steps": 16045, "total_steps": 78105, "loss": 0.2915, "lr": 4.8326660403521005e-06, "epoch": 1.0271429485948402, "percentage": 20.54, "elapsed_time": "0:42:47", "remaining_time": "2:45:30", "throughput": 19670.42, "total_tokens": 50500480} +{"current_steps": 16050, "total_steps": 78105, "loss": 0.3051, "lr": 4.832465032554768e-06, "epoch": 1.0274630305358172, "percentage": 20.55, "elapsed_time": "0:42:48", "remaining_time": "2:45:28", "throughput": 19671.46, "total_tokens": 50516352} +{"current_steps": 16055, "total_steps": 78105, "loss": 0.3903, "lr": 4.832263908286281e-06, "epoch": 1.027783112476794, "percentage": 20.56, "elapsed_time": "0:42:48", "remaining_time": "2:45:27", "throughput": 19672.3, "total_tokens": 50531456} +{"current_steps": 16060, "total_steps": 78105, "loss": 0.3955, "lr": 4.832062667556682e-06, "epoch": 1.028103194417771, "percentage": 20.56, "elapsed_time": "0:42:49", "remaining_time": "2:45:26", "throughput": 19673.13, "total_tokens": 50546240} +{"current_steps": 16065, "total_steps": 78105, "loss": 0.3919, "lr": 4.8318613103760215e-06, "epoch": 1.0284232763587478, "percentage": 20.57, "elapsed_time": "0:42:49", "remaining_time": "2:45:24", "throughput": 19673.9, "total_tokens": 50560768} +{"current_steps": 16070, "total_steps": 78105, "loss": 0.4741, "lr": 4.831659836754353e-06, "epoch": 1.0287433582997247, "percentage": 20.57, "elapsed_time": "0:42:50", "remaining_time": "2:45:23", "throughput": 19675.22, "total_tokens": 50578816} +{"current_steps": 16075, "total_steps": 78105, "loss": 0.389, "lr": 4.831458246701738e-06, "epoch": 1.0290634402407015, "percentage": 20.58, "elapsed_time": "0:42:51", "remaining_time": "2:45:22", "throughput": 19676.11, "total_tokens": 50594048} +{"current_steps": 16080, "total_steps": 78105, "loss": 0.2532, "lr": 4.831256540228242e-06, "epoch": 1.0293835221816785, "percentage": 20.59, "elapsed_time": "0:42:52", "remaining_time": "2:45:20", "throughput": 19676.95, "total_tokens": 50609344} +{"current_steps": 16085, "total_steps": 78105, "loss": 0.3798, "lr": 4.831054717343936e-06, "epoch": 1.0297036041226555, "percentage": 20.59, "elapsed_time": "0:42:52", "remaining_time": "2:45:19", "throughput": 19677.88, "total_tokens": 50624832} +{"current_steps": 16090, "total_steps": 78105, "loss": 0.2135, "lr": 4.830852778058901e-06, "epoch": 1.0300236860636323, "percentage": 20.6, "elapsed_time": "0:42:53", "remaining_time": "2:45:18", "throughput": 19678.84, "total_tokens": 50641152} +{"current_steps": 16095, "total_steps": 78105, "loss": 0.3473, "lr": 4.830650722383219e-06, "epoch": 1.0303437680046093, "percentage": 20.61, "elapsed_time": "0:42:54", "remaining_time": "2:45:17", "throughput": 19679.87, "total_tokens": 50657472} +{"current_steps": 16100, "total_steps": 78105, "loss": 0.3126, "lr": 4.83044855032698e-06, "epoch": 1.030663849945586, "percentage": 20.61, "elapsed_time": "0:42:54", "remaining_time": "2:45:16", "throughput": 19680.79, "total_tokens": 50673536} +{"current_steps": 16105, "total_steps": 78105, "loss": 0.282, "lr": 4.830246261900279e-06, "epoch": 1.030983931886563, "percentage": 20.62, "elapsed_time": "0:42:55", "remaining_time": "2:45:14", "throughput": 19681.69, "total_tokens": 50688832} +{"current_steps": 16110, "total_steps": 78105, "loss": 0.2764, "lr": 4.830043857113217e-06, "epoch": 1.0313040138275398, "percentage": 20.63, "elapsed_time": "0:42:56", "remaining_time": "2:45:13", "throughput": 19682.84, "total_tokens": 50705728} +{"current_steps": 16115, "total_steps": 78105, "loss": 0.3087, "lr": 4.829841335975902e-06, "epoch": 1.0316240957685168, "percentage": 20.63, "elapsed_time": "0:42:56", "remaining_time": "2:45:12", "throughput": 19683.88, "total_tokens": 50722112} +{"current_steps": 16120, "total_steps": 78105, "loss": 0.326, "lr": 4.8296386984984466e-06, "epoch": 1.0319441777094935, "percentage": 20.64, "elapsed_time": "0:42:57", "remaining_time": "2:45:11", "throughput": 19684.73, "total_tokens": 50737408} +{"current_steps": 16125, "total_steps": 78105, "loss": 0.277, "lr": 4.829435944690969e-06, "epoch": 1.0322642596504705, "percentage": 20.65, "elapsed_time": "0:42:58", "remaining_time": "2:45:09", "throughput": 19685.63, "total_tokens": 50753152} +{"current_steps": 16130, "total_steps": 78105, "loss": 0.4376, "lr": 4.829233074563594e-06, "epoch": 1.0325843415914475, "percentage": 20.65, "elapsed_time": "0:42:58", "remaining_time": "2:45:08", "throughput": 19686.52, "total_tokens": 50768704} +{"current_steps": 16135, "total_steps": 78105, "loss": 0.2066, "lr": 4.8290300881264516e-06, "epoch": 1.0329044235324243, "percentage": 20.66, "elapsed_time": "0:42:59", "remaining_time": "2:45:07", "throughput": 19687.39, "total_tokens": 50783936} +{"current_steps": 16140, "total_steps": 78105, "loss": 0.3182, "lr": 4.8288269853896786e-06, "epoch": 1.0332245054734013, "percentage": 20.66, "elapsed_time": "0:43:00", "remaining_time": "2:45:05", "throughput": 19688.29, "total_tokens": 50799488} +{"current_steps": 16145, "total_steps": 78105, "loss": 0.4688, "lr": 4.828623766363416e-06, "epoch": 1.033544587414378, "percentage": 20.67, "elapsed_time": "0:43:00", "remaining_time": "2:45:04", "throughput": 19689.19, "total_tokens": 50814720} +{"current_steps": 16150, "total_steps": 78105, "loss": 0.2703, "lr": 4.828420431057812e-06, "epoch": 1.033864669355355, "percentage": 20.68, "elapsed_time": "0:43:01", "remaining_time": "2:45:03", "throughput": 19689.94, "total_tokens": 50829632} +{"current_steps": 16155, "total_steps": 78105, "loss": 0.2409, "lr": 4.828216979483019e-06, "epoch": 1.0341847512963318, "percentage": 20.68, "elapsed_time": "0:43:02", "remaining_time": "2:45:01", "throughput": 19690.88, "total_tokens": 50845376} +{"current_steps": 16160, "total_steps": 78105, "loss": 0.411, "lr": 4.828013411649198e-06, "epoch": 1.0345048332373088, "percentage": 20.69, "elapsed_time": "0:43:02", "remaining_time": "2:45:00", "throughput": 19691.91, "total_tokens": 50861888} +{"current_steps": 16165, "total_steps": 78105, "loss": 0.3307, "lr": 4.827809727566514e-06, "epoch": 1.0348249151782856, "percentage": 20.7, "elapsed_time": "0:43:03", "remaining_time": "2:44:59", "throughput": 19692.89, "total_tokens": 50877952} +{"current_steps": 16170, "total_steps": 78105, "loss": 0.3777, "lr": 4.827605927245137e-06, "epoch": 1.0351449971192626, "percentage": 20.7, "elapsed_time": "0:43:04", "remaining_time": "2:44:58", "throughput": 19693.62, "total_tokens": 50892608} +{"current_steps": 16175, "total_steps": 78105, "loss": 0.3058, "lr": 4.827402010695244e-06, "epoch": 1.0354650790602393, "percentage": 20.71, "elapsed_time": "0:43:04", "remaining_time": "2:44:57", "throughput": 19694.78, "total_tokens": 50909568} +{"current_steps": 16180, "total_steps": 78105, "loss": 0.326, "lr": 4.827197977927017e-06, "epoch": 1.0357851610012163, "percentage": 20.72, "elapsed_time": "0:43:05", "remaining_time": "2:44:55", "throughput": 19695.86, "total_tokens": 50926144} +{"current_steps": 16185, "total_steps": 78105, "loss": 0.2551, "lr": 4.826993828950646e-06, "epoch": 1.0361052429421933, "percentage": 20.72, "elapsed_time": "0:43:06", "remaining_time": "2:44:54", "throughput": 19696.81, "total_tokens": 50942080} +{"current_steps": 16190, "total_steps": 78105, "loss": 0.2769, "lr": 4.826789563776323e-06, "epoch": 1.03642532488317, "percentage": 20.73, "elapsed_time": "0:43:07", "remaining_time": "2:44:53", "throughput": 19697.83, "total_tokens": 50958592} +{"current_steps": 16195, "total_steps": 78105, "loss": 0.2245, "lr": 4.826585182414249e-06, "epoch": 1.036745406824147, "percentage": 20.73, "elapsed_time": "0:43:07", "remaining_time": "2:44:52", "throughput": 19698.83, "total_tokens": 50974464} +{"current_steps": 16200, "total_steps": 78105, "loss": 0.4052, "lr": 4.826380684874631e-06, "epoch": 1.0370654887651238, "percentage": 20.74, "elapsed_time": "0:43:08", "remaining_time": "2:44:50", "throughput": 19699.76, "total_tokens": 50990016} +{"current_steps": 16205, "total_steps": 78105, "loss": 0.4216, "lr": 4.826176071167678e-06, "epoch": 1.0373855707061008, "percentage": 20.75, "elapsed_time": "0:43:09", "remaining_time": "2:44:49", "throughput": 19700.65, "total_tokens": 51005376} +{"current_steps": 16210, "total_steps": 78105, "loss": 0.4809, "lr": 4.825971341303609e-06, "epoch": 1.0377056526470776, "percentage": 20.75, "elapsed_time": "0:43:09", "remaining_time": "2:44:48", "throughput": 19701.47, "total_tokens": 51020416} +{"current_steps": 16215, "total_steps": 78105, "loss": 0.3424, "lr": 4.825766495292647e-06, "epoch": 1.0380257345880546, "percentage": 20.76, "elapsed_time": "0:43:10", "remaining_time": "2:44:46", "throughput": 19702.37, "total_tokens": 51036096} +{"current_steps": 16220, "total_steps": 78105, "loss": 0.3262, "lr": 4.825561533145021e-06, "epoch": 1.0383458165290314, "percentage": 20.77, "elapsed_time": "0:43:11", "remaining_time": "2:44:45", "throughput": 19703.2, "total_tokens": 51051328} +{"current_steps": 16225, "total_steps": 78105, "loss": 0.4197, "lr": 4.825356454870964e-06, "epoch": 1.0386658984700083, "percentage": 20.77, "elapsed_time": "0:43:11", "remaining_time": "2:44:44", "throughput": 19704.21, "total_tokens": 51067776} +{"current_steps": 16230, "total_steps": 78105, "loss": 0.3289, "lr": 4.825151260480719e-06, "epoch": 1.038985980410985, "percentage": 20.78, "elapsed_time": "0:43:12", "remaining_time": "2:44:43", "throughput": 19705.36, "total_tokens": 51084736} +{"current_steps": 16235, "total_steps": 78105, "loss": 0.2553, "lr": 4.824945949984532e-06, "epoch": 1.039306062351962, "percentage": 20.79, "elapsed_time": "0:43:13", "remaining_time": "2:44:42", "throughput": 19706.28, "total_tokens": 51100352} +{"current_steps": 16240, "total_steps": 78105, "loss": 0.2415, "lr": 4.8247405233926534e-06, "epoch": 1.039626144292939, "percentage": 20.79, "elapsed_time": "0:43:13", "remaining_time": "2:44:40", "throughput": 19707.21, "total_tokens": 51116352} +{"current_steps": 16245, "total_steps": 78105, "loss": 0.3407, "lr": 4.824534980715343e-06, "epoch": 1.0399462262339159, "percentage": 20.8, "elapsed_time": "0:43:14", "remaining_time": "2:44:39", "throughput": 19708.23, "total_tokens": 51132736} +{"current_steps": 16250, "total_steps": 78105, "loss": 0.3125, "lr": 4.824329321962863e-06, "epoch": 1.0402663081748929, "percentage": 20.81, "elapsed_time": "0:43:15", "remaining_time": "2:44:38", "throughput": 19709.35, "total_tokens": 51149696} +{"current_steps": 16255, "total_steps": 78105, "loss": 0.2986, "lr": 4.824123547145484e-06, "epoch": 1.0405863901158696, "percentage": 20.81, "elapsed_time": "0:43:15", "remaining_time": "2:44:37", "throughput": 19710.26, "total_tokens": 51165248} +{"current_steps": 16260, "total_steps": 78105, "loss": 0.3958, "lr": 4.82391765627348e-06, "epoch": 1.0409064720568466, "percentage": 20.82, "elapsed_time": "0:43:16", "remaining_time": "2:44:36", "throughput": 19711.28, "total_tokens": 51181632} +{"current_steps": 16265, "total_steps": 78105, "loss": 0.3475, "lr": 4.823711649357135e-06, "epoch": 1.0412265539978234, "percentage": 20.82, "elapsed_time": "0:43:17", "remaining_time": "2:44:34", "throughput": 19712.12, "total_tokens": 51196864} +{"current_steps": 16270, "total_steps": 78105, "loss": 0.2704, "lr": 4.823505526406732e-06, "epoch": 1.0415466359388004, "percentage": 20.83, "elapsed_time": "0:43:17", "remaining_time": "2:44:33", "throughput": 19713.12, "total_tokens": 51213184} +{"current_steps": 16275, "total_steps": 78105, "loss": 0.2751, "lr": 4.823299287432567e-06, "epoch": 1.0418667178797771, "percentage": 20.84, "elapsed_time": "0:43:18", "remaining_time": "2:44:32", "throughput": 19714.13, "total_tokens": 51229504} +{"current_steps": 16280, "total_steps": 78105, "loss": 0.264, "lr": 4.823092932444937e-06, "epoch": 1.0421867998207541, "percentage": 20.84, "elapsed_time": "0:43:19", "remaining_time": "2:44:31", "throughput": 19714.97, "total_tokens": 51244800} +{"current_steps": 16285, "total_steps": 78105, "loss": 0.2822, "lr": 4.822886461454146e-06, "epoch": 1.042506881761731, "percentage": 20.85, "elapsed_time": "0:43:19", "remaining_time": "2:44:29", "throughput": 19715.79, "total_tokens": 51260032} +{"current_steps": 16290, "total_steps": 78105, "loss": 0.3556, "lr": 4.822679874470505e-06, "epoch": 1.0428269637027079, "percentage": 20.86, "elapsed_time": "0:43:20", "remaining_time": "2:44:28", "throughput": 19716.64, "total_tokens": 51275392} +{"current_steps": 16295, "total_steps": 78105, "loss": 0.3518, "lr": 4.8224731715043295e-06, "epoch": 1.0431470456436849, "percentage": 20.86, "elapsed_time": "0:43:21", "remaining_time": "2:44:27", "throughput": 19717.53, "total_tokens": 51290944} +{"current_steps": 16300, "total_steps": 78105, "loss": 0.2881, "lr": 4.822266352565941e-06, "epoch": 1.0434671275846616, "percentage": 20.87, "elapsed_time": "0:43:21", "remaining_time": "2:44:25", "throughput": 19718.51, "total_tokens": 51307200} +{"current_steps": 16305, "total_steps": 78105, "loss": 0.3328, "lr": 4.822059417665667e-06, "epoch": 1.0437872095256386, "percentage": 20.88, "elapsed_time": "0:43:22", "remaining_time": "2:44:24", "throughput": 19719.24, "total_tokens": 51321920} +{"current_steps": 16310, "total_steps": 78105, "loss": 0.233, "lr": 4.821852366813841e-06, "epoch": 1.0441072914666154, "percentage": 20.88, "elapsed_time": "0:43:23", "remaining_time": "2:44:23", "throughput": 19720.12, "total_tokens": 51337344} +{"current_steps": 16315, "total_steps": 78105, "loss": 0.2586, "lr": 4.821645200020802e-06, "epoch": 1.0444273734075924, "percentage": 20.89, "elapsed_time": "0:43:23", "remaining_time": "2:44:21", "throughput": 19720.91, "total_tokens": 51352256} +{"current_steps": 16320, "total_steps": 78105, "loss": 0.3016, "lr": 4.821437917296895e-06, "epoch": 1.0447474553485692, "percentage": 20.89, "elapsed_time": "0:43:24", "remaining_time": "2:44:20", "throughput": 19721.83, "total_tokens": 51367936} +{"current_steps": 16325, "total_steps": 78105, "loss": 0.4314, "lr": 4.82123051865247e-06, "epoch": 1.0450675372895462, "percentage": 20.9, "elapsed_time": "0:43:25", "remaining_time": "2:44:19", "throughput": 19722.62, "total_tokens": 51382592} +{"current_steps": 16330, "total_steps": 78105, "loss": 0.4091, "lr": 4.821023004097883e-06, "epoch": 1.045387619230523, "percentage": 20.91, "elapsed_time": "0:43:25", "remaining_time": "2:44:18", "throughput": 19723.63, "total_tokens": 51398528} +{"current_steps": 16335, "total_steps": 78105, "loss": 0.3303, "lr": 4.820815373643498e-06, "epoch": 1.0457077011715, "percentage": 20.91, "elapsed_time": "0:43:26", "remaining_time": "2:44:16", "throughput": 19724.4, "total_tokens": 51413376} +{"current_steps": 16340, "total_steps": 78105, "loss": 0.3399, "lr": 4.820607627299682e-06, "epoch": 1.0460277831124767, "percentage": 20.92, "elapsed_time": "0:43:27", "remaining_time": "2:44:15", "throughput": 19725.7, "total_tokens": 51432064} +{"current_steps": 16345, "total_steps": 78105, "loss": 0.3488, "lr": 4.820399765076808e-06, "epoch": 1.0463478650534537, "percentage": 20.93, "elapsed_time": "0:43:28", "remaining_time": "2:44:14", "throughput": 19726.48, "total_tokens": 51447232} +{"current_steps": 16350, "total_steps": 78105, "loss": 0.3695, "lr": 4.8201917869852565e-06, "epoch": 1.0466679469944307, "percentage": 20.93, "elapsed_time": "0:43:28", "remaining_time": "2:44:13", "throughput": 19727.4, "total_tokens": 51463168} +{"current_steps": 16355, "total_steps": 78105, "loss": 0.1727, "lr": 4.819983693035412e-06, "epoch": 1.0469880289354074, "percentage": 20.94, "elapsed_time": "0:43:29", "remaining_time": "2:44:12", "throughput": 19728.75, "total_tokens": 51481216} +{"current_steps": 16360, "total_steps": 78105, "loss": 0.2678, "lr": 4.819775483237667e-06, "epoch": 1.0473081108763844, "percentage": 20.95, "elapsed_time": "0:43:30", "remaining_time": "2:44:11", "throughput": 19729.78, "total_tokens": 51497856} +{"current_steps": 16365, "total_steps": 78105, "loss": 0.3309, "lr": 4.819567157602418e-06, "epoch": 1.0476281928173612, "percentage": 20.95, "elapsed_time": "0:43:30", "remaining_time": "2:44:09", "throughput": 19730.63, "total_tokens": 51512896} +{"current_steps": 16370, "total_steps": 78105, "loss": 0.3148, "lr": 4.819358716140067e-06, "epoch": 1.0479482747583382, "percentage": 20.96, "elapsed_time": "0:43:31", "remaining_time": "2:44:08", "throughput": 19731.54, "total_tokens": 51528448} +{"current_steps": 16375, "total_steps": 78105, "loss": 0.3235, "lr": 4.819150158861023e-06, "epoch": 1.048268356699315, "percentage": 20.97, "elapsed_time": "0:43:32", "remaining_time": "2:44:07", "throughput": 19732.5, "total_tokens": 51544768} +{"current_steps": 16380, "total_steps": 78105, "loss": 0.285, "lr": 4.8189414857756985e-06, "epoch": 1.048588438640292, "percentage": 20.97, "elapsed_time": "0:43:32", "remaining_time": "2:44:06", "throughput": 19733.38, "total_tokens": 51560448} +{"current_steps": 16385, "total_steps": 78105, "loss": 0.3436, "lr": 4.818732696894517e-06, "epoch": 1.0489085205812687, "percentage": 20.98, "elapsed_time": "0:43:33", "remaining_time": "2:44:04", "throughput": 19734.24, "total_tokens": 51575616} +{"current_steps": 16390, "total_steps": 78105, "loss": 0.2012, "lr": 4.8185237922279014e-06, "epoch": 1.0492286025222457, "percentage": 20.98, "elapsed_time": "0:43:34", "remaining_time": "2:44:03", "throughput": 19735.15, "total_tokens": 51591680} +{"current_steps": 16395, "total_steps": 78105, "loss": 0.2593, "lr": 4.818314771786284e-06, "epoch": 1.0495486844632227, "percentage": 20.99, "elapsed_time": "0:43:34", "remaining_time": "2:44:02", "throughput": 19736.1, "total_tokens": 51607680} +{"current_steps": 16400, "total_steps": 78105, "loss": 0.2339, "lr": 4.818105635580104e-06, "epoch": 1.0498687664041995, "percentage": 21.0, "elapsed_time": "0:43:35", "remaining_time": "2:44:01", "throughput": 19736.86, "total_tokens": 51622912} +{"current_steps": 16405, "total_steps": 78105, "loss": 0.4946, "lr": 4.817896383619802e-06, "epoch": 1.0501888483451765, "percentage": 21.0, "elapsed_time": "0:43:36", "remaining_time": "2:43:59", "throughput": 19737.72, "total_tokens": 51638144} +{"current_steps": 16410, "total_steps": 78105, "loss": 0.2627, "lr": 4.817687015915829e-06, "epoch": 1.0505089302861532, "percentage": 21.01, "elapsed_time": "0:43:36", "remaining_time": "2:43:58", "throughput": 19738.67, "total_tokens": 51654336} +{"current_steps": 16415, "total_steps": 78105, "loss": 0.3434, "lr": 4.817477532478638e-06, "epoch": 1.0508290122271302, "percentage": 21.02, "elapsed_time": "0:43:37", "remaining_time": "2:43:57", "throughput": 19739.53, "total_tokens": 51669760} +{"current_steps": 16420, "total_steps": 78105, "loss": 0.3096, "lr": 4.817267933318691e-06, "epoch": 1.051149094168107, "percentage": 21.02, "elapsed_time": "0:43:38", "remaining_time": "2:43:56", "throughput": 19740.39, "total_tokens": 51685440} +{"current_steps": 16425, "total_steps": 78105, "loss": 0.3697, "lr": 4.817058218446453e-06, "epoch": 1.051469176109084, "percentage": 21.03, "elapsed_time": "0:43:38", "remaining_time": "2:43:54", "throughput": 19741.21, "total_tokens": 51700544} +{"current_steps": 16430, "total_steps": 78105, "loss": 0.2282, "lr": 4.816848387872397e-06, "epoch": 1.0517892580500607, "percentage": 21.04, "elapsed_time": "0:43:39", "remaining_time": "2:43:53", "throughput": 19742.07, "total_tokens": 51716096} +{"current_steps": 16435, "total_steps": 78105, "loss": 0.3054, "lr": 4.8166384416070005e-06, "epoch": 1.0521093399910377, "percentage": 21.04, "elapsed_time": "0:43:40", "remaining_time": "2:43:52", "throughput": 19743.21, "total_tokens": 51733312} +{"current_steps": 16440, "total_steps": 78105, "loss": 0.3531, "lr": 4.8164283796607466e-06, "epoch": 1.0524294219320145, "percentage": 21.05, "elapsed_time": "0:43:40", "remaining_time": "2:43:51", "throughput": 19744.1, "total_tokens": 51748992} +{"current_steps": 16445, "total_steps": 78105, "loss": 0.4066, "lr": 4.8162182020441265e-06, "epoch": 1.0527495038729915, "percentage": 21.05, "elapsed_time": "0:43:41", "remaining_time": "2:43:49", "throughput": 19744.93, "total_tokens": 51764096} +{"current_steps": 16450, "total_steps": 78105, "loss": 0.3169, "lr": 4.816007908767633e-06, "epoch": 1.0530695858139685, "percentage": 21.06, "elapsed_time": "0:43:42", "remaining_time": "2:43:48", "throughput": 19745.8, "total_tokens": 51779584} +{"current_steps": 16455, "total_steps": 78105, "loss": 0.3345, "lr": 4.815797499841769e-06, "epoch": 1.0533896677549452, "percentage": 21.07, "elapsed_time": "0:43:42", "remaining_time": "2:43:47", "throughput": 19746.55, "total_tokens": 51794368} +{"current_steps": 16460, "total_steps": 78105, "loss": 0.2701, "lr": 4.81558697527704e-06, "epoch": 1.0537097496959222, "percentage": 21.07, "elapsed_time": "0:43:43", "remaining_time": "2:43:45", "throughput": 19747.36, "total_tokens": 51809664} +{"current_steps": 16465, "total_steps": 78105, "loss": 0.332, "lr": 4.815376335083959e-06, "epoch": 1.054029831636899, "percentage": 21.08, "elapsed_time": "0:43:44", "remaining_time": "2:43:44", "throughput": 19748.16, "total_tokens": 51824832} +{"current_steps": 16470, "total_steps": 78105, "loss": 0.5044, "lr": 4.815165579273044e-06, "epoch": 1.054349913577876, "percentage": 21.09, "elapsed_time": "0:43:44", "remaining_time": "2:43:43", "throughput": 19749.24, "total_tokens": 51841408} +{"current_steps": 16475, "total_steps": 78105, "loss": 0.2284, "lr": 4.8149547078548195e-06, "epoch": 1.0546699955188528, "percentage": 21.09, "elapsed_time": "0:43:45", "remaining_time": "2:43:42", "throughput": 19750.13, "total_tokens": 51857024} +{"current_steps": 16480, "total_steps": 78105, "loss": 0.2576, "lr": 4.814743720839815e-06, "epoch": 1.0549900774598298, "percentage": 21.1, "elapsed_time": "0:43:46", "remaining_time": "2:43:40", "throughput": 19751.02, "total_tokens": 51873088} +{"current_steps": 16485, "total_steps": 78105, "loss": 0.3573, "lr": 4.814532618238566e-06, "epoch": 1.0553101594008065, "percentage": 21.11, "elapsed_time": "0:43:47", "remaining_time": "2:43:39", "throughput": 19751.92, "total_tokens": 51888832} +{"current_steps": 16490, "total_steps": 78105, "loss": 0.2721, "lr": 4.814321400061614e-06, "epoch": 1.0556302413417835, "percentage": 21.11, "elapsed_time": "0:43:47", "remaining_time": "2:43:38", "throughput": 19752.9, "total_tokens": 51905088} +{"current_steps": 16495, "total_steps": 78105, "loss": 0.3373, "lr": 4.814110066319506e-06, "epoch": 1.0559503232827603, "percentage": 21.12, "elapsed_time": "0:43:48", "remaining_time": "2:43:37", "throughput": 19753.79, "total_tokens": 51920448} +{"current_steps": 16500, "total_steps": 78105, "loss": 0.2952, "lr": 4.8138986170227955e-06, "epoch": 1.0562704052237373, "percentage": 21.13, "elapsed_time": "0:43:49", "remaining_time": "2:43:36", "throughput": 19754.85, "total_tokens": 51936960} +{"current_steps": 16505, "total_steps": 78105, "loss": 0.3214, "lr": 4.81368705218204e-06, "epoch": 1.0565904871647143, "percentage": 21.13, "elapsed_time": "0:43:49", "remaining_time": "2:43:34", "throughput": 19755.87, "total_tokens": 51953536} +{"current_steps": 16510, "total_steps": 78105, "loss": 0.3257, "lr": 4.8134753718078054e-06, "epoch": 1.056910569105691, "percentage": 21.14, "elapsed_time": "0:43:50", "remaining_time": "2:43:33", "throughput": 19756.71, "total_tokens": 51968704} +{"current_steps": 16515, "total_steps": 78105, "loss": 0.314, "lr": 4.813263575910661e-06, "epoch": 1.057230651046668, "percentage": 21.14, "elapsed_time": "0:43:51", "remaining_time": "2:43:32", "throughput": 19757.52, "total_tokens": 51983808} +{"current_steps": 16520, "total_steps": 78105, "loss": 0.2979, "lr": 4.813051664501182e-06, "epoch": 1.0575507329876448, "percentage": 21.15, "elapsed_time": "0:43:51", "remaining_time": "2:43:30", "throughput": 19758.34, "total_tokens": 51999424} +{"current_steps": 16525, "total_steps": 78105, "loss": 0.3579, "lr": 4.812839637589953e-06, "epoch": 1.0578708149286218, "percentage": 21.16, "elapsed_time": "0:43:52", "remaining_time": "2:43:29", "throughput": 19759.13, "total_tokens": 52014784} +{"current_steps": 16530, "total_steps": 78105, "loss": 0.3112, "lr": 4.812627495187558e-06, "epoch": 1.0581908968695986, "percentage": 21.16, "elapsed_time": "0:43:53", "remaining_time": "2:43:28", "throughput": 19760.05, "total_tokens": 52030656} +{"current_steps": 16535, "total_steps": 78105, "loss": 0.3321, "lr": 4.812415237304593e-06, "epoch": 1.0585109788105755, "percentage": 21.17, "elapsed_time": "0:43:53", "remaining_time": "2:43:27", "throughput": 19760.87, "total_tokens": 52045888} +{"current_steps": 16540, "total_steps": 78105, "loss": 0.2757, "lr": 4.812202863951655e-06, "epoch": 1.0588310607515523, "percentage": 21.18, "elapsed_time": "0:43:54", "remaining_time": "2:43:25", "throughput": 19761.66, "total_tokens": 52060672} +{"current_steps": 16545, "total_steps": 78105, "loss": 0.2529, "lr": 4.811990375139351e-06, "epoch": 1.0591511426925293, "percentage": 21.18, "elapsed_time": "0:43:55", "remaining_time": "2:43:24", "throughput": 19762.39, "total_tokens": 52075456} +{"current_steps": 16550, "total_steps": 78105, "loss": 0.3074, "lr": 4.81177777087829e-06, "epoch": 1.059471224633506, "percentage": 21.19, "elapsed_time": "0:43:55", "remaining_time": "2:43:23", "throughput": 19763.04, "total_tokens": 52089600} +{"current_steps": 16555, "total_steps": 78105, "loss": 0.3159, "lr": 4.8115650511790875e-06, "epoch": 1.059791306574483, "percentage": 21.2, "elapsed_time": "0:43:56", "remaining_time": "2:43:21", "throughput": 19763.85, "total_tokens": 52104768} +{"current_steps": 16560, "total_steps": 78105, "loss": 0.2456, "lr": 4.811352216052368e-06, "epoch": 1.06011138851546, "percentage": 21.2, "elapsed_time": "0:43:57", "remaining_time": "2:43:20", "throughput": 19764.68, "total_tokens": 52120576} +{"current_steps": 16565, "total_steps": 78105, "loss": 0.2515, "lr": 4.8111392655087585e-06, "epoch": 1.0604314704564368, "percentage": 21.21, "elapsed_time": "0:43:57", "remaining_time": "2:43:19", "throughput": 19765.57, "total_tokens": 52136256} +{"current_steps": 16570, "total_steps": 78105, "loss": 0.3077, "lr": 4.810926199558892e-06, "epoch": 1.0607515523974138, "percentage": 21.22, "elapsed_time": "0:43:58", "remaining_time": "2:43:18", "throughput": 19766.47, "total_tokens": 52152256} +{"current_steps": 16575, "total_steps": 78105, "loss": 0.364, "lr": 4.810713018213407e-06, "epoch": 1.0610716343383906, "percentage": 21.22, "elapsed_time": "0:43:59", "remaining_time": "2:43:16", "throughput": 19767.27, "total_tokens": 52167488} +{"current_steps": 16580, "total_steps": 78105, "loss": 0.4094, "lr": 4.810499721482952e-06, "epoch": 1.0613917162793676, "percentage": 21.23, "elapsed_time": "0:43:59", "remaining_time": "2:43:15", "throughput": 19768.04, "total_tokens": 52182208} +{"current_steps": 16585, "total_steps": 78105, "loss": 0.2917, "lr": 4.8102863093781745e-06, "epoch": 1.0617117982203443, "percentage": 21.23, "elapsed_time": "0:44:00", "remaining_time": "2:43:14", "throughput": 19769.06, "total_tokens": 52198720} +{"current_steps": 16590, "total_steps": 78105, "loss": 0.5487, "lr": 4.810072781909734e-06, "epoch": 1.0620318801613213, "percentage": 21.24, "elapsed_time": "0:44:01", "remaining_time": "2:43:13", "throughput": 19770.15, "total_tokens": 52215552} +{"current_steps": 16595, "total_steps": 78105, "loss": 0.2942, "lr": 4.80985913908829e-06, "epoch": 1.062351962102298, "percentage": 21.25, "elapsed_time": "0:44:01", "remaining_time": "2:43:12", "throughput": 19771.2, "total_tokens": 52232064} +{"current_steps": 16600, "total_steps": 78105, "loss": 0.4301, "lr": 4.809645380924512e-06, "epoch": 1.062672044043275, "percentage": 21.25, "elapsed_time": "0:44:02", "remaining_time": "2:43:10", "throughput": 19772.15, "total_tokens": 52248064} +{"current_steps": 16605, "total_steps": 78105, "loss": 0.438, "lr": 4.809431507429076e-06, "epoch": 1.0629921259842519, "percentage": 21.26, "elapsed_time": "0:44:03", "remaining_time": "2:43:09", "throughput": 19773.23, "total_tokens": 52265024} +{"current_steps": 16610, "total_steps": 78105, "loss": 0.1769, "lr": 4.809217518612659e-06, "epoch": 1.0633122079252288, "percentage": 21.27, "elapsed_time": "0:44:03", "remaining_time": "2:43:08", "throughput": 19774.09, "total_tokens": 52280320} +{"current_steps": 16615, "total_steps": 78105, "loss": 0.2284, "lr": 4.809003414485947e-06, "epoch": 1.0636322898662058, "percentage": 21.27, "elapsed_time": "0:44:04", "remaining_time": "2:43:07", "throughput": 19774.89, "total_tokens": 52295616} +{"current_steps": 16620, "total_steps": 78105, "loss": 0.3144, "lr": 4.8087891950596314e-06, "epoch": 1.0639523718071826, "percentage": 21.28, "elapsed_time": "0:44:05", "remaining_time": "2:43:05", "throughput": 19775.85, "total_tokens": 52311680} +{"current_steps": 16625, "total_steps": 78105, "loss": 0.3209, "lr": 4.80857486034441e-06, "epoch": 1.0642724537481596, "percentage": 21.29, "elapsed_time": "0:44:05", "remaining_time": "2:43:04", "throughput": 19776.59, "total_tokens": 52326464} +{"current_steps": 16630, "total_steps": 78105, "loss": 0.3622, "lr": 4.808360410350985e-06, "epoch": 1.0645925356891364, "percentage": 21.29, "elapsed_time": "0:44:06", "remaining_time": "2:43:03", "throughput": 19777.44, "total_tokens": 52341696} +{"current_steps": 16635, "total_steps": 78105, "loss": 0.3042, "lr": 4.808145845090065e-06, "epoch": 1.0649126176301134, "percentage": 21.3, "elapsed_time": "0:44:07", "remaining_time": "2:43:02", "throughput": 19778.31, "total_tokens": 52357504} +{"current_steps": 16640, "total_steps": 78105, "loss": 0.3643, "lr": 4.807931164572364e-06, "epoch": 1.0652326995710901, "percentage": 21.3, "elapsed_time": "0:44:07", "remaining_time": "2:43:00", "throughput": 19779.32, "total_tokens": 52373888} +{"current_steps": 16645, "total_steps": 78105, "loss": 0.2879, "lr": 4.807716368808602e-06, "epoch": 1.0655527815120671, "percentage": 21.31, "elapsed_time": "0:44:08", "remaining_time": "2:42:59", "throughput": 19780.18, "total_tokens": 52389440} +{"current_steps": 16650, "total_steps": 78105, "loss": 0.2488, "lr": 4.807501457809505e-06, "epoch": 1.0658728634530439, "percentage": 21.32, "elapsed_time": "0:44:09", "remaining_time": "2:42:58", "throughput": 19781.0, "total_tokens": 52404608} +{"current_steps": 16655, "total_steps": 78105, "loss": 0.3124, "lr": 4.807286431585805e-06, "epoch": 1.0661929453940209, "percentage": 21.32, "elapsed_time": "0:44:09", "remaining_time": "2:42:56", "throughput": 19781.76, "total_tokens": 52419520} +{"current_steps": 16660, "total_steps": 78105, "loss": 0.2219, "lr": 4.807071290148238e-06, "epoch": 1.0665130273349979, "percentage": 21.33, "elapsed_time": "0:44:10", "remaining_time": "2:42:55", "throughput": 19782.57, "total_tokens": 52434944} +{"current_steps": 16665, "total_steps": 78105, "loss": 0.2712, "lr": 4.806856033507549e-06, "epoch": 1.0668331092759746, "percentage": 21.34, "elapsed_time": "0:44:11", "remaining_time": "2:42:54", "throughput": 19783.48, "total_tokens": 52451008} +{"current_steps": 16670, "total_steps": 78105, "loss": 0.3997, "lr": 4.806640661674485e-06, "epoch": 1.0671531912169516, "percentage": 21.34, "elapsed_time": "0:44:11", "remaining_time": "2:42:53", "throughput": 19784.5, "total_tokens": 52467392} +{"current_steps": 16675, "total_steps": 78105, "loss": 0.2614, "lr": 4.806425174659801e-06, "epoch": 1.0674732731579284, "percentage": 21.35, "elapsed_time": "0:44:12", "remaining_time": "2:42:52", "throughput": 19785.3, "total_tokens": 52482432} +{"current_steps": 16680, "total_steps": 78105, "loss": 0.3247, "lr": 4.806209572474258e-06, "epoch": 1.0677933550989054, "percentage": 21.36, "elapsed_time": "0:44:13", "remaining_time": "2:42:50", "throughput": 19786.03, "total_tokens": 52497472} +{"current_steps": 16685, "total_steps": 78105, "loss": 0.1651, "lr": 4.805993855128622e-06, "epoch": 1.0681134370398822, "percentage": 21.36, "elapsed_time": "0:44:13", "remaining_time": "2:42:49", "throughput": 19786.85, "total_tokens": 52512960} +{"current_steps": 16690, "total_steps": 78105, "loss": 0.3146, "lr": 4.8057780226336636e-06, "epoch": 1.0684335189808591, "percentage": 21.37, "elapsed_time": "0:44:14", "remaining_time": "2:42:48", "throughput": 19787.69, "total_tokens": 52528000} +{"current_steps": 16695, "total_steps": 78105, "loss": 0.4474, "lr": 4.805562075000161e-06, "epoch": 1.068753600921836, "percentage": 21.38, "elapsed_time": "0:44:15", "remaining_time": "2:42:46", "throughput": 19788.46, "total_tokens": 52542976} +{"current_steps": 16700, "total_steps": 78105, "loss": 0.3965, "lr": 4.805346012238898e-06, "epoch": 1.069073682862813, "percentage": 21.38, "elapsed_time": "0:44:15", "remaining_time": "2:42:45", "throughput": 19789.3, "total_tokens": 52558208} +{"current_steps": 16705, "total_steps": 78105, "loss": 0.3391, "lr": 4.805129834360664e-06, "epoch": 1.0693937648037897, "percentage": 21.39, "elapsed_time": "0:44:16", "remaining_time": "2:42:44", "throughput": 19790.38, "total_tokens": 52574976} +{"current_steps": 16710, "total_steps": 78105, "loss": 0.2333, "lr": 4.8049135413762514e-06, "epoch": 1.0697138467447667, "percentage": 21.39, "elapsed_time": "0:44:17", "remaining_time": "2:42:43", "throughput": 19791.17, "total_tokens": 52590208} +{"current_steps": 16715, "total_steps": 78105, "loss": 0.3121, "lr": 4.804697133296463e-06, "epoch": 1.0700339286857437, "percentage": 21.4, "elapsed_time": "0:44:17", "remaining_time": "2:42:42", "throughput": 19792.16, "total_tokens": 52606720} +{"current_steps": 16720, "total_steps": 78105, "loss": 0.3195, "lr": 4.804480610132104e-06, "epoch": 1.0703540106267204, "percentage": 21.41, "elapsed_time": "0:44:18", "remaining_time": "2:42:40", "throughput": 19793.29, "total_tokens": 52623872} +{"current_steps": 16725, "total_steps": 78105, "loss": 0.223, "lr": 4.804263971893988e-06, "epoch": 1.0706740925676974, "percentage": 21.41, "elapsed_time": "0:44:19", "remaining_time": "2:42:39", "throughput": 19794.11, "total_tokens": 52639040} +{"current_steps": 16730, "total_steps": 78105, "loss": 0.301, "lr": 4.804047218592932e-06, "epoch": 1.0709941745086742, "percentage": 21.42, "elapsed_time": "0:44:20", "remaining_time": "2:42:38", "throughput": 19795.1, "total_tokens": 52655680} +{"current_steps": 16735, "total_steps": 78105, "loss": 0.3257, "lr": 4.8038303502397584e-06, "epoch": 1.0713142564496512, "percentage": 21.43, "elapsed_time": "0:44:20", "remaining_time": "2:42:37", "throughput": 19795.93, "total_tokens": 52670912} +{"current_steps": 16740, "total_steps": 78105, "loss": 0.3093, "lr": 4.803613366845297e-06, "epoch": 1.071634338390628, "percentage": 21.43, "elapsed_time": "0:44:21", "remaining_time": "2:42:35", "throughput": 19796.8, "total_tokens": 52686656} +{"current_steps": 16745, "total_steps": 78105, "loss": 0.259, "lr": 4.803396268420384e-06, "epoch": 1.071954420331605, "percentage": 21.44, "elapsed_time": "0:44:21", "remaining_time": "2:42:34", "throughput": 19797.43, "total_tokens": 52700736} +{"current_steps": 16750, "total_steps": 78105, "loss": 0.3664, "lr": 4.803179054975859e-06, "epoch": 1.0722745022725817, "percentage": 21.45, "elapsed_time": "0:44:22", "remaining_time": "2:42:33", "throughput": 19798.26, "total_tokens": 52716352} +{"current_steps": 16755, "total_steps": 78105, "loss": 0.3951, "lr": 4.802961726522568e-06, "epoch": 1.0725945842135587, "percentage": 21.45, "elapsed_time": "0:44:23", "remaining_time": "2:42:31", "throughput": 19798.99, "total_tokens": 52730944} +{"current_steps": 16760, "total_steps": 78105, "loss": 0.2282, "lr": 4.8027442830713655e-06, "epoch": 1.0729146661545355, "percentage": 21.46, "elapsed_time": "0:44:23", "remaining_time": "2:42:30", "throughput": 19799.82, "total_tokens": 52746368} +{"current_steps": 16765, "total_steps": 78105, "loss": 0.3642, "lr": 4.802526724633107e-06, "epoch": 1.0732347480955124, "percentage": 21.46, "elapsed_time": "0:44:24", "remaining_time": "2:42:29", "throughput": 19800.79, "total_tokens": 52762624} +{"current_steps": 16770, "total_steps": 78105, "loss": 0.3688, "lr": 4.802309051218658e-06, "epoch": 1.0735548300364894, "percentage": 21.47, "elapsed_time": "0:44:25", "remaining_time": "2:42:28", "throughput": 19801.53, "total_tokens": 52777728} +{"current_steps": 16775, "total_steps": 78105, "loss": 0.3491, "lr": 4.802091262838886e-06, "epoch": 1.0738749119774662, "percentage": 21.48, "elapsed_time": "0:44:26", "remaining_time": "2:42:27", "throughput": 19802.48, "total_tokens": 52793920} +{"current_steps": 16780, "total_steps": 78105, "loss": 0.3184, "lr": 4.801873359504669e-06, "epoch": 1.0741949939184432, "percentage": 21.48, "elapsed_time": "0:44:26", "remaining_time": "2:42:25", "throughput": 19803.36, "total_tokens": 52809792} +{"current_steps": 16785, "total_steps": 78105, "loss": 0.4376, "lr": 4.801655341226886e-06, "epoch": 1.07451507585942, "percentage": 21.49, "elapsed_time": "0:44:27", "remaining_time": "2:42:24", "throughput": 19804.17, "total_tokens": 52824960} +{"current_steps": 16790, "total_steps": 78105, "loss": 0.3442, "lr": 4.801437208016424e-06, "epoch": 1.074835157800397, "percentage": 21.5, "elapsed_time": "0:44:28", "remaining_time": "2:42:23", "throughput": 19804.86, "total_tokens": 52839616} +{"current_steps": 16795, "total_steps": 78105, "loss": 0.2801, "lr": 4.801218959884176e-06, "epoch": 1.0751552397413737, "percentage": 21.5, "elapsed_time": "0:44:28", "remaining_time": "2:42:22", "throughput": 19805.76, "total_tokens": 52855616} +{"current_steps": 16800, "total_steps": 78105, "loss": 0.4209, "lr": 4.801000596841039e-06, "epoch": 1.0754753216823507, "percentage": 21.51, "elapsed_time": "0:44:29", "remaining_time": "2:42:21", "throughput": 19806.93, "total_tokens": 52873088} +{"current_steps": 16805, "total_steps": 78105, "loss": 0.2287, "lr": 4.800782118897917e-06, "epoch": 1.0757954036233275, "percentage": 21.52, "elapsed_time": "0:44:30", "remaining_time": "2:42:19", "throughput": 19807.83, "total_tokens": 52889280} +{"current_steps": 16810, "total_steps": 78105, "loss": 0.3968, "lr": 4.8005635260657224e-06, "epoch": 1.0761154855643045, "percentage": 21.52, "elapsed_time": "0:44:30", "remaining_time": "2:42:18", "throughput": 19808.68, "total_tokens": 52904896} +{"current_steps": 16815, "total_steps": 78105, "loss": 0.2281, "lr": 4.800344818355368e-06, "epoch": 1.0764355675052815, "percentage": 21.53, "elapsed_time": "0:44:31", "remaining_time": "2:42:17", "throughput": 19809.52, "total_tokens": 52920640} +{"current_steps": 16820, "total_steps": 78105, "loss": 0.2776, "lr": 4.800125995777775e-06, "epoch": 1.0767556494462582, "percentage": 21.54, "elapsed_time": "0:44:32", "remaining_time": "2:42:16", "throughput": 19810.3, "total_tokens": 52935680} +{"current_steps": 16825, "total_steps": 78105, "loss": 0.3489, "lr": 4.79990705834387e-06, "epoch": 1.0770757313872352, "percentage": 21.54, "elapsed_time": "0:44:32", "remaining_time": "2:42:14", "throughput": 19811.06, "total_tokens": 52950784} +{"current_steps": 16830, "total_steps": 78105, "loss": 0.2919, "lr": 4.799688006064587e-06, "epoch": 1.077395813328212, "percentage": 21.55, "elapsed_time": "0:44:33", "remaining_time": "2:42:13", "throughput": 19811.81, "total_tokens": 52965824} +{"current_steps": 16835, "total_steps": 78105, "loss": 0.2484, "lr": 4.799468838950863e-06, "epoch": 1.077715895269189, "percentage": 21.55, "elapsed_time": "0:44:34", "remaining_time": "2:42:12", "throughput": 19812.68, "total_tokens": 52981568} +{"current_steps": 16840, "total_steps": 78105, "loss": 0.3151, "lr": 4.799249557013643e-06, "epoch": 1.0780359772101658, "percentage": 21.56, "elapsed_time": "0:44:34", "remaining_time": "2:42:11", "throughput": 19813.58, "total_tokens": 52997632} +{"current_steps": 16845, "total_steps": 78105, "loss": 0.2957, "lr": 4.7990301602638764e-06, "epoch": 1.0783560591511427, "percentage": 21.57, "elapsed_time": "0:44:35", "remaining_time": "2:42:09", "throughput": 19814.41, "total_tokens": 53013056} +{"current_steps": 16850, "total_steps": 78105, "loss": 0.3658, "lr": 4.79881064871252e-06, "epoch": 1.0786761410921195, "percentage": 21.57, "elapsed_time": "0:44:36", "remaining_time": "2:42:08", "throughput": 19815.18, "total_tokens": 53028096} +{"current_steps": 16855, "total_steps": 78105, "loss": 0.2365, "lr": 4.798591022370532e-06, "epoch": 1.0789962230330965, "percentage": 21.58, "elapsed_time": "0:44:36", "remaining_time": "2:42:07", "throughput": 19816.06, "total_tokens": 53043968} +{"current_steps": 16860, "total_steps": 78105, "loss": 0.3099, "lr": 4.798371281248883e-06, "epoch": 1.0793163049740733, "percentage": 21.59, "elapsed_time": "0:44:37", "remaining_time": "2:42:06", "throughput": 19816.98, "total_tokens": 53060224} +{"current_steps": 16865, "total_steps": 78105, "loss": 0.3508, "lr": 4.798151425358543e-06, "epoch": 1.0796363869150503, "percentage": 21.59, "elapsed_time": "0:44:38", "remaining_time": "2:42:05", "throughput": 19817.98, "total_tokens": 53076544} +{"current_steps": 16870, "total_steps": 78105, "loss": 0.2995, "lr": 4.7979314547104915e-06, "epoch": 1.079956468856027, "percentage": 21.6, "elapsed_time": "0:44:38", "remaining_time": "2:42:03", "throughput": 19818.86, "total_tokens": 53092224} +{"current_steps": 16875, "total_steps": 78105, "loss": 0.3381, "lr": 4.797711369315713e-06, "epoch": 1.080276550797004, "percentage": 21.61, "elapsed_time": "0:44:39", "remaining_time": "2:42:02", "throughput": 19819.57, "total_tokens": 53106752} +{"current_steps": 16880, "total_steps": 78105, "loss": 0.3309, "lr": 4.797491169185197e-06, "epoch": 1.080596632737981, "percentage": 21.61, "elapsed_time": "0:44:40", "remaining_time": "2:42:01", "throughput": 19820.3, "total_tokens": 53121600} +{"current_steps": 16885, "total_steps": 78105, "loss": 0.2545, "lr": 4.797270854329938e-06, "epoch": 1.0809167146789578, "percentage": 21.62, "elapsed_time": "0:44:40", "remaining_time": "2:41:59", "throughput": 19820.91, "total_tokens": 53135936} +{"current_steps": 16890, "total_steps": 78105, "loss": 0.3368, "lr": 4.7970504247609405e-06, "epoch": 1.0812367966199348, "percentage": 21.62, "elapsed_time": "0:44:41", "remaining_time": "2:41:58", "throughput": 19821.79, "total_tokens": 53152000} +{"current_steps": 16895, "total_steps": 78105, "loss": 0.4705, "lr": 4.7968298804892085e-06, "epoch": 1.0815568785609115, "percentage": 21.63, "elapsed_time": "0:44:42", "remaining_time": "2:41:57", "throughput": 19822.63, "total_tokens": 53167424} +{"current_steps": 16900, "total_steps": 78105, "loss": 0.1764, "lr": 4.796609221525756e-06, "epoch": 1.0818769605018885, "percentage": 21.64, "elapsed_time": "0:44:42", "remaining_time": "2:41:56", "throughput": 19823.32, "total_tokens": 53182272} +{"current_steps": 16905, "total_steps": 78105, "loss": 0.3175, "lr": 4.7963884478816025e-06, "epoch": 1.0821970424428653, "percentage": 21.64, "elapsed_time": "0:44:43", "remaining_time": "2:41:55", "throughput": 19824.37, "total_tokens": 53199360} +{"current_steps": 16910, "total_steps": 78105, "loss": 0.3394, "lr": 4.79616755956777e-06, "epoch": 1.0825171243838423, "percentage": 21.65, "elapsed_time": "0:44:44", "remaining_time": "2:41:53", "throughput": 19825.16, "total_tokens": 53214912} +{"current_steps": 16915, "total_steps": 78105, "loss": 0.312, "lr": 4.795946556595291e-06, "epoch": 1.082837206324819, "percentage": 21.66, "elapsed_time": "0:44:44", "remaining_time": "2:41:52", "throughput": 19825.93, "total_tokens": 53230208} +{"current_steps": 16920, "total_steps": 78105, "loss": 0.298, "lr": 4.7957254389752e-06, "epoch": 1.083157288265796, "percentage": 21.66, "elapsed_time": "0:44:45", "remaining_time": "2:41:51", "throughput": 19826.72, "total_tokens": 53245312} +{"current_steps": 16925, "total_steps": 78105, "loss": 0.4834, "lr": 4.795504206718538e-06, "epoch": 1.083477370206773, "percentage": 21.67, "elapsed_time": "0:44:46", "remaining_time": "2:41:49", "throughput": 19827.49, "total_tokens": 53260352} +{"current_steps": 16930, "total_steps": 78105, "loss": 0.3298, "lr": 4.795282859836353e-06, "epoch": 1.0837974521477498, "percentage": 21.68, "elapsed_time": "0:44:46", "remaining_time": "2:41:48", "throughput": 19828.48, "total_tokens": 53276608} +{"current_steps": 16935, "total_steps": 78105, "loss": 0.2241, "lr": 4.7950613983396975e-06, "epoch": 1.0841175340887268, "percentage": 21.68, "elapsed_time": "0:44:47", "remaining_time": "2:41:47", "throughput": 19829.45, "total_tokens": 53292864} +{"current_steps": 16940, "total_steps": 78105, "loss": 0.3922, "lr": 4.794839822239631e-06, "epoch": 1.0844376160297036, "percentage": 21.69, "elapsed_time": "0:44:48", "remaining_time": "2:41:46", "throughput": 19830.26, "total_tokens": 53308288} +{"current_steps": 16945, "total_steps": 78105, "loss": 0.3077, "lr": 4.794618131547217e-06, "epoch": 1.0847576979706806, "percentage": 21.7, "elapsed_time": "0:44:48", "remaining_time": "2:41:45", "throughput": 19831.11, "total_tokens": 53324096} +{"current_steps": 16950, "total_steps": 78105, "loss": 0.3316, "lr": 4.7943963262735245e-06, "epoch": 1.0850777799116573, "percentage": 21.7, "elapsed_time": "0:44:49", "remaining_time": "2:41:44", "throughput": 19832.17, "total_tokens": 53341248} +{"current_steps": 16955, "total_steps": 78105, "loss": 0.3751, "lr": 4.794174406429632e-06, "epoch": 1.0853978618526343, "percentage": 21.71, "elapsed_time": "0:44:50", "remaining_time": "2:41:42", "throughput": 19833.08, "total_tokens": 53357184} +{"current_steps": 16960, "total_steps": 78105, "loss": 0.3232, "lr": 4.793952372026619e-06, "epoch": 1.085717943793611, "percentage": 21.71, "elapsed_time": "0:44:50", "remaining_time": "2:41:41", "throughput": 19833.7, "total_tokens": 53371392} +{"current_steps": 16965, "total_steps": 78105, "loss": 0.2409, "lr": 4.7937302230755735e-06, "epoch": 1.086038025734588, "percentage": 21.72, "elapsed_time": "0:44:51", "remaining_time": "2:41:40", "throughput": 19834.7, "total_tokens": 53387904} +{"current_steps": 16970, "total_steps": 78105, "loss": 0.3087, "lr": 4.793507959587588e-06, "epoch": 1.0863581076755648, "percentage": 21.73, "elapsed_time": "0:44:52", "remaining_time": "2:41:39", "throughput": 19835.6, "total_tokens": 53403968} +{"current_steps": 16975, "total_steps": 78105, "loss": 0.2574, "lr": 4.793285581573762e-06, "epoch": 1.0866781896165418, "percentage": 21.73, "elapsed_time": "0:44:53", "remaining_time": "2:41:38", "throughput": 19836.44, "total_tokens": 53419776} +{"current_steps": 16980, "total_steps": 78105, "loss": 0.3332, "lr": 4.793063089045199e-06, "epoch": 1.0869982715575188, "percentage": 21.74, "elapsed_time": "0:44:53", "remaining_time": "2:41:36", "throughput": 19837.19, "total_tokens": 53434816} +{"current_steps": 16985, "total_steps": 78105, "loss": 0.3752, "lr": 4.792840482013009e-06, "epoch": 1.0873183534984956, "percentage": 21.75, "elapsed_time": "0:44:54", "remaining_time": "2:41:35", "throughput": 19838.05, "total_tokens": 53450688} +{"current_steps": 16990, "total_steps": 78105, "loss": 0.5535, "lr": 4.792617760488308e-06, "epoch": 1.0876384354394726, "percentage": 21.75, "elapsed_time": "0:44:55", "remaining_time": "2:41:34", "throughput": 19838.92, "total_tokens": 53466240} +{"current_steps": 16995, "total_steps": 78105, "loss": 0.3152, "lr": 4.792394924482219e-06, "epoch": 1.0879585173804494, "percentage": 21.76, "elapsed_time": "0:44:55", "remaining_time": "2:41:33", "throughput": 19839.74, "total_tokens": 53481728} +{"current_steps": 17000, "total_steps": 78105, "loss": 0.4126, "lr": 4.792171974005868e-06, "epoch": 1.0882785993214263, "percentage": 21.77, "elapsed_time": "0:44:56", "remaining_time": "2:41:31", "throughput": 19840.48, "total_tokens": 53496896} +{"current_steps": 17005, "total_steps": 78105, "loss": 0.3726, "lr": 4.791948909070388e-06, "epoch": 1.0885986812624031, "percentage": 21.77, "elapsed_time": "0:44:57", "remaining_time": "2:41:30", "throughput": 19841.23, "total_tokens": 53512192} +{"current_steps": 17010, "total_steps": 78105, "loss": 0.3589, "lr": 4.7917257296869165e-06, "epoch": 1.08891876320338, "percentage": 21.78, "elapsed_time": "0:44:57", "remaining_time": "2:41:29", "throughput": 19841.99, "total_tokens": 53527424} +{"current_steps": 17015, "total_steps": 78105, "loss": 0.2884, "lr": 4.7915024358666005e-06, "epoch": 1.0892388451443569, "percentage": 21.78, "elapsed_time": "0:44:58", "remaining_time": "2:41:27", "throughput": 19842.68, "total_tokens": 53542016} +{"current_steps": 17020, "total_steps": 78105, "loss": 0.2984, "lr": 4.791279027620589e-06, "epoch": 1.0895589270853339, "percentage": 21.79, "elapsed_time": "0:44:59", "remaining_time": "2:41:26", "throughput": 19843.55, "total_tokens": 53557824} +{"current_steps": 17025, "total_steps": 78105, "loss": 0.2232, "lr": 4.7910555049600374e-06, "epoch": 1.0898790090263106, "percentage": 21.8, "elapsed_time": "0:44:59", "remaining_time": "2:41:25", "throughput": 19844.14, "total_tokens": 53571904} +{"current_steps": 17030, "total_steps": 78105, "loss": 0.3296, "lr": 4.790831867896107e-06, "epoch": 1.0901990909672876, "percentage": 21.8, "elapsed_time": "0:45:00", "remaining_time": "2:41:24", "throughput": 19845.14, "total_tokens": 53588800} +{"current_steps": 17035, "total_steps": 78105, "loss": 0.2592, "lr": 4.790608116439966e-06, "epoch": 1.0905191729082646, "percentage": 21.81, "elapsed_time": "0:45:01", "remaining_time": "2:41:23", "throughput": 19846.21, "total_tokens": 53605824} +{"current_steps": 17040, "total_steps": 78105, "loss": 0.3961, "lr": 4.790384250602788e-06, "epoch": 1.0908392548492414, "percentage": 21.82, "elapsed_time": "0:45:01", "remaining_time": "2:41:22", "throughput": 19847.09, "total_tokens": 53622016} +{"current_steps": 17045, "total_steps": 78105, "loss": 0.2924, "lr": 4.7901602703957494e-06, "epoch": 1.0911593367902184, "percentage": 21.82, "elapsed_time": "0:45:02", "remaining_time": "2:41:20", "throughput": 19847.92, "total_tokens": 53637888} +{"current_steps": 17050, "total_steps": 78105, "loss": 0.3298, "lr": 4.7899361758300365e-06, "epoch": 1.0914794187311951, "percentage": 21.83, "elapsed_time": "0:45:03", "remaining_time": "2:41:19", "throughput": 19848.74, "total_tokens": 53653760} +{"current_steps": 17055, "total_steps": 78105, "loss": 0.2407, "lr": 4.789711966916839e-06, "epoch": 1.0917995006721721, "percentage": 21.84, "elapsed_time": "0:45:03", "remaining_time": "2:41:18", "throughput": 19849.4, "total_tokens": 53668160} +{"current_steps": 17060, "total_steps": 78105, "loss": 0.3244, "lr": 4.7894876436673525e-06, "epoch": 1.092119582613149, "percentage": 21.84, "elapsed_time": "0:45:04", "remaining_time": "2:41:17", "throughput": 19850.14, "total_tokens": 53683136} +{"current_steps": 17065, "total_steps": 78105, "loss": 0.3165, "lr": 4.789263206092778e-06, "epoch": 1.0924396645541259, "percentage": 21.85, "elapsed_time": "0:45:05", "remaining_time": "2:41:15", "throughput": 19850.97, "total_tokens": 53698752} +{"current_steps": 17070, "total_steps": 78105, "loss": 0.2705, "lr": 4.789038654204323e-06, "epoch": 1.0927597464951027, "percentage": 21.86, "elapsed_time": "0:45:05", "remaining_time": "2:41:14", "throughput": 19851.79, "total_tokens": 53714496} +{"current_steps": 17075, "total_steps": 78105, "loss": 0.2991, "lr": 4.788813988013202e-06, "epoch": 1.0930798284360796, "percentage": 21.86, "elapsed_time": "0:45:06", "remaining_time": "2:41:13", "throughput": 19852.57, "total_tokens": 53729984} +{"current_steps": 17080, "total_steps": 78105, "loss": 0.3435, "lr": 4.788589207530632e-06, "epoch": 1.0933999103770566, "percentage": 21.87, "elapsed_time": "0:45:07", "remaining_time": "2:41:12", "throughput": 19853.43, "total_tokens": 53746048} +{"current_steps": 17085, "total_steps": 78105, "loss": 0.2946, "lr": 4.788364312767837e-06, "epoch": 1.0937199923180334, "percentage": 21.87, "elapsed_time": "0:45:07", "remaining_time": "2:41:11", "throughput": 19854.23, "total_tokens": 53761728} +{"current_steps": 17090, "total_steps": 78105, "loss": 0.3548, "lr": 4.788139303736049e-06, "epoch": 1.0940400742590104, "percentage": 21.88, "elapsed_time": "0:45:08", "remaining_time": "2:41:09", "throughput": 19854.98, "total_tokens": 53776704} +{"current_steps": 17095, "total_steps": 78105, "loss": 0.2661, "lr": 4.787914180446502e-06, "epoch": 1.0943601561999872, "percentage": 21.89, "elapsed_time": "0:45:09", "remaining_time": "2:41:08", "throughput": 19856.07, "total_tokens": 53794048} +{"current_steps": 17100, "total_steps": 78105, "loss": 0.3857, "lr": 4.7876889429104374e-06, "epoch": 1.0946802381409642, "percentage": 21.89, "elapsed_time": "0:45:09", "remaining_time": "2:41:07", "throughput": 19856.75, "total_tokens": 53808576} +{"current_steps": 17105, "total_steps": 78105, "loss": 0.3105, "lr": 4.787463591139104e-06, "epoch": 1.095000320081941, "percentage": 21.9, "elapsed_time": "0:45:10", "remaining_time": "2:41:06", "throughput": 19857.45, "total_tokens": 53823360} +{"current_steps": 17110, "total_steps": 78105, "loss": 0.4111, "lr": 4.787238125143754e-06, "epoch": 1.095320402022918, "percentage": 21.91, "elapsed_time": "0:45:11", "remaining_time": "2:41:04", "throughput": 19858.25, "total_tokens": 53838912} +{"current_steps": 17115, "total_steps": 78105, "loss": 0.4905, "lr": 4.787012544935646e-06, "epoch": 1.0956404839638947, "percentage": 21.91, "elapsed_time": "0:45:11", "remaining_time": "2:41:03", "throughput": 19859.03, "total_tokens": 53854336} +{"current_steps": 17120, "total_steps": 78105, "loss": 0.2755, "lr": 4.786786850526044e-06, "epoch": 1.0959605659048717, "percentage": 21.92, "elapsed_time": "0:45:12", "remaining_time": "2:41:02", "throughput": 19860.16, "total_tokens": 53871680} +{"current_steps": 17125, "total_steps": 78105, "loss": 0.2438, "lr": 4.7865610419262175e-06, "epoch": 1.0962806478458484, "percentage": 21.93, "elapsed_time": "0:45:13", "remaining_time": "2:41:01", "throughput": 19860.99, "total_tokens": 53887360} +{"current_steps": 17130, "total_steps": 78105, "loss": 0.2652, "lr": 4.786335119147443e-06, "epoch": 1.0966007297868254, "percentage": 21.93, "elapsed_time": "0:45:13", "remaining_time": "2:41:00", "throughput": 19861.67, "total_tokens": 53902144} +{"current_steps": 17135, "total_steps": 78105, "loss": 0.5099, "lr": 4.786109082201003e-06, "epoch": 1.0969208117278022, "percentage": 21.94, "elapsed_time": "0:45:14", "remaining_time": "2:40:59", "throughput": 19862.75, "total_tokens": 53919488} +{"current_steps": 17140, "total_steps": 78105, "loss": 0.4236, "lr": 4.7858829310981815e-06, "epoch": 1.0972408936687792, "percentage": 21.94, "elapsed_time": "0:45:15", "remaining_time": "2:40:58", "throughput": 19863.68, "total_tokens": 53936000} +{"current_steps": 17145, "total_steps": 78105, "loss": 0.3126, "lr": 4.785656665850274e-06, "epoch": 1.0975609756097562, "percentage": 21.95, "elapsed_time": "0:45:16", "remaining_time": "2:40:57", "throughput": 19864.82, "total_tokens": 53953920} +{"current_steps": 17150, "total_steps": 78105, "loss": 0.401, "lr": 4.7854302864685785e-06, "epoch": 1.097881057550733, "percentage": 21.96, "elapsed_time": "0:45:16", "remaining_time": "2:40:55", "throughput": 19865.73, "total_tokens": 53970240} +{"current_steps": 17155, "total_steps": 78105, "loss": 0.3472, "lr": 4.7852037929643976e-06, "epoch": 1.09820113949171, "percentage": 21.96, "elapsed_time": "0:45:17", "remaining_time": "2:40:54", "throughput": 19866.52, "total_tokens": 53985984} +{"current_steps": 17160, "total_steps": 78105, "loss": 0.2382, "lr": 4.784977185349044e-06, "epoch": 1.0985212214326867, "percentage": 21.97, "elapsed_time": "0:45:18", "remaining_time": "2:40:53", "throughput": 19867.5, "total_tokens": 54002432} +{"current_steps": 17165, "total_steps": 78105, "loss": 0.2789, "lr": 4.784750463633831e-06, "epoch": 1.0988413033736637, "percentage": 21.98, "elapsed_time": "0:45:18", "remaining_time": "2:40:52", "throughput": 19868.35, "total_tokens": 54018304} +{"current_steps": 17170, "total_steps": 78105, "loss": 0.2712, "lr": 4.784523627830081e-06, "epoch": 1.0991613853146405, "percentage": 21.98, "elapsed_time": "0:45:19", "remaining_time": "2:40:51", "throughput": 19868.93, "total_tokens": 54032384} +{"current_steps": 17175, "total_steps": 78105, "loss": 0.3503, "lr": 4.7842966779491205e-06, "epoch": 1.0994814672556175, "percentage": 21.99, "elapsed_time": "0:45:20", "remaining_time": "2:40:49", "throughput": 19869.71, "total_tokens": 54048128} +{"current_steps": 17180, "total_steps": 78105, "loss": 0.3664, "lr": 4.7840696140022825e-06, "epoch": 1.0998015491965942, "percentage": 22.0, "elapsed_time": "0:45:20", "remaining_time": "2:40:48", "throughput": 19870.42, "total_tokens": 54063040} +{"current_steps": 17185, "total_steps": 78105, "loss": 0.2494, "lr": 4.783842436000905e-06, "epoch": 1.1001216311375712, "percentage": 22.0, "elapsed_time": "0:45:21", "remaining_time": "2:40:47", "throughput": 19871.28, "total_tokens": 54078720} +{"current_steps": 17190, "total_steps": 78105, "loss": 0.3827, "lr": 4.783615143956332e-06, "epoch": 1.1004417130785482, "percentage": 22.01, "elapsed_time": "0:45:22", "remaining_time": "2:40:46", "throughput": 19871.91, "total_tokens": 54093248} +{"current_steps": 17195, "total_steps": 78105, "loss": 0.2817, "lr": 4.7833877378799145e-06, "epoch": 1.100761795019525, "percentage": 22.02, "elapsed_time": "0:45:22", "remaining_time": "2:40:44", "throughput": 19872.76, "total_tokens": 54109120} +{"current_steps": 17200, "total_steps": 78105, "loss": 0.3777, "lr": 4.783160217783007e-06, "epoch": 1.101081876960502, "percentage": 22.02, "elapsed_time": "0:45:23", "remaining_time": "2:40:43", "throughput": 19873.58, "total_tokens": 54124544} +{"current_steps": 17205, "total_steps": 78105, "loss": 0.2661, "lr": 4.7829325836769705e-06, "epoch": 1.1014019589014787, "percentage": 22.03, "elapsed_time": "0:45:24", "remaining_time": "2:40:42", "throughput": 19874.3, "total_tokens": 54139456} +{"current_steps": 17210, "total_steps": 78105, "loss": 0.3123, "lr": 4.782704835573172e-06, "epoch": 1.1017220408424557, "percentage": 22.03, "elapsed_time": "0:45:24", "remaining_time": "2:40:41", "throughput": 19874.96, "total_tokens": 54154112} +{"current_steps": 17215, "total_steps": 78105, "loss": 0.3453, "lr": 4.782476973482984e-06, "epoch": 1.1020421227834325, "percentage": 22.04, "elapsed_time": "0:45:25", "remaining_time": "2:40:39", "throughput": 19875.72, "total_tokens": 54169664} +{"current_steps": 17220, "total_steps": 78105, "loss": 0.2779, "lr": 4.782248997417785e-06, "epoch": 1.1023622047244095, "percentage": 22.05, "elapsed_time": "0:45:26", "remaining_time": "2:40:38", "throughput": 19876.73, "total_tokens": 54186752} +{"current_steps": 17225, "total_steps": 78105, "loss": 0.2646, "lr": 4.782020907388959e-06, "epoch": 1.1026822866653863, "percentage": 22.05, "elapsed_time": "0:45:26", "remaining_time": "2:40:37", "throughput": 19877.91, "total_tokens": 54204928} +{"current_steps": 17230, "total_steps": 78105, "loss": 0.3441, "lr": 4.781792703407896e-06, "epoch": 1.1030023686063632, "percentage": 22.06, "elapsed_time": "0:45:27", "remaining_time": "2:40:36", "throughput": 19878.64, "total_tokens": 54220160} +{"current_steps": 17235, "total_steps": 78105, "loss": 0.4479, "lr": 4.78156438548599e-06, "epoch": 1.10332245054734, "percentage": 22.07, "elapsed_time": "0:45:28", "remaining_time": "2:40:35", "throughput": 19879.41, "total_tokens": 54235392} +{"current_steps": 17240, "total_steps": 78105, "loss": 0.3998, "lr": 4.781335953634643e-06, "epoch": 1.103642532488317, "percentage": 22.07, "elapsed_time": "0:45:28", "remaining_time": "2:40:34", "throughput": 19880.2, "total_tokens": 54250560} +{"current_steps": 17245, "total_steps": 78105, "loss": 0.3057, "lr": 4.781107407865262e-06, "epoch": 1.103962614429294, "percentage": 22.08, "elapsed_time": "0:45:29", "remaining_time": "2:40:33", "throughput": 19881.11, "total_tokens": 54267072} +{"current_steps": 17250, "total_steps": 78105, "loss": 0.3702, "lr": 4.780878748189259e-06, "epoch": 1.1042826963702708, "percentage": 22.09, "elapsed_time": "0:45:30", "remaining_time": "2:40:31", "throughput": 19882.03, "total_tokens": 54283584} +{"current_steps": 17255, "total_steps": 78105, "loss": 0.2723, "lr": 4.780649974618051e-06, "epoch": 1.1046027783112478, "percentage": 22.09, "elapsed_time": "0:45:30", "remaining_time": "2:40:30", "throughput": 19882.89, "total_tokens": 54299584} +{"current_steps": 17260, "total_steps": 78105, "loss": 0.3932, "lr": 4.780421087163064e-06, "epoch": 1.1049228602522245, "percentage": 22.1, "elapsed_time": "0:45:31", "remaining_time": "2:40:29", "throughput": 19883.77, "total_tokens": 54315968} +{"current_steps": 17265, "total_steps": 78105, "loss": 0.4483, "lr": 4.7801920858357255e-06, "epoch": 1.1052429421932015, "percentage": 22.1, "elapsed_time": "0:45:32", "remaining_time": "2:40:28", "throughput": 19884.52, "total_tokens": 54331392} +{"current_steps": 17270, "total_steps": 78105, "loss": 0.3162, "lr": 4.779962970647471e-06, "epoch": 1.1055630241341783, "percentage": 22.11, "elapsed_time": "0:45:33", "remaining_time": "2:40:27", "throughput": 19885.27, "total_tokens": 54346624} +{"current_steps": 17275, "total_steps": 78105, "loss": 0.3045, "lr": 4.779733741609742e-06, "epoch": 1.1058831060751553, "percentage": 22.12, "elapsed_time": "0:45:33", "remaining_time": "2:40:26", "throughput": 19886.28, "total_tokens": 54363904} +{"current_steps": 17280, "total_steps": 78105, "loss": 0.2935, "lr": 4.779504398733985e-06, "epoch": 1.106203188016132, "percentage": 22.12, "elapsed_time": "0:45:34", "remaining_time": "2:40:24", "throughput": 19886.87, "total_tokens": 54378240} +{"current_steps": 17285, "total_steps": 78105, "loss": 0.3299, "lr": 4.779274942031651e-06, "epoch": 1.106523269957109, "percentage": 22.13, "elapsed_time": "0:45:35", "remaining_time": "2:40:23", "throughput": 19887.7, "total_tokens": 54393856} +{"current_steps": 17290, "total_steps": 78105, "loss": 0.3736, "lr": 4.7790453715142e-06, "epoch": 1.1068433518980858, "percentage": 22.14, "elapsed_time": "0:45:35", "remaining_time": "2:40:22", "throughput": 19888.57, "total_tokens": 54409792} +{"current_steps": 17295, "total_steps": 78105, "loss": 0.3891, "lr": 4.778815687193093e-06, "epoch": 1.1071634338390628, "percentage": 22.14, "elapsed_time": "0:45:36", "remaining_time": "2:40:21", "throughput": 19889.34, "total_tokens": 54425024} +{"current_steps": 17300, "total_steps": 78105, "loss": 0.3918, "lr": 4.778585889079801e-06, "epoch": 1.1074835157800398, "percentage": 22.15, "elapsed_time": "0:45:37", "remaining_time": "2:40:20", "throughput": 19890.1, "total_tokens": 54440384} +{"current_steps": 17305, "total_steps": 78105, "loss": 0.4742, "lr": 4.778355977185798e-06, "epoch": 1.1078035977210166, "percentage": 22.16, "elapsed_time": "0:45:37", "remaining_time": "2:40:18", "throughput": 19890.94, "total_tokens": 54456064} +{"current_steps": 17310, "total_steps": 78105, "loss": 0.4649, "lr": 4.778125951522565e-06, "epoch": 1.1081236796619935, "percentage": 22.16, "elapsed_time": "0:45:38", "remaining_time": "2:40:17", "throughput": 19891.65, "total_tokens": 54470976} +{"current_steps": 17315, "total_steps": 78105, "loss": 0.4483, "lr": 4.77789581210159e-06, "epoch": 1.1084437616029703, "percentage": 22.17, "elapsed_time": "0:45:39", "remaining_time": "2:40:16", "throughput": 19892.51, "total_tokens": 54486720} +{"current_steps": 17320, "total_steps": 78105, "loss": 0.2347, "lr": 4.777665558934361e-06, "epoch": 1.1087638435439473, "percentage": 22.18, "elapsed_time": "0:45:39", "remaining_time": "2:40:15", "throughput": 19893.33, "total_tokens": 54502592} +{"current_steps": 17325, "total_steps": 78105, "loss": 0.3618, "lr": 4.7774351920323795e-06, "epoch": 1.109083925484924, "percentage": 22.18, "elapsed_time": "0:45:40", "remaining_time": "2:40:13", "throughput": 19893.99, "total_tokens": 54517376} +{"current_steps": 17330, "total_steps": 78105, "loss": 0.3037, "lr": 4.777204711407146e-06, "epoch": 1.109404007425901, "percentage": 22.19, "elapsed_time": "0:45:41", "remaining_time": "2:40:12", "throughput": 19894.83, "total_tokens": 54533248} +{"current_steps": 17335, "total_steps": 78105, "loss": 0.3779, "lr": 4.776974117070172e-06, "epoch": 1.1097240893668778, "percentage": 22.19, "elapsed_time": "0:45:41", "remaining_time": "2:40:11", "throughput": 19895.62, "total_tokens": 54548800} +{"current_steps": 17340, "total_steps": 78105, "loss": 0.3112, "lr": 4.77674340903297e-06, "epoch": 1.1100441713078548, "percentage": 22.2, "elapsed_time": "0:45:42", "remaining_time": "2:40:10", "throughput": 19896.4, "total_tokens": 54564352} +{"current_steps": 17345, "total_steps": 78105, "loss": 0.3762, "lr": 4.776512587307062e-06, "epoch": 1.1103642532488318, "percentage": 22.21, "elapsed_time": "0:45:43", "remaining_time": "2:40:09", "throughput": 19897.29, "total_tokens": 54580480} +{"current_steps": 17350, "total_steps": 78105, "loss": 0.2193, "lr": 4.776281651903972e-06, "epoch": 1.1106843351898086, "percentage": 22.21, "elapsed_time": "0:45:43", "remaining_time": "2:40:07", "throughput": 19897.8, "total_tokens": 54594368} +{"current_steps": 17355, "total_steps": 78105, "loss": 0.3551, "lr": 4.776050602835234e-06, "epoch": 1.1110044171307856, "percentage": 22.22, "elapsed_time": "0:45:44", "remaining_time": "2:40:06", "throughput": 19898.46, "total_tokens": 54609216} +{"current_steps": 17360, "total_steps": 78105, "loss": 0.3503, "lr": 4.775819440112385e-06, "epoch": 1.1113244990717623, "percentage": 22.23, "elapsed_time": "0:45:45", "remaining_time": "2:40:05", "throughput": 19899.23, "total_tokens": 54624640} +{"current_steps": 17365, "total_steps": 78105, "loss": 0.3011, "lr": 4.775588163746966e-06, "epoch": 1.1116445810127393, "percentage": 22.23, "elapsed_time": "0:45:45", "remaining_time": "2:40:04", "throughput": 19900.08, "total_tokens": 54640384} +{"current_steps": 17370, "total_steps": 78105, "loss": 0.3279, "lr": 4.775356773750528e-06, "epoch": 1.111964662953716, "percentage": 22.24, "elapsed_time": "0:45:46", "remaining_time": "2:40:02", "throughput": 19900.87, "total_tokens": 54655616} +{"current_steps": 17375, "total_steps": 78105, "loss": 0.3209, "lr": 4.7751252701346255e-06, "epoch": 1.112284744894693, "percentage": 22.25, "elapsed_time": "0:45:47", "remaining_time": "2:40:01", "throughput": 19901.64, "total_tokens": 54670656} +{"current_steps": 17380, "total_steps": 78105, "loss": 0.3393, "lr": 4.7748936529108154e-06, "epoch": 1.1126048268356699, "percentage": 22.25, "elapsed_time": "0:45:47", "remaining_time": "2:40:00", "throughput": 19902.46, "total_tokens": 54686464} +{"current_steps": 17385, "total_steps": 78105, "loss": 0.4474, "lr": 4.774661922090667e-06, "epoch": 1.1129249087766468, "percentage": 22.26, "elapsed_time": "0:45:48", "remaining_time": "2:39:59", "throughput": 19903.4, "total_tokens": 54703232} +{"current_steps": 17390, "total_steps": 78105, "loss": 0.2873, "lr": 4.774430077685751e-06, "epoch": 1.1132449907176236, "percentage": 22.26, "elapsed_time": "0:45:49", "remaining_time": "2:39:58", "throughput": 19904.49, "total_tokens": 54720512} +{"current_steps": 17395, "total_steps": 78105, "loss": 0.3057, "lr": 4.774198119707644e-06, "epoch": 1.1135650726586006, "percentage": 22.27, "elapsed_time": "0:45:49", "remaining_time": "2:39:57", "throughput": 19905.51, "total_tokens": 54737216} +{"current_steps": 17400, "total_steps": 78105, "loss": 0.2909, "lr": 4.7739660481679285e-06, "epoch": 1.1138851545995774, "percentage": 22.28, "elapsed_time": "0:45:50", "remaining_time": "2:39:55", "throughput": 19906.29, "total_tokens": 54752640} +{"current_steps": 17405, "total_steps": 78105, "loss": 0.3757, "lr": 4.773733863078193e-06, "epoch": 1.1142052365405544, "percentage": 22.28, "elapsed_time": "0:45:51", "remaining_time": "2:39:54", "throughput": 19907.09, "total_tokens": 54767936} +{"current_steps": 17410, "total_steps": 78105, "loss": 0.2134, "lr": 4.773501564450032e-06, "epoch": 1.1145253184815314, "percentage": 22.29, "elapsed_time": "0:45:51", "remaining_time": "2:39:53", "throughput": 19907.86, "total_tokens": 54783488} +{"current_steps": 17415, "total_steps": 78105, "loss": 0.2501, "lr": 4.773269152295045e-06, "epoch": 1.1148454004225081, "percentage": 22.3, "elapsed_time": "0:45:52", "remaining_time": "2:39:52", "throughput": 19908.82, "total_tokens": 54800192} +{"current_steps": 17420, "total_steps": 78105, "loss": 0.3489, "lr": 4.773036626624838e-06, "epoch": 1.1151654823634851, "percentage": 22.3, "elapsed_time": "0:45:53", "remaining_time": "2:39:51", "throughput": 19909.69, "total_tokens": 54815936} +{"current_steps": 17425, "total_steps": 78105, "loss": 0.277, "lr": 4.772803987451021e-06, "epoch": 1.1154855643044619, "percentage": 22.31, "elapsed_time": "0:45:53", "remaining_time": "2:39:50", "throughput": 19910.51, "total_tokens": 54831488} +{"current_steps": 17430, "total_steps": 78105, "loss": 0.2835, "lr": 4.772571234785212e-06, "epoch": 1.1158056462454389, "percentage": 22.32, "elapsed_time": "0:45:54", "remaining_time": "2:39:48", "throughput": 19911.3, "total_tokens": 54846784} +{"current_steps": 17435, "total_steps": 78105, "loss": 0.3408, "lr": 4.7723383686390324e-06, "epoch": 1.1161257281864156, "percentage": 22.32, "elapsed_time": "0:45:55", "remaining_time": "2:39:47", "throughput": 19912.21, "total_tokens": 54863040} +{"current_steps": 17440, "total_steps": 78105, "loss": 0.3138, "lr": 4.772105389024111e-06, "epoch": 1.1164458101273926, "percentage": 22.33, "elapsed_time": "0:45:55", "remaining_time": "2:39:46", "throughput": 19912.88, "total_tokens": 54877696} +{"current_steps": 17445, "total_steps": 78105, "loss": 0.3693, "lr": 4.771872295952082e-06, "epoch": 1.1167658920683694, "percentage": 22.34, "elapsed_time": "0:45:56", "remaining_time": "2:39:45", "throughput": 19913.6, "total_tokens": 54892672} +{"current_steps": 17450, "total_steps": 78105, "loss": 0.3598, "lr": 4.771639089434584e-06, "epoch": 1.1170859740093464, "percentage": 22.34, "elapsed_time": "0:45:57", "remaining_time": "2:39:43", "throughput": 19914.33, "total_tokens": 54907712} +{"current_steps": 17455, "total_steps": 78105, "loss": 0.2294, "lr": 4.771405769483262e-06, "epoch": 1.1174060559503234, "percentage": 22.35, "elapsed_time": "0:45:57", "remaining_time": "2:39:42", "throughput": 19915.08, "total_tokens": 54923072} +{"current_steps": 17460, "total_steps": 78105, "loss": 0.2831, "lr": 4.771172336109768e-06, "epoch": 1.1177261378913002, "percentage": 22.35, "elapsed_time": "0:45:58", "remaining_time": "2:39:41", "throughput": 19915.84, "total_tokens": 54938240} +{"current_steps": 17465, "total_steps": 78105, "loss": 0.4827, "lr": 4.7709387893257565e-06, "epoch": 1.1180462198322771, "percentage": 22.36, "elapsed_time": "0:45:59", "remaining_time": "2:39:40", "throughput": 19916.64, "total_tokens": 54953984} +{"current_steps": 17470, "total_steps": 78105, "loss": 0.3294, "lr": 4.770705129142892e-06, "epoch": 1.118366301773254, "percentage": 22.37, "elapsed_time": "0:45:59", "remaining_time": "2:39:38", "throughput": 19917.47, "total_tokens": 54969600} +{"current_steps": 17475, "total_steps": 78105, "loss": 0.4694, "lr": 4.77047135557284e-06, "epoch": 1.118686383714231, "percentage": 22.37, "elapsed_time": "0:46:00", "remaining_time": "2:39:37", "throughput": 19918.34, "total_tokens": 54985216} +{"current_steps": 17480, "total_steps": 78105, "loss": 0.3939, "lr": 4.770237468627276e-06, "epoch": 1.1190064656552077, "percentage": 22.38, "elapsed_time": "0:46:01", "remaining_time": "2:39:36", "throughput": 19919.06, "total_tokens": 55000064} +{"current_steps": 17485, "total_steps": 78105, "loss": 0.5221, "lr": 4.770003468317877e-06, "epoch": 1.1193265475961847, "percentage": 22.39, "elapsed_time": "0:46:01", "remaining_time": "2:39:35", "throughput": 19919.89, "total_tokens": 55015808} +{"current_steps": 17490, "total_steps": 78105, "loss": 0.287, "lr": 4.769769354656329e-06, "epoch": 1.1196466295371614, "percentage": 22.39, "elapsed_time": "0:46:02", "remaining_time": "2:39:34", "throughput": 19920.58, "total_tokens": 55030848} +{"current_steps": 17495, "total_steps": 78105, "loss": 0.258, "lr": 4.769535127654323e-06, "epoch": 1.1199667114781384, "percentage": 22.4, "elapsed_time": "0:46:03", "remaining_time": "2:39:32", "throughput": 19921.34, "total_tokens": 55046080} +{"current_steps": 17500, "total_steps": 78105, "loss": 0.2615, "lr": 4.769300787323553e-06, "epoch": 1.1202867934191152, "percentage": 22.41, "elapsed_time": "0:46:03", "remaining_time": "2:39:31", "throughput": 19922.21, "total_tokens": 55062208} +{"current_steps": 17505, "total_steps": 78105, "loss": 0.3679, "lr": 4.769066333675723e-06, "epoch": 1.1206068753600922, "percentage": 22.41, "elapsed_time": "0:46:04", "remaining_time": "2:39:30", "throughput": 19922.99, "total_tokens": 55077696} +{"current_steps": 17510, "total_steps": 78105, "loss": 0.2117, "lr": 4.768831766722539e-06, "epoch": 1.1209269573010692, "percentage": 22.42, "elapsed_time": "0:46:05", "remaining_time": "2:39:29", "throughput": 19923.72, "total_tokens": 55093184} +{"current_steps": 17515, "total_steps": 78105, "loss": 0.2977, "lr": 4.768597086475715e-06, "epoch": 1.121247039242046, "percentage": 22.42, "elapsed_time": "0:46:05", "remaining_time": "2:39:28", "throughput": 19924.63, "total_tokens": 55109440} +{"current_steps": 17520, "total_steps": 78105, "loss": 0.2157, "lr": 4.768362292946968e-06, "epoch": 1.121567121183023, "percentage": 22.43, "elapsed_time": "0:46:06", "remaining_time": "2:39:26", "throughput": 19925.24, "total_tokens": 55123968} +{"current_steps": 17525, "total_steps": 78105, "loss": 0.245, "lr": 4.768127386148025e-06, "epoch": 1.1218872031239997, "percentage": 22.44, "elapsed_time": "0:46:07", "remaining_time": "2:39:25", "throughput": 19926.19, "total_tokens": 55140416} +{"current_steps": 17530, "total_steps": 78105, "loss": 0.3131, "lr": 4.767892366090614e-06, "epoch": 1.1222072850649767, "percentage": 22.44, "elapsed_time": "0:46:07", "remaining_time": "2:39:24", "throughput": 19927.02, "total_tokens": 55156224} +{"current_steps": 17535, "total_steps": 78105, "loss": 0.4164, "lr": 4.767657232786471e-06, "epoch": 1.1225273670059535, "percentage": 22.45, "elapsed_time": "0:46:08", "remaining_time": "2:39:23", "throughput": 19927.79, "total_tokens": 55171584} +{"current_steps": 17540, "total_steps": 78105, "loss": 0.4182, "lr": 4.767421986247338e-06, "epoch": 1.1228474489469304, "percentage": 22.46, "elapsed_time": "0:46:09", "remaining_time": "2:39:22", "throughput": 19928.48, "total_tokens": 55186624} +{"current_steps": 17545, "total_steps": 78105, "loss": 0.5068, "lr": 4.767186626484962e-06, "epoch": 1.1231675308879072, "percentage": 22.46, "elapsed_time": "0:46:09", "remaining_time": "2:39:20", "throughput": 19929.36, "total_tokens": 55202944} +{"current_steps": 17550, "total_steps": 78105, "loss": 0.3982, "lr": 4.766951153511095e-06, "epoch": 1.1234876128288842, "percentage": 22.47, "elapsed_time": "0:46:10", "remaining_time": "2:39:19", "throughput": 19930.31, "total_tokens": 55219520} +{"current_steps": 17555, "total_steps": 78105, "loss": 0.3083, "lr": 4.766715567337494e-06, "epoch": 1.123807694769861, "percentage": 22.48, "elapsed_time": "0:46:11", "remaining_time": "2:39:18", "throughput": 19931.08, "total_tokens": 55235008} +{"current_steps": 17560, "total_steps": 78105, "loss": 0.2455, "lr": 4.766479867975926e-06, "epoch": 1.124127776710838, "percentage": 22.48, "elapsed_time": "0:46:11", "remaining_time": "2:39:17", "throughput": 19931.9, "total_tokens": 55251008} +{"current_steps": 17565, "total_steps": 78105, "loss": 0.4543, "lr": 4.766244055438159e-06, "epoch": 1.124447858651815, "percentage": 22.49, "elapsed_time": "0:46:12", "remaining_time": "2:39:16", "throughput": 19932.61, "total_tokens": 55265728} +{"current_steps": 17570, "total_steps": 78105, "loss": 0.3649, "lr": 4.766008129735968e-06, "epoch": 1.1247679405927917, "percentage": 22.5, "elapsed_time": "0:46:13", "remaining_time": "2:39:15", "throughput": 19933.51, "total_tokens": 55281984} +{"current_steps": 17575, "total_steps": 78105, "loss": 0.3141, "lr": 4.765772090881135e-06, "epoch": 1.1250880225337687, "percentage": 22.5, "elapsed_time": "0:46:13", "remaining_time": "2:39:13", "throughput": 19934.2, "total_tokens": 55297152} +{"current_steps": 17580, "total_steps": 78105, "loss": 0.2546, "lr": 4.765535938885444e-06, "epoch": 1.1254081044747455, "percentage": 22.51, "elapsed_time": "0:46:14", "remaining_time": "2:39:12", "throughput": 19935.0, "total_tokens": 55312768} +{"current_steps": 17585, "total_steps": 78105, "loss": 0.3107, "lr": 4.76529967376069e-06, "epoch": 1.1257281864157225, "percentage": 22.51, "elapsed_time": "0:46:15", "remaining_time": "2:39:11", "throughput": 19935.85, "total_tokens": 55328832} +{"current_steps": 17590, "total_steps": 78105, "loss": 0.3397, "lr": 4.7650632955186695e-06, "epoch": 1.1260482683566992, "percentage": 22.52, "elapsed_time": "0:46:16", "remaining_time": "2:39:10", "throughput": 19936.53, "total_tokens": 55343808} +{"current_steps": 17595, "total_steps": 78105, "loss": 0.2761, "lr": 4.764826804171186e-06, "epoch": 1.1263683502976762, "percentage": 22.53, "elapsed_time": "0:46:16", "remaining_time": "2:39:09", "throughput": 19937.41, "total_tokens": 55359872} +{"current_steps": 17600, "total_steps": 78105, "loss": 0.3031, "lr": 4.764590199730051e-06, "epoch": 1.126688432238653, "percentage": 22.53, "elapsed_time": "0:46:17", "remaining_time": "2:39:07", "throughput": 19938.14, "total_tokens": 55374784} +{"current_steps": 17605, "total_steps": 78105, "loss": 0.3103, "lr": 4.764353482207075e-06, "epoch": 1.12700851417963, "percentage": 22.54, "elapsed_time": "0:46:18", "remaining_time": "2:39:06", "throughput": 19939.17, "total_tokens": 55391872} +{"current_steps": 17610, "total_steps": 78105, "loss": 0.4341, "lr": 4.764116651614081e-06, "epoch": 1.127328596120607, "percentage": 22.55, "elapsed_time": "0:46:18", "remaining_time": "2:39:05", "throughput": 19939.9, "total_tokens": 55407232} +{"current_steps": 17615, "total_steps": 78105, "loss": 0.3801, "lr": 4.763879707962895e-06, "epoch": 1.1276486780615838, "percentage": 22.55, "elapsed_time": "0:46:19", "remaining_time": "2:39:04", "throughput": 19940.64, "total_tokens": 55422336} +{"current_steps": 17620, "total_steps": 78105, "loss": 0.2858, "lr": 4.7636426512653486e-06, "epoch": 1.1279687600025607, "percentage": 22.56, "elapsed_time": "0:46:20", "remaining_time": "2:39:03", "throughput": 19941.46, "total_tokens": 55438528} +{"current_steps": 17625, "total_steps": 78105, "loss": 0.4079, "lr": 4.76340548153328e-06, "epoch": 1.1282888419435375, "percentage": 22.57, "elapsed_time": "0:46:20", "remaining_time": "2:39:02", "throughput": 19942.72, "total_tokens": 55457152} +{"current_steps": 17630, "total_steps": 78105, "loss": 0.2035, "lr": 4.7631681987785305e-06, "epoch": 1.1286089238845145, "percentage": 22.57, "elapsed_time": "0:46:21", "remaining_time": "2:39:01", "throughput": 19943.5, "total_tokens": 55473024} +{"current_steps": 17635, "total_steps": 78105, "loss": 0.4639, "lr": 4.762930803012949e-06, "epoch": 1.1289290058254913, "percentage": 22.58, "elapsed_time": "0:46:22", "remaining_time": "2:39:00", "throughput": 19944.35, "total_tokens": 55489024} +{"current_steps": 17640, "total_steps": 78105, "loss": 0.3218, "lr": 4.762693294248391e-06, "epoch": 1.1292490877664683, "percentage": 22.58, "elapsed_time": "0:46:22", "remaining_time": "2:38:58", "throughput": 19945.14, "total_tokens": 55504832} +{"current_steps": 17645, "total_steps": 78105, "loss": 0.2533, "lr": 4.762455672496716e-06, "epoch": 1.129569169707445, "percentage": 22.59, "elapsed_time": "0:46:23", "remaining_time": "2:38:57", "throughput": 19945.97, "total_tokens": 55520320} +{"current_steps": 17650, "total_steps": 78105, "loss": 0.3186, "lr": 4.7622179377697895e-06, "epoch": 1.129889251648422, "percentage": 22.6, "elapsed_time": "0:46:24", "remaining_time": "2:38:56", "throughput": 19946.68, "total_tokens": 55535360} +{"current_steps": 17655, "total_steps": 78105, "loss": 0.4017, "lr": 4.761980090079482e-06, "epoch": 1.1302093335893988, "percentage": 22.6, "elapsed_time": "0:46:24", "remaining_time": "2:38:55", "throughput": 19947.38, "total_tokens": 55550464} +{"current_steps": 17660, "total_steps": 78105, "loss": 0.395, "lr": 4.761742129437671e-06, "epoch": 1.1305294155303758, "percentage": 22.61, "elapsed_time": "0:46:25", "remaining_time": "2:38:53", "throughput": 19948.03, "total_tokens": 55565248} +{"current_steps": 17665, "total_steps": 78105, "loss": 0.3577, "lr": 4.761504055856239e-06, "epoch": 1.1308494974713525, "percentage": 22.62, "elapsed_time": "0:46:26", "remaining_time": "2:38:52", "throughput": 19948.76, "total_tokens": 55580672} +{"current_steps": 17670, "total_steps": 78105, "loss": 0.562, "lr": 4.7612658693470745e-06, "epoch": 1.1311695794123295, "percentage": 22.62, "elapsed_time": "0:46:26", "remaining_time": "2:38:51", "throughput": 19949.62, "total_tokens": 55596608} +{"current_steps": 17675, "total_steps": 78105, "loss": 0.3617, "lr": 4.761027569922072e-06, "epoch": 1.1314896613533065, "percentage": 22.63, "elapsed_time": "0:46:27", "remaining_time": "2:38:50", "throughput": 19950.32, "total_tokens": 55611904} +{"current_steps": 17680, "total_steps": 78105, "loss": 0.3807, "lr": 4.760789157593129e-06, "epoch": 1.1318097432942833, "percentage": 22.64, "elapsed_time": "0:46:28", "remaining_time": "2:38:49", "throughput": 19950.05, "total_tokens": 55627968} +{"current_steps": 17685, "total_steps": 78105, "loss": 0.356, "lr": 4.760550632372151e-06, "epoch": 1.1321298252352603, "percentage": 22.64, "elapsed_time": "0:46:29", "remaining_time": "2:38:48", "throughput": 19951.03, "total_tokens": 55644928} +{"current_steps": 17690, "total_steps": 78105, "loss": 0.4617, "lr": 4.76031199427105e-06, "epoch": 1.132449907176237, "percentage": 22.65, "elapsed_time": "0:46:29", "remaining_time": "2:38:47", "throughput": 19951.8, "total_tokens": 55660352} +{"current_steps": 17695, "total_steps": 78105, "loss": 0.2799, "lr": 4.760073243301741e-06, "epoch": 1.132769989117214, "percentage": 22.66, "elapsed_time": "0:46:30", "remaining_time": "2:38:46", "throughput": 19952.68, "total_tokens": 55676672} +{"current_steps": 17700, "total_steps": 78105, "loss": 0.3001, "lr": 4.759834379476147e-06, "epoch": 1.1330900710581908, "percentage": 22.66, "elapsed_time": "0:46:31", "remaining_time": "2:38:45", "throughput": 19953.31, "total_tokens": 55691328} +{"current_steps": 17705, "total_steps": 78105, "loss": 0.5102, "lr": 4.759595402806194e-06, "epoch": 1.1334101529991678, "percentage": 22.67, "elapsed_time": "0:46:31", "remaining_time": "2:38:43", "throughput": 19954.04, "total_tokens": 55706752} +{"current_steps": 17710, "total_steps": 78105, "loss": 0.3296, "lr": 4.759356313303818e-06, "epoch": 1.1337302349401446, "percentage": 22.67, "elapsed_time": "0:46:32", "remaining_time": "2:38:42", "throughput": 19954.94, "total_tokens": 55722944} +{"current_steps": 17715, "total_steps": 78105, "loss": 0.2977, "lr": 4.759117110980955e-06, "epoch": 1.1340503168811216, "percentage": 22.68, "elapsed_time": "0:46:33", "remaining_time": "2:38:41", "throughput": 19955.88, "total_tokens": 55739776} +{"current_steps": 17720, "total_steps": 78105, "loss": 0.4228, "lr": 4.7588777958495515e-06, "epoch": 1.1343703988220986, "percentage": 22.69, "elapsed_time": "0:46:33", "remaining_time": "2:38:40", "throughput": 19956.67, "total_tokens": 55755712} +{"current_steps": 17725, "total_steps": 78105, "loss": 0.3189, "lr": 4.758638367921556e-06, "epoch": 1.1346904807630753, "percentage": 22.69, "elapsed_time": "0:46:34", "remaining_time": "2:38:39", "throughput": 19957.6, "total_tokens": 55772032} +{"current_steps": 17730, "total_steps": 78105, "loss": 0.3287, "lr": 4.758398827208927e-06, "epoch": 1.1350105627040523, "percentage": 22.7, "elapsed_time": "0:46:35", "remaining_time": "2:38:38", "throughput": 19958.62, "total_tokens": 55789248} +{"current_steps": 17735, "total_steps": 78105, "loss": 0.3426, "lr": 4.758159173723623e-06, "epoch": 1.135330644645029, "percentage": 22.71, "elapsed_time": "0:46:35", "remaining_time": "2:38:37", "throughput": 19959.32, "total_tokens": 55804224} +{"current_steps": 17740, "total_steps": 78105, "loss": 0.4571, "lr": 4.757919407477613e-06, "epoch": 1.135650726586006, "percentage": 22.71, "elapsed_time": "0:46:36", "remaining_time": "2:38:35", "throughput": 19959.93, "total_tokens": 55818816} +{"current_steps": 17745, "total_steps": 78105, "loss": 0.3233, "lr": 4.757679528482867e-06, "epoch": 1.1359708085269828, "percentage": 22.72, "elapsed_time": "0:46:37", "remaining_time": "2:38:34", "throughput": 19960.73, "total_tokens": 55834496} +{"current_steps": 17750, "total_steps": 78105, "loss": 0.3372, "lr": 4.757439536751367e-06, "epoch": 1.1362908904679598, "percentage": 22.73, "elapsed_time": "0:46:37", "remaining_time": "2:38:33", "throughput": 19961.63, "total_tokens": 55851200} +{"current_steps": 17755, "total_steps": 78105, "loss": 0.2493, "lr": 4.757199432295094e-06, "epoch": 1.1366109724089366, "percentage": 22.73, "elapsed_time": "0:46:38", "remaining_time": "2:38:32", "throughput": 19962.52, "total_tokens": 55867712} +{"current_steps": 17760, "total_steps": 78105, "loss": 0.2704, "lr": 4.756959215126039e-06, "epoch": 1.1369310543499136, "percentage": 22.74, "elapsed_time": "0:46:39", "remaining_time": "2:38:31", "throughput": 19963.17, "total_tokens": 55882496} +{"current_steps": 17765, "total_steps": 78105, "loss": 0.4733, "lr": 4.756718885256197e-06, "epoch": 1.1372511362908906, "percentage": 22.75, "elapsed_time": "0:46:39", "remaining_time": "2:38:30", "throughput": 19963.9, "total_tokens": 55897984} +{"current_steps": 17770, "total_steps": 78105, "loss": 0.3585, "lr": 4.7564784426975685e-06, "epoch": 1.1375712182318674, "percentage": 22.75, "elapsed_time": "0:46:40", "remaining_time": "2:38:29", "throughput": 19964.63, "total_tokens": 55913216} +{"current_steps": 17775, "total_steps": 78105, "loss": 0.2183, "lr": 4.756237887462161e-06, "epoch": 1.1378913001728443, "percentage": 22.76, "elapsed_time": "0:46:41", "remaining_time": "2:38:27", "throughput": 19965.31, "total_tokens": 55928320} +{"current_steps": 17780, "total_steps": 78105, "loss": 0.31, "lr": 4.755997219561984e-06, "epoch": 1.1382113821138211, "percentage": 22.76, "elapsed_time": "0:46:41", "remaining_time": "2:38:26", "throughput": 19966.14, "total_tokens": 55944576} +{"current_steps": 17785, "total_steps": 78105, "loss": 0.2849, "lr": 4.7557564390090585e-06, "epoch": 1.138531464054798, "percentage": 22.77, "elapsed_time": "0:46:42", "remaining_time": "2:38:25", "throughput": 19966.98, "total_tokens": 55960256} +{"current_steps": 17790, "total_steps": 78105, "loss": 0.3296, "lr": 4.755515545815406e-06, "epoch": 1.1388515459957749, "percentage": 22.78, "elapsed_time": "0:46:43", "remaining_time": "2:38:24", "throughput": 19967.77, "total_tokens": 55975872} +{"current_steps": 17795, "total_steps": 78105, "loss": 0.3347, "lr": 4.755274539993056e-06, "epoch": 1.1391716279367519, "percentage": 22.78, "elapsed_time": "0:46:44", "remaining_time": "2:38:23", "throughput": 19967.72, "total_tokens": 55992192} +{"current_steps": 17800, "total_steps": 78105, "loss": 0.3867, "lr": 4.755033421554042e-06, "epoch": 1.1394917098777286, "percentage": 22.79, "elapsed_time": "0:46:44", "remaining_time": "2:38:22", "throughput": 19968.52, "total_tokens": 56008064} +{"current_steps": 17805, "total_steps": 78105, "loss": 0.4431, "lr": 4.754792190510405e-06, "epoch": 1.1398117918187056, "percentage": 22.8, "elapsed_time": "0:46:45", "remaining_time": "2:38:21", "throughput": 19969.36, "total_tokens": 56024320} +{"current_steps": 17810, "total_steps": 78105, "loss": 0.4699, "lr": 4.7545508468741905e-06, "epoch": 1.1401318737596824, "percentage": 22.8, "elapsed_time": "0:46:46", "remaining_time": "2:38:20", "throughput": 19970.51, "total_tokens": 56042688} +{"current_steps": 17815, "total_steps": 78105, "loss": 0.3477, "lr": 4.754309390657451e-06, "epoch": 1.1404519557006594, "percentage": 22.81, "elapsed_time": "0:46:46", "remaining_time": "2:38:19", "throughput": 19971.12, "total_tokens": 56057280} +{"current_steps": 17820, "total_steps": 78105, "loss": 0.2841, "lr": 4.754067821872242e-06, "epoch": 1.1407720376416361, "percentage": 22.82, "elapsed_time": "0:46:47", "remaining_time": "2:38:18", "throughput": 19972.0, "total_tokens": 56073600} +{"current_steps": 17825, "total_steps": 78105, "loss": 0.3666, "lr": 4.753826140530628e-06, "epoch": 1.1410921195826131, "percentage": 22.82, "elapsed_time": "0:46:48", "remaining_time": "2:38:16", "throughput": 19972.74, "total_tokens": 56089344} +{"current_steps": 17830, "total_steps": 78105, "loss": 0.344, "lr": 4.753584346644675e-06, "epoch": 1.1414122015235901, "percentage": 22.83, "elapsed_time": "0:46:48", "remaining_time": "2:38:15", "throughput": 19973.66, "total_tokens": 56105856} +{"current_steps": 17835, "total_steps": 78105, "loss": 0.4252, "lr": 4.753342440226459e-06, "epoch": 1.141732283464567, "percentage": 22.83, "elapsed_time": "0:46:49", "remaining_time": "2:38:14", "throughput": 19974.43, "total_tokens": 56121344} +{"current_steps": 17840, "total_steps": 78105, "loss": 0.3486, "lr": 4.753100421288059e-06, "epoch": 1.142052365405544, "percentage": 22.84, "elapsed_time": "0:46:50", "remaining_time": "2:38:13", "throughput": 19975.3, "total_tokens": 56137984} +{"current_steps": 17845, "total_steps": 78105, "loss": 0.3367, "lr": 4.752858289841559e-06, "epoch": 1.1423724473465207, "percentage": 22.85, "elapsed_time": "0:46:51", "remaining_time": "2:38:12", "throughput": 19975.96, "total_tokens": 56153024} +{"current_steps": 17850, "total_steps": 78105, "loss": 0.2596, "lr": 4.7526160458990515e-06, "epoch": 1.1426925292874976, "percentage": 22.85, "elapsed_time": "0:46:51", "remaining_time": "2:38:11", "throughput": 19976.8, "total_tokens": 56169088} +{"current_steps": 17855, "total_steps": 78105, "loss": 0.2969, "lr": 4.752373689472633e-06, "epoch": 1.1430126112284744, "percentage": 22.86, "elapsed_time": "0:46:52", "remaining_time": "2:38:10", "throughput": 19977.56, "total_tokens": 56184576} +{"current_steps": 17860, "total_steps": 78105, "loss": 0.33, "lr": 4.752131220574403e-06, "epoch": 1.1433326931694514, "percentage": 22.87, "elapsed_time": "0:46:53", "remaining_time": "2:38:08", "throughput": 19978.38, "total_tokens": 56200320} +{"current_steps": 17865, "total_steps": 78105, "loss": 0.3765, "lr": 4.751888639216471e-06, "epoch": 1.1436527751104282, "percentage": 22.87, "elapsed_time": "0:46:53", "remaining_time": "2:38:07", "throughput": 19979.13, "total_tokens": 56215360} +{"current_steps": 17870, "total_steps": 78105, "loss": 0.3007, "lr": 4.751645945410951e-06, "epoch": 1.1439728570514052, "percentage": 22.88, "elapsed_time": "0:46:54", "remaining_time": "2:38:06", "throughput": 19979.97, "total_tokens": 56231360} +{"current_steps": 17875, "total_steps": 78105, "loss": 0.2715, "lr": 4.75140313916996e-06, "epoch": 1.1442929389923822, "percentage": 22.89, "elapsed_time": "0:46:55", "remaining_time": "2:38:05", "throughput": 19980.87, "total_tokens": 56247488} +{"current_steps": 17880, "total_steps": 78105, "loss": 0.4135, "lr": 4.751160220505623e-06, "epoch": 1.144613020933359, "percentage": 22.89, "elapsed_time": "0:46:55", "remaining_time": "2:38:04", "throughput": 19981.6, "total_tokens": 56262912} +{"current_steps": 17885, "total_steps": 78105, "loss": 0.3799, "lr": 4.750917189430072e-06, "epoch": 1.144933102874336, "percentage": 22.9, "elapsed_time": "0:46:56", "remaining_time": "2:38:03", "throughput": 19982.39, "total_tokens": 56278784} +{"current_steps": 17890, "total_steps": 78105, "loss": 0.3399, "lr": 4.75067404595544e-06, "epoch": 1.1452531848153127, "percentage": 22.91, "elapsed_time": "0:46:57", "remaining_time": "2:38:01", "throughput": 19983.02, "total_tokens": 56293312} +{"current_steps": 17895, "total_steps": 78105, "loss": 0.4924, "lr": 4.7504307900938694e-06, "epoch": 1.1455732667562897, "percentage": 22.91, "elapsed_time": "0:46:57", "remaining_time": "2:38:00", "throughput": 19983.69, "total_tokens": 56308352} +{"current_steps": 17900, "total_steps": 78105, "loss": 0.2666, "lr": 4.750187421857507e-06, "epoch": 1.1458933486972664, "percentage": 22.92, "elapsed_time": "0:46:58", "remaining_time": "2:37:59", "throughput": 19984.44, "total_tokens": 56323904} +{"current_steps": 17905, "total_steps": 78105, "loss": 0.3543, "lr": 4.749943941258507e-06, "epoch": 1.1462134306382434, "percentage": 22.92, "elapsed_time": "0:46:59", "remaining_time": "2:37:58", "throughput": 19985.17, "total_tokens": 56339328} +{"current_steps": 17910, "total_steps": 78105, "loss": 0.3061, "lr": 4.749700348309025e-06, "epoch": 1.1465335125792202, "percentage": 22.93, "elapsed_time": "0:46:59", "remaining_time": "2:37:57", "throughput": 19986.11, "total_tokens": 56355968} +{"current_steps": 17915, "total_steps": 78105, "loss": 0.2885, "lr": 4.749456643021226e-06, "epoch": 1.1468535945201972, "percentage": 22.94, "elapsed_time": "0:47:00", "remaining_time": "2:37:56", "throughput": 19986.99, "total_tokens": 56372544} +{"current_steps": 17920, "total_steps": 78105, "loss": 0.2448, "lr": 4.749212825407279e-06, "epoch": 1.1471736764611742, "percentage": 22.94, "elapsed_time": "0:47:01", "remaining_time": "2:37:54", "throughput": 19987.79, "total_tokens": 56388288} +{"current_steps": 17925, "total_steps": 78105, "loss": 0.3832, "lr": 4.74896889547936e-06, "epoch": 1.147493758402151, "percentage": 22.95, "elapsed_time": "0:47:01", "remaining_time": "2:37:53", "throughput": 19988.6, "total_tokens": 56404160} +{"current_steps": 17930, "total_steps": 78105, "loss": 0.2836, "lr": 4.748724853249648e-06, "epoch": 1.1478138403431277, "percentage": 22.96, "elapsed_time": "0:47:02", "remaining_time": "2:37:52", "throughput": 19989.34, "total_tokens": 56419264} +{"current_steps": 17935, "total_steps": 78105, "loss": 0.3568, "lr": 4.74848069873033e-06, "epoch": 1.1481339222841047, "percentage": 22.96, "elapsed_time": "0:47:03", "remaining_time": "2:37:51", "throughput": 19990.18, "total_tokens": 56435520} +{"current_steps": 17940, "total_steps": 78105, "loss": 0.3754, "lr": 4.748236431933598e-06, "epoch": 1.1484540042250817, "percentage": 22.97, "elapsed_time": "0:47:03", "remaining_time": "2:37:50", "throughput": 19990.91, "total_tokens": 56451264} +{"current_steps": 17945, "total_steps": 78105, "loss": 0.2532, "lr": 4.74799205287165e-06, "epoch": 1.1487740861660585, "percentage": 22.98, "elapsed_time": "0:47:04", "remaining_time": "2:37:49", "throughput": 19991.6, "total_tokens": 56466624} +{"current_steps": 17950, "total_steps": 78105, "loss": 0.3551, "lr": 4.747747561556687e-06, "epoch": 1.1490941681070355, "percentage": 22.98, "elapsed_time": "0:47:05", "remaining_time": "2:37:47", "throughput": 19992.31, "total_tokens": 56481856} +{"current_steps": 17955, "total_steps": 78105, "loss": 0.3701, "lr": 4.7475029580009205e-06, "epoch": 1.1494142500480122, "percentage": 22.99, "elapsed_time": "0:47:05", "remaining_time": "2:37:46", "throughput": 19993.15, "total_tokens": 56498048} +{"current_steps": 17960, "total_steps": 78105, "loss": 0.329, "lr": 4.747258242216561e-06, "epoch": 1.1497343319889892, "percentage": 22.99, "elapsed_time": "0:47:06", "remaining_time": "2:37:45", "throughput": 19993.83, "total_tokens": 56512960} +{"current_steps": 17965, "total_steps": 78105, "loss": 0.3293, "lr": 4.747013414215832e-06, "epoch": 1.150054413929966, "percentage": 23.0, "elapsed_time": "0:47:07", "remaining_time": "2:37:44", "throughput": 19994.51, "total_tokens": 56528256} +{"current_steps": 17970, "total_steps": 78105, "loss": 0.3477, "lr": 4.746768474010957e-06, "epoch": 1.150374495870943, "percentage": 23.01, "elapsed_time": "0:47:07", "remaining_time": "2:37:43", "throughput": 19995.27, "total_tokens": 56543552} +{"current_steps": 17975, "total_steps": 78105, "loss": 0.4648, "lr": 4.746523421614168e-06, "epoch": 1.1506945778119197, "percentage": 23.01, "elapsed_time": "0:47:08", "remaining_time": "2:37:42", "throughput": 19996.11, "total_tokens": 56559936} +{"current_steps": 17980, "total_steps": 78105, "loss": 0.3169, "lr": 4.7462782570377e-06, "epoch": 1.1510146597528967, "percentage": 23.02, "elapsed_time": "0:47:09", "remaining_time": "2:37:40", "throughput": 19996.87, "total_tokens": 56575872} +{"current_steps": 17985, "total_steps": 78105, "loss": 0.2964, "lr": 4.746032980293796e-06, "epoch": 1.1513347416938737, "percentage": 23.03, "elapsed_time": "0:47:09", "remaining_time": "2:37:39", "throughput": 19997.65, "total_tokens": 56591936} +{"current_steps": 17990, "total_steps": 78105, "loss": 0.2924, "lr": 4.745787591394705e-06, "epoch": 1.1516548236348505, "percentage": 23.03, "elapsed_time": "0:47:10", "remaining_time": "2:37:38", "throughput": 19998.36, "total_tokens": 56607040} +{"current_steps": 17995, "total_steps": 78105, "loss": 0.2968, "lr": 4.74554209035268e-06, "epoch": 1.1519749055758275, "percentage": 23.04, "elapsed_time": "0:47:11", "remaining_time": "2:37:37", "throughput": 19999.31, "total_tokens": 56623872} +{"current_steps": 18000, "total_steps": 78105, "loss": 0.3764, "lr": 4.745296477179978e-06, "epoch": 1.1522949875168043, "percentage": 23.05, "elapsed_time": "0:47:12", "remaining_time": "2:37:36", "throughput": 20000.34, "total_tokens": 56641280} +{"current_steps": 18005, "total_steps": 78105, "loss": 0.4219, "lr": 4.7450507518888665e-06, "epoch": 1.1526150694577812, "percentage": 23.05, "elapsed_time": "0:47:12", "remaining_time": "2:37:35", "throughput": 20001.17, "total_tokens": 56657408} +{"current_steps": 18010, "total_steps": 78105, "loss": 0.4024, "lr": 4.744804914491613e-06, "epoch": 1.152935151398758, "percentage": 23.06, "elapsed_time": "0:47:13", "remaining_time": "2:37:34", "throughput": 20001.95, "total_tokens": 56672896} +{"current_steps": 18015, "total_steps": 78105, "loss": 0.3147, "lr": 4.744558965000497e-06, "epoch": 1.153255233339735, "percentage": 23.07, "elapsed_time": "0:47:14", "remaining_time": "2:37:33", "throughput": 20002.78, "total_tokens": 56688960} +{"current_steps": 18020, "total_steps": 78105, "loss": 0.3024, "lr": 4.744312903427797e-06, "epoch": 1.1535753152807118, "percentage": 23.07, "elapsed_time": "0:47:14", "remaining_time": "2:37:32", "throughput": 20003.53, "total_tokens": 56704768} +{"current_steps": 18025, "total_steps": 78105, "loss": 0.4075, "lr": 4.7440667297858e-06, "epoch": 1.1538953972216888, "percentage": 23.08, "elapsed_time": "0:47:15", "remaining_time": "2:37:30", "throughput": 20004.3, "total_tokens": 56720576} +{"current_steps": 18030, "total_steps": 78105, "loss": 0.311, "lr": 4.7438204440868e-06, "epoch": 1.1542154791626658, "percentage": 23.08, "elapsed_time": "0:47:16", "remaining_time": "2:37:29", "throughput": 20005.18, "total_tokens": 56737344} +{"current_steps": 18035, "total_steps": 78105, "loss": 0.3828, "lr": 4.743574046343095e-06, "epoch": 1.1545355611036425, "percentage": 23.09, "elapsed_time": "0:47:16", "remaining_time": "2:37:28", "throughput": 20005.81, "total_tokens": 56752128} +{"current_steps": 18040, "total_steps": 78105, "loss": 0.1886, "lr": 4.743327536566988e-06, "epoch": 1.1548556430446195, "percentage": 23.1, "elapsed_time": "0:47:17", "remaining_time": "2:37:27", "throughput": 20006.47, "total_tokens": 56767424} +{"current_steps": 18045, "total_steps": 78105, "loss": 0.2718, "lr": 4.7430809147707885e-06, "epoch": 1.1551757249855963, "percentage": 23.1, "elapsed_time": "0:47:18", "remaining_time": "2:37:26", "throughput": 20007.3, "total_tokens": 56783552} +{"current_steps": 18050, "total_steps": 78105, "loss": 0.4257, "lr": 4.742834180966812e-06, "epoch": 1.1554958069265733, "percentage": 23.11, "elapsed_time": "0:47:18", "remaining_time": "2:37:25", "throughput": 20007.95, "total_tokens": 56798528} +{"current_steps": 18055, "total_steps": 78105, "loss": 0.3652, "lr": 4.74258733516738e-06, "epoch": 1.15581588886755, "percentage": 23.12, "elapsed_time": "0:47:19", "remaining_time": "2:37:24", "throughput": 20008.96, "total_tokens": 56815808} +{"current_steps": 18060, "total_steps": 78105, "loss": 0.2541, "lr": 4.742340377384816e-06, "epoch": 1.156135970808527, "percentage": 23.12, "elapsed_time": "0:47:20", "remaining_time": "2:37:22", "throughput": 20009.72, "total_tokens": 56831552} +{"current_steps": 18065, "total_steps": 78105, "loss": 0.2047, "lr": 4.742093307631456e-06, "epoch": 1.1564560527495038, "percentage": 23.13, "elapsed_time": "0:47:20", "remaining_time": "2:37:21", "throughput": 20010.39, "total_tokens": 56846656} +{"current_steps": 18070, "total_steps": 78105, "loss": 0.1776, "lr": 4.7418461259196326e-06, "epoch": 1.1567761346904808, "percentage": 23.14, "elapsed_time": "0:47:21", "remaining_time": "2:37:20", "throughput": 20011.21, "total_tokens": 56862656} +{"current_steps": 18075, "total_steps": 78105, "loss": 0.3047, "lr": 4.741598832261692e-06, "epoch": 1.1570962166314576, "percentage": 23.14, "elapsed_time": "0:47:22", "remaining_time": "2:37:19", "throughput": 20011.94, "total_tokens": 56878144} +{"current_steps": 18080, "total_steps": 78105, "loss": 0.2693, "lr": 4.741351426669982e-06, "epoch": 1.1574162985724346, "percentage": 23.15, "elapsed_time": "0:47:22", "remaining_time": "2:37:18", "throughput": 20013.01, "total_tokens": 56895808} +{"current_steps": 18085, "total_steps": 78105, "loss": 0.3429, "lr": 4.741103909156856e-06, "epoch": 1.1577363805134113, "percentage": 23.15, "elapsed_time": "0:47:23", "remaining_time": "2:37:17", "throughput": 20013.67, "total_tokens": 56910784} +{"current_steps": 18090, "total_steps": 78105, "loss": 0.2965, "lr": 4.740856279734674e-06, "epoch": 1.1580564624543883, "percentage": 23.16, "elapsed_time": "0:47:24", "remaining_time": "2:37:16", "throughput": 20014.65, "total_tokens": 56928000} +{"current_steps": 18095, "total_steps": 78105, "loss": 0.2712, "lr": 4.740608538415802e-06, "epoch": 1.1583765443953653, "percentage": 23.17, "elapsed_time": "0:47:24", "remaining_time": "2:37:15", "throughput": 20015.41, "total_tokens": 56943616} +{"current_steps": 18100, "total_steps": 78105, "loss": 0.4541, "lr": 4.74036068521261e-06, "epoch": 1.158696626336342, "percentage": 23.17, "elapsed_time": "0:47:25", "remaining_time": "2:37:13", "throughput": 20016.21, "total_tokens": 56959424} +{"current_steps": 18105, "total_steps": 78105, "loss": 0.4573, "lr": 4.740112720137476e-06, "epoch": 1.159016708277319, "percentage": 23.18, "elapsed_time": "0:47:26", "remaining_time": "2:37:12", "throughput": 20017.25, "total_tokens": 56977152} +{"current_steps": 18110, "total_steps": 78105, "loss": 0.3981, "lr": 4.73986464320278e-06, "epoch": 1.1593367902182958, "percentage": 23.19, "elapsed_time": "0:47:27", "remaining_time": "2:37:11", "throughput": 20018.03, "total_tokens": 56993024} +{"current_steps": 18115, "total_steps": 78105, "loss": 0.2842, "lr": 4.739616454420912e-06, "epoch": 1.1596568721592728, "percentage": 23.19, "elapsed_time": "0:47:27", "remaining_time": "2:37:10", "throughput": 20018.71, "total_tokens": 57008320} +{"current_steps": 18120, "total_steps": 78105, "loss": 0.3857, "lr": 4.739368153804264e-06, "epoch": 1.1599769541002496, "percentage": 23.2, "elapsed_time": "0:47:28", "remaining_time": "2:37:09", "throughput": 20019.48, "total_tokens": 57024256} +{"current_steps": 18125, "total_steps": 78105, "loss": 0.222, "lr": 4.739119741365233e-06, "epoch": 1.1602970360412266, "percentage": 23.21, "elapsed_time": "0:47:29", "remaining_time": "2:37:08", "throughput": 20020.17, "total_tokens": 57039616} +{"current_steps": 18130, "total_steps": 78105, "loss": 0.3577, "lr": 4.738871217116226e-06, "epoch": 1.1606171179822034, "percentage": 23.21, "elapsed_time": "0:47:29", "remaining_time": "2:37:07", "throughput": 20020.99, "total_tokens": 57055744} +{"current_steps": 18135, "total_steps": 78105, "loss": 0.3976, "lr": 4.7386225810696535e-06, "epoch": 1.1609371999231803, "percentage": 23.22, "elapsed_time": "0:47:30", "remaining_time": "2:37:06", "throughput": 20021.65, "total_tokens": 57070720} +{"current_steps": 18140, "total_steps": 78105, "loss": 0.3639, "lr": 4.7383738332379295e-06, "epoch": 1.1612572818641573, "percentage": 23.23, "elapsed_time": "0:47:31", "remaining_time": "2:37:04", "throughput": 20022.34, "total_tokens": 57086080} +{"current_steps": 18145, "total_steps": 78105, "loss": 0.3494, "lr": 4.738124973633474e-06, "epoch": 1.161577363805134, "percentage": 23.23, "elapsed_time": "0:47:31", "remaining_time": "2:37:03", "throughput": 20023.03, "total_tokens": 57101184} +{"current_steps": 18150, "total_steps": 78105, "loss": 0.3558, "lr": 4.737876002268716e-06, "epoch": 1.161897445746111, "percentage": 23.24, "elapsed_time": "0:47:32", "remaining_time": "2:37:02", "throughput": 20024.2, "total_tokens": 57119680} +{"current_steps": 18155, "total_steps": 78105, "loss": 0.2029, "lr": 4.737626919156089e-06, "epoch": 1.1622175276870879, "percentage": 23.24, "elapsed_time": "0:47:33", "remaining_time": "2:37:01", "throughput": 20025.05, "total_tokens": 57135808} +{"current_steps": 18160, "total_steps": 78105, "loss": 0.4155, "lr": 4.737377724308027e-06, "epoch": 1.1625376096280648, "percentage": 23.25, "elapsed_time": "0:47:33", "remaining_time": "2:37:00", "throughput": 20025.73, "total_tokens": 57151296} +{"current_steps": 18165, "total_steps": 78105, "loss": 0.3585, "lr": 4.737128417736976e-06, "epoch": 1.1628576915690416, "percentage": 23.26, "elapsed_time": "0:47:34", "remaining_time": "2:36:59", "throughput": 20026.46, "total_tokens": 57166656} +{"current_steps": 18170, "total_steps": 78105, "loss": 0.343, "lr": 4.7368789994553845e-06, "epoch": 1.1631777735100186, "percentage": 23.26, "elapsed_time": "0:47:35", "remaining_time": "2:36:58", "throughput": 20027.56, "total_tokens": 57184320} +{"current_steps": 18175, "total_steps": 78105, "loss": 0.2874, "lr": 4.736629469475708e-06, "epoch": 1.1634978554509954, "percentage": 23.27, "elapsed_time": "0:47:35", "remaining_time": "2:36:57", "throughput": 20028.2, "total_tokens": 57199360} +{"current_steps": 18180, "total_steps": 78105, "loss": 0.2837, "lr": 4.7363798278104046e-06, "epoch": 1.1638179373919724, "percentage": 23.28, "elapsed_time": "0:47:36", "remaining_time": "2:36:55", "throughput": 20028.85, "total_tokens": 57214528} +{"current_steps": 18185, "total_steps": 78105, "loss": 0.4724, "lr": 4.736130074471942e-06, "epoch": 1.1641380193329494, "percentage": 23.28, "elapsed_time": "0:47:37", "remaining_time": "2:36:54", "throughput": 20029.72, "total_tokens": 57230784} +{"current_steps": 18190, "total_steps": 78105, "loss": 0.253, "lr": 4.735880209472792e-06, "epoch": 1.1644581012739261, "percentage": 23.29, "elapsed_time": "0:47:37", "remaining_time": "2:36:53", "throughput": 20030.52, "total_tokens": 57246656} +{"current_steps": 18195, "total_steps": 78105, "loss": 0.282, "lr": 4.73563023282543e-06, "epoch": 1.164778183214903, "percentage": 23.3, "elapsed_time": "0:47:38", "remaining_time": "2:36:52", "throughput": 20031.17, "total_tokens": 57261696} +{"current_steps": 18200, "total_steps": 78105, "loss": 0.4472, "lr": 4.735380144542339e-06, "epoch": 1.1650982651558799, "percentage": 23.3, "elapsed_time": "0:47:39", "remaining_time": "2:36:51", "throughput": 20031.99, "total_tokens": 57277824} +{"current_steps": 18205, "total_steps": 78105, "loss": 0.3102, "lr": 4.7351299446360065e-06, "epoch": 1.1654183470968569, "percentage": 23.31, "elapsed_time": "0:47:39", "remaining_time": "2:36:50", "throughput": 20032.74, "total_tokens": 57293440} +{"current_steps": 18210, "total_steps": 78105, "loss": 0.2573, "lr": 4.734879633118928e-06, "epoch": 1.1657384290378336, "percentage": 23.31, "elapsed_time": "0:47:40", "remaining_time": "2:36:49", "throughput": 20033.46, "total_tokens": 57308928} +{"current_steps": 18215, "total_steps": 78105, "loss": 0.2893, "lr": 4.734629210003601e-06, "epoch": 1.1660585109788106, "percentage": 23.32, "elapsed_time": "0:47:41", "remaining_time": "2:36:47", "throughput": 20034.3, "total_tokens": 57325056} +{"current_steps": 18220, "total_steps": 78105, "loss": 0.3903, "lr": 4.734378675302532e-06, "epoch": 1.1663785929197874, "percentage": 23.33, "elapsed_time": "0:47:42", "remaining_time": "2:36:46", "throughput": 20035.02, "total_tokens": 57340608} +{"current_steps": 18225, "total_steps": 78105, "loss": 0.2948, "lr": 4.73412802902823e-06, "epoch": 1.1666986748607644, "percentage": 23.33, "elapsed_time": "0:47:42", "remaining_time": "2:36:45", "throughput": 20035.66, "total_tokens": 57355520} +{"current_steps": 18230, "total_steps": 78105, "loss": 0.2962, "lr": 4.733877271193211e-06, "epoch": 1.1670187568017412, "percentage": 23.34, "elapsed_time": "0:47:43", "remaining_time": "2:36:44", "throughput": 20036.41, "total_tokens": 57371328} +{"current_steps": 18235, "total_steps": 78105, "loss": 0.3113, "lr": 4.733626401809997e-06, "epoch": 1.1673388387427182, "percentage": 23.35, "elapsed_time": "0:47:44", "remaining_time": "2:36:43", "throughput": 20037.45, "total_tokens": 57389056} +{"current_steps": 18240, "total_steps": 78105, "loss": 0.195, "lr": 4.733375420891115e-06, "epoch": 1.167658920683695, "percentage": 23.35, "elapsed_time": "0:47:44", "remaining_time": "2:36:42", "throughput": 20038.01, "total_tokens": 57403456} +{"current_steps": 18245, "total_steps": 78105, "loss": 0.3968, "lr": 4.733124328449098e-06, "epoch": 1.167979002624672, "percentage": 23.36, "elapsed_time": "0:47:45", "remaining_time": "2:36:41", "throughput": 20038.76, "total_tokens": 57419072} +{"current_steps": 18250, "total_steps": 78105, "loss": 0.2418, "lr": 4.732873124496483e-06, "epoch": 1.168299084565649, "percentage": 23.37, "elapsed_time": "0:47:46", "remaining_time": "2:36:39", "throughput": 20039.5, "total_tokens": 57434624} +{"current_steps": 18255, "total_steps": 78105, "loss": 0.3917, "lr": 4.732621809045817e-06, "epoch": 1.1686191665066257, "percentage": 23.37, "elapsed_time": "0:47:46", "remaining_time": "2:36:38", "throughput": 20040.44, "total_tokens": 57451008} +{"current_steps": 18260, "total_steps": 78105, "loss": 0.4322, "lr": 4.732370382109644e-06, "epoch": 1.1689392484476027, "percentage": 23.38, "elapsed_time": "0:47:47", "remaining_time": "2:36:37", "throughput": 20041.25, "total_tokens": 57467264} +{"current_steps": 18265, "total_steps": 78105, "loss": 0.465, "lr": 4.732118843700525e-06, "epoch": 1.1692593303885794, "percentage": 23.39, "elapsed_time": "0:47:48", "remaining_time": "2:36:36", "throughput": 20042.01, "total_tokens": 57483072} +{"current_steps": 18270, "total_steps": 78105, "loss": 0.2412, "lr": 4.731867193831016e-06, "epoch": 1.1695794123295564, "percentage": 23.39, "elapsed_time": "0:47:48", "remaining_time": "2:36:35", "throughput": 20042.64, "total_tokens": 57498240} +{"current_steps": 18275, "total_steps": 78105, "loss": 0.3972, "lr": 4.7316154325136855e-06, "epoch": 1.1698994942705332, "percentage": 23.4, "elapsed_time": "0:47:49", "remaining_time": "2:36:34", "throughput": 20043.42, "total_tokens": 57514112} +{"current_steps": 18280, "total_steps": 78105, "loss": 0.3429, "lr": 4.731363559761104e-06, "epoch": 1.1702195762115102, "percentage": 23.4, "elapsed_time": "0:47:50", "remaining_time": "2:36:33", "throughput": 20044.25, "total_tokens": 57530496} +{"current_steps": 18285, "total_steps": 78105, "loss": 0.3387, "lr": 4.731111575585849e-06, "epoch": 1.170539658152487, "percentage": 23.41, "elapsed_time": "0:47:50", "remaining_time": "2:36:32", "throughput": 20045.17, "total_tokens": 57547648} +{"current_steps": 18290, "total_steps": 78105, "loss": 0.2618, "lr": 4.730859480000503e-06, "epoch": 1.170859740093464, "percentage": 23.42, "elapsed_time": "0:47:51", "remaining_time": "2:36:31", "throughput": 20045.87, "total_tokens": 57563136} +{"current_steps": 18295, "total_steps": 78105, "loss": 0.2651, "lr": 4.730607273017655e-06, "epoch": 1.171179822034441, "percentage": 23.42, "elapsed_time": "0:47:52", "remaining_time": "2:36:29", "throughput": 20046.62, "total_tokens": 57578880} +{"current_steps": 18300, "total_steps": 78105, "loss": 0.3716, "lr": 4.730354954649899e-06, "epoch": 1.1714999039754177, "percentage": 23.43, "elapsed_time": "0:47:52", "remaining_time": "2:36:28", "throughput": 20047.39, "total_tokens": 57594624} +{"current_steps": 18305, "total_steps": 78105, "loss": 0.2422, "lr": 4.730102524909835e-06, "epoch": 1.1718199859163947, "percentage": 23.44, "elapsed_time": "0:47:53", "remaining_time": "2:36:27", "throughput": 20048.1, "total_tokens": 57609856} +{"current_steps": 18310, "total_steps": 78105, "loss": 0.2457, "lr": 4.729849983810066e-06, "epoch": 1.1721400678573715, "percentage": 23.44, "elapsed_time": "0:47:54", "remaining_time": "2:36:26", "throughput": 20048.8, "total_tokens": 57624896} +{"current_steps": 18315, "total_steps": 78105, "loss": 0.396, "lr": 4.729597331363203e-06, "epoch": 1.1724601497983484, "percentage": 23.45, "elapsed_time": "0:47:54", "remaining_time": "2:36:25", "throughput": 20049.42, "total_tokens": 57639744} +{"current_steps": 18320, "total_steps": 78105, "loss": 0.2308, "lr": 4.7293445675818635e-06, "epoch": 1.1727802317393252, "percentage": 23.46, "elapsed_time": "0:47:55", "remaining_time": "2:36:23", "throughput": 20050.1, "total_tokens": 57654912} +{"current_steps": 18325, "total_steps": 78105, "loss": 0.2958, "lr": 4.729091692478668e-06, "epoch": 1.1731003136803022, "percentage": 23.46, "elapsed_time": "0:47:56", "remaining_time": "2:36:22", "throughput": 20050.73, "total_tokens": 57669760} +{"current_steps": 18330, "total_steps": 78105, "loss": 0.3218, "lr": 4.728838706066245e-06, "epoch": 1.173420395621279, "percentage": 23.47, "elapsed_time": "0:47:56", "remaining_time": "2:36:21", "throughput": 20051.53, "total_tokens": 57685888} +{"current_steps": 18335, "total_steps": 78105, "loss": 0.2418, "lr": 4.728585608357226e-06, "epoch": 1.173740477562256, "percentage": 23.47, "elapsed_time": "0:47:57", "remaining_time": "2:36:20", "throughput": 20052.36, "total_tokens": 57702080} +{"current_steps": 18340, "total_steps": 78105, "loss": 0.3131, "lr": 4.728332399364251e-06, "epoch": 1.1740605595032327, "percentage": 23.48, "elapsed_time": "0:47:58", "remaining_time": "2:36:19", "throughput": 20053.05, "total_tokens": 57716992} +{"current_steps": 18345, "total_steps": 78105, "loss": 0.2745, "lr": 4.728079079099962e-06, "epoch": 1.1743806414442097, "percentage": 23.49, "elapsed_time": "0:47:58", "remaining_time": "2:36:18", "throughput": 20053.91, "total_tokens": 57733248} +{"current_steps": 18350, "total_steps": 78105, "loss": 0.2247, "lr": 4.727825647577009e-06, "epoch": 1.1747007233851865, "percentage": 23.49, "elapsed_time": "0:47:59", "remaining_time": "2:36:17", "throughput": 20054.55, "total_tokens": 57748288} +{"current_steps": 18355, "total_steps": 78105, "loss": 0.3643, "lr": 4.727572104808049e-06, "epoch": 1.1750208053261635, "percentage": 23.5, "elapsed_time": "0:48:00", "remaining_time": "2:36:15", "throughput": 20055.39, "total_tokens": 57764480} +{"current_steps": 18360, "total_steps": 78105, "loss": 0.536, "lr": 4.72731845080574e-06, "epoch": 1.1753408872671405, "percentage": 23.51, "elapsed_time": "0:48:00", "remaining_time": "2:36:14", "throughput": 20056.15, "total_tokens": 57780160} +{"current_steps": 18365, "total_steps": 78105, "loss": 0.331, "lr": 4.72706468558275e-06, "epoch": 1.1756609692081172, "percentage": 23.51, "elapsed_time": "0:48:01", "remaining_time": "2:36:13", "throughput": 20056.7, "total_tokens": 57794560} +{"current_steps": 18370, "total_steps": 78105, "loss": 0.4336, "lr": 4.726810809151749e-06, "epoch": 1.1759810511490942, "percentage": 23.52, "elapsed_time": "0:48:02", "remaining_time": "2:36:12", "throughput": 20057.44, "total_tokens": 57810176} +{"current_steps": 18375, "total_steps": 78105, "loss": 0.4832, "lr": 4.726556821525415e-06, "epoch": 1.176301133090071, "percentage": 23.53, "elapsed_time": "0:48:02", "remaining_time": "2:36:11", "throughput": 20058.22, "total_tokens": 57826240} +{"current_steps": 18380, "total_steps": 78105, "loss": 0.274, "lr": 4.726302722716433e-06, "epoch": 1.176621215031048, "percentage": 23.53, "elapsed_time": "0:48:03", "remaining_time": "2:36:10", "throughput": 20059.05, "total_tokens": 57842560} +{"current_steps": 18385, "total_steps": 78105, "loss": 0.3844, "lr": 4.726048512737488e-06, "epoch": 1.1769412969720248, "percentage": 23.54, "elapsed_time": "0:48:04", "remaining_time": "2:36:09", "throughput": 20059.78, "total_tokens": 57858176} +{"current_steps": 18390, "total_steps": 78105, "loss": 0.2736, "lr": 4.725794191601275e-06, "epoch": 1.1772613789130018, "percentage": 23.55, "elapsed_time": "0:48:04", "remaining_time": "2:36:07", "throughput": 20060.57, "total_tokens": 57874432} +{"current_steps": 18395, "total_steps": 78105, "loss": 0.3454, "lr": 4.725539759320494e-06, "epoch": 1.1775814608539785, "percentage": 23.55, "elapsed_time": "0:48:05", "remaining_time": "2:36:06", "throughput": 20061.36, "total_tokens": 57890496} +{"current_steps": 18400, "total_steps": 78105, "loss": 0.2779, "lr": 4.72528521590785e-06, "epoch": 1.1779015427949555, "percentage": 23.56, "elapsed_time": "0:48:06", "remaining_time": "2:36:05", "throughput": 20062.03, "total_tokens": 57905856} +{"current_steps": 18405, "total_steps": 78105, "loss": 0.4571, "lr": 4.725030561376054e-06, "epoch": 1.1782216247359325, "percentage": 23.56, "elapsed_time": "0:48:07", "remaining_time": "2:36:04", "throughput": 20062.84, "total_tokens": 57922112} +{"current_steps": 18410, "total_steps": 78105, "loss": 0.3497, "lr": 4.724775795737821e-06, "epoch": 1.1785417066769093, "percentage": 23.57, "elapsed_time": "0:48:07", "remaining_time": "2:36:03", "throughput": 20063.47, "total_tokens": 57937152} +{"current_steps": 18415, "total_steps": 78105, "loss": 0.2677, "lr": 4.724520919005873e-06, "epoch": 1.1788617886178863, "percentage": 23.58, "elapsed_time": "0:48:08", "remaining_time": "2:36:02", "throughput": 20064.23, "total_tokens": 57952832} +{"current_steps": 18420, "total_steps": 78105, "loss": 0.2739, "lr": 4.724265931192938e-06, "epoch": 1.179181870558863, "percentage": 23.58, "elapsed_time": "0:48:09", "remaining_time": "2:36:01", "throughput": 20065.0, "total_tokens": 57968448} +{"current_steps": 18425, "total_steps": 78105, "loss": 0.333, "lr": 4.724010832311747e-06, "epoch": 1.17950195249984, "percentage": 23.59, "elapsed_time": "0:48:09", "remaining_time": "2:35:59", "throughput": 20065.75, "total_tokens": 57984192} +{"current_steps": 18430, "total_steps": 78105, "loss": 0.2662, "lr": 4.723755622375041e-06, "epoch": 1.1798220344408168, "percentage": 23.6, "elapsed_time": "0:48:10", "remaining_time": "2:35:58", "throughput": 20066.49, "total_tokens": 57999744} +{"current_steps": 18435, "total_steps": 78105, "loss": 0.3882, "lr": 4.723500301395562e-06, "epoch": 1.1801421163817938, "percentage": 23.6, "elapsed_time": "0:48:11", "remaining_time": "2:35:57", "throughput": 20067.17, "total_tokens": 58014976} +{"current_steps": 18440, "total_steps": 78105, "loss": 0.3419, "lr": 4.723244869386059e-06, "epoch": 1.1804621983227706, "percentage": 23.61, "elapsed_time": "0:48:11", "remaining_time": "2:35:56", "throughput": 20067.91, "total_tokens": 58030464} +{"current_steps": 18445, "total_steps": 78105, "loss": 0.3195, "lr": 4.7229893263592885e-06, "epoch": 1.1807822802637475, "percentage": 23.62, "elapsed_time": "0:48:12", "remaining_time": "2:35:55", "throughput": 20068.61, "total_tokens": 58045760} +{"current_steps": 18450, "total_steps": 78105, "loss": 0.276, "lr": 4.722733672328009e-06, "epoch": 1.1811023622047245, "percentage": 23.62, "elapsed_time": "0:48:13", "remaining_time": "2:35:54", "throughput": 20069.28, "total_tokens": 58061184} +{"current_steps": 18455, "total_steps": 78105, "loss": 0.2063, "lr": 4.722477907304988e-06, "epoch": 1.1814224441457013, "percentage": 23.63, "elapsed_time": "0:48:13", "remaining_time": "2:35:52", "throughput": 20069.95, "total_tokens": 58076288} +{"current_steps": 18460, "total_steps": 78105, "loss": 0.338, "lr": 4.722222031302998e-06, "epoch": 1.181742526086678, "percentage": 23.63, "elapsed_time": "0:48:14", "remaining_time": "2:35:51", "throughput": 20070.61, "total_tokens": 58091072} +{"current_steps": 18465, "total_steps": 78105, "loss": 0.415, "lr": 4.721966044334814e-06, "epoch": 1.182062608027655, "percentage": 23.64, "elapsed_time": "0:48:14", "remaining_time": "2:35:50", "throughput": 20071.24, "total_tokens": 58105984} +{"current_steps": 18470, "total_steps": 78105, "loss": 0.3179, "lr": 4.72170994641322e-06, "epoch": 1.182382689968632, "percentage": 23.65, "elapsed_time": "0:48:15", "remaining_time": "2:35:49", "throughput": 20072.07, "total_tokens": 58122176} +{"current_steps": 18475, "total_steps": 78105, "loss": 0.3703, "lr": 4.721453737551003e-06, "epoch": 1.1827027719096088, "percentage": 23.65, "elapsed_time": "0:48:16", "remaining_time": "2:35:48", "throughput": 20072.75, "total_tokens": 58137280} +{"current_steps": 18480, "total_steps": 78105, "loss": 0.2988, "lr": 4.721197417760958e-06, "epoch": 1.1830228538505858, "percentage": 23.66, "elapsed_time": "0:48:17", "remaining_time": "2:35:47", "throughput": 20073.5, "total_tokens": 58153216} +{"current_steps": 18485, "total_steps": 78105, "loss": 0.3089, "lr": 4.720940987055884e-06, "epoch": 1.1833429357915626, "percentage": 23.67, "elapsed_time": "0:48:17", "remaining_time": "2:35:46", "throughput": 20074.42, "total_tokens": 58170304} +{"current_steps": 18490, "total_steps": 78105, "loss": 0.2705, "lr": 4.720684445448585e-06, "epoch": 1.1836630177325396, "percentage": 23.67, "elapsed_time": "0:48:18", "remaining_time": "2:35:45", "throughput": 20075.31, "total_tokens": 58187328} +{"current_steps": 18495, "total_steps": 78105, "loss": 0.3306, "lr": 4.7204277929518725e-06, "epoch": 1.1839830996735163, "percentage": 23.68, "elapsed_time": "0:48:19", "remaining_time": "2:35:43", "throughput": 20076.05, "total_tokens": 58203008} +{"current_steps": 18500, "total_steps": 78105, "loss": 0.2484, "lr": 4.720171029578561e-06, "epoch": 1.1843031816144933, "percentage": 23.69, "elapsed_time": "0:48:19", "remaining_time": "2:35:43", "throughput": 20076.98, "total_tokens": 58220288} +{"current_steps": 18505, "total_steps": 78105, "loss": 0.4562, "lr": 4.719914155341473e-06, "epoch": 1.18462326355547, "percentage": 23.69, "elapsed_time": "0:48:20", "remaining_time": "2:35:41", "throughput": 20077.61, "total_tokens": 58235264} +{"current_steps": 18510, "total_steps": 78105, "loss": 0.367, "lr": 4.719657170253436e-06, "epoch": 1.184943345496447, "percentage": 23.7, "elapsed_time": "0:48:21", "remaining_time": "2:35:40", "throughput": 20078.24, "total_tokens": 58250176} +{"current_steps": 18515, "total_steps": 78105, "loss": 0.3158, "lr": 4.71940007432728e-06, "epoch": 1.185263427437424, "percentage": 23.71, "elapsed_time": "0:48:21", "remaining_time": "2:35:39", "throughput": 20079.05, "total_tokens": 58266432} +{"current_steps": 18520, "total_steps": 78105, "loss": 0.347, "lr": 4.719142867575847e-06, "epoch": 1.1855835093784008, "percentage": 23.71, "elapsed_time": "0:48:22", "remaining_time": "2:35:38", "throughput": 20079.83, "total_tokens": 58282368} +{"current_steps": 18525, "total_steps": 78105, "loss": 0.1971, "lr": 4.718885550011977e-06, "epoch": 1.1859035913193778, "percentage": 23.72, "elapsed_time": "0:48:23", "remaining_time": "2:35:37", "throughput": 20080.58, "total_tokens": 58297856} +{"current_steps": 18530, "total_steps": 78105, "loss": 0.372, "lr": 4.71862812164852e-06, "epoch": 1.1862236732603546, "percentage": 23.72, "elapsed_time": "0:48:23", "remaining_time": "2:35:36", "throughput": 20081.32, "total_tokens": 58313344} +{"current_steps": 18535, "total_steps": 78105, "loss": 0.38, "lr": 4.718370582498331e-06, "epoch": 1.1865437552013316, "percentage": 23.73, "elapsed_time": "0:48:24", "remaining_time": "2:35:34", "throughput": 20082.09, "total_tokens": 58329024} +{"current_steps": 18540, "total_steps": 78105, "loss": 0.5988, "lr": 4.718112932574271e-06, "epoch": 1.1868638371423084, "percentage": 23.74, "elapsed_time": "0:48:25", "remaining_time": "2:35:33", "throughput": 20082.79, "total_tokens": 58344320} +{"current_steps": 18545, "total_steps": 78105, "loss": 0.3548, "lr": 4.7178551718892045e-06, "epoch": 1.1871839190832854, "percentage": 23.74, "elapsed_time": "0:48:25", "remaining_time": "2:35:32", "throughput": 20083.87, "total_tokens": 58362432} +{"current_steps": 18550, "total_steps": 78105, "loss": 0.3206, "lr": 4.717597300456003e-06, "epoch": 1.1875040010242621, "percentage": 23.75, "elapsed_time": "0:48:26", "remaining_time": "2:35:31", "throughput": 20084.52, "total_tokens": 58377536} +{"current_steps": 18555, "total_steps": 78105, "loss": 0.4159, "lr": 4.717339318287543e-06, "epoch": 1.1878240829652391, "percentage": 23.76, "elapsed_time": "0:48:27", "remaining_time": "2:35:30", "throughput": 20085.27, "total_tokens": 58393088} +{"current_steps": 18560, "total_steps": 78105, "loss": 0.5395, "lr": 4.717081225396708e-06, "epoch": 1.188144164906216, "percentage": 23.76, "elapsed_time": "0:48:27", "remaining_time": "2:35:29", "throughput": 20085.9, "total_tokens": 58408000} +{"current_steps": 18565, "total_steps": 78105, "loss": 0.2805, "lr": 4.716823021796385e-06, "epoch": 1.1884642468471929, "percentage": 23.77, "elapsed_time": "0:48:28", "remaining_time": "2:35:28", "throughput": 20086.56, "total_tokens": 58423168} +{"current_steps": 18570, "total_steps": 78105, "loss": 0.4687, "lr": 4.716564707499467e-06, "epoch": 1.1887843287881699, "percentage": 23.78, "elapsed_time": "0:48:29", "remaining_time": "2:35:26", "throughput": 20087.17, "total_tokens": 58437952} +{"current_steps": 18575, "total_steps": 78105, "loss": 0.2759, "lr": 4.716306282518852e-06, "epoch": 1.1891044107291466, "percentage": 23.78, "elapsed_time": "0:48:29", "remaining_time": "2:35:25", "throughput": 20088.1, "total_tokens": 58454784} +{"current_steps": 18580, "total_steps": 78105, "loss": 0.4194, "lr": 4.716047746867447e-06, "epoch": 1.1894244926701236, "percentage": 23.79, "elapsed_time": "0:48:30", "remaining_time": "2:35:24", "throughput": 20088.84, "total_tokens": 58470592} +{"current_steps": 18585, "total_steps": 78105, "loss": 0.2592, "lr": 4.7157891005581605e-06, "epoch": 1.1897445746111004, "percentage": 23.79, "elapsed_time": "0:48:31", "remaining_time": "2:35:23", "throughput": 20089.51, "total_tokens": 58486080} +{"current_steps": 18590, "total_steps": 78105, "loss": 0.4028, "lr": 4.715530343603907e-06, "epoch": 1.1900646565520774, "percentage": 23.8, "elapsed_time": "0:48:31", "remaining_time": "2:35:22", "throughput": 20090.22, "total_tokens": 58502016} +{"current_steps": 18595, "total_steps": 78105, "loss": 0.372, "lr": 4.71527147601761e-06, "epoch": 1.1903847384930542, "percentage": 23.81, "elapsed_time": "0:48:32", "remaining_time": "2:35:21", "throughput": 20091.42, "total_tokens": 58521280} +{"current_steps": 18600, "total_steps": 78105, "loss": 0.5035, "lr": 4.7150124978121924e-06, "epoch": 1.1907048204340311, "percentage": 23.81, "elapsed_time": "0:48:33", "remaining_time": "2:35:20", "throughput": 20092.12, "total_tokens": 58536960} +{"current_steps": 18605, "total_steps": 78105, "loss": 0.3701, "lr": 4.7147534090005896e-06, "epoch": 1.191024902375008, "percentage": 23.82, "elapsed_time": "0:48:34", "remaining_time": "2:35:19", "throughput": 20092.8, "total_tokens": 58552448} +{"current_steps": 18610, "total_steps": 78105, "loss": 0.4091, "lr": 4.714494209595738e-06, "epoch": 1.191344984315985, "percentage": 23.83, "elapsed_time": "0:48:34", "remaining_time": "2:35:18", "throughput": 20093.42, "total_tokens": 58567424} +{"current_steps": 18615, "total_steps": 78105, "loss": 0.2487, "lr": 4.714234899610579e-06, "epoch": 1.1916650662569617, "percentage": 23.83, "elapsed_time": "0:48:35", "remaining_time": "2:35:17", "throughput": 20094.13, "total_tokens": 58583296} +{"current_steps": 18620, "total_steps": 78105, "loss": 0.2477, "lr": 4.713975479058064e-06, "epoch": 1.1919851481979387, "percentage": 23.84, "elapsed_time": "0:48:36", "remaining_time": "2:35:16", "throughput": 20094.87, "total_tokens": 58598976} +{"current_steps": 18625, "total_steps": 78105, "loss": 0.3458, "lr": 4.713715947951145e-06, "epoch": 1.1923052301389157, "percentage": 23.85, "elapsed_time": "0:48:36", "remaining_time": "2:35:15", "throughput": 20095.79, "total_tokens": 58615872} +{"current_steps": 18630, "total_steps": 78105, "loss": 0.2194, "lr": 4.7134563063027825e-06, "epoch": 1.1926253120798924, "percentage": 23.85, "elapsed_time": "0:48:37", "remaining_time": "2:35:13", "throughput": 20096.43, "total_tokens": 58630912} +{"current_steps": 18635, "total_steps": 78105, "loss": 0.4626, "lr": 4.713196554125942e-06, "epoch": 1.1929453940208694, "percentage": 23.86, "elapsed_time": "0:48:38", "remaining_time": "2:35:12", "throughput": 20097.08, "total_tokens": 58646144} +{"current_steps": 18640, "total_steps": 78105, "loss": 0.3525, "lr": 4.712936691433593e-06, "epoch": 1.1932654759618462, "percentage": 23.87, "elapsed_time": "0:48:38", "remaining_time": "2:35:11", "throughput": 20097.7, "total_tokens": 58661248} +{"current_steps": 18645, "total_steps": 78105, "loss": 0.3118, "lr": 4.712676718238714e-06, "epoch": 1.1935855579028232, "percentage": 23.87, "elapsed_time": "0:48:39", "remaining_time": "2:35:10", "throughput": 20098.53, "total_tokens": 58677824} +{"current_steps": 18650, "total_steps": 78105, "loss": 0.253, "lr": 4.712416634554283e-06, "epoch": 1.1939056398438, "percentage": 23.88, "elapsed_time": "0:48:40", "remaining_time": "2:35:09", "throughput": 20099.13, "total_tokens": 58692544} +{"current_steps": 18655, "total_steps": 78105, "loss": 0.311, "lr": 4.71215644039329e-06, "epoch": 1.194225721784777, "percentage": 23.88, "elapsed_time": "0:48:40", "remaining_time": "2:35:08", "throughput": 20100.0, "total_tokens": 58709248} +{"current_steps": 18660, "total_steps": 78105, "loss": 0.306, "lr": 4.711896135768728e-06, "epoch": 1.1945458037257537, "percentage": 23.89, "elapsed_time": "0:48:41", "remaining_time": "2:35:07", "throughput": 20100.65, "total_tokens": 58724416} +{"current_steps": 18665, "total_steps": 78105, "loss": 0.326, "lr": 4.711635720693593e-06, "epoch": 1.1948658856667307, "percentage": 23.9, "elapsed_time": "0:48:42", "remaining_time": "2:35:05", "throughput": 20101.28, "total_tokens": 58739392} +{"current_steps": 18670, "total_steps": 78105, "loss": 0.298, "lr": 4.711375195180891e-06, "epoch": 1.1951859676077077, "percentage": 23.9, "elapsed_time": "0:48:42", "remaining_time": "2:35:04", "throughput": 20102.01, "total_tokens": 58755072} +{"current_steps": 18675, "total_steps": 78105, "loss": 0.415, "lr": 4.71111455924363e-06, "epoch": 1.1955060495486844, "percentage": 23.91, "elapsed_time": "0:48:43", "remaining_time": "2:35:03", "throughput": 20102.89, "total_tokens": 58771840} +{"current_steps": 18680, "total_steps": 78105, "loss": 0.3134, "lr": 4.710853812894825e-06, "epoch": 1.1958261314896614, "percentage": 23.92, "elapsed_time": "0:48:44", "remaining_time": "2:35:02", "throughput": 20103.66, "total_tokens": 58787648} +{"current_steps": 18685, "total_steps": 78105, "loss": 0.1993, "lr": 4.710592956147497e-06, "epoch": 1.1961462134306382, "percentage": 23.92, "elapsed_time": "0:48:44", "remaining_time": "2:35:01", "throughput": 20104.41, "total_tokens": 58803520} +{"current_steps": 18690, "total_steps": 78105, "loss": 0.3489, "lr": 4.710331989014671e-06, "epoch": 1.1964662953716152, "percentage": 23.93, "elapsed_time": "0:48:45", "remaining_time": "2:35:00", "throughput": 20105.3, "total_tokens": 58820352} +{"current_steps": 18695, "total_steps": 78105, "loss": 0.3614, "lr": 4.710070911509379e-06, "epoch": 1.196786377312592, "percentage": 23.94, "elapsed_time": "0:48:46", "remaining_time": "2:34:59", "throughput": 20106.04, "total_tokens": 58836032} +{"current_steps": 18700, "total_steps": 78105, "loss": 0.3157, "lr": 4.709809723644657e-06, "epoch": 1.197106459253569, "percentage": 23.94, "elapsed_time": "0:48:46", "remaining_time": "2:34:58", "throughput": 20106.82, "total_tokens": 58852096} +{"current_steps": 18705, "total_steps": 78105, "loss": 0.3826, "lr": 4.709548425433548e-06, "epoch": 1.1974265411945457, "percentage": 23.95, "elapsed_time": "0:48:47", "remaining_time": "2:34:57", "throughput": 20107.48, "total_tokens": 58867776} +{"current_steps": 18710, "total_steps": 78105, "loss": 0.3949, "lr": 4.7092870168891e-06, "epoch": 1.1977466231355227, "percentage": 23.95, "elapsed_time": "0:48:48", "remaining_time": "2:34:55", "throughput": 20108.02, "total_tokens": 58882112} +{"current_steps": 18715, "total_steps": 78105, "loss": 0.3101, "lr": 4.709025498024367e-06, "epoch": 1.1980667050764997, "percentage": 23.96, "elapsed_time": "0:48:48", "remaining_time": "2:34:54", "throughput": 20108.76, "total_tokens": 58897792} +{"current_steps": 18720, "total_steps": 78105, "loss": 0.3016, "lr": 4.708763868852405e-06, "epoch": 1.1983867870174765, "percentage": 23.97, "elapsed_time": "0:48:49", "remaining_time": "2:34:53", "throughput": 20109.34, "total_tokens": 58912576} +{"current_steps": 18725, "total_steps": 78105, "loss": 0.2631, "lr": 4.708502129386282e-06, "epoch": 1.1987068689584532, "percentage": 23.97, "elapsed_time": "0:48:50", "remaining_time": "2:34:52", "throughput": 20110.09, "total_tokens": 58928704} +{"current_steps": 18730, "total_steps": 78105, "loss": 0.3297, "lr": 4.708240279639066e-06, "epoch": 1.1990269508994302, "percentage": 23.98, "elapsed_time": "0:48:51", "remaining_time": "2:34:51", "throughput": 20111.12, "total_tokens": 58946624} +{"current_steps": 18735, "total_steps": 78105, "loss": 0.3274, "lr": 4.7079783196238324e-06, "epoch": 1.1993470328404072, "percentage": 23.99, "elapsed_time": "0:48:51", "remaining_time": "2:34:50", "throughput": 20111.9, "total_tokens": 58962496} +{"current_steps": 18740, "total_steps": 78105, "loss": 0.3828, "lr": 4.707716249353662e-06, "epoch": 1.199667114781384, "percentage": 23.99, "elapsed_time": "0:48:52", "remaining_time": "2:34:49", "throughput": 20112.6, "total_tokens": 58978048} +{"current_steps": 18745, "total_steps": 78105, "loss": 0.2367, "lr": 4.7074540688416425e-06, "epoch": 1.199987196722361, "percentage": 24.0, "elapsed_time": "0:48:53", "remaining_time": "2:34:48", "throughput": 20113.47, "total_tokens": 58994688} +{"current_steps": 18750, "total_steps": 78105, "loss": 0.2651, "lr": 4.707191778100865e-06, "epoch": 1.2003072786633378, "percentage": 24.01, "elapsed_time": "0:48:53", "remaining_time": "2:34:47", "throughput": 20114.17, "total_tokens": 59010240} +{"current_steps": 18755, "total_steps": 78105, "loss": 0.3063, "lr": 4.706929377144427e-06, "epoch": 1.2006273606043147, "percentage": 24.01, "elapsed_time": "0:48:54", "remaining_time": "2:34:46", "throughput": 20114.93, "total_tokens": 59026368} +{"current_steps": 18760, "total_steps": 78105, "loss": 0.3671, "lr": 4.706666865985431e-06, "epoch": 1.2009474425452915, "percentage": 24.02, "elapsed_time": "0:48:55", "remaining_time": "2:34:44", "throughput": 20115.64, "total_tokens": 59041920} +{"current_steps": 18765, "total_steps": 78105, "loss": 0.3089, "lr": 4.706404244636986e-06, "epoch": 1.2012675244862685, "percentage": 24.03, "elapsed_time": "0:48:55", "remaining_time": "2:34:43", "throughput": 20116.37, "total_tokens": 59057600} +{"current_steps": 18770, "total_steps": 78105, "loss": 0.2474, "lr": 4.7061415131122055e-06, "epoch": 1.2015876064272453, "percentage": 24.03, "elapsed_time": "0:48:56", "remaining_time": "2:34:42", "throughput": 20117.24, "total_tokens": 59074304} +{"current_steps": 18775, "total_steps": 78105, "loss": 0.4899, "lr": 4.70587867142421e-06, "epoch": 1.2019076883682223, "percentage": 24.04, "elapsed_time": "0:48:57", "remaining_time": "2:34:41", "throughput": 20117.92, "total_tokens": 59089472} +{"current_steps": 18780, "total_steps": 78105, "loss": 0.2991, "lr": 4.705615719586123e-06, "epoch": 1.2022277703091993, "percentage": 24.04, "elapsed_time": "0:48:57", "remaining_time": "2:34:40", "throughput": 20118.71, "total_tokens": 59105664} +{"current_steps": 18785, "total_steps": 78105, "loss": 0.2843, "lr": 4.705352657611075e-06, "epoch": 1.202547852250176, "percentage": 24.05, "elapsed_time": "0:48:58", "remaining_time": "2:34:39", "throughput": 20119.46, "total_tokens": 59121408} +{"current_steps": 18790, "total_steps": 78105, "loss": 0.331, "lr": 4.705089485512203e-06, "epoch": 1.202867934191153, "percentage": 24.06, "elapsed_time": "0:48:59", "remaining_time": "2:34:38", "throughput": 20120.17, "total_tokens": 59137088} +{"current_steps": 18795, "total_steps": 78105, "loss": 0.3433, "lr": 4.704826203302649e-06, "epoch": 1.2031880161321298, "percentage": 24.06, "elapsed_time": "0:48:59", "remaining_time": "2:34:37", "throughput": 20121.01, "total_tokens": 59153344} +{"current_steps": 18800, "total_steps": 78105, "loss": 0.3083, "lr": 4.704562810995558e-06, "epoch": 1.2035080980731068, "percentage": 24.07, "elapsed_time": "0:49:00", "remaining_time": "2:34:36", "throughput": 20121.8, "total_tokens": 59169536} +{"current_steps": 18805, "total_steps": 78105, "loss": 0.4914, "lr": 4.704299308604083e-06, "epoch": 1.2038281800140835, "percentage": 24.08, "elapsed_time": "0:49:01", "remaining_time": "2:34:34", "throughput": 20122.49, "total_tokens": 59185216} +{"current_steps": 18810, "total_steps": 78105, "loss": 0.3957, "lr": 4.704035696141383e-06, "epoch": 1.2041482619550605, "percentage": 24.08, "elapsed_time": "0:49:01", "remaining_time": "2:34:33", "throughput": 20123.2, "total_tokens": 59200960} +{"current_steps": 18815, "total_steps": 78105, "loss": 0.3012, "lr": 4.703771973620621e-06, "epoch": 1.2044683438960373, "percentage": 24.09, "elapsed_time": "0:49:02", "remaining_time": "2:34:32", "throughput": 20123.85, "total_tokens": 59216192} +{"current_steps": 18820, "total_steps": 78105, "loss": 0.3132, "lr": 4.7035081410549645e-06, "epoch": 1.2047884258370143, "percentage": 24.1, "elapsed_time": "0:49:03", "remaining_time": "2:34:31", "throughput": 20124.59, "total_tokens": 59232576} +{"current_steps": 18825, "total_steps": 78105, "loss": 0.2981, "lr": 4.703244198457591e-06, "epoch": 1.2051085077779913, "percentage": 24.1, "elapsed_time": "0:49:03", "remaining_time": "2:34:30", "throughput": 20125.2, "total_tokens": 59247424} +{"current_steps": 18830, "total_steps": 78105, "loss": 0.297, "lr": 4.702980145841677e-06, "epoch": 1.205428589718968, "percentage": 24.11, "elapsed_time": "0:49:04", "remaining_time": "2:34:29", "throughput": 20125.87, "total_tokens": 59262848} +{"current_steps": 18835, "total_steps": 78105, "loss": 0.2949, "lr": 4.70271598322041e-06, "epoch": 1.205748671659945, "percentage": 24.11, "elapsed_time": "0:49:05", "remaining_time": "2:34:28", "throughput": 20126.49, "total_tokens": 59278208} +{"current_steps": 18840, "total_steps": 78105, "loss": 0.3256, "lr": 4.7024517106069785e-06, "epoch": 1.2060687536009218, "percentage": 24.12, "elapsed_time": "0:49:05", "remaining_time": "2:34:27", "throughput": 20127.13, "total_tokens": 59293504} +{"current_steps": 18845, "total_steps": 78105, "loss": 0.4049, "lr": 4.702187328014583e-06, "epoch": 1.2063888355418988, "percentage": 24.13, "elapsed_time": "0:49:06", "remaining_time": "2:34:25", "throughput": 20127.73, "total_tokens": 59308416} +{"current_steps": 18850, "total_steps": 78105, "loss": 0.3353, "lr": 4.701922835456423e-06, "epoch": 1.2067089174828756, "percentage": 24.13, "elapsed_time": "0:49:07", "remaining_time": "2:34:24", "throughput": 20128.34, "total_tokens": 59323264} +{"current_steps": 18855, "total_steps": 78105, "loss": 0.2967, "lr": 4.701658232945705e-06, "epoch": 1.2070289994238526, "percentage": 24.14, "elapsed_time": "0:49:07", "remaining_time": "2:34:23", "throughput": 20129.2, "total_tokens": 59339584} +{"current_steps": 18860, "total_steps": 78105, "loss": 0.3586, "lr": 4.701393520495643e-06, "epoch": 1.2073490813648293, "percentage": 24.15, "elapsed_time": "0:49:08", "remaining_time": "2:34:22", "throughput": 20129.87, "total_tokens": 59354880} +{"current_steps": 18865, "total_steps": 78105, "loss": 0.4013, "lr": 4.701128698119456e-06, "epoch": 1.2076691633058063, "percentage": 24.15, "elapsed_time": "0:49:09", "remaining_time": "2:34:21", "throughput": 20131.07, "total_tokens": 59374464} +{"current_steps": 18870, "total_steps": 78105, "loss": 0.3701, "lr": 4.700863765830366e-06, "epoch": 1.207989245246783, "percentage": 24.16, "elapsed_time": "0:49:10", "remaining_time": "2:34:20", "throughput": 20131.72, "total_tokens": 59389760} +{"current_steps": 18875, "total_steps": 78105, "loss": 0.2765, "lr": 4.700598723641604e-06, "epoch": 1.20830932718776, "percentage": 24.17, "elapsed_time": "0:49:10", "remaining_time": "2:34:19", "throughput": 20132.21, "total_tokens": 59403968} +{"current_steps": 18880, "total_steps": 78105, "loss": 0.3123, "lr": 4.700333571566405e-06, "epoch": 1.2086294091287368, "percentage": 24.17, "elapsed_time": "0:49:11", "remaining_time": "2:34:18", "throughput": 20132.9, "total_tokens": 59419520} +{"current_steps": 18885, "total_steps": 78105, "loss": 0.3644, "lr": 4.7000683096180065e-06, "epoch": 1.2089494910697138, "percentage": 24.18, "elapsed_time": "0:49:12", "remaining_time": "2:34:17", "throughput": 20133.55, "total_tokens": 59435136} +{"current_steps": 18890, "total_steps": 78105, "loss": 0.3423, "lr": 4.699802937809658e-06, "epoch": 1.2092695730106908, "percentage": 24.19, "elapsed_time": "0:49:12", "remaining_time": "2:34:15", "throughput": 20134.3, "total_tokens": 59450944} +{"current_steps": 18895, "total_steps": 78105, "loss": 0.2624, "lr": 4.699537456154609e-06, "epoch": 1.2095896549516676, "percentage": 24.19, "elapsed_time": "0:49:13", "remaining_time": "2:34:14", "throughput": 20135.17, "total_tokens": 59467840} +{"current_steps": 18900, "total_steps": 78105, "loss": 0.2928, "lr": 4.6992718646661165e-06, "epoch": 1.2099097368926446, "percentage": 24.2, "elapsed_time": "0:49:14", "remaining_time": "2:34:13", "throughput": 20135.92, "total_tokens": 59483584} +{"current_steps": 18905, "total_steps": 78105, "loss": 0.4202, "lr": 4.699006163357443e-06, "epoch": 1.2102298188336214, "percentage": 24.2, "elapsed_time": "0:49:14", "remaining_time": "2:34:12", "throughput": 20136.62, "total_tokens": 59499072} +{"current_steps": 18910, "total_steps": 78105, "loss": 0.2843, "lr": 4.698740352241854e-06, "epoch": 1.2105499007745983, "percentage": 24.21, "elapsed_time": "0:49:15", "remaining_time": "2:34:11", "throughput": 20137.23, "total_tokens": 59514176} +{"current_steps": 18915, "total_steps": 78105, "loss": 0.3067, "lr": 4.698474431332626e-06, "epoch": 1.210869982715575, "percentage": 24.22, "elapsed_time": "0:49:16", "remaining_time": "2:34:10", "throughput": 20138.03, "total_tokens": 59530560} +{"current_steps": 18920, "total_steps": 78105, "loss": 0.2967, "lr": 4.698208400643036e-06, "epoch": 1.211190064656552, "percentage": 24.22, "elapsed_time": "0:49:16", "remaining_time": "2:34:09", "throughput": 20138.76, "total_tokens": 59546304} +{"current_steps": 18925, "total_steps": 78105, "loss": 0.3578, "lr": 4.697942260186369e-06, "epoch": 1.2115101465975289, "percentage": 24.23, "elapsed_time": "0:49:17", "remaining_time": "2:34:08", "throughput": 20139.48, "total_tokens": 59562048} +{"current_steps": 18930, "total_steps": 78105, "loss": 0.3102, "lr": 4.697676009975914e-06, "epoch": 1.2118302285385059, "percentage": 24.24, "elapsed_time": "0:49:18", "remaining_time": "2:34:07", "throughput": 20140.25, "total_tokens": 59578304} +{"current_steps": 18935, "total_steps": 78105, "loss": 0.3362, "lr": 4.6974096500249665e-06, "epoch": 1.2121503104794829, "percentage": 24.24, "elapsed_time": "0:49:18", "remaining_time": "2:34:06", "throughput": 20141.1, "total_tokens": 59595200} +{"current_steps": 18940, "total_steps": 78105, "loss": 0.2218, "lr": 4.697143180346827e-06, "epoch": 1.2124703924204596, "percentage": 24.25, "elapsed_time": "0:49:19", "remaining_time": "2:34:05", "throughput": 20141.95, "total_tokens": 59611968} +{"current_steps": 18945, "total_steps": 78105, "loss": 0.3227, "lr": 4.696876600954801e-06, "epoch": 1.2127904743614366, "percentage": 24.26, "elapsed_time": "0:49:20", "remaining_time": "2:34:04", "throughput": 20142.66, "total_tokens": 59627648} +{"current_steps": 18950, "total_steps": 78105, "loss": 0.3606, "lr": 4.696609911862201e-06, "epoch": 1.2131105563024134, "percentage": 24.26, "elapsed_time": "0:49:20", "remaining_time": "2:34:03", "throughput": 20143.4, "total_tokens": 59643712} +{"current_steps": 18955, "total_steps": 78105, "loss": 0.4345, "lr": 4.696343113082344e-06, "epoch": 1.2134306382433904, "percentage": 24.27, "elapsed_time": "0:49:21", "remaining_time": "2:34:01", "throughput": 20144.0, "total_tokens": 59658560} +{"current_steps": 18960, "total_steps": 78105, "loss": 0.3038, "lr": 4.696076204628551e-06, "epoch": 1.2137507201843671, "percentage": 24.28, "elapsed_time": "0:49:22", "remaining_time": "2:34:00", "throughput": 20144.72, "total_tokens": 59674176} +{"current_steps": 18965, "total_steps": 78105, "loss": 0.3547, "lr": 4.6958091865141525e-06, "epoch": 1.2140708021253441, "percentage": 24.28, "elapsed_time": "0:49:22", "remaining_time": "2:33:59", "throughput": 20145.25, "total_tokens": 59688704} +{"current_steps": 18970, "total_steps": 78105, "loss": 0.438, "lr": 4.6955420587524804e-06, "epoch": 1.214390884066321, "percentage": 24.29, "elapsed_time": "0:49:23", "remaining_time": "2:33:58", "throughput": 20146.06, "total_tokens": 59705600} +{"current_steps": 18975, "total_steps": 78105, "loss": 0.2542, "lr": 4.695274821356874e-06, "epoch": 1.2147109660072979, "percentage": 24.29, "elapsed_time": "0:49:24", "remaining_time": "2:33:57", "throughput": 20146.7, "total_tokens": 59720896} +{"current_steps": 18980, "total_steps": 78105, "loss": 0.3752, "lr": 4.695007474340678e-06, "epoch": 1.2150310479482749, "percentage": 24.3, "elapsed_time": "0:49:24", "remaining_time": "2:33:56", "throughput": 20147.33, "total_tokens": 59736000} +{"current_steps": 18985, "total_steps": 78105, "loss": 0.3307, "lr": 4.694740017717243e-06, "epoch": 1.2153511298892516, "percentage": 24.31, "elapsed_time": "0:49:25", "remaining_time": "2:33:55", "throughput": 20147.96, "total_tokens": 59751168} +{"current_steps": 18990, "total_steps": 78105, "loss": 0.3134, "lr": 4.694472451499922e-06, "epoch": 1.2156712118302284, "percentage": 24.31, "elapsed_time": "0:49:26", "remaining_time": "2:33:53", "throughput": 20148.62, "total_tokens": 59766144} +{"current_steps": 18995, "total_steps": 78105, "loss": 0.248, "lr": 4.6942047757020784e-06, "epoch": 1.2159912937712054, "percentage": 24.32, "elapsed_time": "0:49:26", "remaining_time": "2:33:52", "throughput": 20149.31, "total_tokens": 59781632} +{"current_steps": 19000, "total_steps": 78105, "loss": 0.3223, "lr": 4.6939369903370765e-06, "epoch": 1.2163113757121824, "percentage": 24.33, "elapsed_time": "0:49:27", "remaining_time": "2:33:51", "throughput": 20150.04, "total_tokens": 59797632} +{"current_steps": 19005, "total_steps": 78105, "loss": 0.2876, "lr": 4.693669095418289e-06, "epoch": 1.2166314576531592, "percentage": 24.33, "elapsed_time": "0:49:28", "remaining_time": "2:33:50", "throughput": 20150.81, "total_tokens": 59813632} +{"current_steps": 19010, "total_steps": 78105, "loss": 0.4526, "lr": 4.693401090959095e-06, "epoch": 1.2169515395941362, "percentage": 24.34, "elapsed_time": "0:49:28", "remaining_time": "2:33:49", "throughput": 20151.49, "total_tokens": 59829056} +{"current_steps": 19015, "total_steps": 78105, "loss": 0.327, "lr": 4.693132976972874e-06, "epoch": 1.217271621535113, "percentage": 24.35, "elapsed_time": "0:49:29", "remaining_time": "2:33:48", "throughput": 20152.12, "total_tokens": 59844224} +{"current_steps": 19020, "total_steps": 78105, "loss": 0.3835, "lr": 4.692864753473016e-06, "epoch": 1.21759170347609, "percentage": 24.35, "elapsed_time": "0:49:30", "remaining_time": "2:33:47", "throughput": 20152.83, "total_tokens": 59859776} +{"current_steps": 19025, "total_steps": 78105, "loss": 0.455, "lr": 4.6925964204729145e-06, "epoch": 1.2179117854170667, "percentage": 24.36, "elapsed_time": "0:49:30", "remaining_time": "2:33:45", "throughput": 20153.55, "total_tokens": 59875456} +{"current_steps": 19030, "total_steps": 78105, "loss": 0.3682, "lr": 4.6923279779859685e-06, "epoch": 1.2182318673580437, "percentage": 24.36, "elapsed_time": "0:49:31", "remaining_time": "2:33:44", "throughput": 20154.31, "total_tokens": 59891840} +{"current_steps": 19035, "total_steps": 78105, "loss": 0.3199, "lr": 4.692059426025583e-06, "epoch": 1.2185519492990204, "percentage": 24.37, "elapsed_time": "0:49:32", "remaining_time": "2:33:43", "throughput": 20155.0, "total_tokens": 59907904} +{"current_steps": 19040, "total_steps": 78105, "loss": 0.3169, "lr": 4.691790764605168e-06, "epoch": 1.2188720312399974, "percentage": 24.38, "elapsed_time": "0:49:33", "remaining_time": "2:33:42", "throughput": 20155.85, "total_tokens": 59924672} +{"current_steps": 19045, "total_steps": 78105, "loss": 0.3917, "lr": 4.691521993738139e-06, "epoch": 1.2191921131809744, "percentage": 24.38, "elapsed_time": "0:49:33", "remaining_time": "2:33:41", "throughput": 20156.6, "total_tokens": 59940736} +{"current_steps": 19050, "total_steps": 78105, "loss": 0.2466, "lr": 4.691253113437916e-06, "epoch": 1.2195121951219512, "percentage": 24.39, "elapsed_time": "0:49:34", "remaining_time": "2:33:40", "throughput": 20157.43, "total_tokens": 59956992} +{"current_steps": 19055, "total_steps": 78105, "loss": 0.5251, "lr": 4.690984123717926e-06, "epoch": 1.2198322770629282, "percentage": 24.4, "elapsed_time": "0:49:35", "remaining_time": "2:33:39", "throughput": 20158.12, "total_tokens": 59972800} +{"current_steps": 19060, "total_steps": 78105, "loss": 0.3085, "lr": 4.690715024591603e-06, "epoch": 1.220152359003905, "percentage": 24.4, "elapsed_time": "0:49:35", "remaining_time": "2:33:38", "throughput": 20158.86, "total_tokens": 59989120} +{"current_steps": 19065, "total_steps": 78105, "loss": 0.3967, "lr": 4.690445816072381e-06, "epoch": 1.220472440944882, "percentage": 24.41, "elapsed_time": "0:49:36", "remaining_time": "2:33:37", "throughput": 20159.61, "total_tokens": 60004928} +{"current_steps": 19070, "total_steps": 78105, "loss": 0.27, "lr": 4.690176498173705e-06, "epoch": 1.2207925228858587, "percentage": 24.42, "elapsed_time": "0:49:37", "remaining_time": "2:33:36", "throughput": 20160.26, "total_tokens": 60020352} +{"current_steps": 19075, "total_steps": 78105, "loss": 0.4531, "lr": 4.689907070909024e-06, "epoch": 1.2211126048268357, "percentage": 24.42, "elapsed_time": "0:49:37", "remaining_time": "2:33:35", "throughput": 20160.9, "total_tokens": 60035456} +{"current_steps": 19080, "total_steps": 78105, "loss": 0.3457, "lr": 4.68963753429179e-06, "epoch": 1.2214326867678125, "percentage": 24.43, "elapsed_time": "0:49:38", "remaining_time": "2:33:34", "throughput": 20161.53, "total_tokens": 60050752} +{"current_steps": 19085, "total_steps": 78105, "loss": 0.298, "lr": 4.689367888335462e-06, "epoch": 1.2217527687087895, "percentage": 24.44, "elapsed_time": "0:49:39", "remaining_time": "2:33:32", "throughput": 20162.26, "total_tokens": 60066496} +{"current_steps": 19090, "total_steps": 78105, "loss": 0.3365, "lr": 4.689098133053507e-06, "epoch": 1.2220728506497665, "percentage": 24.44, "elapsed_time": "0:49:39", "remaining_time": "2:33:31", "throughput": 20162.92, "total_tokens": 60081728} +{"current_steps": 19095, "total_steps": 78105, "loss": 0.2386, "lr": 4.688828268459393e-06, "epoch": 1.2223929325907432, "percentage": 24.45, "elapsed_time": "0:49:40", "remaining_time": "2:33:30", "throughput": 20163.64, "total_tokens": 60098048} +{"current_steps": 19100, "total_steps": 78105, "loss": 0.4498, "lr": 4.6885582945665964e-06, "epoch": 1.2227130145317202, "percentage": 24.45, "elapsed_time": "0:49:41", "remaining_time": "2:33:29", "throughput": 20164.19, "total_tokens": 60112640} +{"current_steps": 19105, "total_steps": 78105, "loss": 0.2333, "lr": 4.688288211388599e-06, "epoch": 1.223033096472697, "percentage": 24.46, "elapsed_time": "0:49:41", "remaining_time": "2:33:28", "throughput": 20164.88, "total_tokens": 60128384} +{"current_steps": 19110, "total_steps": 78105, "loss": 0.3721, "lr": 4.688018018938887e-06, "epoch": 1.223353178413674, "percentage": 24.47, "elapsed_time": "0:49:42", "remaining_time": "2:33:27", "throughput": 20165.65, "total_tokens": 60144448} +{"current_steps": 19115, "total_steps": 78105, "loss": 0.4246, "lr": 4.6877477172309505e-06, "epoch": 1.2236732603546507, "percentage": 24.47, "elapsed_time": "0:49:43", "remaining_time": "2:33:26", "throughput": 20166.34, "total_tokens": 60159936} +{"current_steps": 19120, "total_steps": 78105, "loss": 0.4009, "lr": 4.687477306278289e-06, "epoch": 1.2239933422956277, "percentage": 24.48, "elapsed_time": "0:49:43", "remaining_time": "2:33:25", "throughput": 20167.09, "total_tokens": 60175808} +{"current_steps": 19125, "total_steps": 78105, "loss": 0.3246, "lr": 4.687206786094406e-06, "epoch": 1.2243134242366045, "percentage": 24.49, "elapsed_time": "0:49:44", "remaining_time": "2:33:24", "throughput": 20167.77, "total_tokens": 60191808} +{"current_steps": 19130, "total_steps": 78105, "loss": 0.3218, "lr": 4.686936156692809e-06, "epoch": 1.2246335061775815, "percentage": 24.49, "elapsed_time": "0:49:45", "remaining_time": "2:33:23", "throughput": 20168.55, "total_tokens": 60208192} +{"current_steps": 19135, "total_steps": 78105, "loss": 0.3329, "lr": 4.686665418087011e-06, "epoch": 1.2249535881185583, "percentage": 24.5, "elapsed_time": "0:49:45", "remaining_time": "2:33:21", "throughput": 20169.07, "total_tokens": 60222848} +{"current_steps": 19140, "total_steps": 78105, "loss": 0.3869, "lr": 4.686394570290532e-06, "epoch": 1.2252736700595352, "percentage": 24.51, "elapsed_time": "0:49:46", "remaining_time": "2:33:20", "throughput": 20169.72, "total_tokens": 60238336} +{"current_steps": 19145, "total_steps": 78105, "loss": 0.2233, "lr": 4.686123613316896e-06, "epoch": 1.225593752000512, "percentage": 24.51, "elapsed_time": "0:49:47", "remaining_time": "2:33:19", "throughput": 20170.33, "total_tokens": 60253376} +{"current_steps": 19150, "total_steps": 78105, "loss": 0.4171, "lr": 4.685852547179635e-06, "epoch": 1.225913833941489, "percentage": 24.52, "elapsed_time": "0:49:47", "remaining_time": "2:33:18", "throughput": 20171.07, "total_tokens": 60269056} +{"current_steps": 19155, "total_steps": 78105, "loss": 0.1714, "lr": 4.685581371892283e-06, "epoch": 1.226233915882466, "percentage": 24.52, "elapsed_time": "0:49:48", "remaining_time": "2:33:17", "throughput": 20172.23, "total_tokens": 60288704} +{"current_steps": 19160, "total_steps": 78105, "loss": 0.317, "lr": 4.685310087468381e-06, "epoch": 1.2265539978234428, "percentage": 24.53, "elapsed_time": "0:49:49", "remaining_time": "2:33:16", "throughput": 20173.03, "total_tokens": 60305152} +{"current_steps": 19165, "total_steps": 78105, "loss": 0.3379, "lr": 4.685038693921477e-06, "epoch": 1.2268740797644198, "percentage": 24.54, "elapsed_time": "0:49:50", "remaining_time": "2:33:15", "throughput": 20173.61, "total_tokens": 60319680} +{"current_steps": 19170, "total_steps": 78105, "loss": 0.243, "lr": 4.684767191265122e-06, "epoch": 1.2271941617053965, "percentage": 24.54, "elapsed_time": "0:49:50", "remaining_time": "2:33:14", "throughput": 20174.18, "total_tokens": 60334528} +{"current_steps": 19175, "total_steps": 78105, "loss": 0.2693, "lr": 4.684495579512873e-06, "epoch": 1.2275142436463735, "percentage": 24.55, "elapsed_time": "0:49:51", "remaining_time": "2:33:13", "throughput": 20174.85, "total_tokens": 60349824} +{"current_steps": 19180, "total_steps": 78105, "loss": 0.4104, "lr": 4.684223858678294e-06, "epoch": 1.2278343255873503, "percentage": 24.56, "elapsed_time": "0:49:51", "remaining_time": "2:33:12", "throughput": 20175.41, "total_tokens": 60364544} +{"current_steps": 19185, "total_steps": 78105, "loss": 0.443, "lr": 4.683952028774952e-06, "epoch": 1.2281544075283273, "percentage": 24.56, "elapsed_time": "0:49:52", "remaining_time": "2:33:10", "throughput": 20176.16, "total_tokens": 60380672} +{"current_steps": 19190, "total_steps": 78105, "loss": 0.3621, "lr": 4.683680089816422e-06, "epoch": 1.228474489469304, "percentage": 24.57, "elapsed_time": "0:49:53", "remaining_time": "2:33:09", "throughput": 20176.77, "total_tokens": 60396096} +{"current_steps": 19195, "total_steps": 78105, "loss": 0.336, "lr": 4.683408041816282e-06, "epoch": 1.228794571410281, "percentage": 24.58, "elapsed_time": "0:49:54", "remaining_time": "2:33:08", "throughput": 20177.53, "total_tokens": 60412224} +{"current_steps": 19200, "total_steps": 78105, "loss": 0.2713, "lr": 4.683135884788118e-06, "epoch": 1.229114653351258, "percentage": 24.58, "elapsed_time": "0:49:54", "remaining_time": "2:33:07", "throughput": 20178.17, "total_tokens": 60428160} +{"current_steps": 19205, "total_steps": 78105, "loss": 0.2679, "lr": 4.682863618745519e-06, "epoch": 1.2294347352922348, "percentage": 24.59, "elapsed_time": "0:49:55", "remaining_time": "2:33:06", "throughput": 20178.85, "total_tokens": 60444288} +{"current_steps": 19210, "total_steps": 78105, "loss": 0.2873, "lr": 4.6825912437020814e-06, "epoch": 1.2297548172332118, "percentage": 24.6, "elapsed_time": "0:49:56", "remaining_time": "2:33:05", "throughput": 20179.49, "total_tokens": 60459840} +{"current_steps": 19215, "total_steps": 78105, "loss": 0.404, "lr": 4.682318759671406e-06, "epoch": 1.2300748991741886, "percentage": 24.6, "elapsed_time": "0:49:56", "remaining_time": "2:33:04", "throughput": 20180.02, "total_tokens": 60474752} +{"current_steps": 19220, "total_steps": 78105, "loss": 0.1633, "lr": 4.682046166667098e-06, "epoch": 1.2303949811151655, "percentage": 24.61, "elapsed_time": "0:49:57", "remaining_time": "2:33:03", "throughput": 20180.54, "total_tokens": 60489408} +{"current_steps": 19225, "total_steps": 78105, "loss": 0.363, "lr": 4.6817734647027715e-06, "epoch": 1.2307150630561423, "percentage": 24.61, "elapsed_time": "0:49:58", "remaining_time": "2:33:02", "throughput": 20181.15, "total_tokens": 60504704} +{"current_steps": 19230, "total_steps": 78105, "loss": 0.2782, "lr": 4.6815006537920424e-06, "epoch": 1.2310351449971193, "percentage": 24.62, "elapsed_time": "0:49:58", "remaining_time": "2:33:01", "throughput": 20181.91, "total_tokens": 60520960} +{"current_steps": 19235, "total_steps": 78105, "loss": 0.4007, "lr": 4.681227733948533e-06, "epoch": 1.231355226938096, "percentage": 24.63, "elapsed_time": "0:49:59", "remaining_time": "2:32:59", "throughput": 20182.45, "total_tokens": 60535424} +{"current_steps": 19240, "total_steps": 78105, "loss": 0.2642, "lr": 4.680954705185873e-06, "epoch": 1.231675308879073, "percentage": 24.63, "elapsed_time": "0:50:00", "remaining_time": "2:32:58", "throughput": 20183.11, "total_tokens": 60550976} +{"current_steps": 19245, "total_steps": 78105, "loss": 0.2975, "lr": 4.680681567517694e-06, "epoch": 1.23199539082005, "percentage": 24.64, "elapsed_time": "0:50:00", "remaining_time": "2:32:57", "throughput": 20183.76, "total_tokens": 60566784} +{"current_steps": 19250, "total_steps": 78105, "loss": 0.3432, "lr": 4.6804083209576375e-06, "epoch": 1.2323154727610268, "percentage": 24.65, "elapsed_time": "0:50:01", "remaining_time": "2:32:56", "throughput": 20184.46, "total_tokens": 60582400} +{"current_steps": 19255, "total_steps": 78105, "loss": 0.3691, "lr": 4.680134965519346e-06, "epoch": 1.2326355547020036, "percentage": 24.65, "elapsed_time": "0:50:02", "remaining_time": "2:32:55", "throughput": 20184.95, "total_tokens": 60597056} +{"current_steps": 19260, "total_steps": 78105, "loss": 0.2932, "lr": 4.67986150121647e-06, "epoch": 1.2329556366429806, "percentage": 24.66, "elapsed_time": "0:50:02", "remaining_time": "2:32:54", "throughput": 20185.67, "total_tokens": 60612864} +{"current_steps": 19265, "total_steps": 78105, "loss": 0.326, "lr": 4.679587928062666e-06, "epoch": 1.2332757185839576, "percentage": 24.67, "elapsed_time": "0:50:03", "remaining_time": "2:32:53", "throughput": 20186.24, "total_tokens": 60627968} +{"current_steps": 19270, "total_steps": 78105, "loss": 0.4103, "lr": 4.679314246071594e-06, "epoch": 1.2335958005249343, "percentage": 24.67, "elapsed_time": "0:50:04", "remaining_time": "2:32:52", "throughput": 20187.0, "total_tokens": 60644160} +{"current_steps": 19275, "total_steps": 78105, "loss": 0.3367, "lr": 4.67904045525692e-06, "epoch": 1.2339158824659113, "percentage": 24.68, "elapsed_time": "0:50:04", "remaining_time": "2:32:51", "throughput": 20187.66, "total_tokens": 60659648} +{"current_steps": 19280, "total_steps": 78105, "loss": 0.4156, "lr": 4.678766555632315e-06, "epoch": 1.234235964406888, "percentage": 24.68, "elapsed_time": "0:50:05", "remaining_time": "2:32:49", "throughput": 20188.41, "total_tokens": 60675712} +{"current_steps": 19285, "total_steps": 78105, "loss": 0.2761, "lr": 4.678492547211459e-06, "epoch": 1.234556046347865, "percentage": 24.69, "elapsed_time": "0:50:06", "remaining_time": "2:32:48", "throughput": 20189.11, "total_tokens": 60691456} +{"current_steps": 19290, "total_steps": 78105, "loss": 0.3125, "lr": 4.67821843000803e-06, "epoch": 1.2348761282888419, "percentage": 24.7, "elapsed_time": "0:50:06", "remaining_time": "2:32:47", "throughput": 20189.95, "total_tokens": 60708096} +{"current_steps": 19295, "total_steps": 78105, "loss": 0.5368, "lr": 4.67794420403572e-06, "epoch": 1.2351962102298188, "percentage": 24.7, "elapsed_time": "0:50:07", "remaining_time": "2:32:46", "throughput": 20190.68, "total_tokens": 60724352} +{"current_steps": 19300, "total_steps": 78105, "loss": 0.3617, "lr": 4.677669869308221e-06, "epoch": 1.2355162921707956, "percentage": 24.71, "elapsed_time": "0:50:08", "remaining_time": "2:32:45", "throughput": 20191.31, "total_tokens": 60739968} +{"current_steps": 19305, "total_steps": 78105, "loss": 0.3916, "lr": 4.677395425839231e-06, "epoch": 1.2358363741117726, "percentage": 24.72, "elapsed_time": "0:50:08", "remaining_time": "2:32:44", "throughput": 20191.81, "total_tokens": 60754240} +{"current_steps": 19310, "total_steps": 78105, "loss": 0.5291, "lr": 4.677120873642455e-06, "epoch": 1.2361564560527496, "percentage": 24.72, "elapsed_time": "0:50:09", "remaining_time": "2:32:43", "throughput": 20192.46, "total_tokens": 60769728} +{"current_steps": 19315, "total_steps": 78105, "loss": 0.3187, "lr": 4.676846212731604e-06, "epoch": 1.2364765379937264, "percentage": 24.73, "elapsed_time": "0:50:10", "remaining_time": "2:32:42", "throughput": 20193.17, "total_tokens": 60785408} +{"current_steps": 19320, "total_steps": 78105, "loss": 0.3089, "lr": 4.6765714431203914e-06, "epoch": 1.2367966199347034, "percentage": 24.74, "elapsed_time": "0:50:10", "remaining_time": "2:32:41", "throughput": 20193.85, "total_tokens": 60801152} +{"current_steps": 19325, "total_steps": 78105, "loss": 0.2893, "lr": 4.676296564822538e-06, "epoch": 1.2371167018756801, "percentage": 24.74, "elapsed_time": "0:50:11", "remaining_time": "2:32:40", "throughput": 20194.57, "total_tokens": 60817088} +{"current_steps": 19330, "total_steps": 78105, "loss": 0.3399, "lr": 4.67602157785177e-06, "epoch": 1.2374367838166571, "percentage": 24.75, "elapsed_time": "0:50:12", "remaining_time": "2:32:39", "throughput": 20195.31, "total_tokens": 60833088} +{"current_steps": 19335, "total_steps": 78105, "loss": 0.2608, "lr": 4.675746482221819e-06, "epoch": 1.2377568657576339, "percentage": 24.76, "elapsed_time": "0:50:12", "remaining_time": "2:32:38", "throughput": 20196.18, "total_tokens": 60850496} +{"current_steps": 19340, "total_steps": 78105, "loss": 0.3079, "lr": 4.6754712779464215e-06, "epoch": 1.2380769476986109, "percentage": 24.76, "elapsed_time": "0:50:13", "remaining_time": "2:32:37", "throughput": 20196.99, "total_tokens": 60867200} +{"current_steps": 19345, "total_steps": 78105, "loss": 0.4516, "lr": 4.675195965039321e-06, "epoch": 1.2383970296395876, "percentage": 24.77, "elapsed_time": "0:50:14", "remaining_time": "2:32:36", "throughput": 20197.68, "total_tokens": 60883072} +{"current_steps": 19350, "total_steps": 78105, "loss": 0.3426, "lr": 4.674920543514263e-06, "epoch": 1.2387171115805646, "percentage": 24.77, "elapsed_time": "0:50:15", "remaining_time": "2:32:34", "throughput": 20198.47, "total_tokens": 60899392} +{"current_steps": 19355, "total_steps": 78105, "loss": 0.2972, "lr": 4.674645013385002e-06, "epoch": 1.2390371935215416, "percentage": 24.78, "elapsed_time": "0:50:15", "remaining_time": "2:32:33", "throughput": 20199.04, "total_tokens": 60914560} +{"current_steps": 19360, "total_steps": 78105, "loss": 0.4091, "lr": 4.674369374665297e-06, "epoch": 1.2393572754625184, "percentage": 24.79, "elapsed_time": "0:50:16", "remaining_time": "2:32:32", "throughput": 20199.56, "total_tokens": 60929088} +{"current_steps": 19365, "total_steps": 78105, "loss": 0.3021, "lr": 4.6740936273689105e-06, "epoch": 1.2396773574034954, "percentage": 24.79, "elapsed_time": "0:50:17", "remaining_time": "2:32:31", "throughput": 20200.18, "total_tokens": 60944576} +{"current_steps": 19370, "total_steps": 78105, "loss": 0.4317, "lr": 4.673817771509613e-06, "epoch": 1.2399974393444722, "percentage": 24.8, "elapsed_time": "0:50:17", "remaining_time": "2:32:30", "throughput": 20200.84, "total_tokens": 60959808} +{"current_steps": 19375, "total_steps": 78105, "loss": 0.2599, "lr": 4.67354180710118e-06, "epoch": 1.2403175212854491, "percentage": 24.81, "elapsed_time": "0:50:18", "remaining_time": "2:32:29", "throughput": 20201.55, "total_tokens": 60975488} +{"current_steps": 19380, "total_steps": 78105, "loss": 0.2838, "lr": 4.673265734157389e-06, "epoch": 1.240637603226426, "percentage": 24.81, "elapsed_time": "0:50:19", "remaining_time": "2:32:28", "throughput": 20202.22, "total_tokens": 60991168} +{"current_steps": 19385, "total_steps": 78105, "loss": 0.3576, "lr": 4.672989552692029e-06, "epoch": 1.240957685167403, "percentage": 24.82, "elapsed_time": "0:50:19", "remaining_time": "2:32:27", "throughput": 20202.92, "total_tokens": 61006912} +{"current_steps": 19390, "total_steps": 78105, "loss": 0.3117, "lr": 4.672713262718888e-06, "epoch": 1.2412777671083797, "percentage": 24.83, "elapsed_time": "0:50:20", "remaining_time": "2:32:26", "throughput": 20203.59, "total_tokens": 61022528} +{"current_steps": 19395, "total_steps": 78105, "loss": 0.4646, "lr": 4.672436864251765e-06, "epoch": 1.2415978490493567, "percentage": 24.83, "elapsed_time": "0:50:21", "remaining_time": "2:32:24", "throughput": 20204.14, "total_tokens": 61037056} +{"current_steps": 19400, "total_steps": 78105, "loss": 0.2904, "lr": 4.67216035730446e-06, "epoch": 1.2419179309903334, "percentage": 24.84, "elapsed_time": "0:50:21", "remaining_time": "2:32:23", "throughput": 20204.74, "total_tokens": 61052288} +{"current_steps": 19405, "total_steps": 78105, "loss": 0.3429, "lr": 4.671883741890782e-06, "epoch": 1.2422380129313104, "percentage": 24.84, "elapsed_time": "0:50:22", "remaining_time": "2:32:22", "throughput": 20205.6, "total_tokens": 61069248} +{"current_steps": 19410, "total_steps": 78105, "loss": 0.3064, "lr": 4.671607018024541e-06, "epoch": 1.2425580948722872, "percentage": 24.85, "elapsed_time": "0:50:23", "remaining_time": "2:32:21", "throughput": 20206.14, "total_tokens": 61084096} +{"current_steps": 19415, "total_steps": 78105, "loss": 0.2906, "lr": 4.671330185719559e-06, "epoch": 1.2428781768132642, "percentage": 24.86, "elapsed_time": "0:50:23", "remaining_time": "2:32:20", "throughput": 20206.72, "total_tokens": 61098752} +{"current_steps": 19420, "total_steps": 78105, "loss": 0.2736, "lr": 4.6710532449896575e-06, "epoch": 1.2431982587542412, "percentage": 24.86, "elapsed_time": "0:50:24", "remaining_time": "2:32:19", "throughput": 20207.41, "total_tokens": 61114560} +{"current_steps": 19425, "total_steps": 78105, "loss": 0.2584, "lr": 4.670776195848664e-06, "epoch": 1.243518340695218, "percentage": 24.87, "elapsed_time": "0:50:25", "remaining_time": "2:32:18", "throughput": 20208.03, "total_tokens": 61130176} +{"current_steps": 19430, "total_steps": 78105, "loss": 0.3111, "lr": 4.670499038310416e-06, "epoch": 1.243838422636195, "percentage": 24.88, "elapsed_time": "0:50:25", "remaining_time": "2:32:17", "throughput": 20208.62, "total_tokens": 61145472} +{"current_steps": 19435, "total_steps": 78105, "loss": 0.3217, "lr": 4.670221772388751e-06, "epoch": 1.2441585045771717, "percentage": 24.88, "elapsed_time": "0:50:26", "remaining_time": "2:32:16", "throughput": 20209.33, "total_tokens": 61161408} +{"current_steps": 19440, "total_steps": 78105, "loss": 0.2143, "lr": 4.669944398097515e-06, "epoch": 1.2444785865181487, "percentage": 24.89, "elapsed_time": "0:50:27", "remaining_time": "2:32:14", "throughput": 20209.94, "total_tokens": 61176576} +{"current_steps": 19445, "total_steps": 78105, "loss": 0.471, "lr": 4.669666915450559e-06, "epoch": 1.2447986684591255, "percentage": 24.9, "elapsed_time": "0:50:27", "remaining_time": "2:32:13", "throughput": 20210.57, "total_tokens": 61191616} +{"current_steps": 19450, "total_steps": 78105, "loss": 0.3311, "lr": 4.669389324461738e-06, "epoch": 1.2451187504001024, "percentage": 24.9, "elapsed_time": "0:50:28", "remaining_time": "2:32:12", "throughput": 20211.22, "total_tokens": 61207168} +{"current_steps": 19455, "total_steps": 78105, "loss": 0.4144, "lr": 4.669111625144914e-06, "epoch": 1.2454388323410792, "percentage": 24.91, "elapsed_time": "0:50:29", "remaining_time": "2:32:11", "throughput": 20212.01, "total_tokens": 61223872} +{"current_steps": 19460, "total_steps": 78105, "loss": 0.2513, "lr": 4.6688338175139544e-06, "epoch": 1.2457589142820562, "percentage": 24.92, "elapsed_time": "0:50:29", "remaining_time": "2:32:10", "throughput": 20212.65, "total_tokens": 61239680} +{"current_steps": 19465, "total_steps": 78105, "loss": 0.285, "lr": 4.668555901582731e-06, "epoch": 1.2460789962230332, "percentage": 24.92, "elapsed_time": "0:50:30", "remaining_time": "2:32:09", "throughput": 20213.36, "total_tokens": 61255616} +{"current_steps": 19470, "total_steps": 78105, "loss": 0.4098, "lr": 4.668277877365122e-06, "epoch": 1.24639907816401, "percentage": 24.93, "elapsed_time": "0:50:31", "remaining_time": "2:32:08", "throughput": 20214.0, "total_tokens": 61270976} +{"current_steps": 19475, "total_steps": 78105, "loss": 0.2872, "lr": 4.6679997448750095e-06, "epoch": 1.246719160104987, "percentage": 24.93, "elapsed_time": "0:50:31", "remaining_time": "2:32:07", "throughput": 20214.53, "total_tokens": 61285824} +{"current_steps": 19480, "total_steps": 78105, "loss": 0.4148, "lr": 4.667721504126281e-06, "epoch": 1.2470392420459637, "percentage": 24.94, "elapsed_time": "0:50:32", "remaining_time": "2:32:06", "throughput": 20215.05, "total_tokens": 61300672} +{"current_steps": 19485, "total_steps": 78105, "loss": 0.3055, "lr": 4.667443155132834e-06, "epoch": 1.2473593239869407, "percentage": 24.95, "elapsed_time": "0:50:33", "remaining_time": "2:32:05", "throughput": 20215.89, "total_tokens": 61317760} +{"current_steps": 19490, "total_steps": 78105, "loss": 0.4127, "lr": 4.667164697908564e-06, "epoch": 1.2476794059279175, "percentage": 24.95, "elapsed_time": "0:50:33", "remaining_time": "2:32:04", "throughput": 20216.59, "total_tokens": 61333888} +{"current_steps": 19495, "total_steps": 78105, "loss": 0.4217, "lr": 4.666886132467379e-06, "epoch": 1.2479994878688945, "percentage": 24.96, "elapsed_time": "0:50:34", "remaining_time": "2:32:03", "throughput": 20217.3, "total_tokens": 61350080} +{"current_steps": 19500, "total_steps": 78105, "loss": 0.4635, "lr": 4.666607458823186e-06, "epoch": 1.2483195698098712, "percentage": 24.97, "elapsed_time": "0:50:35", "remaining_time": "2:32:02", "throughput": 20218.08, "total_tokens": 61366912} +{"current_steps": 19505, "total_steps": 78105, "loss": 0.322, "lr": 4.666328676989903e-06, "epoch": 1.2486396517508482, "percentage": 24.97, "elapsed_time": "0:50:35", "remaining_time": "2:32:01", "throughput": 20218.92, "total_tokens": 61384256} +{"current_steps": 19510, "total_steps": 78105, "loss": 0.2763, "lr": 4.666049786981449e-06, "epoch": 1.2489597336918252, "percentage": 24.98, "elapsed_time": "0:50:36", "remaining_time": "2:31:59", "throughput": 20219.48, "total_tokens": 61399040} +{"current_steps": 19515, "total_steps": 78105, "loss": 0.2761, "lr": 4.665770788811751e-06, "epoch": 1.249279815632802, "percentage": 24.99, "elapsed_time": "0:50:37", "remaining_time": "2:31:59", "throughput": 20220.43, "total_tokens": 61416832} +{"current_steps": 19520, "total_steps": 78105, "loss": 0.3629, "lr": 4.6654916824947415e-06, "epoch": 1.249599897573779, "percentage": 24.99, "elapsed_time": "0:50:38", "remaining_time": "2:31:58", "throughput": 20221.22, "total_tokens": 61433152} +{"current_steps": 19525, "total_steps": 78105, "loss": 0.3424, "lr": 4.665212468044356e-06, "epoch": 1.2499199795147558, "percentage": 25.0, "elapsed_time": "0:50:38", "remaining_time": "2:31:57", "throughput": 20222.04, "total_tokens": 61450176} +{"current_steps": 19530, "total_steps": 78105, "loss": 0.265, "lr": 4.66493314547454e-06, "epoch": 1.2502400614557327, "percentage": 25.0, "elapsed_time": "0:50:39", "remaining_time": "2:31:55", "throughput": 20222.61, "total_tokens": 61465280} +{"current_steps": 19530, "total_steps": 78105, "eval_loss": 0.5115891098976135, "epoch": 1.2502400614557327, "percentage": 25.0, "elapsed_time": "0:51:30", "remaining_time": "2:34:29", "throughput": 19887.85, "total_tokens": 61465280} +{"current_steps": 19535, "total_steps": 78105, "loss": 0.3263, "lr": 4.664653714799239e-06, "epoch": 1.2505601433967095, "percentage": 25.01, "elapsed_time": "0:52:03", "remaining_time": "2:36:06", "throughput": 19680.97, "total_tokens": 61480960} +{"current_steps": 19540, "total_steps": 78105, "loss": 0.3483, "lr": 4.664374176032406e-06, "epoch": 1.2508802253376865, "percentage": 25.02, "elapsed_time": "0:52:04", "remaining_time": "2:36:04", "throughput": 19681.62, "total_tokens": 61496064} +{"current_steps": 19545, "total_steps": 78105, "loss": 0.3428, "lr": 4.6640945291880006e-06, "epoch": 1.2512003072786633, "percentage": 25.02, "elapsed_time": "0:52:05", "remaining_time": "2:36:03", "throughput": 19682.42, "total_tokens": 61512000} +{"current_steps": 19550, "total_steps": 78105, "loss": 0.3861, "lr": 4.6638147742799865e-06, "epoch": 1.2515203892196403, "percentage": 25.03, "elapsed_time": "0:52:05", "remaining_time": "2:36:02", "throughput": 19683.13, "total_tokens": 61527552} +{"current_steps": 19555, "total_steps": 78105, "loss": 0.3321, "lr": 4.663534911322334e-06, "epoch": 1.2518404711606173, "percentage": 25.04, "elapsed_time": "0:52:06", "remaining_time": "2:36:01", "throughput": 19683.81, "total_tokens": 61542656} +{"current_steps": 19560, "total_steps": 78105, "loss": 0.3228, "lr": 4.663254940329017e-06, "epoch": 1.252160553101594, "percentage": 25.04, "elapsed_time": "0:52:07", "remaining_time": "2:36:00", "throughput": 19684.45, "total_tokens": 61557568} +{"current_steps": 19565, "total_steps": 78105, "loss": 0.3364, "lr": 4.662974861314016e-06, "epoch": 1.2524806350425708, "percentage": 25.05, "elapsed_time": "0:52:07", "remaining_time": "2:35:58", "throughput": 19685.4, "total_tokens": 61574464} +{"current_steps": 19570, "total_steps": 78105, "loss": 0.294, "lr": 4.662694674291318e-06, "epoch": 1.2528007169835478, "percentage": 25.06, "elapsed_time": "0:52:08", "remaining_time": "2:35:58", "throughput": 19684.5, "total_tokens": 61589568} +{"current_steps": 19575, "total_steps": 78105, "loss": 0.4253, "lr": 4.662414379274912e-06, "epoch": 1.2531207989245248, "percentage": 25.06, "elapsed_time": "0:52:09", "remaining_time": "2:35:57", "throughput": 19685.26, "total_tokens": 61605440} +{"current_steps": 19580, "total_steps": 78105, "loss": 0.3526, "lr": 4.662133976278796e-06, "epoch": 1.2534408808655015, "percentage": 25.07, "elapsed_time": "0:52:10", "remaining_time": "2:35:56", "throughput": 19686.04, "total_tokens": 61621056} +{"current_steps": 19585, "total_steps": 78105, "loss": 0.2846, "lr": 4.661853465316971e-06, "epoch": 1.2537609628064785, "percentage": 25.08, "elapsed_time": "0:52:10", "remaining_time": "2:35:55", "throughput": 19686.77, "total_tokens": 61636736} +{"current_steps": 19590, "total_steps": 78105, "loss": 0.2533, "lr": 4.6615728464034455e-06, "epoch": 1.2540810447474553, "percentage": 25.08, "elapsed_time": "0:52:11", "remaining_time": "2:35:54", "throughput": 19687.75, "total_tokens": 61654208} +{"current_steps": 19595, "total_steps": 78105, "loss": 0.2326, "lr": 4.66129211955223e-06, "epoch": 1.2544011266884323, "percentage": 25.09, "elapsed_time": "0:52:12", "remaining_time": "2:35:52", "throughput": 19688.57, "total_tokens": 61670464} +{"current_steps": 19600, "total_steps": 78105, "loss": 0.3779, "lr": 4.661011284777345e-06, "epoch": 1.254721208629409, "percentage": 25.09, "elapsed_time": "0:52:12", "remaining_time": "2:35:51", "throughput": 19689.33, "total_tokens": 61686208} +{"current_steps": 19605, "total_steps": 78105, "loss": 0.275, "lr": 4.660730342092812e-06, "epoch": 1.255041290570386, "percentage": 25.1, "elapsed_time": "0:52:13", "remaining_time": "2:35:50", "throughput": 19690.27, "total_tokens": 61703232} +{"current_steps": 19610, "total_steps": 78105, "loss": 0.2649, "lr": 4.660449291512661e-06, "epoch": 1.2553613725113628, "percentage": 25.11, "elapsed_time": "0:52:14", "remaining_time": "2:35:49", "throughput": 19691.13, "total_tokens": 61719680} +{"current_steps": 19615, "total_steps": 78105, "loss": 0.2393, "lr": 4.6601681330509255e-06, "epoch": 1.2556814544523398, "percentage": 25.11, "elapsed_time": "0:52:15", "remaining_time": "2:35:48", "throughput": 19691.78, "total_tokens": 61734336} +{"current_steps": 19620, "total_steps": 78105, "loss": 0.4221, "lr": 4.6598868667216454e-06, "epoch": 1.2560015363933168, "percentage": 25.12, "elapsed_time": "0:52:15", "remaining_time": "2:35:47", "throughput": 19692.39, "total_tokens": 61749120} +{"current_steps": 19625, "total_steps": 78105, "loss": 0.3938, "lr": 4.659605492538866e-06, "epoch": 1.2563216183342936, "percentage": 25.13, "elapsed_time": "0:52:16", "remaining_time": "2:35:45", "throughput": 19692.9, "total_tokens": 61763072} +{"current_steps": 19630, "total_steps": 78105, "loss": 0.524, "lr": 4.659324010516637e-06, "epoch": 1.2566417002752703, "percentage": 25.13, "elapsed_time": "0:52:16", "remaining_time": "2:35:44", "throughput": 19693.58, "total_tokens": 61778432} +{"current_steps": 19635, "total_steps": 78105, "loss": 0.3252, "lr": 4.6590424206690145e-06, "epoch": 1.2569617822162473, "percentage": 25.14, "elapsed_time": "0:52:17", "remaining_time": "2:35:43", "throughput": 19694.24, "total_tokens": 61793088} +{"current_steps": 19640, "total_steps": 78105, "loss": 0.4921, "lr": 4.658760723010059e-06, "epoch": 1.2572818641572243, "percentage": 25.15, "elapsed_time": "0:52:18", "remaining_time": "2:35:42", "throughput": 19695.26, "total_tokens": 61810496} +{"current_steps": 19645, "total_steps": 78105, "loss": 0.2689, "lr": 4.6584789175538384e-06, "epoch": 1.257601946098201, "percentage": 25.15, "elapsed_time": "0:52:19", "remaining_time": "2:35:41", "throughput": 19696.01, "total_tokens": 61826240} +{"current_steps": 19650, "total_steps": 78105, "loss": 0.3473, "lr": 4.658197004314423e-06, "epoch": 1.257922028039178, "percentage": 25.16, "elapsed_time": "0:52:19", "remaining_time": "2:35:40", "throughput": 19696.86, "total_tokens": 61842624} +{"current_steps": 19655, "total_steps": 78105, "loss": 0.347, "lr": 4.657914983305891e-06, "epoch": 1.2582421099801548, "percentage": 25.16, "elapsed_time": "0:52:20", "remaining_time": "2:35:38", "throughput": 19697.65, "total_tokens": 61858240} +{"current_steps": 19660, "total_steps": 78105, "loss": 0.3243, "lr": 4.657632854542325e-06, "epoch": 1.2585621919211318, "percentage": 25.17, "elapsed_time": "0:52:21", "remaining_time": "2:35:37", "throughput": 19698.38, "total_tokens": 61873856} +{"current_steps": 19665, "total_steps": 78105, "loss": 0.3672, "lr": 4.657350618037813e-06, "epoch": 1.2588822738621088, "percentage": 25.18, "elapsed_time": "0:52:21", "remaining_time": "2:35:36", "throughput": 19699.38, "total_tokens": 61891008} +{"current_steps": 19670, "total_steps": 78105, "loss": 0.2677, "lr": 4.6570682738064485e-06, "epoch": 1.2592023558030856, "percentage": 25.18, "elapsed_time": "0:52:22", "remaining_time": "2:35:35", "throughput": 19700.15, "total_tokens": 61907200} +{"current_steps": 19675, "total_steps": 78105, "loss": 0.1919, "lr": 4.65678582186233e-06, "epoch": 1.2595224377440624, "percentage": 25.19, "elapsed_time": "0:52:23", "remaining_time": "2:35:34", "throughput": 19700.82, "total_tokens": 61922432} +{"current_steps": 19680, "total_steps": 78105, "loss": 0.4322, "lr": 4.6565032622195615e-06, "epoch": 1.2598425196850394, "percentage": 25.2, "elapsed_time": "0:52:23", "remaining_time": "2:35:33", "throughput": 19701.38, "total_tokens": 61936576} +{"current_steps": 19685, "total_steps": 78105, "loss": 0.3127, "lr": 4.656220594892253e-06, "epoch": 1.2601626016260163, "percentage": 25.2, "elapsed_time": "0:52:24", "remaining_time": "2:35:31", "throughput": 19702.12, "total_tokens": 61952064} +{"current_steps": 19690, "total_steps": 78105, "loss": 0.4121, "lr": 4.655937819894521e-06, "epoch": 1.2604826835669931, "percentage": 25.21, "elapsed_time": "0:52:25", "remaining_time": "2:35:30", "throughput": 19702.94, "total_tokens": 61968256} +{"current_steps": 19695, "total_steps": 78105, "loss": 0.2803, "lr": 4.655654937240482e-06, "epoch": 1.26080276550797, "percentage": 25.22, "elapsed_time": "0:52:25", "remaining_time": "2:35:29", "throughput": 19703.7, "total_tokens": 61984192} +{"current_steps": 19700, "total_steps": 78105, "loss": 0.3284, "lr": 4.655371946944265e-06, "epoch": 1.2611228474489469, "percentage": 25.22, "elapsed_time": "0:52:26", "remaining_time": "2:35:28", "throughput": 19704.41, "total_tokens": 61999872} +{"current_steps": 19705, "total_steps": 78105, "loss": 0.3631, "lr": 4.655088849019999e-06, "epoch": 1.2614429293899239, "percentage": 25.23, "elapsed_time": "0:52:27", "remaining_time": "2:35:27", "throughput": 19705.1, "total_tokens": 62015424} +{"current_steps": 19710, "total_steps": 78105, "loss": 0.5244, "lr": 4.654805643481822e-06, "epoch": 1.2617630113309006, "percentage": 25.24, "elapsed_time": "0:52:27", "remaining_time": "2:35:26", "throughput": 19705.76, "total_tokens": 62030912} +{"current_steps": 19715, "total_steps": 78105, "loss": 0.273, "lr": 4.654522330343875e-06, "epoch": 1.2620830932718776, "percentage": 25.24, "elapsed_time": "0:52:28", "remaining_time": "2:35:24", "throughput": 19706.38, "total_tokens": 62045696} +{"current_steps": 19720, "total_steps": 78105, "loss": 0.3918, "lr": 4.654238909620305e-06, "epoch": 1.2624031752128544, "percentage": 25.25, "elapsed_time": "0:52:29", "remaining_time": "2:35:23", "throughput": 19706.97, "total_tokens": 62060288} +{"current_steps": 19725, "total_steps": 78105, "loss": 0.2186, "lr": 4.6539553813252645e-06, "epoch": 1.2627232571538314, "percentage": 25.25, "elapsed_time": "0:52:29", "remaining_time": "2:35:22", "throughput": 19707.94, "total_tokens": 62077888} +{"current_steps": 19730, "total_steps": 78105, "loss": 0.3008, "lr": 4.653671745472913e-06, "epoch": 1.2630433390948084, "percentage": 25.26, "elapsed_time": "0:52:30", "remaining_time": "2:35:21", "throughput": 19708.73, "total_tokens": 62094336} +{"current_steps": 19735, "total_steps": 78105, "loss": 0.3185, "lr": 4.6533880020774126e-06, "epoch": 1.2633634210357851, "percentage": 25.27, "elapsed_time": "0:52:31", "remaining_time": "2:35:20", "throughput": 19709.49, "total_tokens": 62110400} +{"current_steps": 19740, "total_steps": 78105, "loss": 0.3232, "lr": 4.653104151152931e-06, "epoch": 1.2636835029767621, "percentage": 25.27, "elapsed_time": "0:52:31", "remaining_time": "2:35:19", "throughput": 19710.33, "total_tokens": 62126720} +{"current_steps": 19745, "total_steps": 78105, "loss": 0.3325, "lr": 4.652820192713643e-06, "epoch": 1.264003584917739, "percentage": 25.28, "elapsed_time": "0:52:32", "remaining_time": "2:35:18", "throughput": 19711.14, "total_tokens": 62142976} +{"current_steps": 19750, "total_steps": 78105, "loss": 0.2464, "lr": 4.652536126773729e-06, "epoch": 1.2643236668587159, "percentage": 25.29, "elapsed_time": "0:52:33", "remaining_time": "2:35:17", "throughput": 19711.79, "total_tokens": 62158080} +{"current_steps": 19755, "total_steps": 78105, "loss": 0.2626, "lr": 4.652251953347373e-06, "epoch": 1.2646437487996927, "percentage": 25.29, "elapsed_time": "0:52:34", "remaining_time": "2:35:16", "throughput": 19712.49, "total_tokens": 62173824} +{"current_steps": 19760, "total_steps": 78105, "loss": 0.4023, "lr": 4.651967672448765e-06, "epoch": 1.2649638307406696, "percentage": 25.3, "elapsed_time": "0:52:34", "remaining_time": "2:35:14", "throughput": 19713.22, "total_tokens": 62189248} +{"current_steps": 19765, "total_steps": 78105, "loss": 0.4716, "lr": 4.6516832840921e-06, "epoch": 1.2652839126816464, "percentage": 25.31, "elapsed_time": "0:52:35", "remaining_time": "2:35:13", "throughput": 19714.01, "total_tokens": 62205568} +{"current_steps": 19770, "total_steps": 78105, "loss": 0.4309, "lr": 4.65139878829158e-06, "epoch": 1.2656039946226234, "percentage": 25.31, "elapsed_time": "0:52:36", "remaining_time": "2:35:12", "throughput": 19714.69, "total_tokens": 62220992} +{"current_steps": 19775, "total_steps": 78105, "loss": 0.3571, "lr": 4.651114185061412e-06, "epoch": 1.2659240765636004, "percentage": 25.32, "elapsed_time": "0:52:36", "remaining_time": "2:35:11", "throughput": 19715.31, "total_tokens": 62235968} +{"current_steps": 19780, "total_steps": 78105, "loss": 0.3261, "lr": 4.650829474415805e-06, "epoch": 1.2662441585045772, "percentage": 25.32, "elapsed_time": "0:52:37", "remaining_time": "2:35:10", "throughput": 19716.01, "total_tokens": 62251136} +{"current_steps": 19785, "total_steps": 78105, "loss": 0.2417, "lr": 4.650544656368977e-06, "epoch": 1.266564240445554, "percentage": 25.33, "elapsed_time": "0:52:38", "remaining_time": "2:35:09", "throughput": 19716.77, "total_tokens": 62267264} +{"current_steps": 19790, "total_steps": 78105, "loss": 0.3685, "lr": 4.65025973093515e-06, "epoch": 1.266884322386531, "percentage": 25.34, "elapsed_time": "0:52:38", "remaining_time": "2:35:07", "throughput": 19717.39, "total_tokens": 62282112} +{"current_steps": 19795, "total_steps": 78105, "loss": 0.2891, "lr": 4.649974698128554e-06, "epoch": 1.267204404327508, "percentage": 25.34, "elapsed_time": "0:52:39", "remaining_time": "2:35:06", "throughput": 19718.01, "total_tokens": 62297408} +{"current_steps": 19800, "total_steps": 78105, "loss": 0.3548, "lr": 4.64968955796342e-06, "epoch": 1.2675244862684847, "percentage": 25.35, "elapsed_time": "0:52:40", "remaining_time": "2:35:05", "throughput": 19718.85, "total_tokens": 62314304} +{"current_steps": 19805, "total_steps": 78105, "loss": 0.3924, "lr": 4.6494043104539864e-06, "epoch": 1.2678445682094617, "percentage": 25.36, "elapsed_time": "0:52:40", "remaining_time": "2:35:04", "throughput": 19719.52, "total_tokens": 62329600} +{"current_steps": 19810, "total_steps": 78105, "loss": 0.2952, "lr": 4.649118955614498e-06, "epoch": 1.2681646501504384, "percentage": 25.36, "elapsed_time": "0:52:41", "remaining_time": "2:35:03", "throughput": 19720.45, "total_tokens": 62346880} +{"current_steps": 19815, "total_steps": 78105, "loss": 0.2824, "lr": 4.6488334934592025e-06, "epoch": 1.2684847320914154, "percentage": 25.37, "elapsed_time": "0:52:42", "remaining_time": "2:35:02", "throughput": 19721.31, "total_tokens": 62363392} +{"current_steps": 19820, "total_steps": 78105, "loss": 0.3602, "lr": 4.648547924002356e-06, "epoch": 1.2688048140323924, "percentage": 25.38, "elapsed_time": "0:52:42", "remaining_time": "2:35:01", "throughput": 19722.06, "total_tokens": 62379584} +{"current_steps": 19825, "total_steps": 78105, "loss": 0.3378, "lr": 4.648262247258217e-06, "epoch": 1.2691248959733692, "percentage": 25.38, "elapsed_time": "0:52:43", "remaining_time": "2:35:00", "throughput": 19722.75, "total_tokens": 62394880} +{"current_steps": 19830, "total_steps": 78105, "loss": 0.2125, "lr": 4.647976463241051e-06, "epoch": 1.269444977914346, "percentage": 25.39, "elapsed_time": "0:52:44", "remaining_time": "2:34:58", "throughput": 19723.33, "total_tokens": 62409600} +{"current_steps": 19835, "total_steps": 78105, "loss": 0.351, "lr": 4.64769057196513e-06, "epoch": 1.269765059855323, "percentage": 25.4, "elapsed_time": "0:52:44", "remaining_time": "2:34:57", "throughput": 19724.02, "total_tokens": 62424960} +{"current_steps": 19840, "total_steps": 78105, "loss": 0.4485, "lr": 4.647404573444727e-06, "epoch": 1.2700851417963, "percentage": 25.4, "elapsed_time": "0:52:45", "remaining_time": "2:34:56", "throughput": 19724.63, "total_tokens": 62439936} +{"current_steps": 19845, "total_steps": 78105, "loss": 0.4064, "lr": 4.647118467694126e-06, "epoch": 1.2704052237372767, "percentage": 25.41, "elapsed_time": "0:52:46", "remaining_time": "2:34:55", "throughput": 19725.31, "total_tokens": 62455360} +{"current_steps": 19850, "total_steps": 78105, "loss": 0.4465, "lr": 4.646832254727614e-06, "epoch": 1.2707253056782537, "percentage": 25.41, "elapsed_time": "0:52:46", "remaining_time": "2:34:54", "throughput": 19726.16, "total_tokens": 62472128} +{"current_steps": 19855, "total_steps": 78105, "loss": 0.4335, "lr": 4.646545934559479e-06, "epoch": 1.2710453876192305, "percentage": 25.42, "elapsed_time": "0:52:47", "remaining_time": "2:34:53", "throughput": 19726.88, "total_tokens": 62487936} +{"current_steps": 19860, "total_steps": 78105, "loss": 0.3685, "lr": 4.646259507204023e-06, "epoch": 1.2713654695602075, "percentage": 25.43, "elapsed_time": "0:52:48", "remaining_time": "2:34:51", "throughput": 19727.53, "total_tokens": 62502848} +{"current_steps": 19865, "total_steps": 78105, "loss": 0.5468, "lr": 4.645972972675545e-06, "epoch": 1.2716855515011842, "percentage": 25.43, "elapsed_time": "0:52:48", "remaining_time": "2:34:50", "throughput": 19728.19, "total_tokens": 62517888} +{"current_steps": 19870, "total_steps": 78105, "loss": 0.319, "lr": 4.645686330988356e-06, "epoch": 1.2720056334421612, "percentage": 25.44, "elapsed_time": "0:52:49", "remaining_time": "2:34:49", "throughput": 19728.89, "total_tokens": 62533312} +{"current_steps": 19875, "total_steps": 78105, "loss": 0.328, "lr": 4.645399582156768e-06, "epoch": 1.272325715383138, "percentage": 25.45, "elapsed_time": "0:52:50", "remaining_time": "2:34:48", "throughput": 19729.53, "total_tokens": 62548160} +{"current_steps": 19880, "total_steps": 78105, "loss": 0.2244, "lr": 4.645112726195099e-06, "epoch": 1.272645797324115, "percentage": 25.45, "elapsed_time": "0:52:50", "remaining_time": "2:34:47", "throughput": 19730.34, "total_tokens": 62564480} +{"current_steps": 19885, "total_steps": 78105, "loss": 0.2637, "lr": 4.644825763117675e-06, "epoch": 1.272965879265092, "percentage": 25.46, "elapsed_time": "0:52:51", "remaining_time": "2:34:46", "throughput": 19731.05, "total_tokens": 62580288} +{"current_steps": 19890, "total_steps": 78105, "loss": 0.3284, "lr": 4.644538692938824e-06, "epoch": 1.2732859612060687, "percentage": 25.47, "elapsed_time": "0:52:52", "remaining_time": "2:34:44", "throughput": 19731.71, "total_tokens": 62595392} +{"current_steps": 19895, "total_steps": 78105, "loss": 0.3854, "lr": 4.6442515156728806e-06, "epoch": 1.2736060431470455, "percentage": 25.47, "elapsed_time": "0:52:52", "remaining_time": "2:34:43", "throughput": 19732.31, "total_tokens": 62609792} +{"current_steps": 19900, "total_steps": 78105, "loss": 0.2897, "lr": 4.643964231334186e-06, "epoch": 1.2739261250880225, "percentage": 25.48, "elapsed_time": "0:52:53", "remaining_time": "2:34:42", "throughput": 19732.96, "total_tokens": 62624576} +{"current_steps": 19905, "total_steps": 78105, "loss": 0.2879, "lr": 4.643676839937086e-06, "epoch": 1.2742462070289995, "percentage": 25.48, "elapsed_time": "0:52:54", "remaining_time": "2:34:41", "throughput": 19733.87, "total_tokens": 62641664} +{"current_steps": 19910, "total_steps": 78105, "loss": 0.3125, "lr": 4.643389341495929e-06, "epoch": 1.2745662889699763, "percentage": 25.49, "elapsed_time": "0:52:54", "remaining_time": "2:34:40", "throughput": 19734.49, "total_tokens": 62656448} +{"current_steps": 19915, "total_steps": 78105, "loss": 0.3907, "lr": 4.643101736025074e-06, "epoch": 1.2748863709109532, "percentage": 25.5, "elapsed_time": "0:52:55", "remaining_time": "2:34:38", "throughput": 19735.26, "total_tokens": 62671936} +{"current_steps": 19920, "total_steps": 78105, "loss": 0.2957, "lr": 4.642814023538881e-06, "epoch": 1.27520645285193, "percentage": 25.5, "elapsed_time": "0:52:56", "remaining_time": "2:34:37", "throughput": 19735.93, "total_tokens": 62687168} +{"current_steps": 19925, "total_steps": 78105, "loss": 0.2726, "lr": 4.642526204051717e-06, "epoch": 1.275526534792907, "percentage": 25.51, "elapsed_time": "0:52:56", "remaining_time": "2:34:36", "throughput": 19736.65, "total_tokens": 62702400} +{"current_steps": 19930, "total_steps": 78105, "loss": 0.3796, "lr": 4.642238277577954e-06, "epoch": 1.275846616733884, "percentage": 25.52, "elapsed_time": "0:52:57", "remaining_time": "2:34:35", "throughput": 19737.37, "total_tokens": 62717824} +{"current_steps": 19935, "total_steps": 78105, "loss": 0.309, "lr": 4.64195024413197e-06, "epoch": 1.2761666986748608, "percentage": 25.52, "elapsed_time": "0:52:58", "remaining_time": "2:34:34", "throughput": 19738.29, "total_tokens": 62735040} +{"current_steps": 19940, "total_steps": 78105, "loss": 0.3371, "lr": 4.641662103728149e-06, "epoch": 1.2764867806158375, "percentage": 25.53, "elapsed_time": "0:52:59", "remaining_time": "2:34:33", "throughput": 19738.93, "total_tokens": 62750144} +{"current_steps": 19945, "total_steps": 78105, "loss": 0.3251, "lr": 4.641373856380878e-06, "epoch": 1.2768068625568145, "percentage": 25.54, "elapsed_time": "0:52:59", "remaining_time": "2:34:32", "throughput": 19739.64, "total_tokens": 62765824} +{"current_steps": 19950, "total_steps": 78105, "loss": 0.3829, "lr": 4.641085502104549e-06, "epoch": 1.2771269444977915, "percentage": 25.54, "elapsed_time": "0:53:00", "remaining_time": "2:34:30", "throughput": 19740.49, "total_tokens": 62782464} +{"current_steps": 19955, "total_steps": 78105, "loss": 0.3555, "lr": 4.640797040913563e-06, "epoch": 1.2774470264387683, "percentage": 25.55, "elapsed_time": "0:53:01", "remaining_time": "2:34:29", "throughput": 19741.38, "total_tokens": 62799040} +{"current_steps": 19960, "total_steps": 78105, "loss": 0.3871, "lr": 4.640508472822324e-06, "epoch": 1.2777671083797453, "percentage": 25.56, "elapsed_time": "0:53:01", "remaining_time": "2:34:28", "throughput": 19742.0, "total_tokens": 62813952} +{"current_steps": 19965, "total_steps": 78105, "loss": 0.2359, "lr": 4.640219797845242e-06, "epoch": 1.278087190320722, "percentage": 25.56, "elapsed_time": "0:53:02", "remaining_time": "2:34:27", "throughput": 19742.66, "total_tokens": 62829056} +{"current_steps": 19970, "total_steps": 78105, "loss": 0.315, "lr": 4.6399310159967306e-06, "epoch": 1.278407272261699, "percentage": 25.57, "elapsed_time": "0:53:03", "remaining_time": "2:34:26", "throughput": 19743.36, "total_tokens": 62844288} +{"current_steps": 19975, "total_steps": 78105, "loss": 0.3672, "lr": 4.639642127291211e-06, "epoch": 1.2787273542026758, "percentage": 25.57, "elapsed_time": "0:53:03", "remaining_time": "2:34:25", "throughput": 19744.01, "total_tokens": 62859584} +{"current_steps": 19980, "total_steps": 78105, "loss": 0.4307, "lr": 4.639353131743108e-06, "epoch": 1.2790474361436528, "percentage": 25.58, "elapsed_time": "0:53:04", "remaining_time": "2:34:23", "throughput": 19744.7, "total_tokens": 62874432} +{"current_steps": 19985, "total_steps": 78105, "loss": 0.2447, "lr": 4.639064029366854e-06, "epoch": 1.2793675180846296, "percentage": 25.59, "elapsed_time": "0:53:05", "remaining_time": "2:34:22", "throughput": 19745.42, "total_tokens": 62890112} +{"current_steps": 19990, "total_steps": 78105, "loss": 0.2688, "lr": 4.638774820176884e-06, "epoch": 1.2796876000256066, "percentage": 25.59, "elapsed_time": "0:53:05", "remaining_time": "2:34:21", "throughput": 19746.23, "total_tokens": 62906432} +{"current_steps": 19995, "total_steps": 78105, "loss": 0.3082, "lr": 4.63848550418764e-06, "epoch": 1.2800076819665835, "percentage": 25.6, "elapsed_time": "0:53:06", "remaining_time": "2:34:20", "throughput": 19747.25, "total_tokens": 62924288} +{"current_steps": 20000, "total_steps": 78105, "loss": 0.3916, "lr": 4.638196081413569e-06, "epoch": 1.2803277639075603, "percentage": 25.61, "elapsed_time": "0:53:07", "remaining_time": "2:34:19", "throughput": 19747.9, "total_tokens": 62939712} +{"current_steps": 20005, "total_steps": 78105, "loss": 0.3147, "lr": 4.637906551869123e-06, "epoch": 1.2806478458485373, "percentage": 25.61, "elapsed_time": "0:53:07", "remaining_time": "2:34:18", "throughput": 19748.56, "total_tokens": 62954816} +{"current_steps": 20010, "total_steps": 78105, "loss": 0.4312, "lr": 4.63761691556876e-06, "epoch": 1.280967927789514, "percentage": 25.62, "elapsed_time": "0:53:08", "remaining_time": "2:34:17", "throughput": 19749.39, "total_tokens": 62971008} +{"current_steps": 20015, "total_steps": 78105, "loss": 0.3879, "lr": 4.637327172526942e-06, "epoch": 1.281288009730491, "percentage": 25.63, "elapsed_time": "0:53:09", "remaining_time": "2:34:15", "throughput": 19750.1, "total_tokens": 62986240} +{"current_steps": 20020, "total_steps": 78105, "loss": 0.4595, "lr": 4.637037322758139e-06, "epoch": 1.2816080916714678, "percentage": 25.63, "elapsed_time": "0:53:09", "remaining_time": "2:34:14", "throughput": 19750.71, "total_tokens": 63000768} +{"current_steps": 20025, "total_steps": 78105, "loss": 0.287, "lr": 4.636747366276822e-06, "epoch": 1.2819281736124448, "percentage": 25.64, "elapsed_time": "0:53:10", "remaining_time": "2:34:13", "throughput": 19751.55, "total_tokens": 63017344} +{"current_steps": 20030, "total_steps": 78105, "loss": 0.3764, "lr": 4.6364573030974725e-06, "epoch": 1.2822482555534216, "percentage": 25.64, "elapsed_time": "0:53:11", "remaining_time": "2:34:12", "throughput": 19752.16, "total_tokens": 63031872} +{"current_steps": 20035, "total_steps": 78105, "loss": 0.3154, "lr": 4.636167133234574e-06, "epoch": 1.2825683374943986, "percentage": 25.65, "elapsed_time": "0:53:11", "remaining_time": "2:34:11", "throughput": 19752.89, "total_tokens": 63047936} +{"current_steps": 20040, "total_steps": 78105, "loss": 0.5253, "lr": 4.6358768567026145e-06, "epoch": 1.2828884194353756, "percentage": 25.66, "elapsed_time": "0:53:12", "remaining_time": "2:34:10", "throughput": 19753.44, "total_tokens": 63062144} +{"current_steps": 20045, "total_steps": 78105, "loss": 0.3548, "lr": 4.635586473516092e-06, "epoch": 1.2832085013763523, "percentage": 25.66, "elapsed_time": "0:53:13", "remaining_time": "2:34:08", "throughput": 19754.13, "total_tokens": 63077376} +{"current_steps": 20050, "total_steps": 78105, "loss": 0.3181, "lr": 4.6352959836895024e-06, "epoch": 1.283528583317329, "percentage": 25.67, "elapsed_time": "0:53:13", "remaining_time": "2:34:07", "throughput": 19754.88, "total_tokens": 63092928} +{"current_steps": 20055, "total_steps": 78105, "loss": 0.4008, "lr": 4.635005387237355e-06, "epoch": 1.283848665258306, "percentage": 25.68, "elapsed_time": "0:53:14", "remaining_time": "2:34:06", "throughput": 19755.59, "total_tokens": 63108224} +{"current_steps": 20060, "total_steps": 78105, "loss": 0.4035, "lr": 4.63471468417416e-06, "epoch": 1.284168747199283, "percentage": 25.68, "elapsed_time": "0:53:15", "remaining_time": "2:34:05", "throughput": 19756.29, "total_tokens": 63123584} +{"current_steps": 20065, "total_steps": 78105, "loss": 0.3448, "lr": 4.634423874514433e-06, "epoch": 1.2844888291402599, "percentage": 25.69, "elapsed_time": "0:53:15", "remaining_time": "2:34:04", "throughput": 19756.92, "total_tokens": 63138432} +{"current_steps": 20070, "total_steps": 78105, "loss": 0.3615, "lr": 4.634132958272694e-06, "epoch": 1.2848089110812368, "percentage": 25.7, "elapsed_time": "0:53:16", "remaining_time": "2:34:02", "throughput": 19757.59, "total_tokens": 63153600} +{"current_steps": 20075, "total_steps": 78105, "loss": 0.3817, "lr": 4.633841935463473e-06, "epoch": 1.2851289930222136, "percentage": 25.7, "elapsed_time": "0:53:17", "remaining_time": "2:34:01", "throughput": 19758.15, "total_tokens": 63167936} +{"current_steps": 20080, "total_steps": 78105, "loss": 0.2778, "lr": 4.6335508061013005e-06, "epoch": 1.2854490749631906, "percentage": 25.71, "elapsed_time": "0:53:17", "remaining_time": "2:34:00", "throughput": 19758.87, "total_tokens": 63183680} +{"current_steps": 20085, "total_steps": 78105, "loss": 0.39, "lr": 4.633259570200713e-06, "epoch": 1.2857691569041676, "percentage": 25.72, "elapsed_time": "0:53:18", "remaining_time": "2:33:59", "throughput": 19759.63, "total_tokens": 63199552} +{"current_steps": 20090, "total_steps": 78105, "loss": 0.397, "lr": 4.6329682277762544e-06, "epoch": 1.2860892388451444, "percentage": 25.72, "elapsed_time": "0:53:19", "remaining_time": "2:33:58", "throughput": 19760.32, "total_tokens": 63214848} +{"current_steps": 20095, "total_steps": 78105, "loss": 0.3408, "lr": 4.632676778842473e-06, "epoch": 1.2864093207861211, "percentage": 25.73, "elapsed_time": "0:53:19", "remaining_time": "2:33:56", "throughput": 19761.0, "total_tokens": 63230272} +{"current_steps": 20100, "total_steps": 78105, "loss": 0.2205, "lr": 4.632385223413922e-06, "epoch": 1.2867294027270981, "percentage": 25.73, "elapsed_time": "0:53:20", "remaining_time": "2:33:55", "throughput": 19761.92, "total_tokens": 63247424} +{"current_steps": 20105, "total_steps": 78105, "loss": 0.3853, "lr": 4.63209356150516e-06, "epoch": 1.2870494846680751, "percentage": 25.74, "elapsed_time": "0:53:21", "remaining_time": "2:33:54", "throughput": 19762.68, "total_tokens": 63263360} +{"current_steps": 20110, "total_steps": 78105, "loss": 0.2982, "lr": 4.631801793130751e-06, "epoch": 1.2873695666090519, "percentage": 25.75, "elapsed_time": "0:53:21", "remaining_time": "2:33:53", "throughput": 19763.38, "total_tokens": 63278784} +{"current_steps": 20115, "total_steps": 78105, "loss": 0.2606, "lr": 4.631509918305266e-06, "epoch": 1.2876896485500289, "percentage": 25.75, "elapsed_time": "0:53:22", "remaining_time": "2:33:52", "throughput": 19764.1, "total_tokens": 63294784} +{"current_steps": 20120, "total_steps": 78105, "loss": 0.3204, "lr": 4.6312179370432766e-06, "epoch": 1.2880097304910056, "percentage": 25.76, "elapsed_time": "0:53:23", "remaining_time": "2:33:51", "throughput": 19764.73, "total_tokens": 63309760} +{"current_steps": 20125, "total_steps": 78105, "loss": 0.4828, "lr": 4.630925849359366e-06, "epoch": 1.2883298124319826, "percentage": 25.77, "elapsed_time": "0:53:23", "remaining_time": "2:33:50", "throughput": 19765.49, "total_tokens": 63325312} +{"current_steps": 20130, "total_steps": 78105, "loss": 0.2336, "lr": 4.630633655268116e-06, "epoch": 1.2886498943729594, "percentage": 25.77, "elapsed_time": "0:53:24", "remaining_time": "2:33:49", "throughput": 19766.16, "total_tokens": 63340416} +{"current_steps": 20135, "total_steps": 78105, "loss": 0.4746, "lr": 4.6303413547841215e-06, "epoch": 1.2889699763139364, "percentage": 25.78, "elapsed_time": "0:53:25", "remaining_time": "2:33:47", "throughput": 19766.93, "total_tokens": 63356352} +{"current_steps": 20140, "total_steps": 78105, "loss": 0.3209, "lr": 4.6300489479219745e-06, "epoch": 1.2892900582549132, "percentage": 25.79, "elapsed_time": "0:53:25", "remaining_time": "2:33:46", "throughput": 19767.65, "total_tokens": 63371904} +{"current_steps": 20145, "total_steps": 78105, "loss": 0.2791, "lr": 4.62975643469628e-06, "epoch": 1.2896101401958902, "percentage": 25.79, "elapsed_time": "0:53:26", "remaining_time": "2:33:45", "throughput": 19768.4, "total_tokens": 63387712} +{"current_steps": 20150, "total_steps": 78105, "loss": 0.2382, "lr": 4.629463815121641e-06, "epoch": 1.2899302221368671, "percentage": 25.8, "elapsed_time": "0:53:27", "remaining_time": "2:33:44", "throughput": 19769.14, "total_tokens": 63403392} +{"current_steps": 20155, "total_steps": 78105, "loss": 0.4451, "lr": 4.6291710892126715e-06, "epoch": 1.290250304077844, "percentage": 25.81, "elapsed_time": "0:53:27", "remaining_time": "2:33:43", "throughput": 19769.82, "total_tokens": 63418496} +{"current_steps": 20160, "total_steps": 78105, "loss": 0.4002, "lr": 4.628878256983989e-06, "epoch": 1.2905703860188207, "percentage": 25.81, "elapsed_time": "0:53:28", "remaining_time": "2:33:42", "throughput": 19770.57, "total_tokens": 63434432} +{"current_steps": 20165, "total_steps": 78105, "loss": 0.3181, "lr": 4.628585318450214e-06, "epoch": 1.2908904679597977, "percentage": 25.82, "elapsed_time": "0:53:29", "remaining_time": "2:33:41", "throughput": 19771.37, "total_tokens": 63450752} +{"current_steps": 20170, "total_steps": 78105, "loss": 0.4293, "lr": 4.6282922736259764e-06, "epoch": 1.2912105499007747, "percentage": 25.82, "elapsed_time": "0:53:29", "remaining_time": "2:33:39", "throughput": 19772.01, "total_tokens": 63466176} +{"current_steps": 20175, "total_steps": 78105, "loss": 0.3507, "lr": 4.627999122525908e-06, "epoch": 1.2915306318417514, "percentage": 25.83, "elapsed_time": "0:53:30", "remaining_time": "2:33:38", "throughput": 19772.66, "total_tokens": 63481280} +{"current_steps": 20180, "total_steps": 78105, "loss": 0.4395, "lr": 4.627705865164648e-06, "epoch": 1.2918507137827284, "percentage": 25.84, "elapsed_time": "0:53:31", "remaining_time": "2:33:37", "throughput": 19773.32, "total_tokens": 63496640} +{"current_steps": 20185, "total_steps": 78105, "loss": 0.2385, "lr": 4.627412501556841e-06, "epoch": 1.2921707957237052, "percentage": 25.84, "elapsed_time": "0:53:31", "remaining_time": "2:33:36", "throughput": 19774.11, "total_tokens": 63513280} +{"current_steps": 20190, "total_steps": 78105, "loss": 0.2578, "lr": 4.627119031717133e-06, "epoch": 1.2924908776646822, "percentage": 25.85, "elapsed_time": "0:53:32", "remaining_time": "2:33:35", "throughput": 19774.89, "total_tokens": 63529472} +{"current_steps": 20195, "total_steps": 78105, "loss": 0.2907, "lr": 4.626825455660182e-06, "epoch": 1.2928109596056592, "percentage": 25.86, "elapsed_time": "0:53:33", "remaining_time": "2:33:34", "throughput": 19775.6, "total_tokens": 63545088} +{"current_steps": 20200, "total_steps": 78105, "loss": 0.4634, "lr": 4.6265317734006445e-06, "epoch": 1.293131041546636, "percentage": 25.86, "elapsed_time": "0:53:33", "remaining_time": "2:33:33", "throughput": 19776.23, "total_tokens": 63560064} +{"current_steps": 20205, "total_steps": 78105, "loss": 0.463, "lr": 4.626237984953188e-06, "epoch": 1.2934511234876127, "percentage": 25.87, "elapsed_time": "0:53:34", "remaining_time": "2:33:31", "throughput": 19777.03, "total_tokens": 63576064} +{"current_steps": 20210, "total_steps": 78105, "loss": 0.4143, "lr": 4.625944090332481e-06, "epoch": 1.2937712054285897, "percentage": 25.88, "elapsed_time": "0:53:35", "remaining_time": "2:33:30", "throughput": 19777.66, "total_tokens": 63590976} +{"current_steps": 20215, "total_steps": 78105, "loss": 0.3367, "lr": 4.625650089553201e-06, "epoch": 1.2940912873695667, "percentage": 25.88, "elapsed_time": "0:53:35", "remaining_time": "2:33:29", "throughput": 19778.49, "total_tokens": 63607552} +{"current_steps": 20220, "total_steps": 78105, "loss": 0.3755, "lr": 4.625355982630027e-06, "epoch": 1.2944113693105435, "percentage": 25.89, "elapsed_time": "0:53:36", "remaining_time": "2:33:28", "throughput": 19779.41, "total_tokens": 63624896} +{"current_steps": 20225, "total_steps": 78105, "loss": 0.2755, "lr": 4.625061769577645e-06, "epoch": 1.2947314512515204, "percentage": 25.89, "elapsed_time": "0:53:37", "remaining_time": "2:33:27", "throughput": 19780.16, "total_tokens": 63640576} +{"current_steps": 20230, "total_steps": 78105, "loss": 0.3411, "lr": 4.624767450410748e-06, "epoch": 1.2950515331924972, "percentage": 25.9, "elapsed_time": "0:53:38", "remaining_time": "2:33:26", "throughput": 19780.75, "total_tokens": 63655552} +{"current_steps": 20235, "total_steps": 78105, "loss": 0.4274, "lr": 4.624473025144031e-06, "epoch": 1.2953716151334742, "percentage": 25.91, "elapsed_time": "0:53:38", "remaining_time": "2:33:25", "throughput": 19781.48, "total_tokens": 63671296} +{"current_steps": 20240, "total_steps": 78105, "loss": 0.2648, "lr": 4.624178493792199e-06, "epoch": 1.295691697074451, "percentage": 25.91, "elapsed_time": "0:53:39", "remaining_time": "2:33:23", "throughput": 19782.04, "total_tokens": 63685632} +{"current_steps": 20245, "total_steps": 78105, "loss": 0.3499, "lr": 4.623883856369956e-06, "epoch": 1.296011779015428, "percentage": 25.92, "elapsed_time": "0:53:40", "remaining_time": "2:33:22", "throughput": 19782.77, "total_tokens": 63701184} +{"current_steps": 20250, "total_steps": 78105, "loss": 0.334, "lr": 4.623589112892017e-06, "epoch": 1.2963318609564047, "percentage": 25.93, "elapsed_time": "0:53:40", "remaining_time": "2:33:21", "throughput": 19783.48, "total_tokens": 63716992} +{"current_steps": 20255, "total_steps": 78105, "loss": 0.3291, "lr": 4.623294263373098e-06, "epoch": 1.2966519428973817, "percentage": 25.93, "elapsed_time": "0:53:41", "remaining_time": "2:33:20", "throughput": 19784.24, "total_tokens": 63733184} +{"current_steps": 20260, "total_steps": 78105, "loss": 0.3076, "lr": 4.6229993078279245e-06, "epoch": 1.2969720248383587, "percentage": 25.94, "elapsed_time": "0:53:42", "remaining_time": "2:33:19", "throughput": 19784.85, "total_tokens": 63747904} +{"current_steps": 20265, "total_steps": 78105, "loss": 0.2991, "lr": 4.6227042462712235e-06, "epoch": 1.2972921067793355, "percentage": 25.95, "elapsed_time": "0:53:42", "remaining_time": "2:33:18", "throughput": 19785.6, "total_tokens": 63763904} +{"current_steps": 20270, "total_steps": 78105, "loss": 0.272, "lr": 4.622409078717729e-06, "epoch": 1.2976121887203125, "percentage": 25.95, "elapsed_time": "0:53:43", "remaining_time": "2:33:17", "throughput": 19786.33, "total_tokens": 63779648} +{"current_steps": 20275, "total_steps": 78105, "loss": 0.4574, "lr": 4.62211380518218e-06, "epoch": 1.2979322706612892, "percentage": 25.96, "elapsed_time": "0:53:44", "remaining_time": "2:33:16", "throughput": 19786.96, "total_tokens": 63795008} +{"current_steps": 20280, "total_steps": 78105, "loss": 0.2077, "lr": 4.621818425679322e-06, "epoch": 1.2982523526022662, "percentage": 25.97, "elapsed_time": "0:53:44", "remaining_time": "2:33:14", "throughput": 19787.6, "total_tokens": 63810112} +{"current_steps": 20285, "total_steps": 78105, "loss": 0.3095, "lr": 4.621522940223903e-06, "epoch": 1.298572434543243, "percentage": 25.97, "elapsed_time": "0:53:45", "remaining_time": "2:33:13", "throughput": 19788.53, "total_tokens": 63827264} +{"current_steps": 20290, "total_steps": 78105, "loss": 0.5271, "lr": 4.6212273488306806e-06, "epoch": 1.29889251648422, "percentage": 25.98, "elapsed_time": "0:53:46", "remaining_time": "2:33:12", "throughput": 19789.28, "total_tokens": 63843520} +{"current_steps": 20295, "total_steps": 78105, "loss": 0.6525, "lr": 4.620931651514412e-06, "epoch": 1.2992125984251968, "percentage": 25.98, "elapsed_time": "0:53:46", "remaining_time": "2:33:11", "throughput": 19789.98, "total_tokens": 63859264} +{"current_steps": 20300, "total_steps": 78105, "loss": 0.4714, "lr": 4.620635848289865e-06, "epoch": 1.2995326803661738, "percentage": 25.99, "elapsed_time": "0:53:47", "remaining_time": "2:33:10", "throughput": 19790.69, "total_tokens": 63874816} +{"current_steps": 20305, "total_steps": 78105, "loss": 0.2975, "lr": 4.620339939171809e-06, "epoch": 1.2998527623071507, "percentage": 26.0, "elapsed_time": "0:53:48", "remaining_time": "2:33:09", "throughput": 19791.32, "total_tokens": 63889728} +{"current_steps": 20310, "total_steps": 78105, "loss": 0.3383, "lr": 4.620043924175022e-06, "epoch": 1.3001728442481275, "percentage": 26.0, "elapsed_time": "0:53:48", "remaining_time": "2:33:08", "throughput": 19792.05, "total_tokens": 63905472} +{"current_steps": 20315, "total_steps": 78105, "loss": 0.2967, "lr": 4.619747803314284e-06, "epoch": 1.3004929261891043, "percentage": 26.01, "elapsed_time": "0:53:49", "remaining_time": "2:33:06", "throughput": 19792.66, "total_tokens": 63920320} +{"current_steps": 20320, "total_steps": 78105, "loss": 0.293, "lr": 4.619451576604381e-06, "epoch": 1.3008130081300813, "percentage": 26.02, "elapsed_time": "0:53:50", "remaining_time": "2:33:05", "throughput": 19793.29, "total_tokens": 63935488} +{"current_steps": 20325, "total_steps": 78105, "loss": 0.2501, "lr": 4.619155244060107e-06, "epoch": 1.3011330900710583, "percentage": 26.02, "elapsed_time": "0:53:50", "remaining_time": "2:33:04", "throughput": 19794.05, "total_tokens": 63951616} +{"current_steps": 20330, "total_steps": 78105, "loss": 0.4311, "lr": 4.618858805696258e-06, "epoch": 1.301453172012035, "percentage": 26.03, "elapsed_time": "0:53:51", "remaining_time": "2:33:03", "throughput": 19794.61, "total_tokens": 63965952} +{"current_steps": 20335, "total_steps": 78105, "loss": 0.3449, "lr": 4.6185622615276375e-06, "epoch": 1.301773253953012, "percentage": 26.04, "elapsed_time": "0:53:52", "remaining_time": "2:33:02", "throughput": 19795.24, "total_tokens": 63981312} +{"current_steps": 20340, "total_steps": 78105, "loss": 0.4485, "lr": 4.618265611569052e-06, "epoch": 1.3020933358939888, "percentage": 26.04, "elapsed_time": "0:53:52", "remaining_time": "2:33:01", "throughput": 19795.81, "total_tokens": 63995584} +{"current_steps": 20345, "total_steps": 78105, "loss": 0.3986, "lr": 4.617968855835317e-06, "epoch": 1.3024134178349658, "percentage": 26.05, "elapsed_time": "0:53:53", "remaining_time": "2:32:59", "throughput": 19796.71, "total_tokens": 64012608} +{"current_steps": 20350, "total_steps": 78105, "loss": 0.2306, "lr": 4.617671994341248e-06, "epoch": 1.3027334997759428, "percentage": 26.05, "elapsed_time": "0:53:54", "remaining_time": "2:32:58", "throughput": 19797.51, "total_tokens": 64028992} +{"current_steps": 20355, "total_steps": 78105, "loss": 0.3203, "lr": 4.61737502710167e-06, "epoch": 1.3030535817169195, "percentage": 26.06, "elapsed_time": "0:53:54", "remaining_time": "2:32:57", "throughput": 19798.27, "total_tokens": 64045056} +{"current_steps": 20360, "total_steps": 78105, "loss": 0.4393, "lr": 4.6170779541314134e-06, "epoch": 1.3033736636578963, "percentage": 26.07, "elapsed_time": "0:53:55", "remaining_time": "2:32:56", "throughput": 19798.89, "total_tokens": 64059840} +{"current_steps": 20365, "total_steps": 78105, "loss": 0.3302, "lr": 4.616780775445311e-06, "epoch": 1.3036937455988733, "percentage": 26.07, "elapsed_time": "0:53:56", "remaining_time": "2:32:55", "throughput": 19799.72, "total_tokens": 64076224} +{"current_steps": 20370, "total_steps": 78105, "loss": 0.3779, "lr": 4.6164834910582014e-06, "epoch": 1.3040138275398503, "percentage": 26.08, "elapsed_time": "0:53:56", "remaining_time": "2:32:54", "throughput": 19800.38, "total_tokens": 64091328} +{"current_steps": 20375, "total_steps": 78105, "loss": 0.4574, "lr": 4.6161861009849315e-06, "epoch": 1.304333909480827, "percentage": 26.09, "elapsed_time": "0:53:57", "remaining_time": "2:32:53", "throughput": 19800.9, "total_tokens": 64105472} +{"current_steps": 20380, "total_steps": 78105, "loss": 0.3478, "lr": 4.615888605240351e-06, "epoch": 1.304653991421804, "percentage": 26.09, "elapsed_time": "0:53:58", "remaining_time": "2:32:51", "throughput": 19801.52, "total_tokens": 64120256} +{"current_steps": 20385, "total_steps": 78105, "loss": 0.3416, "lr": 4.615591003839314e-06, "epoch": 1.3049740733627808, "percentage": 26.1, "elapsed_time": "0:53:58", "remaining_time": "2:32:50", "throughput": 19802.2, "total_tokens": 64135680} +{"current_steps": 20390, "total_steps": 78105, "loss": 0.3507, "lr": 4.615293296796683e-06, "epoch": 1.3052941553037578, "percentage": 26.11, "elapsed_time": "0:53:59", "remaining_time": "2:32:49", "throughput": 19802.8, "total_tokens": 64150592} +{"current_steps": 20395, "total_steps": 78105, "loss": 0.3035, "lr": 4.614995484127322e-06, "epoch": 1.3056142372447346, "percentage": 26.11, "elapsed_time": "0:54:00", "remaining_time": "2:32:48", "throughput": 19803.5, "total_tokens": 64166080} +{"current_steps": 20400, "total_steps": 78105, "loss": 0.3247, "lr": 4.6146975658461025e-06, "epoch": 1.3059343191857116, "percentage": 26.12, "elapsed_time": "0:54:00", "remaining_time": "2:32:47", "throughput": 19804.19, "total_tokens": 64181632} +{"current_steps": 20405, "total_steps": 78105, "loss": 0.2535, "lr": 4.614399541967902e-06, "epoch": 1.3062544011266883, "percentage": 26.13, "elapsed_time": "0:54:01", "remaining_time": "2:32:46", "throughput": 19804.96, "total_tokens": 64197760} +{"current_steps": 20410, "total_steps": 78105, "loss": 0.3006, "lr": 4.614101412507602e-06, "epoch": 1.3065744830676653, "percentage": 26.13, "elapsed_time": "0:54:02", "remaining_time": "2:32:44", "throughput": 19805.58, "total_tokens": 64212416} +{"current_steps": 20415, "total_steps": 78105, "loss": 0.2959, "lr": 4.61380317748009e-06, "epoch": 1.3068945650086423, "percentage": 26.14, "elapsed_time": "0:54:02", "remaining_time": "2:32:43", "throughput": 19806.26, "total_tokens": 64228032} +{"current_steps": 20420, "total_steps": 78105, "loss": 0.2968, "lr": 4.613504836900258e-06, "epoch": 1.307214646949619, "percentage": 26.14, "elapsed_time": "0:54:03", "remaining_time": "2:32:42", "throughput": 19806.85, "total_tokens": 64243008} +{"current_steps": 20425, "total_steps": 78105, "loss": 0.4151, "lr": 4.613206390783003e-06, "epoch": 1.3075347288905959, "percentage": 26.15, "elapsed_time": "0:54:04", "remaining_time": "2:32:41", "throughput": 19807.53, "total_tokens": 64257920} +{"current_steps": 20430, "total_steps": 78105, "loss": 0.3256, "lr": 4.612907839143227e-06, "epoch": 1.3078548108315728, "percentage": 26.16, "elapsed_time": "0:54:04", "remaining_time": "2:32:40", "throughput": 19808.25, "total_tokens": 64273664} +{"current_steps": 20435, "total_steps": 78105, "loss": 0.2772, "lr": 4.61260918199584e-06, "epoch": 1.3081748927725498, "percentage": 26.16, "elapsed_time": "0:54:05", "remaining_time": "2:32:39", "throughput": 19808.86, "total_tokens": 64288704} +{"current_steps": 20440, "total_steps": 78105, "loss": 0.3003, "lr": 4.612310419355754e-06, "epoch": 1.3084949747135266, "percentage": 26.17, "elapsed_time": "0:54:06", "remaining_time": "2:32:37", "throughput": 19809.65, "total_tokens": 64304896} +{"current_steps": 20445, "total_steps": 78105, "loss": 0.2842, "lr": 4.61201155123789e-06, "epoch": 1.3088150566545036, "percentage": 26.18, "elapsed_time": "0:54:06", "remaining_time": "2:32:36", "throughput": 19810.47, "total_tokens": 64321664} +{"current_steps": 20450, "total_steps": 78105, "loss": 0.3828, "lr": 4.611712577657169e-06, "epoch": 1.3091351385954804, "percentage": 26.18, "elapsed_time": "0:54:07", "remaining_time": "2:32:35", "throughput": 19811.15, "total_tokens": 64337152} +{"current_steps": 20455, "total_steps": 78105, "loss": 0.3349, "lr": 4.611413498628521e-06, "epoch": 1.3094552205364574, "percentage": 26.19, "elapsed_time": "0:54:08", "remaining_time": "2:32:34", "throughput": 19811.82, "total_tokens": 64352320} +{"current_steps": 20460, "total_steps": 78105, "loss": 0.2901, "lr": 4.611114314166881e-06, "epoch": 1.3097753024774343, "percentage": 26.2, "elapsed_time": "0:54:08", "remaining_time": "2:32:33", "throughput": 19812.55, "total_tokens": 64368256} +{"current_steps": 20465, "total_steps": 78105, "loss": 0.297, "lr": 4.610815024287189e-06, "epoch": 1.3100953844184111, "percentage": 26.2, "elapsed_time": "0:54:09", "remaining_time": "2:32:32", "throughput": 19813.1, "total_tokens": 64382848} +{"current_steps": 20470, "total_steps": 78105, "loss": 0.3665, "lr": 4.610515629004391e-06, "epoch": 1.3104154663593879, "percentage": 26.21, "elapsed_time": "0:54:10", "remaining_time": "2:32:31", "throughput": 19813.75, "total_tokens": 64398336} +{"current_steps": 20475, "total_steps": 78105, "loss": 0.3125, "lr": 4.610216128333433e-06, "epoch": 1.3107355483003649, "percentage": 26.21, "elapsed_time": "0:54:10", "remaining_time": "2:32:30", "throughput": 19814.41, "total_tokens": 64413632} +{"current_steps": 20480, "total_steps": 78105, "loss": 0.3254, "lr": 4.6099165222892746e-06, "epoch": 1.3110556302413419, "percentage": 26.22, "elapsed_time": "0:54:11", "remaining_time": "2:32:28", "throughput": 19815.01, "total_tokens": 64428480} +{"current_steps": 20485, "total_steps": 78105, "loss": 0.382, "lr": 4.609616810886875e-06, "epoch": 1.3113757121823186, "percentage": 26.23, "elapsed_time": "0:54:12", "remaining_time": "2:32:27", "throughput": 19815.66, "total_tokens": 64443904} +{"current_steps": 20490, "total_steps": 78105, "loss": 0.3723, "lr": 4.609316994141201e-06, "epoch": 1.3116957941232956, "percentage": 26.23, "elapsed_time": "0:54:12", "remaining_time": "2:32:26", "throughput": 19816.54, "total_tokens": 64461056} +{"current_steps": 20495, "total_steps": 78105, "loss": 0.3374, "lr": 4.6090170720672234e-06, "epoch": 1.3120158760642724, "percentage": 26.24, "elapsed_time": "0:54:13", "remaining_time": "2:32:25", "throughput": 19817.25, "total_tokens": 64476416} +{"current_steps": 20500, "total_steps": 78105, "loss": 0.2786, "lr": 4.608717044679918e-06, "epoch": 1.3123359580052494, "percentage": 26.25, "elapsed_time": "0:54:14", "remaining_time": "2:32:24", "throughput": 19818.0, "total_tokens": 64492544} +{"current_steps": 20505, "total_steps": 78105, "loss": 0.3677, "lr": 4.6084169119942674e-06, "epoch": 1.3126560399462264, "percentage": 26.25, "elapsed_time": "0:54:14", "remaining_time": "2:32:23", "throughput": 19818.62, "total_tokens": 64507456} +{"current_steps": 20510, "total_steps": 78105, "loss": 0.3362, "lr": 4.608116674025259e-06, "epoch": 1.3129761218872031, "percentage": 26.26, "elapsed_time": "0:54:15", "remaining_time": "2:32:22", "throughput": 19819.33, "total_tokens": 64523392} +{"current_steps": 20515, "total_steps": 78105, "loss": 0.2361, "lr": 4.6078163307878845e-06, "epoch": 1.31329620382818, "percentage": 26.27, "elapsed_time": "0:54:16", "remaining_time": "2:32:21", "throughput": 19820.12, "total_tokens": 64539968} +{"current_steps": 20520, "total_steps": 78105, "loss": 0.2773, "lr": 4.607515882297141e-06, "epoch": 1.313616285769157, "percentage": 26.27, "elapsed_time": "0:54:16", "remaining_time": "2:32:19", "throughput": 19820.78, "total_tokens": 64555200} +{"current_steps": 20525, "total_steps": 78105, "loss": 0.4187, "lr": 4.607215328568032e-06, "epoch": 1.313936367710134, "percentage": 26.28, "elapsed_time": "0:54:17", "remaining_time": "2:32:18", "throughput": 19821.48, "total_tokens": 64570944} +{"current_steps": 20530, "total_steps": 78105, "loss": 0.3009, "lr": 4.6069146696155664e-06, "epoch": 1.3142564496511107, "percentage": 26.29, "elapsed_time": "0:54:18", "remaining_time": "2:32:17", "throughput": 19822.13, "total_tokens": 64585984} +{"current_steps": 20535, "total_steps": 78105, "loss": 0.4433, "lr": 4.606613905454757e-06, "epoch": 1.3145765315920876, "percentage": 26.29, "elapsed_time": "0:54:18", "remaining_time": "2:32:16", "throughput": 19822.77, "total_tokens": 64601216} +{"current_steps": 20540, "total_steps": 78105, "loss": 0.4154, "lr": 4.606313036100621e-06, "epoch": 1.3148966135330644, "percentage": 26.3, "elapsed_time": "0:54:19", "remaining_time": "2:32:15", "throughput": 19823.52, "total_tokens": 64617344} +{"current_steps": 20545, "total_steps": 78105, "loss": 0.3504, "lr": 4.606012061568184e-06, "epoch": 1.3152166954740414, "percentage": 26.3, "elapsed_time": "0:54:20", "remaining_time": "2:32:14", "throughput": 19824.16, "total_tokens": 64632576} +{"current_steps": 20550, "total_steps": 78105, "loss": 0.3389, "lr": 4.605710981872474e-06, "epoch": 1.3155367774150182, "percentage": 26.31, "elapsed_time": "0:54:20", "remaining_time": "2:32:13", "throughput": 19824.79, "total_tokens": 64647808} +{"current_steps": 20555, "total_steps": 78105, "loss": 0.4269, "lr": 4.605409797028526e-06, "epoch": 1.3158568593559952, "percentage": 26.32, "elapsed_time": "0:54:21", "remaining_time": "2:32:12", "throughput": 19825.69, "total_tokens": 64664960} +{"current_steps": 20560, "total_steps": 78105, "loss": 0.3313, "lr": 4.60510850705138e-06, "epoch": 1.316176941296972, "percentage": 26.32, "elapsed_time": "0:54:22", "remaining_time": "2:32:10", "throughput": 19826.48, "total_tokens": 64681152} +{"current_steps": 20565, "total_steps": 78105, "loss": 0.2658, "lr": 4.60480711195608e-06, "epoch": 1.316497023237949, "percentage": 26.33, "elapsed_time": "0:54:23", "remaining_time": "2:32:09", "throughput": 19827.19, "total_tokens": 64696576} +{"current_steps": 20570, "total_steps": 78105, "loss": 0.2836, "lr": 4.604505611757676e-06, "epoch": 1.316817105178926, "percentage": 26.34, "elapsed_time": "0:54:23", "remaining_time": "2:32:08", "throughput": 19827.84, "total_tokens": 64711552} +{"current_steps": 20575, "total_steps": 78105, "loss": 0.527, "lr": 4.604204006471223e-06, "epoch": 1.3171371871199027, "percentage": 26.34, "elapsed_time": "0:54:24", "remaining_time": "2:32:07", "throughput": 19828.49, "total_tokens": 64726720} +{"current_steps": 20580, "total_steps": 78105, "loss": 0.3877, "lr": 4.603902296111784e-06, "epoch": 1.3174572690608795, "percentage": 26.35, "elapsed_time": "0:54:24", "remaining_time": "2:32:06", "throughput": 19829.09, "total_tokens": 64741760} +{"current_steps": 20585, "total_steps": 78105, "loss": 0.366, "lr": 4.6036004806944225e-06, "epoch": 1.3177773510018564, "percentage": 26.36, "elapsed_time": "0:54:25", "remaining_time": "2:32:05", "throughput": 19829.79, "total_tokens": 64757696} +{"current_steps": 20590, "total_steps": 78105, "loss": 0.4845, "lr": 4.603298560234209e-06, "epoch": 1.3180974329428334, "percentage": 26.36, "elapsed_time": "0:54:26", "remaining_time": "2:32:04", "throughput": 19830.44, "total_tokens": 64773056} +{"current_steps": 20595, "total_steps": 78105, "loss": 0.2912, "lr": 4.602996534746223e-06, "epoch": 1.3184175148838102, "percentage": 26.37, "elapsed_time": "0:54:26", "remaining_time": "2:32:02", "throughput": 19831.05, "total_tokens": 64787904} +{"current_steps": 20600, "total_steps": 78105, "loss": 0.3269, "lr": 4.6026944042455434e-06, "epoch": 1.3187375968247872, "percentage": 26.37, "elapsed_time": "0:54:27", "remaining_time": "2:32:01", "throughput": 19831.75, "total_tokens": 64803712} +{"current_steps": 20605, "total_steps": 78105, "loss": 0.2514, "lr": 4.602392168747258e-06, "epoch": 1.319057678765764, "percentage": 26.38, "elapsed_time": "0:54:28", "remaining_time": "2:32:00", "throughput": 19832.45, "total_tokens": 64819264} +{"current_steps": 20610, "total_steps": 78105, "loss": 0.4147, "lr": 4.602089828266458e-06, "epoch": 1.319377760706741, "percentage": 26.39, "elapsed_time": "0:54:29", "remaining_time": "2:31:59", "throughput": 19833.19, "total_tokens": 64835136} +{"current_steps": 20615, "total_steps": 78105, "loss": 0.2751, "lr": 4.601787382818242e-06, "epoch": 1.319697842647718, "percentage": 26.39, "elapsed_time": "0:54:29", "remaining_time": "2:31:58", "throughput": 19833.87, "total_tokens": 64850560} +{"current_steps": 20620, "total_steps": 78105, "loss": 0.2697, "lr": 4.601484832417712e-06, "epoch": 1.3200179245886947, "percentage": 26.4, "elapsed_time": "0:54:30", "remaining_time": "2:31:57", "throughput": 19834.52, "total_tokens": 64865920} +{"current_steps": 20625, "total_steps": 78105, "loss": 0.3781, "lr": 4.6011821770799744e-06, "epoch": 1.3203380065296715, "percentage": 26.41, "elapsed_time": "0:54:31", "remaining_time": "2:31:56", "throughput": 19835.45, "total_tokens": 64883520} +{"current_steps": 20630, "total_steps": 78105, "loss": 0.2386, "lr": 4.600879416820146e-06, "epoch": 1.3206580884706485, "percentage": 26.41, "elapsed_time": "0:54:31", "remaining_time": "2:31:55", "throughput": 19836.14, "total_tokens": 64898816} +{"current_steps": 20635, "total_steps": 78105, "loss": 0.3511, "lr": 4.60057655165334e-06, "epoch": 1.3209781704116255, "percentage": 26.42, "elapsed_time": "0:54:32", "remaining_time": "2:31:54", "throughput": 19836.94, "total_tokens": 64915392} +{"current_steps": 20640, "total_steps": 78105, "loss": 0.3353, "lr": 4.6002735815946845e-06, "epoch": 1.3212982523526022, "percentage": 26.43, "elapsed_time": "0:54:33", "remaining_time": "2:31:52", "throughput": 19837.53, "total_tokens": 64930176} +{"current_steps": 20645, "total_steps": 78105, "loss": 0.3782, "lr": 4.5999705066593046e-06, "epoch": 1.3216183342935792, "percentage": 26.43, "elapsed_time": "0:54:33", "remaining_time": "2:31:51", "throughput": 19838.23, "total_tokens": 64945600} +{"current_steps": 20650, "total_steps": 78105, "loss": 0.2715, "lr": 4.599667326862337e-06, "epoch": 1.321938416234556, "percentage": 26.44, "elapsed_time": "0:54:34", "remaining_time": "2:31:50", "throughput": 19838.88, "total_tokens": 64960704} +{"current_steps": 20655, "total_steps": 78105, "loss": 0.3153, "lr": 4.599364042218919e-06, "epoch": 1.322258498175533, "percentage": 26.45, "elapsed_time": "0:54:35", "remaining_time": "2:31:49", "throughput": 19839.65, "total_tokens": 64976832} +{"current_steps": 20660, "total_steps": 78105, "loss": 0.2554, "lr": 4.599060652744197e-06, "epoch": 1.3225785801165097, "percentage": 26.45, "elapsed_time": "0:54:35", "remaining_time": "2:31:48", "throughput": 19840.32, "total_tokens": 64992256} +{"current_steps": 20665, "total_steps": 78105, "loss": 0.2524, "lr": 4.598757158453319e-06, "epoch": 1.3228986620574867, "percentage": 26.46, "elapsed_time": "0:54:36", "remaining_time": "2:31:47", "throughput": 19841.08, "total_tokens": 65008256} +{"current_steps": 20670, "total_steps": 78105, "loss": 0.2878, "lr": 4.598453559361441e-06, "epoch": 1.3232187439984635, "percentage": 26.46, "elapsed_time": "0:54:37", "remaining_time": "2:31:45", "throughput": 19841.61, "total_tokens": 65022720} +{"current_steps": 20675, "total_steps": 78105, "loss": 0.1734, "lr": 4.598149855483722e-06, "epoch": 1.3235388259394405, "percentage": 26.47, "elapsed_time": "0:54:37", "remaining_time": "2:31:44", "throughput": 19842.3, "total_tokens": 65038400} +{"current_steps": 20680, "total_steps": 78105, "loss": 0.411, "lr": 4.597846046835329e-06, "epoch": 1.3238589078804175, "percentage": 26.48, "elapsed_time": "0:54:38", "remaining_time": "2:31:43", "throughput": 19843.04, "total_tokens": 65054272} +{"current_steps": 20685, "total_steps": 78105, "loss": 0.4788, "lr": 4.597542133431431e-06, "epoch": 1.3241789898213943, "percentage": 26.48, "elapsed_time": "0:54:39", "remaining_time": "2:31:42", "throughput": 19843.93, "total_tokens": 65071488} +{"current_steps": 20690, "total_steps": 78105, "loss": 0.3476, "lr": 4.597238115287204e-06, "epoch": 1.3244990717623712, "percentage": 26.49, "elapsed_time": "0:54:39", "remaining_time": "2:31:41", "throughput": 19844.79, "total_tokens": 65088512} +{"current_steps": 20695, "total_steps": 78105, "loss": 0.4639, "lr": 4.596933992417831e-06, "epoch": 1.324819153703348, "percentage": 26.5, "elapsed_time": "0:54:40", "remaining_time": "2:31:40", "throughput": 19845.51, "total_tokens": 65104512} +{"current_steps": 20700, "total_steps": 78105, "loss": 0.2843, "lr": 4.596629764838496e-06, "epoch": 1.325139235644325, "percentage": 26.5, "elapsed_time": "0:54:41", "remaining_time": "2:31:39", "throughput": 19846.24, "total_tokens": 65120256} +{"current_steps": 20705, "total_steps": 78105, "loss": 0.4256, "lr": 4.596325432564392e-06, "epoch": 1.3254593175853018, "percentage": 26.51, "elapsed_time": "0:54:41", "remaining_time": "2:31:38", "throughput": 19846.8, "total_tokens": 65134720} +{"current_steps": 20710, "total_steps": 78105, "loss": 0.3155, "lr": 4.596020995610715e-06, "epoch": 1.3257793995262788, "percentage": 26.52, "elapsed_time": "0:54:42", "remaining_time": "2:31:37", "throughput": 19847.87, "total_tokens": 65153344} +{"current_steps": 20715, "total_steps": 78105, "loss": 0.3079, "lr": 4.595716453992668e-06, "epoch": 1.3260994814672555, "percentage": 26.52, "elapsed_time": "0:54:43", "remaining_time": "2:31:36", "throughput": 19848.61, "total_tokens": 65169280} +{"current_steps": 20720, "total_steps": 78105, "loss": 0.4038, "lr": 4.595411807725456e-06, "epoch": 1.3264195634082325, "percentage": 26.53, "elapsed_time": "0:54:43", "remaining_time": "2:31:35", "throughput": 19849.29, "total_tokens": 65184640} +{"current_steps": 20725, "total_steps": 78105, "loss": 0.4733, "lr": 4.595107056824294e-06, "epoch": 1.3267396453492095, "percentage": 26.53, "elapsed_time": "0:54:44", "remaining_time": "2:31:33", "throughput": 19850.01, "total_tokens": 65200128} +{"current_steps": 20730, "total_steps": 78105, "loss": 0.3263, "lr": 4.594802201304398e-06, "epoch": 1.3270597272901863, "percentage": 26.54, "elapsed_time": "0:54:45", "remaining_time": "2:31:32", "throughput": 19850.76, "total_tokens": 65216256} +{"current_steps": 20735, "total_steps": 78105, "loss": 0.4066, "lr": 4.594497241180992e-06, "epoch": 1.327379809231163, "percentage": 26.55, "elapsed_time": "0:54:46", "remaining_time": "2:31:31", "throughput": 19851.43, "total_tokens": 65231936} +{"current_steps": 20740, "total_steps": 78105, "loss": 0.318, "lr": 4.594192176469303e-06, "epoch": 1.32769989117214, "percentage": 26.55, "elapsed_time": "0:54:46", "remaining_time": "2:31:30", "throughput": 19852.12, "total_tokens": 65247872} +{"current_steps": 20745, "total_steps": 78105, "loss": 0.4635, "lr": 4.593887007184565e-06, "epoch": 1.328019973113117, "percentage": 26.56, "elapsed_time": "0:54:47", "remaining_time": "2:31:29", "throughput": 19852.76, "total_tokens": 65262976} +{"current_steps": 20750, "total_steps": 78105, "loss": 0.2729, "lr": 4.5935817333420176e-06, "epoch": 1.3283400550540938, "percentage": 26.57, "elapsed_time": "0:54:48", "remaining_time": "2:31:28", "throughput": 19853.42, "total_tokens": 65278336} +{"current_steps": 20755, "total_steps": 78105, "loss": 0.3281, "lr": 4.5932763549569026e-06, "epoch": 1.3286601369950708, "percentage": 26.57, "elapsed_time": "0:54:48", "remaining_time": "2:31:27", "throughput": 19854.2, "total_tokens": 65294912} +{"current_steps": 20760, "total_steps": 78105, "loss": 0.2953, "lr": 4.59297087204447e-06, "epoch": 1.3289802189360476, "percentage": 26.58, "elapsed_time": "0:54:49", "remaining_time": "2:31:26", "throughput": 19855.04, "total_tokens": 65312000} +{"current_steps": 20765, "total_steps": 78105, "loss": 0.2865, "lr": 4.592665284619975e-06, "epoch": 1.3293003008770246, "percentage": 26.59, "elapsed_time": "0:54:50", "remaining_time": "2:31:25", "throughput": 19855.68, "total_tokens": 65327360} +{"current_steps": 20770, "total_steps": 78105, "loss": 0.387, "lr": 4.5923595926986745e-06, "epoch": 1.3296203828180015, "percentage": 26.59, "elapsed_time": "0:54:50", "remaining_time": "2:31:24", "throughput": 19856.63, "total_tokens": 65344832} +{"current_steps": 20775, "total_steps": 78105, "loss": 0.344, "lr": 4.592053796295835e-06, "epoch": 1.3299404647589783, "percentage": 26.6, "elapsed_time": "0:54:51", "remaining_time": "2:31:23", "throughput": 19857.51, "total_tokens": 65362304} +{"current_steps": 20780, "total_steps": 78105, "loss": 0.3077, "lr": 4.591747895426726e-06, "epoch": 1.330260546699955, "percentage": 26.61, "elapsed_time": "0:54:52", "remaining_time": "2:31:22", "throughput": 19858.21, "total_tokens": 65378304} +{"current_steps": 20785, "total_steps": 78105, "loss": 0.2716, "lr": 4.591441890106623e-06, "epoch": 1.330580628640932, "percentage": 26.61, "elapsed_time": "0:54:52", "remaining_time": "2:31:21", "throughput": 19858.83, "total_tokens": 65393344} +{"current_steps": 20790, "total_steps": 78105, "loss": 0.315, "lr": 4.591135780350805e-06, "epoch": 1.330900710581909, "percentage": 26.62, "elapsed_time": "0:54:53", "remaining_time": "2:31:19", "throughput": 19859.53, "total_tokens": 65409408} +{"current_steps": 20795, "total_steps": 78105, "loss": 0.4502, "lr": 4.590829566174559e-06, "epoch": 1.3312207925228858, "percentage": 26.62, "elapsed_time": "0:54:54", "remaining_time": "2:31:18", "throughput": 19860.2, "total_tokens": 65424832} +{"current_steps": 20800, "total_steps": 78105, "loss": 0.3672, "lr": 4.590523247593176e-06, "epoch": 1.3315408744638628, "percentage": 26.63, "elapsed_time": "0:54:54", "remaining_time": "2:31:17", "throughput": 19861.01, "total_tokens": 65441216} +{"current_steps": 20805, "total_steps": 78105, "loss": 0.4188, "lr": 4.590216824621951e-06, "epoch": 1.3318609564048396, "percentage": 26.64, "elapsed_time": "0:54:55", "remaining_time": "2:31:16", "throughput": 19861.78, "total_tokens": 65457600} +{"current_steps": 20810, "total_steps": 78105, "loss": 0.4023, "lr": 4.589910297276185e-06, "epoch": 1.3321810383458166, "percentage": 26.64, "elapsed_time": "0:54:56", "remaining_time": "2:31:15", "throughput": 19862.51, "total_tokens": 65473728} +{"current_steps": 20815, "total_steps": 78105, "loss": 0.2281, "lr": 4.589603665571184e-06, "epoch": 1.3325011202867934, "percentage": 26.65, "elapsed_time": "0:54:57", "remaining_time": "2:31:14", "throughput": 19863.23, "total_tokens": 65489728} +{"current_steps": 20820, "total_steps": 78105, "loss": 0.2818, "lr": 4.5892969295222615e-06, "epoch": 1.3328212022277703, "percentage": 26.66, "elapsed_time": "0:54:57", "remaining_time": "2:31:13", "throughput": 19864.04, "total_tokens": 65506176} +{"current_steps": 20825, "total_steps": 78105, "loss": 0.3487, "lr": 4.588990089144731e-06, "epoch": 1.333141284168747, "percentage": 26.66, "elapsed_time": "0:54:58", "remaining_time": "2:31:12", "throughput": 19864.72, "total_tokens": 65521728} +{"current_steps": 20830, "total_steps": 78105, "loss": 0.2838, "lr": 4.5886831444539195e-06, "epoch": 1.333461366109724, "percentage": 26.67, "elapsed_time": "0:54:59", "remaining_time": "2:31:11", "throughput": 19865.4, "total_tokens": 65536832} +{"current_steps": 20835, "total_steps": 78105, "loss": 0.3904, "lr": 4.588376095465149e-06, "epoch": 1.333781448050701, "percentage": 26.68, "elapsed_time": "0:54:59", "remaining_time": "2:31:10", "throughput": 19866.46, "total_tokens": 65555776} +{"current_steps": 20840, "total_steps": 78105, "loss": 0.3607, "lr": 4.588068942193755e-06, "epoch": 1.3341015299916779, "percentage": 26.68, "elapsed_time": "0:55:00", "remaining_time": "2:31:09", "throughput": 19867.04, "total_tokens": 65570944} +{"current_steps": 20845, "total_steps": 78105, "loss": 0.284, "lr": 4.587761684655075e-06, "epoch": 1.3344216119326546, "percentage": 26.69, "elapsed_time": "0:55:01", "remaining_time": "2:31:08", "throughput": 19867.63, "total_tokens": 65586048} +{"current_steps": 20850, "total_steps": 78105, "loss": 0.4453, "lr": 4.587454322864451e-06, "epoch": 1.3347416938736316, "percentage": 26.69, "elapsed_time": "0:55:01", "remaining_time": "2:31:06", "throughput": 19868.32, "total_tokens": 65601408} +{"current_steps": 20855, "total_steps": 78105, "loss": 0.2531, "lr": 4.5871468568372305e-06, "epoch": 1.3350617758146086, "percentage": 26.7, "elapsed_time": "0:55:02", "remaining_time": "2:31:05", "throughput": 19868.9, "total_tokens": 65616256} +{"current_steps": 20860, "total_steps": 78105, "loss": 0.2526, "lr": 4.586839286588768e-06, "epoch": 1.3353818577555854, "percentage": 26.71, "elapsed_time": "0:55:03", "remaining_time": "2:31:04", "throughput": 19869.58, "total_tokens": 65631936} +{"current_steps": 20865, "total_steps": 78105, "loss": 0.3054, "lr": 4.5865316121344215e-06, "epoch": 1.3357019396965624, "percentage": 26.71, "elapsed_time": "0:55:03", "remaining_time": "2:31:03", "throughput": 19870.26, "total_tokens": 65647488} +{"current_steps": 20870, "total_steps": 78105, "loss": 0.2884, "lr": 4.586223833489555e-06, "epoch": 1.3360220216375391, "percentage": 26.72, "elapsed_time": "0:55:04", "remaining_time": "2:31:02", "throughput": 19871.14, "total_tokens": 65664704} +{"current_steps": 20875, "total_steps": 78105, "loss": 0.4319, "lr": 4.585915950669536e-06, "epoch": 1.3363421035785161, "percentage": 26.73, "elapsed_time": "0:55:05", "remaining_time": "2:31:01", "throughput": 19871.8, "total_tokens": 65680256} +{"current_steps": 20880, "total_steps": 78105, "loss": 0.1895, "lr": 4.585607963689741e-06, "epoch": 1.3366621855194931, "percentage": 26.73, "elapsed_time": "0:55:05", "remaining_time": "2:31:00", "throughput": 19872.5, "total_tokens": 65696000} +{"current_steps": 20885, "total_steps": 78105, "loss": 0.2608, "lr": 4.5852998725655465e-06, "epoch": 1.3369822674604699, "percentage": 26.74, "elapsed_time": "0:55:06", "remaining_time": "2:30:59", "throughput": 19873.27, "total_tokens": 65712640} +{"current_steps": 20890, "total_steps": 78105, "loss": 0.4189, "lr": 4.584991677312339e-06, "epoch": 1.3373023494014467, "percentage": 26.75, "elapsed_time": "0:55:07", "remaining_time": "2:30:58", "throughput": 19873.92, "total_tokens": 65728064} +{"current_steps": 20895, "total_steps": 78105, "loss": 0.2957, "lr": 4.584683377945507e-06, "epoch": 1.3376224313424236, "percentage": 26.75, "elapsed_time": "0:55:08", "remaining_time": "2:30:57", "throughput": 19874.84, "total_tokens": 65746048} +{"current_steps": 20900, "total_steps": 78105, "loss": 0.2619, "lr": 4.584374974480445e-06, "epoch": 1.3379425132834006, "percentage": 26.76, "elapsed_time": "0:55:08", "remaining_time": "2:30:56", "throughput": 19875.47, "total_tokens": 65761344} +{"current_steps": 20905, "total_steps": 78105, "loss": 0.2377, "lr": 4.584066466932556e-06, "epoch": 1.3382625952243774, "percentage": 26.77, "elapsed_time": "0:55:09", "remaining_time": "2:30:54", "throughput": 19876.16, "total_tokens": 65777088} +{"current_steps": 20910, "total_steps": 78105, "loss": 0.322, "lr": 4.583757855317242e-06, "epoch": 1.3385826771653544, "percentage": 26.77, "elapsed_time": "0:55:10", "remaining_time": "2:30:53", "throughput": 19876.87, "total_tokens": 65793024} +{"current_steps": 20915, "total_steps": 78105, "loss": 0.3514, "lr": 4.5834491396499145e-06, "epoch": 1.3389027591063312, "percentage": 26.78, "elapsed_time": "0:55:10", "remaining_time": "2:30:52", "throughput": 19877.41, "total_tokens": 65807744} +{"current_steps": 20920, "total_steps": 78105, "loss": 0.3269, "lr": 4.58314031994599e-06, "epoch": 1.3392228410473082, "percentage": 26.78, "elapsed_time": "0:55:11", "remaining_time": "2:30:51", "throughput": 19878.15, "total_tokens": 65824000} +{"current_steps": 20925, "total_steps": 78105, "loss": 0.3618, "lr": 4.582831396220888e-06, "epoch": 1.339542922988285, "percentage": 26.79, "elapsed_time": "0:55:12", "remaining_time": "2:30:50", "throughput": 19878.87, "total_tokens": 65840064} +{"current_steps": 20930, "total_steps": 78105, "loss": 0.3187, "lr": 4.582522368490035e-06, "epoch": 1.339863004929262, "percentage": 26.8, "elapsed_time": "0:55:12", "remaining_time": "2:30:49", "throughput": 19879.52, "total_tokens": 65855552} +{"current_steps": 20935, "total_steps": 78105, "loss": 0.2784, "lr": 4.582213236768863e-06, "epoch": 1.3401830868702387, "percentage": 26.8, "elapsed_time": "0:55:13", "remaining_time": "2:30:48", "throughput": 19880.39, "total_tokens": 65872448} +{"current_steps": 20940, "total_steps": 78105, "loss": 0.3654, "lr": 4.5819040010728064e-06, "epoch": 1.3405031688112157, "percentage": 26.81, "elapsed_time": "0:55:14", "remaining_time": "2:30:47", "throughput": 19881.14, "total_tokens": 65888448} +{"current_steps": 20945, "total_steps": 78105, "loss": 0.3229, "lr": 4.58159466141731e-06, "epoch": 1.3408232507521927, "percentage": 26.82, "elapsed_time": "0:55:14", "remaining_time": "2:30:46", "throughput": 19881.68, "total_tokens": 65902912} +{"current_steps": 20950, "total_steps": 78105, "loss": 0.4098, "lr": 4.581285217817818e-06, "epoch": 1.3411433326931694, "percentage": 26.82, "elapsed_time": "0:55:15", "remaining_time": "2:30:44", "throughput": 19882.28, "total_tokens": 65918016} +{"current_steps": 20955, "total_steps": 78105, "loss": 0.3622, "lr": 4.580975670289783e-06, "epoch": 1.3414634146341464, "percentage": 26.83, "elapsed_time": "0:55:16", "remaining_time": "2:30:43", "throughput": 19883.1, "total_tokens": 65934656} +{"current_steps": 20960, "total_steps": 78105, "loss": 0.3221, "lr": 4.580666018848663e-06, "epoch": 1.3417834965751232, "percentage": 26.84, "elapsed_time": "0:55:16", "remaining_time": "2:30:42", "throughput": 19883.74, "total_tokens": 65950080} +{"current_steps": 20965, "total_steps": 78105, "loss": 0.3835, "lr": 4.580356263509919e-06, "epoch": 1.3421035785161002, "percentage": 26.84, "elapsed_time": "0:55:17", "remaining_time": "2:30:41", "throughput": 19884.38, "total_tokens": 65965568} +{"current_steps": 20970, "total_steps": 78105, "loss": 0.406, "lr": 4.58004640428902e-06, "epoch": 1.342423660457077, "percentage": 26.85, "elapsed_time": "0:55:18", "remaining_time": "2:30:40", "throughput": 19884.94, "total_tokens": 65980672} +{"current_steps": 20975, "total_steps": 78105, "loss": 0.4468, "lr": 4.579736441201439e-06, "epoch": 1.342743742398054, "percentage": 26.85, "elapsed_time": "0:55:18", "remaining_time": "2:30:39", "throughput": 19885.57, "total_tokens": 65996096} +{"current_steps": 20980, "total_steps": 78105, "loss": 0.388, "lr": 4.579426374262652e-06, "epoch": 1.3430638243390307, "percentage": 26.86, "elapsed_time": "0:55:19", "remaining_time": "2:30:38", "throughput": 19886.45, "total_tokens": 66013120} +{"current_steps": 20985, "total_steps": 78105, "loss": 0.4042, "lr": 4.579116203488143e-06, "epoch": 1.3433839062800077, "percentage": 26.87, "elapsed_time": "0:55:20", "remaining_time": "2:30:37", "throughput": 19887.06, "total_tokens": 66028160} +{"current_steps": 20990, "total_steps": 78105, "loss": 0.4532, "lr": 4.578805928893401e-06, "epoch": 1.3437039882209847, "percentage": 26.87, "elapsed_time": "0:55:20", "remaining_time": "2:30:36", "throughput": 19887.72, "total_tokens": 66043904} +{"current_steps": 20995, "total_steps": 78105, "loss": 0.2902, "lr": 4.578495550493919e-06, "epoch": 1.3440240701619615, "percentage": 26.88, "elapsed_time": "0:55:21", "remaining_time": "2:30:35", "throughput": 19888.27, "total_tokens": 66058624} +{"current_steps": 21000, "total_steps": 78105, "loss": 0.3911, "lr": 4.578185068305195e-06, "epoch": 1.3443441521029382, "percentage": 26.89, "elapsed_time": "0:55:22", "remaining_time": "2:30:33", "throughput": 19888.97, "total_tokens": 66074816} +{"current_steps": 21005, "total_steps": 78105, "loss": 0.2529, "lr": 4.577874482342734e-06, "epoch": 1.3446642340439152, "percentage": 26.89, "elapsed_time": "0:55:22", "remaining_time": "2:30:32", "throughput": 19889.78, "total_tokens": 66091584} +{"current_steps": 21010, "total_steps": 78105, "loss": 0.3473, "lr": 4.577563792622046e-06, "epoch": 1.3449843159848922, "percentage": 26.9, "elapsed_time": "0:55:23", "remaining_time": "2:30:31", "throughput": 19890.41, "total_tokens": 66106752} +{"current_steps": 21015, "total_steps": 78105, "loss": 0.3127, "lr": 4.577252999158642e-06, "epoch": 1.345304397925869, "percentage": 26.91, "elapsed_time": "0:55:24", "remaining_time": "2:30:30", "throughput": 19891.13, "total_tokens": 66122880} +{"current_steps": 21020, "total_steps": 78105, "loss": 0.3402, "lr": 4.576942101968044e-06, "epoch": 1.345624479866846, "percentage": 26.91, "elapsed_time": "0:55:24", "remaining_time": "2:30:29", "throughput": 19891.74, "total_tokens": 66138368} +{"current_steps": 21025, "total_steps": 78105, "loss": 0.3136, "lr": 4.576631101065776e-06, "epoch": 1.3459445618078227, "percentage": 26.92, "elapsed_time": "0:55:25", "remaining_time": "2:30:28", "throughput": 19892.41, "total_tokens": 66154112} +{"current_steps": 21030, "total_steps": 78105, "loss": 0.2815, "lr": 4.576319996467366e-06, "epoch": 1.3462646437487997, "percentage": 26.93, "elapsed_time": "0:55:26", "remaining_time": "2:30:27", "throughput": 19893.01, "total_tokens": 66169408} +{"current_steps": 21035, "total_steps": 78105, "loss": 0.3331, "lr": 4.576008788188353e-06, "epoch": 1.3465847256897767, "percentage": 26.93, "elapsed_time": "0:55:26", "remaining_time": "2:30:26", "throughput": 19893.59, "total_tokens": 66184512} +{"current_steps": 21040, "total_steps": 78105, "loss": 0.3088, "lr": 4.575697476244273e-06, "epoch": 1.3469048076307535, "percentage": 26.94, "elapsed_time": "0:55:27", "remaining_time": "2:30:25", "throughput": 19894.31, "total_tokens": 66200512} +{"current_steps": 21045, "total_steps": 78105, "loss": 0.2739, "lr": 4.575386060650673e-06, "epoch": 1.3472248895717303, "percentage": 26.94, "elapsed_time": "0:55:28", "remaining_time": "2:30:24", "throughput": 19895.06, "total_tokens": 66216512} +{"current_steps": 21050, "total_steps": 78105, "loss": 0.2862, "lr": 4.575074541423104e-06, "epoch": 1.3475449715127072, "percentage": 26.95, "elapsed_time": "0:55:28", "remaining_time": "2:30:23", "throughput": 19895.75, "total_tokens": 66232576} +{"current_steps": 21055, "total_steps": 78105, "loss": 0.2462, "lr": 4.574762918577121e-06, "epoch": 1.3478650534536842, "percentage": 26.96, "elapsed_time": "0:55:29", "remaining_time": "2:30:21", "throughput": 19896.28, "total_tokens": 66247232} +{"current_steps": 21060, "total_steps": 78105, "loss": 0.449, "lr": 4.574451192128286e-06, "epoch": 1.348185135394661, "percentage": 26.96, "elapsed_time": "0:55:30", "remaining_time": "2:30:20", "throughput": 19896.94, "total_tokens": 66262784} +{"current_steps": 21065, "total_steps": 78105, "loss": 0.3133, "lr": 4.574139362092162e-06, "epoch": 1.348505217335638, "percentage": 26.97, "elapsed_time": "0:55:30", "remaining_time": "2:30:19", "throughput": 19897.58, "total_tokens": 66278272} +{"current_steps": 21070, "total_steps": 78105, "loss": 0.3309, "lr": 4.573827428484322e-06, "epoch": 1.3488252992766148, "percentage": 26.98, "elapsed_time": "0:55:31", "remaining_time": "2:30:18", "throughput": 19898.32, "total_tokens": 66294528} +{"current_steps": 21075, "total_steps": 78105, "loss": 0.4058, "lr": 4.5735153913203436e-06, "epoch": 1.3491453812175918, "percentage": 26.98, "elapsed_time": "0:55:32", "remaining_time": "2:30:17", "throughput": 19898.94, "total_tokens": 66309760} +{"current_steps": 21080, "total_steps": 78105, "loss": 0.3035, "lr": 4.573203250615807e-06, "epoch": 1.3494654631585685, "percentage": 26.99, "elapsed_time": "0:55:32", "remaining_time": "2:30:16", "throughput": 19899.53, "total_tokens": 66324800} +{"current_steps": 21085, "total_steps": 78105, "loss": 0.2789, "lr": 4.572891006386298e-06, "epoch": 1.3497855450995455, "percentage": 27.0, "elapsed_time": "0:55:33", "remaining_time": "2:30:15", "throughput": 19900.13, "total_tokens": 66340160} +{"current_steps": 21090, "total_steps": 78105, "loss": 0.339, "lr": 4.5725786586474105e-06, "epoch": 1.3501056270405223, "percentage": 27.0, "elapsed_time": "0:55:34", "remaining_time": "2:30:14", "throughput": 19900.78, "total_tokens": 66355584} +{"current_steps": 21095, "total_steps": 78105, "loss": 0.4497, "lr": 4.57226620741474e-06, "epoch": 1.3504257089814993, "percentage": 27.01, "elapsed_time": "0:55:34", "remaining_time": "2:30:12", "throughput": 19901.37, "total_tokens": 66370624} +{"current_steps": 21100, "total_steps": 78105, "loss": 0.3005, "lr": 4.571953652703889e-06, "epoch": 1.3507457909224763, "percentage": 27.01, "elapsed_time": "0:55:35", "remaining_time": "2:30:11", "throughput": 19901.99, "total_tokens": 66385728} +{"current_steps": 21105, "total_steps": 78105, "loss": 0.3838, "lr": 4.571640994530465e-06, "epoch": 1.351065872863453, "percentage": 27.02, "elapsed_time": "0:55:36", "remaining_time": "2:30:10", "throughput": 19902.65, "total_tokens": 66401600} +{"current_steps": 21110, "total_steps": 78105, "loss": 0.3371, "lr": 4.571328232910081e-06, "epoch": 1.3513859548044298, "percentage": 27.03, "elapsed_time": "0:55:36", "remaining_time": "2:30:09", "throughput": 19903.26, "total_tokens": 66416896} +{"current_steps": 21115, "total_steps": 78105, "loss": 0.4107, "lr": 4.571015367858354e-06, "epoch": 1.3517060367454068, "percentage": 27.03, "elapsed_time": "0:55:37", "remaining_time": "2:30:08", "throughput": 19903.86, "total_tokens": 66432000} +{"current_steps": 21120, "total_steps": 78105, "loss": 0.3327, "lr": 4.570702399390906e-06, "epoch": 1.3520261186863838, "percentage": 27.04, "elapsed_time": "0:55:38", "remaining_time": "2:30:07", "throughput": 19904.43, "total_tokens": 66447104} +{"current_steps": 21125, "total_steps": 78105, "loss": 0.4573, "lr": 4.570389327523367e-06, "epoch": 1.3523462006273606, "percentage": 27.05, "elapsed_time": "0:55:38", "remaining_time": "2:30:06", "throughput": 19904.98, "total_tokens": 66462080} +{"current_steps": 21130, "total_steps": 78105, "loss": 0.3239, "lr": 4.5700761522713694e-06, "epoch": 1.3526662825683375, "percentage": 27.05, "elapsed_time": "0:55:39", "remaining_time": "2:30:05", "throughput": 19905.76, "total_tokens": 66478720} +{"current_steps": 21135, "total_steps": 78105, "loss": 0.2951, "lr": 4.569762873650551e-06, "epoch": 1.3529863645093143, "percentage": 27.06, "elapsed_time": "0:55:40", "remaining_time": "2:30:03", "throughput": 19906.27, "total_tokens": 66492992} +{"current_steps": 21140, "total_steps": 78105, "loss": 0.2879, "lr": 4.5694494916765564e-06, "epoch": 1.3533064464502913, "percentage": 27.07, "elapsed_time": "0:55:40", "remaining_time": "2:30:02", "throughput": 19906.93, "total_tokens": 66508608} +{"current_steps": 21145, "total_steps": 78105, "loss": 0.3293, "lr": 4.569136006365033e-06, "epoch": 1.3536265283912683, "percentage": 27.07, "elapsed_time": "0:55:41", "remaining_time": "2:30:01", "throughput": 19907.65, "total_tokens": 66524608} +{"current_steps": 21150, "total_steps": 78105, "loss": 0.2751, "lr": 4.568822417731635e-06, "epoch": 1.353946610332245, "percentage": 27.08, "elapsed_time": "0:55:42", "remaining_time": "2:30:00", "throughput": 19908.21, "total_tokens": 66539328} +{"current_steps": 21155, "total_steps": 78105, "loss": 0.2727, "lr": 4.568508725792022e-06, "epoch": 1.3542666922732218, "percentage": 27.09, "elapsed_time": "0:55:43", "remaining_time": "2:29:59", "throughput": 19908.98, "total_tokens": 66555776} +{"current_steps": 21160, "total_steps": 78105, "loss": 0.3236, "lr": 4.568194930561857e-06, "epoch": 1.3545867742141988, "percentage": 27.09, "elapsed_time": "0:55:43", "remaining_time": "2:29:58", "throughput": 19909.54, "total_tokens": 66570816} +{"current_steps": 21165, "total_steps": 78105, "loss": 0.3128, "lr": 4.567881032056811e-06, "epoch": 1.3549068561551758, "percentage": 27.1, "elapsed_time": "0:55:44", "remaining_time": "2:29:57", "throughput": 19910.23, "total_tokens": 66586560} +{"current_steps": 21170, "total_steps": 78105, "loss": 0.4138, "lr": 4.567567030292557e-06, "epoch": 1.3552269380961526, "percentage": 27.1, "elapsed_time": "0:55:45", "remaining_time": "2:29:56", "throughput": 19911.14, "total_tokens": 66603840} +{"current_steps": 21175, "total_steps": 78105, "loss": 0.3511, "lr": 4.567252925284775e-06, "epoch": 1.3555470200371296, "percentage": 27.11, "elapsed_time": "0:55:45", "remaining_time": "2:29:55", "throughput": 19911.78, "total_tokens": 66619072} +{"current_steps": 21180, "total_steps": 78105, "loss": 0.3001, "lr": 4.56693871704915e-06, "epoch": 1.3558671019781063, "percentage": 27.12, "elapsed_time": "0:55:46", "remaining_time": "2:29:54", "throughput": 19912.4, "total_tokens": 66634560} +{"current_steps": 21185, "total_steps": 78105, "loss": 0.3181, "lr": 4.5666244056013705e-06, "epoch": 1.3561871839190833, "percentage": 27.12, "elapsed_time": "0:55:47", "remaining_time": "2:29:52", "throughput": 19913.05, "total_tokens": 66650112} +{"current_steps": 21190, "total_steps": 78105, "loss": 0.3988, "lr": 4.566309990957134e-06, "epoch": 1.35650726586006, "percentage": 27.13, "elapsed_time": "0:55:47", "remaining_time": "2:29:51", "throughput": 19913.58, "total_tokens": 66664832} +{"current_steps": 21195, "total_steps": 78105, "loss": 0.3962, "lr": 4.5659954731321405e-06, "epoch": 1.356827347801037, "percentage": 27.14, "elapsed_time": "0:55:48", "remaining_time": "2:29:50", "throughput": 19914.18, "total_tokens": 66680000} +{"current_steps": 21200, "total_steps": 78105, "loss": 0.3252, "lr": 4.565680852142093e-06, "epoch": 1.3571474297420139, "percentage": 27.14, "elapsed_time": "0:55:49", "remaining_time": "2:29:49", "throughput": 19914.79, "total_tokens": 66695296} +{"current_steps": 21205, "total_steps": 78105, "loss": 0.4093, "lr": 4.565366128002704e-06, "epoch": 1.3574675116829908, "percentage": 27.15, "elapsed_time": "0:55:49", "remaining_time": "2:29:48", "throughput": 19915.48, "total_tokens": 66711040} +{"current_steps": 21210, "total_steps": 78105, "loss": 0.3274, "lr": 4.565051300729689e-06, "epoch": 1.3577875936239678, "percentage": 27.16, "elapsed_time": "0:55:50", "remaining_time": "2:29:47", "throughput": 19916.12, "total_tokens": 66726720} +{"current_steps": 21215, "total_steps": 78105, "loss": 0.3752, "lr": 4.564736370338768e-06, "epoch": 1.3581076755649446, "percentage": 27.16, "elapsed_time": "0:55:51", "remaining_time": "2:29:46", "throughput": 19916.92, "total_tokens": 66743360} +{"current_steps": 21220, "total_steps": 78105, "loss": 0.2026, "lr": 4.564421336845668e-06, "epoch": 1.3584277575059216, "percentage": 27.17, "elapsed_time": "0:55:51", "remaining_time": "2:29:45", "throughput": 19917.54, "total_tokens": 66758912} +{"current_steps": 21225, "total_steps": 78105, "loss": 0.3119, "lr": 4.564106200266119e-06, "epoch": 1.3587478394468984, "percentage": 27.17, "elapsed_time": "0:55:52", "remaining_time": "2:29:44", "throughput": 19918.18, "total_tokens": 66774144} +{"current_steps": 21230, "total_steps": 78105, "loss": 0.3391, "lr": 4.563790960615858e-06, "epoch": 1.3590679213878754, "percentage": 27.18, "elapsed_time": "0:55:53", "remaining_time": "2:29:42", "throughput": 19918.83, "total_tokens": 66790080} +{"current_steps": 21235, "total_steps": 78105, "loss": 0.3171, "lr": 4.563475617910627e-06, "epoch": 1.3593880033288521, "percentage": 27.19, "elapsed_time": "0:55:53", "remaining_time": "2:29:41", "throughput": 19919.47, "total_tokens": 66805376} +{"current_steps": 21240, "total_steps": 78105, "loss": 0.2565, "lr": 4.5631601721661715e-06, "epoch": 1.3597080852698291, "percentage": 27.19, "elapsed_time": "0:55:54", "remaining_time": "2:29:40", "throughput": 19920.37, "total_tokens": 66823040} +{"current_steps": 21245, "total_steps": 78105, "loss": 0.3125, "lr": 4.562844623398242e-06, "epoch": 1.3600281672108059, "percentage": 27.2, "elapsed_time": "0:55:55", "remaining_time": "2:29:39", "throughput": 19920.94, "total_tokens": 66838144} +{"current_steps": 21250, "total_steps": 78105, "loss": 0.4704, "lr": 4.5625289716226e-06, "epoch": 1.3603482491517829, "percentage": 27.21, "elapsed_time": "0:55:55", "remaining_time": "2:29:38", "throughput": 19921.63, "total_tokens": 66854144} +{"current_steps": 21255, "total_steps": 78105, "loss": 0.4002, "lr": 4.5622132168550025e-06, "epoch": 1.3606683310927599, "percentage": 27.21, "elapsed_time": "0:55:56", "remaining_time": "2:29:37", "throughput": 19922.29, "total_tokens": 66869568} +{"current_steps": 21260, "total_steps": 78105, "loss": 0.3586, "lr": 4.5618973591112186e-06, "epoch": 1.3609884130337366, "percentage": 27.22, "elapsed_time": "0:55:57", "remaining_time": "2:29:36", "throughput": 19922.98, "total_tokens": 66885184} +{"current_steps": 21265, "total_steps": 78105, "loss": 0.393, "lr": 4.5615813984070215e-06, "epoch": 1.3613084949747134, "percentage": 27.23, "elapsed_time": "0:55:57", "remaining_time": "2:29:35", "throughput": 19923.63, "total_tokens": 66900480} +{"current_steps": 21270, "total_steps": 78105, "loss": 0.2858, "lr": 4.561265334758187e-06, "epoch": 1.3616285769156904, "percentage": 27.23, "elapsed_time": "0:55:58", "remaining_time": "2:29:34", "throughput": 19924.56, "total_tokens": 66918144} +{"current_steps": 21275, "total_steps": 78105, "loss": 0.3018, "lr": 4.560949168180499e-06, "epoch": 1.3619486588566674, "percentage": 27.24, "elapsed_time": "0:55:59", "remaining_time": "2:29:33", "throughput": 19925.2, "total_tokens": 66933376} +{"current_steps": 21280, "total_steps": 78105, "loss": 0.3694, "lr": 4.560632898689744e-06, "epoch": 1.3622687407976442, "percentage": 27.25, "elapsed_time": "0:55:59", "remaining_time": "2:29:32", "throughput": 19925.88, "total_tokens": 66949248} +{"current_steps": 21285, "total_steps": 78105, "loss": 0.4479, "lr": 4.560316526301716e-06, "epoch": 1.3625888227386211, "percentage": 27.25, "elapsed_time": "0:56:00", "remaining_time": "2:29:30", "throughput": 19926.47, "total_tokens": 66964224} +{"current_steps": 21290, "total_steps": 78105, "loss": 0.3103, "lr": 4.560000051032212e-06, "epoch": 1.362908904679598, "percentage": 27.26, "elapsed_time": "0:56:01", "remaining_time": "2:29:29", "throughput": 19927.09, "total_tokens": 66979264} +{"current_steps": 21295, "total_steps": 78105, "loss": 0.4098, "lr": 4.559683472897037e-06, "epoch": 1.363228986620575, "percentage": 27.26, "elapsed_time": "0:56:01", "remaining_time": "2:29:28", "throughput": 19927.68, "total_tokens": 66994176} +{"current_steps": 21300, "total_steps": 78105, "loss": 0.3061, "lr": 4.5593667919119956e-06, "epoch": 1.363549068561552, "percentage": 27.27, "elapsed_time": "0:56:02", "remaining_time": "2:29:27", "throughput": 19928.33, "total_tokens": 67009856} +{"current_steps": 21305, "total_steps": 78105, "loss": 0.2881, "lr": 4.559050008092904e-06, "epoch": 1.3638691505025287, "percentage": 27.28, "elapsed_time": "0:56:03", "remaining_time": "2:29:26", "throughput": 19929.09, "total_tokens": 67026304} +{"current_steps": 21310, "total_steps": 78105, "loss": 0.3375, "lr": 4.558733121455582e-06, "epoch": 1.3641892324435054, "percentage": 27.28, "elapsed_time": "0:56:03", "remaining_time": "2:29:25", "throughput": 19929.73, "total_tokens": 67041984} +{"current_steps": 21315, "total_steps": 78105, "loss": 0.4684, "lr": 4.55841613201585e-06, "epoch": 1.3645093143844824, "percentage": 27.29, "elapsed_time": "0:56:04", "remaining_time": "2:29:24", "throughput": 19930.34, "total_tokens": 67057344} +{"current_steps": 21320, "total_steps": 78105, "loss": 0.3709, "lr": 4.558099039789539e-06, "epoch": 1.3648293963254594, "percentage": 27.3, "elapsed_time": "0:56:05", "remaining_time": "2:29:23", "throughput": 19930.94, "total_tokens": 67072512} +{"current_steps": 21325, "total_steps": 78105, "loss": 0.3174, "lr": 4.557781844792482e-06, "epoch": 1.3651494782664362, "percentage": 27.3, "elapsed_time": "0:56:05", "remaining_time": "2:29:22", "throughput": 19931.65, "total_tokens": 67088384} +{"current_steps": 21330, "total_steps": 78105, "loss": 0.3135, "lr": 4.557464547040517e-06, "epoch": 1.3654695602074132, "percentage": 27.31, "elapsed_time": "0:56:06", "remaining_time": "2:29:21", "throughput": 19932.43, "total_tokens": 67105024} +{"current_steps": 21335, "total_steps": 78105, "loss": 0.4033, "lr": 4.557147146549491e-06, "epoch": 1.36578964214839, "percentage": 27.32, "elapsed_time": "0:56:07", "remaining_time": "2:29:21", "throughput": 19934.17, "total_tokens": 67134528} +{"current_steps": 21340, "total_steps": 78105, "loss": 0.3901, "lr": 4.556829643335251e-06, "epoch": 1.366109724089367, "percentage": 27.32, "elapsed_time": "0:56:08", "remaining_time": "2:29:20", "throughput": 19934.76, "total_tokens": 67149760} +{"current_steps": 21345, "total_steps": 78105, "loss": 0.2852, "lr": 4.556512037413653e-06, "epoch": 1.3664298060303437, "percentage": 27.33, "elapsed_time": "0:56:09", "remaining_time": "2:29:19", "throughput": 19935.31, "total_tokens": 67164800} +{"current_steps": 21350, "total_steps": 78105, "loss": 0.3968, "lr": 4.556194328800555e-06, "epoch": 1.3667498879713207, "percentage": 27.33, "elapsed_time": "0:56:09", "remaining_time": "2:29:17", "throughput": 19936.01, "total_tokens": 67180416} +{"current_steps": 21355, "total_steps": 78105, "loss": 0.3061, "lr": 4.555876517511822e-06, "epoch": 1.3670699699122975, "percentage": 27.34, "elapsed_time": "0:56:10", "remaining_time": "2:29:16", "throughput": 19936.62, "total_tokens": 67195840} +{"current_steps": 21360, "total_steps": 78105, "loss": 0.2871, "lr": 4.555558603563326e-06, "epoch": 1.3673900518532744, "percentage": 27.35, "elapsed_time": "0:56:11", "remaining_time": "2:29:15", "throughput": 19937.24, "total_tokens": 67211328} +{"current_steps": 21365, "total_steps": 78105, "loss": 0.3657, "lr": 4.55524058697094e-06, "epoch": 1.3677101337942514, "percentage": 27.35, "elapsed_time": "0:56:11", "remaining_time": "2:29:14", "throughput": 19937.79, "total_tokens": 67225920} +{"current_steps": 21370, "total_steps": 78105, "loss": 0.3366, "lr": 4.554922467750544e-06, "epoch": 1.3680302157352282, "percentage": 27.36, "elapsed_time": "0:56:12", "remaining_time": "2:29:13", "throughput": 19938.31, "total_tokens": 67240832} +{"current_steps": 21375, "total_steps": 78105, "loss": 0.4125, "lr": 4.554604245918024e-06, "epoch": 1.368350297676205, "percentage": 27.37, "elapsed_time": "0:56:13", "remaining_time": "2:29:12", "throughput": 19938.92, "total_tokens": 67256128} +{"current_steps": 21380, "total_steps": 78105, "loss": 0.3582, "lr": 4.55428592148927e-06, "epoch": 1.368670379617182, "percentage": 27.37, "elapsed_time": "0:56:13", "remaining_time": "2:29:11", "throughput": 19939.66, "total_tokens": 67272512} +{"current_steps": 21385, "total_steps": 78105, "loss": 0.2941, "lr": 4.553967494480177e-06, "epoch": 1.368990461558159, "percentage": 27.38, "elapsed_time": "0:56:14", "remaining_time": "2:29:10", "throughput": 19940.3, "total_tokens": 67288064} +{"current_steps": 21390, "total_steps": 78105, "loss": 0.2946, "lr": 4.553648964906646e-06, "epoch": 1.3693105434991357, "percentage": 27.39, "elapsed_time": "0:56:15", "remaining_time": "2:29:09", "throughput": 19941.0, "total_tokens": 67304128} +{"current_steps": 21395, "total_steps": 78105, "loss": 0.352, "lr": 4.553330332784583e-06, "epoch": 1.3696306254401127, "percentage": 27.39, "elapsed_time": "0:56:15", "remaining_time": "2:29:08", "throughput": 19941.58, "total_tokens": 67319232} +{"current_steps": 21400, "total_steps": 78105, "loss": 0.3625, "lr": 4.553011598129899e-06, "epoch": 1.3699507073810895, "percentage": 27.4, "elapsed_time": "0:56:16", "remaining_time": "2:29:06", "throughput": 19942.3, "total_tokens": 67335296} +{"current_steps": 21405, "total_steps": 78105, "loss": 0.3375, "lr": 4.552692760958509e-06, "epoch": 1.3702707893220665, "percentage": 27.41, "elapsed_time": "0:56:17", "remaining_time": "2:29:05", "throughput": 19942.77, "total_tokens": 67349760} +{"current_steps": 21410, "total_steps": 78105, "loss": 0.3049, "lr": 4.552373821286334e-06, "epoch": 1.3705908712630435, "percentage": 27.41, "elapsed_time": "0:56:17", "remaining_time": "2:29:04", "throughput": 19943.36, "total_tokens": 67364672} +{"current_steps": 21415, "total_steps": 78105, "loss": 0.3813, "lr": 4.5520547791293025e-06, "epoch": 1.3709109532040202, "percentage": 27.42, "elapsed_time": "0:56:18", "remaining_time": "2:29:03", "throughput": 19943.96, "total_tokens": 67379648} +{"current_steps": 21420, "total_steps": 78105, "loss": 0.2384, "lr": 4.551735634503343e-06, "epoch": 1.371231035144997, "percentage": 27.42, "elapsed_time": "0:56:19", "remaining_time": "2:29:02", "throughput": 19944.54, "total_tokens": 67394432} +{"current_steps": 21425, "total_steps": 78105, "loss": 0.3446, "lr": 4.551416387424393e-06, "epoch": 1.371551117085974, "percentage": 27.43, "elapsed_time": "0:56:19", "remaining_time": "2:29:01", "throughput": 19945.17, "total_tokens": 67409536} +{"current_steps": 21430, "total_steps": 78105, "loss": 0.3889, "lr": 4.551097037908394e-06, "epoch": 1.371871199026951, "percentage": 27.44, "elapsed_time": "0:56:20", "remaining_time": "2:29:00", "throughput": 19945.86, "total_tokens": 67425664} +{"current_steps": 21435, "total_steps": 78105, "loss": 0.3429, "lr": 4.5507775859712935e-06, "epoch": 1.3721912809679278, "percentage": 27.44, "elapsed_time": "0:56:21", "remaining_time": "2:28:58", "throughput": 19946.45, "total_tokens": 67440960} +{"current_steps": 21440, "total_steps": 78105, "loss": 0.3046, "lr": 4.550458031629041e-06, "epoch": 1.3725113629089047, "percentage": 27.45, "elapsed_time": "0:56:21", "remaining_time": "2:28:57", "throughput": 19946.94, "total_tokens": 67455744} +{"current_steps": 21445, "total_steps": 78105, "loss": 0.3499, "lr": 4.550138374897596e-06, "epoch": 1.3728314448498815, "percentage": 27.46, "elapsed_time": "0:56:22", "remaining_time": "2:28:56", "throughput": 19947.53, "total_tokens": 67471104} +{"current_steps": 21450, "total_steps": 78105, "loss": 0.4008, "lr": 4.549818615792919e-06, "epoch": 1.3731515267908585, "percentage": 27.46, "elapsed_time": "0:56:23", "remaining_time": "2:28:55", "throughput": 19948.13, "total_tokens": 67486272} +{"current_steps": 21455, "total_steps": 78105, "loss": 0.3321, "lr": 4.549498754330978e-06, "epoch": 1.3734716087318353, "percentage": 27.47, "elapsed_time": "0:56:23", "remaining_time": "2:28:54", "throughput": 19948.78, "total_tokens": 67502144} +{"current_steps": 21460, "total_steps": 78105, "loss": 0.285, "lr": 4.549178790527744e-06, "epoch": 1.3737916906728123, "percentage": 27.48, "elapsed_time": "0:56:24", "remaining_time": "2:28:53", "throughput": 19949.34, "total_tokens": 67517248} +{"current_steps": 21465, "total_steps": 78105, "loss": 0.3678, "lr": 4.548858724399195e-06, "epoch": 1.374111772613789, "percentage": 27.48, "elapsed_time": "0:56:25", "remaining_time": "2:28:52", "throughput": 19949.98, "total_tokens": 67532864} +{"current_steps": 21470, "total_steps": 78105, "loss": 0.2855, "lr": 4.548538555961314e-06, "epoch": 1.374431854554766, "percentage": 27.49, "elapsed_time": "0:56:25", "remaining_time": "2:28:51", "throughput": 19950.66, "total_tokens": 67548608} +{"current_steps": 21475, "total_steps": 78105, "loss": 0.3305, "lr": 4.548218285230087e-06, "epoch": 1.374751936495743, "percentage": 27.5, "elapsed_time": "0:56:26", "remaining_time": "2:28:50", "throughput": 19951.33, "total_tokens": 67564544} +{"current_steps": 21480, "total_steps": 78105, "loss": 0.3284, "lr": 4.547897912221509e-06, "epoch": 1.3750720184367198, "percentage": 27.5, "elapsed_time": "0:56:27", "remaining_time": "2:28:49", "throughput": 19951.92, "total_tokens": 67579776} +{"current_steps": 21485, "total_steps": 78105, "loss": 0.4116, "lr": 4.547577436951575e-06, "epoch": 1.3753921003776968, "percentage": 27.51, "elapsed_time": "0:56:27", "remaining_time": "2:28:48", "throughput": 19952.62, "total_tokens": 67596096} +{"current_steps": 21490, "total_steps": 78105, "loss": 0.3156, "lr": 4.547256859436289e-06, "epoch": 1.3757121823186735, "percentage": 27.51, "elapsed_time": "0:56:28", "remaining_time": "2:28:46", "throughput": 19953.2, "total_tokens": 67611136} +{"current_steps": 21495, "total_steps": 78105, "loss": 0.3529, "lr": 4.54693617969166e-06, "epoch": 1.3760322642596505, "percentage": 27.52, "elapsed_time": "0:56:29", "remaining_time": "2:28:45", "throughput": 19953.96, "total_tokens": 67627840} +{"current_steps": 21500, "total_steps": 78105, "loss": 0.348, "lr": 4.5466153977337004e-06, "epoch": 1.3763523462006273, "percentage": 27.53, "elapsed_time": "0:56:29", "remaining_time": "2:28:44", "throughput": 19954.6, "total_tokens": 67643456} +{"current_steps": 21505, "total_steps": 78105, "loss": 0.4476, "lr": 4.5462945135784285e-06, "epoch": 1.3766724281416043, "percentage": 27.53, "elapsed_time": "0:56:30", "remaining_time": "2:28:43", "throughput": 19955.33, "total_tokens": 67659840} +{"current_steps": 21510, "total_steps": 78105, "loss": 0.357, "lr": 4.545973527241867e-06, "epoch": 1.376992510082581, "percentage": 27.54, "elapsed_time": "0:56:31", "remaining_time": "2:28:42", "throughput": 19955.86, "total_tokens": 67674624} +{"current_steps": 21515, "total_steps": 78105, "loss": 0.4121, "lr": 4.545652438740045e-06, "epoch": 1.377312592023558, "percentage": 27.55, "elapsed_time": "0:56:31", "remaining_time": "2:28:41", "throughput": 19956.43, "total_tokens": 67689664} +{"current_steps": 21520, "total_steps": 78105, "loss": 0.3684, "lr": 4.5453312480889946e-06, "epoch": 1.377632673964535, "percentage": 27.55, "elapsed_time": "0:56:32", "remaining_time": "2:28:40", "throughput": 19957.04, "total_tokens": 67705280} +{"current_steps": 21525, "total_steps": 78105, "loss": 0.2759, "lr": 4.545009955304756e-06, "epoch": 1.3779527559055118, "percentage": 27.56, "elapsed_time": "0:56:33", "remaining_time": "2:28:39", "throughput": 19957.64, "total_tokens": 67720640} +{"current_steps": 21530, "total_steps": 78105, "loss": 0.3539, "lr": 4.544688560403372e-06, "epoch": 1.3782728378464886, "percentage": 27.57, "elapsed_time": "0:56:33", "remaining_time": "2:28:38", "throughput": 19958.17, "total_tokens": 67735424} +{"current_steps": 21535, "total_steps": 78105, "loss": 0.287, "lr": 4.544367063400893e-06, "epoch": 1.3785929197874656, "percentage": 27.57, "elapsed_time": "0:56:34", "remaining_time": "2:28:37", "throughput": 19958.8, "total_tokens": 67750528} +{"current_steps": 21540, "total_steps": 78105, "loss": 0.2853, "lr": 4.544045464313371e-06, "epoch": 1.3789130017284426, "percentage": 27.58, "elapsed_time": "0:56:35", "remaining_time": "2:28:35", "throughput": 19959.47, "total_tokens": 67766464} +{"current_steps": 21545, "total_steps": 78105, "loss": 0.2795, "lr": 4.543723763156864e-06, "epoch": 1.3792330836694193, "percentage": 27.58, "elapsed_time": "0:56:35", "remaining_time": "2:28:34", "throughput": 19960.08, "total_tokens": 67782144} +{"current_steps": 21550, "total_steps": 78105, "loss": 0.3607, "lr": 4.543401959947439e-06, "epoch": 1.3795531656103963, "percentage": 27.59, "elapsed_time": "0:56:36", "remaining_time": "2:28:33", "throughput": 19960.74, "total_tokens": 67798208} +{"current_steps": 21555, "total_steps": 78105, "loss": 0.278, "lr": 4.543080054701164e-06, "epoch": 1.379873247551373, "percentage": 27.6, "elapsed_time": "0:56:37", "remaining_time": "2:28:32", "throughput": 19961.37, "total_tokens": 67813504} +{"current_steps": 21560, "total_steps": 78105, "loss": 0.3043, "lr": 4.542758047434113e-06, "epoch": 1.38019332949235, "percentage": 27.6, "elapsed_time": "0:56:37", "remaining_time": "2:28:31", "throughput": 19962.0, "total_tokens": 67829248} +{"current_steps": 21565, "total_steps": 78105, "loss": 0.3572, "lr": 4.542435938162365e-06, "epoch": 1.380513411433327, "percentage": 27.61, "elapsed_time": "0:56:38", "remaining_time": "2:28:30", "throughput": 19962.6, "total_tokens": 67844544} +{"current_steps": 21570, "total_steps": 78105, "loss": 0.2984, "lr": 4.542113726902005e-06, "epoch": 1.3808334933743038, "percentage": 27.62, "elapsed_time": "0:56:39", "remaining_time": "2:28:29", "throughput": 19963.22, "total_tokens": 67860032} +{"current_steps": 21575, "total_steps": 78105, "loss": 0.4087, "lr": 4.5417914136691225e-06, "epoch": 1.3811535753152806, "percentage": 27.62, "elapsed_time": "0:56:39", "remaining_time": "2:28:28", "throughput": 19963.81, "total_tokens": 67875584} +{"current_steps": 21580, "total_steps": 78105, "loss": 0.3223, "lr": 4.541468998479812e-06, "epoch": 1.3814736572562576, "percentage": 27.63, "elapsed_time": "0:56:40", "remaining_time": "2:28:27", "throughput": 19964.51, "total_tokens": 67891584} +{"current_steps": 21585, "total_steps": 78105, "loss": 0.296, "lr": 4.5411464813501736e-06, "epoch": 1.3817937391972346, "percentage": 27.64, "elapsed_time": "0:56:41", "remaining_time": "2:28:26", "throughput": 19965.39, "total_tokens": 67909120} +{"current_steps": 21590, "total_steps": 78105, "loss": 0.2323, "lr": 4.540823862296311e-06, "epoch": 1.3821138211382114, "percentage": 27.64, "elapsed_time": "0:56:42", "remaining_time": "2:28:25", "throughput": 19966.16, "total_tokens": 67925952} +{"current_steps": 21595, "total_steps": 78105, "loss": 0.3601, "lr": 4.5405011413343355e-06, "epoch": 1.3824339030791883, "percentage": 27.65, "elapsed_time": "0:56:42", "remaining_time": "2:28:24", "throughput": 19966.76, "total_tokens": 67941120} +{"current_steps": 21600, "total_steps": 78105, "loss": 0.3833, "lr": 4.540178318480362e-06, "epoch": 1.382753985020165, "percentage": 27.66, "elapsed_time": "0:56:43", "remaining_time": "2:28:23", "throughput": 19967.46, "total_tokens": 67957120} +{"current_steps": 21605, "total_steps": 78105, "loss": 0.3215, "lr": 4.53985539375051e-06, "epoch": 1.383074066961142, "percentage": 27.66, "elapsed_time": "0:56:44", "remaining_time": "2:28:22", "throughput": 19968.06, "total_tokens": 67972288} +{"current_steps": 21610, "total_steps": 78105, "loss": 0.4772, "lr": 4.539532367160905e-06, "epoch": 1.3833941489021189, "percentage": 27.67, "elapsed_time": "0:56:44", "remaining_time": "2:28:21", "throughput": 19968.75, "total_tokens": 67988544} +{"current_steps": 21615, "total_steps": 78105, "loss": 0.2616, "lr": 4.539209238727677e-06, "epoch": 1.3837142308430959, "percentage": 27.67, "elapsed_time": "0:56:45", "remaining_time": "2:28:19", "throughput": 19969.29, "total_tokens": 68003200} +{"current_steps": 21620, "total_steps": 78105, "loss": 0.2397, "lr": 4.5388860084669616e-06, "epoch": 1.3840343127840726, "percentage": 27.68, "elapsed_time": "0:56:46", "remaining_time": "2:28:18", "throughput": 19969.96, "total_tokens": 68019392} +{"current_steps": 21625, "total_steps": 78105, "loss": 0.2829, "lr": 4.538562676394898e-06, "epoch": 1.3843543947250496, "percentage": 27.69, "elapsed_time": "0:56:46", "remaining_time": "2:28:17", "throughput": 19970.58, "total_tokens": 68035328} +{"current_steps": 21630, "total_steps": 78105, "loss": 0.3202, "lr": 4.5382392425276345e-06, "epoch": 1.3846744766660266, "percentage": 27.69, "elapsed_time": "0:56:47", "remaining_time": "2:28:16", "throughput": 19971.23, "total_tokens": 68051136} +{"current_steps": 21635, "total_steps": 78105, "loss": 0.3868, "lr": 4.537915706881319e-06, "epoch": 1.3849945586070034, "percentage": 27.7, "elapsed_time": "0:56:48", "remaining_time": "2:28:15", "throughput": 19971.77, "total_tokens": 68066176} +{"current_steps": 21640, "total_steps": 78105, "loss": 0.2949, "lr": 4.5375920694721085e-06, "epoch": 1.3853146405479801, "percentage": 27.71, "elapsed_time": "0:56:48", "remaining_time": "2:28:14", "throughput": 19972.37, "total_tokens": 68081536} +{"current_steps": 21645, "total_steps": 78105, "loss": 0.3576, "lr": 4.537268330316163e-06, "epoch": 1.3856347224889571, "percentage": 27.71, "elapsed_time": "0:56:49", "remaining_time": "2:28:13", "throughput": 19973.04, "total_tokens": 68097472} +{"current_steps": 21650, "total_steps": 78105, "loss": 0.4295, "lr": 4.53694448942965e-06, "epoch": 1.3859548044299341, "percentage": 27.72, "elapsed_time": "0:56:50", "remaining_time": "2:28:12", "throughput": 19973.73, "total_tokens": 68113728} +{"current_steps": 21655, "total_steps": 78105, "loss": 0.3789, "lr": 4.536620546828738e-06, "epoch": 1.386274886370911, "percentage": 27.73, "elapsed_time": "0:56:50", "remaining_time": "2:28:11", "throughput": 19974.41, "total_tokens": 68129920} +{"current_steps": 21660, "total_steps": 78105, "loss": 0.3784, "lr": 4.536296502529605e-06, "epoch": 1.3865949683118879, "percentage": 27.73, "elapsed_time": "0:56:51", "remaining_time": "2:28:10", "throughput": 19974.96, "total_tokens": 68144832} +{"current_steps": 21665, "total_steps": 78105, "loss": 0.2829, "lr": 4.535972356548431e-06, "epoch": 1.3869150502528647, "percentage": 27.74, "elapsed_time": "0:56:52", "remaining_time": "2:28:09", "throughput": 19975.73, "total_tokens": 68161728} +{"current_steps": 21670, "total_steps": 78105, "loss": 0.3346, "lr": 4.535648108901403e-06, "epoch": 1.3872351321938416, "percentage": 27.74, "elapsed_time": "0:56:52", "remaining_time": "2:28:08", "throughput": 19976.38, "total_tokens": 68177280} +{"current_steps": 21675, "total_steps": 78105, "loss": 0.3269, "lr": 4.535323759604712e-06, "epoch": 1.3875552141348186, "percentage": 27.75, "elapsed_time": "0:56:53", "remaining_time": "2:28:07", "throughput": 19977.02, "total_tokens": 68192640} +{"current_steps": 21680, "total_steps": 78105, "loss": 0.2403, "lr": 4.534999308674553e-06, "epoch": 1.3878752960757954, "percentage": 27.76, "elapsed_time": "0:56:54", "remaining_time": "2:28:06", "throughput": 19977.75, "total_tokens": 68208960} +{"current_steps": 21685, "total_steps": 78105, "loss": 0.3585, "lr": 4.534674756127129e-06, "epoch": 1.3881953780167722, "percentage": 27.76, "elapsed_time": "0:56:54", "remaining_time": "2:28:04", "throughput": 19978.31, "total_tokens": 68224192} +{"current_steps": 21690, "total_steps": 78105, "loss": 0.231, "lr": 4.534350101978646e-06, "epoch": 1.3885154599577492, "percentage": 27.77, "elapsed_time": "0:56:55", "remaining_time": "2:28:03", "throughput": 19978.99, "total_tokens": 68240128} +{"current_steps": 21695, "total_steps": 78105, "loss": 0.2199, "lr": 4.5340253462453155e-06, "epoch": 1.3888355418987262, "percentage": 27.78, "elapsed_time": "0:56:56", "remaining_time": "2:28:02", "throughput": 19979.81, "total_tokens": 68257408} +{"current_steps": 21700, "total_steps": 78105, "loss": 0.2658, "lr": 4.533700488943354e-06, "epoch": 1.389155623839703, "percentage": 27.78, "elapsed_time": "0:56:57", "remaining_time": "2:28:01", "throughput": 19980.46, "total_tokens": 68273408} +{"current_steps": 21705, "total_steps": 78105, "loss": 0.3838, "lr": 4.533375530088983e-06, "epoch": 1.38947570578068, "percentage": 27.79, "elapsed_time": "0:56:57", "remaining_time": "2:28:00", "throughput": 19981.28, "total_tokens": 68290688} +{"current_steps": 21710, "total_steps": 78105, "loss": 0.2756, "lr": 4.533050469698429e-06, "epoch": 1.3897957877216567, "percentage": 27.8, "elapsed_time": "0:56:58", "remaining_time": "2:27:59", "throughput": 19981.95, "total_tokens": 68306560} +{"current_steps": 21715, "total_steps": 78105, "loss": 0.2777, "lr": 4.5327253077879255e-06, "epoch": 1.3901158696626337, "percentage": 27.8, "elapsed_time": "0:56:59", "remaining_time": "2:27:58", "throughput": 19982.47, "total_tokens": 68321472} +{"current_steps": 21720, "total_steps": 78105, "loss": 0.3467, "lr": 4.532400044373707e-06, "epoch": 1.3904359516036104, "percentage": 27.81, "elapsed_time": "0:56:59", "remaining_time": "2:27:57", "throughput": 19983.04, "total_tokens": 68336640} +{"current_steps": 21725, "total_steps": 78105, "loss": 0.4255, "lr": 4.532074679472017e-06, "epoch": 1.3907560335445874, "percentage": 27.82, "elapsed_time": "0:57:00", "remaining_time": "2:27:56", "throughput": 19983.94, "total_tokens": 68354880} +{"current_steps": 21730, "total_steps": 78105, "loss": 0.2911, "lr": 4.531749213099103e-06, "epoch": 1.3910761154855642, "percentage": 27.82, "elapsed_time": "0:57:01", "remaining_time": "2:27:55", "throughput": 19984.65, "total_tokens": 68370944} +{"current_steps": 21735, "total_steps": 78105, "loss": 0.414, "lr": 4.531423645271215e-06, "epoch": 1.3913961974265412, "percentage": 27.83, "elapsed_time": "0:57:01", "remaining_time": "2:27:54", "throughput": 19985.37, "total_tokens": 68387264} +{"current_steps": 21740, "total_steps": 78105, "loss": 0.3035, "lr": 4.531097976004613e-06, "epoch": 1.3917162793675182, "percentage": 27.83, "elapsed_time": "0:57:02", "remaining_time": "2:27:53", "throughput": 19986.14, "total_tokens": 68403968} +{"current_steps": 21745, "total_steps": 78105, "loss": 0.4344, "lr": 4.530772205315557e-06, "epoch": 1.392036361308495, "percentage": 27.84, "elapsed_time": "0:57:03", "remaining_time": "2:27:52", "throughput": 19986.73, "total_tokens": 68419200} +{"current_steps": 21750, "total_steps": 78105, "loss": 0.2462, "lr": 4.530446333220314e-06, "epoch": 1.392356443249472, "percentage": 27.85, "elapsed_time": "0:57:03", "remaining_time": "2:27:51", "throughput": 19987.38, "total_tokens": 68434816} +{"current_steps": 21755, "total_steps": 78105, "loss": 0.3194, "lr": 4.530120359735158e-06, "epoch": 1.3926765251904487, "percentage": 27.85, "elapsed_time": "0:57:04", "remaining_time": "2:27:50", "throughput": 19988.08, "total_tokens": 68451200} +{"current_steps": 21760, "total_steps": 78105, "loss": 0.2332, "lr": 4.529794284876367e-06, "epoch": 1.3929966071314257, "percentage": 27.86, "elapsed_time": "0:57:05", "remaining_time": "2:27:49", "throughput": 19988.88, "total_tokens": 68468480} +{"current_steps": 21765, "total_steps": 78105, "loss": 0.3941, "lr": 4.529468108660221e-06, "epoch": 1.3933166890724025, "percentage": 27.87, "elapsed_time": "0:57:06", "remaining_time": "2:27:48", "throughput": 19989.57, "total_tokens": 68484928} +{"current_steps": 21770, "total_steps": 78105, "loss": 0.4212, "lr": 4.529141831103009e-06, "epoch": 1.3936367710133795, "percentage": 27.87, "elapsed_time": "0:57:06", "remaining_time": "2:27:47", "throughput": 19990.08, "total_tokens": 68499648} +{"current_steps": 21775, "total_steps": 78105, "loss": 0.2763, "lr": 4.528815452221023e-06, "epoch": 1.3939568529543562, "percentage": 27.88, "elapsed_time": "0:57:07", "remaining_time": "2:27:46", "throughput": 19990.72, "total_tokens": 68515072} +{"current_steps": 21780, "total_steps": 78105, "loss": 0.3863, "lr": 4.528488972030561e-06, "epoch": 1.3942769348953332, "percentage": 27.89, "elapsed_time": "0:57:08", "remaining_time": "2:27:45", "throughput": 19991.36, "total_tokens": 68530688} +{"current_steps": 21785, "total_steps": 78105, "loss": 0.3287, "lr": 4.528162390547926e-06, "epoch": 1.3945970168363102, "percentage": 27.89, "elapsed_time": "0:57:08", "remaining_time": "2:27:44", "throughput": 19991.9, "total_tokens": 68545536} +{"current_steps": 21790, "total_steps": 78105, "loss": 0.3477, "lr": 4.527835707789426e-06, "epoch": 1.394917098777287, "percentage": 27.9, "elapsed_time": "0:57:09", "remaining_time": "2:27:42", "throughput": 19992.36, "total_tokens": 68559680} +{"current_steps": 21795, "total_steps": 78105, "loss": 0.3721, "lr": 4.527508923771373e-06, "epoch": 1.3952371807182637, "percentage": 27.9, "elapsed_time": "0:57:09", "remaining_time": "2:27:41", "throughput": 19992.93, "total_tokens": 68574784} +{"current_steps": 21800, "total_steps": 78105, "loss": 0.3482, "lr": 4.527182038510085e-06, "epoch": 1.3955572626592407, "percentage": 27.91, "elapsed_time": "0:57:10", "remaining_time": "2:27:40", "throughput": 19993.56, "total_tokens": 68590336} +{"current_steps": 21805, "total_steps": 78105, "loss": 0.3791, "lr": 4.5268550520218854e-06, "epoch": 1.3958773446002177, "percentage": 27.92, "elapsed_time": "0:57:11", "remaining_time": "2:27:39", "throughput": 19994.26, "total_tokens": 68606592} +{"current_steps": 21810, "total_steps": 78105, "loss": 0.2334, "lr": 4.526527964323102e-06, "epoch": 1.3961974265411945, "percentage": 27.92, "elapsed_time": "0:57:11", "remaining_time": "2:27:38", "throughput": 19994.77, "total_tokens": 68621376} +{"current_steps": 21815, "total_steps": 78105, "loss": 0.494, "lr": 4.526200775430068e-06, "epoch": 1.3965175084821715, "percentage": 27.93, "elapsed_time": "0:57:12", "remaining_time": "2:27:37", "throughput": 19995.57, "total_tokens": 68638464} +{"current_steps": 21820, "total_steps": 78105, "loss": 0.3872, "lr": 4.525873485359121e-06, "epoch": 1.3968375904231483, "percentage": 27.94, "elapsed_time": "0:57:13", "remaining_time": "2:27:36", "throughput": 19996.2, "total_tokens": 68653824} +{"current_steps": 21825, "total_steps": 78105, "loss": 0.3141, "lr": 4.525546094126606e-06, "epoch": 1.3971576723641252, "percentage": 27.94, "elapsed_time": "0:57:14", "remaining_time": "2:27:35", "throughput": 19996.92, "total_tokens": 68670144} +{"current_steps": 21830, "total_steps": 78105, "loss": 0.2614, "lr": 4.525218601748867e-06, "epoch": 1.3974777543051022, "percentage": 27.95, "elapsed_time": "0:57:14", "remaining_time": "2:27:34", "throughput": 19997.59, "total_tokens": 68685824} +{"current_steps": 21835, "total_steps": 78105, "loss": 0.3664, "lr": 4.5248910082422625e-06, "epoch": 1.397797836246079, "percentage": 27.96, "elapsed_time": "0:57:15", "remaining_time": "2:27:33", "throughput": 19998.19, "total_tokens": 68701120} +{"current_steps": 21840, "total_steps": 78105, "loss": 0.3722, "lr": 4.524563313623147e-06, "epoch": 1.3981179181870558, "percentage": 27.96, "elapsed_time": "0:57:16", "remaining_time": "2:27:32", "throughput": 19998.78, "total_tokens": 68716416} +{"current_steps": 21845, "total_steps": 78105, "loss": 0.3258, "lr": 4.524235517907885e-06, "epoch": 1.3984380001280328, "percentage": 27.97, "elapsed_time": "0:57:16", "remaining_time": "2:27:30", "throughput": 19999.35, "total_tokens": 68731648} +{"current_steps": 21850, "total_steps": 78105, "loss": 0.2361, "lr": 4.523907621112846e-06, "epoch": 1.3987580820690098, "percentage": 27.98, "elapsed_time": "0:57:17", "remaining_time": "2:27:29", "throughput": 19999.91, "total_tokens": 68746560} +{"current_steps": 21855, "total_steps": 78105, "loss": 0.2422, "lr": 4.523579623254403e-06, "epoch": 1.3990781640099865, "percentage": 27.98, "elapsed_time": "0:57:18", "remaining_time": "2:27:28", "throughput": 20000.85, "total_tokens": 68764352} +{"current_steps": 21860, "total_steps": 78105, "loss": 0.3823, "lr": 4.523251524348933e-06, "epoch": 1.3993982459509635, "percentage": 27.99, "elapsed_time": "0:57:18", "remaining_time": "2:27:27", "throughput": 20001.45, "total_tokens": 68779584} +{"current_steps": 21865, "total_steps": 78105, "loss": 0.5307, "lr": 4.522923324412821e-06, "epoch": 1.3997183278919403, "percentage": 27.99, "elapsed_time": "0:57:19", "remaining_time": "2:27:26", "throughput": 20002.08, "total_tokens": 68795584} +{"current_steps": 21870, "total_steps": 78105, "loss": 0.4641, "lr": 4.5225950234624545e-06, "epoch": 1.4000384098329173, "percentage": 28.0, "elapsed_time": "0:57:20", "remaining_time": "2:27:25", "throughput": 20002.76, "total_tokens": 68811648} +{"current_steps": 21875, "total_steps": 78105, "loss": 0.3552, "lr": 4.5222666215142284e-06, "epoch": 1.400358491773894, "percentage": 28.01, "elapsed_time": "0:57:20", "remaining_time": "2:27:24", "throughput": 20003.3, "total_tokens": 68826816} +{"current_steps": 21880, "total_steps": 78105, "loss": 0.4762, "lr": 4.521938118584541e-06, "epoch": 1.400678573714871, "percentage": 28.01, "elapsed_time": "0:57:21", "remaining_time": "2:27:23", "throughput": 20003.96, "total_tokens": 68842432} +{"current_steps": 21885, "total_steps": 78105, "loss": 0.3016, "lr": 4.521609514689796e-06, "epoch": 1.4009986556558478, "percentage": 28.02, "elapsed_time": "0:57:22", "remaining_time": "2:27:22", "throughput": 20004.75, "total_tokens": 68859456} +{"current_steps": 21890, "total_steps": 78105, "loss": 0.416, "lr": 4.5212808098464015e-06, "epoch": 1.4013187375968248, "percentage": 28.03, "elapsed_time": "0:57:22", "remaining_time": "2:27:21", "throughput": 20005.35, "total_tokens": 68874816} +{"current_steps": 21895, "total_steps": 78105, "loss": 0.2954, "lr": 4.5209520040707725e-06, "epoch": 1.4016388195378018, "percentage": 28.03, "elapsed_time": "0:57:23", "remaining_time": "2:27:20", "throughput": 20005.95, "total_tokens": 68890752} +{"current_steps": 21900, "total_steps": 78105, "loss": 0.428, "lr": 4.5206230973793266e-06, "epoch": 1.4019589014787786, "percentage": 28.04, "elapsed_time": "0:57:24", "remaining_time": "2:27:19", "throughput": 20006.57, "total_tokens": 68906304} +{"current_steps": 21905, "total_steps": 78105, "loss": 0.3513, "lr": 4.520294089788488e-06, "epoch": 1.4022789834197553, "percentage": 28.05, "elapsed_time": "0:57:24", "remaining_time": "2:27:18", "throughput": 20007.28, "total_tokens": 68922752} +{"current_steps": 21910, "total_steps": 78105, "loss": 0.3209, "lr": 4.519964981314686e-06, "epoch": 1.4025990653607323, "percentage": 28.05, "elapsed_time": "0:57:25", "remaining_time": "2:27:17", "throughput": 20007.92, "total_tokens": 68938624} +{"current_steps": 21915, "total_steps": 78105, "loss": 0.364, "lr": 4.519635771974355e-06, "epoch": 1.4029191473017093, "percentage": 28.06, "elapsed_time": "0:57:26", "remaining_time": "2:27:16", "throughput": 20008.63, "total_tokens": 68955008} +{"current_steps": 21920, "total_steps": 78105, "loss": 0.3084, "lr": 4.519306461783933e-06, "epoch": 1.403239229242686, "percentage": 28.06, "elapsed_time": "0:57:26", "remaining_time": "2:27:15", "throughput": 20009.39, "total_tokens": 68971840} +{"current_steps": 21925, "total_steps": 78105, "loss": 0.3228, "lr": 4.5189770507598655e-06, "epoch": 1.403559311183663, "percentage": 28.07, "elapsed_time": "0:57:27", "remaining_time": "2:27:14", "throughput": 20010.07, "total_tokens": 68987968} +{"current_steps": 21930, "total_steps": 78105, "loss": 0.484, "lr": 4.518647538918599e-06, "epoch": 1.4038793931246398, "percentage": 28.08, "elapsed_time": "0:57:28", "remaining_time": "2:27:13", "throughput": 20010.66, "total_tokens": 69003136} +{"current_steps": 21935, "total_steps": 78105, "loss": 0.4617, "lr": 4.51831792627659e-06, "epoch": 1.4041994750656168, "percentage": 28.08, "elapsed_time": "0:57:28", "remaining_time": "2:27:12", "throughput": 20011.34, "total_tokens": 69019072} +{"current_steps": 21940, "total_steps": 78105, "loss": 0.2423, "lr": 4.517988212850296e-06, "epoch": 1.4045195570065938, "percentage": 28.09, "elapsed_time": "0:57:29", "remaining_time": "2:27:10", "throughput": 20011.85, "total_tokens": 69033728} +{"current_steps": 21945, "total_steps": 78105, "loss": 0.294, "lr": 4.517658398656183e-06, "epoch": 1.4048396389475706, "percentage": 28.1, "elapsed_time": "0:57:30", "remaining_time": "2:27:09", "throughput": 20012.57, "total_tokens": 69049920} +{"current_steps": 21950, "total_steps": 78105, "loss": 0.3204, "lr": 4.517328483710719e-06, "epoch": 1.4051597208885473, "percentage": 28.1, "elapsed_time": "0:57:30", "remaining_time": "2:27:08", "throughput": 20013.12, "total_tokens": 69065280} +{"current_steps": 21955, "total_steps": 78105, "loss": 0.2705, "lr": 4.516998468030378e-06, "epoch": 1.4054798028295243, "percentage": 28.11, "elapsed_time": "0:57:31", "remaining_time": "2:27:07", "throughput": 20013.77, "total_tokens": 69080704} +{"current_steps": 21960, "total_steps": 78105, "loss": 0.3338, "lr": 4.5166683516316405e-06, "epoch": 1.4057998847705013, "percentage": 28.12, "elapsed_time": "0:57:32", "remaining_time": "2:27:06", "throughput": 20014.38, "total_tokens": 69096128} +{"current_steps": 21965, "total_steps": 78105, "loss": 0.3733, "lr": 4.516338134530989e-06, "epoch": 1.406119966711478, "percentage": 28.12, "elapsed_time": "0:57:32", "remaining_time": "2:27:05", "throughput": 20014.93, "total_tokens": 69110976} +{"current_steps": 21970, "total_steps": 78105, "loss": 0.3231, "lr": 4.516007816744914e-06, "epoch": 1.406440048652455, "percentage": 28.13, "elapsed_time": "0:57:33", "remaining_time": "2:27:04", "throughput": 20015.5, "total_tokens": 69126080} +{"current_steps": 21975, "total_steps": 78105, "loss": 0.2929, "lr": 4.515677398289909e-06, "epoch": 1.4067601305934319, "percentage": 28.14, "elapsed_time": "0:57:34", "remaining_time": "2:27:03", "throughput": 20016.32, "total_tokens": 69143552} +{"current_steps": 21980, "total_steps": 78105, "loss": 0.2825, "lr": 4.515346879182475e-06, "epoch": 1.4070802125344088, "percentage": 28.14, "elapsed_time": "0:57:35", "remaining_time": "2:27:02", "throughput": 20016.96, "total_tokens": 69159424} +{"current_steps": 21985, "total_steps": 78105, "loss": 0.348, "lr": 4.515016259439115e-06, "epoch": 1.4074002944753856, "percentage": 28.15, "elapsed_time": "0:57:35", "remaining_time": "2:27:01", "throughput": 20017.61, "total_tokens": 69175744} +{"current_steps": 21990, "total_steps": 78105, "loss": 0.376, "lr": 4.514685539076339e-06, "epoch": 1.4077203764163626, "percentage": 28.15, "elapsed_time": "0:57:36", "remaining_time": "2:27:00", "throughput": 20018.19, "total_tokens": 69190784} +{"current_steps": 21995, "total_steps": 78105, "loss": 0.3178, "lr": 4.514354718110662e-06, "epoch": 1.4080404583573394, "percentage": 28.16, "elapsed_time": "0:57:37", "remaining_time": "2:26:59", "throughput": 20018.71, "total_tokens": 69205888} +{"current_steps": 22000, "total_steps": 78105, "loss": 0.4727, "lr": 4.514023796558601e-06, "epoch": 1.4083605402983164, "percentage": 28.17, "elapsed_time": "0:57:37", "remaining_time": "2:26:57", "throughput": 20019.21, "total_tokens": 69220352} +{"current_steps": 22005, "total_steps": 78105, "loss": 0.2668, "lr": 4.5136927744366835e-06, "epoch": 1.4086806222392934, "percentage": 28.17, "elapsed_time": "0:57:38", "remaining_time": "2:26:56", "throughput": 20019.94, "total_tokens": 69236992} +{"current_steps": 22010, "total_steps": 78105, "loss": 0.2625, "lr": 4.5133616517614375e-06, "epoch": 1.4090007041802701, "percentage": 28.18, "elapsed_time": "0:57:39", "remaining_time": "2:26:55", "throughput": 20020.65, "total_tokens": 69253376} +{"current_steps": 22015, "total_steps": 78105, "loss": 0.3506, "lr": 4.513030428549398e-06, "epoch": 1.4093207861212471, "percentage": 28.19, "elapsed_time": "0:57:39", "remaining_time": "2:26:54", "throughput": 20021.18, "total_tokens": 69268480} +{"current_steps": 22020, "total_steps": 78105, "loss": 0.3796, "lr": 4.512699104817104e-06, "epoch": 1.4096408680622239, "percentage": 28.19, "elapsed_time": "0:57:40", "remaining_time": "2:26:53", "throughput": 20021.73, "total_tokens": 69283456} +{"current_steps": 22025, "total_steps": 78105, "loss": 0.5586, "lr": 4.512367680581101e-06, "epoch": 1.4099609500032009, "percentage": 28.2, "elapsed_time": "0:57:41", "remaining_time": "2:26:52", "throughput": 20022.28, "total_tokens": 69298432} +{"current_steps": 22030, "total_steps": 78105, "loss": 0.4369, "lr": 4.512036155857939e-06, "epoch": 1.4102810319441776, "percentage": 28.21, "elapsed_time": "0:57:41", "remaining_time": "2:26:51", "throughput": 20022.77, "total_tokens": 69312960} +{"current_steps": 22035, "total_steps": 78105, "loss": 0.248, "lr": 4.511704530664169e-06, "epoch": 1.4106011138851546, "percentage": 28.21, "elapsed_time": "0:57:42", "remaining_time": "2:26:50", "throughput": 20023.27, "total_tokens": 69327488} +{"current_steps": 22040, "total_steps": 78105, "loss": 0.227, "lr": 4.511372805016355e-06, "epoch": 1.4109211958261314, "percentage": 28.22, "elapsed_time": "0:57:43", "remaining_time": "2:26:49", "throughput": 20023.85, "total_tokens": 69342912} +{"current_steps": 22045, "total_steps": 78105, "loss": 0.2542, "lr": 4.51104097893106e-06, "epoch": 1.4112412777671084, "percentage": 28.22, "elapsed_time": "0:57:43", "remaining_time": "2:26:48", "throughput": 20024.44, "total_tokens": 69358592} +{"current_steps": 22050, "total_steps": 78105, "loss": 0.336, "lr": 4.510709052424854e-06, "epoch": 1.4115613597080854, "percentage": 28.23, "elapsed_time": "0:57:44", "remaining_time": "2:26:46", "throughput": 20025.0, "total_tokens": 69373632} +{"current_steps": 22055, "total_steps": 78105, "loss": 0.2679, "lr": 4.510377025514311e-06, "epoch": 1.4118814416490622, "percentage": 28.24, "elapsed_time": "0:57:45", "remaining_time": "2:26:46", "throughput": 20025.73, "total_tokens": 69390720} +{"current_steps": 22060, "total_steps": 78105, "loss": 0.3682, "lr": 4.51004489821601e-06, "epoch": 1.412201523590039, "percentage": 28.24, "elapsed_time": "0:57:45", "remaining_time": "2:26:45", "throughput": 20026.36, "total_tokens": 69406528} +{"current_steps": 22065, "total_steps": 78105, "loss": 0.4413, "lr": 4.509712670546538e-06, "epoch": 1.412521605531016, "percentage": 28.25, "elapsed_time": "0:57:46", "remaining_time": "2:26:43", "throughput": 20027.08, "total_tokens": 69422976} +{"current_steps": 22070, "total_steps": 78105, "loss": 0.2929, "lr": 4.509380342522483e-06, "epoch": 1.412841687471993, "percentage": 28.26, "elapsed_time": "0:57:47", "remaining_time": "2:26:43", "throughput": 20027.9, "total_tokens": 69440448} +{"current_steps": 22075, "total_steps": 78105, "loss": 0.4309, "lr": 4.50904791416044e-06, "epoch": 1.4131617694129697, "percentage": 28.26, "elapsed_time": "0:57:47", "remaining_time": "2:26:41", "throughput": 20028.48, "total_tokens": 69455744} +{"current_steps": 22080, "total_steps": 78105, "loss": 0.2869, "lr": 4.508715385477009e-06, "epoch": 1.4134818513539467, "percentage": 28.27, "elapsed_time": "0:57:48", "remaining_time": "2:26:40", "throughput": 20029.07, "total_tokens": 69471168} +{"current_steps": 22085, "total_steps": 78105, "loss": 0.4174, "lr": 4.508382756488795e-06, "epoch": 1.4138019332949234, "percentage": 28.28, "elapsed_time": "0:57:49", "remaining_time": "2:26:39", "throughput": 20029.72, "total_tokens": 69486976} +{"current_steps": 22090, "total_steps": 78105, "loss": 0.4435, "lr": 4.508050027212407e-06, "epoch": 1.4141220152359004, "percentage": 28.28, "elapsed_time": "0:57:49", "remaining_time": "2:26:38", "throughput": 20030.43, "total_tokens": 69503616} +{"current_steps": 22095, "total_steps": 78105, "loss": 0.2814, "lr": 4.507717197664461e-06, "epoch": 1.4144420971768774, "percentage": 28.29, "elapsed_time": "0:57:50", "remaining_time": "2:26:37", "throughput": 20031.08, "total_tokens": 69519744} +{"current_steps": 22100, "total_steps": 78105, "loss": 0.3086, "lr": 4.507384267861576e-06, "epoch": 1.4147621791178542, "percentage": 28.3, "elapsed_time": "0:57:51", "remaining_time": "2:26:36", "throughput": 20031.81, "total_tokens": 69536256} +{"current_steps": 22105, "total_steps": 78105, "loss": 0.4418, "lr": 4.507051237820375e-06, "epoch": 1.415082261058831, "percentage": 28.3, "elapsed_time": "0:57:51", "remaining_time": "2:26:35", "throughput": 20032.49, "total_tokens": 69552384} +{"current_steps": 22110, "total_steps": 78105, "loss": 0.2333, "lr": 4.506718107557491e-06, "epoch": 1.415402342999808, "percentage": 28.31, "elapsed_time": "0:57:52", "remaining_time": "2:26:34", "throughput": 20033.12, "total_tokens": 69568064} +{"current_steps": 22115, "total_steps": 78105, "loss": 0.4221, "lr": 4.506384877089557e-06, "epoch": 1.415722424940785, "percentage": 28.31, "elapsed_time": "0:57:53", "remaining_time": "2:26:33", "throughput": 20033.66, "total_tokens": 69583040} +{"current_steps": 22120, "total_steps": 78105, "loss": 0.5593, "lr": 4.506051546433213e-06, "epoch": 1.4160425068817617, "percentage": 28.32, "elapsed_time": "0:57:53", "remaining_time": "2:26:32", "throughput": 20034.17, "total_tokens": 69598016} +{"current_steps": 22125, "total_steps": 78105, "loss": 0.2703, "lr": 4.505718115605103e-06, "epoch": 1.4163625888227387, "percentage": 28.33, "elapsed_time": "0:57:54", "remaining_time": "2:26:31", "throughput": 20034.65, "total_tokens": 69612736} +{"current_steps": 22130, "total_steps": 78105, "loss": 0.2639, "lr": 4.505384584621879e-06, "epoch": 1.4166826707637155, "percentage": 28.33, "elapsed_time": "0:57:55", "remaining_time": "2:26:30", "throughput": 20035.27, "total_tokens": 69628544} +{"current_steps": 22135, "total_steps": 78105, "loss": 0.312, "lr": 4.505050953500194e-06, "epoch": 1.4170027527046924, "percentage": 28.34, "elapsed_time": "0:57:55", "remaining_time": "2:26:29", "throughput": 20035.86, "total_tokens": 69643904} +{"current_steps": 22140, "total_steps": 78105, "loss": 0.3547, "lr": 4.50471722225671e-06, "epoch": 1.4173228346456692, "percentage": 28.35, "elapsed_time": "0:57:56", "remaining_time": "2:26:28", "throughput": 20036.43, "total_tokens": 69659200} +{"current_steps": 22145, "total_steps": 78105, "loss": 0.3342, "lr": 4.504383390908088e-06, "epoch": 1.4176429165866462, "percentage": 28.35, "elapsed_time": "0:57:57", "remaining_time": "2:26:27", "throughput": 20037.26, "total_tokens": 69676800} +{"current_steps": 22150, "total_steps": 78105, "loss": 0.3124, "lr": 4.504049459471e-06, "epoch": 1.417962998527623, "percentage": 28.36, "elapsed_time": "0:57:58", "remaining_time": "2:26:26", "throughput": 20037.83, "total_tokens": 69692544} +{"current_steps": 22155, "total_steps": 78105, "loss": 0.2454, "lr": 4.503715427962121e-06, "epoch": 1.4182830804686, "percentage": 28.37, "elapsed_time": "0:57:58", "remaining_time": "2:26:25", "throughput": 20038.46, "total_tokens": 69708096} +{"current_steps": 22160, "total_steps": 78105, "loss": 0.361, "lr": 4.503381296398131e-06, "epoch": 1.418603162409577, "percentage": 28.37, "elapsed_time": "0:57:59", "remaining_time": "2:26:23", "throughput": 20038.97, "total_tokens": 69722944} +{"current_steps": 22165, "total_steps": 78105, "loss": 0.2242, "lr": 4.503047064795713e-06, "epoch": 1.4189232443505537, "percentage": 28.38, "elapsed_time": "0:58:00", "remaining_time": "2:26:22", "throughput": 20039.53, "total_tokens": 69738048} +{"current_steps": 22170, "total_steps": 78105, "loss": 0.4147, "lr": 4.50271273317156e-06, "epoch": 1.4192433262915305, "percentage": 28.38, "elapsed_time": "0:58:00", "remaining_time": "2:26:21", "throughput": 20040.16, "total_tokens": 69753728} +{"current_steps": 22175, "total_steps": 78105, "loss": 0.3756, "lr": 4.502378301542364e-06, "epoch": 1.4195634082325075, "percentage": 28.39, "elapsed_time": "0:58:01", "remaining_time": "2:26:20", "throughput": 20040.85, "total_tokens": 69769728} +{"current_steps": 22180, "total_steps": 78105, "loss": 0.3585, "lr": 4.502043769924825e-06, "epoch": 1.4198834901734845, "percentage": 28.4, "elapsed_time": "0:58:02", "remaining_time": "2:26:19", "throughput": 20041.34, "total_tokens": 69784448} +{"current_steps": 22185, "total_steps": 78105, "loss": 0.4064, "lr": 4.501709138335649e-06, "epoch": 1.4202035721144612, "percentage": 28.4, "elapsed_time": "0:58:02", "remaining_time": "2:26:18", "throughput": 20042.01, "total_tokens": 69800704} +{"current_steps": 22190, "total_steps": 78105, "loss": 0.4838, "lr": 4.501374406791546e-06, "epoch": 1.4205236540554382, "percentage": 28.41, "elapsed_time": "0:58:03", "remaining_time": "2:26:17", "throughput": 20042.68, "total_tokens": 69817152} +{"current_steps": 22195, "total_steps": 78105, "loss": 0.3046, "lr": 4.501039575309229e-06, "epoch": 1.420843735996415, "percentage": 28.42, "elapsed_time": "0:58:04", "remaining_time": "2:26:16", "throughput": 20043.32, "total_tokens": 69833088} +{"current_steps": 22200, "total_steps": 78105, "loss": 0.3752, "lr": 4.500704643905418e-06, "epoch": 1.421163817937392, "percentage": 28.42, "elapsed_time": "0:58:04", "remaining_time": "2:26:15", "throughput": 20043.82, "total_tokens": 69847936} +{"current_steps": 22205, "total_steps": 78105, "loss": 0.4038, "lr": 4.500369612596839e-06, "epoch": 1.421483899878369, "percentage": 28.43, "elapsed_time": "0:58:05", "remaining_time": "2:26:14", "throughput": 20044.64, "total_tokens": 69865408} +{"current_steps": 22210, "total_steps": 78105, "loss": 0.3607, "lr": 4.5000344814002215e-06, "epoch": 1.4218039818193458, "percentage": 28.44, "elapsed_time": "0:58:06", "remaining_time": "2:26:13", "throughput": 20045.27, "total_tokens": 69881216} +{"current_steps": 22215, "total_steps": 78105, "loss": 0.4551, "lr": 4.499699250332299e-06, "epoch": 1.4221240637603225, "percentage": 28.44, "elapsed_time": "0:58:06", "remaining_time": "2:26:12", "throughput": 20046.21, "total_tokens": 69899520} +{"current_steps": 22220, "total_steps": 78105, "loss": 0.3379, "lr": 4.499363919409813e-06, "epoch": 1.4224441457012995, "percentage": 28.45, "elapsed_time": "0:58:07", "remaining_time": "2:26:11", "throughput": 20046.89, "total_tokens": 69915904} +{"current_steps": 22225, "total_steps": 78105, "loss": 0.2545, "lr": 4.499028488649506e-06, "epoch": 1.4227642276422765, "percentage": 28.46, "elapsed_time": "0:58:08", "remaining_time": "2:26:10", "throughput": 20047.6, "total_tokens": 69932416} +{"current_steps": 22230, "total_steps": 78105, "loss": 0.3255, "lr": 4.49869295806813e-06, "epoch": 1.4230843095832533, "percentage": 28.46, "elapsed_time": "0:58:08", "remaining_time": "2:26:09", "throughput": 20048.11, "total_tokens": 69947328} +{"current_steps": 22235, "total_steps": 78105, "loss": 0.4659, "lr": 4.498357327682437e-06, "epoch": 1.4234043915242303, "percentage": 28.47, "elapsed_time": "0:58:09", "remaining_time": "2:26:08", "throughput": 20048.63, "total_tokens": 69962048} +{"current_steps": 22240, "total_steps": 78105, "loss": 0.346, "lr": 4.49802159750919e-06, "epoch": 1.423724473465207, "percentage": 28.47, "elapsed_time": "0:58:10", "remaining_time": "2:26:07", "throughput": 20049.09, "total_tokens": 69976320} +{"current_steps": 22245, "total_steps": 78105, "loss": 0.2972, "lr": 4.497685767565151e-06, "epoch": 1.424044555406184, "percentage": 28.48, "elapsed_time": "0:58:10", "remaining_time": "2:26:06", "throughput": 20049.67, "total_tokens": 69991744} +{"current_steps": 22250, "total_steps": 78105, "loss": 0.3883, "lr": 4.49734983786709e-06, "epoch": 1.424364637347161, "percentage": 28.49, "elapsed_time": "0:58:11", "remaining_time": "2:26:05", "throughput": 20050.24, "total_tokens": 70006912} +{"current_steps": 22255, "total_steps": 78105, "loss": 0.3055, "lr": 4.497013808431781e-06, "epoch": 1.4246847192881378, "percentage": 28.49, "elapsed_time": "0:58:12", "remaining_time": "2:26:04", "throughput": 20050.93, "total_tokens": 70023360} +{"current_steps": 22260, "total_steps": 78105, "loss": 0.272, "lr": 4.496677679276006e-06, "epoch": 1.4250048012291145, "percentage": 28.5, "elapsed_time": "0:58:12", "remaining_time": "2:26:02", "throughput": 20051.55, "total_tokens": 70039232} +{"current_steps": 22265, "total_steps": 78105, "loss": 0.3403, "lr": 4.496341450416548e-06, "epoch": 1.4253248831700915, "percentage": 28.51, "elapsed_time": "0:58:13", "remaining_time": "2:26:01", "throughput": 20052.2, "total_tokens": 70055552} +{"current_steps": 22270, "total_steps": 78105, "loss": 0.4385, "lr": 4.496005121870196e-06, "epoch": 1.4256449651110685, "percentage": 28.51, "elapsed_time": "0:58:14", "remaining_time": "2:26:00", "throughput": 20052.82, "total_tokens": 70071360} +{"current_steps": 22275, "total_steps": 78105, "loss": 0.3796, "lr": 4.495668693653745e-06, "epoch": 1.4259650470520453, "percentage": 28.52, "elapsed_time": "0:58:15", "remaining_time": "2:25:59", "throughput": 20053.36, "total_tokens": 70086720} +{"current_steps": 22280, "total_steps": 78105, "loss": 0.3569, "lr": 4.495332165783995e-06, "epoch": 1.4262851289930223, "percentage": 28.53, "elapsed_time": "0:58:15", "remaining_time": "2:25:58", "throughput": 20053.96, "total_tokens": 70102336} +{"current_steps": 22285, "total_steps": 78105, "loss": 0.4547, "lr": 4.494995538277749e-06, "epoch": 1.426605210933999, "percentage": 28.53, "elapsed_time": "0:58:16", "remaining_time": "2:25:57", "throughput": 20054.5, "total_tokens": 70117504} +{"current_steps": 22290, "total_steps": 78105, "loss": 0.258, "lr": 4.494658811151817e-06, "epoch": 1.426925292874976, "percentage": 28.54, "elapsed_time": "0:58:17", "remaining_time": "2:25:56", "throughput": 20055.05, "total_tokens": 70133120} +{"current_steps": 22295, "total_steps": 78105, "loss": 0.2478, "lr": 4.4943219844230155e-06, "epoch": 1.4272453748159528, "percentage": 28.54, "elapsed_time": "0:58:17", "remaining_time": "2:25:55", "throughput": 20055.69, "total_tokens": 70149120} +{"current_steps": 22300, "total_steps": 78105, "loss": 0.3174, "lr": 4.493985058108161e-06, "epoch": 1.4275654567569298, "percentage": 28.55, "elapsed_time": "0:58:18", "remaining_time": "2:25:54", "throughput": 20056.34, "total_tokens": 70165056} +{"current_steps": 22305, "total_steps": 78105, "loss": 0.2791, "lr": 4.493648032224079e-06, "epoch": 1.4278855386979066, "percentage": 28.56, "elapsed_time": "0:58:19", "remaining_time": "2:25:53", "throughput": 20056.86, "total_tokens": 70180032} +{"current_steps": 22310, "total_steps": 78105, "loss": 0.341, "lr": 4.493310906787599e-06, "epoch": 1.4282056206388836, "percentage": 28.56, "elapsed_time": "0:58:19", "remaining_time": "2:25:52", "throughput": 20057.45, "total_tokens": 70195456} +{"current_steps": 22315, "total_steps": 78105, "loss": 0.2998, "lr": 4.492973681815555e-06, "epoch": 1.4285257025798606, "percentage": 28.57, "elapsed_time": "0:58:20", "remaining_time": "2:25:51", "throughput": 20058.07, "total_tokens": 70211264} +{"current_steps": 22320, "total_steps": 78105, "loss": 0.3334, "lr": 4.492636357324787e-06, "epoch": 1.4288457845208373, "percentage": 28.58, "elapsed_time": "0:58:21", "remaining_time": "2:25:50", "throughput": 20058.61, "total_tokens": 70226176} +{"current_steps": 22325, "total_steps": 78105, "loss": 0.2868, "lr": 4.492298933332138e-06, "epoch": 1.429165866461814, "percentage": 28.58, "elapsed_time": "0:58:21", "remaining_time": "2:25:49", "throughput": 20059.27, "total_tokens": 70242432} +{"current_steps": 22330, "total_steps": 78105, "loss": 0.3106, "lr": 4.4919614098544585e-06, "epoch": 1.429485948402791, "percentage": 28.59, "elapsed_time": "0:58:22", "remaining_time": "2:25:48", "throughput": 20059.92, "total_tokens": 70258560} +{"current_steps": 22335, "total_steps": 78105, "loss": 0.333, "lr": 4.491623786908601e-06, "epoch": 1.429806030343768, "percentage": 28.6, "elapsed_time": "0:58:23", "remaining_time": "2:25:47", "throughput": 20060.63, "total_tokens": 70274944} +{"current_steps": 22340, "total_steps": 78105, "loss": 0.3755, "lr": 4.491286064511426e-06, "epoch": 1.4301261122847448, "percentage": 28.6, "elapsed_time": "0:58:23", "remaining_time": "2:25:46", "throughput": 20061.21, "total_tokens": 70290560} +{"current_steps": 22345, "total_steps": 78105, "loss": 0.283, "lr": 4.490948242679796e-06, "epoch": 1.4304461942257218, "percentage": 28.61, "elapsed_time": "0:58:24", "remaining_time": "2:25:45", "throughput": 20061.89, "total_tokens": 70306880} +{"current_steps": 22350, "total_steps": 78105, "loss": 0.2891, "lr": 4.490610321430583e-06, "epoch": 1.4307662761666986, "percentage": 28.62, "elapsed_time": "0:58:25", "remaining_time": "2:25:44", "throughput": 20062.52, "total_tokens": 70322496} +{"current_steps": 22355, "total_steps": 78105, "loss": 0.2829, "lr": 4.4902723007806585e-06, "epoch": 1.4310863581076756, "percentage": 28.62, "elapsed_time": "0:58:25", "remaining_time": "2:25:43", "throughput": 20063.12, "total_tokens": 70338496} +{"current_steps": 22360, "total_steps": 78105, "loss": 0.2031, "lr": 4.4899341807469025e-06, "epoch": 1.4314064400486526, "percentage": 28.63, "elapsed_time": "0:58:26", "remaining_time": "2:25:42", "throughput": 20063.77, "total_tokens": 70354176} +{"current_steps": 22365, "total_steps": 78105, "loss": 0.4652, "lr": 4.489595961346198e-06, "epoch": 1.4317265219896294, "percentage": 28.63, "elapsed_time": "0:58:27", "remaining_time": "2:25:40", "throughput": 20064.46, "total_tokens": 70370304} +{"current_steps": 22370, "total_steps": 78105, "loss": 0.257, "lr": 4.489257642595436e-06, "epoch": 1.4320466039306061, "percentage": 28.64, "elapsed_time": "0:58:27", "remaining_time": "2:25:39", "throughput": 20065.06, "total_tokens": 70385856} +{"current_steps": 22375, "total_steps": 78105, "loss": 0.299, "lr": 4.488919224511508e-06, "epoch": 1.4323666858715831, "percentage": 28.65, "elapsed_time": "0:58:28", "remaining_time": "2:25:38", "throughput": 20065.64, "total_tokens": 70401344} +{"current_steps": 22380, "total_steps": 78105, "loss": 0.3748, "lr": 4.488580707111314e-06, "epoch": 1.43268676781256, "percentage": 28.65, "elapsed_time": "0:58:29", "remaining_time": "2:25:37", "throughput": 20066.19, "total_tokens": 70416384} +{"current_steps": 22385, "total_steps": 78105, "loss": 0.3671, "lr": 4.488242090411758e-06, "epoch": 1.4330068497535369, "percentage": 28.66, "elapsed_time": "0:58:29", "remaining_time": "2:25:36", "throughput": 20066.76, "total_tokens": 70431552} +{"current_steps": 22390, "total_steps": 78105, "loss": 0.3749, "lr": 4.487903374429748e-06, "epoch": 1.4333269316945139, "percentage": 28.67, "elapsed_time": "0:58:30", "remaining_time": "2:25:35", "throughput": 20067.49, "total_tokens": 70448192} +{"current_steps": 22395, "total_steps": 78105, "loss": 0.3445, "lr": 4.487564559182197e-06, "epoch": 1.4336470136354906, "percentage": 28.67, "elapsed_time": "0:58:31", "remaining_time": "2:25:34", "throughput": 20068.14, "total_tokens": 70464128} +{"current_steps": 22400, "total_steps": 78105, "loss": 0.3425, "lr": 4.487225644686027e-06, "epoch": 1.4339670955764676, "percentage": 28.68, "elapsed_time": "0:58:31", "remaining_time": "2:25:33", "throughput": 20068.8, "total_tokens": 70480192} +{"current_steps": 22405, "total_steps": 78105, "loss": 0.3538, "lr": 4.486886630958157e-06, "epoch": 1.4342871775174444, "percentage": 28.69, "elapsed_time": "0:58:32", "remaining_time": "2:25:32", "throughput": 20069.35, "total_tokens": 70495360} +{"current_steps": 22410, "total_steps": 78105, "loss": 0.3122, "lr": 4.486547518015519e-06, "epoch": 1.4346072594584214, "percentage": 28.69, "elapsed_time": "0:58:33", "remaining_time": "2:25:31", "throughput": 20069.95, "total_tokens": 70511232} +{"current_steps": 22415, "total_steps": 78105, "loss": 0.2743, "lr": 4.486208305875046e-06, "epoch": 1.4349273413993981, "percentage": 28.7, "elapsed_time": "0:58:33", "remaining_time": "2:25:30", "throughput": 20070.65, "total_tokens": 70527488} +{"current_steps": 22420, "total_steps": 78105, "loss": 0.3068, "lr": 4.4858689945536755e-06, "epoch": 1.4352474233403751, "percentage": 28.7, "elapsed_time": "0:58:34", "remaining_time": "2:25:29", "throughput": 20071.23, "total_tokens": 70542656} +{"current_steps": 22425, "total_steps": 78105, "loss": 0.2907, "lr": 4.4855295840683516e-06, "epoch": 1.4355675052813521, "percentage": 28.71, "elapsed_time": "0:58:35", "remaining_time": "2:25:28", "throughput": 20071.75, "total_tokens": 70557568} +{"current_steps": 22430, "total_steps": 78105, "loss": 0.2776, "lr": 4.485190074436022e-06, "epoch": 1.435887587222329, "percentage": 28.72, "elapsed_time": "0:58:35", "remaining_time": "2:25:27", "throughput": 20072.53, "total_tokens": 70574592} +{"current_steps": 22435, "total_steps": 78105, "loss": 0.2727, "lr": 4.484850465673641e-06, "epoch": 1.4362076691633059, "percentage": 28.72, "elapsed_time": "0:58:36", "remaining_time": "2:25:26", "throughput": 20073.2, "total_tokens": 70590784} +{"current_steps": 22440, "total_steps": 78105, "loss": 0.2752, "lr": 4.4845107577981676e-06, "epoch": 1.4365277511042827, "percentage": 28.73, "elapsed_time": "0:58:37", "remaining_time": "2:25:25", "throughput": 20073.63, "total_tokens": 70605248} +{"current_steps": 22445, "total_steps": 78105, "loss": 0.4739, "lr": 4.484170950826562e-06, "epoch": 1.4368478330452596, "percentage": 28.74, "elapsed_time": "0:58:38", "remaining_time": "2:25:24", "throughput": 20074.38, "total_tokens": 70622208} +{"current_steps": 22450, "total_steps": 78105, "loss": 0.3522, "lr": 4.483831044775795e-06, "epoch": 1.4371679149862364, "percentage": 28.74, "elapsed_time": "0:58:38", "remaining_time": "2:25:23", "throughput": 20075.18, "total_tokens": 70639680} +{"current_steps": 22455, "total_steps": 78105, "loss": 0.3858, "lr": 4.4834910396628394e-06, "epoch": 1.4374879969272134, "percentage": 28.75, "elapsed_time": "0:58:39", "remaining_time": "2:25:22", "throughput": 20075.8, "total_tokens": 70655296} +{"current_steps": 22460, "total_steps": 78105, "loss": 0.3428, "lr": 4.483150935504672e-06, "epoch": 1.4378080788681902, "percentage": 28.76, "elapsed_time": "0:58:40", "remaining_time": "2:25:21", "throughput": 20076.46, "total_tokens": 70671424} +{"current_steps": 22465, "total_steps": 78105, "loss": 0.3458, "lr": 4.482810732318278e-06, "epoch": 1.4381281608091672, "percentage": 28.76, "elapsed_time": "0:58:40", "remaining_time": "2:25:20", "throughput": 20077.02, "total_tokens": 70686528} +{"current_steps": 22470, "total_steps": 78105, "loss": 0.4523, "lr": 4.482470430120643e-06, "epoch": 1.4384482427501442, "percentage": 28.77, "elapsed_time": "0:58:41", "remaining_time": "2:25:19", "throughput": 20077.74, "total_tokens": 70703360} +{"current_steps": 22475, "total_steps": 78105, "loss": 0.4637, "lr": 4.482130028928761e-06, "epoch": 1.438768324691121, "percentage": 28.78, "elapsed_time": "0:58:42", "remaining_time": "2:25:17", "throughput": 20078.34, "total_tokens": 70718784} +{"current_steps": 22480, "total_steps": 78105, "loss": 0.3014, "lr": 4.481789528759631e-06, "epoch": 1.4390884066320977, "percentage": 28.78, "elapsed_time": "0:58:42", "remaining_time": "2:25:16", "throughput": 20078.98, "total_tokens": 70734976} +{"current_steps": 22485, "total_steps": 78105, "loss": 0.2595, "lr": 4.481448929630254e-06, "epoch": 1.4394084885730747, "percentage": 28.79, "elapsed_time": "0:58:43", "remaining_time": "2:25:16", "throughput": 20079.75, "total_tokens": 70752064} +{"current_steps": 22490, "total_steps": 78105, "loss": 0.3057, "lr": 4.481108231557639e-06, "epoch": 1.4397285705140517, "percentage": 28.79, "elapsed_time": "0:58:44", "remaining_time": "2:25:14", "throughput": 20080.26, "total_tokens": 70767040} +{"current_steps": 22495, "total_steps": 78105, "loss": 0.3347, "lr": 4.480767434558797e-06, "epoch": 1.4400486524550284, "percentage": 28.8, "elapsed_time": "0:58:44", "remaining_time": "2:25:13", "throughput": 20080.8, "total_tokens": 70782272} +{"current_steps": 22500, "total_steps": 78105, "loss": 0.3547, "lr": 4.480426538650747e-06, "epoch": 1.4403687343960054, "percentage": 28.81, "elapsed_time": "0:58:45", "remaining_time": "2:25:12", "throughput": 20081.4, "total_tokens": 70797760} +{"current_steps": 22505, "total_steps": 78105, "loss": 0.1942, "lr": 4.480085543850512e-06, "epoch": 1.4406888163369822, "percentage": 28.81, "elapsed_time": "0:58:46", "remaining_time": "2:25:11", "throughput": 20082.05, "total_tokens": 70813440} +{"current_steps": 22510, "total_steps": 78105, "loss": 0.3961, "lr": 4.479744450175119e-06, "epoch": 1.4410088982779592, "percentage": 28.82, "elapsed_time": "0:58:46", "remaining_time": "2:25:10", "throughput": 20082.64, "total_tokens": 70829248} +{"current_steps": 22515, "total_steps": 78105, "loss": 0.4574, "lr": 4.4794032576416005e-06, "epoch": 1.4413289802189362, "percentage": 28.83, "elapsed_time": "0:58:47", "remaining_time": "2:25:09", "throughput": 20083.22, "total_tokens": 70844800} +{"current_steps": 22520, "total_steps": 78105, "loss": 0.3219, "lr": 4.479061966266992e-06, "epoch": 1.441649062159913, "percentage": 28.83, "elapsed_time": "0:58:48", "remaining_time": "2:25:08", "throughput": 20083.89, "total_tokens": 70861056} +{"current_steps": 22525, "total_steps": 78105, "loss": 0.3847, "lr": 4.478720576068339e-06, "epoch": 1.4419691441008897, "percentage": 28.84, "elapsed_time": "0:58:48", "remaining_time": "2:25:07", "throughput": 20084.45, "total_tokens": 70876480} +{"current_steps": 22530, "total_steps": 78105, "loss": 0.2795, "lr": 4.478379087062687e-06, "epoch": 1.4422892260418667, "percentage": 28.85, "elapsed_time": "0:58:49", "remaining_time": "2:25:06", "throughput": 20085.0, "total_tokens": 70891840} +{"current_steps": 22535, "total_steps": 78105, "loss": 0.4677, "lr": 4.478037499267087e-06, "epoch": 1.4426093079828437, "percentage": 28.85, "elapsed_time": "0:58:50", "remaining_time": "2:25:05", "throughput": 20085.61, "total_tokens": 70907584} +{"current_steps": 22540, "total_steps": 78105, "loss": 0.3079, "lr": 4.477695812698599e-06, "epoch": 1.4429293899238205, "percentage": 28.86, "elapsed_time": "0:58:50", "remaining_time": "2:25:04", "throughput": 20086.1, "total_tokens": 70922496} +{"current_steps": 22545, "total_steps": 78105, "loss": 0.3876, "lr": 4.477354027374283e-06, "epoch": 1.4432494718647975, "percentage": 28.86, "elapsed_time": "0:58:51", "remaining_time": "2:25:03", "throughput": 20086.74, "total_tokens": 70938304} +{"current_steps": 22550, "total_steps": 78105, "loss": 0.423, "lr": 4.477012143311207e-06, "epoch": 1.4435695538057742, "percentage": 28.87, "elapsed_time": "0:58:52", "remaining_time": "2:25:02", "throughput": 20087.3, "total_tokens": 70953600} +{"current_steps": 22555, "total_steps": 78105, "loss": 0.3551, "lr": 4.476670160526442e-06, "epoch": 1.4438896357467512, "percentage": 28.88, "elapsed_time": "0:58:52", "remaining_time": "2:25:01", "throughput": 20088.0, "total_tokens": 70970240} +{"current_steps": 22560, "total_steps": 78105, "loss": 0.3449, "lr": 4.476328079037065e-06, "epoch": 1.444209717687728, "percentage": 28.88, "elapsed_time": "0:58:53", "remaining_time": "2:25:00", "throughput": 20088.44, "total_tokens": 70984512} +{"current_steps": 22565, "total_steps": 78105, "loss": 0.3704, "lr": 4.475985898860158e-06, "epoch": 1.444529799628705, "percentage": 28.89, "elapsed_time": "0:58:54", "remaining_time": "2:24:58", "throughput": 20088.93, "total_tokens": 70998976} +{"current_steps": 22570, "total_steps": 78105, "loss": 0.3395, "lr": 4.475643620012808e-06, "epoch": 1.4448498815696817, "percentage": 28.9, "elapsed_time": "0:58:54", "remaining_time": "2:24:57", "throughput": 20089.56, "total_tokens": 71014976} +{"current_steps": 22575, "total_steps": 78105, "loss": 0.3526, "lr": 4.475301242512107e-06, "epoch": 1.4451699635106587, "percentage": 28.9, "elapsed_time": "0:58:55", "remaining_time": "2:24:56", "throughput": 20090.14, "total_tokens": 71030464} +{"current_steps": 22580, "total_steps": 78105, "loss": 0.3768, "lr": 4.47495876637515e-06, "epoch": 1.4454900454516357, "percentage": 28.91, "elapsed_time": "0:58:56", "remaining_time": "2:24:55", "throughput": 20090.66, "total_tokens": 71045888} +{"current_steps": 22585, "total_steps": 78105, "loss": 0.4105, "lr": 4.474616191619041e-06, "epoch": 1.4458101273926125, "percentage": 28.92, "elapsed_time": "0:58:56", "remaining_time": "2:24:54", "throughput": 20091.21, "total_tokens": 71061248} +{"current_steps": 22590, "total_steps": 78105, "loss": 0.3056, "lr": 4.474273518260883e-06, "epoch": 1.4461302093335893, "percentage": 28.92, "elapsed_time": "0:58:57", "remaining_time": "2:24:53", "throughput": 20091.71, "total_tokens": 71076096} +{"current_steps": 22595, "total_steps": 78105, "loss": 0.2822, "lr": 4.47393074631779e-06, "epoch": 1.4464502912745663, "percentage": 28.93, "elapsed_time": "0:58:58", "remaining_time": "2:24:52", "throughput": 20092.29, "total_tokens": 71091776} +{"current_steps": 22600, "total_steps": 78105, "loss": 0.3365, "lr": 4.473587875806878e-06, "epoch": 1.4467703732155432, "percentage": 28.94, "elapsed_time": "0:58:58", "remaining_time": "2:24:51", "throughput": 20092.97, "total_tokens": 71108096} +{"current_steps": 22605, "total_steps": 78105, "loss": 0.3015, "lr": 4.473244906745265e-06, "epoch": 1.44709045515652, "percentage": 28.94, "elapsed_time": "0:58:59", "remaining_time": "2:24:50", "throughput": 20093.55, "total_tokens": 71123584} +{"current_steps": 22610, "total_steps": 78105, "loss": 0.3567, "lr": 4.472901839150082e-06, "epoch": 1.447410537097497, "percentage": 28.95, "elapsed_time": "0:59:00", "remaining_time": "2:24:49", "throughput": 20094.29, "total_tokens": 71140352} +{"current_steps": 22615, "total_steps": 78105, "loss": 0.4354, "lr": 4.472558673038457e-06, "epoch": 1.4477306190384738, "percentage": 28.95, "elapsed_time": "0:59:01", "remaining_time": "2:24:48", "throughput": 20095.02, "total_tokens": 71157376} +{"current_steps": 22620, "total_steps": 78105, "loss": 0.3941, "lr": 4.472215408427526e-06, "epoch": 1.4480507009794508, "percentage": 28.96, "elapsed_time": "0:59:01", "remaining_time": "2:24:47", "throughput": 20095.6, "total_tokens": 71173120} +{"current_steps": 22625, "total_steps": 78105, "loss": 0.4154, "lr": 4.471872045334431e-06, "epoch": 1.4483707829204278, "percentage": 28.97, "elapsed_time": "0:59:02", "remaining_time": "2:24:46", "throughput": 20096.28, "total_tokens": 71189632} +{"current_steps": 22630, "total_steps": 78105, "loss": 0.4497, "lr": 4.471528583776317e-06, "epoch": 1.4486908648614045, "percentage": 28.97, "elapsed_time": "0:59:03", "remaining_time": "2:24:45", "throughput": 20096.92, "total_tokens": 71205952} +{"current_steps": 22635, "total_steps": 78105, "loss": 0.5939, "lr": 4.471185023770335e-06, "epoch": 1.4490109468023813, "percentage": 28.98, "elapsed_time": "0:59:03", "remaining_time": "2:24:44", "throughput": 20097.51, "total_tokens": 71221696} +{"current_steps": 22640, "total_steps": 78105, "loss": 0.3606, "lr": 4.47084136533364e-06, "epoch": 1.4493310287433583, "percentage": 28.99, "elapsed_time": "0:59:04", "remaining_time": "2:24:43", "throughput": 20098.02, "total_tokens": 71236352} +{"current_steps": 22645, "total_steps": 78105, "loss": 0.3271, "lr": 4.470497608483393e-06, "epoch": 1.4496511106843353, "percentage": 28.99, "elapsed_time": "0:59:05", "remaining_time": "2:24:42", "throughput": 20098.51, "total_tokens": 71251136} +{"current_steps": 22650, "total_steps": 78105, "loss": 0.3462, "lr": 4.47015375323676e-06, "epoch": 1.449971192625312, "percentage": 29.0, "elapsed_time": "0:59:05", "remaining_time": "2:24:41", "throughput": 20099.13, "total_tokens": 71267008} +{"current_steps": 22655, "total_steps": 78105, "loss": 0.5195, "lr": 4.46980979961091e-06, "epoch": 1.450291274566289, "percentage": 29.01, "elapsed_time": "0:59:06", "remaining_time": "2:24:40", "throughput": 20099.59, "total_tokens": 71281536} +{"current_steps": 22660, "total_steps": 78105, "loss": 0.4385, "lr": 4.4694657476230195e-06, "epoch": 1.4506113565072658, "percentage": 29.01, "elapsed_time": "0:59:07", "remaining_time": "2:24:39", "throughput": 20100.17, "total_tokens": 71296768} +{"current_steps": 22665, "total_steps": 78105, "loss": 0.346, "lr": 4.469121597290268e-06, "epoch": 1.4509314384482428, "percentage": 29.02, "elapsed_time": "0:59:07", "remaining_time": "2:24:38", "throughput": 20100.83, "total_tokens": 71313216} +{"current_steps": 22670, "total_steps": 78105, "loss": 0.3773, "lr": 4.46877734862984e-06, "epoch": 1.4512515203892196, "percentage": 29.03, "elapsed_time": "0:59:08", "remaining_time": "2:24:37", "throughput": 20101.39, "total_tokens": 71328640} +{"current_steps": 22675, "total_steps": 78105, "loss": 0.4061, "lr": 4.468433001658927e-06, "epoch": 1.4515716023301966, "percentage": 29.03, "elapsed_time": "0:59:09", "remaining_time": "2:24:35", "throughput": 20101.95, "total_tokens": 71344000} +{"current_steps": 22680, "total_steps": 78105, "loss": 0.2994, "lr": 4.468088556394723e-06, "epoch": 1.4518916842711733, "percentage": 29.04, "elapsed_time": "0:59:09", "remaining_time": "2:24:34", "throughput": 20102.64, "total_tokens": 71360768} +{"current_steps": 22685, "total_steps": 78105, "loss": 0.31, "lr": 4.467744012854428e-06, "epoch": 1.4522117662121503, "percentage": 29.04, "elapsed_time": "0:59:10", "remaining_time": "2:24:33", "throughput": 20103.23, "total_tokens": 71376576} +{"current_steps": 22690, "total_steps": 78105, "loss": 0.4804, "lr": 4.467399371055246e-06, "epoch": 1.4525318481531273, "percentage": 29.05, "elapsed_time": "0:59:11", "remaining_time": "2:24:32", "throughput": 20103.69, "total_tokens": 71391488} +{"current_steps": 22695, "total_steps": 78105, "loss": 0.4239, "lr": 4.467054631014388e-06, "epoch": 1.452851930094104, "percentage": 29.06, "elapsed_time": "0:59:11", "remaining_time": "2:24:31", "throughput": 20104.25, "total_tokens": 71406912} +{"current_steps": 22700, "total_steps": 78105, "loss": 0.3706, "lr": 4.4667097927490675e-06, "epoch": 1.453172012035081, "percentage": 29.06, "elapsed_time": "0:59:12", "remaining_time": "2:24:30", "throughput": 20104.8, "total_tokens": 71422144} +{"current_steps": 22705, "total_steps": 78105, "loss": 0.4073, "lr": 4.4663648562765034e-06, "epoch": 1.4534920939760578, "percentage": 29.07, "elapsed_time": "0:59:13", "remaining_time": "2:24:29", "throughput": 20105.42, "total_tokens": 71438080} +{"current_steps": 22710, "total_steps": 78105, "loss": 0.3904, "lr": 4.466019821613923e-06, "epoch": 1.4538121759170348, "percentage": 29.08, "elapsed_time": "0:59:13", "remaining_time": "2:24:28", "throughput": 20105.95, "total_tokens": 71453312} +{"current_steps": 22715, "total_steps": 78105, "loss": 0.302, "lr": 4.46567468877855e-06, "epoch": 1.4541322578580116, "percentage": 29.08, "elapsed_time": "0:59:14", "remaining_time": "2:24:27", "throughput": 20106.43, "total_tokens": 71467776} +{"current_steps": 22720, "total_steps": 78105, "loss": 0.2381, "lr": 4.4653294577876245e-06, "epoch": 1.4544523397989886, "percentage": 29.09, "elapsed_time": "0:59:15", "remaining_time": "2:24:26", "throughput": 20106.89, "total_tokens": 71482496} +{"current_steps": 22725, "total_steps": 78105, "loss": 0.3746, "lr": 4.464984128658382e-06, "epoch": 1.4547724217399653, "percentage": 29.1, "elapsed_time": "0:59:15", "remaining_time": "2:24:25", "throughput": 20107.35, "total_tokens": 71496960} +{"current_steps": 22730, "total_steps": 78105, "loss": 0.4394, "lr": 4.4646387014080675e-06, "epoch": 1.4550925036809423, "percentage": 29.1, "elapsed_time": "0:59:16", "remaining_time": "2:24:24", "throughput": 20107.93, "total_tokens": 71512768} +{"current_steps": 22735, "total_steps": 78105, "loss": 0.3005, "lr": 4.46429317605393e-06, "epoch": 1.4554125856219193, "percentage": 29.11, "elapsed_time": "0:59:17", "remaining_time": "2:24:23", "throughput": 20108.63, "total_tokens": 71529344} +{"current_steps": 22740, "total_steps": 78105, "loss": 0.3661, "lr": 4.463947552613222e-06, "epoch": 1.455732667562896, "percentage": 29.11, "elapsed_time": "0:59:17", "remaining_time": "2:24:22", "throughput": 20109.16, "total_tokens": 71544576} +{"current_steps": 22745, "total_steps": 78105, "loss": 0.2361, "lr": 4.463601831103204e-06, "epoch": 1.4560527495038729, "percentage": 29.12, "elapsed_time": "0:59:18", "remaining_time": "2:24:21", "throughput": 20109.67, "total_tokens": 71559488} +{"current_steps": 22750, "total_steps": 78105, "loss": 0.3353, "lr": 4.463256011541138e-06, "epoch": 1.4563728314448499, "percentage": 29.13, "elapsed_time": "0:59:19", "remaining_time": "2:24:20", "throughput": 20110.27, "total_tokens": 71575424} +{"current_steps": 22755, "total_steps": 78105, "loss": 0.2346, "lr": 4.462910093944293e-06, "epoch": 1.4566929133858268, "percentage": 29.13, "elapsed_time": "0:59:19", "remaining_time": "2:24:19", "throughput": 20110.82, "total_tokens": 71590720} +{"current_steps": 22760, "total_steps": 78105, "loss": 0.6359, "lr": 4.462564078329942e-06, "epoch": 1.4570129953268036, "percentage": 29.14, "elapsed_time": "0:59:20", "remaining_time": "2:24:17", "throughput": 20111.34, "total_tokens": 71605696} +{"current_steps": 22765, "total_steps": 78105, "loss": 0.4651, "lr": 4.462217964715364e-06, "epoch": 1.4573330772677806, "percentage": 29.15, "elapsed_time": "0:59:21", "remaining_time": "2:24:16", "throughput": 20111.88, "total_tokens": 71620672} +{"current_steps": 22770, "total_steps": 78105, "loss": 0.3109, "lr": 4.4618717531178425e-06, "epoch": 1.4576531592087574, "percentage": 29.15, "elapsed_time": "0:59:21", "remaining_time": "2:24:15", "throughput": 20112.57, "total_tokens": 71637184} +{"current_steps": 22775, "total_steps": 78105, "loss": 0.2394, "lr": 4.461525443554663e-06, "epoch": 1.4579732411497344, "percentage": 29.16, "elapsed_time": "0:59:22", "remaining_time": "2:24:14", "throughput": 20113.11, "total_tokens": 71652480} +{"current_steps": 22780, "total_steps": 78105, "loss": 0.2355, "lr": 4.461179036043122e-06, "epoch": 1.4582933230907114, "percentage": 29.17, "elapsed_time": "0:59:23", "remaining_time": "2:24:13", "throughput": 20113.65, "total_tokens": 71668096} +{"current_steps": 22785, "total_steps": 78105, "loss": 0.4601, "lr": 4.460832530600514e-06, "epoch": 1.4586134050316881, "percentage": 29.17, "elapsed_time": "0:59:23", "remaining_time": "2:24:12", "throughput": 20114.08, "total_tokens": 71682560} +{"current_steps": 22790, "total_steps": 78105, "loss": 0.2997, "lr": 4.460485927244142e-06, "epoch": 1.458933486972665, "percentage": 29.18, "elapsed_time": "0:59:24", "remaining_time": "2:24:11", "throughput": 20114.6, "total_tokens": 71697920} +{"current_steps": 22795, "total_steps": 78105, "loss": 0.2865, "lr": 4.460139225991317e-06, "epoch": 1.4592535689136419, "percentage": 29.19, "elapsed_time": "0:59:25", "remaining_time": "2:24:10", "throughput": 20115.12, "total_tokens": 71713280} +{"current_steps": 22800, "total_steps": 78105, "loss": 0.3617, "lr": 4.4597924268593474e-06, "epoch": 1.4595736508546189, "percentage": 29.19, "elapsed_time": "0:59:25", "remaining_time": "2:24:09", "throughput": 20115.74, "total_tokens": 71729088} +{"current_steps": 22805, "total_steps": 78105, "loss": 0.3602, "lr": 4.459445529865553e-06, "epoch": 1.4598937327955956, "percentage": 29.2, "elapsed_time": "0:59:26", "remaining_time": "2:24:08", "throughput": 20116.34, "total_tokens": 71744896} +{"current_steps": 22810, "total_steps": 78105, "loss": 0.329, "lr": 4.4590985350272555e-06, "epoch": 1.4602138147365726, "percentage": 29.2, "elapsed_time": "0:59:27", "remaining_time": "2:24:07", "throughput": 20116.89, "total_tokens": 71759936} +{"current_steps": 22815, "total_steps": 78105, "loss": 0.354, "lr": 4.458751442361781e-06, "epoch": 1.4605338966775494, "percentage": 29.21, "elapsed_time": "0:59:27", "remaining_time": "2:24:06", "throughput": 20117.45, "total_tokens": 71775552} +{"current_steps": 22820, "total_steps": 78105, "loss": 0.2664, "lr": 4.458404251886462e-06, "epoch": 1.4608539786185264, "percentage": 29.22, "elapsed_time": "0:59:28", "remaining_time": "2:24:05", "throughput": 20118.09, "total_tokens": 71791936} +{"current_steps": 22825, "total_steps": 78105, "loss": 0.4964, "lr": 4.458056963618636e-06, "epoch": 1.4611740605595032, "percentage": 29.22, "elapsed_time": "0:59:29", "remaining_time": "2:24:04", "throughput": 20118.68, "total_tokens": 71807552} +{"current_steps": 22830, "total_steps": 78105, "loss": 0.3495, "lr": 4.4577095775756445e-06, "epoch": 1.4614941425004802, "percentage": 29.23, "elapsed_time": "0:59:29", "remaining_time": "2:24:03", "throughput": 20119.22, "total_tokens": 71823360} +{"current_steps": 22835, "total_steps": 78105, "loss": 0.2641, "lr": 4.457362093774834e-06, "epoch": 1.461814224441457, "percentage": 29.24, "elapsed_time": "0:59:30", "remaining_time": "2:24:02", "throughput": 20119.82, "total_tokens": 71839104} +{"current_steps": 22840, "total_steps": 78105, "loss": 0.2915, "lr": 4.457014512233557e-06, "epoch": 1.462134306382434, "percentage": 29.24, "elapsed_time": "0:59:31", "remaining_time": "2:24:01", "throughput": 20120.38, "total_tokens": 71854592} +{"current_steps": 22845, "total_steps": 78105, "loss": 0.2737, "lr": 4.456666832969167e-06, "epoch": 1.462454388323411, "percentage": 29.25, "elapsed_time": "0:59:31", "remaining_time": "2:24:00", "throughput": 20121.02, "total_tokens": 71870592} +{"current_steps": 22850, "total_steps": 78105, "loss": 0.4396, "lr": 4.456319055999027e-06, "epoch": 1.4627744702643877, "percentage": 29.26, "elapsed_time": "0:59:32", "remaining_time": "2:23:59", "throughput": 20121.93, "total_tokens": 71888832} +{"current_steps": 22855, "total_steps": 78105, "loss": 0.3411, "lr": 4.455971181340506e-06, "epoch": 1.4630945522053644, "percentage": 29.26, "elapsed_time": "0:59:33", "remaining_time": "2:23:58", "throughput": 20122.44, "total_tokens": 71903808} +{"current_steps": 22860, "total_steps": 78105, "loss": 0.4472, "lr": 4.455623209010971e-06, "epoch": 1.4634146341463414, "percentage": 29.27, "elapsed_time": "0:59:34", "remaining_time": "2:23:57", "throughput": 20123.3, "total_tokens": 71921792} +{"current_steps": 22865, "total_steps": 78105, "loss": 0.4584, "lr": 4.4552751390278e-06, "epoch": 1.4637347160873184, "percentage": 29.27, "elapsed_time": "0:59:34", "remaining_time": "2:23:56", "throughput": 20123.85, "total_tokens": 71937088} +{"current_steps": 22870, "total_steps": 78105, "loss": 0.2597, "lr": 4.454926971408372e-06, "epoch": 1.4640547980282952, "percentage": 29.28, "elapsed_time": "0:59:35", "remaining_time": "2:23:55", "throughput": 20124.4, "total_tokens": 71952256} +{"current_steps": 22875, "total_steps": 78105, "loss": 0.3663, "lr": 4.454578706170075e-06, "epoch": 1.4643748799692722, "percentage": 29.29, "elapsed_time": "0:59:36", "remaining_time": "2:23:54", "throughput": 20124.98, "total_tokens": 71967872} +{"current_steps": 22880, "total_steps": 78105, "loss": 0.231, "lr": 4.454230343330298e-06, "epoch": 1.464694961910249, "percentage": 29.29, "elapsed_time": "0:59:36", "remaining_time": "2:23:53", "throughput": 20125.46, "total_tokens": 71982912} +{"current_steps": 22885, "total_steps": 78105, "loss": 0.3808, "lr": 4.453881882906437e-06, "epoch": 1.465015043851226, "percentage": 29.3, "elapsed_time": "0:59:37", "remaining_time": "2:23:52", "throughput": 20126.16, "total_tokens": 71999424} +{"current_steps": 22890, "total_steps": 78105, "loss": 0.3723, "lr": 4.453533324915892e-06, "epoch": 1.465335125792203, "percentage": 29.31, "elapsed_time": "0:59:38", "remaining_time": "2:23:51", "throughput": 20126.91, "total_tokens": 72016512} +{"current_steps": 22895, "total_steps": 78105, "loss": 0.3125, "lr": 4.453184669376068e-06, "epoch": 1.4656552077331797, "percentage": 29.31, "elapsed_time": "0:59:38", "remaining_time": "2:23:50", "throughput": 20127.43, "total_tokens": 72031744} +{"current_steps": 22900, "total_steps": 78105, "loss": 0.2324, "lr": 4.4528359163043764e-06, "epoch": 1.4659752896741565, "percentage": 29.32, "elapsed_time": "0:59:39", "remaining_time": "2:23:48", "throughput": 20128.06, "total_tokens": 72047488} +{"current_steps": 22905, "total_steps": 78105, "loss": 0.3736, "lr": 4.452487065718231e-06, "epoch": 1.4662953716151335, "percentage": 29.33, "elapsed_time": "0:59:40", "remaining_time": "2:23:47", "throughput": 20128.65, "total_tokens": 72063296} +{"current_steps": 22910, "total_steps": 78105, "loss": 0.438, "lr": 4.452138117635051e-06, "epoch": 1.4666154535561104, "percentage": 29.33, "elapsed_time": "0:59:40", "remaining_time": "2:23:46", "throughput": 20129.23, "total_tokens": 72079104} +{"current_steps": 22915, "total_steps": 78105, "loss": 0.3682, "lr": 4.4517890720722625e-06, "epoch": 1.4669355354970872, "percentage": 29.34, "elapsed_time": "0:59:41", "remaining_time": "2:23:45", "throughput": 20129.79, "total_tokens": 72094400} +{"current_steps": 22920, "total_steps": 78105, "loss": 0.4157, "lr": 4.451439929047293e-06, "epoch": 1.4672556174380642, "percentage": 29.35, "elapsed_time": "0:59:42", "remaining_time": "2:23:44", "throughput": 20130.32, "total_tokens": 72109760} +{"current_steps": 22925, "total_steps": 78105, "loss": 0.4526, "lr": 4.451090688577579e-06, "epoch": 1.467575699379041, "percentage": 29.35, "elapsed_time": "0:59:42", "remaining_time": "2:23:43", "throughput": 20131.06, "total_tokens": 72126784} +{"current_steps": 22930, "total_steps": 78105, "loss": 0.2457, "lr": 4.450741350680559e-06, "epoch": 1.467895781320018, "percentage": 29.36, "elapsed_time": "0:59:43", "remaining_time": "2:23:42", "throughput": 20131.61, "total_tokens": 72142272} +{"current_steps": 22935, "total_steps": 78105, "loss": 0.4563, "lr": 4.450391915373676e-06, "epoch": 1.4682158632609947, "percentage": 29.36, "elapsed_time": "0:59:44", "remaining_time": "2:23:41", "throughput": 20132.21, "total_tokens": 72158208} +{"current_steps": 22940, "total_steps": 78105, "loss": 0.3258, "lr": 4.450042382674381e-06, "epoch": 1.4685359452019717, "percentage": 29.37, "elapsed_time": "0:59:44", "remaining_time": "2:23:40", "throughput": 20132.71, "total_tokens": 72173376} +{"current_steps": 22945, "total_steps": 78105, "loss": 0.2837, "lr": 4.449692752600127e-06, "epoch": 1.4688560271429485, "percentage": 29.38, "elapsed_time": "0:59:45", "remaining_time": "2:23:39", "throughput": 20133.24, "total_tokens": 72188480} +{"current_steps": 22950, "total_steps": 78105, "loss": 0.3976, "lr": 4.449343025168371e-06, "epoch": 1.4691761090839255, "percentage": 29.38, "elapsed_time": "0:59:46", "remaining_time": "2:23:38", "throughput": 20133.89, "total_tokens": 72205056} +{"current_steps": 22955, "total_steps": 78105, "loss": 0.3797, "lr": 4.44899320039658e-06, "epoch": 1.4694961910249025, "percentage": 29.39, "elapsed_time": "0:59:46", "remaining_time": "2:23:37", "throughput": 20134.37, "total_tokens": 72220160} +{"current_steps": 22960, "total_steps": 78105, "loss": 0.3993, "lr": 4.448643278302219e-06, "epoch": 1.4698162729658792, "percentage": 29.4, "elapsed_time": "0:59:47", "remaining_time": "2:23:36", "throughput": 20134.81, "total_tokens": 72234688} +{"current_steps": 22965, "total_steps": 78105, "loss": 0.34, "lr": 4.448293258902764e-06, "epoch": 1.4701363549068562, "percentage": 29.4, "elapsed_time": "0:59:48", "remaining_time": "2:23:35", "throughput": 20135.35, "total_tokens": 72249856} +{"current_steps": 22970, "total_steps": 78105, "loss": 0.2964, "lr": 4.44794314221569e-06, "epoch": 1.470456436847833, "percentage": 29.41, "elapsed_time": "0:59:48", "remaining_time": "2:23:34", "throughput": 20135.97, "total_tokens": 72266048} +{"current_steps": 22975, "total_steps": 78105, "loss": 0.3189, "lr": 4.447592928258484e-06, "epoch": 1.47077651878881, "percentage": 29.42, "elapsed_time": "0:59:49", "remaining_time": "2:23:33", "throughput": 20136.45, "total_tokens": 72280960} +{"current_steps": 22980, "total_steps": 78105, "loss": 0.3027, "lr": 4.44724261704863e-06, "epoch": 1.4710966007297868, "percentage": 29.42, "elapsed_time": "0:59:50", "remaining_time": "2:23:32", "throughput": 20137.19, "total_tokens": 72298304} +{"current_steps": 22985, "total_steps": 78105, "loss": 0.4437, "lr": 4.446892208603625e-06, "epoch": 1.4714166826707638, "percentage": 29.43, "elapsed_time": "0:59:50", "remaining_time": "2:23:31", "throughput": 20137.89, "total_tokens": 72314944} +{"current_steps": 22990, "total_steps": 78105, "loss": 0.3845, "lr": 4.446541702940962e-06, "epoch": 1.4717367646117405, "percentage": 29.43, "elapsed_time": "0:59:51", "remaining_time": "2:23:30", "throughput": 20138.45, "total_tokens": 72330880} +{"current_steps": 22995, "total_steps": 78105, "loss": 0.3411, "lr": 4.446191100078147e-06, "epoch": 1.4720568465527175, "percentage": 29.44, "elapsed_time": "0:59:52", "remaining_time": "2:23:29", "throughput": 20138.94, "total_tokens": 72345536} +{"current_steps": 23000, "total_steps": 78105, "loss": 0.2296, "lr": 4.445840400032684e-06, "epoch": 1.4723769284936945, "percentage": 29.45, "elapsed_time": "0:59:53", "remaining_time": "2:23:28", "throughput": 20139.54, "total_tokens": 72361408} +{"current_steps": 23005, "total_steps": 78105, "loss": 0.4182, "lr": 4.445489602822088e-06, "epoch": 1.4726970104346713, "percentage": 29.45, "elapsed_time": "0:59:53", "remaining_time": "2:23:27", "throughput": 20140.1, "total_tokens": 72377216} +{"current_steps": 23010, "total_steps": 78105, "loss": 0.3062, "lr": 4.445138708463875e-06, "epoch": 1.473017092375648, "percentage": 29.46, "elapsed_time": "0:59:54", "remaining_time": "2:23:26", "throughput": 20140.78, "total_tokens": 72393536} +{"current_steps": 23015, "total_steps": 78105, "loss": 0.2834, "lr": 4.444787716975567e-06, "epoch": 1.473337174316625, "percentage": 29.47, "elapsed_time": "0:59:55", "remaining_time": "2:23:25", "throughput": 20141.34, "total_tokens": 72408896} +{"current_steps": 23020, "total_steps": 78105, "loss": 0.4575, "lr": 4.44443662837469e-06, "epoch": 1.473657256257602, "percentage": 29.47, "elapsed_time": "0:59:55", "remaining_time": "2:23:24", "throughput": 20141.95, "total_tokens": 72424896} +{"current_steps": 23025, "total_steps": 78105, "loss": 0.3044, "lr": 4.444085442678776e-06, "epoch": 1.4739773381985788, "percentage": 29.48, "elapsed_time": "0:59:56", "remaining_time": "2:23:23", "throughput": 20142.52, "total_tokens": 72440384} +{"current_steps": 23030, "total_steps": 78105, "loss": 0.3461, "lr": 4.443734159905361e-06, "epoch": 1.4742974201395558, "percentage": 29.49, "elapsed_time": "0:59:57", "remaining_time": "2:23:22", "throughput": 20142.98, "total_tokens": 72455040} +{"current_steps": 23035, "total_steps": 78105, "loss": 0.3132, "lr": 4.443382780071987e-06, "epoch": 1.4746175020805325, "percentage": 29.49, "elapsed_time": "0:59:57", "remaining_time": "2:23:21", "throughput": 20143.58, "total_tokens": 72470912} +{"current_steps": 23040, "total_steps": 78105, "loss": 0.4875, "lr": 4.443031303196199e-06, "epoch": 1.4749375840215095, "percentage": 29.5, "elapsed_time": "0:59:58", "remaining_time": "2:23:20", "throughput": 20144.13, "total_tokens": 72486464} +{"current_steps": 23045, "total_steps": 78105, "loss": 0.3707, "lr": 4.4426797292955504e-06, "epoch": 1.4752576659624865, "percentage": 29.51, "elapsed_time": "0:59:59", "remaining_time": "2:23:18", "throughput": 20144.64, "total_tokens": 72501440} +{"current_steps": 23050, "total_steps": 78105, "loss": 0.2885, "lr": 4.4423280583875935e-06, "epoch": 1.4755777479034633, "percentage": 29.51, "elapsed_time": "0:59:59", "remaining_time": "2:23:17", "throughput": 20145.1, "total_tokens": 72516480} +{"current_steps": 23055, "total_steps": 78105, "loss": 0.42, "lr": 4.441976290489891e-06, "epoch": 1.47589782984444, "percentage": 29.52, "elapsed_time": "1:00:00", "remaining_time": "2:23:16", "throughput": 20145.69, "total_tokens": 72532608} +{"current_steps": 23060, "total_steps": 78105, "loss": 0.3192, "lr": 4.441624425620008e-06, "epoch": 1.476217911785417, "percentage": 29.52, "elapsed_time": "1:00:01", "remaining_time": "2:23:15", "throughput": 20146.3, "total_tokens": 72548480} +{"current_steps": 23065, "total_steps": 78105, "loss": 0.3426, "lr": 4.4412724637955154e-06, "epoch": 1.476537993726394, "percentage": 29.53, "elapsed_time": "1:00:01", "remaining_time": "2:23:14", "throughput": 20146.98, "total_tokens": 72564800} +{"current_steps": 23070, "total_steps": 78105, "loss": 0.3251, "lr": 4.440920405033987e-06, "epoch": 1.4768580756673708, "percentage": 29.54, "elapsed_time": "1:00:02", "remaining_time": "2:23:13", "throughput": 20147.54, "total_tokens": 72580672} +{"current_steps": 23075, "total_steps": 78105, "loss": 0.2693, "lr": 4.440568249353003e-06, "epoch": 1.4771781576083478, "percentage": 29.54, "elapsed_time": "1:00:03", "remaining_time": "2:23:12", "throughput": 20148.11, "total_tokens": 72596736} +{"current_steps": 23080, "total_steps": 78105, "loss": 0.2994, "lr": 4.44021599677015e-06, "epoch": 1.4774982395493246, "percentage": 29.55, "elapsed_time": "1:00:03", "remaining_time": "2:23:12", "throughput": 20149.05, "total_tokens": 72615296} +{"current_steps": 23085, "total_steps": 78105, "loss": 0.3802, "lr": 4.439863647303015e-06, "epoch": 1.4778183214903016, "percentage": 29.56, "elapsed_time": "1:00:04", "remaining_time": "2:23:10", "throughput": 20149.55, "total_tokens": 72630144} +{"current_steps": 23090, "total_steps": 78105, "loss": 0.2283, "lr": 4.439511200969194e-06, "epoch": 1.4781384034312783, "percentage": 29.56, "elapsed_time": "1:00:05", "remaining_time": "2:23:09", "throughput": 20150.08, "total_tokens": 72645440} +{"current_steps": 23095, "total_steps": 78105, "loss": 0.4254, "lr": 4.439158657786286e-06, "epoch": 1.4784584853722553, "percentage": 29.57, "elapsed_time": "1:00:05", "remaining_time": "2:23:08", "throughput": 20150.61, "total_tokens": 72660480} +{"current_steps": 23100, "total_steps": 78105, "loss": 0.3099, "lr": 4.438806017771897e-06, "epoch": 1.478778567313232, "percentage": 29.58, "elapsed_time": "1:00:06", "remaining_time": "2:23:07", "throughput": 20151.36, "total_tokens": 72677696} +{"current_steps": 23105, "total_steps": 78105, "loss": 0.3439, "lr": 4.438453280943633e-06, "epoch": 1.479098649254209, "percentage": 29.58, "elapsed_time": "1:00:07", "remaining_time": "2:23:06", "throughput": 20152.09, "total_tokens": 72694912} +{"current_steps": 23110, "total_steps": 78105, "loss": 0.4268, "lr": 4.438100447319109e-06, "epoch": 1.479418731195186, "percentage": 29.59, "elapsed_time": "1:00:07", "remaining_time": "2:23:05", "throughput": 20152.56, "total_tokens": 72709440} +{"current_steps": 23115, "total_steps": 78105, "loss": 0.3321, "lr": 4.437747516915945e-06, "epoch": 1.4797388131361628, "percentage": 29.59, "elapsed_time": "1:00:08", "remaining_time": "2:23:04", "throughput": 20153.04, "total_tokens": 72724416} +{"current_steps": 23120, "total_steps": 78105, "loss": 0.3163, "lr": 4.437394489751762e-06, "epoch": 1.4800588950771396, "percentage": 29.6, "elapsed_time": "1:00:09", "remaining_time": "2:23:03", "throughput": 20153.44, "total_tokens": 72738560} +{"current_steps": 23125, "total_steps": 78105, "loss": 0.29, "lr": 4.43704136584419e-06, "epoch": 1.4803789770181166, "percentage": 29.61, "elapsed_time": "1:00:09", "remaining_time": "2:23:02", "throughput": 20153.95, "total_tokens": 72753536} +{"current_steps": 23130, "total_steps": 78105, "loss": 0.4311, "lr": 4.436688145210862e-06, "epoch": 1.4806990589590936, "percentage": 29.61, "elapsed_time": "1:00:10", "remaining_time": "2:23:01", "throughput": 20154.55, "total_tokens": 72769728} +{"current_steps": 23135, "total_steps": 78105, "loss": 0.2633, "lr": 4.436334827869416e-06, "epoch": 1.4810191409000704, "percentage": 29.62, "elapsed_time": "1:00:11", "remaining_time": "2:23:00", "throughput": 20155.03, "total_tokens": 72784448} +{"current_steps": 23140, "total_steps": 78105, "loss": 0.3225, "lr": 4.435981413837495e-06, "epoch": 1.4813392228410474, "percentage": 29.63, "elapsed_time": "1:00:11", "remaining_time": "2:22:59", "throughput": 20155.62, "total_tokens": 72800384} +{"current_steps": 23145, "total_steps": 78105, "loss": 0.3205, "lr": 4.435627903132747e-06, "epoch": 1.4816593047820241, "percentage": 29.63, "elapsed_time": "1:00:12", "remaining_time": "2:22:58", "throughput": 20156.19, "total_tokens": 72816064} +{"current_steps": 23150, "total_steps": 78105, "loss": 0.3254, "lr": 4.435274295772822e-06, "epoch": 1.4819793867230011, "percentage": 29.64, "elapsed_time": "1:00:13", "remaining_time": "2:22:57", "throughput": 20156.88, "total_tokens": 72832960} +{"current_steps": 23155, "total_steps": 78105, "loss": 0.4106, "lr": 4.43492059177538e-06, "epoch": 1.482299468663978, "percentage": 29.65, "elapsed_time": "1:00:13", "remaining_time": "2:22:56", "throughput": 20157.34, "total_tokens": 72847424} +{"current_steps": 23160, "total_steps": 78105, "loss": 0.4817, "lr": 4.434566791158083e-06, "epoch": 1.4826195506049549, "percentage": 29.65, "elapsed_time": "1:00:14", "remaining_time": "2:22:55", "throughput": 20157.86, "total_tokens": 72863168} +{"current_steps": 23165, "total_steps": 78105, "loss": 0.2532, "lr": 4.434212893938596e-06, "epoch": 1.4829396325459316, "percentage": 29.66, "elapsed_time": "1:00:15", "remaining_time": "2:22:54", "throughput": 20158.41, "total_tokens": 72878592} +{"current_steps": 23170, "total_steps": 78105, "loss": 0.3743, "lr": 4.433858900134593e-06, "epoch": 1.4832597144869086, "percentage": 29.67, "elapsed_time": "1:00:15", "remaining_time": "2:22:53", "throughput": 20158.95, "total_tokens": 72894016} +{"current_steps": 23175, "total_steps": 78105, "loss": 0.4044, "lr": 4.433504809763749e-06, "epoch": 1.4835797964278856, "percentage": 29.67, "elapsed_time": "1:00:17", "remaining_time": "2:22:53", "throughput": 20158.01, "total_tokens": 72912000} +{"current_steps": 23180, "total_steps": 78105, "loss": 0.2907, "lr": 4.4331506228437465e-06, "epoch": 1.4838998783688624, "percentage": 29.68, "elapsed_time": "1:00:17", "remaining_time": "2:22:52", "throughput": 20158.6, "total_tokens": 72928128} +{"current_steps": 23185, "total_steps": 78105, "loss": 0.3626, "lr": 4.432796339392272e-06, "epoch": 1.4842199603098394, "percentage": 29.68, "elapsed_time": "1:00:18", "remaining_time": "2:22:51", "throughput": 20159.31, "total_tokens": 72945024} +{"current_steps": 23190, "total_steps": 78105, "loss": 0.3614, "lr": 4.432441959427014e-06, "epoch": 1.4845400422508161, "percentage": 29.69, "elapsed_time": "1:00:19", "remaining_time": "2:22:50", "throughput": 20159.93, "total_tokens": 72961216} +{"current_steps": 23195, "total_steps": 78105, "loss": 0.333, "lr": 4.432087482965671e-06, "epoch": 1.4848601241917931, "percentage": 29.7, "elapsed_time": "1:00:19", "remaining_time": "2:22:49", "throughput": 20160.46, "total_tokens": 72976640} +{"current_steps": 23200, "total_steps": 78105, "loss": 0.2421, "lr": 4.431732910025943e-06, "epoch": 1.48518020613277, "percentage": 29.7, "elapsed_time": "1:00:20", "remaining_time": "2:22:48", "throughput": 20161.01, "total_tokens": 72992256} +{"current_steps": 23205, "total_steps": 78105, "loss": 0.3324, "lr": 4.431378240625537e-06, "epoch": 1.485500288073747, "percentage": 29.71, "elapsed_time": "1:00:21", "remaining_time": "2:22:47", "throughput": 20161.57, "total_tokens": 73007936} +{"current_steps": 23210, "total_steps": 78105, "loss": 0.3451, "lr": 4.43102347478216e-06, "epoch": 1.4858203700147237, "percentage": 29.72, "elapsed_time": "1:00:21", "remaining_time": "2:22:46", "throughput": 20162.18, "total_tokens": 73023808} +{"current_steps": 23215, "total_steps": 78105, "loss": 0.2459, "lr": 4.4306686125135305e-06, "epoch": 1.4861404519557007, "percentage": 29.72, "elapsed_time": "1:00:22", "remaining_time": "2:22:45", "throughput": 20162.65, "total_tokens": 73038464} +{"current_steps": 23220, "total_steps": 78105, "loss": 0.2915, "lr": 4.430313653837365e-06, "epoch": 1.4864605338966776, "percentage": 29.73, "elapsed_time": "1:00:23", "remaining_time": "2:22:44", "throughput": 20163.3, "total_tokens": 73055104} +{"current_steps": 23225, "total_steps": 78105, "loss": 0.3593, "lr": 4.429958598771392e-06, "epoch": 1.4867806158376544, "percentage": 29.74, "elapsed_time": "1:00:23", "remaining_time": "2:22:43", "throughput": 20163.8, "total_tokens": 73070336} +{"current_steps": 23230, "total_steps": 78105, "loss": 0.2923, "lr": 4.429603447333338e-06, "epoch": 1.4871006977786314, "percentage": 29.74, "elapsed_time": "1:00:24", "remaining_time": "2:22:42", "throughput": 20164.46, "total_tokens": 73086912} +{"current_steps": 23235, "total_steps": 78105, "loss": 0.3545, "lr": 4.42924819954094e-06, "epoch": 1.4874207797196082, "percentage": 29.75, "elapsed_time": "1:00:25", "remaining_time": "2:22:41", "throughput": 20165.12, "total_tokens": 73103616} +{"current_steps": 23240, "total_steps": 78105, "loss": 0.3, "lr": 4.428892855411935e-06, "epoch": 1.4877408616605852, "percentage": 29.75, "elapsed_time": "1:00:25", "remaining_time": "2:22:40", "throughput": 20165.75, "total_tokens": 73119872} +{"current_steps": 23245, "total_steps": 78105, "loss": 0.3219, "lr": 4.428537414964069e-06, "epoch": 1.488060943601562, "percentage": 29.76, "elapsed_time": "1:00:26", "remaining_time": "2:22:39", "throughput": 20166.37, "total_tokens": 73136192} +{"current_steps": 23250, "total_steps": 78105, "loss": 0.3172, "lr": 4.4281818782150895e-06, "epoch": 1.488381025542539, "percentage": 29.77, "elapsed_time": "1:00:27", "remaining_time": "2:22:38", "throughput": 20166.89, "total_tokens": 73151744} +{"current_steps": 23255, "total_steps": 78105, "loss": 0.3102, "lr": 4.42782624518275e-06, "epoch": 1.4887011074835157, "percentage": 29.77, "elapsed_time": "1:00:27", "remaining_time": "2:22:37", "throughput": 20167.42, "total_tokens": 73166656} +{"current_steps": 23260, "total_steps": 78105, "loss": 0.2958, "lr": 4.42747051588481e-06, "epoch": 1.4890211894244927, "percentage": 29.78, "elapsed_time": "1:00:28", "remaining_time": "2:22:35", "throughput": 20167.85, "total_tokens": 73181248} +{"current_steps": 23265, "total_steps": 78105, "loss": 0.3511, "lr": 4.427114690339032e-06, "epoch": 1.4893412713654697, "percentage": 29.79, "elapsed_time": "1:00:29", "remaining_time": "2:22:34", "throughput": 20168.39, "total_tokens": 73196608} +{"current_steps": 23270, "total_steps": 78105, "loss": 0.3076, "lr": 4.4267587685631855e-06, "epoch": 1.4896613533064464, "percentage": 29.79, "elapsed_time": "1:00:29", "remaining_time": "2:22:33", "throughput": 20168.94, "total_tokens": 73212096} +{"current_steps": 23275, "total_steps": 78105, "loss": 0.3852, "lr": 4.426402750575041e-06, "epoch": 1.4899814352474232, "percentage": 29.8, "elapsed_time": "1:00:30", "remaining_time": "2:22:32", "throughput": 20169.64, "total_tokens": 73229376} +{"current_steps": 23280, "total_steps": 78105, "loss": 0.292, "lr": 4.426046636392377e-06, "epoch": 1.4903015171884002, "percentage": 29.81, "elapsed_time": "1:00:31", "remaining_time": "2:22:31", "throughput": 20170.16, "total_tokens": 73244608} +{"current_steps": 23285, "total_steps": 78105, "loss": 0.3003, "lr": 4.425690426032977e-06, "epoch": 1.4906215991293772, "percentage": 29.81, "elapsed_time": "1:00:32", "remaining_time": "2:22:30", "throughput": 20170.75, "total_tokens": 73260864} +{"current_steps": 23290, "total_steps": 78105, "loss": 0.3578, "lr": 4.425334119514628e-06, "epoch": 1.490941681070354, "percentage": 29.82, "elapsed_time": "1:00:32", "remaining_time": "2:22:29", "throughput": 20171.46, "total_tokens": 73277568} +{"current_steps": 23295, "total_steps": 78105, "loss": 0.3293, "lr": 4.424977716855121e-06, "epoch": 1.491261763011331, "percentage": 29.83, "elapsed_time": "1:00:33", "remaining_time": "2:22:28", "throughput": 20172.12, "total_tokens": 73294208} +{"current_steps": 23300, "total_steps": 78105, "loss": 0.42, "lr": 4.424621218072255e-06, "epoch": 1.4915818449523077, "percentage": 29.83, "elapsed_time": "1:00:34", "remaining_time": "2:22:27", "throughput": 20172.65, "total_tokens": 73309120} +{"current_steps": 23305, "total_steps": 78105, "loss": 0.4557, "lr": 4.424264623183829e-06, "epoch": 1.4919019268932847, "percentage": 29.84, "elapsed_time": "1:00:34", "remaining_time": "2:22:26", "throughput": 20173.18, "total_tokens": 73324288} +{"current_steps": 23310, "total_steps": 78105, "loss": 0.3435, "lr": 4.4239079322076515e-06, "epoch": 1.4922220088342617, "percentage": 29.84, "elapsed_time": "1:00:35", "remaining_time": "2:22:25", "throughput": 20173.85, "total_tokens": 73340736} +{"current_steps": 23315, "total_steps": 78105, "loss": 0.2707, "lr": 4.423551145161533e-06, "epoch": 1.4925420907752385, "percentage": 29.85, "elapsed_time": "1:00:36", "remaining_time": "2:22:24", "throughput": 20174.29, "total_tokens": 73355456} +{"current_steps": 23320, "total_steps": 78105, "loss": 0.2235, "lr": 4.4231942620632895e-06, "epoch": 1.4928621727162152, "percentage": 29.86, "elapsed_time": "1:00:36", "remaining_time": "2:22:23", "throughput": 20174.86, "total_tokens": 73370816} +{"current_steps": 23325, "total_steps": 78105, "loss": 0.2683, "lr": 4.422837282930743e-06, "epoch": 1.4931822546571922, "percentage": 29.86, "elapsed_time": "1:00:37", "remaining_time": "2:22:22", "throughput": 20175.34, "total_tokens": 73385536} +{"current_steps": 23330, "total_steps": 78105, "loss": 0.3443, "lr": 4.422480207781718e-06, "epoch": 1.4935023365981692, "percentage": 29.87, "elapsed_time": "1:00:38", "remaining_time": "2:22:21", "throughput": 20175.87, "total_tokens": 73400576} +{"current_steps": 23335, "total_steps": 78105, "loss": 0.3525, "lr": 4.422123036634044e-06, "epoch": 1.493822418539146, "percentage": 29.88, "elapsed_time": "1:00:38", "remaining_time": "2:22:20", "throughput": 20176.34, "total_tokens": 73415296} +{"current_steps": 23340, "total_steps": 78105, "loss": 0.3324, "lr": 4.421765769505559e-06, "epoch": 1.494142500480123, "percentage": 29.88, "elapsed_time": "1:00:39", "remaining_time": "2:22:19", "throughput": 20176.94, "total_tokens": 73431232} +{"current_steps": 23345, "total_steps": 78105, "loss": 0.3936, "lr": 4.421408406414101e-06, "epoch": 1.4944625824210998, "percentage": 29.89, "elapsed_time": "1:00:40", "remaining_time": "2:22:18", "throughput": 20177.45, "total_tokens": 73446336} +{"current_steps": 23350, "total_steps": 78105, "loss": 0.4062, "lr": 4.421050947377515e-06, "epoch": 1.4947826643620767, "percentage": 29.9, "elapsed_time": "1:00:40", "remaining_time": "2:22:17", "throughput": 20177.96, "total_tokens": 73461440} +{"current_steps": 23355, "total_steps": 78105, "loss": 0.3053, "lr": 4.4206933924136515e-06, "epoch": 1.4951027463030535, "percentage": 29.9, "elapsed_time": "1:00:41", "remaining_time": "2:22:16", "throughput": 20178.52, "total_tokens": 73476992} +{"current_steps": 23360, "total_steps": 78105, "loss": 0.395, "lr": 4.4203357415403645e-06, "epoch": 1.4954228282440305, "percentage": 29.91, "elapsed_time": "1:00:42", "remaining_time": "2:22:15", "throughput": 20179.27, "total_tokens": 73494208} +{"current_steps": 23365, "total_steps": 78105, "loss": 0.3168, "lr": 4.419977994775513e-06, "epoch": 1.4957429101850073, "percentage": 29.91, "elapsed_time": "1:00:42", "remaining_time": "2:22:14", "throughput": 20179.81, "total_tokens": 73509760} +{"current_steps": 23370, "total_steps": 78105, "loss": 0.211, "lr": 4.419620152136962e-06, "epoch": 1.4960629921259843, "percentage": 29.92, "elapsed_time": "1:00:43", "remaining_time": "2:22:13", "throughput": 20180.39, "total_tokens": 73525568} +{"current_steps": 23375, "total_steps": 78105, "loss": 0.3046, "lr": 4.419262213642579e-06, "epoch": 1.4963830740669612, "percentage": 29.93, "elapsed_time": "1:00:44", "remaining_time": "2:22:12", "throughput": 20181.01, "total_tokens": 73541824} +{"current_steps": 23380, "total_steps": 78105, "loss": 0.299, "lr": 4.418904179310238e-06, "epoch": 1.496703156007938, "percentage": 29.93, "elapsed_time": "1:00:44", "remaining_time": "2:22:11", "throughput": 20181.62, "total_tokens": 73558080} +{"current_steps": 23385, "total_steps": 78105, "loss": 0.3257, "lr": 4.418546049157817e-06, "epoch": 1.4970232379489148, "percentage": 29.94, "elapsed_time": "1:00:45", "remaining_time": "2:22:10", "throughput": 20182.36, "total_tokens": 73575232} +{"current_steps": 23390, "total_steps": 78105, "loss": 0.3447, "lr": 4.4181878232032005e-06, "epoch": 1.4973433198898918, "percentage": 29.95, "elapsed_time": "1:00:46", "remaining_time": "2:22:09", "throughput": 20182.92, "total_tokens": 73590720} +{"current_steps": 23395, "total_steps": 78105, "loss": 0.513, "lr": 4.417829501464276e-06, "epoch": 1.4976634018308688, "percentage": 29.95, "elapsed_time": "1:00:46", "remaining_time": "2:22:08", "throughput": 20183.48, "total_tokens": 73606336} +{"current_steps": 23400, "total_steps": 78105, "loss": 0.4223, "lr": 4.417471083958935e-06, "epoch": 1.4979834837718455, "percentage": 29.96, "elapsed_time": "1:00:47", "remaining_time": "2:22:07", "throughput": 20184.09, "total_tokens": 73622336} +{"current_steps": 23405, "total_steps": 78105, "loss": 0.471, "lr": 4.417112570705075e-06, "epoch": 1.4983035657128225, "percentage": 29.97, "elapsed_time": "1:00:48", "remaining_time": "2:22:06", "throughput": 20184.7, "total_tokens": 73640320} +{"current_steps": 23410, "total_steps": 78105, "loss": 0.4929, "lr": 4.4167539617206005e-06, "epoch": 1.4986236476537993, "percentage": 29.97, "elapsed_time": "1:00:49", "remaining_time": "2:22:05", "throughput": 20185.34, "total_tokens": 73656832} +{"current_steps": 23415, "total_steps": 78105, "loss": 0.3796, "lr": 4.416395257023417e-06, "epoch": 1.4989437295947763, "percentage": 29.98, "elapsed_time": "1:00:49", "remaining_time": "2:22:04", "throughput": 20185.91, "total_tokens": 73672448} +{"current_steps": 23420, "total_steps": 78105, "loss": 0.3427, "lr": 4.416036456631436e-06, "epoch": 1.4992638115357533, "percentage": 29.99, "elapsed_time": "1:00:50", "remaining_time": "2:22:03", "throughput": 20186.64, "total_tokens": 73689600} +{"current_steps": 23425, "total_steps": 78105, "loss": 0.3204, "lr": 4.415677560562575e-06, "epoch": 1.49958389347673, "percentage": 29.99, "elapsed_time": "1:00:51", "remaining_time": "2:22:02", "throughput": 20187.15, "total_tokens": 73705152} +{"current_steps": 23430, "total_steps": 78105, "loss": 0.3287, "lr": 4.4153185688347555e-06, "epoch": 1.4999039754177068, "percentage": 30.0, "elapsed_time": "1:00:51", "remaining_time": "2:22:01", "throughput": 20187.71, "total_tokens": 73720512} +{"current_steps": 23435, "total_steps": 78105, "loss": 0.2897, "lr": 4.414959481465902e-06, "epoch": 1.5002240573586838, "percentage": 30.0, "elapsed_time": "1:00:52", "remaining_time": "2:22:00", "throughput": 20188.29, "total_tokens": 73736704} +{"current_steps": 23436, "total_steps": 78105, "eval_loss": 0.4805953800678253, "epoch": 1.5002880737468791, "percentage": 30.01, "elapsed_time": "1:01:43", "remaining_time": "2:23:59", "throughput": 19910.54, "total_tokens": 73739776} +{"current_steps": 23440, "total_steps": 78105, "loss": 0.4865, "lr": 4.414600298473947e-06, "epoch": 1.5005441392996608, "percentage": 30.01, "elapsed_time": "1:02:16", "remaining_time": "2:25:13", "throughput": 19739.82, "total_tokens": 73752704} +{"current_steps": 23445, "total_steps": 78105, "loss": 0.3438, "lr": 4.414241019876826e-06, "epoch": 1.5008642212406376, "percentage": 30.02, "elapsed_time": "1:02:16", "remaining_time": "2:25:12", "throughput": 19740.51, "total_tokens": 73768896} +{"current_steps": 23450, "total_steps": 78105, "loss": 0.3562, "lr": 4.4138816456924795e-06, "epoch": 1.5011843031816143, "percentage": 30.02, "elapsed_time": "1:02:17", "remaining_time": "2:25:11", "throughput": 19741.05, "total_tokens": 73783744} +{"current_steps": 23455, "total_steps": 78105, "loss": 0.2767, "lr": 4.413522175938854e-06, "epoch": 1.5015043851225913, "percentage": 30.03, "elapsed_time": "1:02:18", "remaining_time": "2:25:10", "throughput": 19741.65, "total_tokens": 73798784} +{"current_steps": 23460, "total_steps": 78105, "loss": 0.2411, "lr": 4.413162610633896e-06, "epoch": 1.5018244670635683, "percentage": 30.04, "elapsed_time": "1:02:18", "remaining_time": "2:25:09", "throughput": 19742.42, "total_tokens": 73815424} +{"current_steps": 23465, "total_steps": 78105, "loss": 0.2666, "lr": 4.412802949795563e-06, "epoch": 1.5021445490045453, "percentage": 30.04, "elapsed_time": "1:02:19", "remaining_time": "2:25:07", "throughput": 19743.12, "total_tokens": 73831488} +{"current_steps": 23470, "total_steps": 78105, "loss": 0.3185, "lr": 4.412443193441815e-06, "epoch": 1.502464630945522, "percentage": 30.05, "elapsed_time": "1:02:20", "remaining_time": "2:25:06", "throughput": 19743.68, "total_tokens": 73846976} +{"current_steps": 23475, "total_steps": 78105, "loss": 0.1718, "lr": 4.412083341590615e-06, "epoch": 1.5027847128864988, "percentage": 30.06, "elapsed_time": "1:02:20", "remaining_time": "2:25:05", "throughput": 19744.38, "total_tokens": 73863168} +{"current_steps": 23480, "total_steps": 78105, "loss": 0.2745, "lr": 4.411723394259933e-06, "epoch": 1.5031047948274758, "percentage": 30.06, "elapsed_time": "1:02:21", "remaining_time": "2:25:04", "throughput": 19744.98, "total_tokens": 73878912} +{"current_steps": 23485, "total_steps": 78105, "loss": 0.3519, "lr": 4.411363351467742e-06, "epoch": 1.5034248767684528, "percentage": 30.07, "elapsed_time": "1:02:22", "remaining_time": "2:25:03", "throughput": 19745.69, "total_tokens": 73895104} +{"current_steps": 23490, "total_steps": 78105, "loss": 0.2996, "lr": 4.411003213232021e-06, "epoch": 1.5037449587094296, "percentage": 30.07, "elapsed_time": "1:02:22", "remaining_time": "2:25:02", "throughput": 19746.19, "total_tokens": 73909568} +{"current_steps": 23495, "total_steps": 78105, "loss": 0.3208, "lr": 4.410642979570754e-06, "epoch": 1.5040650406504064, "percentage": 30.08, "elapsed_time": "1:02:23", "remaining_time": "2:25:01", "throughput": 19747.1, "total_tokens": 73927744} +{"current_steps": 23500, "total_steps": 78105, "loss": 0.4484, "lr": 4.410282650501928e-06, "epoch": 1.5043851225913834, "percentage": 30.09, "elapsed_time": "1:02:24", "remaining_time": "2:25:00", "throughput": 19747.69, "total_tokens": 73943040} +{"current_steps": 23505, "total_steps": 78105, "loss": 0.3276, "lr": 4.409922226043537e-06, "epoch": 1.5047052045323603, "percentage": 30.09, "elapsed_time": "1:02:25", "remaining_time": "2:24:59", "throughput": 19748.35, "total_tokens": 73959360} +{"current_steps": 23510, "total_steps": 78105, "loss": 0.2826, "lr": 4.40956170621358e-06, "epoch": 1.5050252864733373, "percentage": 30.1, "elapsed_time": "1:02:25", "remaining_time": "2:24:58", "throughput": 19748.91, "total_tokens": 73974144} +{"current_steps": 23515, "total_steps": 78105, "loss": 0.3449, "lr": 4.409201091030057e-06, "epoch": 1.505345368414314, "percentage": 30.11, "elapsed_time": "1:02:26", "remaining_time": "2:24:57", "throughput": 19749.42, "total_tokens": 73988736} +{"current_steps": 23520, "total_steps": 78105, "loss": 0.2782, "lr": 4.408840380510975e-06, "epoch": 1.5056654503552909, "percentage": 30.11, "elapsed_time": "1:02:27", "remaining_time": "2:24:56", "throughput": 19750.1, "total_tokens": 74004928} +{"current_steps": 23525, "total_steps": 78105, "loss": 0.2831, "lr": 4.408479574674348e-06, "epoch": 1.5059855322962679, "percentage": 30.12, "elapsed_time": "1:02:27", "remaining_time": "2:24:55", "throughput": 19750.73, "total_tokens": 74020544} +{"current_steps": 23530, "total_steps": 78105, "loss": 0.3303, "lr": 4.408118673538192e-06, "epoch": 1.5063056142372448, "percentage": 30.13, "elapsed_time": "1:02:28", "remaining_time": "2:24:53", "throughput": 19751.27, "total_tokens": 74035648} +{"current_steps": 23535, "total_steps": 78105, "loss": 0.375, "lr": 4.407757677120529e-06, "epoch": 1.5066256961782216, "percentage": 30.13, "elapsed_time": "1:02:29", "remaining_time": "2:24:52", "throughput": 19751.83, "total_tokens": 74051008} +{"current_steps": 23540, "total_steps": 78105, "loss": 0.4906, "lr": 4.407396585439384e-06, "epoch": 1.5069457781191984, "percentage": 30.14, "elapsed_time": "1:02:29", "remaining_time": "2:24:51", "throughput": 19752.43, "total_tokens": 74066368} +{"current_steps": 23545, "total_steps": 78105, "loss": 0.3316, "lr": 4.407035398512789e-06, "epoch": 1.5072658600601754, "percentage": 30.15, "elapsed_time": "1:02:30", "remaining_time": "2:24:50", "throughput": 19752.99, "total_tokens": 74081344} +{"current_steps": 23550, "total_steps": 78105, "loss": 0.4379, "lr": 4.40667411635878e-06, "epoch": 1.5075859420011524, "percentage": 30.15, "elapsed_time": "1:02:31", "remaining_time": "2:24:49", "throughput": 19753.78, "total_tokens": 74098624} +{"current_steps": 23555, "total_steps": 78105, "loss": 0.2507, "lr": 4.406312738995397e-06, "epoch": 1.5079060239421291, "percentage": 30.16, "elapsed_time": "1:02:31", "remaining_time": "2:24:48", "throughput": 19754.48, "total_tokens": 74115072} +{"current_steps": 23560, "total_steps": 78105, "loss": 0.4069, "lr": 4.405951266440685e-06, "epoch": 1.5082261058831061, "percentage": 30.16, "elapsed_time": "1:02:32", "remaining_time": "2:24:47", "throughput": 19755.03, "total_tokens": 74130112} +{"current_steps": 23565, "total_steps": 78105, "loss": 0.5056, "lr": 4.405589698712695e-06, "epoch": 1.508546187824083, "percentage": 30.17, "elapsed_time": "1:02:33", "remaining_time": "2:24:46", "throughput": 19755.71, "total_tokens": 74146432} +{"current_steps": 23570, "total_steps": 78105, "loss": 0.3095, "lr": 4.40522803582948e-06, "epoch": 1.5088662697650599, "percentage": 30.18, "elapsed_time": "1:02:33", "remaining_time": "2:24:45", "throughput": 19756.27, "total_tokens": 74161664} +{"current_steps": 23575, "total_steps": 78105, "loss": 0.3228, "lr": 4.404866277809102e-06, "epoch": 1.5091863517060369, "percentage": 30.18, "elapsed_time": "1:02:34", "remaining_time": "2:24:44", "throughput": 19756.89, "total_tokens": 74177408} +{"current_steps": 23580, "total_steps": 78105, "loss": 0.3015, "lr": 4.404504424669624e-06, "epoch": 1.5095064336470136, "percentage": 30.19, "elapsed_time": "1:02:35", "remaining_time": "2:24:43", "throughput": 19757.49, "total_tokens": 74193088} +{"current_steps": 23585, "total_steps": 78105, "loss": 0.4241, "lr": 4.404142476429116e-06, "epoch": 1.5098265155879904, "percentage": 30.2, "elapsed_time": "1:02:35", "remaining_time": "2:24:42", "throughput": 19758.08, "total_tokens": 74208640} +{"current_steps": 23590, "total_steps": 78105, "loss": 0.2578, "lr": 4.40378043310565e-06, "epoch": 1.5101465975289674, "percentage": 30.2, "elapsed_time": "1:02:36", "remaining_time": "2:24:41", "throughput": 19758.73, "total_tokens": 74224320} +{"current_steps": 23595, "total_steps": 78105, "loss": 0.4185, "lr": 4.403418294717305e-06, "epoch": 1.5104666794699444, "percentage": 30.21, "elapsed_time": "1:02:37", "remaining_time": "2:24:40", "throughput": 19759.35, "total_tokens": 74240192} +{"current_steps": 23600, "total_steps": 78105, "loss": 0.3921, "lr": 4.403056061282166e-06, "epoch": 1.5107867614109212, "percentage": 30.22, "elapsed_time": "1:02:37", "remaining_time": "2:24:38", "throughput": 19759.82, "total_tokens": 74254976} +{"current_steps": 23605, "total_steps": 78105, "loss": 0.2923, "lr": 4.40269373281832e-06, "epoch": 1.511106843351898, "percentage": 30.22, "elapsed_time": "1:02:38", "remaining_time": "2:24:37", "throughput": 19760.38, "total_tokens": 74270208} +{"current_steps": 23610, "total_steps": 78105, "loss": 0.3144, "lr": 4.40233130934386e-06, "epoch": 1.511426925292875, "percentage": 30.23, "elapsed_time": "1:02:39", "remaining_time": "2:24:36", "throughput": 19760.92, "total_tokens": 74285184} +{"current_steps": 23615, "total_steps": 78105, "loss": 0.3244, "lr": 4.401968790876882e-06, "epoch": 1.511747007233852, "percentage": 30.23, "elapsed_time": "1:02:39", "remaining_time": "2:24:35", "throughput": 19761.55, "total_tokens": 74300992} +{"current_steps": 23620, "total_steps": 78105, "loss": 0.5305, "lr": 4.40160617743549e-06, "epoch": 1.512067089174829, "percentage": 30.24, "elapsed_time": "1:02:40", "remaining_time": "2:24:34", "throughput": 19762.23, "total_tokens": 74317504} +{"current_steps": 23625, "total_steps": 78105, "loss": 0.2176, "lr": 4.4012434690377916e-06, "epoch": 1.5123871711158057, "percentage": 30.25, "elapsed_time": "1:02:41", "remaining_time": "2:24:33", "throughput": 19762.79, "total_tokens": 74332928} +{"current_steps": 23630, "total_steps": 78105, "loss": 0.3203, "lr": 4.400880665701897e-06, "epoch": 1.5127072530567824, "percentage": 30.25, "elapsed_time": "1:02:41", "remaining_time": "2:24:32", "throughput": 19763.49, "total_tokens": 74349760} +{"current_steps": 23635, "total_steps": 78105, "loss": 0.2868, "lr": 4.400517767445923e-06, "epoch": 1.5130273349977594, "percentage": 30.26, "elapsed_time": "1:02:42", "remaining_time": "2:24:31", "throughput": 19764.0, "total_tokens": 74364544} +{"current_steps": 23640, "total_steps": 78105, "loss": 0.2648, "lr": 4.4001547742879915e-06, "epoch": 1.5133474169387364, "percentage": 30.27, "elapsed_time": "1:02:43", "remaining_time": "2:24:30", "throughput": 19764.64, "total_tokens": 74380736} +{"current_steps": 23645, "total_steps": 78105, "loss": 0.3199, "lr": 4.3997916862462286e-06, "epoch": 1.5136674988797132, "percentage": 30.27, "elapsed_time": "1:02:43", "remaining_time": "2:24:29", "throughput": 19765.23, "total_tokens": 74396032} +{"current_steps": 23650, "total_steps": 78105, "loss": 0.3758, "lr": 4.3994285033387635e-06, "epoch": 1.51398758082069, "percentage": 30.28, "elapsed_time": "1:02:44", "remaining_time": "2:24:28", "throughput": 19765.77, "total_tokens": 74411264} +{"current_steps": 23655, "total_steps": 78105, "loss": 0.2341, "lr": 4.399065225583733e-06, "epoch": 1.514307662761667, "percentage": 30.29, "elapsed_time": "1:02:45", "remaining_time": "2:24:27", "throughput": 19766.25, "total_tokens": 74425984} +{"current_steps": 23660, "total_steps": 78105, "loss": 0.3427, "lr": 4.398701852999277e-06, "epoch": 1.514627744702644, "percentage": 30.29, "elapsed_time": "1:02:45", "remaining_time": "2:24:26", "throughput": 19766.91, "total_tokens": 74441984} +{"current_steps": 23665, "total_steps": 78105, "loss": 0.2755, "lr": 4.398338385603541e-06, "epoch": 1.514947826643621, "percentage": 30.3, "elapsed_time": "1:02:46", "remaining_time": "2:24:25", "throughput": 19767.68, "total_tokens": 74459200} +{"current_steps": 23670, "total_steps": 78105, "loss": 0.2266, "lr": 4.397974823414673e-06, "epoch": 1.5152679085845977, "percentage": 30.31, "elapsed_time": "1:02:47", "remaining_time": "2:24:24", "throughput": 19768.4, "total_tokens": 74475776} +{"current_steps": 23675, "total_steps": 78105, "loss": 0.303, "lr": 4.39761116645083e-06, "epoch": 1.5155879905255745, "percentage": 30.31, "elapsed_time": "1:02:48", "remaining_time": "2:24:23", "throughput": 19769.04, "total_tokens": 74491712} +{"current_steps": 23680, "total_steps": 78105, "loss": 0.333, "lr": 4.397247414730169e-06, "epoch": 1.5159080724665515, "percentage": 30.32, "elapsed_time": "1:02:48", "remaining_time": "2:24:22", "throughput": 19769.74, "total_tokens": 74508224} +{"current_steps": 23685, "total_steps": 78105, "loss": 0.363, "lr": 4.396883568270855e-06, "epoch": 1.5162281544075285, "percentage": 30.32, "elapsed_time": "1:02:49", "remaining_time": "2:24:20", "throughput": 19770.33, "total_tokens": 74523968} +{"current_steps": 23690, "total_steps": 78105, "loss": 0.211, "lr": 4.396519627091055e-06, "epoch": 1.5165482363485052, "percentage": 30.33, "elapsed_time": "1:02:50", "remaining_time": "2:24:19", "throughput": 19771.04, "total_tokens": 74540416} +{"current_steps": 23695, "total_steps": 78105, "loss": 0.2272, "lr": 4.3961555912089434e-06, "epoch": 1.516868318289482, "percentage": 30.34, "elapsed_time": "1:02:50", "remaining_time": "2:24:18", "throughput": 19771.5, "total_tokens": 74555072} +{"current_steps": 23700, "total_steps": 78105, "loss": 0.2657, "lr": 4.395791460642698e-06, "epoch": 1.517188400230459, "percentage": 30.34, "elapsed_time": "1:02:51", "remaining_time": "2:24:17", "throughput": 19772.03, "total_tokens": 74570240} +{"current_steps": 23705, "total_steps": 78105, "loss": 0.3457, "lr": 4.395427235410504e-06, "epoch": 1.517508482171436, "percentage": 30.35, "elapsed_time": "1:02:52", "remaining_time": "2:24:16", "throughput": 19772.56, "total_tokens": 74585280} +{"current_steps": 23710, "total_steps": 78105, "loss": 0.4364, "lr": 4.395062915530545e-06, "epoch": 1.5178285641124127, "percentage": 30.36, "elapsed_time": "1:02:52", "remaining_time": "2:24:15", "throughput": 19773.12, "total_tokens": 74600576} +{"current_steps": 23715, "total_steps": 78105, "loss": 0.2814, "lr": 4.394698501021015e-06, "epoch": 1.5181486460533895, "percentage": 30.36, "elapsed_time": "1:02:53", "remaining_time": "2:24:14", "throughput": 19773.69, "total_tokens": 74616000} +{"current_steps": 23720, "total_steps": 78105, "loss": 0.2533, "lr": 4.394333991900111e-06, "epoch": 1.5184687279943665, "percentage": 30.37, "elapsed_time": "1:02:54", "remaining_time": "2:24:13", "throughput": 19774.22, "total_tokens": 74631360} +{"current_steps": 23725, "total_steps": 78105, "loss": 0.4002, "lr": 4.393969388186036e-06, "epoch": 1.5187888099353435, "percentage": 30.38, "elapsed_time": "1:02:54", "remaining_time": "2:24:12", "throughput": 19774.86, "total_tokens": 74647680} +{"current_steps": 23730, "total_steps": 78105, "loss": 0.507, "lr": 4.3936046898969945e-06, "epoch": 1.5191088918763205, "percentage": 30.38, "elapsed_time": "1:02:55", "remaining_time": "2:24:11", "throughput": 19775.35, "total_tokens": 74662912} +{"current_steps": 23735, "total_steps": 78105, "loss": 0.3759, "lr": 4.393239897051197e-06, "epoch": 1.5194289738172972, "percentage": 30.39, "elapsed_time": "1:02:56", "remaining_time": "2:24:10", "throughput": 19776.05, "total_tokens": 74679744} +{"current_steps": 23740, "total_steps": 78105, "loss": 0.4432, "lr": 4.392875009666862e-06, "epoch": 1.519749055758274, "percentage": 30.39, "elapsed_time": "1:02:56", "remaining_time": "2:24:09", "throughput": 19776.8, "total_tokens": 74696768} +{"current_steps": 23745, "total_steps": 78105, "loss": 0.463, "lr": 4.392510027762208e-06, "epoch": 1.520069137699251, "percentage": 30.4, "elapsed_time": "1:02:57", "remaining_time": "2:24:08", "throughput": 19777.31, "total_tokens": 74711936} +{"current_steps": 23750, "total_steps": 78105, "loss": 0.3643, "lr": 4.392144951355461e-06, "epoch": 1.520389219640228, "percentage": 30.41, "elapsed_time": "1:02:58", "remaining_time": "2:24:07", "throughput": 19777.87, "total_tokens": 74727552} +{"current_steps": 23755, "total_steps": 78105, "loss": 0.3228, "lr": 4.391779780464851e-06, "epoch": 1.5207093015812048, "percentage": 30.41, "elapsed_time": "1:02:59", "remaining_time": "2:24:06", "throughput": 19778.4, "total_tokens": 74742592} +{"current_steps": 23760, "total_steps": 78105, "loss": 0.2362, "lr": 4.391414515108613e-06, "epoch": 1.5210293835221815, "percentage": 30.42, "elapsed_time": "1:02:59", "remaining_time": "2:24:05", "throughput": 19778.98, "total_tokens": 74758400} +{"current_steps": 23765, "total_steps": 78105, "loss": 0.3775, "lr": 4.391049155304986e-06, "epoch": 1.5213494654631585, "percentage": 30.43, "elapsed_time": "1:03:00", "remaining_time": "2:24:04", "throughput": 19779.67, "total_tokens": 74774784} +{"current_steps": 23770, "total_steps": 78105, "loss": 0.4361, "lr": 4.390683701072214e-06, "epoch": 1.5216695474041355, "percentage": 30.43, "elapsed_time": "1:03:01", "remaining_time": "2:24:02", "throughput": 19780.21, "total_tokens": 74789888} +{"current_steps": 23775, "total_steps": 78105, "loss": 0.3499, "lr": 4.390318152428546e-06, "epoch": 1.5219896293451125, "percentage": 30.44, "elapsed_time": "1:03:01", "remaining_time": "2:24:01", "throughput": 19780.83, "total_tokens": 74805696} +{"current_steps": 23780, "total_steps": 78105, "loss": 0.3708, "lr": 4.3899525093922354e-06, "epoch": 1.5223097112860893, "percentage": 30.45, "elapsed_time": "1:03:02", "remaining_time": "2:24:00", "throughput": 19781.36, "total_tokens": 74820864} +{"current_steps": 23785, "total_steps": 78105, "loss": 0.2826, "lr": 4.3895867719815425e-06, "epoch": 1.522629793227066, "percentage": 30.45, "elapsed_time": "1:03:03", "remaining_time": "2:23:59", "throughput": 19782.08, "total_tokens": 74837312} +{"current_steps": 23790, "total_steps": 78105, "loss": 0.2652, "lr": 4.3892209402147275e-06, "epoch": 1.522949875168043, "percentage": 30.46, "elapsed_time": "1:03:03", "remaining_time": "2:23:58", "throughput": 19782.63, "total_tokens": 74852736} +{"current_steps": 23795, "total_steps": 78105, "loss": 0.231, "lr": 4.388855014110059e-06, "epoch": 1.52326995710902, "percentage": 30.47, "elapsed_time": "1:03:04", "remaining_time": "2:23:57", "throughput": 19783.15, "total_tokens": 74867840} +{"current_steps": 23800, "total_steps": 78105, "loss": 0.2718, "lr": 4.388488993685811e-06, "epoch": 1.5235900390499968, "percentage": 30.47, "elapsed_time": "1:03:05", "remaining_time": "2:23:56", "throughput": 19783.61, "total_tokens": 74882368} +{"current_steps": 23805, "total_steps": 78105, "loss": 0.3443, "lr": 4.388122878960259e-06, "epoch": 1.5239101209909736, "percentage": 30.48, "elapsed_time": "1:03:05", "remaining_time": "2:23:55", "throughput": 19784.06, "total_tokens": 74896576} +{"current_steps": 23810, "total_steps": 78105, "loss": 0.2606, "lr": 4.387756669951685e-06, "epoch": 1.5242302029319506, "percentage": 30.48, "elapsed_time": "1:03:06", "remaining_time": "2:23:54", "throughput": 19784.86, "total_tokens": 74913792} +{"current_steps": 23815, "total_steps": 78105, "loss": 0.3876, "lr": 4.387390366678376e-06, "epoch": 1.5245502848729275, "percentage": 30.49, "elapsed_time": "1:03:07", "remaining_time": "2:23:53", "throughput": 19785.42, "total_tokens": 74929152} +{"current_steps": 23820, "total_steps": 78105, "loss": 0.2947, "lr": 4.387023969158623e-06, "epoch": 1.5248703668139043, "percentage": 30.5, "elapsed_time": "1:03:07", "remaining_time": "2:23:52", "throughput": 19786.13, "total_tokens": 74945792} +{"current_steps": 23825, "total_steps": 78105, "loss": 0.423, "lr": 4.3866574774107226e-06, "epoch": 1.5251904487548813, "percentage": 30.5, "elapsed_time": "1:03:08", "remaining_time": "2:23:51", "throughput": 19786.57, "total_tokens": 74959872} +{"current_steps": 23830, "total_steps": 78105, "loss": 0.321, "lr": 4.3862908914529745e-06, "epoch": 1.525510530695858, "percentage": 30.51, "elapsed_time": "1:03:09", "remaining_time": "2:23:50", "throughput": 19787.21, "total_tokens": 74976064} +{"current_steps": 23835, "total_steps": 78105, "loss": 0.3477, "lr": 4.385924211303685e-06, "epoch": 1.525830612636835, "percentage": 30.52, "elapsed_time": "1:03:09", "remaining_time": "2:23:48", "throughput": 19787.75, "total_tokens": 74990848} +{"current_steps": 23840, "total_steps": 78105, "loss": 0.5634, "lr": 4.385557436981164e-06, "epoch": 1.526150694577812, "percentage": 30.52, "elapsed_time": "1:03:10", "remaining_time": "2:23:47", "throughput": 19788.44, "total_tokens": 75007232} +{"current_steps": 23845, "total_steps": 78105, "loss": 0.3472, "lr": 4.385190568503725e-06, "epoch": 1.5264707765187888, "percentage": 30.53, "elapsed_time": "1:03:11", "remaining_time": "2:23:46", "throughput": 19789.21, "total_tokens": 75024192} +{"current_steps": 23850, "total_steps": 78105, "loss": 0.244, "lr": 4.3848236058896894e-06, "epoch": 1.5267908584597656, "percentage": 30.54, "elapsed_time": "1:03:11", "remaining_time": "2:23:45", "throughput": 19789.72, "total_tokens": 75039104} +{"current_steps": 23855, "total_steps": 78105, "loss": 0.2927, "lr": 4.38445654915738e-06, "epoch": 1.5271109404007426, "percentage": 30.54, "elapsed_time": "1:03:12", "remaining_time": "2:23:44", "throughput": 19790.28, "total_tokens": 75054464} +{"current_steps": 23860, "total_steps": 78105, "loss": 0.2484, "lr": 4.384089398325128e-06, "epoch": 1.5274310223417196, "percentage": 30.55, "elapsed_time": "1:03:13", "remaining_time": "2:23:43", "throughput": 19790.92, "total_tokens": 75070400} +{"current_steps": 23865, "total_steps": 78105, "loss": 0.3555, "lr": 4.3837221534112636e-06, "epoch": 1.5277511042826963, "percentage": 30.56, "elapsed_time": "1:03:13", "remaining_time": "2:23:42", "throughput": 19791.56, "total_tokens": 75086528} +{"current_steps": 23870, "total_steps": 78105, "loss": 0.2417, "lr": 4.383354814434127e-06, "epoch": 1.528071186223673, "percentage": 30.56, "elapsed_time": "1:03:14", "remaining_time": "2:23:41", "throughput": 19792.14, "total_tokens": 75101760} +{"current_steps": 23875, "total_steps": 78105, "loss": 0.2892, "lr": 4.382987381412061e-06, "epoch": 1.52839126816465, "percentage": 30.57, "elapsed_time": "1:03:15", "remaining_time": "2:23:40", "throughput": 19792.88, "total_tokens": 75118400} +{"current_steps": 23880, "total_steps": 78105, "loss": 0.4187, "lr": 4.382619854363414e-06, "epoch": 1.528711350105627, "percentage": 30.57, "elapsed_time": "1:03:15", "remaining_time": "2:23:39", "throughput": 19793.52, "total_tokens": 75134336} +{"current_steps": 23885, "total_steps": 78105, "loss": 0.2502, "lr": 4.382252233306537e-06, "epoch": 1.529031432046604, "percentage": 30.58, "elapsed_time": "1:03:16", "remaining_time": "2:23:38", "throughput": 19794.47, "total_tokens": 75153280} +{"current_steps": 23890, "total_steps": 78105, "loss": 0.4253, "lr": 4.381884518259789e-06, "epoch": 1.5293515139875808, "percentage": 30.59, "elapsed_time": "1:03:17", "remaining_time": "2:23:37", "throughput": 19794.93, "total_tokens": 75167808} +{"current_steps": 23895, "total_steps": 78105, "loss": 0.2872, "lr": 4.381516709241529e-06, "epoch": 1.5296715959285576, "percentage": 30.59, "elapsed_time": "1:03:17", "remaining_time": "2:23:36", "throughput": 19795.49, "total_tokens": 75183104} +{"current_steps": 23900, "total_steps": 78105, "loss": 0.2429, "lr": 4.381148806270126e-06, "epoch": 1.5299916778695346, "percentage": 30.6, "elapsed_time": "1:03:18", "remaining_time": "2:23:35", "throughput": 19796.12, "total_tokens": 75198912} +{"current_steps": 23905, "total_steps": 78105, "loss": 0.3455, "lr": 4.3807808093639495e-06, "epoch": 1.5303117598105116, "percentage": 30.61, "elapsed_time": "1:03:19", "remaining_time": "2:23:34", "throughput": 19796.73, "total_tokens": 75214784} +{"current_steps": 23910, "total_steps": 78105, "loss": 0.2963, "lr": 4.380412718541376e-06, "epoch": 1.5306318417514884, "percentage": 30.61, "elapsed_time": "1:03:20", "remaining_time": "2:23:33", "throughput": 19797.35, "total_tokens": 75230592} +{"current_steps": 23915, "total_steps": 78105, "loss": 0.3013, "lr": 4.380044533820787e-06, "epoch": 1.5309519236924651, "percentage": 30.62, "elapsed_time": "1:03:20", "remaining_time": "2:23:32", "throughput": 19798.12, "total_tokens": 75247872} +{"current_steps": 23920, "total_steps": 78105, "loss": 0.3078, "lr": 4.379676255220566e-06, "epoch": 1.5312720056334421, "percentage": 30.63, "elapsed_time": "1:03:21", "remaining_time": "2:23:31", "throughput": 19798.79, "total_tokens": 75264064} +{"current_steps": 23925, "total_steps": 78105, "loss": 0.286, "lr": 4.379307882759104e-06, "epoch": 1.5315920875744191, "percentage": 30.63, "elapsed_time": "1:03:22", "remaining_time": "2:23:30", "throughput": 19799.34, "total_tokens": 75279424} +{"current_steps": 23930, "total_steps": 78105, "loss": 0.405, "lr": 4.378939416454795e-06, "epoch": 1.531912169515396, "percentage": 30.64, "elapsed_time": "1:03:22", "remaining_time": "2:23:29", "throughput": 19800.17, "total_tokens": 75297024} +{"current_steps": 23935, "total_steps": 78105, "loss": 0.3025, "lr": 4.378570856326039e-06, "epoch": 1.5322322514563729, "percentage": 30.64, "elapsed_time": "1:03:23", "remaining_time": "2:23:28", "throughput": 19800.8, "total_tokens": 75312768} +{"current_steps": 23940, "total_steps": 78105, "loss": 0.3311, "lr": 4.3782022023912394e-06, "epoch": 1.5325523333973496, "percentage": 30.65, "elapsed_time": "1:03:24", "remaining_time": "2:23:27", "throughput": 19801.43, "total_tokens": 75328896} +{"current_steps": 23945, "total_steps": 78105, "loss": 0.3668, "lr": 4.377833454668805e-06, "epoch": 1.5328724153383266, "percentage": 30.66, "elapsed_time": "1:03:24", "remaining_time": "2:23:26", "throughput": 19802.06, "total_tokens": 75344640} +{"current_steps": 23950, "total_steps": 78105, "loss": 0.2484, "lr": 4.377464613177148e-06, "epoch": 1.5331924972793036, "percentage": 30.66, "elapsed_time": "1:03:25", "remaining_time": "2:23:24", "throughput": 19802.57, "total_tokens": 75359232} +{"current_steps": 23955, "total_steps": 78105, "loss": 0.3305, "lr": 4.377095677934689e-06, "epoch": 1.5335125792202804, "percentage": 30.67, "elapsed_time": "1:03:26", "remaining_time": "2:23:23", "throughput": 19803.24, "total_tokens": 75375488} +{"current_steps": 23960, "total_steps": 78105, "loss": 0.3971, "lr": 4.376726648959849e-06, "epoch": 1.5338326611612572, "percentage": 30.68, "elapsed_time": "1:03:26", "remaining_time": "2:23:22", "throughput": 19803.87, "total_tokens": 75391168} +{"current_steps": 23965, "total_steps": 78105, "loss": 0.3744, "lr": 4.376357526271055e-06, "epoch": 1.5341527431022342, "percentage": 30.68, "elapsed_time": "1:03:27", "remaining_time": "2:23:21", "throughput": 19804.5, "total_tokens": 75407232} +{"current_steps": 23970, "total_steps": 78105, "loss": 0.2905, "lr": 4.375988309886741e-06, "epoch": 1.5344728250432111, "percentage": 30.69, "elapsed_time": "1:03:28", "remaining_time": "2:23:20", "throughput": 19805.07, "total_tokens": 75422656} +{"current_steps": 23975, "total_steps": 78105, "loss": 0.3688, "lr": 4.375618999825341e-06, "epoch": 1.534792906984188, "percentage": 30.7, "elapsed_time": "1:03:28", "remaining_time": "2:23:19", "throughput": 19805.6, "total_tokens": 75437632} +{"current_steps": 23980, "total_steps": 78105, "loss": 0.3125, "lr": 4.375249596105299e-06, "epoch": 1.5351129889251647, "percentage": 30.7, "elapsed_time": "1:03:29", "remaining_time": "2:23:18", "throughput": 19806.31, "total_tokens": 75454464} +{"current_steps": 23985, "total_steps": 78105, "loss": 0.3745, "lr": 4.37488009874506e-06, "epoch": 1.5354330708661417, "percentage": 30.71, "elapsed_time": "1:03:30", "remaining_time": "2:23:17", "throughput": 19806.85, "total_tokens": 75469696} +{"current_steps": 23990, "total_steps": 78105, "loss": 0.4366, "lr": 4.3745105077630745e-06, "epoch": 1.5357531528071187, "percentage": 30.72, "elapsed_time": "1:03:30", "remaining_time": "2:23:16", "throughput": 19807.48, "total_tokens": 75485696} +{"current_steps": 23995, "total_steps": 78105, "loss": 0.4604, "lr": 4.374140823177798e-06, "epoch": 1.5360732347480957, "percentage": 30.72, "elapsed_time": "1:03:31", "remaining_time": "2:23:15", "throughput": 19808.12, "total_tokens": 75501632} +{"current_steps": 24000, "total_steps": 78105, "loss": 0.3418, "lr": 4.3737710450076905e-06, "epoch": 1.5363933166890724, "percentage": 30.73, "elapsed_time": "1:03:32", "remaining_time": "2:23:14", "throughput": 19808.76, "total_tokens": 75517696} +{"current_steps": 24005, "total_steps": 78105, "loss": 0.3233, "lr": 4.373401173271218e-06, "epoch": 1.5367133986300492, "percentage": 30.73, "elapsed_time": "1:03:33", "remaining_time": "2:23:13", "throughput": 19809.34, "total_tokens": 75533056} +{"current_steps": 24010, "total_steps": 78105, "loss": 0.4352, "lr": 4.373031207986849e-06, "epoch": 1.5370334805710262, "percentage": 30.74, "elapsed_time": "1:03:33", "remaining_time": "2:23:12", "throughput": 19809.87, "total_tokens": 75547904} +{"current_steps": 24015, "total_steps": 78105, "loss": 0.5884, "lr": 4.3726611491730565e-06, "epoch": 1.5373535625120032, "percentage": 30.75, "elapsed_time": "1:03:34", "remaining_time": "2:23:11", "throughput": 19810.46, "total_tokens": 75563584} +{"current_steps": 24020, "total_steps": 78105, "loss": 0.3698, "lr": 4.372290996848322e-06, "epoch": 1.53767364445298, "percentage": 30.75, "elapsed_time": "1:03:34", "remaining_time": "2:23:10", "throughput": 19811.01, "total_tokens": 75578688} +{"current_steps": 24025, "total_steps": 78105, "loss": 0.3471, "lr": 4.371920751031127e-06, "epoch": 1.5379937263939567, "percentage": 30.76, "elapsed_time": "1:03:35", "remaining_time": "2:23:09", "throughput": 19811.88, "total_tokens": 75596672} +{"current_steps": 24030, "total_steps": 78105, "loss": 0.2921, "lr": 4.37155041173996e-06, "epoch": 1.5383138083349337, "percentage": 30.77, "elapsed_time": "1:03:36", "remaining_time": "2:23:08", "throughput": 19812.5, "total_tokens": 75612544} +{"current_steps": 24035, "total_steps": 78105, "loss": 0.2571, "lr": 4.3711799789933144e-06, "epoch": 1.5386338902759107, "percentage": 30.77, "elapsed_time": "1:03:37", "remaining_time": "2:23:07", "throughput": 19813.22, "total_tokens": 75629504} +{"current_steps": 24040, "total_steps": 78105, "loss": 0.2932, "lr": 4.370809452809687e-06, "epoch": 1.5389539722168877, "percentage": 30.78, "elapsed_time": "1:03:37", "remaining_time": "2:23:06", "throughput": 19814.05, "total_tokens": 75647424} +{"current_steps": 24045, "total_steps": 78105, "loss": 0.4218, "lr": 4.37043883320758e-06, "epoch": 1.5392740541578644, "percentage": 30.79, "elapsed_time": "1:03:38", "remaining_time": "2:23:05", "throughput": 19814.59, "total_tokens": 75662528} +{"current_steps": 24050, "total_steps": 78105, "loss": 0.4146, "lr": 4.3700681202055e-06, "epoch": 1.5395941360988412, "percentage": 30.79, "elapsed_time": "1:03:39", "remaining_time": "2:23:04", "throughput": 19815.15, "total_tokens": 75677824} +{"current_steps": 24055, "total_steps": 78105, "loss": 0.2612, "lr": 4.36969731382196e-06, "epoch": 1.5399142180398182, "percentage": 30.8, "elapsed_time": "1:03:39", "remaining_time": "2:23:02", "throughput": 19815.71, "total_tokens": 75693056} +{"current_steps": 24060, "total_steps": 78105, "loss": 0.2937, "lr": 4.369326414075474e-06, "epoch": 1.5402342999807952, "percentage": 30.8, "elapsed_time": "1:03:40", "remaining_time": "2:23:01", "throughput": 19816.35, "total_tokens": 75709184} +{"current_steps": 24065, "total_steps": 78105, "loss": 0.3962, "lr": 4.3689554209845645e-06, "epoch": 1.540554381921772, "percentage": 30.81, "elapsed_time": "1:03:41", "remaining_time": "2:23:00", "throughput": 19816.95, "total_tokens": 75724928} +{"current_steps": 24070, "total_steps": 78105, "loss": 0.3195, "lr": 4.368584334567757e-06, "epoch": 1.5408744638627487, "percentage": 30.82, "elapsed_time": "1:03:41", "remaining_time": "2:22:59", "throughput": 19817.58, "total_tokens": 75740800} +{"current_steps": 24075, "total_steps": 78105, "loss": 0.3436, "lr": 4.36821315484358e-06, "epoch": 1.5411945458037257, "percentage": 30.82, "elapsed_time": "1:03:42", "remaining_time": "2:22:58", "throughput": 19818.22, "total_tokens": 75756672} +{"current_steps": 24080, "total_steps": 78105, "loss": 0.2751, "lr": 4.367841881830569e-06, "epoch": 1.5415146277447027, "percentage": 30.83, "elapsed_time": "1:03:43", "remaining_time": "2:22:57", "throughput": 19818.94, "total_tokens": 75773440} +{"current_steps": 24085, "total_steps": 78105, "loss": 0.2876, "lr": 4.367470515547264e-06, "epoch": 1.5418347096856795, "percentage": 30.84, "elapsed_time": "1:03:44", "remaining_time": "2:22:57", "throughput": 19819.95, "total_tokens": 75793280} +{"current_steps": 24090, "total_steps": 78105, "loss": 0.1805, "lr": 4.367099056012209e-06, "epoch": 1.5421547916266565, "percentage": 30.84, "elapsed_time": "1:03:44", "remaining_time": "2:22:55", "throughput": 19820.47, "total_tokens": 75808384} +{"current_steps": 24095, "total_steps": 78105, "loss": 0.2682, "lr": 4.366727503243953e-06, "epoch": 1.5424748735676332, "percentage": 30.85, "elapsed_time": "1:03:45", "remaining_time": "2:22:54", "throughput": 19821.15, "total_tokens": 75824768} +{"current_steps": 24100, "total_steps": 78105, "loss": 0.3056, "lr": 4.366355857261048e-06, "epoch": 1.5427949555086102, "percentage": 30.86, "elapsed_time": "1:03:46", "remaining_time": "2:22:53", "throughput": 19821.94, "total_tokens": 75842240} +{"current_steps": 24105, "total_steps": 78105, "loss": 0.1769, "lr": 4.3659841180820525e-06, "epoch": 1.5431150374495872, "percentage": 30.86, "elapsed_time": "1:03:46", "remaining_time": "2:22:52", "throughput": 19822.47, "total_tokens": 75857344} +{"current_steps": 24110, "total_steps": 78105, "loss": 0.2045, "lr": 4.365612285725531e-06, "epoch": 1.543435119390564, "percentage": 30.87, "elapsed_time": "1:03:47", "remaining_time": "2:22:51", "throughput": 19823.04, "total_tokens": 75873024} +{"current_steps": 24115, "total_steps": 78105, "loss": 0.2491, "lr": 4.365240360210048e-06, "epoch": 1.5437552013315408, "percentage": 30.88, "elapsed_time": "1:03:48", "remaining_time": "2:22:50", "throughput": 19823.62, "total_tokens": 75888256} +{"current_steps": 24120, "total_steps": 78105, "loss": 0.3277, "lr": 4.364868341554179e-06, "epoch": 1.5440752832725178, "percentage": 30.88, "elapsed_time": "1:03:48", "remaining_time": "2:22:49", "throughput": 19824.39, "total_tokens": 75905728} +{"current_steps": 24125, "total_steps": 78105, "loss": 0.2696, "lr": 4.364496229776497e-06, "epoch": 1.5443953652134947, "percentage": 30.89, "elapsed_time": "1:03:49", "remaining_time": "2:22:48", "throughput": 19824.92, "total_tokens": 75921024} +{"current_steps": 24130, "total_steps": 78105, "loss": 0.283, "lr": 4.364124024895586e-06, "epoch": 1.5447154471544715, "percentage": 30.89, "elapsed_time": "1:03:50", "remaining_time": "2:22:47", "throughput": 19825.37, "total_tokens": 75935616} +{"current_steps": 24135, "total_steps": 78105, "loss": 0.2721, "lr": 4.363751726930031e-06, "epoch": 1.5450355290954483, "percentage": 30.9, "elapsed_time": "1:03:50", "remaining_time": "2:22:46", "throughput": 19825.97, "total_tokens": 75951296} +{"current_steps": 24140, "total_steps": 78105, "loss": 0.3857, "lr": 4.363379335898423e-06, "epoch": 1.5453556110364253, "percentage": 30.91, "elapsed_time": "1:03:51", "remaining_time": "2:22:45", "throughput": 19826.47, "total_tokens": 75966272} +{"current_steps": 24145, "total_steps": 78105, "loss": 0.4448, "lr": 4.363006851819357e-06, "epoch": 1.5456756929774023, "percentage": 30.91, "elapsed_time": "1:03:52", "remaining_time": "2:22:44", "throughput": 19827.06, "total_tokens": 75982080} +{"current_steps": 24150, "total_steps": 78105, "loss": 0.3493, "lr": 4.362634274711432e-06, "epoch": 1.5459957749183793, "percentage": 30.92, "elapsed_time": "1:03:52", "remaining_time": "2:22:43", "throughput": 19827.65, "total_tokens": 75997760} +{"current_steps": 24155, "total_steps": 78105, "loss": 0.372, "lr": 4.362261604593254e-06, "epoch": 1.546315856859356, "percentage": 30.93, "elapsed_time": "1:03:53", "remaining_time": "2:22:42", "throughput": 19828.35, "total_tokens": 76014528} +{"current_steps": 24160, "total_steps": 78105, "loss": 0.4029, "lr": 4.3618888414834315e-06, "epoch": 1.5466359388003328, "percentage": 30.93, "elapsed_time": "1:03:54", "remaining_time": "2:22:41", "throughput": 19829.0, "total_tokens": 76030656} +{"current_steps": 24165, "total_steps": 78105, "loss": 0.328, "lr": 4.361515985400578e-06, "epoch": 1.5469560207413098, "percentage": 30.94, "elapsed_time": "1:03:54", "remaining_time": "2:22:40", "throughput": 19829.46, "total_tokens": 76045248} +{"current_steps": 24170, "total_steps": 78105, "loss": 0.3907, "lr": 4.361143036363313e-06, "epoch": 1.5472761026822868, "percentage": 30.95, "elapsed_time": "1:03:55", "remaining_time": "2:22:39", "throughput": 19830.1, "total_tokens": 76061440} +{"current_steps": 24175, "total_steps": 78105, "loss": 0.3093, "lr": 4.3607699943902594e-06, "epoch": 1.5475961846232635, "percentage": 30.95, "elapsed_time": "1:03:56", "remaining_time": "2:22:38", "throughput": 19830.71, "total_tokens": 76077504} +{"current_steps": 24180, "total_steps": 78105, "loss": 0.3813, "lr": 4.3603968595000446e-06, "epoch": 1.5479162665642403, "percentage": 30.96, "elapsed_time": "1:03:56", "remaining_time": "2:22:37", "throughput": 19831.2, "total_tokens": 76092160} +{"current_steps": 24185, "total_steps": 78105, "loss": 0.5046, "lr": 4.360023631711301e-06, "epoch": 1.5482363485052173, "percentage": 30.96, "elapsed_time": "1:03:57", "remaining_time": "2:22:36", "throughput": 19831.84, "total_tokens": 76108352} +{"current_steps": 24190, "total_steps": 78105, "loss": 0.3662, "lr": 4.359650311042666e-06, "epoch": 1.5485564304461943, "percentage": 30.97, "elapsed_time": "1:03:58", "remaining_time": "2:22:35", "throughput": 19832.52, "total_tokens": 76125184} +{"current_steps": 24195, "total_steps": 78105, "loss": 0.3409, "lr": 4.3592768975127805e-06, "epoch": 1.5488765123871713, "percentage": 30.98, "elapsed_time": "1:03:59", "remaining_time": "2:22:34", "throughput": 19833.22, "total_tokens": 76141504} +{"current_steps": 24200, "total_steps": 78105, "loss": 0.27, "lr": 4.3589033911402915e-06, "epoch": 1.549196594328148, "percentage": 30.98, "elapsed_time": "1:03:59", "remaining_time": "2:22:33", "throughput": 19833.82, "total_tokens": 76157440} +{"current_steps": 24205, "total_steps": 78105, "loss": 0.3269, "lr": 4.35852979194385e-06, "epoch": 1.5495166762691248, "percentage": 30.99, "elapsed_time": "1:04:00", "remaining_time": "2:22:31", "throughput": 19834.41, "total_tokens": 76173120} +{"current_steps": 24210, "total_steps": 78105, "loss": 0.4056, "lr": 4.358156099942112e-06, "epoch": 1.5498367582101018, "percentage": 31.0, "elapsed_time": "1:04:01", "remaining_time": "2:22:30", "throughput": 19834.94, "total_tokens": 76188096} +{"current_steps": 24215, "total_steps": 78105, "loss": 0.3998, "lr": 4.357782315153736e-06, "epoch": 1.5501568401510788, "percentage": 31.0, "elapsed_time": "1:04:01", "remaining_time": "2:22:29", "throughput": 19835.72, "total_tokens": 76205312} +{"current_steps": 24220, "total_steps": 78105, "loss": 0.3335, "lr": 4.357408437597388e-06, "epoch": 1.5504769220920556, "percentage": 31.01, "elapsed_time": "1:04:02", "remaining_time": "2:22:28", "throughput": 19836.3, "total_tokens": 76220672} +{"current_steps": 24225, "total_steps": 78105, "loss": 0.3525, "lr": 4.357034467291737e-06, "epoch": 1.5507970040330323, "percentage": 31.02, "elapsed_time": "1:04:03", "remaining_time": "2:22:27", "throughput": 19836.82, "total_tokens": 76235392} +{"current_steps": 24230, "total_steps": 78105, "loss": 0.3526, "lr": 4.356660404255459e-06, "epoch": 1.5511170859740093, "percentage": 31.02, "elapsed_time": "1:04:03", "remaining_time": "2:22:26", "throughput": 19837.42, "total_tokens": 76251520} +{"current_steps": 24235, "total_steps": 78105, "loss": 0.3537, "lr": 4.356286248507231e-06, "epoch": 1.5514371679149863, "percentage": 31.03, "elapsed_time": "1:04:04", "remaining_time": "2:22:25", "throughput": 19838.1, "total_tokens": 76267904} +{"current_steps": 24240, "total_steps": 78105, "loss": 0.4103, "lr": 4.355912000065737e-06, "epoch": 1.551757249855963, "percentage": 31.04, "elapsed_time": "1:04:05", "remaining_time": "2:22:24", "throughput": 19838.65, "total_tokens": 76283008} +{"current_steps": 24245, "total_steps": 78105, "loss": 0.3751, "lr": 4.355537658949665e-06, "epoch": 1.5520773317969399, "percentage": 31.04, "elapsed_time": "1:04:05", "remaining_time": "2:22:23", "throughput": 19839.17, "total_tokens": 76297728} +{"current_steps": 24250, "total_steps": 78105, "loss": 0.3512, "lr": 4.355163225177707e-06, "epoch": 1.5523974137379168, "percentage": 31.05, "elapsed_time": "1:04:06", "remaining_time": "2:22:22", "throughput": 19839.64, "total_tokens": 76312384} +{"current_steps": 24255, "total_steps": 78105, "loss": 0.2696, "lr": 4.354788698768561e-06, "epoch": 1.5527174956788938, "percentage": 31.05, "elapsed_time": "1:04:07", "remaining_time": "2:22:21", "throughput": 19840.29, "total_tokens": 76328320} +{"current_steps": 24260, "total_steps": 78105, "loss": 0.3207, "lr": 4.354414079740928e-06, "epoch": 1.5530375776198708, "percentage": 31.06, "elapsed_time": "1:04:07", "remaining_time": "2:22:20", "throughput": 19840.9, "total_tokens": 76344320} +{"current_steps": 24265, "total_steps": 78105, "loss": 0.3513, "lr": 4.354039368113515e-06, "epoch": 1.5533576595608476, "percentage": 31.07, "elapsed_time": "1:04:08", "remaining_time": "2:22:19", "throughput": 19841.55, "total_tokens": 76360256} +{"current_steps": 24270, "total_steps": 78105, "loss": 0.3803, "lr": 4.353664563905034e-06, "epoch": 1.5536777415018244, "percentage": 31.07, "elapsed_time": "1:04:09", "remaining_time": "2:22:18", "throughput": 19842.03, "total_tokens": 76374848} +{"current_steps": 24275, "total_steps": 78105, "loss": 0.41, "lr": 4.353289667134201e-06, "epoch": 1.5539978234428014, "percentage": 31.08, "elapsed_time": "1:04:09", "remaining_time": "2:22:17", "throughput": 19842.65, "total_tokens": 76390912} +{"current_steps": 24280, "total_steps": 78105, "loss": 0.2854, "lr": 4.352914677819733e-06, "epoch": 1.5543179053837783, "percentage": 31.09, "elapsed_time": "1:04:10", "remaining_time": "2:22:15", "throughput": 19843.23, "total_tokens": 76406528} +{"current_steps": 24285, "total_steps": 78105, "loss": 0.3075, "lr": 4.352539595980359e-06, "epoch": 1.554637987324755, "percentage": 31.09, "elapsed_time": "1:04:11", "remaining_time": "2:22:14", "throughput": 19843.78, "total_tokens": 76421760} +{"current_steps": 24290, "total_steps": 78105, "loss": 0.3739, "lr": 4.352164421634807e-06, "epoch": 1.5549580692657319, "percentage": 31.1, "elapsed_time": "1:04:11", "remaining_time": "2:22:13", "throughput": 19844.23, "total_tokens": 76436224} +{"current_steps": 24295, "total_steps": 78105, "loss": 0.341, "lr": 4.351789154801811e-06, "epoch": 1.5552781512067089, "percentage": 31.11, "elapsed_time": "1:04:12", "remaining_time": "2:22:12", "throughput": 19844.82, "total_tokens": 76452224} +{"current_steps": 24300, "total_steps": 78105, "loss": 0.3821, "lr": 4.351413795500111e-06, "epoch": 1.5555982331476859, "percentage": 31.11, "elapsed_time": "1:04:13", "remaining_time": "2:22:11", "throughput": 19845.27, "total_tokens": 76466496} +{"current_steps": 24305, "total_steps": 78105, "loss": 0.3833, "lr": 4.3510383437484495e-06, "epoch": 1.5559183150886629, "percentage": 31.12, "elapsed_time": "1:04:13", "remaining_time": "2:22:10", "throughput": 19845.82, "total_tokens": 76481408} +{"current_steps": 24310, "total_steps": 78105, "loss": 0.2321, "lr": 4.350662799565574e-06, "epoch": 1.5562383970296396, "percentage": 31.12, "elapsed_time": "1:04:14", "remaining_time": "2:22:09", "throughput": 19846.61, "total_tokens": 76499136} +{"current_steps": 24315, "total_steps": 78105, "loss": 0.5105, "lr": 4.350287162970239e-06, "epoch": 1.5565584789706164, "percentage": 31.13, "elapsed_time": "1:04:15", "remaining_time": "2:22:08", "throughput": 19847.3, "total_tokens": 76515968} +{"current_steps": 24320, "total_steps": 78105, "loss": 0.4383, "lr": 4.3499114339812e-06, "epoch": 1.5568785609115934, "percentage": 31.14, "elapsed_time": "1:04:15", "remaining_time": "2:22:07", "throughput": 19848.0, "total_tokens": 76532928} +{"current_steps": 24325, "total_steps": 78105, "loss": 0.4107, "lr": 4.349535612617221e-06, "epoch": 1.5571986428525704, "percentage": 31.14, "elapsed_time": "1:04:16", "remaining_time": "2:22:06", "throughput": 19848.51, "total_tokens": 76547776} +{"current_steps": 24330, "total_steps": 78105, "loss": 0.1995, "lr": 4.349159698897066e-06, "epoch": 1.5575187247935471, "percentage": 31.15, "elapsed_time": "1:04:17", "remaining_time": "2:22:05", "throughput": 19848.97, "total_tokens": 76562304} +{"current_steps": 24335, "total_steps": 78105, "loss": 0.3399, "lr": 4.348783692839509e-06, "epoch": 1.557838806734524, "percentage": 31.16, "elapsed_time": "1:04:17", "remaining_time": "2:22:04", "throughput": 19849.64, "total_tokens": 76578816} +{"current_steps": 24340, "total_steps": 78105, "loss": 0.3594, "lr": 4.348407594463323e-06, "epoch": 1.558158888675501, "percentage": 31.16, "elapsed_time": "1:04:18", "remaining_time": "2:22:03", "throughput": 19850.32, "total_tokens": 76595136} +{"current_steps": 24345, "total_steps": 78105, "loss": 0.382, "lr": 4.34803140378729e-06, "epoch": 1.5584789706164779, "percentage": 31.17, "elapsed_time": "1:04:19", "remaining_time": "2:22:02", "throughput": 19850.79, "total_tokens": 76609792} +{"current_steps": 24350, "total_steps": 78105, "loss": 0.4403, "lr": 4.347655120830195e-06, "epoch": 1.5587990525574547, "percentage": 31.18, "elapsed_time": "1:04:19", "remaining_time": "2:22:01", "throughput": 19851.29, "total_tokens": 76624512} +{"current_steps": 24355, "total_steps": 78105, "loss": 0.4182, "lr": 4.347278745610828e-06, "epoch": 1.5591191344984316, "percentage": 31.18, "elapsed_time": "1:04:20", "remaining_time": "2:22:00", "throughput": 19851.89, "total_tokens": 76640256} +{"current_steps": 24360, "total_steps": 78105, "loss": 0.3146, "lr": 4.346902278147982e-06, "epoch": 1.5594392164394084, "percentage": 31.19, "elapsed_time": "1:04:21", "remaining_time": "2:21:59", "throughput": 19852.54, "total_tokens": 76656640} +{"current_steps": 24365, "total_steps": 78105, "loss": 0.3663, "lr": 4.346525718460456e-06, "epoch": 1.5597592983803854, "percentage": 31.2, "elapsed_time": "1:04:21", "remaining_time": "2:21:58", "throughput": 19853.15, "total_tokens": 76672192} +{"current_steps": 24370, "total_steps": 78105, "loss": 0.3104, "lr": 4.346149066567054e-06, "epoch": 1.5600793803213624, "percentage": 31.2, "elapsed_time": "1:04:22", "remaining_time": "2:21:56", "throughput": 19853.78, "total_tokens": 76688064} +{"current_steps": 24375, "total_steps": 78105, "loss": 0.3194, "lr": 4.345772322486584e-06, "epoch": 1.5603994622623392, "percentage": 31.21, "elapsed_time": "1:04:23", "remaining_time": "2:21:55", "throughput": 19854.2, "total_tokens": 76702208} +{"current_steps": 24380, "total_steps": 78105, "loss": 0.3413, "lr": 4.3453954862378586e-06, "epoch": 1.560719544203316, "percentage": 31.21, "elapsed_time": "1:04:23", "remaining_time": "2:21:54", "throughput": 19854.87, "total_tokens": 76718464} +{"current_steps": 24385, "total_steps": 78105, "loss": 0.3397, "lr": 4.345018557839695e-06, "epoch": 1.561039626144293, "percentage": 31.22, "elapsed_time": "1:04:24", "remaining_time": "2:21:53", "throughput": 19855.5, "total_tokens": 76734400} +{"current_steps": 24390, "total_steps": 78105, "loss": 0.3075, "lr": 4.344641537310915e-06, "epoch": 1.56135970808527, "percentage": 31.23, "elapsed_time": "1:04:25", "remaining_time": "2:21:52", "throughput": 19856.19, "total_tokens": 76750720} +{"current_steps": 24395, "total_steps": 78105, "loss": 0.2896, "lr": 4.344264424670345e-06, "epoch": 1.5616797900262467, "percentage": 31.23, "elapsed_time": "1:04:26", "remaining_time": "2:21:51", "throughput": 19857.0, "total_tokens": 76768320} +{"current_steps": 24400, "total_steps": 78105, "loss": 0.3499, "lr": 4.343887219936815e-06, "epoch": 1.5619998719672235, "percentage": 31.24, "elapsed_time": "1:04:26", "remaining_time": "2:21:50", "throughput": 19857.56, "total_tokens": 76783936} +{"current_steps": 24405, "total_steps": 78105, "loss": 0.246, "lr": 4.343509923129163e-06, "epoch": 1.5623199539082004, "percentage": 31.25, "elapsed_time": "1:04:27", "remaining_time": "2:21:49", "throughput": 19858.21, "total_tokens": 76800256} +{"current_steps": 24410, "total_steps": 78105, "loss": 0.4125, "lr": 4.343132534266228e-06, "epoch": 1.5626400358491774, "percentage": 31.25, "elapsed_time": "1:04:28", "remaining_time": "2:21:48", "throughput": 19858.77, "total_tokens": 76815616} +{"current_steps": 24415, "total_steps": 78105, "loss": 0.2967, "lr": 4.342755053366854e-06, "epoch": 1.5629601177901544, "percentage": 31.26, "elapsed_time": "1:04:28", "remaining_time": "2:21:47", "throughput": 19859.25, "total_tokens": 76830080} +{"current_steps": 24420, "total_steps": 78105, "loss": 0.4358, "lr": 4.342377480449892e-06, "epoch": 1.5632801997311312, "percentage": 31.27, "elapsed_time": "1:04:29", "remaining_time": "2:21:46", "throughput": 19859.96, "total_tokens": 76846656} +{"current_steps": 24425, "total_steps": 78105, "loss": 0.3601, "lr": 4.341999815534194e-06, "epoch": 1.563600281672108, "percentage": 31.27, "elapsed_time": "1:04:30", "remaining_time": "2:21:45", "throughput": 19860.5, "total_tokens": 76861888} +{"current_steps": 24430, "total_steps": 78105, "loss": 0.3554, "lr": 4.341622058638621e-06, "epoch": 1.563920363613085, "percentage": 31.28, "elapsed_time": "1:04:30", "remaining_time": "2:21:44", "throughput": 19861.26, "total_tokens": 76879360} +{"current_steps": 24435, "total_steps": 78105, "loss": 0.4199, "lr": 4.341244209782035e-06, "epoch": 1.564240445554062, "percentage": 31.28, "elapsed_time": "1:04:31", "remaining_time": "2:21:43", "throughput": 19861.75, "total_tokens": 76894208} +{"current_steps": 24440, "total_steps": 78105, "loss": 0.382, "lr": 4.340866268983303e-06, "epoch": 1.5645605274950387, "percentage": 31.29, "elapsed_time": "1:04:32", "remaining_time": "2:21:42", "throughput": 19862.36, "total_tokens": 76910016} +{"current_steps": 24445, "total_steps": 78105, "loss": 0.2945, "lr": 4.340488236261299e-06, "epoch": 1.5648806094360155, "percentage": 31.3, "elapsed_time": "1:04:32", "remaining_time": "2:21:41", "throughput": 19863.06, "total_tokens": 76926592} +{"current_steps": 24450, "total_steps": 78105, "loss": 0.3547, "lr": 4.340110111634899e-06, "epoch": 1.5652006913769925, "percentage": 31.3, "elapsed_time": "1:04:33", "remaining_time": "2:21:40", "throughput": 19863.68, "total_tokens": 76942464} +{"current_steps": 24455, "total_steps": 78105, "loss": 0.4236, "lr": 4.339731895122984e-06, "epoch": 1.5655207733179695, "percentage": 31.31, "elapsed_time": "1:04:34", "remaining_time": "2:21:39", "throughput": 19864.22, "total_tokens": 76957568} +{"current_steps": 24460, "total_steps": 78105, "loss": 0.2503, "lr": 4.339353586744442e-06, "epoch": 1.5658408552589465, "percentage": 31.32, "elapsed_time": "1:04:34", "remaining_time": "2:21:38", "throughput": 19864.93, "total_tokens": 76974336} +{"current_steps": 24465, "total_steps": 78105, "loss": 0.2646, "lr": 4.338975186518162e-06, "epoch": 1.5661609371999232, "percentage": 31.32, "elapsed_time": "1:04:35", "remaining_time": "2:21:37", "throughput": 19865.53, "total_tokens": 76990208} +{"current_steps": 24470, "total_steps": 78105, "loss": 0.2416, "lr": 4.338596694463041e-06, "epoch": 1.5664810191409, "percentage": 31.33, "elapsed_time": "1:04:36", "remaining_time": "2:21:36", "throughput": 19865.99, "total_tokens": 77005120} +{"current_steps": 24475, "total_steps": 78105, "loss": 0.3828, "lr": 4.338218110597977e-06, "epoch": 1.566801101081877, "percentage": 31.34, "elapsed_time": "1:04:36", "remaining_time": "2:21:35", "throughput": 19866.48, "total_tokens": 77019968} +{"current_steps": 24480, "total_steps": 78105, "loss": 0.4541, "lr": 4.337839434941876e-06, "epoch": 1.567121183022854, "percentage": 31.34, "elapsed_time": "1:04:37", "remaining_time": "2:21:34", "throughput": 19867.34, "total_tokens": 77038336} +{"current_steps": 24485, "total_steps": 78105, "loss": 0.2851, "lr": 4.337460667513647e-06, "epoch": 1.5674412649638307, "percentage": 31.35, "elapsed_time": "1:04:38", "remaining_time": "2:21:33", "throughput": 19867.89, "total_tokens": 77053760} +{"current_steps": 24490, "total_steps": 78105, "loss": 0.2864, "lr": 4.337081808332203e-06, "epoch": 1.5677613469048075, "percentage": 31.36, "elapsed_time": "1:04:38", "remaining_time": "2:21:32", "throughput": 19868.52, "total_tokens": 77069632} +{"current_steps": 24495, "total_steps": 78105, "loss": 0.2691, "lr": 4.3367028574164615e-06, "epoch": 1.5680814288457845, "percentage": 31.36, "elapsed_time": "1:04:39", "remaining_time": "2:21:31", "throughput": 19869.1, "total_tokens": 77084992} +{"current_steps": 24500, "total_steps": 78105, "loss": 0.2951, "lr": 4.3363238147853474e-06, "epoch": 1.5684015107867615, "percentage": 31.37, "elapsed_time": "1:04:40", "remaining_time": "2:21:29", "throughput": 19869.6, "total_tokens": 77099840} +{"current_steps": 24505, "total_steps": 78105, "loss": 0.291, "lr": 4.335944680457787e-06, "epoch": 1.5687215927277383, "percentage": 31.37, "elapsed_time": "1:04:40", "remaining_time": "2:21:28", "throughput": 19870.21, "total_tokens": 77115712} +{"current_steps": 24510, "total_steps": 78105, "loss": 0.3405, "lr": 4.335565454452713e-06, "epoch": 1.569041674668715, "percentage": 31.38, "elapsed_time": "1:04:41", "remaining_time": "2:21:27", "throughput": 19870.79, "total_tokens": 77131392} +{"current_steps": 24515, "total_steps": 78105, "loss": 0.294, "lr": 4.33518613678906e-06, "epoch": 1.569361756609692, "percentage": 31.39, "elapsed_time": "1:04:42", "remaining_time": "2:21:26", "throughput": 19871.37, "total_tokens": 77146944} +{"current_steps": 24520, "total_steps": 78105, "loss": 0.3917, "lr": 4.334806727485771e-06, "epoch": 1.569681838550669, "percentage": 31.39, "elapsed_time": "1:04:42", "remaining_time": "2:21:25", "throughput": 19871.9, "total_tokens": 77162240} +{"current_steps": 24525, "total_steps": 78105, "loss": 0.387, "lr": 4.33442722656179e-06, "epoch": 1.570001920491646, "percentage": 31.4, "elapsed_time": "1:04:43", "remaining_time": "2:21:24", "throughput": 19872.51, "total_tokens": 77178304} +{"current_steps": 24530, "total_steps": 78105, "loss": 0.3978, "lr": 4.33404763403607e-06, "epoch": 1.5703220024326228, "percentage": 31.41, "elapsed_time": "1:04:44", "remaining_time": "2:21:23", "throughput": 19873.38, "total_tokens": 77197056} +{"current_steps": 24535, "total_steps": 78105, "loss": 0.3741, "lr": 4.333667949927564e-06, "epoch": 1.5706420843735995, "percentage": 31.41, "elapsed_time": "1:04:45", "remaining_time": "2:21:22", "throughput": 19874.01, "total_tokens": 77213120} +{"current_steps": 24540, "total_steps": 78105, "loss": 0.2273, "lr": 4.3332881742552315e-06, "epoch": 1.5709621663145765, "percentage": 31.42, "elapsed_time": "1:04:45", "remaining_time": "2:21:21", "throughput": 19874.66, "total_tokens": 77229376} +{"current_steps": 24545, "total_steps": 78105, "loss": 0.2782, "lr": 4.332908307038037e-06, "epoch": 1.5712822482555535, "percentage": 31.43, "elapsed_time": "1:04:46", "remaining_time": "2:21:20", "throughput": 19875.23, "total_tokens": 77244800} +{"current_steps": 24550, "total_steps": 78105, "loss": 0.435, "lr": 4.33252834829495e-06, "epoch": 1.5716023301965303, "percentage": 31.43, "elapsed_time": "1:04:47", "remaining_time": "2:21:19", "throughput": 19875.83, "total_tokens": 77260864} +{"current_steps": 24555, "total_steps": 78105, "loss": 0.378, "lr": 4.332148298044941e-06, "epoch": 1.571922412137507, "percentage": 31.44, "elapsed_time": "1:04:47", "remaining_time": "2:21:18", "throughput": 19876.4, "total_tokens": 77276480} +{"current_steps": 24560, "total_steps": 78105, "loss": 0.3058, "lr": 4.33176815630699e-06, "epoch": 1.572242494078484, "percentage": 31.44, "elapsed_time": "1:04:48", "remaining_time": "2:21:17", "throughput": 19876.93, "total_tokens": 77291328} +{"current_steps": 24565, "total_steps": 78105, "loss": 0.2797, "lr": 4.331387923100078e-06, "epoch": 1.572562576019461, "percentage": 31.45, "elapsed_time": "1:04:49", "remaining_time": "2:21:16", "throughput": 19877.43, "total_tokens": 77306240} +{"current_steps": 24570, "total_steps": 78105, "loss": 0.3244, "lr": 4.331007598443193e-06, "epoch": 1.572882657960438, "percentage": 31.46, "elapsed_time": "1:04:49", "remaining_time": "2:21:15", "throughput": 19877.97, "total_tokens": 77321856} +{"current_steps": 24575, "total_steps": 78105, "loss": 0.3209, "lr": 4.3306271823553255e-06, "epoch": 1.5732027399014148, "percentage": 31.46, "elapsed_time": "1:04:50", "remaining_time": "2:21:14", "throughput": 19878.5, "total_tokens": 77337344} +{"current_steps": 24580, "total_steps": 78105, "loss": 0.2657, "lr": 4.330246674855473e-06, "epoch": 1.5735228218423916, "percentage": 31.47, "elapsed_time": "1:04:51", "remaining_time": "2:21:13", "throughput": 19879.09, "total_tokens": 77353216} +{"current_steps": 24585, "total_steps": 78105, "loss": 0.3113, "lr": 4.329866075962634e-06, "epoch": 1.5738429037833686, "percentage": 31.48, "elapsed_time": "1:04:51", "remaining_time": "2:21:12", "throughput": 19879.71, "total_tokens": 77369152} +{"current_steps": 24590, "total_steps": 78105, "loss": 0.4957, "lr": 4.329485385695815e-06, "epoch": 1.5741629857243455, "percentage": 31.48, "elapsed_time": "1:04:52", "remaining_time": "2:21:11", "throughput": 19880.21, "total_tokens": 77384192} +{"current_steps": 24595, "total_steps": 78105, "loss": 0.3878, "lr": 4.329104604074025e-06, "epoch": 1.5744830676653223, "percentage": 31.49, "elapsed_time": "1:04:53", "remaining_time": "2:21:10", "throughput": 19880.81, "total_tokens": 77400192} +{"current_steps": 24600, "total_steps": 78105, "loss": 0.3286, "lr": 4.3287237311162785e-06, "epoch": 1.574803149606299, "percentage": 31.5, "elapsed_time": "1:04:53", "remaining_time": "2:21:09", "throughput": 19881.56, "total_tokens": 77417472} +{"current_steps": 24605, "total_steps": 78105, "loss": 0.3442, "lr": 4.328342766841594e-06, "epoch": 1.575123231547276, "percentage": 31.5, "elapsed_time": "1:04:54", "remaining_time": "2:21:08", "throughput": 19882.12, "total_tokens": 77432896} +{"current_steps": 24610, "total_steps": 78105, "loss": 0.3132, "lr": 4.327961711268996e-06, "epoch": 1.575443313488253, "percentage": 31.51, "elapsed_time": "1:04:55", "remaining_time": "2:21:07", "throughput": 19882.65, "total_tokens": 77448064} +{"current_steps": 24615, "total_steps": 78105, "loss": 0.3277, "lr": 4.327580564417512e-06, "epoch": 1.5757633954292298, "percentage": 31.52, "elapsed_time": "1:04:55", "remaining_time": "2:21:06", "throughput": 19883.32, "total_tokens": 77464448} +{"current_steps": 24620, "total_steps": 78105, "loss": 0.2517, "lr": 4.327199326306173e-06, "epoch": 1.5760834773702068, "percentage": 31.52, "elapsed_time": "1:04:56", "remaining_time": "2:21:05", "throughput": 19883.89, "total_tokens": 77479936} +{"current_steps": 24625, "total_steps": 78105, "loss": 0.3847, "lr": 4.326817996954018e-06, "epoch": 1.5764035593111836, "percentage": 31.53, "elapsed_time": "1:04:57", "remaining_time": "2:21:04", "throughput": 19884.48, "total_tokens": 77495552} +{"current_steps": 24630, "total_steps": 78105, "loss": 0.4093, "lr": 4.326436576380087e-06, "epoch": 1.5767236412521606, "percentage": 31.53, "elapsed_time": "1:04:57", "remaining_time": "2:21:03", "throughput": 19885.07, "total_tokens": 77511552} +{"current_steps": 24635, "total_steps": 78105, "loss": 0.3874, "lr": 4.3260550646034276e-06, "epoch": 1.5770437231931376, "percentage": 31.54, "elapsed_time": "1:04:58", "remaining_time": "2:21:01", "throughput": 19885.66, "total_tokens": 77527232} +{"current_steps": 24640, "total_steps": 78105, "loss": 0.2757, "lr": 4.3256734616430896e-06, "epoch": 1.5773638051341143, "percentage": 31.55, "elapsed_time": "1:04:59", "remaining_time": "2:21:00", "throughput": 19886.18, "total_tokens": 77542208} +{"current_steps": 24645, "total_steps": 78105, "loss": 0.3221, "lr": 4.3252917675181286e-06, "epoch": 1.577683887075091, "percentage": 31.55, "elapsed_time": "1:04:59", "remaining_time": "2:20:59", "throughput": 19886.74, "total_tokens": 77557888} +{"current_steps": 24650, "total_steps": 78105, "loss": 0.3624, "lr": 4.324909982247604e-06, "epoch": 1.578003969016068, "percentage": 31.56, "elapsed_time": "1:05:00", "remaining_time": "2:20:58", "throughput": 19887.23, "total_tokens": 77573056} +{"current_steps": 24655, "total_steps": 78105, "loss": 0.3257, "lr": 4.324528105850581e-06, "epoch": 1.578324050957045, "percentage": 31.57, "elapsed_time": "1:05:01", "remaining_time": "2:20:57", "throughput": 19887.82, "total_tokens": 77589056} +{"current_steps": 24660, "total_steps": 78105, "loss": 0.2694, "lr": 4.324146138346127e-06, "epoch": 1.5786441328980219, "percentage": 31.57, "elapsed_time": "1:05:02", "remaining_time": "2:20:56", "throughput": 19888.52, "total_tokens": 77606016} +{"current_steps": 24665, "total_steps": 78105, "loss": 0.4408, "lr": 4.323764079753318e-06, "epoch": 1.5789642148389986, "percentage": 31.58, "elapsed_time": "1:05:02", "remaining_time": "2:20:55", "throughput": 19889.07, "total_tokens": 77621440} +{"current_steps": 24670, "total_steps": 78105, "loss": 0.3611, "lr": 4.323381930091229e-06, "epoch": 1.5792842967799756, "percentage": 31.59, "elapsed_time": "1:05:03", "remaining_time": "2:20:54", "throughput": 19889.63, "total_tokens": 77637184} +{"current_steps": 24675, "total_steps": 78105, "loss": 0.4302, "lr": 4.322999689378945e-06, "epoch": 1.5796043787209526, "percentage": 31.59, "elapsed_time": "1:05:04", "remaining_time": "2:20:54", "throughput": 19889.69, "total_tokens": 77653760} +{"current_steps": 24680, "total_steps": 78105, "loss": 0.5172, "lr": 4.322617357635553e-06, "epoch": 1.5799244606619296, "percentage": 31.6, "elapsed_time": "1:05:04", "remaining_time": "2:20:52", "throughput": 19890.17, "total_tokens": 77668480} +{"current_steps": 24685, "total_steps": 78105, "loss": 0.2008, "lr": 4.3222349348801415e-06, "epoch": 1.5802445426029064, "percentage": 31.6, "elapsed_time": "1:05:05", "remaining_time": "2:20:51", "throughput": 19890.94, "total_tokens": 77685632} +{"current_steps": 24690, "total_steps": 78105, "loss": 0.3615, "lr": 4.321852421131811e-06, "epoch": 1.5805646245438831, "percentage": 31.61, "elapsed_time": "1:05:06", "remaining_time": "2:20:50", "throughput": 19891.57, "total_tokens": 77701952} +{"current_steps": 24695, "total_steps": 78105, "loss": 0.1978, "lr": 4.321469816409659e-06, "epoch": 1.5808847064848601, "percentage": 31.62, "elapsed_time": "1:05:06", "remaining_time": "2:20:49", "throughput": 19892.11, "total_tokens": 77717056} +{"current_steps": 24700, "total_steps": 78105, "loss": 0.4455, "lr": 4.3210871207327935e-06, "epoch": 1.5812047884258371, "percentage": 31.62, "elapsed_time": "1:05:07", "remaining_time": "2:20:48", "throughput": 19892.7, "total_tokens": 77732800} +{"current_steps": 24705, "total_steps": 78105, "loss": 0.3804, "lr": 4.320704334120322e-06, "epoch": 1.5815248703668139, "percentage": 31.63, "elapsed_time": "1:05:08", "remaining_time": "2:20:47", "throughput": 19893.23, "total_tokens": 77748224} +{"current_steps": 24710, "total_steps": 78105, "loss": 0.3229, "lr": 4.320321456591359e-06, "epoch": 1.5818449523077907, "percentage": 31.64, "elapsed_time": "1:05:08", "remaining_time": "2:20:46", "throughput": 19893.91, "total_tokens": 77764928} +{"current_steps": 24715, "total_steps": 78105, "loss": 0.3176, "lr": 4.3199384881650255e-06, "epoch": 1.5821650342487676, "percentage": 31.64, "elapsed_time": "1:05:09", "remaining_time": "2:20:45", "throughput": 19894.57, "total_tokens": 77781184} +{"current_steps": 24720, "total_steps": 78105, "loss": 0.216, "lr": 4.319555428860443e-06, "epoch": 1.5824851161897446, "percentage": 31.65, "elapsed_time": "1:05:10", "remaining_time": "2:20:44", "throughput": 19895.23, "total_tokens": 77797888} +{"current_steps": 24725, "total_steps": 78105, "loss": 0.2957, "lr": 4.31917227869674e-06, "epoch": 1.5828051981307216, "percentage": 31.66, "elapsed_time": "1:05:11", "remaining_time": "2:20:43", "throughput": 19895.83, "total_tokens": 77813888} +{"current_steps": 24730, "total_steps": 78105, "loss": 0.2514, "lr": 4.31878903769305e-06, "epoch": 1.5831252800716984, "percentage": 31.66, "elapsed_time": "1:05:11", "remaining_time": "2:20:42", "throughput": 19896.37, "total_tokens": 77829632} +{"current_steps": 24735, "total_steps": 78105, "loss": 0.5077, "lr": 4.318405705868508e-06, "epoch": 1.5834453620126752, "percentage": 31.67, "elapsed_time": "1:05:12", "remaining_time": "2:20:41", "throughput": 19896.98, "total_tokens": 77845504} +{"current_steps": 24740, "total_steps": 78105, "loss": 0.285, "lr": 4.318022283242257e-06, "epoch": 1.5837654439536522, "percentage": 31.68, "elapsed_time": "1:05:13", "remaining_time": "2:20:40", "throughput": 19897.49, "total_tokens": 77860608} +{"current_steps": 24745, "total_steps": 78105, "loss": 0.3475, "lr": 4.3176387698334435e-06, "epoch": 1.5840855258946291, "percentage": 31.68, "elapsed_time": "1:05:13", "remaining_time": "2:20:39", "throughput": 19898.14, "total_tokens": 77876736} +{"current_steps": 24750, "total_steps": 78105, "loss": 0.3145, "lr": 4.317255165661217e-06, "epoch": 1.584405607835606, "percentage": 31.69, "elapsed_time": "1:05:14", "remaining_time": "2:20:38", "throughput": 19898.83, "total_tokens": 77894016} +{"current_steps": 24755, "total_steps": 78105, "loss": 0.2956, "lr": 4.3168714707447336e-06, "epoch": 1.5847256897765827, "percentage": 31.69, "elapsed_time": "1:05:15", "remaining_time": "2:20:37", "throughput": 19899.4, "total_tokens": 77909504} +{"current_steps": 24760, "total_steps": 78105, "loss": 0.3287, "lr": 4.316487685103153e-06, "epoch": 1.5850457717175597, "percentage": 31.7, "elapsed_time": "1:05:15", "remaining_time": "2:20:36", "throughput": 19900.02, "total_tokens": 77925568} +{"current_steps": 24765, "total_steps": 78105, "loss": 0.4362, "lr": 4.316103808755638e-06, "epoch": 1.5853658536585367, "percentage": 31.71, "elapsed_time": "1:05:16", "remaining_time": "2:20:35", "throughput": 19900.64, "total_tokens": 77941632} +{"current_steps": 24770, "total_steps": 78105, "loss": 0.2431, "lr": 4.31571984172136e-06, "epoch": 1.5856859355995134, "percentage": 31.71, "elapsed_time": "1:05:17", "remaining_time": "2:20:34", "throughput": 19901.28, "total_tokens": 77958208} +{"current_steps": 24775, "total_steps": 78105, "loss": 0.45, "lr": 4.31533578401949e-06, "epoch": 1.5860060175404904, "percentage": 31.72, "elapsed_time": "1:05:17", "remaining_time": "2:20:33", "throughput": 19901.79, "total_tokens": 77973312} +{"current_steps": 24780, "total_steps": 78105, "loss": 0.377, "lr": 4.314951635669207e-06, "epoch": 1.5863260994814672, "percentage": 31.73, "elapsed_time": "1:05:18", "remaining_time": "2:20:32", "throughput": 19902.4, "total_tokens": 77989120} +{"current_steps": 24785, "total_steps": 78105, "loss": 0.4339, "lr": 4.314567396689692e-06, "epoch": 1.5866461814224442, "percentage": 31.73, "elapsed_time": "1:05:19", "remaining_time": "2:20:31", "throughput": 19902.97, "total_tokens": 78004672} +{"current_steps": 24790, "total_steps": 78105, "loss": 0.3368, "lr": 4.3141830671001335e-06, "epoch": 1.5869662633634212, "percentage": 31.74, "elapsed_time": "1:05:19", "remaining_time": "2:20:30", "throughput": 19903.72, "total_tokens": 78022208} +{"current_steps": 24795, "total_steps": 78105, "loss": 0.4114, "lr": 4.313798646919723e-06, "epoch": 1.587286345304398, "percentage": 31.75, "elapsed_time": "1:05:20", "remaining_time": "2:20:29", "throughput": 19904.35, "total_tokens": 78038400} +{"current_steps": 24800, "total_steps": 78105, "loss": 0.3207, "lr": 4.3134141361676555e-06, "epoch": 1.5876064272453747, "percentage": 31.75, "elapsed_time": "1:05:21", "remaining_time": "2:20:28", "throughput": 19904.85, "total_tokens": 78053440} +{"current_steps": 24805, "total_steps": 78105, "loss": 0.276, "lr": 4.3130295348631315e-06, "epoch": 1.5879265091863517, "percentage": 31.76, "elapsed_time": "1:05:22", "remaining_time": "2:20:27", "throughput": 19905.45, "total_tokens": 78069312} +{"current_steps": 24810, "total_steps": 78105, "loss": 0.3519, "lr": 4.312644843025356e-06, "epoch": 1.5882465911273287, "percentage": 31.76, "elapsed_time": "1:05:22", "remaining_time": "2:20:26", "throughput": 19906.08, "total_tokens": 78085504} +{"current_steps": 24815, "total_steps": 78105, "loss": 0.2728, "lr": 4.31226006067354e-06, "epoch": 1.5885666730683055, "percentage": 31.77, "elapsed_time": "1:05:23", "remaining_time": "2:20:25", "throughput": 19906.65, "total_tokens": 78101312} +{"current_steps": 24820, "total_steps": 78105, "loss": 0.2978, "lr": 4.3118751878268965e-06, "epoch": 1.5888867550092822, "percentage": 31.78, "elapsed_time": "1:05:24", "remaining_time": "2:20:24", "throughput": 19907.32, "total_tokens": 78118144} +{"current_steps": 24825, "total_steps": 78105, "loss": 0.5495, "lr": 4.311490224504642e-06, "epoch": 1.5892068369502592, "percentage": 31.78, "elapsed_time": "1:05:24", "remaining_time": "2:20:23", "throughput": 19907.79, "total_tokens": 78133312} +{"current_steps": 24830, "total_steps": 78105, "loss": 0.315, "lr": 4.311105170726002e-06, "epoch": 1.5895269188912362, "percentage": 31.79, "elapsed_time": "1:05:25", "remaining_time": "2:20:22", "throughput": 19908.36, "total_tokens": 78149120} +{"current_steps": 24835, "total_steps": 78105, "loss": 0.3602, "lr": 4.310720026510204e-06, "epoch": 1.5898470008322132, "percentage": 31.8, "elapsed_time": "1:05:26", "remaining_time": "2:20:21", "throughput": 19908.9, "total_tokens": 78164480} +{"current_steps": 24840, "total_steps": 78105, "loss": 0.3107, "lr": 4.310334791876479e-06, "epoch": 1.59016708277319, "percentage": 31.8, "elapsed_time": "1:05:26", "remaining_time": "2:20:20", "throughput": 19909.35, "total_tokens": 78179200} +{"current_steps": 24845, "total_steps": 78105, "loss": 0.3701, "lr": 4.309949466844065e-06, "epoch": 1.5904871647141667, "percentage": 31.81, "elapsed_time": "1:05:27", "remaining_time": "2:20:19", "throughput": 19909.85, "total_tokens": 78193920} +{"current_steps": 24850, "total_steps": 78105, "loss": 0.3104, "lr": 4.3095640514322015e-06, "epoch": 1.5908072466551437, "percentage": 31.82, "elapsed_time": "1:05:28", "remaining_time": "2:20:18", "throughput": 19910.48, "total_tokens": 78210112} +{"current_steps": 24855, "total_steps": 78105, "loss": 0.3044, "lr": 4.309178545660136e-06, "epoch": 1.5911273285961207, "percentage": 31.82, "elapsed_time": "1:05:28", "remaining_time": "2:20:17", "throughput": 19911.01, "total_tokens": 78225664} +{"current_steps": 24860, "total_steps": 78105, "loss": 0.3001, "lr": 4.308792949547116e-06, "epoch": 1.5914474105370975, "percentage": 31.83, "elapsed_time": "1:05:29", "remaining_time": "2:20:16", "throughput": 19911.61, "total_tokens": 78241728} +{"current_steps": 24865, "total_steps": 78105, "loss": 0.21, "lr": 4.308407263112399e-06, "epoch": 1.5917674924780743, "percentage": 31.84, "elapsed_time": "1:05:30", "remaining_time": "2:20:15", "throughput": 19912.2, "total_tokens": 78258048} +{"current_steps": 24870, "total_steps": 78105, "loss": 0.3303, "lr": 4.308021486375243e-06, "epoch": 1.5920875744190512, "percentage": 31.84, "elapsed_time": "1:05:30", "remaining_time": "2:20:14", "throughput": 19912.69, "total_tokens": 78273088} +{"current_steps": 24875, "total_steps": 78105, "loss": 0.3772, "lr": 4.307635619354911e-06, "epoch": 1.5924076563600282, "percentage": 31.85, "elapsed_time": "1:05:31", "remaining_time": "2:20:13", "throughput": 19913.36, "total_tokens": 78289472} +{"current_steps": 24880, "total_steps": 78105, "loss": 0.4343, "lr": 4.307249662070671e-06, "epoch": 1.592727738301005, "percentage": 31.85, "elapsed_time": "1:05:32", "remaining_time": "2:20:11", "throughput": 19913.76, "total_tokens": 78303552} +{"current_steps": 24885, "total_steps": 78105, "loss": 0.255, "lr": 4.3068636145417984e-06, "epoch": 1.593047820241982, "percentage": 31.86, "elapsed_time": "1:05:32", "remaining_time": "2:20:10", "throughput": 19914.36, "total_tokens": 78319488} +{"current_steps": 24890, "total_steps": 78105, "loss": 0.5656, "lr": 4.306477476787567e-06, "epoch": 1.5933679021829588, "percentage": 31.87, "elapsed_time": "1:05:33", "remaining_time": "2:20:09", "throughput": 19914.91, "total_tokens": 78335296} +{"current_steps": 24895, "total_steps": 78105, "loss": 0.331, "lr": 4.30609124882726e-06, "epoch": 1.5936879841239358, "percentage": 31.87, "elapsed_time": "1:05:34", "remaining_time": "2:20:08", "throughput": 19915.36, "total_tokens": 78349888} +{"current_steps": 24900, "total_steps": 78105, "loss": 0.4926, "lr": 4.305704930680165e-06, "epoch": 1.5940080660649127, "percentage": 31.88, "elapsed_time": "1:05:34", "remaining_time": "2:20:07", "throughput": 19915.92, "total_tokens": 78365120} +{"current_steps": 24905, "total_steps": 78105, "loss": 0.3521, "lr": 4.30531852236557e-06, "epoch": 1.5943281480058895, "percentage": 31.89, "elapsed_time": "1:05:35", "remaining_time": "2:20:06", "throughput": 19916.35, "total_tokens": 78379456} +{"current_steps": 24910, "total_steps": 78105, "loss": 0.3758, "lr": 4.304932023902773e-06, "epoch": 1.5946482299468663, "percentage": 31.89, "elapsed_time": "1:05:36", "remaining_time": "2:20:05", "throughput": 19917.0, "total_tokens": 78395520} +{"current_steps": 24915, "total_steps": 78105, "loss": 0.3558, "lr": 4.30454543531107e-06, "epoch": 1.5949683118878433, "percentage": 31.9, "elapsed_time": "1:05:36", "remaining_time": "2:20:04", "throughput": 19917.64, "total_tokens": 78412224} +{"current_steps": 24920, "total_steps": 78105, "loss": 0.4059, "lr": 4.30415875660977e-06, "epoch": 1.5952883938288203, "percentage": 31.91, "elapsed_time": "1:05:37", "remaining_time": "2:20:03", "throughput": 19918.26, "total_tokens": 78428224} +{"current_steps": 24925, "total_steps": 78105, "loss": 0.3417, "lr": 4.303771987818177e-06, "epoch": 1.595608475769797, "percentage": 31.91, "elapsed_time": "1:05:38", "remaining_time": "2:20:02", "throughput": 19919.04, "total_tokens": 78445632} +{"current_steps": 24930, "total_steps": 78105, "loss": 0.3598, "lr": 4.303385128955609e-06, "epoch": 1.5959285577107738, "percentage": 31.92, "elapsed_time": "1:05:39", "remaining_time": "2:20:01", "throughput": 19919.27, "total_tokens": 78462464} +{"current_steps": 24935, "total_steps": 78105, "loss": 0.262, "lr": 4.302998180041379e-06, "epoch": 1.5962486396517508, "percentage": 31.92, "elapsed_time": "1:05:39", "remaining_time": "2:20:00", "throughput": 19919.5, "total_tokens": 78476864} +{"current_steps": 24940, "total_steps": 78105, "loss": 0.3377, "lr": 4.3026111410948115e-06, "epoch": 1.5965687215927278, "percentage": 31.93, "elapsed_time": "1:05:40", "remaining_time": "2:19:59", "throughput": 19920.09, "total_tokens": 78492800} +{"current_steps": 24945, "total_steps": 78105, "loss": 0.3009, "lr": 4.302224012135233e-06, "epoch": 1.5968888035337048, "percentage": 31.94, "elapsed_time": "1:05:41", "remaining_time": "2:19:58", "throughput": 19920.63, "total_tokens": 78508288} +{"current_steps": 24950, "total_steps": 78105, "loss": 0.4052, "lr": 4.301836793181976e-06, "epoch": 1.5972088854746815, "percentage": 31.94, "elapsed_time": "1:05:41", "remaining_time": "2:19:57", "throughput": 19921.28, "total_tokens": 78524544} +{"current_steps": 24955, "total_steps": 78105, "loss": 0.265, "lr": 4.3014494842543745e-06, "epoch": 1.5975289674156583, "percentage": 31.95, "elapsed_time": "1:05:42", "remaining_time": "2:19:56", "throughput": 19921.88, "total_tokens": 78540416} +{"current_steps": 24960, "total_steps": 78105, "loss": 0.4615, "lr": 4.3010620853717685e-06, "epoch": 1.5978490493566353, "percentage": 31.96, "elapsed_time": "1:05:43", "remaining_time": "2:19:55", "throughput": 19922.4, "total_tokens": 78555712} +{"current_steps": 24965, "total_steps": 78105, "loss": 0.3191, "lr": 4.300674596553504e-06, "epoch": 1.5981691312976123, "percentage": 31.96, "elapsed_time": "1:05:43", "remaining_time": "2:19:54", "throughput": 19922.94, "total_tokens": 78571200} +{"current_steps": 24970, "total_steps": 78105, "loss": 0.3562, "lr": 4.300287017818929e-06, "epoch": 1.598489213238589, "percentage": 31.97, "elapsed_time": "1:05:44", "remaining_time": "2:19:53", "throughput": 19923.53, "total_tokens": 78586944} +{"current_steps": 24975, "total_steps": 78105, "loss": 0.235, "lr": 4.299899349187399e-06, "epoch": 1.5988092951795658, "percentage": 31.98, "elapsed_time": "1:05:45", "remaining_time": "2:19:52", "throughput": 19924.05, "total_tokens": 78601984} +{"current_steps": 24980, "total_steps": 78105, "loss": 0.31, "lr": 4.299511590678269e-06, "epoch": 1.5991293771205428, "percentage": 31.98, "elapsed_time": "1:05:45", "remaining_time": "2:19:51", "throughput": 19924.64, "total_tokens": 78617920} +{"current_steps": 24985, "total_steps": 78105, "loss": 0.3823, "lr": 4.299123742310904e-06, "epoch": 1.5994494590615198, "percentage": 31.99, "elapsed_time": "1:05:46", "remaining_time": "2:19:50", "throughput": 19925.3, "total_tokens": 78634496} +{"current_steps": 24990, "total_steps": 78105, "loss": 0.2959, "lr": 4.298735804104672e-06, "epoch": 1.5997695410024968, "percentage": 32.0, "elapsed_time": "1:05:47", "remaining_time": "2:19:49", "throughput": 19925.65, "total_tokens": 78648512} +{"current_steps": 24995, "total_steps": 78105, "loss": 0.3299, "lr": 4.2983477760789414e-06, "epoch": 1.6000896229434736, "percentage": 32.0, "elapsed_time": "1:05:47", "remaining_time": "2:19:48", "throughput": 19926.36, "total_tokens": 78665472} +{"current_steps": 25000, "total_steps": 78105, "loss": 0.2835, "lr": 4.297959658253091e-06, "epoch": 1.6004097048844503, "percentage": 32.01, "elapsed_time": "1:05:48", "remaining_time": "2:19:47", "throughput": 19926.85, "total_tokens": 78680512} +{"current_steps": 25005, "total_steps": 78105, "loss": 0.349, "lr": 4.2975714506465e-06, "epoch": 1.6007297868254273, "percentage": 32.01, "elapsed_time": "1:05:49", "remaining_time": "2:19:46", "throughput": 19927.24, "total_tokens": 78694784} +{"current_steps": 25010, "total_steps": 78105, "loss": 0.3617, "lr": 4.297183153278555e-06, "epoch": 1.6010498687664043, "percentage": 32.02, "elapsed_time": "1:05:49", "remaining_time": "2:19:45", "throughput": 19927.79, "total_tokens": 78710464} +{"current_steps": 25015, "total_steps": 78105, "loss": 0.2122, "lr": 4.296794766168643e-06, "epoch": 1.601369950707381, "percentage": 32.03, "elapsed_time": "1:05:50", "remaining_time": "2:19:44", "throughput": 19928.36, "total_tokens": 78726336} +{"current_steps": 25020, "total_steps": 78105, "loss": 0.3485, "lr": 4.2964062893361614e-06, "epoch": 1.6016900326483579, "percentage": 32.03, "elapsed_time": "1:05:51", "remaining_time": "2:19:43", "throughput": 19928.84, "total_tokens": 78741440} +{"current_steps": 25025, "total_steps": 78105, "loss": 0.2863, "lr": 4.296017722800505e-06, "epoch": 1.6020101145893348, "percentage": 32.04, "elapsed_time": "1:05:51", "remaining_time": "2:19:42", "throughput": 19929.35, "total_tokens": 78756480} +{"current_steps": 25030, "total_steps": 78105, "loss": 0.2454, "lr": 4.29562906658108e-06, "epoch": 1.6023301965303118, "percentage": 32.05, "elapsed_time": "1:05:52", "remaining_time": "2:19:40", "throughput": 19929.86, "total_tokens": 78771520} +{"current_steps": 25035, "total_steps": 78105, "loss": 0.41, "lr": 4.295240320697292e-06, "epoch": 1.6026502784712886, "percentage": 32.05, "elapsed_time": "1:05:53", "remaining_time": "2:19:39", "throughput": 19930.41, "total_tokens": 78787136} +{"current_steps": 25040, "total_steps": 78105, "loss": 0.2647, "lr": 4.294851485168553e-06, "epoch": 1.6029703604122656, "percentage": 32.06, "elapsed_time": "1:05:53", "remaining_time": "2:19:38", "throughput": 19930.93, "total_tokens": 78802688} +{"current_steps": 25045, "total_steps": 78105, "loss": 0.282, "lr": 4.294462560014281e-06, "epoch": 1.6032904423532424, "percentage": 32.07, "elapsed_time": "1:05:54", "remaining_time": "2:19:37", "throughput": 19931.55, "total_tokens": 78819072} +{"current_steps": 25050, "total_steps": 78105, "loss": 0.1755, "lr": 4.294073545253895e-06, "epoch": 1.6036105242942194, "percentage": 32.07, "elapsed_time": "1:05:55", "remaining_time": "2:19:36", "throughput": 19932.09, "total_tokens": 78834624} +{"current_steps": 25055, "total_steps": 78105, "loss": 0.3974, "lr": 4.293684440906821e-06, "epoch": 1.6039306062351963, "percentage": 32.08, "elapsed_time": "1:05:55", "remaining_time": "2:19:35", "throughput": 19932.7, "total_tokens": 78850688} +{"current_steps": 25060, "total_steps": 78105, "loss": 0.3474, "lr": 4.29329524699249e-06, "epoch": 1.6042506881761731, "percentage": 32.09, "elapsed_time": "1:05:56", "remaining_time": "2:19:34", "throughput": 19933.25, "total_tokens": 78866816} +{"current_steps": 25065, "total_steps": 78105, "loss": 0.2823, "lr": 4.292905963530334e-06, "epoch": 1.6045707701171499, "percentage": 32.09, "elapsed_time": "1:05:57", "remaining_time": "2:19:33", "throughput": 19933.66, "total_tokens": 78881472} +{"current_steps": 25070, "total_steps": 78105, "loss": 0.2369, "lr": 4.292516590539793e-06, "epoch": 1.6048908520581269, "percentage": 32.1, "elapsed_time": "1:05:57", "remaining_time": "2:19:32", "throughput": 19934.16, "total_tokens": 78896704} +{"current_steps": 25075, "total_steps": 78105, "loss": 0.2956, "lr": 4.292127128040311e-06, "epoch": 1.6052109339991039, "percentage": 32.1, "elapsed_time": "1:05:58", "remaining_time": "2:19:31", "throughput": 19934.67, "total_tokens": 78912000} +{"current_steps": 25080, "total_steps": 78105, "loss": 0.2587, "lr": 4.291737576051335e-06, "epoch": 1.6055310159400806, "percentage": 32.11, "elapsed_time": "1:05:59", "remaining_time": "2:19:30", "throughput": 19935.22, "total_tokens": 78927488} +{"current_steps": 25085, "total_steps": 78105, "loss": 0.2982, "lr": 4.291347934592317e-06, "epoch": 1.6058510978810574, "percentage": 32.12, "elapsed_time": "1:05:59", "remaining_time": "2:19:29", "throughput": 19935.79, "total_tokens": 78943424} +{"current_steps": 25090, "total_steps": 78105, "loss": 0.3259, "lr": 4.290958203682715e-06, "epoch": 1.6061711798220344, "percentage": 32.12, "elapsed_time": "1:06:00", "remaining_time": "2:19:28", "throughput": 19936.3, "total_tokens": 78958784} +{"current_steps": 25095, "total_steps": 78105, "loss": 0.3878, "lr": 4.290568383341987e-06, "epoch": 1.6064912617630114, "percentage": 32.13, "elapsed_time": "1:06:01", "remaining_time": "2:19:27", "throughput": 19936.83, "total_tokens": 78974272} +{"current_steps": 25100, "total_steps": 78105, "loss": 0.3547, "lr": 4.290178473589602e-06, "epoch": 1.6068113437039884, "percentage": 32.14, "elapsed_time": "1:06:01", "remaining_time": "2:19:26", "throughput": 19937.34, "total_tokens": 78989440} +{"current_steps": 25105, "total_steps": 78105, "loss": 0.335, "lr": 4.289788474445029e-06, "epoch": 1.6071314256449651, "percentage": 32.14, "elapsed_time": "1:06:02", "remaining_time": "2:19:25", "throughput": 19937.88, "total_tokens": 79005056} +{"current_steps": 25110, "total_steps": 78105, "loss": 0.3209, "lr": 4.289398385927742e-06, "epoch": 1.607451507585942, "percentage": 32.15, "elapsed_time": "1:06:03", "remaining_time": "2:19:24", "throughput": 19938.43, "total_tokens": 79020864} +{"current_steps": 25115, "total_steps": 78105, "loss": 0.3117, "lr": 4.2890082080572205e-06, "epoch": 1.607771589526919, "percentage": 32.16, "elapsed_time": "1:06:03", "remaining_time": "2:19:23", "throughput": 19939.05, "total_tokens": 79037056} +{"current_steps": 25120, "total_steps": 78105, "loss": 0.3472, "lr": 4.288617940852947e-06, "epoch": 1.608091671467896, "percentage": 32.16, "elapsed_time": "1:06:04", "remaining_time": "2:19:22", "throughput": 19939.48, "total_tokens": 79051648} +{"current_steps": 25125, "total_steps": 78105, "loss": 0.3045, "lr": 4.28822758433441e-06, "epoch": 1.6084117534088727, "percentage": 32.17, "elapsed_time": "1:06:05", "remaining_time": "2:19:21", "throughput": 19940.21, "total_tokens": 79069120} +{"current_steps": 25130, "total_steps": 78105, "loss": 0.2427, "lr": 4.287837138521103e-06, "epoch": 1.6087318353498494, "percentage": 32.17, "elapsed_time": "1:06:06", "remaining_time": "2:19:20", "throughput": 19940.83, "total_tokens": 79085440} +{"current_steps": 25135, "total_steps": 78105, "loss": 0.4127, "lr": 4.287446603432522e-06, "epoch": 1.6090519172908264, "percentage": 32.18, "elapsed_time": "1:06:06", "remaining_time": "2:19:19", "throughput": 19941.34, "total_tokens": 79100672} +{"current_steps": 25140, "total_steps": 78105, "loss": 0.4773, "lr": 4.2870559790881665e-06, "epoch": 1.6093719992318034, "percentage": 32.19, "elapsed_time": "1:06:07", "remaining_time": "2:19:18", "throughput": 19941.85, "total_tokens": 79115904} +{"current_steps": 25145, "total_steps": 78105, "loss": 0.4103, "lr": 4.286665265507544e-06, "epoch": 1.6096920811727804, "percentage": 32.19, "elapsed_time": "1:06:07", "remaining_time": "2:19:17", "throughput": 19942.31, "total_tokens": 79130816} +{"current_steps": 25150, "total_steps": 78105, "loss": 0.2558, "lr": 4.286274462710166e-06, "epoch": 1.6100121631137572, "percentage": 32.2, "elapsed_time": "1:06:08", "remaining_time": "2:19:16", "throughput": 19942.8, "total_tokens": 79145792} +{"current_steps": 25155, "total_steps": 78105, "loss": 0.2833, "lr": 4.285883570715545e-06, "epoch": 1.610332245054734, "percentage": 32.21, "elapsed_time": "1:06:09", "remaining_time": "2:19:15", "throughput": 19943.4, "total_tokens": 79161920} +{"current_steps": 25160, "total_steps": 78105, "loss": 0.3262, "lr": 4.2854925895432005e-06, "epoch": 1.610652326995711, "percentage": 32.21, "elapsed_time": "1:06:10", "remaining_time": "2:19:14", "throughput": 19943.95, "total_tokens": 79177536} +{"current_steps": 25165, "total_steps": 78105, "loss": 0.4285, "lr": 4.285101519212657e-06, "epoch": 1.610972408936688, "percentage": 32.22, "elapsed_time": "1:06:10", "remaining_time": "2:19:13", "throughput": 19944.49, "total_tokens": 79193088} +{"current_steps": 25170, "total_steps": 78105, "loss": 0.3316, "lr": 4.2847103597434415e-06, "epoch": 1.6112924908776647, "percentage": 32.23, "elapsed_time": "1:06:11", "remaining_time": "2:19:12", "throughput": 19945.13, "total_tokens": 79209536} +{"current_steps": 25175, "total_steps": 78105, "loss": 0.2112, "lr": 4.284319111155086e-06, "epoch": 1.6116125728186415, "percentage": 32.23, "elapsed_time": "1:06:12", "remaining_time": "2:19:11", "throughput": 19945.69, "total_tokens": 79225344} +{"current_steps": 25180, "total_steps": 78105, "loss": 0.4177, "lr": 4.28392777346713e-06, "epoch": 1.6119326547596184, "percentage": 32.24, "elapsed_time": "1:06:12", "remaining_time": "2:19:10", "throughput": 19946.36, "total_tokens": 79242304} +{"current_steps": 25185, "total_steps": 78105, "loss": 0.2604, "lr": 4.283536346699112e-06, "epoch": 1.6122527367005954, "percentage": 32.25, "elapsed_time": "1:06:13", "remaining_time": "2:19:09", "throughput": 19946.78, "total_tokens": 79256832} +{"current_steps": 25190, "total_steps": 78105, "loss": 0.4069, "lr": 4.2831448308705795e-06, "epoch": 1.6125728186415722, "percentage": 32.25, "elapsed_time": "1:06:14", "remaining_time": "2:19:08", "throughput": 19947.34, "total_tokens": 79272448} +{"current_steps": 25195, "total_steps": 78105, "loss": 0.2348, "lr": 4.282753226001082e-06, "epoch": 1.612892900582549, "percentage": 32.26, "elapsed_time": "1:06:14", "remaining_time": "2:19:07", "throughput": 19947.94, "total_tokens": 79288832} +{"current_steps": 25200, "total_steps": 78105, "loss": 0.4387, "lr": 4.282361532110174e-06, "epoch": 1.613212982523526, "percentage": 32.26, "elapsed_time": "1:06:15", "remaining_time": "2:19:06", "throughput": 19948.58, "total_tokens": 79305408} +{"current_steps": 25205, "total_steps": 78105, "loss": 0.3998, "lr": 4.281969749217415e-06, "epoch": 1.613533064464503, "percentage": 32.27, "elapsed_time": "1:06:16", "remaining_time": "2:19:05", "throughput": 19949.12, "total_tokens": 79320960} +{"current_steps": 25210, "total_steps": 78105, "loss": 0.3832, "lr": 4.281577877342369e-06, "epoch": 1.61385314640548, "percentage": 32.28, "elapsed_time": "1:06:16", "remaining_time": "2:19:04", "throughput": 19949.6, "total_tokens": 79336064} +{"current_steps": 25215, "total_steps": 78105, "loss": 0.3635, "lr": 4.281185916504604e-06, "epoch": 1.6141732283464567, "percentage": 32.28, "elapsed_time": "1:06:17", "remaining_time": "2:19:03", "throughput": 19950.16, "total_tokens": 79351808} +{"current_steps": 25220, "total_steps": 78105, "loss": 0.3179, "lr": 4.2807938667236915e-06, "epoch": 1.6144933102874335, "percentage": 32.29, "elapsed_time": "1:06:18", "remaining_time": "2:19:01", "throughput": 19950.56, "total_tokens": 79366144} +{"current_steps": 25225, "total_steps": 78105, "loss": 0.3024, "lr": 4.28040172801921e-06, "epoch": 1.6148133922284105, "percentage": 32.3, "elapsed_time": "1:06:18", "remaining_time": "2:19:00", "throughput": 19951.08, "total_tokens": 79381632} +{"current_steps": 25230, "total_steps": 78105, "loss": 0.2843, "lr": 4.28000950041074e-06, "epoch": 1.6151334741693875, "percentage": 32.3, "elapsed_time": "1:06:19", "remaining_time": "2:18:59", "throughput": 19951.57, "total_tokens": 79397248} +{"current_steps": 25235, "total_steps": 78105, "loss": 0.2988, "lr": 4.279617183917866e-06, "epoch": 1.6154535561103642, "percentage": 32.31, "elapsed_time": "1:06:20", "remaining_time": "2:18:58", "throughput": 19952.08, "total_tokens": 79412992} +{"current_steps": 25240, "total_steps": 78105, "loss": 0.3663, "lr": 4.2792247785601805e-06, "epoch": 1.615773638051341, "percentage": 32.32, "elapsed_time": "1:06:20", "remaining_time": "2:18:57", "throughput": 19952.6, "total_tokens": 79428736} +{"current_steps": 25245, "total_steps": 78105, "loss": 0.2814, "lr": 4.278832284357277e-06, "epoch": 1.616093719992318, "percentage": 32.32, "elapsed_time": "1:06:21", "remaining_time": "2:18:56", "throughput": 19953.08, "total_tokens": 79444032} +{"current_steps": 25250, "total_steps": 78105, "loss": 0.2383, "lr": 4.278439701328755e-06, "epoch": 1.616413801933295, "percentage": 32.33, "elapsed_time": "1:06:22", "remaining_time": "2:18:55", "throughput": 19953.64, "total_tokens": 79459904} +{"current_steps": 25255, "total_steps": 78105, "loss": 0.2475, "lr": 4.278047029494218e-06, "epoch": 1.616733883874272, "percentage": 32.33, "elapsed_time": "1:06:22", "remaining_time": "2:18:54", "throughput": 19954.25, "total_tokens": 79476480} +{"current_steps": 25260, "total_steps": 78105, "loss": 0.2758, "lr": 4.2776542688732734e-06, "epoch": 1.6170539658152487, "percentage": 32.34, "elapsed_time": "1:06:23", "remaining_time": "2:18:53", "throughput": 19954.83, "total_tokens": 79492864} +{"current_steps": 25265, "total_steps": 78105, "loss": 0.4661, "lr": 4.277261419485534e-06, "epoch": 1.6173740477562255, "percentage": 32.35, "elapsed_time": "1:06:24", "remaining_time": "2:18:52", "throughput": 19955.4, "total_tokens": 79509056} +{"current_steps": 25270, "total_steps": 78105, "loss": 0.1872, "lr": 4.2768684813506166e-06, "epoch": 1.6176941296972025, "percentage": 32.35, "elapsed_time": "1:06:25", "remaining_time": "2:18:52", "throughput": 19956.04, "total_tokens": 79526016} +{"current_steps": 25275, "total_steps": 78105, "loss": 0.2831, "lr": 4.276475454488143e-06, "epoch": 1.6180142116381795, "percentage": 32.36, "elapsed_time": "1:06:25", "remaining_time": "2:18:50", "throughput": 19956.53, "total_tokens": 79541248} +{"current_steps": 25280, "total_steps": 78105, "loss": 0.3685, "lr": 4.276082338917739e-06, "epoch": 1.6183342935791563, "percentage": 32.37, "elapsed_time": "1:06:26", "remaining_time": "2:18:49", "throughput": 19957.07, "total_tokens": 79556736} +{"current_steps": 25285, "total_steps": 78105, "loss": 0.3198, "lr": 4.275689134659033e-06, "epoch": 1.618654375520133, "percentage": 32.37, "elapsed_time": "1:06:27", "remaining_time": "2:18:48", "throughput": 19957.56, "total_tokens": 79571648} +{"current_steps": 25290, "total_steps": 78105, "loss": 0.2793, "lr": 4.275295841731663e-06, "epoch": 1.61897445746111, "percentage": 32.38, "elapsed_time": "1:06:27", "remaining_time": "2:18:47", "throughput": 19957.98, "total_tokens": 79586304} +{"current_steps": 25295, "total_steps": 78105, "loss": 0.2783, "lr": 4.274902460155263e-06, "epoch": 1.619294539402087, "percentage": 32.39, "elapsed_time": "1:06:28", "remaining_time": "2:18:46", "throughput": 19958.45, "total_tokens": 79601408} +{"current_steps": 25300, "total_steps": 78105, "loss": 0.3915, "lr": 4.274508989949482e-06, "epoch": 1.6196146213430638, "percentage": 32.39, "elapsed_time": "1:06:29", "remaining_time": "2:18:45", "throughput": 19958.95, "total_tokens": 79616768} +{"current_steps": 25305, "total_steps": 78105, "loss": 0.3715, "lr": 4.274115431133963e-06, "epoch": 1.6199347032840408, "percentage": 32.4, "elapsed_time": "1:06:29", "remaining_time": "2:18:44", "throughput": 19959.45, "total_tokens": 79632128} +{"current_steps": 25310, "total_steps": 78105, "loss": 0.2483, "lr": 4.273721783728362e-06, "epoch": 1.6202547852250175, "percentage": 32.41, "elapsed_time": "1:06:30", "remaining_time": "2:18:43", "throughput": 19959.87, "total_tokens": 79646592} +{"current_steps": 25315, "total_steps": 78105, "loss": 0.3975, "lr": 4.273328047752333e-06, "epoch": 1.6205748671659945, "percentage": 32.41, "elapsed_time": "1:06:31", "remaining_time": "2:18:42", "throughput": 19960.34, "total_tokens": 79662080} +{"current_steps": 25320, "total_steps": 78105, "loss": 0.2074, "lr": 4.272934223225539e-06, "epoch": 1.6208949491069715, "percentage": 32.42, "elapsed_time": "1:06:31", "remaining_time": "2:18:41", "throughput": 19960.78, "total_tokens": 79676992} +{"current_steps": 25325, "total_steps": 78105, "loss": 0.2971, "lr": 4.272540310167644e-06, "epoch": 1.6212150310479483, "percentage": 32.42, "elapsed_time": "1:06:32", "remaining_time": "2:18:40", "throughput": 19961.26, "total_tokens": 79692224} +{"current_steps": 25330, "total_steps": 78105, "loss": 0.3212, "lr": 4.272146308598319e-06, "epoch": 1.621535112988925, "percentage": 32.43, "elapsed_time": "1:06:33", "remaining_time": "2:18:39", "throughput": 19961.83, "total_tokens": 79708672} +{"current_steps": 25335, "total_steps": 78105, "loss": 0.3313, "lr": 4.271752218537239e-06, "epoch": 1.621855194929902, "percentage": 32.44, "elapsed_time": "1:06:33", "remaining_time": "2:18:38", "throughput": 19962.36, "total_tokens": 79724544} +{"current_steps": 25340, "total_steps": 78105, "loss": 0.1934, "lr": 4.27135804000408e-06, "epoch": 1.622175276870879, "percentage": 32.44, "elapsed_time": "1:06:34", "remaining_time": "2:18:37", "throughput": 19962.82, "total_tokens": 79739456} +{"current_steps": 25345, "total_steps": 78105, "loss": 0.297, "lr": 4.270963773018528e-06, "epoch": 1.6224953588118558, "percentage": 32.45, "elapsed_time": "1:06:35", "remaining_time": "2:18:36", "throughput": 19963.4, "total_tokens": 79755648} +{"current_steps": 25350, "total_steps": 78105, "loss": 0.4037, "lr": 4.270569417600271e-06, "epoch": 1.6228154407528326, "percentage": 32.46, "elapsed_time": "1:06:35", "remaining_time": "2:18:35", "throughput": 19963.94, "total_tokens": 79771456} +{"current_steps": 25355, "total_steps": 78105, "loss": 0.3551, "lr": 4.270174973768998e-06, "epoch": 1.6231355226938096, "percentage": 32.46, "elapsed_time": "1:06:36", "remaining_time": "2:18:34", "throughput": 19964.45, "total_tokens": 79786624} +{"current_steps": 25360, "total_steps": 78105, "loss": 0.2503, "lr": 4.269780441544409e-06, "epoch": 1.6234556046347866, "percentage": 32.47, "elapsed_time": "1:06:37", "remaining_time": "2:18:33", "throughput": 19964.87, "total_tokens": 79801536} +{"current_steps": 25365, "total_steps": 78105, "loss": 0.4006, "lr": 4.269385820946203e-06, "epoch": 1.6237756865757635, "percentage": 32.48, "elapsed_time": "1:06:37", "remaining_time": "2:18:32", "throughput": 19965.55, "total_tokens": 79818368} +{"current_steps": 25370, "total_steps": 78105, "loss": 0.2708, "lr": 4.268991111994084e-06, "epoch": 1.6240957685167403, "percentage": 32.48, "elapsed_time": "1:06:38", "remaining_time": "2:18:31", "throughput": 19966.14, "total_tokens": 79834240} +{"current_steps": 25375, "total_steps": 78105, "loss": 0.369, "lr": 4.268596314707764e-06, "epoch": 1.624415850457717, "percentage": 32.49, "elapsed_time": "1:06:39", "remaining_time": "2:18:30", "throughput": 19966.67, "total_tokens": 79849792} +{"current_steps": 25380, "total_steps": 78105, "loss": 0.4254, "lr": 4.268201429106957e-06, "epoch": 1.624735932398694, "percentage": 32.49, "elapsed_time": "1:06:39", "remaining_time": "2:18:29", "throughput": 19967.21, "total_tokens": 79865472} +{"current_steps": 25385, "total_steps": 78105, "loss": 0.3663, "lr": 4.26780645521138e-06, "epoch": 1.625056014339671, "percentage": 32.5, "elapsed_time": "1:06:40", "remaining_time": "2:18:28", "throughput": 19967.79, "total_tokens": 79881792} +{"current_steps": 25390, "total_steps": 78105, "loss": 0.3271, "lr": 4.267411393040757e-06, "epoch": 1.6253760962806478, "percentage": 32.51, "elapsed_time": "1:06:41", "remaining_time": "2:18:27", "throughput": 19968.41, "total_tokens": 79897920} +{"current_steps": 25395, "total_steps": 78105, "loss": 0.3354, "lr": 4.267016242614816e-06, "epoch": 1.6256961782216246, "percentage": 32.51, "elapsed_time": "1:06:41", "remaining_time": "2:18:26", "throughput": 19968.86, "total_tokens": 79912640} +{"current_steps": 25400, "total_steps": 78105, "loss": 0.2257, "lr": 4.2666210039532865e-06, "epoch": 1.6260162601626016, "percentage": 32.52, "elapsed_time": "1:06:42", "remaining_time": "2:18:25", "throughput": 19969.37, "total_tokens": 79927936} +{"current_steps": 25405, "total_steps": 78105, "loss": 0.3853, "lr": 4.266225677075907e-06, "epoch": 1.6263363421035786, "percentage": 32.53, "elapsed_time": "1:06:43", "remaining_time": "2:18:24", "throughput": 19969.9, "total_tokens": 79943744} +{"current_steps": 25410, "total_steps": 78105, "loss": 0.2559, "lr": 4.265830262002416e-06, "epoch": 1.6266564240445556, "percentage": 32.53, "elapsed_time": "1:06:43", "remaining_time": "2:18:23", "throughput": 19970.35, "total_tokens": 79958720} +{"current_steps": 25415, "total_steps": 78105, "loss": 0.3398, "lr": 4.265434758752561e-06, "epoch": 1.6269765059855323, "percentage": 32.54, "elapsed_time": "1:06:44", "remaining_time": "2:18:22", "throughput": 19970.95, "total_tokens": 79974912} +{"current_steps": 25420, "total_steps": 78105, "loss": 0.3683, "lr": 4.265039167346089e-06, "epoch": 1.627296587926509, "percentage": 32.55, "elapsed_time": "1:06:45", "remaining_time": "2:18:21", "throughput": 19971.5, "total_tokens": 79990976} +{"current_steps": 25425, "total_steps": 78105, "loss": 0.3442, "lr": 4.264643487802756e-06, "epoch": 1.627616669867486, "percentage": 32.55, "elapsed_time": "1:06:45", "remaining_time": "2:18:20", "throughput": 19972.07, "total_tokens": 80007040} +{"current_steps": 25430, "total_steps": 78105, "loss": 0.3008, "lr": 4.264247720142317e-06, "epoch": 1.627936751808463, "percentage": 32.56, "elapsed_time": "1:06:46", "remaining_time": "2:18:19", "throughput": 19972.54, "total_tokens": 80022144} +{"current_steps": 25435, "total_steps": 78105, "loss": 0.3936, "lr": 4.2638518643845375e-06, "epoch": 1.6282568337494399, "percentage": 32.57, "elapsed_time": "1:06:47", "remaining_time": "2:18:18", "throughput": 19973.12, "total_tokens": 80038272} +{"current_steps": 25440, "total_steps": 78105, "loss": 0.3108, "lr": 4.263455920549184e-06, "epoch": 1.6285769156904166, "percentage": 32.57, "elapsed_time": "1:06:47", "remaining_time": "2:18:17", "throughput": 19973.73, "total_tokens": 80054592} +{"current_steps": 25445, "total_steps": 78105, "loss": 0.3196, "lr": 4.263059888656026e-06, "epoch": 1.6288969976313936, "percentage": 32.58, "elapsed_time": "1:06:48", "remaining_time": "2:18:16", "throughput": 19974.23, "total_tokens": 80069824} +{"current_steps": 25450, "total_steps": 78105, "loss": 0.5387, "lr": 4.262663768724841e-06, "epoch": 1.6292170795723706, "percentage": 32.58, "elapsed_time": "1:06:49", "remaining_time": "2:18:15", "throughput": 19974.77, "total_tokens": 80085568} +{"current_steps": 25455, "total_steps": 78105, "loss": 0.3174, "lr": 4.26226756077541e-06, "epoch": 1.6295371615133474, "percentage": 32.59, "elapsed_time": "1:06:49", "remaining_time": "2:18:14", "throughput": 19975.21, "total_tokens": 80100096} +{"current_steps": 25460, "total_steps": 78105, "loss": 0.3078, "lr": 4.261871264827515e-06, "epoch": 1.6298572434543241, "percentage": 32.6, "elapsed_time": "1:06:50", "remaining_time": "2:18:13", "throughput": 19975.74, "total_tokens": 80115712} +{"current_steps": 25465, "total_steps": 78105, "loss": 0.436, "lr": 4.2614748809009476e-06, "epoch": 1.6301773253953011, "percentage": 32.6, "elapsed_time": "1:06:51", "remaining_time": "2:18:11", "throughput": 19976.31, "total_tokens": 80131264} +{"current_steps": 25470, "total_steps": 78105, "loss": 0.4129, "lr": 4.261078409015499e-06, "epoch": 1.6304974073362781, "percentage": 32.61, "elapsed_time": "1:06:51", "remaining_time": "2:18:10", "throughput": 19976.79, "total_tokens": 80146432} +{"current_steps": 25475, "total_steps": 78105, "loss": 0.3428, "lr": 4.260681849190967e-06, "epoch": 1.6308174892772551, "percentage": 32.62, "elapsed_time": "1:06:52", "remaining_time": "2:18:09", "throughput": 19977.34, "total_tokens": 80162112} +{"current_steps": 25480, "total_steps": 78105, "loss": 0.4165, "lr": 4.260285201447156e-06, "epoch": 1.6311375712182319, "percentage": 32.62, "elapsed_time": "1:06:53", "remaining_time": "2:18:08", "throughput": 19977.85, "total_tokens": 80177472} +{"current_steps": 25485, "total_steps": 78105, "loss": 0.3656, "lr": 4.25988846580387e-06, "epoch": 1.6314576531592087, "percentage": 32.63, "elapsed_time": "1:06:54", "remaining_time": "2:18:07", "throughput": 19978.47, "total_tokens": 80193984} +{"current_steps": 25490, "total_steps": 78105, "loss": 0.2376, "lr": 4.2594916422809214e-06, "epoch": 1.6317777351001856, "percentage": 32.64, "elapsed_time": "1:06:54", "remaining_time": "2:18:06", "throughput": 19978.99, "total_tokens": 80209344} +{"current_steps": 25495, "total_steps": 78105, "loss": 0.2847, "lr": 4.259094730898125e-06, "epoch": 1.6320978170411626, "percentage": 32.64, "elapsed_time": "1:06:55", "remaining_time": "2:18:05", "throughput": 19979.46, "total_tokens": 80224448} +{"current_steps": 25500, "total_steps": 78105, "loss": 0.4157, "lr": 4.2586977316753e-06, "epoch": 1.6324178989821394, "percentage": 32.65, "elapsed_time": "1:06:56", "remaining_time": "2:18:04", "throughput": 19980.0, "total_tokens": 80240064} +{"current_steps": 25505, "total_steps": 78105, "loss": 0.3217, "lr": 4.258300644632272e-06, "epoch": 1.6327379809231162, "percentage": 32.65, "elapsed_time": "1:06:56", "remaining_time": "2:18:03", "throughput": 19980.49, "total_tokens": 80255104} +{"current_steps": 25510, "total_steps": 78105, "loss": 0.2862, "lr": 4.257903469788867e-06, "epoch": 1.6330580628640932, "percentage": 32.66, "elapsed_time": "1:06:57", "remaining_time": "2:18:02", "throughput": 19981.05, "total_tokens": 80270912} +{"current_steps": 25515, "total_steps": 78105, "loss": 0.2993, "lr": 4.257506207164921e-06, "epoch": 1.6333781448050702, "percentage": 32.67, "elapsed_time": "1:06:58", "remaining_time": "2:18:01", "throughput": 19981.51, "total_tokens": 80285760} +{"current_steps": 25520, "total_steps": 78105, "loss": 0.4181, "lr": 4.257108856780268e-06, "epoch": 1.6336982267460471, "percentage": 32.67, "elapsed_time": "1:06:58", "remaining_time": "2:18:00", "throughput": 19982.07, "total_tokens": 80301376} +{"current_steps": 25525, "total_steps": 78105, "loss": 0.3956, "lr": 4.256711418654753e-06, "epoch": 1.634018308687024, "percentage": 32.68, "elapsed_time": "1:06:59", "remaining_time": "2:17:59", "throughput": 19982.6, "total_tokens": 80316992} +{"current_steps": 25530, "total_steps": 78105, "loss": 0.247, "lr": 4.256313892808219e-06, "epoch": 1.6343383906280007, "percentage": 32.69, "elapsed_time": "1:06:59", "remaining_time": "2:17:58", "throughput": 19983.07, "total_tokens": 80331904} +{"current_steps": 25535, "total_steps": 78105, "loss": 0.2341, "lr": 4.255916279260517e-06, "epoch": 1.6346584725689777, "percentage": 32.69, "elapsed_time": "1:07:00", "remaining_time": "2:17:57", "throughput": 19983.55, "total_tokens": 80346944} +{"current_steps": 25540, "total_steps": 78105, "loss": 0.2443, "lr": 4.255518578031503e-06, "epoch": 1.6349785545099547, "percentage": 32.7, "elapsed_time": "1:07:01", "remaining_time": "2:17:56", "throughput": 19984.17, "total_tokens": 80363648} +{"current_steps": 25545, "total_steps": 78105, "loss": 0.303, "lr": 4.255120789141035e-06, "epoch": 1.6352986364509314, "percentage": 32.71, "elapsed_time": "1:07:02", "remaining_time": "2:17:55", "throughput": 19984.6, "total_tokens": 80378496} +{"current_steps": 25550, "total_steps": 78105, "loss": 0.3314, "lr": 4.254722912608977e-06, "epoch": 1.6356187183919082, "percentage": 32.71, "elapsed_time": "1:07:02", "remaining_time": "2:17:54", "throughput": 19985.12, "total_tokens": 80394112} +{"current_steps": 25555, "total_steps": 78105, "loss": 0.4088, "lr": 4.2543249484551976e-06, "epoch": 1.6359388003328852, "percentage": 32.72, "elapsed_time": "1:07:03", "remaining_time": "2:17:53", "throughput": 19985.69, "total_tokens": 80409984} +{"current_steps": 25560, "total_steps": 78105, "loss": 0.3809, "lr": 4.253926896699566e-06, "epoch": 1.6362588822738622, "percentage": 32.73, "elapsed_time": "1:07:04", "remaining_time": "2:17:52", "throughput": 19986.26, "total_tokens": 80426048} +{"current_steps": 25565, "total_steps": 78105, "loss": 0.3026, "lr": 4.2535287573619635e-06, "epoch": 1.636578964214839, "percentage": 32.73, "elapsed_time": "1:07:04", "remaining_time": "2:17:51", "throughput": 19986.81, "total_tokens": 80442112} +{"current_steps": 25570, "total_steps": 78105, "loss": 0.2828, "lr": 4.253130530462267e-06, "epoch": 1.636899046155816, "percentage": 32.74, "elapsed_time": "1:07:05", "remaining_time": "2:17:50", "throughput": 19987.29, "total_tokens": 80457216} +{"current_steps": 25575, "total_steps": 78105, "loss": 0.3377, "lr": 4.252732216020364e-06, "epoch": 1.6372191280967927, "percentage": 32.74, "elapsed_time": "1:07:06", "remaining_time": "2:17:49", "throughput": 19987.89, "total_tokens": 80473280} +{"current_steps": 25580, "total_steps": 78105, "loss": 0.4095, "lr": 4.252333814056145e-06, "epoch": 1.6375392100377697, "percentage": 32.75, "elapsed_time": "1:07:06", "remaining_time": "2:17:48", "throughput": 19988.36, "total_tokens": 80488320} +{"current_steps": 25585, "total_steps": 78105, "loss": 0.3433, "lr": 4.251935324589502e-06, "epoch": 1.6378592919787467, "percentage": 32.76, "elapsed_time": "1:07:07", "remaining_time": "2:17:47", "throughput": 19988.95, "total_tokens": 80504640} +{"current_steps": 25590, "total_steps": 78105, "loss": 0.4683, "lr": 4.2515367476403335e-06, "epoch": 1.6381793739197235, "percentage": 32.76, "elapsed_time": "1:07:08", "remaining_time": "2:17:46", "throughput": 19989.37, "total_tokens": 80519232} +{"current_steps": 25595, "total_steps": 78105, "loss": 0.2173, "lr": 4.251138083228544e-06, "epoch": 1.6384994558607002, "percentage": 32.77, "elapsed_time": "1:07:08", "remaining_time": "2:17:45", "throughput": 19989.87, "total_tokens": 80534720} +{"current_steps": 25600, "total_steps": 78105, "loss": 0.4885, "lr": 4.25073933137404e-06, "epoch": 1.6388195378016772, "percentage": 32.78, "elapsed_time": "1:07:09", "remaining_time": "2:17:44", "throughput": 19990.49, "total_tokens": 80551040} +{"current_steps": 25605, "total_steps": 78105, "loss": 0.2751, "lr": 4.2503404920967326e-06, "epoch": 1.6391396197426542, "percentage": 32.78, "elapsed_time": "1:07:10", "remaining_time": "2:17:43", "throughput": 19991.06, "total_tokens": 80566848} +{"current_steps": 25610, "total_steps": 78105, "loss": 0.3141, "lr": 4.249941565416539e-06, "epoch": 1.639459701683631, "percentage": 32.79, "elapsed_time": "1:07:10", "remaining_time": "2:17:42", "throughput": 19991.65, "total_tokens": 80583040} +{"current_steps": 25615, "total_steps": 78105, "loss": 0.3434, "lr": 4.249542551353378e-06, "epoch": 1.6397797836246077, "percentage": 32.8, "elapsed_time": "1:07:11", "remaining_time": "2:17:41", "throughput": 19992.15, "total_tokens": 80598144} +{"current_steps": 25620, "total_steps": 78105, "loss": 0.2803, "lr": 4.249143449927176e-06, "epoch": 1.6400998655655847, "percentage": 32.8, "elapsed_time": "1:07:12", "remaining_time": "2:17:40", "throughput": 19992.7, "total_tokens": 80614080} +{"current_steps": 25625, "total_steps": 78105, "loss": 0.4398, "lr": 4.24874426115786e-06, "epoch": 1.6404199475065617, "percentage": 32.81, "elapsed_time": "1:07:12", "remaining_time": "2:17:39", "throughput": 19993.28, "total_tokens": 80630144} +{"current_steps": 25630, "total_steps": 78105, "loss": 0.2719, "lr": 4.248344985065364e-06, "epoch": 1.6407400294475387, "percentage": 32.81, "elapsed_time": "1:07:13", "remaining_time": "2:17:38", "throughput": 19993.77, "total_tokens": 80645568} +{"current_steps": 25635, "total_steps": 78105, "loss": 0.3412, "lr": 4.247945621669628e-06, "epoch": 1.6410601113885155, "percentage": 32.82, "elapsed_time": "1:07:14", "remaining_time": "2:17:37", "throughput": 19994.58, "total_tokens": 80663872} +{"current_steps": 25640, "total_steps": 78105, "loss": 0.3643, "lr": 4.2475461709905915e-06, "epoch": 1.6413801933294923, "percentage": 32.83, "elapsed_time": "1:07:14", "remaining_time": "2:17:36", "throughput": 19995.13, "total_tokens": 80679872} +{"current_steps": 25645, "total_steps": 78105, "loss": 0.3802, "lr": 4.247146633048202e-06, "epoch": 1.6417002752704692, "percentage": 32.83, "elapsed_time": "1:07:15", "remaining_time": "2:17:35", "throughput": 19995.66, "total_tokens": 80695488} +{"current_steps": 25650, "total_steps": 78105, "loss": 0.2717, "lr": 4.24674700786241e-06, "epoch": 1.6420203572114462, "percentage": 32.84, "elapsed_time": "1:07:16", "remaining_time": "2:17:34", "throughput": 19996.24, "total_tokens": 80711360} +{"current_steps": 25655, "total_steps": 78105, "loss": 0.3598, "lr": 4.246347295453171e-06, "epoch": 1.642340439152423, "percentage": 32.85, "elapsed_time": "1:07:17", "remaining_time": "2:17:33", "throughput": 19996.79, "total_tokens": 80727808} +{"current_steps": 25660, "total_steps": 78105, "loss": 0.2513, "lr": 4.245947495840444e-06, "epoch": 1.6426605210933998, "percentage": 32.85, "elapsed_time": "1:07:17", "remaining_time": "2:17:32", "throughput": 19997.44, "total_tokens": 80744512} +{"current_steps": 25665, "total_steps": 78105, "loss": 0.408, "lr": 4.245547609044194e-06, "epoch": 1.6429806030343768, "percentage": 32.86, "elapsed_time": "1:07:18", "remaining_time": "2:17:31", "throughput": 19998.08, "total_tokens": 80761280} +{"current_steps": 25670, "total_steps": 78105, "loss": 0.4443, "lr": 4.2451476350843885e-06, "epoch": 1.6433006849753538, "percentage": 32.87, "elapsed_time": "1:07:19", "remaining_time": "2:17:30", "throughput": 19998.61, "total_tokens": 80776320} +{"current_steps": 25675, "total_steps": 78105, "loss": 0.3911, "lr": 4.244747573981e-06, "epoch": 1.6436207669163307, "percentage": 32.87, "elapsed_time": "1:07:19", "remaining_time": "2:17:29", "throughput": 19999.18, "total_tokens": 80792256} +{"current_steps": 25680, "total_steps": 78105, "loss": 0.3401, "lr": 4.2443474257540064e-06, "epoch": 1.6439408488573075, "percentage": 32.88, "elapsed_time": "1:07:20", "remaining_time": "2:17:28", "throughput": 19999.67, "total_tokens": 80807744} +{"current_steps": 25685, "total_steps": 78105, "loss": 0.2824, "lr": 4.243947190423387e-06, "epoch": 1.6442609307982843, "percentage": 32.89, "elapsed_time": "1:07:21", "remaining_time": "2:17:27", "throughput": 20000.21, "total_tokens": 80823360} +{"current_steps": 25690, "total_steps": 78105, "loss": 0.4377, "lr": 4.2435468680091305e-06, "epoch": 1.6445810127392613, "percentage": 32.89, "elapsed_time": "1:07:21", "remaining_time": "2:17:26", "throughput": 20000.82, "total_tokens": 80839424} +{"current_steps": 25695, "total_steps": 78105, "loss": 0.4255, "lr": 4.243146458531224e-06, "epoch": 1.6449010946802383, "percentage": 32.9, "elapsed_time": "1:07:22", "remaining_time": "2:17:25", "throughput": 20001.37, "total_tokens": 80855232} +{"current_steps": 25700, "total_steps": 78105, "loss": 0.3378, "lr": 4.2427459620096635e-06, "epoch": 1.645221176621215, "percentage": 32.9, "elapsed_time": "1:07:23", "remaining_time": "2:17:24", "throughput": 20001.88, "total_tokens": 80870592} +{"current_steps": 25705, "total_steps": 78105, "loss": 0.3124, "lr": 4.242345378464448e-06, "epoch": 1.6455412585621918, "percentage": 32.91, "elapsed_time": "1:07:23", "remaining_time": "2:17:23", "throughput": 20002.26, "total_tokens": 80884544} +{"current_steps": 25710, "total_steps": 78105, "loss": 0.4361, "lr": 4.241944707915579e-06, "epoch": 1.6458613405031688, "percentage": 32.92, "elapsed_time": "1:07:24", "remaining_time": "2:17:22", "throughput": 20002.84, "total_tokens": 80900416} +{"current_steps": 25715, "total_steps": 78105, "loss": 0.3588, "lr": 4.241543950383063e-06, "epoch": 1.6461814224441458, "percentage": 32.92, "elapsed_time": "1:07:25", "remaining_time": "2:17:21", "throughput": 20003.34, "total_tokens": 80915648} +{"current_steps": 25720, "total_steps": 78105, "loss": 0.2621, "lr": 4.241143105886916e-06, "epoch": 1.6465015043851225, "percentage": 32.93, "elapsed_time": "1:07:25", "remaining_time": "2:17:20", "throughput": 20003.84, "total_tokens": 80930944} +{"current_steps": 25725, "total_steps": 78105, "loss": 0.4127, "lr": 4.240742174447151e-06, "epoch": 1.6468215863260993, "percentage": 32.94, "elapsed_time": "1:07:26", "remaining_time": "2:17:19", "throughput": 20004.29, "total_tokens": 80945600} +{"current_steps": 25730, "total_steps": 78105, "loss": 0.3735, "lr": 4.240341156083789e-06, "epoch": 1.6471416682670763, "percentage": 32.94, "elapsed_time": "1:07:27", "remaining_time": "2:17:18", "throughput": 20004.88, "total_tokens": 80961856} +{"current_steps": 25735, "total_steps": 78105, "loss": 0.2615, "lr": 4.239940050816854e-06, "epoch": 1.6474617502080533, "percentage": 32.95, "elapsed_time": "1:07:27", "remaining_time": "2:17:17", "throughput": 20005.32, "total_tokens": 80976640} +{"current_steps": 25740, "total_steps": 78105, "loss": 0.3408, "lr": 4.239538858666377e-06, "epoch": 1.6477818321490303, "percentage": 32.96, "elapsed_time": "1:07:28", "remaining_time": "2:17:16", "throughput": 20005.79, "total_tokens": 80991552} +{"current_steps": 25745, "total_steps": 78105, "loss": 0.2641, "lr": 4.23913757965239e-06, "epoch": 1.648101914090007, "percentage": 32.96, "elapsed_time": "1:07:29", "remaining_time": "2:17:14", "throughput": 20006.35, "total_tokens": 81007168} +{"current_steps": 25750, "total_steps": 78105, "loss": 0.4058, "lr": 4.238736213794931e-06, "epoch": 1.6484219960309838, "percentage": 32.97, "elapsed_time": "1:07:29", "remaining_time": "2:17:13", "throughput": 20006.85, "total_tokens": 81022400} +{"current_steps": 25755, "total_steps": 78105, "loss": 0.2596, "lr": 4.238334761114042e-06, "epoch": 1.6487420779719608, "percentage": 32.97, "elapsed_time": "1:07:30", "remaining_time": "2:17:12", "throughput": 20007.37, "total_tokens": 81037888} +{"current_steps": 25760, "total_steps": 78105, "loss": 0.347, "lr": 4.23793322162977e-06, "epoch": 1.6490621599129378, "percentage": 32.98, "elapsed_time": "1:07:31", "remaining_time": "2:17:11", "throughput": 20007.76, "total_tokens": 81052352} +{"current_steps": 25765, "total_steps": 78105, "loss": 0.4247, "lr": 4.237531595362165e-06, "epoch": 1.6493822418539146, "percentage": 32.99, "elapsed_time": "1:07:31", "remaining_time": "2:17:10", "throughput": 20008.32, "total_tokens": 81068032} +{"current_steps": 25770, "total_steps": 78105, "loss": 0.407, "lr": 4.237129882331283e-06, "epoch": 1.6497023237948913, "percentage": 32.99, "elapsed_time": "1:07:32", "remaining_time": "2:17:09", "throughput": 20008.82, "total_tokens": 81083328} +{"current_steps": 25775, "total_steps": 78105, "loss": 0.2753, "lr": 4.236728082557183e-06, "epoch": 1.6500224057358683, "percentage": 33.0, "elapsed_time": "1:07:33", "remaining_time": "2:17:08", "throughput": 20009.43, "total_tokens": 81099712} +{"current_steps": 25780, "total_steps": 78105, "loss": 0.404, "lr": 4.236326196059929e-06, "epoch": 1.6503424876768453, "percentage": 33.01, "elapsed_time": "1:07:33", "remaining_time": "2:17:07", "throughput": 20010.02, "total_tokens": 81115968} +{"current_steps": 25785, "total_steps": 78105, "loss": 0.4052, "lr": 4.235924222859589e-06, "epoch": 1.6506625696178223, "percentage": 33.01, "elapsed_time": "1:07:34", "remaining_time": "2:17:06", "throughput": 20010.51, "total_tokens": 81130880} +{"current_steps": 25790, "total_steps": 78105, "loss": 0.3187, "lr": 4.235522162976234e-06, "epoch": 1.650982651558799, "percentage": 33.02, "elapsed_time": "1:07:35", "remaining_time": "2:17:05", "throughput": 20011.0, "total_tokens": 81146176} +{"current_steps": 25795, "total_steps": 78105, "loss": 0.2374, "lr": 4.235120016429945e-06, "epoch": 1.6513027334997759, "percentage": 33.03, "elapsed_time": "1:07:35", "remaining_time": "2:17:04", "throughput": 20011.51, "total_tokens": 81161472} +{"current_steps": 25800, "total_steps": 78105, "loss": 0.3276, "lr": 4.234717783240798e-06, "epoch": 1.6516228154407528, "percentage": 33.03, "elapsed_time": "1:07:36", "remaining_time": "2:17:03", "throughput": 20012.03, "total_tokens": 81177152} +{"current_steps": 25805, "total_steps": 78105, "loss": 0.2702, "lr": 4.234315463428881e-06, "epoch": 1.6519428973817298, "percentage": 33.04, "elapsed_time": "1:07:37", "remaining_time": "2:17:02", "throughput": 20012.76, "total_tokens": 81194688} +{"current_steps": 25810, "total_steps": 78105, "loss": 0.2992, "lr": 4.233913057014284e-06, "epoch": 1.6522629793227066, "percentage": 33.05, "elapsed_time": "1:07:37", "remaining_time": "2:17:01", "throughput": 20013.35, "total_tokens": 81210944} +{"current_steps": 25815, "total_steps": 78105, "loss": 0.3213, "lr": 4.233510564017101e-06, "epoch": 1.6525830612636834, "percentage": 33.05, "elapsed_time": "1:07:38", "remaining_time": "2:17:00", "throughput": 20013.9, "total_tokens": 81226624} +{"current_steps": 25820, "total_steps": 78105, "loss": 0.3132, "lr": 4.2331079844574295e-06, "epoch": 1.6529031432046604, "percentage": 33.06, "elapsed_time": "1:07:39", "remaining_time": "2:16:59", "throughput": 20014.59, "total_tokens": 81244224} +{"current_steps": 25825, "total_steps": 78105, "loss": 0.2685, "lr": 4.232705318355372e-06, "epoch": 1.6532232251456374, "percentage": 33.06, "elapsed_time": "1:07:39", "remaining_time": "2:16:58", "throughput": 20015.08, "total_tokens": 81259072} +{"current_steps": 25830, "total_steps": 78105, "loss": 0.241, "lr": 4.232302565731037e-06, "epoch": 1.6535433070866141, "percentage": 33.07, "elapsed_time": "1:07:40", "remaining_time": "2:16:57", "throughput": 20015.56, "total_tokens": 81273856} +{"current_steps": 25835, "total_steps": 78105, "loss": 0.39, "lr": 4.231899726604534e-06, "epoch": 1.6538633890275911, "percentage": 33.08, "elapsed_time": "1:07:41", "remaining_time": "2:16:56", "throughput": 20016.01, "total_tokens": 81288896} +{"current_steps": 25840, "total_steps": 78105, "loss": 0.3298, "lr": 4.2314968009959815e-06, "epoch": 1.6541834709685679, "percentage": 33.08, "elapsed_time": "1:07:41", "remaining_time": "2:16:55", "throughput": 20016.55, "total_tokens": 81304448} +{"current_steps": 25845, "total_steps": 78105, "loss": 0.3232, "lr": 4.231093788925497e-06, "epoch": 1.6545035529095449, "percentage": 33.09, "elapsed_time": "1:07:42", "remaining_time": "2:16:54", "throughput": 20017.11, "total_tokens": 81320576} +{"current_steps": 25850, "total_steps": 78105, "loss": 0.4215, "lr": 4.2306906904132054e-06, "epoch": 1.6548236348505219, "percentage": 33.1, "elapsed_time": "1:07:43", "remaining_time": "2:16:53", "throughput": 20017.69, "total_tokens": 81337152} +{"current_steps": 25855, "total_steps": 78105, "loss": 0.3967, "lr": 4.230287505479236e-06, "epoch": 1.6551437167914986, "percentage": 33.1, "elapsed_time": "1:07:43", "remaining_time": "2:16:52", "throughput": 20018.24, "total_tokens": 81352896} +{"current_steps": 25860, "total_steps": 78105, "loss": 0.3677, "lr": 4.2298842341437204e-06, "epoch": 1.6554637987324754, "percentage": 33.11, "elapsed_time": "1:07:44", "remaining_time": "2:16:51", "throughput": 20018.65, "total_tokens": 81367488} +{"current_steps": 25865, "total_steps": 78105, "loss": 0.3216, "lr": 4.229480876426798e-06, "epoch": 1.6557838806734524, "percentage": 33.12, "elapsed_time": "1:07:45", "remaining_time": "2:16:50", "throughput": 20019.11, "total_tokens": 81382528} +{"current_steps": 25870, "total_steps": 78105, "loss": 0.325, "lr": 4.2290774323486084e-06, "epoch": 1.6561039626144294, "percentage": 33.12, "elapsed_time": "1:07:45", "remaining_time": "2:16:49", "throughput": 20019.64, "total_tokens": 81398272} +{"current_steps": 25875, "total_steps": 78105, "loss": 0.2839, "lr": 4.228673901929298e-06, "epoch": 1.6564240445554061, "percentage": 33.13, "elapsed_time": "1:07:46", "remaining_time": "2:16:48", "throughput": 20020.01, "total_tokens": 81412544} +{"current_steps": 25880, "total_steps": 78105, "loss": 0.2743, "lr": 4.228270285189019e-06, "epoch": 1.656744126496383, "percentage": 33.13, "elapsed_time": "1:07:47", "remaining_time": "2:16:47", "throughput": 20020.49, "total_tokens": 81427968} +{"current_steps": 25885, "total_steps": 78105, "loss": 0.4283, "lr": 4.227866582147922e-06, "epoch": 1.65706420843736, "percentage": 33.14, "elapsed_time": "1:07:47", "remaining_time": "2:16:46", "throughput": 20020.87, "total_tokens": 81442368} +{"current_steps": 25890, "total_steps": 78105, "loss": 0.3572, "lr": 4.22746279282617e-06, "epoch": 1.657384290378337, "percentage": 33.15, "elapsed_time": "1:07:48", "remaining_time": "2:16:45", "throughput": 20021.34, "total_tokens": 81457280} +{"current_steps": 25895, "total_steps": 78105, "loss": 0.5254, "lr": 4.227058917243922e-06, "epoch": 1.657704372319314, "percentage": 33.15, "elapsed_time": "1:07:49", "remaining_time": "2:16:44", "throughput": 20021.89, "total_tokens": 81473280} +{"current_steps": 25900, "total_steps": 78105, "loss": 0.3787, "lr": 4.226654955421349e-06, "epoch": 1.6580244542602907, "percentage": 33.16, "elapsed_time": "1:07:49", "remaining_time": "2:16:43", "throughput": 20022.51, "total_tokens": 81489600} +{"current_steps": 25905, "total_steps": 78105, "loss": 0.4178, "lr": 4.226250907378622e-06, "epoch": 1.6583445362012674, "percentage": 33.17, "elapsed_time": "1:07:50", "remaining_time": "2:16:42", "throughput": 20023.1, "total_tokens": 81505792} +{"current_steps": 25910, "total_steps": 78105, "loss": 0.3319, "lr": 4.2258467731359145e-06, "epoch": 1.6586646181422444, "percentage": 33.17, "elapsed_time": "1:07:51", "remaining_time": "2:16:41", "throughput": 20023.65, "total_tokens": 81521536} +{"current_steps": 25915, "total_steps": 78105, "loss": 0.3064, "lr": 4.2254425527134105e-06, "epoch": 1.6589847000832214, "percentage": 33.18, "elapsed_time": "1:07:51", "remaining_time": "2:16:40", "throughput": 20024.17, "total_tokens": 81537024} +{"current_steps": 25920, "total_steps": 78105, "loss": 0.4137, "lr": 4.225038246131292e-06, "epoch": 1.6593047820241982, "percentage": 33.19, "elapsed_time": "1:07:52", "remaining_time": "2:16:39", "throughput": 20024.7, "total_tokens": 81552448} +{"current_steps": 25925, "total_steps": 78105, "loss": 0.2729, "lr": 4.224633853409749e-06, "epoch": 1.659624863965175, "percentage": 33.19, "elapsed_time": "1:07:53", "remaining_time": "2:16:38", "throughput": 20025.2, "total_tokens": 81567808} +{"current_steps": 25930, "total_steps": 78105, "loss": 0.4086, "lr": 4.224229374568974e-06, "epoch": 1.659944945906152, "percentage": 33.2, "elapsed_time": "1:07:53", "remaining_time": "2:16:37", "throughput": 20025.81, "total_tokens": 81584320} +{"current_steps": 25935, "total_steps": 78105, "loss": 0.3573, "lr": 4.2238248096291666e-06, "epoch": 1.660265027847129, "percentage": 33.21, "elapsed_time": "1:07:54", "remaining_time": "2:16:36", "throughput": 20026.26, "total_tokens": 81599168} +{"current_steps": 25940, "total_steps": 78105, "loss": 0.2321, "lr": 4.223420158610526e-06, "epoch": 1.660585109788106, "percentage": 33.21, "elapsed_time": "1:07:55", "remaining_time": "2:16:35", "throughput": 20026.78, "total_tokens": 81614720} +{"current_steps": 25945, "total_steps": 78105, "loss": 0.4207, "lr": 4.223015421533261e-06, "epoch": 1.6609051917290827, "percentage": 33.22, "elapsed_time": "1:07:55", "remaining_time": "2:16:34", "throughput": 20027.25, "total_tokens": 81629888} +{"current_steps": 25950, "total_steps": 78105, "loss": 0.526, "lr": 4.222610598417579e-06, "epoch": 1.6612252736700595, "percentage": 33.22, "elapsed_time": "1:07:56", "remaining_time": "2:16:33", "throughput": 20027.73, "total_tokens": 81645184} +{"current_steps": 25955, "total_steps": 78105, "loss": 0.289, "lr": 4.222205689283698e-06, "epoch": 1.6615453556110364, "percentage": 33.23, "elapsed_time": "1:07:57", "remaining_time": "2:16:32", "throughput": 20028.26, "total_tokens": 81660928} +{"current_steps": 25960, "total_steps": 78105, "loss": 0.3843, "lr": 4.221800694151835e-06, "epoch": 1.6618654375520134, "percentage": 33.24, "elapsed_time": "1:07:57", "remaining_time": "2:16:31", "throughput": 20028.88, "total_tokens": 81677312} +{"current_steps": 25965, "total_steps": 78105, "loss": 0.4712, "lr": 4.221395613042214e-06, "epoch": 1.6621855194929902, "percentage": 33.24, "elapsed_time": "1:07:58", "remaining_time": "2:16:30", "throughput": 20029.41, "total_tokens": 81692928} +{"current_steps": 25970, "total_steps": 78105, "loss": 0.3219, "lr": 4.220990445975062e-06, "epoch": 1.662505601433967, "percentage": 33.25, "elapsed_time": "1:07:59", "remaining_time": "2:16:29", "throughput": 20029.96, "total_tokens": 81708800} +{"current_steps": 25975, "total_steps": 78105, "loss": 0.3933, "lr": 4.220585192970611e-06, "epoch": 1.662825683374944, "percentage": 33.26, "elapsed_time": "1:07:59", "remaining_time": "2:16:28", "throughput": 20030.46, "total_tokens": 81724224} +{"current_steps": 25980, "total_steps": 78105, "loss": 0.4215, "lr": 4.220179854049099e-06, "epoch": 1.663145765315921, "percentage": 33.26, "elapsed_time": "1:08:00", "remaining_time": "2:16:27", "throughput": 20031.04, "total_tokens": 81740160} +{"current_steps": 25985, "total_steps": 78105, "loss": 0.3602, "lr": 4.219774429230765e-06, "epoch": 1.6634658472568977, "percentage": 33.27, "elapsed_time": "1:08:01", "remaining_time": "2:16:26", "throughput": 20031.45, "total_tokens": 81754560} +{"current_steps": 25990, "total_steps": 78105, "loss": 0.3129, "lr": 4.219368918535853e-06, "epoch": 1.6637859291978745, "percentage": 33.28, "elapsed_time": "1:08:01", "remaining_time": "2:16:25", "throughput": 20031.94, "total_tokens": 81769856} +{"current_steps": 25995, "total_steps": 78105, "loss": 0.2341, "lr": 4.218963321984614e-06, "epoch": 1.6641060111388515, "percentage": 33.28, "elapsed_time": "1:08:02", "remaining_time": "2:16:24", "throughput": 20032.48, "total_tokens": 81785344} +{"current_steps": 26000, "total_steps": 78105, "loss": 0.4718, "lr": 4.2185576395973e-06, "epoch": 1.6644260930798285, "percentage": 33.29, "elapsed_time": "1:08:03", "remaining_time": "2:16:23", "throughput": 20033.1, "total_tokens": 81801920} +{"current_steps": 26005, "total_steps": 78105, "loss": 0.2502, "lr": 4.2181518713941694e-06, "epoch": 1.6647461750208055, "percentage": 33.29, "elapsed_time": "1:08:04", "remaining_time": "2:16:22", "throughput": 20033.7, "total_tokens": 81818368} +{"current_steps": 26010, "total_steps": 78105, "loss": 0.3281, "lr": 4.217746017395483e-06, "epoch": 1.6650662569617822, "percentage": 33.3, "elapsed_time": "1:08:04", "remaining_time": "2:16:21", "throughput": 20034.17, "total_tokens": 81833728} +{"current_steps": 26015, "total_steps": 78105, "loss": 0.2023, "lr": 4.217340077621509e-06, "epoch": 1.665386338902759, "percentage": 33.31, "elapsed_time": "1:08:05", "remaining_time": "2:16:20", "throughput": 20034.67, "total_tokens": 81848960} +{"current_steps": 26020, "total_steps": 78105, "loss": 0.348, "lr": 4.216934052092516e-06, "epoch": 1.665706420843736, "percentage": 33.31, "elapsed_time": "1:08:06", "remaining_time": "2:16:19", "throughput": 20035.24, "total_tokens": 81865280} +{"current_steps": 26025, "total_steps": 78105, "loss": 0.3051, "lr": 4.2165279408287806e-06, "epoch": 1.666026502784713, "percentage": 33.32, "elapsed_time": "1:08:06", "remaining_time": "2:16:18", "throughput": 20035.78, "total_tokens": 81880896} +{"current_steps": 26030, "total_steps": 78105, "loss": 0.3356, "lr": 4.2161217438505794e-06, "epoch": 1.6663465847256898, "percentage": 33.33, "elapsed_time": "1:08:07", "remaining_time": "2:16:17", "throughput": 20036.3, "total_tokens": 81896320} +{"current_steps": 26035, "total_steps": 78105, "loss": 0.3459, "lr": 4.215715461178199e-06, "epoch": 1.6666666666666665, "percentage": 33.33, "elapsed_time": "1:08:08", "remaining_time": "2:16:16", "throughput": 20036.84, "total_tokens": 81912320} +{"current_steps": 26040, "total_steps": 78105, "loss": 0.2861, "lr": 4.215309092831925e-06, "epoch": 1.6669867486076435, "percentage": 33.34, "elapsed_time": "1:08:08", "remaining_time": "2:16:15", "throughput": 20037.4, "total_tokens": 81928448} +{"current_steps": 26045, "total_steps": 78105, "loss": 0.3841, "lr": 4.214902638832049e-06, "epoch": 1.6673068305486205, "percentage": 33.35, "elapsed_time": "1:08:09", "remaining_time": "2:16:14", "throughput": 20037.91, "total_tokens": 81944000} +{"current_steps": 26050, "total_steps": 78105, "loss": 0.277, "lr": 4.214496099198868e-06, "epoch": 1.6676269124895975, "percentage": 33.35, "elapsed_time": "1:08:10", "remaining_time": "2:16:13", "throughput": 20038.55, "total_tokens": 81961216} +{"current_steps": 26055, "total_steps": 78105, "loss": 0.3328, "lr": 4.214089473952682e-06, "epoch": 1.6679469944305743, "percentage": 33.36, "elapsed_time": "1:08:10", "remaining_time": "2:16:12", "throughput": 20039.07, "total_tokens": 81977024} +{"current_steps": 26060, "total_steps": 78105, "loss": 0.2846, "lr": 4.213682763113796e-06, "epoch": 1.668267076371551, "percentage": 33.37, "elapsed_time": "1:08:11", "remaining_time": "2:16:11", "throughput": 20039.61, "total_tokens": 81992768} +{"current_steps": 26065, "total_steps": 78105, "loss": 0.2839, "lr": 4.213275966702519e-06, "epoch": 1.668587158312528, "percentage": 33.37, "elapsed_time": "1:08:12", "remaining_time": "2:16:10", "throughput": 20040.28, "total_tokens": 82009856} +{"current_steps": 26070, "total_steps": 78105, "loss": 0.3912, "lr": 4.212869084739165e-06, "epoch": 1.668907240253505, "percentage": 33.38, "elapsed_time": "1:08:12", "remaining_time": "2:16:09", "throughput": 20040.73, "total_tokens": 82024960} +{"current_steps": 26075, "total_steps": 78105, "loss": 0.3148, "lr": 4.21246211724405e-06, "epoch": 1.6692273221944818, "percentage": 33.38, "elapsed_time": "1:08:13", "remaining_time": "2:16:08", "throughput": 20041.27, "total_tokens": 82040960} +{"current_steps": 26080, "total_steps": 78105, "loss": 0.3879, "lr": 4.212055064237498e-06, "epoch": 1.6695474041354585, "percentage": 33.39, "elapsed_time": "1:08:14", "remaining_time": "2:16:07", "throughput": 20041.83, "total_tokens": 82056896} +{"current_steps": 26085, "total_steps": 78105, "loss": 0.3053, "lr": 4.211647925739834e-06, "epoch": 1.6698674860764355, "percentage": 33.4, "elapsed_time": "1:08:14", "remaining_time": "2:16:06", "throughput": 20042.44, "total_tokens": 82073600} +{"current_steps": 26090, "total_steps": 78105, "loss": 0.2234, "lr": 4.211240701771387e-06, "epoch": 1.6701875680174125, "percentage": 33.4, "elapsed_time": "1:08:15", "remaining_time": "2:16:05", "throughput": 20042.95, "total_tokens": 82088640} +{"current_steps": 26095, "total_steps": 78105, "loss": 0.3423, "lr": 4.210833392352493e-06, "epoch": 1.6705076499583893, "percentage": 33.41, "elapsed_time": "1:08:16", "remaining_time": "2:16:04", "throughput": 20043.4, "total_tokens": 82103744} +{"current_steps": 26100, "total_steps": 78105, "loss": 0.3086, "lr": 4.210425997503491e-06, "epoch": 1.6708277318993663, "percentage": 33.42, "elapsed_time": "1:08:16", "remaining_time": "2:16:03", "throughput": 20043.9, "total_tokens": 82119040} +{"current_steps": 26105, "total_steps": 78105, "loss": 0.3197, "lr": 4.2100185172447236e-06, "epoch": 1.671147813840343, "percentage": 33.42, "elapsed_time": "1:08:17", "remaining_time": "2:16:02", "throughput": 20044.69, "total_tokens": 82137408} +{"current_steps": 26110, "total_steps": 78105, "loss": 0.4827, "lr": 4.20961095159654e-06, "epoch": 1.67146789578132, "percentage": 33.43, "elapsed_time": "1:08:18", "remaining_time": "2:16:01", "throughput": 20045.22, "total_tokens": 82153152} +{"current_steps": 26115, "total_steps": 78105, "loss": 0.2823, "lr": 4.209203300579289e-06, "epoch": 1.671787977722297, "percentage": 33.44, "elapsed_time": "1:08:19", "remaining_time": "2:16:00", "throughput": 20045.75, "total_tokens": 82169024} +{"current_steps": 26120, "total_steps": 78105, "loss": 0.4166, "lr": 4.208795564213329e-06, "epoch": 1.6721080596632738, "percentage": 33.44, "elapsed_time": "1:08:19", "remaining_time": "2:15:59", "throughput": 20046.26, "total_tokens": 82184576} +{"current_steps": 26125, "total_steps": 78105, "loss": 0.3562, "lr": 4.208387742519019e-06, "epoch": 1.6724281416042506, "percentage": 33.45, "elapsed_time": "1:08:20", "remaining_time": "2:15:58", "throughput": 20046.71, "total_tokens": 82199232} +{"current_steps": 26130, "total_steps": 78105, "loss": 0.4136, "lr": 4.207979835516724e-06, "epoch": 1.6727482235452276, "percentage": 33.45, "elapsed_time": "1:08:21", "remaining_time": "2:15:57", "throughput": 20047.3, "total_tokens": 82215744} +{"current_steps": 26135, "total_steps": 78105, "loss": 0.3964, "lr": 4.207571843226812e-06, "epoch": 1.6730683054862046, "percentage": 33.46, "elapsed_time": "1:08:21", "remaining_time": "2:15:56", "throughput": 20047.75, "total_tokens": 82230720} +{"current_steps": 26140, "total_steps": 78105, "loss": 0.2845, "lr": 4.207163765669657e-06, "epoch": 1.6733883874271813, "percentage": 33.47, "elapsed_time": "1:08:22", "remaining_time": "2:15:55", "throughput": 20048.44, "total_tokens": 82248256} +{"current_steps": 26145, "total_steps": 78105, "loss": 0.3653, "lr": 4.206755602865635e-06, "epoch": 1.673708469368158, "percentage": 33.47, "elapsed_time": "1:08:23", "remaining_time": "2:15:54", "throughput": 20048.9, "total_tokens": 82263488} +{"current_steps": 26150, "total_steps": 78105, "loss": 0.3548, "lr": 4.20634735483513e-06, "epoch": 1.674028551309135, "percentage": 33.48, "elapsed_time": "1:08:23", "remaining_time": "2:15:53", "throughput": 20049.45, "total_tokens": 82279360} +{"current_steps": 26155, "total_steps": 78105, "loss": 0.295, "lr": 4.205939021598525e-06, "epoch": 1.674348633250112, "percentage": 33.49, "elapsed_time": "1:08:24", "remaining_time": "2:15:52", "throughput": 20049.99, "total_tokens": 82295104} +{"current_steps": 26160, "total_steps": 78105, "loss": 0.2918, "lr": 4.205530603176212e-06, "epoch": 1.674668715191089, "percentage": 33.49, "elapsed_time": "1:08:25", "remaining_time": "2:15:51", "throughput": 20050.52, "total_tokens": 82311104} +{"current_steps": 26165, "total_steps": 78105, "loss": 0.313, "lr": 4.205122099588583e-06, "epoch": 1.6749887971320658, "percentage": 33.5, "elapsed_time": "1:08:25", "remaining_time": "2:15:50", "throughput": 20051.03, "total_tokens": 82326656} +{"current_steps": 26170, "total_steps": 78105, "loss": 0.2993, "lr": 4.204713510856039e-06, "epoch": 1.6753088790730426, "percentage": 33.51, "elapsed_time": "1:08:26", "remaining_time": "2:15:49", "throughput": 20051.63, "total_tokens": 82343168} +{"current_steps": 26175, "total_steps": 78105, "loss": 0.3222, "lr": 4.204304836998983e-06, "epoch": 1.6756289610140196, "percentage": 33.51, "elapsed_time": "1:08:27", "remaining_time": "2:15:48", "throughput": 20052.1, "total_tokens": 82358208} +{"current_steps": 26180, "total_steps": 78105, "loss": 0.3878, "lr": 4.203896078037819e-06, "epoch": 1.6759490429549966, "percentage": 33.52, "elapsed_time": "1:08:27", "remaining_time": "2:15:47", "throughput": 20052.53, "total_tokens": 82372928} +{"current_steps": 26185, "total_steps": 78105, "loss": 0.3604, "lr": 4.203487233992961e-06, "epoch": 1.6762691248959734, "percentage": 33.53, "elapsed_time": "1:08:28", "remaining_time": "2:15:46", "throughput": 20053.05, "total_tokens": 82388992} +{"current_steps": 26190, "total_steps": 78105, "loss": 0.2632, "lr": 4.203078304884823e-06, "epoch": 1.6765892068369501, "percentage": 33.53, "elapsed_time": "1:08:29", "remaining_time": "2:15:45", "throughput": 20053.54, "total_tokens": 82404416} +{"current_steps": 26195, "total_steps": 78105, "loss": 0.3213, "lr": 4.202669290733825e-06, "epoch": 1.676909288777927, "percentage": 33.54, "elapsed_time": "1:08:29", "remaining_time": "2:15:44", "throughput": 20053.98, "total_tokens": 82419072} +{"current_steps": 26200, "total_steps": 78105, "loss": 0.4798, "lr": 4.202260191560393e-06, "epoch": 1.677229370718904, "percentage": 33.54, "elapsed_time": "1:08:30", "remaining_time": "2:15:43", "throughput": 20054.54, "total_tokens": 82435200} +{"current_steps": 26205, "total_steps": 78105, "loss": 0.4395, "lr": 4.201851007384953e-06, "epoch": 1.677549452659881, "percentage": 33.55, "elapsed_time": "1:08:31", "remaining_time": "2:15:42", "throughput": 20055.05, "total_tokens": 82450752} +{"current_steps": 26210, "total_steps": 78105, "loss": 0.3904, "lr": 4.201441738227938e-06, "epoch": 1.6778695346008579, "percentage": 33.56, "elapsed_time": "1:08:31", "remaining_time": "2:15:41", "throughput": 20055.51, "total_tokens": 82465792} +{"current_steps": 26215, "total_steps": 78105, "loss": 0.2622, "lr": 4.2010323841097855e-06, "epoch": 1.6781896165418346, "percentage": 33.56, "elapsed_time": "1:08:32", "remaining_time": "2:15:40", "throughput": 20056.01, "total_tokens": 82481216} +{"current_steps": 26220, "total_steps": 78105, "loss": 0.3196, "lr": 4.200622945050936e-06, "epoch": 1.6785096984828116, "percentage": 33.57, "elapsed_time": "1:08:33", "remaining_time": "2:15:39", "throughput": 20056.51, "total_tokens": 82496640} +{"current_steps": 26225, "total_steps": 78105, "loss": 0.3211, "lr": 4.200213421071836e-06, "epoch": 1.6788297804237886, "percentage": 33.58, "elapsed_time": "1:08:33", "remaining_time": "2:15:38", "throughput": 20057.17, "total_tokens": 82513600} +{"current_steps": 26230, "total_steps": 78105, "loss": 0.313, "lr": 4.199803812192932e-06, "epoch": 1.6791498623647654, "percentage": 33.58, "elapsed_time": "1:08:34", "remaining_time": "2:15:37", "throughput": 20057.59, "total_tokens": 82528384} +{"current_steps": 26235, "total_steps": 78105, "loss": 0.3087, "lr": 4.199394118434681e-06, "epoch": 1.6794699443057421, "percentage": 33.59, "elapsed_time": "1:08:35", "remaining_time": "2:15:36", "throughput": 20058.08, "total_tokens": 82543552} +{"current_steps": 26240, "total_steps": 78105, "loss": 0.4927, "lr": 4.198984339817538e-06, "epoch": 1.6797900262467191, "percentage": 33.6, "elapsed_time": "1:08:35", "remaining_time": "2:15:35", "throughput": 20058.69, "total_tokens": 82560256} +{"current_steps": 26245, "total_steps": 78105, "loss": 0.3095, "lr": 4.198574476361969e-06, "epoch": 1.6801101081876961, "percentage": 33.6, "elapsed_time": "1:08:36", "remaining_time": "2:15:34", "throughput": 20059.27, "total_tokens": 82576320} +{"current_steps": 26250, "total_steps": 78105, "loss": 0.3584, "lr": 4.198164528088436e-06, "epoch": 1.680430190128673, "percentage": 33.61, "elapsed_time": "1:08:37", "remaining_time": "2:15:33", "throughput": 20059.92, "total_tokens": 82593280} +{"current_steps": 26255, "total_steps": 78105, "loss": 0.2621, "lr": 4.197754495017413e-06, "epoch": 1.6807502720696497, "percentage": 33.62, "elapsed_time": "1:08:38", "remaining_time": "2:15:32", "throughput": 20060.42, "total_tokens": 82608832} +{"current_steps": 26260, "total_steps": 78105, "loss": 0.2594, "lr": 4.1973443771693735e-06, "epoch": 1.6810703540106267, "percentage": 33.62, "elapsed_time": "1:08:38", "remaining_time": "2:15:31", "throughput": 20060.98, "total_tokens": 82625024} +{"current_steps": 26265, "total_steps": 78105, "loss": 0.3151, "lr": 4.1969341745647966e-06, "epoch": 1.6813904359516036, "percentage": 33.63, "elapsed_time": "1:08:39", "remaining_time": "2:15:30", "throughput": 20061.49, "total_tokens": 82640896} +{"current_steps": 26270, "total_steps": 78105, "loss": 0.4024, "lr": 4.196523887224167e-06, "epoch": 1.6817105178925806, "percentage": 33.63, "elapsed_time": "1:08:40", "remaining_time": "2:15:29", "throughput": 20062.07, "total_tokens": 82656960} +{"current_steps": 26275, "total_steps": 78105, "loss": 0.3083, "lr": 4.1961135151679695e-06, "epoch": 1.6820305998335574, "percentage": 33.64, "elapsed_time": "1:08:40", "remaining_time": "2:15:28", "throughput": 20062.63, "total_tokens": 82673024} +{"current_steps": 26280, "total_steps": 78105, "loss": 0.4133, "lr": 4.195703058416701e-06, "epoch": 1.6823506817745342, "percentage": 33.65, "elapsed_time": "1:08:41", "remaining_time": "2:15:27", "throughput": 20063.29, "total_tokens": 82689920} +{"current_steps": 26285, "total_steps": 78105, "loss": 0.4182, "lr": 4.195292516990851e-06, "epoch": 1.6826707637155112, "percentage": 33.65, "elapsed_time": "1:08:42", "remaining_time": "2:15:26", "throughput": 20063.79, "total_tokens": 82705152} +{"current_steps": 26290, "total_steps": 78105, "loss": 0.3226, "lr": 4.194881890910924e-06, "epoch": 1.6829908456564882, "percentage": 33.66, "elapsed_time": "1:08:42", "remaining_time": "2:15:25", "throughput": 20064.28, "total_tokens": 82720448} +{"current_steps": 26295, "total_steps": 78105, "loss": 0.3272, "lr": 4.194471180197425e-06, "epoch": 1.683310927597465, "percentage": 33.67, "elapsed_time": "1:08:43", "remaining_time": "2:15:24", "throughput": 20064.95, "total_tokens": 82737536} +{"current_steps": 26300, "total_steps": 78105, "loss": 0.5357, "lr": 4.1940603848708605e-06, "epoch": 1.6836310095384417, "percentage": 33.67, "elapsed_time": "1:08:44", "remaining_time": "2:15:23", "throughput": 20065.42, "total_tokens": 82752896} +{"current_steps": 26305, "total_steps": 78105, "loss": 0.3715, "lr": 4.193649504951745e-06, "epoch": 1.6839510914794187, "percentage": 33.68, "elapsed_time": "1:08:44", "remaining_time": "2:15:22", "throughput": 20066.11, "total_tokens": 82769920} +{"current_steps": 26310, "total_steps": 78105, "loss": 0.3821, "lr": 4.193238540460595e-06, "epoch": 1.6842711734203957, "percentage": 33.69, "elapsed_time": "1:08:45", "remaining_time": "2:15:21", "throughput": 20066.66, "total_tokens": 82785728} +{"current_steps": 26315, "total_steps": 78105, "loss": 0.306, "lr": 4.192827491417931e-06, "epoch": 1.6845912553613727, "percentage": 33.69, "elapsed_time": "1:08:46", "remaining_time": "2:15:20", "throughput": 20067.25, "total_tokens": 82801920} +{"current_steps": 26320, "total_steps": 78105, "loss": 0.3343, "lr": 4.192416357844281e-06, "epoch": 1.6849113373023494, "percentage": 33.7, "elapsed_time": "1:08:46", "remaining_time": "2:15:19", "throughput": 20067.72, "total_tokens": 82817216} +{"current_steps": 26325, "total_steps": 78105, "loss": 0.2941, "lr": 4.192005139760172e-06, "epoch": 1.6852314192433262, "percentage": 33.7, "elapsed_time": "1:08:47", "remaining_time": "2:15:18", "throughput": 20068.19, "total_tokens": 82832576} +{"current_steps": 26330, "total_steps": 78105, "loss": 0.3261, "lr": 4.191593837186142e-06, "epoch": 1.6855515011843032, "percentage": 33.71, "elapsed_time": "1:08:48", "remaining_time": "2:15:17", "throughput": 20068.72, "total_tokens": 82848448} +{"current_steps": 26335, "total_steps": 78105, "loss": 0.3725, "lr": 4.191182450142725e-06, "epoch": 1.6858715831252802, "percentage": 33.72, "elapsed_time": "1:08:48", "remaining_time": "2:15:16", "throughput": 20069.19, "total_tokens": 82863936} +{"current_steps": 26340, "total_steps": 78105, "loss": 0.3422, "lr": 4.190770978650466e-06, "epoch": 1.686191665066257, "percentage": 33.72, "elapsed_time": "1:08:49", "remaining_time": "2:15:15", "throughput": 20069.74, "total_tokens": 82880192} +{"current_steps": 26345, "total_steps": 78105, "loss": 0.2793, "lr": 4.190359422729911e-06, "epoch": 1.6865117470072337, "percentage": 33.73, "elapsed_time": "1:08:50", "remaining_time": "2:15:14", "throughput": 20070.17, "total_tokens": 82894656} +{"current_steps": 26350, "total_steps": 78105, "loss": 0.2528, "lr": 4.189947782401612e-06, "epoch": 1.6868318289482107, "percentage": 33.74, "elapsed_time": "1:08:50", "remaining_time": "2:15:13", "throughput": 20070.62, "total_tokens": 82910016} +{"current_steps": 26355, "total_steps": 78105, "loss": 0.3624, "lr": 4.189536057686123e-06, "epoch": 1.6871519108891877, "percentage": 33.74, "elapsed_time": "1:08:51", "remaining_time": "2:15:12", "throughput": 20071.07, "total_tokens": 82925120} +{"current_steps": 26360, "total_steps": 78105, "loss": 0.3493, "lr": 4.189124248604004e-06, "epoch": 1.6874719928301645, "percentage": 33.75, "elapsed_time": "1:08:52", "remaining_time": "2:15:11", "throughput": 20071.62, "total_tokens": 82941184} +{"current_steps": 26365, "total_steps": 78105, "loss": 0.3695, "lr": 4.188712355175818e-06, "epoch": 1.6877920747711415, "percentage": 33.76, "elapsed_time": "1:08:52", "remaining_time": "2:15:10", "throughput": 20072.13, "total_tokens": 82956736} +{"current_steps": 26370, "total_steps": 78105, "loss": 0.2668, "lr": 4.1883003774221335e-06, "epoch": 1.6881121567121182, "percentage": 33.76, "elapsed_time": "1:08:53", "remaining_time": "2:15:09", "throughput": 20072.52, "total_tokens": 82971200} +{"current_steps": 26375, "total_steps": 78105, "loss": 0.3116, "lr": 4.1878883153635205e-06, "epoch": 1.6884322386530952, "percentage": 33.77, "elapsed_time": "1:08:54", "remaining_time": "2:15:08", "throughput": 20073.05, "total_tokens": 82987136} +{"current_steps": 26380, "total_steps": 78105, "loss": 0.3113, "lr": 4.187476169020559e-06, "epoch": 1.6887523205940722, "percentage": 33.78, "elapsed_time": "1:08:54", "remaining_time": "2:15:07", "throughput": 20073.61, "total_tokens": 83003392} +{"current_steps": 26385, "total_steps": 78105, "loss": 0.4604, "lr": 4.187063938413827e-06, "epoch": 1.689072402535049, "percentage": 33.78, "elapsed_time": "1:08:55", "remaining_time": "2:15:06", "throughput": 20074.05, "total_tokens": 83018624} +{"current_steps": 26390, "total_steps": 78105, "loss": 0.3172, "lr": 4.186651623563908e-06, "epoch": 1.6893924844760257, "percentage": 33.79, "elapsed_time": "1:08:56", "remaining_time": "2:15:05", "throughput": 20074.5, "total_tokens": 83033408} +{"current_steps": 26395, "total_steps": 78105, "loss": 0.3299, "lr": 4.186239224491394e-06, "epoch": 1.6897125664170027, "percentage": 33.79, "elapsed_time": "1:08:57", "remaining_time": "2:15:04", "throughput": 20075.27, "total_tokens": 83051520} +{"current_steps": 26400, "total_steps": 78105, "loss": 0.2201, "lr": 4.185826741216876e-06, "epoch": 1.6900326483579797, "percentage": 33.8, "elapsed_time": "1:08:57", "remaining_time": "2:15:03", "throughput": 20075.77, "total_tokens": 83066816} +{"current_steps": 26405, "total_steps": 78105, "loss": 0.3626, "lr": 4.185414173760952e-06, "epoch": 1.6903527302989565, "percentage": 33.81, "elapsed_time": "1:08:58", "remaining_time": "2:15:02", "throughput": 20076.34, "total_tokens": 83083136} +{"current_steps": 26410, "total_steps": 78105, "loss": 0.3798, "lr": 4.185001522144223e-06, "epoch": 1.6906728122399333, "percentage": 33.81, "elapsed_time": "1:08:58", "remaining_time": "2:15:01", "throughput": 20076.77, "total_tokens": 83097728} +{"current_steps": 26415, "total_steps": 78105, "loss": 0.3527, "lr": 4.184588786387295e-06, "epoch": 1.6909928941809103, "percentage": 33.82, "elapsed_time": "1:08:59", "remaining_time": "2:15:00", "throughput": 20077.23, "total_tokens": 83112704} +{"current_steps": 26420, "total_steps": 78105, "loss": 0.2633, "lr": 4.184175966510777e-06, "epoch": 1.6913129761218872, "percentage": 33.83, "elapsed_time": "1:09:00", "remaining_time": "2:14:59", "throughput": 20077.74, "total_tokens": 83128064} +{"current_steps": 26425, "total_steps": 78105, "loss": 0.5295, "lr": 4.183763062535285e-06, "epoch": 1.6916330580628642, "percentage": 33.83, "elapsed_time": "1:09:01", "remaining_time": "2:14:59", "throughput": 20077.12, "total_tokens": 83144384} +{"current_steps": 26430, "total_steps": 78105, "loss": 0.4544, "lr": 4.183350074481434e-06, "epoch": 1.691953140003841, "percentage": 33.84, "elapsed_time": "1:09:01", "remaining_time": "2:14:58", "throughput": 20077.52, "total_tokens": 83159168} +{"current_steps": 26435, "total_steps": 78105, "loss": 0.292, "lr": 4.18293700236985e-06, "epoch": 1.6922732219448178, "percentage": 33.85, "elapsed_time": "1:09:02", "remaining_time": "2:14:57", "throughput": 20078.04, "total_tokens": 83174720} +{"current_steps": 26440, "total_steps": 78105, "loss": 0.3355, "lr": 4.182523846221158e-06, "epoch": 1.6925933038857948, "percentage": 33.85, "elapsed_time": "1:09:03", "remaining_time": "2:14:56", "throughput": 20078.48, "total_tokens": 83189504} +{"current_steps": 26445, "total_steps": 78105, "loss": 0.444, "lr": 4.182110606055989e-06, "epoch": 1.6929133858267718, "percentage": 33.86, "elapsed_time": "1:09:03", "remaining_time": "2:14:55", "throughput": 20078.94, "total_tokens": 83204992} +{"current_steps": 26450, "total_steps": 78105, "loss": 0.3017, "lr": 4.181697281894978e-06, "epoch": 1.6932334677677485, "percentage": 33.86, "elapsed_time": "1:09:04", "remaining_time": "2:14:54", "throughput": 20079.46, "total_tokens": 83221056} +{"current_steps": 26455, "total_steps": 78105, "loss": 0.4095, "lr": 4.181283873758765e-06, "epoch": 1.6935535497087253, "percentage": 33.87, "elapsed_time": "1:09:05", "remaining_time": "2:14:53", "throughput": 20080.08, "total_tokens": 83237568} +{"current_steps": 26460, "total_steps": 78105, "loss": 0.3218, "lr": 4.1808703816679915e-06, "epoch": 1.6938736316497023, "percentage": 33.88, "elapsed_time": "1:09:05", "remaining_time": "2:14:52", "throughput": 20080.69, "total_tokens": 83254144} +{"current_steps": 26465, "total_steps": 78105, "loss": 0.3484, "lr": 4.180456805643308e-06, "epoch": 1.6941937135906793, "percentage": 33.88, "elapsed_time": "1:09:06", "remaining_time": "2:14:51", "throughput": 20081.16, "total_tokens": 83269056} +{"current_steps": 26470, "total_steps": 78105, "loss": 0.4254, "lr": 4.180043145705363e-06, "epoch": 1.6945137955316563, "percentage": 33.89, "elapsed_time": "1:09:07", "remaining_time": "2:14:50", "throughput": 20081.72, "total_tokens": 83285504} +{"current_steps": 26475, "total_steps": 78105, "loss": 0.3334, "lr": 4.1796294018748165e-06, "epoch": 1.694833877472633, "percentage": 33.9, "elapsed_time": "1:09:07", "remaining_time": "2:14:49", "throughput": 20082.27, "total_tokens": 83301248} +{"current_steps": 26480, "total_steps": 78105, "loss": 0.4201, "lr": 4.179215574172325e-06, "epoch": 1.6951539594136098, "percentage": 33.9, "elapsed_time": "1:09:08", "remaining_time": "2:14:48", "throughput": 20082.77, "total_tokens": 83316864} +{"current_steps": 26485, "total_steps": 78105, "loss": 0.3501, "lr": 4.178801662618555e-06, "epoch": 1.6954740413545868, "percentage": 33.91, "elapsed_time": "1:09:09", "remaining_time": "2:14:47", "throughput": 20083.26, "total_tokens": 83332480} +{"current_steps": 26490, "total_steps": 78105, "loss": 0.311, "lr": 4.178387667234174e-06, "epoch": 1.6957941232955638, "percentage": 33.92, "elapsed_time": "1:09:10", "remaining_time": "2:14:46", "throughput": 20083.75, "total_tokens": 83348224} +{"current_steps": 26495, "total_steps": 78105, "loss": 0.4381, "lr": 4.177973588039856e-06, "epoch": 1.6961142052365406, "percentage": 33.92, "elapsed_time": "1:09:10", "remaining_time": "2:14:45", "throughput": 20084.2, "total_tokens": 83363776} +{"current_steps": 26500, "total_steps": 78105, "loss": 0.3925, "lr": 4.177559425056277e-06, "epoch": 1.6964342871775173, "percentage": 33.93, "elapsed_time": "1:09:11", "remaining_time": "2:14:44", "throughput": 20084.85, "total_tokens": 83380800} +{"current_steps": 26505, "total_steps": 78105, "loss": 0.4818, "lr": 4.177145178304118e-06, "epoch": 1.6967543691184943, "percentage": 33.94, "elapsed_time": "1:09:12", "remaining_time": "2:14:43", "throughput": 20085.49, "total_tokens": 83397632} +{"current_steps": 26510, "total_steps": 78105, "loss": 0.4123, "lr": 4.1767308478040655e-06, "epoch": 1.6970744510594713, "percentage": 33.94, "elapsed_time": "1:09:12", "remaining_time": "2:14:42", "throughput": 20085.94, "total_tokens": 83412352} +{"current_steps": 26515, "total_steps": 78105, "loss": 0.3334, "lr": 4.176316433576808e-06, "epoch": 1.697394533000448, "percentage": 33.95, "elapsed_time": "1:09:13", "remaining_time": "2:14:41", "throughput": 20086.41, "total_tokens": 83427584} +{"current_steps": 26520, "total_steps": 78105, "loss": 0.2709, "lr": 4.1759019356430395e-06, "epoch": 1.697714614941425, "percentage": 33.95, "elapsed_time": "1:09:14", "remaining_time": "2:14:40", "throughput": 20086.81, "total_tokens": 83442240} +{"current_steps": 26525, "total_steps": 78105, "loss": 0.355, "lr": 4.1754873540234575e-06, "epoch": 1.6980346968824018, "percentage": 33.96, "elapsed_time": "1:09:14", "remaining_time": "2:14:39", "throughput": 20087.19, "total_tokens": 83456640} +{"current_steps": 26530, "total_steps": 78105, "loss": 0.2774, "lr": 4.175072688738765e-06, "epoch": 1.6983547788233788, "percentage": 33.97, "elapsed_time": "1:09:15", "remaining_time": "2:14:38", "throughput": 20087.71, "total_tokens": 83472512} +{"current_steps": 26535, "total_steps": 78105, "loss": 0.3193, "lr": 4.174657939809666e-06, "epoch": 1.6986748607643558, "percentage": 33.97, "elapsed_time": "1:09:16", "remaining_time": "2:14:37", "throughput": 20088.29, "total_tokens": 83488768} +{"current_steps": 26540, "total_steps": 78105, "loss": 0.3276, "lr": 4.174243107256874e-06, "epoch": 1.6989949427053326, "percentage": 33.98, "elapsed_time": "1:09:16", "remaining_time": "2:14:36", "throughput": 20088.8, "total_tokens": 83504512} +{"current_steps": 26545, "total_steps": 78105, "loss": 0.3254, "lr": 4.173828191101101e-06, "epoch": 1.6993150246463093, "percentage": 33.99, "elapsed_time": "1:09:17", "remaining_time": "2:14:35", "throughput": 20089.24, "total_tokens": 83519808} +{"current_steps": 26550, "total_steps": 78105, "loss": 0.4312, "lr": 4.173413191363068e-06, "epoch": 1.6996351065872863, "percentage": 33.99, "elapsed_time": "1:09:18", "remaining_time": "2:14:34", "throughput": 20089.71, "total_tokens": 83535104} +{"current_steps": 26555, "total_steps": 78105, "loss": 0.3073, "lr": 4.172998108063495e-06, "epoch": 1.6999551885282633, "percentage": 34.0, "elapsed_time": "1:09:18", "remaining_time": "2:14:33", "throughput": 20090.22, "total_tokens": 83550784} +{"current_steps": 26560, "total_steps": 78105, "loss": 0.3819, "lr": 4.172582941223112e-06, "epoch": 1.70027527046924, "percentage": 34.01, "elapsed_time": "1:09:19", "remaining_time": "2:14:32", "throughput": 20090.82, "total_tokens": 83567296} +{"current_steps": 26565, "total_steps": 78105, "loss": 0.35, "lr": 4.17216769086265e-06, "epoch": 1.7005953524102169, "percentage": 34.01, "elapsed_time": "1:09:20", "remaining_time": "2:14:31", "throughput": 20091.23, "total_tokens": 83582144} +{"current_steps": 26570, "total_steps": 78105, "loss": 0.2856, "lr": 4.171752357002842e-06, "epoch": 1.7009154343511939, "percentage": 34.02, "elapsed_time": "1:09:20", "remaining_time": "2:14:30", "throughput": 20091.74, "total_tokens": 83597952} +{"current_steps": 26575, "total_steps": 78105, "loss": 0.4236, "lr": 4.171336939664429e-06, "epoch": 1.7012355162921708, "percentage": 34.02, "elapsed_time": "1:09:21", "remaining_time": "2:14:29", "throughput": 20092.28, "total_tokens": 83614080} +{"current_steps": 26580, "total_steps": 78105, "loss": 0.2606, "lr": 4.1709214388681565e-06, "epoch": 1.7015555982331478, "percentage": 34.03, "elapsed_time": "1:09:22", "remaining_time": "2:14:28", "throughput": 20092.79, "total_tokens": 83629568} +{"current_steps": 26585, "total_steps": 78105, "loss": 0.4471, "lr": 4.1705058546347694e-06, "epoch": 1.7018756801741246, "percentage": 34.04, "elapsed_time": "1:09:22", "remaining_time": "2:14:27", "throughput": 20093.31, "total_tokens": 83645440} +{"current_steps": 26590, "total_steps": 78105, "loss": 0.4335, "lr": 4.170090186985022e-06, "epoch": 1.7021957621151014, "percentage": 34.04, "elapsed_time": "1:09:23", "remaining_time": "2:14:26", "throughput": 20093.82, "total_tokens": 83660864} +{"current_steps": 26595, "total_steps": 78105, "loss": 0.3545, "lr": 4.169674435939669e-06, "epoch": 1.7025158440560784, "percentage": 34.05, "elapsed_time": "1:09:24", "remaining_time": "2:14:25", "throughput": 20094.24, "total_tokens": 83675840} +{"current_steps": 26600, "total_steps": 78105, "loss": 0.43, "lr": 4.169258601519473e-06, "epoch": 1.7028359259970554, "percentage": 34.06, "elapsed_time": "1:09:24", "remaining_time": "2:14:24", "throughput": 20094.85, "total_tokens": 83692672} +{"current_steps": 26605, "total_steps": 78105, "loss": 0.2911, "lr": 4.168842683745196e-06, "epoch": 1.7031560079380321, "percentage": 34.06, "elapsed_time": "1:09:25", "remaining_time": "2:14:23", "throughput": 20095.48, "total_tokens": 83709376} +{"current_steps": 26610, "total_steps": 78105, "loss": 0.3119, "lr": 4.168426682637609e-06, "epoch": 1.703476089879009, "percentage": 34.07, "elapsed_time": "1:09:26", "remaining_time": "2:14:22", "throughput": 20096.17, "total_tokens": 83726976} +{"current_steps": 26615, "total_steps": 78105, "loss": 0.2443, "lr": 4.168010598217482e-06, "epoch": 1.7037961718199859, "percentage": 34.08, "elapsed_time": "1:09:26", "remaining_time": "2:14:21", "throughput": 20096.59, "total_tokens": 83741888} +{"current_steps": 26620, "total_steps": 78105, "loss": 0.4303, "lr": 4.167594430505596e-06, "epoch": 1.7041162537609629, "percentage": 34.08, "elapsed_time": "1:09:27", "remaining_time": "2:14:20", "throughput": 20097.03, "total_tokens": 83757056} +{"current_steps": 26625, "total_steps": 78105, "loss": 0.3313, "lr": 4.1671781795227305e-06, "epoch": 1.7044363357019396, "percentage": 34.09, "elapsed_time": "1:09:28", "remaining_time": "2:14:19", "throughput": 20097.65, "total_tokens": 83773632} +{"current_steps": 26630, "total_steps": 78105, "loss": 0.3153, "lr": 4.16676184528967e-06, "epoch": 1.7047564176429166, "percentage": 34.1, "elapsed_time": "1:09:29", "remaining_time": "2:14:18", "throughput": 20098.13, "total_tokens": 83789312} +{"current_steps": 26635, "total_steps": 78105, "loss": 0.4139, "lr": 4.1663454278272056e-06, "epoch": 1.7050764995838934, "percentage": 34.1, "elapsed_time": "1:09:29", "remaining_time": "2:14:17", "throughput": 20098.66, "total_tokens": 83805312} +{"current_steps": 26640, "total_steps": 78105, "loss": 0.3763, "lr": 4.165928927156129e-06, "epoch": 1.7053965815248704, "percentage": 34.11, "elapsed_time": "1:09:30", "remaining_time": "2:14:16", "throughput": 20099.22, "total_tokens": 83821504} +{"current_steps": 26645, "total_steps": 78105, "loss": 0.3256, "lr": 4.16551234329724e-06, "epoch": 1.7057166634658474, "percentage": 34.11, "elapsed_time": "1:09:31", "remaining_time": "2:14:15", "throughput": 20099.73, "total_tokens": 83837440} +{"current_steps": 26650, "total_steps": 78105, "loss": 0.3067, "lr": 4.1650956762713395e-06, "epoch": 1.7060367454068242, "percentage": 34.12, "elapsed_time": "1:09:31", "remaining_time": "2:14:14", "throughput": 20100.3, "total_tokens": 83853760} +{"current_steps": 26655, "total_steps": 78105, "loss": 0.3456, "lr": 4.164678926099234e-06, "epoch": 1.706356827347801, "percentage": 34.13, "elapsed_time": "1:09:32", "remaining_time": "2:14:13", "throughput": 20100.88, "total_tokens": 83870080} +{"current_steps": 26660, "total_steps": 78105, "loss": 0.4066, "lr": 4.164262092801734e-06, "epoch": 1.706676909288778, "percentage": 34.13, "elapsed_time": "1:09:33", "remaining_time": "2:14:12", "throughput": 20101.38, "total_tokens": 83885632} +{"current_steps": 26665, "total_steps": 78105, "loss": 0.3023, "lr": 4.163845176399655e-06, "epoch": 1.706996991229755, "percentage": 34.14, "elapsed_time": "1:09:33", "remaining_time": "2:14:11", "throughput": 20101.86, "total_tokens": 83900800} +{"current_steps": 26670, "total_steps": 78105, "loss": 0.1733, "lr": 4.163428176913813e-06, "epoch": 1.7073170731707317, "percentage": 34.15, "elapsed_time": "1:09:34", "remaining_time": "2:14:10", "throughput": 20102.32, "total_tokens": 83915840} +{"current_steps": 26675, "total_steps": 78105, "loss": 0.4431, "lr": 4.1630110943650335e-06, "epoch": 1.7076371551117084, "percentage": 34.15, "elapsed_time": "1:09:35", "remaining_time": "2:14:09", "throughput": 20102.78, "total_tokens": 83931008} +{"current_steps": 26680, "total_steps": 78105, "loss": 0.2354, "lr": 4.162593928774142e-06, "epoch": 1.7079572370526854, "percentage": 34.16, "elapsed_time": "1:09:36", "remaining_time": "2:14:09", "throughput": 20102.06, "total_tokens": 83947648} +{"current_steps": 26685, "total_steps": 78105, "loss": 0.3764, "lr": 4.1621766801619695e-06, "epoch": 1.7082773189936624, "percentage": 34.17, "elapsed_time": "1:09:36", "remaining_time": "2:14:08", "throughput": 20102.53, "total_tokens": 83963008} +{"current_steps": 26690, "total_steps": 78105, "loss": 0.2491, "lr": 4.161759348549352e-06, "epoch": 1.7085974009346394, "percentage": 34.17, "elapsed_time": "1:09:37", "remaining_time": "2:14:07", "throughput": 20103.21, "total_tokens": 83980672} +{"current_steps": 26695, "total_steps": 78105, "loss": 0.3438, "lr": 4.1613419339571284e-06, "epoch": 1.7089174828756162, "percentage": 34.18, "elapsed_time": "1:09:38", "remaining_time": "2:14:06", "throughput": 20103.73, "total_tokens": 83996160} +{"current_steps": 26700, "total_steps": 78105, "loss": 0.3834, "lr": 4.160924436406142e-06, "epoch": 1.709237564816593, "percentage": 34.18, "elapsed_time": "1:09:38", "remaining_time": "2:14:05", "throughput": 20104.18, "total_tokens": 84013824} +{"current_steps": 26705, "total_steps": 78105, "loss": 0.3901, "lr": 4.16050685591724e-06, "epoch": 1.70955764675757, "percentage": 34.19, "elapsed_time": "1:09:39", "remaining_time": "2:14:05", "throughput": 20103.44, "total_tokens": 84030336} +{"current_steps": 26710, "total_steps": 78105, "loss": 0.3067, "lr": 4.160089192511276e-06, "epoch": 1.709877728698547, "percentage": 34.2, "elapsed_time": "1:09:40", "remaining_time": "2:14:04", "throughput": 20103.92, "total_tokens": 84045824} +{"current_steps": 26715, "total_steps": 78105, "loss": 0.2405, "lr": 4.159671446209104e-06, "epoch": 1.7101978106395237, "percentage": 34.2, "elapsed_time": "1:09:41", "remaining_time": "2:14:03", "throughput": 20104.44, "total_tokens": 84061568} +{"current_steps": 26720, "total_steps": 78105, "loss": 0.3842, "lr": 4.159253617031585e-06, "epoch": 1.7105178925805005, "percentage": 34.21, "elapsed_time": "1:09:41", "remaining_time": "2:14:02", "throughput": 20105.01, "total_tokens": 84078080} +{"current_steps": 26725, "total_steps": 78105, "loss": 0.3621, "lr": 4.158835704999583e-06, "epoch": 1.7108379745214775, "percentage": 34.22, "elapsed_time": "1:09:42", "remaining_time": "2:14:01", "throughput": 20105.48, "total_tokens": 84093248} +{"current_steps": 26730, "total_steps": 78105, "loss": 0.2942, "lr": 4.1584177101339675e-06, "epoch": 1.7111580564624544, "percentage": 34.22, "elapsed_time": "1:09:43", "remaining_time": "2:14:00", "throughput": 20106.11, "total_tokens": 84110080} +{"current_steps": 26735, "total_steps": 78105, "loss": 0.3206, "lr": 4.157999632455609e-06, "epoch": 1.7114781384034314, "percentage": 34.23, "elapsed_time": "1:09:43", "remaining_time": "2:13:59", "throughput": 20106.54, "total_tokens": 84125248} +{"current_steps": 26740, "total_steps": 78105, "loss": 0.4036, "lr": 4.157581471985386e-06, "epoch": 1.7117982203444082, "percentage": 34.24, "elapsed_time": "1:09:44", "remaining_time": "2:13:58", "throughput": 20107.14, "total_tokens": 84141824} +{"current_steps": 26745, "total_steps": 78105, "loss": 0.3949, "lr": 4.157163228744177e-06, "epoch": 1.712118302285385, "percentage": 34.24, "elapsed_time": "1:09:45", "remaining_time": "2:13:57", "throughput": 20107.59, "total_tokens": 84157056} +{"current_steps": 26750, "total_steps": 78105, "loss": 0.2899, "lr": 4.156744902752868e-06, "epoch": 1.712438384226362, "percentage": 34.25, "elapsed_time": "1:09:45", "remaining_time": "2:13:56", "throughput": 20107.98, "total_tokens": 84171648} +{"current_steps": 26755, "total_steps": 78105, "loss": 0.3951, "lr": 4.15632649403235e-06, "epoch": 1.712758466167339, "percentage": 34.26, "elapsed_time": "1:09:46", "remaining_time": "2:13:55", "throughput": 20108.47, "total_tokens": 84187136} +{"current_steps": 26760, "total_steps": 78105, "loss": 0.4079, "lr": 4.155908002603513e-06, "epoch": 1.7130785481083157, "percentage": 34.26, "elapsed_time": "1:09:47", "remaining_time": "2:13:54", "throughput": 20109.02, "total_tokens": 84203392} +{"current_steps": 26765, "total_steps": 78105, "loss": 0.4703, "lr": 4.155489428487256e-06, "epoch": 1.7133986300492925, "percentage": 34.27, "elapsed_time": "1:09:48", "remaining_time": "2:13:53", "throughput": 20109.51, "total_tokens": 84219136} +{"current_steps": 26770, "total_steps": 78105, "loss": 0.4732, "lr": 4.15507077170448e-06, "epoch": 1.7137187119902695, "percentage": 34.27, "elapsed_time": "1:09:48", "remaining_time": "2:13:52", "throughput": 20109.97, "total_tokens": 84234304} +{"current_steps": 26775, "total_steps": 78105, "loss": 0.3154, "lr": 4.154652032276091e-06, "epoch": 1.7140387939312465, "percentage": 34.28, "elapsed_time": "1:09:49", "remaining_time": "2:13:51", "throughput": 20110.47, "total_tokens": 84249920} +{"current_steps": 26780, "total_steps": 78105, "loss": 0.2278, "lr": 4.1542332102229975e-06, "epoch": 1.7143588758722232, "percentage": 34.29, "elapsed_time": "1:09:50", "remaining_time": "2:13:50", "throughput": 20110.97, "total_tokens": 84265600} +{"current_steps": 26785, "total_steps": 78105, "loss": 0.3779, "lr": 4.153814305566115e-06, "epoch": 1.7146789578132002, "percentage": 34.29, "elapsed_time": "1:09:50", "remaining_time": "2:13:49", "throughput": 20111.66, "total_tokens": 84283072} +{"current_steps": 26790, "total_steps": 78105, "loss": 0.4423, "lr": 4.15339531832636e-06, "epoch": 1.714999039754177, "percentage": 34.3, "elapsed_time": "1:09:51", "remaining_time": "2:13:48", "throughput": 20112.23, "total_tokens": 84299200} +{"current_steps": 26795, "total_steps": 78105, "loss": 0.329, "lr": 4.152976248524655e-06, "epoch": 1.715319121695154, "percentage": 34.31, "elapsed_time": "1:09:52", "remaining_time": "2:13:47", "throughput": 20112.79, "total_tokens": 84315840} +{"current_steps": 26800, "total_steps": 78105, "loss": 0.3311, "lr": 4.152557096181927e-06, "epoch": 1.715639203636131, "percentage": 34.31, "elapsed_time": "1:09:52", "remaining_time": "2:13:46", "throughput": 20113.19, "total_tokens": 84330240} +{"current_steps": 26805, "total_steps": 78105, "loss": 0.4372, "lr": 4.152137861319104e-06, "epoch": 1.7159592855771078, "percentage": 34.32, "elapsed_time": "1:09:53", "remaining_time": "2:13:45", "throughput": 20113.66, "total_tokens": 84345344} +{"current_steps": 26810, "total_steps": 78105, "loss": 0.4629, "lr": 4.151718543957123e-06, "epoch": 1.7162793675180845, "percentage": 34.33, "elapsed_time": "1:09:54", "remaining_time": "2:13:44", "throughput": 20114.11, "total_tokens": 84360448} +{"current_steps": 26815, "total_steps": 78105, "loss": 0.3464, "lr": 4.151299144116922e-06, "epoch": 1.7165994494590615, "percentage": 34.33, "elapsed_time": "1:09:54", "remaining_time": "2:13:43", "throughput": 20114.69, "total_tokens": 84376256} +{"current_steps": 26820, "total_steps": 78105, "loss": 0.3006, "lr": 4.150879661819441e-06, "epoch": 1.7169195314000385, "percentage": 34.34, "elapsed_time": "1:09:55", "remaining_time": "2:13:42", "throughput": 20115.12, "total_tokens": 84391168} +{"current_steps": 26825, "total_steps": 78105, "loss": 0.3412, "lr": 4.15046009708563e-06, "epoch": 1.7172396133410153, "percentage": 34.34, "elapsed_time": "1:09:56", "remaining_time": "2:13:41", "throughput": 20115.55, "total_tokens": 84406336} +{"current_steps": 26830, "total_steps": 78105, "loss": 0.3759, "lr": 4.150040449936439e-06, "epoch": 1.717559695281992, "percentage": 34.35, "elapsed_time": "1:09:56", "remaining_time": "2:13:40", "throughput": 20116.09, "total_tokens": 84422272} +{"current_steps": 26835, "total_steps": 78105, "loss": 0.2669, "lr": 4.1496207203928215e-06, "epoch": 1.717879777222969, "percentage": 34.36, "elapsed_time": "1:09:57", "remaining_time": "2:13:39", "throughput": 20116.52, "total_tokens": 84436928} +{"current_steps": 26840, "total_steps": 78105, "loss": 0.3133, "lr": 4.149200908475739e-06, "epoch": 1.718199859163946, "percentage": 34.36, "elapsed_time": "1:09:58", "remaining_time": "2:13:38", "throughput": 20116.99, "total_tokens": 84452608} +{"current_steps": 26845, "total_steps": 78105, "loss": 0.37, "lr": 4.1487810142061525e-06, "epoch": 1.718519941104923, "percentage": 34.37, "elapsed_time": "1:09:58", "remaining_time": "2:13:37", "throughput": 20117.52, "total_tokens": 84468736} +{"current_steps": 26850, "total_steps": 78105, "loss": 0.3054, "lr": 4.148361037605031e-06, "epoch": 1.7188400230458998, "percentage": 34.38, "elapsed_time": "1:09:59", "remaining_time": "2:13:36", "throughput": 20117.96, "total_tokens": 84483840} +{"current_steps": 26855, "total_steps": 78105, "loss": 0.3229, "lr": 4.1479409786933455e-06, "epoch": 1.7191601049868765, "percentage": 34.38, "elapsed_time": "1:10:00", "remaining_time": "2:13:35", "throughput": 20118.46, "total_tokens": 84499648} +{"current_steps": 26860, "total_steps": 78105, "loss": 0.383, "lr": 4.147520837492071e-06, "epoch": 1.7194801869278535, "percentage": 34.39, "elapsed_time": "1:10:00", "remaining_time": "2:13:34", "throughput": 20119.05, "total_tokens": 84516224} +{"current_steps": 26865, "total_steps": 78105, "loss": 0.328, "lr": 4.1471006140221876e-06, "epoch": 1.7198002688688305, "percentage": 34.4, "elapsed_time": "1:10:01", "remaining_time": "2:13:33", "throughput": 20119.6, "total_tokens": 84532288} +{"current_steps": 26870, "total_steps": 78105, "loss": 0.2931, "lr": 4.146680308304679e-06, "epoch": 1.7201203508098073, "percentage": 34.4, "elapsed_time": "1:10:02", "remaining_time": "2:13:32", "throughput": 20120.08, "total_tokens": 84547584} +{"current_steps": 26875, "total_steps": 78105, "loss": 0.342, "lr": 4.146259920360533e-06, "epoch": 1.720440432750784, "percentage": 34.41, "elapsed_time": "1:10:02", "remaining_time": "2:13:31", "throughput": 20120.5, "total_tokens": 84562432} +{"current_steps": 26880, "total_steps": 78105, "loss": 0.3926, "lr": 4.145839450210741e-06, "epoch": 1.720760514691761, "percentage": 34.42, "elapsed_time": "1:10:03", "remaining_time": "2:13:30", "throughput": 20121.01, "total_tokens": 84578176} +{"current_steps": 26885, "total_steps": 78105, "loss": 0.2463, "lr": 4.145418897876301e-06, "epoch": 1.721080596632738, "percentage": 34.42, "elapsed_time": "1:10:04", "remaining_time": "2:13:29", "throughput": 20121.38, "total_tokens": 84592448} +{"current_steps": 26890, "total_steps": 78105, "loss": 0.4445, "lr": 4.144998263378211e-06, "epoch": 1.721400678573715, "percentage": 34.43, "elapsed_time": "1:10:04", "remaining_time": "2:13:28", "throughput": 20121.81, "total_tokens": 84607360} +{"current_steps": 26895, "total_steps": 78105, "loss": 0.2856, "lr": 4.144577546737476e-06, "epoch": 1.7217207605146918, "percentage": 34.43, "elapsed_time": "1:10:05", "remaining_time": "2:13:27", "throughput": 20122.26, "total_tokens": 84622528} +{"current_steps": 26900, "total_steps": 78105, "loss": 0.2865, "lr": 4.144156747975105e-06, "epoch": 1.7220408424556686, "percentage": 34.44, "elapsed_time": "1:10:06", "remaining_time": "2:13:26", "throughput": 20122.67, "total_tokens": 84637504} +{"current_steps": 26905, "total_steps": 78105, "loss": 0.2745, "lr": 4.1437358671121095e-06, "epoch": 1.7223609243966456, "percentage": 34.45, "elapsed_time": "1:10:06", "remaining_time": "2:13:25", "throughput": 20123.17, "total_tokens": 84653120} +{"current_steps": 26910, "total_steps": 78105, "loss": 0.2456, "lr": 4.143314904169508e-06, "epoch": 1.7226810063376226, "percentage": 34.45, "elapsed_time": "1:10:07", "remaining_time": "2:13:24", "throughput": 20123.62, "total_tokens": 84668416} +{"current_steps": 26915, "total_steps": 78105, "loss": 0.2846, "lr": 4.142893859168319e-06, "epoch": 1.7230010882785993, "percentage": 34.46, "elapsed_time": "1:10:08", "remaining_time": "2:13:23", "throughput": 20124.1, "total_tokens": 84683584} +{"current_steps": 26920, "total_steps": 78105, "loss": 0.4768, "lr": 4.1424727321295684e-06, "epoch": 1.723321170219576, "percentage": 34.47, "elapsed_time": "1:10:08", "remaining_time": "2:13:22", "throughput": 20124.59, "total_tokens": 84699328} +{"current_steps": 26925, "total_steps": 78105, "loss": 0.3569, "lr": 4.142051523074285e-06, "epoch": 1.723641252160553, "percentage": 34.47, "elapsed_time": "1:10:09", "remaining_time": "2:13:21", "throughput": 20125.04, "total_tokens": 84714688} +{"current_steps": 26930, "total_steps": 78105, "loss": 0.4428, "lr": 4.141630232023502e-06, "epoch": 1.72396133410153, "percentage": 34.48, "elapsed_time": "1:10:10", "remaining_time": "2:13:20", "throughput": 20125.56, "total_tokens": 84730432} +{"current_steps": 26935, "total_steps": 78105, "loss": 0.2943, "lr": 4.1412088589982554e-06, "epoch": 1.7242814160425068, "percentage": 34.49, "elapsed_time": "1:10:10", "remaining_time": "2:13:19", "throughput": 20126.1, "total_tokens": 84746496} +{"current_steps": 26940, "total_steps": 78105, "loss": 0.3809, "lr": 4.1407874040195875e-06, "epoch": 1.7246014979834836, "percentage": 34.49, "elapsed_time": "1:10:11", "remaining_time": "2:13:18", "throughput": 20126.66, "total_tokens": 84762880} +{"current_steps": 26945, "total_steps": 78105, "loss": 0.2378, "lr": 4.140365867108543e-06, "epoch": 1.7249215799244606, "percentage": 34.5, "elapsed_time": "1:10:12", "remaining_time": "2:13:17", "throughput": 20127.13, "total_tokens": 84777984} +{"current_steps": 26950, "total_steps": 78105, "loss": 0.3827, "lr": 4.139944248286172e-06, "epoch": 1.7252416618654376, "percentage": 34.5, "elapsed_time": "1:10:12", "remaining_time": "2:13:16", "throughput": 20127.55, "total_tokens": 84793216} +{"current_steps": 26955, "total_steps": 78105, "loss": 0.4733, "lr": 4.1395225475735265e-06, "epoch": 1.7255617438064146, "percentage": 34.51, "elapsed_time": "1:10:13", "remaining_time": "2:13:15", "throughput": 20128.04, "total_tokens": 84808512} +{"current_steps": 26960, "total_steps": 78105, "loss": 0.3325, "lr": 4.139100764991665e-06, "epoch": 1.7258818257473914, "percentage": 34.52, "elapsed_time": "1:10:14", "remaining_time": "2:13:14", "throughput": 20128.66, "total_tokens": 84825216} +{"current_steps": 26965, "total_steps": 78105, "loss": 0.269, "lr": 4.13867890056165e-06, "epoch": 1.7262019076883681, "percentage": 34.52, "elapsed_time": "1:10:14", "remaining_time": "2:13:13", "throughput": 20129.17, "total_tokens": 84841216} +{"current_steps": 26970, "total_steps": 78105, "loss": 0.3949, "lr": 4.138256954304546e-06, "epoch": 1.726521989629345, "percentage": 34.53, "elapsed_time": "1:10:15", "remaining_time": "2:13:12", "throughput": 20129.66, "total_tokens": 84857024} +{"current_steps": 26975, "total_steps": 78105, "loss": 0.3604, "lr": 4.137834926241423e-06, "epoch": 1.726842071570322, "percentage": 34.54, "elapsed_time": "1:10:16", "remaining_time": "2:13:11", "throughput": 20130.13, "total_tokens": 84872448} +{"current_steps": 26980, "total_steps": 78105, "loss": 0.2039, "lr": 4.137412816393355e-06, "epoch": 1.7271621535112989, "percentage": 34.54, "elapsed_time": "1:10:16", "remaining_time": "2:13:10", "throughput": 20130.54, "total_tokens": 84886976} +{"current_steps": 26985, "total_steps": 78105, "loss": 0.3646, "lr": 4.1369906247814195e-06, "epoch": 1.7274822354522756, "percentage": 34.55, "elapsed_time": "1:10:17", "remaining_time": "2:13:09", "throughput": 20131.0, "total_tokens": 84902144} +{"current_steps": 26990, "total_steps": 78105, "loss": 0.3085, "lr": 4.136568351426698e-06, "epoch": 1.7278023173932526, "percentage": 34.56, "elapsed_time": "1:10:18", "remaining_time": "2:13:08", "throughput": 20131.38, "total_tokens": 84916800} +{"current_steps": 26995, "total_steps": 78105, "loss": 0.3572, "lr": 4.13614599635028e-06, "epoch": 1.7281223993342296, "percentage": 34.56, "elapsed_time": "1:10:18", "remaining_time": "2:13:07", "throughput": 20131.81, "total_tokens": 84931904} +{"current_steps": 27000, "total_steps": 78105, "loss": 0.2276, "lr": 4.135723559573251e-06, "epoch": 1.7284424812752066, "percentage": 34.57, "elapsed_time": "1:10:19", "remaining_time": "2:13:06", "throughput": 20132.22, "total_tokens": 84946880} +{"current_steps": 27005, "total_steps": 78105, "loss": 0.385, "lr": 4.135301041116709e-06, "epoch": 1.7287625632161834, "percentage": 34.58, "elapsed_time": "1:10:20", "remaining_time": "2:13:05", "throughput": 20132.72, "total_tokens": 84962752} +{"current_steps": 27010, "total_steps": 78105, "loss": 0.28, "lr": 4.134878441001751e-06, "epoch": 1.7290826451571601, "percentage": 34.58, "elapsed_time": "1:10:20", "remaining_time": "2:13:04", "throughput": 20133.18, "total_tokens": 84977920} +{"current_steps": 27015, "total_steps": 78105, "loss": 0.3346, "lr": 4.134455759249479e-06, "epoch": 1.7294027270981371, "percentage": 34.59, "elapsed_time": "1:10:21", "remaining_time": "2:13:03", "throughput": 20133.7, "total_tokens": 84994048} +{"current_steps": 27020, "total_steps": 78105, "loss": 0.3173, "lr": 4.134032995881e-06, "epoch": 1.7297228090391141, "percentage": 34.59, "elapsed_time": "1:10:22", "remaining_time": "2:13:02", "throughput": 20134.39, "total_tokens": 85011648} +{"current_steps": 27025, "total_steps": 78105, "loss": 0.3429, "lr": 4.133610150917425e-06, "epoch": 1.730042890980091, "percentage": 34.6, "elapsed_time": "1:10:22", "remaining_time": "2:13:01", "throughput": 20134.91, "total_tokens": 85028096} +{"current_steps": 27030, "total_steps": 78105, "loss": 0.1905, "lr": 4.133187224379868e-06, "epoch": 1.7303629729210677, "percentage": 34.61, "elapsed_time": "1:10:23", "remaining_time": "2:13:00", "throughput": 20135.34, "total_tokens": 85043200} +{"current_steps": 27035, "total_steps": 78105, "loss": 0.3828, "lr": 4.1327642162894475e-06, "epoch": 1.7306830548620447, "percentage": 34.61, "elapsed_time": "1:10:24", "remaining_time": "2:12:59", "throughput": 20135.87, "total_tokens": 85059264} +{"current_steps": 27040, "total_steps": 78105, "loss": 0.2137, "lr": 4.132341126667287e-06, "epoch": 1.7310031368030216, "percentage": 34.62, "elapsed_time": "1:10:24", "remaining_time": "2:12:58", "throughput": 20136.35, "total_tokens": 85074624} +{"current_steps": 27045, "total_steps": 78105, "loss": 0.2352, "lr": 4.131917955534514e-06, "epoch": 1.7313232187439984, "percentage": 34.63, "elapsed_time": "1:10:25", "remaining_time": "2:12:57", "throughput": 20136.89, "total_tokens": 85090432} +{"current_steps": 27050, "total_steps": 78105, "loss": 0.4471, "lr": 4.131494702912258e-06, "epoch": 1.7316433006849754, "percentage": 34.63, "elapsed_time": "1:10:26", "remaining_time": "2:12:56", "throughput": 20137.32, "total_tokens": 85105472} +{"current_steps": 27055, "total_steps": 78105, "loss": 0.346, "lr": 4.131071368821654e-06, "epoch": 1.7319633826259522, "percentage": 34.64, "elapsed_time": "1:10:26", "remaining_time": "2:12:55", "throughput": 20137.8, "total_tokens": 85120832} +{"current_steps": 27060, "total_steps": 78105, "loss": 0.3021, "lr": 4.130647953283843e-06, "epoch": 1.7322834645669292, "percentage": 34.65, "elapsed_time": "1:10:27", "remaining_time": "2:12:54", "throughput": 20138.25, "total_tokens": 85136128} +{"current_steps": 27065, "total_steps": 78105, "loss": 0.3896, "lr": 4.130224456319966e-06, "epoch": 1.7326035465079062, "percentage": 34.65, "elapsed_time": "1:10:28", "remaining_time": "2:12:53", "throughput": 20138.74, "total_tokens": 85151744} +{"current_steps": 27070, "total_steps": 78105, "loss": 0.4597, "lr": 4.129800877951171e-06, "epoch": 1.732923628448883, "percentage": 34.66, "elapsed_time": "1:10:28", "remaining_time": "2:12:52", "throughput": 20139.21, "total_tokens": 85167104} +{"current_steps": 27075, "total_steps": 78105, "loss": 0.3382, "lr": 4.1293772181986105e-06, "epoch": 1.7332437103898597, "percentage": 34.66, "elapsed_time": "1:10:29", "remaining_time": "2:12:51", "throughput": 20139.68, "total_tokens": 85182592} +{"current_steps": 27080, "total_steps": 78105, "loss": 0.4818, "lr": 4.128953477083438e-06, "epoch": 1.7335637923308367, "percentage": 34.67, "elapsed_time": "1:10:30", "remaining_time": "2:12:50", "throughput": 20140.12, "total_tokens": 85197440} +{"current_steps": 27085, "total_steps": 78105, "loss": 0.4165, "lr": 4.128529654626814e-06, "epoch": 1.7338838742718137, "percentage": 34.68, "elapsed_time": "1:10:30", "remaining_time": "2:12:49", "throughput": 20140.61, "total_tokens": 85212992} +{"current_steps": 27090, "total_steps": 78105, "loss": 0.3688, "lr": 4.128105750849902e-06, "epoch": 1.7342039562127904, "percentage": 34.68, "elapsed_time": "1:10:31", "remaining_time": "2:12:48", "throughput": 20141.04, "total_tokens": 85227776} +{"current_steps": 27095, "total_steps": 78105, "loss": 0.3282, "lr": 4.12768176577387e-06, "epoch": 1.7345240381537672, "percentage": 34.69, "elapsed_time": "1:10:32", "remaining_time": "2:12:47", "throughput": 20141.56, "total_tokens": 85243648} +{"current_steps": 27100, "total_steps": 78105, "loss": 0.4574, "lr": 4.127257699419888e-06, "epoch": 1.7348441200947442, "percentage": 34.7, "elapsed_time": "1:10:32", "remaining_time": "2:12:46", "throughput": 20142.07, "total_tokens": 85259584} +{"current_steps": 27105, "total_steps": 78105, "loss": 0.366, "lr": 4.126833551809132e-06, "epoch": 1.7351642020357212, "percentage": 34.7, "elapsed_time": "1:10:33", "remaining_time": "2:12:45", "throughput": 20142.56, "total_tokens": 85275392} +{"current_steps": 27110, "total_steps": 78105, "loss": 0.3598, "lr": 4.126409322962783e-06, "epoch": 1.7354842839766982, "percentage": 34.71, "elapsed_time": "1:10:34", "remaining_time": "2:12:44", "throughput": 20142.96, "total_tokens": 85290240} +{"current_steps": 27115, "total_steps": 78105, "loss": 0.3588, "lr": 4.125985012902024e-06, "epoch": 1.735804365917675, "percentage": 34.72, "elapsed_time": "1:10:34", "remaining_time": "2:12:43", "throughput": 20143.45, "total_tokens": 85305536} +{"current_steps": 27120, "total_steps": 78105, "loss": 0.3226, "lr": 4.125560621648043e-06, "epoch": 1.7361244478586517, "percentage": 34.72, "elapsed_time": "1:10:35", "remaining_time": "2:12:42", "throughput": 20143.91, "total_tokens": 85321216} +{"current_steps": 27125, "total_steps": 78105, "loss": 0.3335, "lr": 4.125136149222032e-06, "epoch": 1.7364445297996287, "percentage": 34.73, "elapsed_time": "1:10:36", "remaining_time": "2:12:41", "throughput": 20144.41, "total_tokens": 85336640} +{"current_steps": 27130, "total_steps": 78105, "loss": 0.3804, "lr": 4.124711595645186e-06, "epoch": 1.7367646117406057, "percentage": 34.74, "elapsed_time": "1:10:36", "remaining_time": "2:12:40", "throughput": 20144.93, "total_tokens": 85352896} +{"current_steps": 27135, "total_steps": 78105, "loss": 0.3106, "lr": 4.124286960938706e-06, "epoch": 1.7370846936815825, "percentage": 34.74, "elapsed_time": "1:10:37", "remaining_time": "2:12:39", "throughput": 20145.32, "total_tokens": 85367488} +{"current_steps": 27140, "total_steps": 78105, "loss": 0.305, "lr": 4.123862245123796e-06, "epoch": 1.7374047756225592, "percentage": 34.75, "elapsed_time": "1:10:38", "remaining_time": "2:12:38", "throughput": 20145.95, "total_tokens": 85384768} +{"current_steps": 27145, "total_steps": 78105, "loss": 0.404, "lr": 4.123437448221664e-06, "epoch": 1.7377248575635362, "percentage": 34.75, "elapsed_time": "1:10:38", "remaining_time": "2:12:37", "throughput": 20146.36, "total_tokens": 85399552} +{"current_steps": 27150, "total_steps": 78105, "loss": 0.273, "lr": 4.123012570253522e-06, "epoch": 1.7380449395045132, "percentage": 34.76, "elapsed_time": "1:10:39", "remaining_time": "2:12:36", "throughput": 20146.79, "total_tokens": 85414272} +{"current_steps": 27155, "total_steps": 78105, "loss": 0.1753, "lr": 4.122587611240586e-06, "epoch": 1.7383650214454902, "percentage": 34.77, "elapsed_time": "1:10:40", "remaining_time": "2:12:35", "throughput": 20147.25, "total_tokens": 85429376} +{"current_steps": 27160, "total_steps": 78105, "loss": 0.3394, "lr": 4.122162571204076e-06, "epoch": 1.738685103386467, "percentage": 34.77, "elapsed_time": "1:10:40", "remaining_time": "2:12:34", "throughput": 20147.72, "total_tokens": 85444800} +{"current_steps": 27165, "total_steps": 78105, "loss": 0.3662, "lr": 4.121737450165217e-06, "epoch": 1.7390051853274437, "percentage": 34.78, "elapsed_time": "1:10:41", "remaining_time": "2:12:33", "throughput": 20148.28, "total_tokens": 85460928} +{"current_steps": 27170, "total_steps": 78105, "loss": 0.3124, "lr": 4.1213122481452376e-06, "epoch": 1.7393252672684207, "percentage": 34.79, "elapsed_time": "1:10:42", "remaining_time": "2:12:32", "throughput": 20148.66, "total_tokens": 85475328} +{"current_steps": 27175, "total_steps": 78105, "loss": 0.305, "lr": 4.1208869651653695e-06, "epoch": 1.7396453492093977, "percentage": 34.79, "elapsed_time": "1:10:42", "remaining_time": "2:12:31", "throughput": 20149.13, "total_tokens": 85491136} +{"current_steps": 27180, "total_steps": 78105, "loss": 0.4092, "lr": 4.120461601246849e-06, "epoch": 1.7399654311503745, "percentage": 34.8, "elapsed_time": "1:10:43", "remaining_time": "2:12:30", "throughput": 20149.68, "total_tokens": 85507520} +{"current_steps": 27185, "total_steps": 78105, "loss": 0.456, "lr": 4.120036156410917e-06, "epoch": 1.7402855130913513, "percentage": 34.81, "elapsed_time": "1:10:44", "remaining_time": "2:12:29", "throughput": 20150.1, "total_tokens": 85522368} +{"current_steps": 27190, "total_steps": 78105, "loss": 0.3396, "lr": 4.1196106306788174e-06, "epoch": 1.7406055950323283, "percentage": 34.81, "elapsed_time": "1:10:44", "remaining_time": "2:12:28", "throughput": 20150.68, "total_tokens": 85538944} +{"current_steps": 27195, "total_steps": 78105, "loss": 0.2535, "lr": 4.119185024071799e-06, "epoch": 1.7409256769733052, "percentage": 34.82, "elapsed_time": "1:10:45", "remaining_time": "2:12:27", "throughput": 20151.13, "total_tokens": 85554368} +{"current_steps": 27200, "total_steps": 78105, "loss": 0.361, "lr": 4.118759336611116e-06, "epoch": 1.741245758914282, "percentage": 34.82, "elapsed_time": "1:10:46", "remaining_time": "2:12:26", "throughput": 20151.56, "total_tokens": 85569408} +{"current_steps": 27205, "total_steps": 78105, "loss": 0.4347, "lr": 4.118333568318022e-06, "epoch": 1.7415658408552588, "percentage": 34.83, "elapsed_time": "1:10:46", "remaining_time": "2:12:26", "throughput": 20152.11, "total_tokens": 85585792} +{"current_steps": 27210, "total_steps": 78105, "loss": 0.462, "lr": 4.11790771921378e-06, "epoch": 1.7418859227962358, "percentage": 34.84, "elapsed_time": "1:10:47", "remaining_time": "2:12:25", "throughput": 20152.75, "total_tokens": 85603200} +{"current_steps": 27215, "total_steps": 78105, "loss": 0.3331, "lr": 4.117481789319653e-06, "epoch": 1.7422060047372128, "percentage": 34.84, "elapsed_time": "1:10:48", "remaining_time": "2:12:24", "throughput": 20153.14, "total_tokens": 85617984} +{"current_steps": 27220, "total_steps": 78105, "loss": 0.2692, "lr": 4.117055778656911e-06, "epoch": 1.7425260866781898, "percentage": 34.85, "elapsed_time": "1:10:49", "remaining_time": "2:12:23", "throughput": 20153.65, "total_tokens": 85633664} +{"current_steps": 27225, "total_steps": 78105, "loss": 0.247, "lr": 4.116629687246827e-06, "epoch": 1.7428461686191665, "percentage": 34.86, "elapsed_time": "1:10:49", "remaining_time": "2:12:22", "throughput": 20154.24, "total_tokens": 85650496} +{"current_steps": 27230, "total_steps": 78105, "loss": 0.3757, "lr": 4.116203515110676e-06, "epoch": 1.7431662505601433, "percentage": 34.86, "elapsed_time": "1:10:50", "remaining_time": "2:12:21", "throughput": 20154.65, "total_tokens": 85665408} +{"current_steps": 27235, "total_steps": 78105, "loss": 0.1952, "lr": 4.115777262269741e-06, "epoch": 1.7434863325011203, "percentage": 34.87, "elapsed_time": "1:10:51", "remaining_time": "2:12:20", "throughput": 20155.18, "total_tokens": 85681408} +{"current_steps": 27240, "total_steps": 78105, "loss": 0.3594, "lr": 4.1153509287453054e-06, "epoch": 1.7438064144420973, "percentage": 34.88, "elapsed_time": "1:10:51", "remaining_time": "2:12:19", "throughput": 20155.72, "total_tokens": 85697472} +{"current_steps": 27245, "total_steps": 78105, "loss": 0.6312, "lr": 4.114924514558658e-06, "epoch": 1.744126496383074, "percentage": 34.88, "elapsed_time": "1:10:52", "remaining_time": "2:12:18", "throughput": 20156.28, "total_tokens": 85714048} +{"current_steps": 27250, "total_steps": 78105, "loss": 0.3488, "lr": 4.114498019731093e-06, "epoch": 1.7444465783240508, "percentage": 34.89, "elapsed_time": "1:10:53", "remaining_time": "2:12:17", "throughput": 20156.69, "total_tokens": 85728896} +{"current_steps": 27255, "total_steps": 78105, "loss": 0.3511, "lr": 4.114071444283905e-06, "epoch": 1.7447666602650278, "percentage": 34.9, "elapsed_time": "1:10:53", "remaining_time": "2:12:16", "throughput": 20157.14, "total_tokens": 85744128} +{"current_steps": 27260, "total_steps": 78105, "loss": 0.2834, "lr": 4.113644788238398e-06, "epoch": 1.7450867422060048, "percentage": 34.9, "elapsed_time": "1:10:54", "remaining_time": "2:12:15", "throughput": 20157.72, "total_tokens": 85760896} +{"current_steps": 27265, "total_steps": 78105, "loss": 0.348, "lr": 4.113218051615875e-06, "epoch": 1.7454068241469818, "percentage": 34.91, "elapsed_time": "1:10:55", "remaining_time": "2:12:14", "throughput": 20158.31, "total_tokens": 85777600} +{"current_steps": 27270, "total_steps": 78105, "loss": 0.3729, "lr": 4.112791234437647e-06, "epoch": 1.7457269060879586, "percentage": 34.91, "elapsed_time": "1:10:55", "remaining_time": "2:12:13", "throughput": 20158.76, "total_tokens": 85792832} +{"current_steps": 27275, "total_steps": 78105, "loss": 0.4245, "lr": 4.112364336725023e-06, "epoch": 1.7460469880289353, "percentage": 34.92, "elapsed_time": "1:10:56", "remaining_time": "2:12:12", "throughput": 20159.15, "total_tokens": 85807424} +{"current_steps": 27280, "total_steps": 78105, "loss": 0.4008, "lr": 4.111937358499324e-06, "epoch": 1.7463670699699123, "percentage": 34.93, "elapsed_time": "1:10:57", "remaining_time": "2:12:11", "throughput": 20159.59, "total_tokens": 85822656} +{"current_steps": 27285, "total_steps": 78105, "loss": 0.2852, "lr": 4.111510299781869e-06, "epoch": 1.7466871519108893, "percentage": 34.93, "elapsed_time": "1:10:57", "remaining_time": "2:12:10", "throughput": 20160.03, "total_tokens": 85837632} +{"current_steps": 27290, "total_steps": 78105, "loss": 0.4014, "lr": 4.111083160593983e-06, "epoch": 1.747007233851866, "percentage": 34.94, "elapsed_time": "1:10:58", "remaining_time": "2:12:09", "throughput": 20160.5, "total_tokens": 85853376} +{"current_steps": 27295, "total_steps": 78105, "loss": 0.2782, "lr": 4.110655940956997e-06, "epoch": 1.7473273157928428, "percentage": 34.95, "elapsed_time": "1:10:59", "remaining_time": "2:12:08", "throughput": 20160.93, "total_tokens": 85868480} +{"current_steps": 27300, "total_steps": 78105, "loss": 0.2557, "lr": 4.110228640892242e-06, "epoch": 1.7476473977338198, "percentage": 34.95, "elapsed_time": "1:10:59", "remaining_time": "2:12:07", "throughput": 20161.48, "total_tokens": 85884608} +{"current_steps": 27305, "total_steps": 78105, "loss": 0.2501, "lr": 4.109801260421057e-06, "epoch": 1.7479674796747968, "percentage": 34.96, "elapsed_time": "1:11:00", "remaining_time": "2:12:06", "throughput": 20161.9, "total_tokens": 85899776} +{"current_steps": 27310, "total_steps": 78105, "loss": 0.2386, "lr": 4.109373799564782e-06, "epoch": 1.7482875616157736, "percentage": 34.97, "elapsed_time": "1:11:01", "remaining_time": "2:12:05", "throughput": 20162.47, "total_tokens": 85915904} +{"current_steps": 27315, "total_steps": 78105, "loss": 0.4077, "lr": 4.1089462583447615e-06, "epoch": 1.7486076435567506, "percentage": 34.97, "elapsed_time": "1:11:01", "remaining_time": "2:12:04", "throughput": 20162.9, "total_tokens": 85930688} +{"current_steps": 27320, "total_steps": 78105, "loss": 0.3356, "lr": 4.108518636782346e-06, "epoch": 1.7489277254977273, "percentage": 34.98, "elapsed_time": "1:11:02", "remaining_time": "2:12:03", "throughput": 20163.39, "total_tokens": 85946304} +{"current_steps": 27325, "total_steps": 78105, "loss": 0.34, "lr": 4.108090934898888e-06, "epoch": 1.7492478074387043, "percentage": 34.98, "elapsed_time": "1:11:03", "remaining_time": "2:12:02", "throughput": 20163.84, "total_tokens": 85961856} +{"current_steps": 27330, "total_steps": 78105, "loss": 0.4829, "lr": 4.107663152715746e-06, "epoch": 1.7495678893796813, "percentage": 34.99, "elapsed_time": "1:11:03", "remaining_time": "2:12:01", "throughput": 20164.36, "total_tokens": 85978048} +{"current_steps": 27335, "total_steps": 78105, "loss": 0.3376, "lr": 4.107235290254279e-06, "epoch": 1.749887971320658, "percentage": 35.0, "elapsed_time": "1:11:04", "remaining_time": "2:12:00", "throughput": 20164.86, "total_tokens": 85994176} +{"current_steps": 27340, "total_steps": 78105, "loss": 0.2995, "lr": 4.106807347535854e-06, "epoch": 1.7502080532616349, "percentage": 35.0, "elapsed_time": "1:11:05", "remaining_time": "2:11:59", "throughput": 20165.36, "total_tokens": 86010048} +{"current_steps": 27342, "total_steps": 78105, "eval_loss": 0.477387011051178, "epoch": 1.7503360860380257, "percentage": 35.01, "elapsed_time": "1:11:56", "remaining_time": "2:13:33", "throughput": 19927.31, "total_tokens": 86015936} +{"current_steps": 27345, "total_steps": 78105, "loss": 0.5011, "lr": 4.10637932458184e-06, "epoch": 1.7505281352026119, "percentage": 35.01, "elapsed_time": "1:12:33", "remaining_time": "2:14:41", "throughput": 19758.84, "total_tokens": 86026496} +{"current_steps": 27350, "total_steps": 78105, "loss": 0.4033, "lr": 4.105951221413609e-06, "epoch": 1.7508482171435888, "percentage": 35.02, "elapsed_time": "1:12:34", "remaining_time": "2:14:40", "throughput": 19759.35, "total_tokens": 86041792} +{"current_steps": 27355, "total_steps": 78105, "loss": 0.4054, "lr": 4.10552303805254e-06, "epoch": 1.7511682990845656, "percentage": 35.02, "elapsed_time": "1:12:35", "remaining_time": "2:14:39", "throughput": 19759.94, "total_tokens": 86058112} +{"current_steps": 27360, "total_steps": 78105, "loss": 0.5139, "lr": 4.105094774520012e-06, "epoch": 1.7514883810255424, "percentage": 35.03, "elapsed_time": "1:12:35", "remaining_time": "2:14:39", "throughput": 19760.75, "total_tokens": 86076608} +{"current_steps": 27365, "total_steps": 78105, "loss": 0.2469, "lr": 4.104666430837413e-06, "epoch": 1.7518084629665194, "percentage": 35.04, "elapsed_time": "1:12:36", "remaining_time": "2:14:38", "throughput": 19761.38, "total_tokens": 86092992} +{"current_steps": 27370, "total_steps": 78105, "loss": 0.3058, "lr": 4.10423800702613e-06, "epoch": 1.7521285449074964, "percentage": 35.04, "elapsed_time": "1:12:37", "remaining_time": "2:14:36", "throughput": 19761.82, "total_tokens": 86107904} +{"current_steps": 27375, "total_steps": 78105, "loss": 0.3834, "lr": 4.103809503107557e-06, "epoch": 1.7524486268484734, "percentage": 35.05, "elapsed_time": "1:12:37", "remaining_time": "2:14:36", "throughput": 19762.48, "total_tokens": 86124800} +{"current_steps": 27380, "total_steps": 78105, "loss": 0.2621, "lr": 4.103380919103092e-06, "epoch": 1.7527687087894501, "percentage": 35.06, "elapsed_time": "1:12:38", "remaining_time": "2:14:34", "throughput": 19762.96, "total_tokens": 86140096} +{"current_steps": 27385, "total_steps": 78105, "loss": 0.3195, "lr": 4.102952255034135e-06, "epoch": 1.753088790730427, "percentage": 35.06, "elapsed_time": "1:12:39", "remaining_time": "2:14:33", "throughput": 19763.42, "total_tokens": 86155008} +{"current_steps": 27390, "total_steps": 78105, "loss": 0.5096, "lr": 4.102523510922092e-06, "epoch": 1.7534088726714039, "percentage": 35.07, "elapsed_time": "1:12:39", "remaining_time": "2:14:32", "throughput": 19763.96, "total_tokens": 86170496} +{"current_steps": 27395, "total_steps": 78105, "loss": 0.3476, "lr": 4.102094686788373e-06, "epoch": 1.7537289546123809, "percentage": 35.07, "elapsed_time": "1:12:40", "remaining_time": "2:14:31", "throughput": 19764.41, "total_tokens": 86185472} +{"current_steps": 27400, "total_steps": 78105, "loss": 0.4326, "lr": 4.1016657826543895e-06, "epoch": 1.7540490365533576, "percentage": 35.08, "elapsed_time": "1:12:41", "remaining_time": "2:14:30", "throughput": 19764.96, "total_tokens": 86201216} +{"current_steps": 27405, "total_steps": 78105, "loss": 0.2136, "lr": 4.1012367985415595e-06, "epoch": 1.7543691184943344, "percentage": 35.09, "elapsed_time": "1:12:41", "remaining_time": "2:14:29", "throughput": 19765.33, "total_tokens": 86215040} +{"current_steps": 27410, "total_steps": 78105, "loss": 0.4943, "lr": 4.1008077344713045e-06, "epoch": 1.7546892004353114, "percentage": 35.09, "elapsed_time": "1:12:42", "remaining_time": "2:14:28", "throughput": 19765.75, "total_tokens": 86229632} +{"current_steps": 27415, "total_steps": 78105, "loss": 0.3653, "lr": 4.100378590465049e-06, "epoch": 1.7550092823762884, "percentage": 35.1, "elapsed_time": "1:12:43", "remaining_time": "2:14:27", "throughput": 19766.14, "total_tokens": 86243840} +{"current_steps": 27420, "total_steps": 78105, "loss": 0.2443, "lr": 4.099949366544224e-06, "epoch": 1.7553293643172654, "percentage": 35.11, "elapsed_time": "1:12:43", "remaining_time": "2:14:26", "throughput": 19766.61, "total_tokens": 86258368} +{"current_steps": 27425, "total_steps": 78105, "loss": 0.4385, "lr": 4.09952006273026e-06, "epoch": 1.7556494462582422, "percentage": 35.11, "elapsed_time": "1:12:44", "remaining_time": "2:14:25", "throughput": 19767.18, "total_tokens": 86274304} +{"current_steps": 27430, "total_steps": 78105, "loss": 0.4115, "lr": 4.099090679044597e-06, "epoch": 1.755969528199219, "percentage": 35.12, "elapsed_time": "1:12:45", "remaining_time": "2:14:24", "throughput": 19767.74, "total_tokens": 86290304} +{"current_steps": 27435, "total_steps": 78105, "loss": 0.3621, "lr": 4.0986612155086745e-06, "epoch": 1.756289610140196, "percentage": 35.13, "elapsed_time": "1:12:45", "remaining_time": "2:14:23", "throughput": 19768.22, "total_tokens": 86305408} +{"current_steps": 27440, "total_steps": 78105, "loss": 0.6266, "lr": 4.098231672143938e-06, "epoch": 1.756609692081173, "percentage": 35.13, "elapsed_time": "1:12:47", "remaining_time": "2:14:23", "throughput": 19769.53, "total_tokens": 86334336} +{"current_steps": 27445, "total_steps": 78105, "loss": 0.3024, "lr": 4.097802048971836e-06, "epoch": 1.7569297740221497, "percentage": 35.14, "elapsed_time": "1:12:47", "remaining_time": "2:14:22", "throughput": 19770.2, "total_tokens": 86351296} +{"current_steps": 27450, "total_steps": 78105, "loss": 0.357, "lr": 4.097372346013823e-06, "epoch": 1.7572498559631264, "percentage": 35.14, "elapsed_time": "1:12:48", "remaining_time": "2:14:21", "throughput": 19771.04, "total_tokens": 86369920} +{"current_steps": 27455, "total_steps": 78105, "loss": 0.3462, "lr": 4.096942563291355e-06, "epoch": 1.7575699379041034, "percentage": 35.15, "elapsed_time": "1:12:49", "remaining_time": "2:14:20", "throughput": 19771.51, "total_tokens": 86384960} +{"current_steps": 27460, "total_steps": 78105, "loss": 0.358, "lr": 4.096512700825894e-06, "epoch": 1.7578900198450804, "percentage": 35.16, "elapsed_time": "1:12:49", "remaining_time": "2:14:19", "throughput": 19771.98, "total_tokens": 86399808} +{"current_steps": 27465, "total_steps": 78105, "loss": 0.4086, "lr": 4.096082758638904e-06, "epoch": 1.7582101017860572, "percentage": 35.16, "elapsed_time": "1:12:50", "remaining_time": "2:14:18", "throughput": 19772.51, "total_tokens": 86415616} +{"current_steps": 27470, "total_steps": 78105, "loss": 0.3551, "lr": 4.095652736751856e-06, "epoch": 1.758530183727034, "percentage": 35.17, "elapsed_time": "1:12:51", "remaining_time": "2:14:17", "throughput": 19773.1, "total_tokens": 86432128} +{"current_steps": 27475, "total_steps": 78105, "loss": 0.415, "lr": 4.0952226351862204e-06, "epoch": 1.758850265668011, "percentage": 35.18, "elapsed_time": "1:12:51", "remaining_time": "2:14:16", "throughput": 19773.5, "total_tokens": 86446336} +{"current_steps": 27480, "total_steps": 78105, "loss": 0.2837, "lr": 4.094792453963476e-06, "epoch": 1.759170347608988, "percentage": 35.18, "elapsed_time": "1:12:52", "remaining_time": "2:14:15", "throughput": 19773.91, "total_tokens": 86461056} +{"current_steps": 27485, "total_steps": 78105, "loss": 0.4098, "lr": 4.094362193105104e-06, "epoch": 1.759490429549965, "percentage": 35.19, "elapsed_time": "1:12:53", "remaining_time": "2:14:14", "throughput": 19774.44, "total_tokens": 86476800} +{"current_steps": 27490, "total_steps": 78105, "loss": 0.3081, "lr": 4.093931852632588e-06, "epoch": 1.7598105114909417, "percentage": 35.2, "elapsed_time": "1:12:53", "remaining_time": "2:14:13", "throughput": 19775.04, "total_tokens": 86493376} +{"current_steps": 27495, "total_steps": 78105, "loss": 0.3491, "lr": 4.093501432567418e-06, "epoch": 1.7601305934319185, "percentage": 35.2, "elapsed_time": "1:12:54", "remaining_time": "2:14:12", "throughput": 19775.5, "total_tokens": 86508544} +{"current_steps": 27500, "total_steps": 78105, "loss": 0.3194, "lr": 4.093070932931087e-06, "epoch": 1.7604506753728955, "percentage": 35.21, "elapsed_time": "1:12:55", "remaining_time": "2:14:11", "throughput": 19776.07, "total_tokens": 86524480} +{"current_steps": 27505, "total_steps": 78105, "loss": 0.2945, "lr": 4.092640353745092e-06, "epoch": 1.7607707573138724, "percentage": 35.22, "elapsed_time": "1:12:55", "remaining_time": "2:14:10", "throughput": 19776.49, "total_tokens": 86539136} +{"current_steps": 27510, "total_steps": 78105, "loss": 0.3528, "lr": 4.092209695030933e-06, "epoch": 1.7610908392548492, "percentage": 35.22, "elapsed_time": "1:12:56", "remaining_time": "2:14:09", "throughput": 19777.04, "total_tokens": 86555328} +{"current_steps": 27515, "total_steps": 78105, "loss": 0.2385, "lr": 4.091778956810115e-06, "epoch": 1.761410921195826, "percentage": 35.23, "elapsed_time": "1:12:57", "remaining_time": "2:14:08", "throughput": 19777.5, "total_tokens": 86570624} +{"current_steps": 27520, "total_steps": 78105, "loss": 0.3079, "lr": 4.091348139104147e-06, "epoch": 1.761731003136803, "percentage": 35.23, "elapsed_time": "1:12:57", "remaining_time": "2:14:07", "throughput": 19777.97, "total_tokens": 86585600} +{"current_steps": 27525, "total_steps": 78105, "loss": 0.4022, "lr": 4.0909172419345436e-06, "epoch": 1.76205108507778, "percentage": 35.24, "elapsed_time": "1:12:58", "remaining_time": "2:14:06", "throughput": 19778.47, "total_tokens": 86601216} +{"current_steps": 27530, "total_steps": 78105, "loss": 0.2999, "lr": 4.090486265322818e-06, "epoch": 1.762371167018757, "percentage": 35.25, "elapsed_time": "1:12:59", "remaining_time": "2:14:05", "throughput": 19779.03, "total_tokens": 86617216} +{"current_steps": 27535, "total_steps": 78105, "loss": 0.2846, "lr": 4.090055209290494e-06, "epoch": 1.7626912489597337, "percentage": 35.25, "elapsed_time": "1:12:59", "remaining_time": "2:14:04", "throughput": 19779.53, "total_tokens": 86632960} +{"current_steps": 27540, "total_steps": 78105, "loss": 0.3345, "lr": 4.089624073859095e-06, "epoch": 1.7630113309007105, "percentage": 35.26, "elapsed_time": "1:13:00", "remaining_time": "2:14:03", "throughput": 19780.05, "total_tokens": 86648576} +{"current_steps": 27545, "total_steps": 78105, "loss": 0.2538, "lr": 4.08919285905015e-06, "epoch": 1.7633314128416875, "percentage": 35.27, "elapsed_time": "1:13:01", "remaining_time": "2:14:01", "throughput": 19780.47, "total_tokens": 86663232} +{"current_steps": 27550, "total_steps": 78105, "loss": 0.2743, "lr": 4.0887615648851906e-06, "epoch": 1.7636514947826645, "percentage": 35.27, "elapsed_time": "1:13:01", "remaining_time": "2:14:00", "throughput": 19781.02, "total_tokens": 86679104} +{"current_steps": 27555, "total_steps": 78105, "loss": 0.3117, "lr": 4.088330191385754e-06, "epoch": 1.7639715767236412, "percentage": 35.28, "elapsed_time": "1:13:02", "remaining_time": "2:14:00", "throughput": 19781.63, "total_tokens": 86695872} +{"current_steps": 27560, "total_steps": 78105, "loss": 0.4026, "lr": 4.087898738573382e-06, "epoch": 1.764291658664618, "percentage": 35.29, "elapsed_time": "1:13:03", "remaining_time": "2:13:59", "throughput": 19782.21, "total_tokens": 86712064} +{"current_steps": 27565, "total_steps": 78105, "loss": 0.3008, "lr": 4.087467206469617e-06, "epoch": 1.764611740605595, "percentage": 35.29, "elapsed_time": "1:13:04", "remaining_time": "2:13:58", "throughput": 19782.84, "total_tokens": 86728832} +{"current_steps": 27570, "total_steps": 78105, "loss": 0.3186, "lr": 4.087035595096009e-06, "epoch": 1.764931822546572, "percentage": 35.3, "elapsed_time": "1:13:04", "remaining_time": "2:13:57", "throughput": 19783.25, "total_tokens": 86743872} +{"current_steps": 27575, "total_steps": 78105, "loss": 0.2668, "lr": 4.08660390447411e-06, "epoch": 1.7652519044875488, "percentage": 35.31, "elapsed_time": "1:13:05", "remaining_time": "2:13:56", "throughput": 19783.67, "total_tokens": 86758720} +{"current_steps": 27580, "total_steps": 78105, "loss": 0.4228, "lr": 4.086172134625477e-06, "epoch": 1.7655719864285258, "percentage": 35.31, "elapsed_time": "1:13:06", "remaining_time": "2:13:55", "throughput": 19784.17, "total_tokens": 86774464} +{"current_steps": 27585, "total_steps": 78105, "loss": 0.2753, "lr": 4.08574028557167e-06, "epoch": 1.7658920683695025, "percentage": 35.32, "elapsed_time": "1:13:06", "remaining_time": "2:13:54", "throughput": 19784.71, "total_tokens": 86790528} +{"current_steps": 27590, "total_steps": 78105, "loss": 0.2646, "lr": 4.085308357334251e-06, "epoch": 1.7662121503104795, "percentage": 35.32, "elapsed_time": "1:13:07", "remaining_time": "2:13:53", "throughput": 19785.18, "total_tokens": 86806016} +{"current_steps": 27595, "total_steps": 78105, "loss": 0.258, "lr": 4.084876349934792e-06, "epoch": 1.7665322322514565, "percentage": 35.33, "elapsed_time": "1:13:08", "remaining_time": "2:13:51", "throughput": 19785.62, "total_tokens": 86821312} +{"current_steps": 27600, "total_steps": 78105, "loss": 0.3119, "lr": 4.084444263394863e-06, "epoch": 1.7668523141924333, "percentage": 35.34, "elapsed_time": "1:13:08", "remaining_time": "2:13:50", "throughput": 19786.11, "total_tokens": 86836800} +{"current_steps": 27605, "total_steps": 78105, "loss": 0.4817, "lr": 4.084012097736039e-06, "epoch": 1.76717239613341, "percentage": 35.34, "elapsed_time": "1:13:09", "remaining_time": "2:13:49", "throughput": 19786.55, "total_tokens": 86852160} +{"current_steps": 27610, "total_steps": 78105, "loss": 0.3468, "lr": 4.083579852979903e-06, "epoch": 1.767492478074387, "percentage": 35.35, "elapsed_time": "1:13:10", "remaining_time": "2:13:48", "throughput": 19787.09, "total_tokens": 86868032} +{"current_steps": 27615, "total_steps": 78105, "loss": 0.4362, "lr": 4.083147529148038e-06, "epoch": 1.767812560015364, "percentage": 35.36, "elapsed_time": "1:13:10", "remaining_time": "2:13:47", "throughput": 19787.59, "total_tokens": 86883456} +{"current_steps": 27620, "total_steps": 78105, "loss": 0.3842, "lr": 4.082715126262031e-06, "epoch": 1.7681326419563408, "percentage": 35.36, "elapsed_time": "1:13:11", "remaining_time": "2:13:46", "throughput": 19788.07, "total_tokens": 86898688} +{"current_steps": 27625, "total_steps": 78105, "loss": 0.2753, "lr": 4.082282644343475e-06, "epoch": 1.7684527238973176, "percentage": 35.37, "elapsed_time": "1:13:12", "remaining_time": "2:13:45", "throughput": 19788.65, "total_tokens": 86915392} +{"current_steps": 27630, "total_steps": 78105, "loss": 0.3564, "lr": 4.0818500834139655e-06, "epoch": 1.7687728058382945, "percentage": 35.38, "elapsed_time": "1:13:12", "remaining_time": "2:13:44", "throughput": 19789.19, "total_tokens": 86931520} +{"current_steps": 27635, "total_steps": 78105, "loss": 0.3086, "lr": 4.081417443495103e-06, "epoch": 1.7690928877792715, "percentage": 35.38, "elapsed_time": "1:13:13", "remaining_time": "2:13:44", "throughput": 19789.85, "total_tokens": 86948608} +{"current_steps": 27640, "total_steps": 78105, "loss": 0.246, "lr": 4.08098472460849e-06, "epoch": 1.7694129697202485, "percentage": 35.39, "elapsed_time": "1:13:14", "remaining_time": "2:13:43", "throughput": 19790.42, "total_tokens": 86964928} +{"current_steps": 27645, "total_steps": 78105, "loss": 0.3752, "lr": 4.080551926775735e-06, "epoch": 1.7697330516612253, "percentage": 35.39, "elapsed_time": "1:13:14", "remaining_time": "2:13:42", "throughput": 19791.01, "total_tokens": 86981440} +{"current_steps": 27650, "total_steps": 78105, "loss": 0.3438, "lr": 4.08011905001845e-06, "epoch": 1.770053133602202, "percentage": 35.4, "elapsed_time": "1:13:15", "remaining_time": "2:13:41", "throughput": 19791.56, "total_tokens": 86997888} +{"current_steps": 27655, "total_steps": 78105, "loss": 0.3245, "lr": 4.079686094358248e-06, "epoch": 1.770373215543179, "percentage": 35.41, "elapsed_time": "1:13:16", "remaining_time": "2:13:40", "throughput": 19791.96, "total_tokens": 87012288} +{"current_steps": 27660, "total_steps": 78105, "loss": 0.2806, "lr": 4.079253059816753e-06, "epoch": 1.770693297484156, "percentage": 35.41, "elapsed_time": "1:13:17", "remaining_time": "2:13:39", "throughput": 19792.51, "total_tokens": 87028160} +{"current_steps": 27665, "total_steps": 78105, "loss": 0.4045, "lr": 4.078819946415586e-06, "epoch": 1.7710133794251328, "percentage": 35.42, "elapsed_time": "1:13:17", "remaining_time": "2:13:38", "throughput": 19793.04, "total_tokens": 87043968} +{"current_steps": 27670, "total_steps": 78105, "loss": 0.4153, "lr": 4.078386754176374e-06, "epoch": 1.7713334613661096, "percentage": 35.43, "elapsed_time": "1:13:18", "remaining_time": "2:13:37", "throughput": 19793.57, "total_tokens": 87059712} +{"current_steps": 27675, "total_steps": 78105, "loss": 0.3821, "lr": 4.077953483120749e-06, "epoch": 1.7716535433070866, "percentage": 35.43, "elapsed_time": "1:13:19", "remaining_time": "2:13:36", "throughput": 19794.33, "total_tokens": 87077504} +{"current_steps": 27680, "total_steps": 78105, "loss": 0.449, "lr": 4.077520133270347e-06, "epoch": 1.7719736252480636, "percentage": 35.44, "elapsed_time": "1:13:19", "remaining_time": "2:13:35", "throughput": 19794.81, "total_tokens": 87092544} +{"current_steps": 27685, "total_steps": 78105, "loss": 0.4026, "lr": 4.077086704646807e-06, "epoch": 1.7722937071890406, "percentage": 35.45, "elapsed_time": "1:13:20", "remaining_time": "2:13:34", "throughput": 19795.39, "total_tokens": 87108800} +{"current_steps": 27690, "total_steps": 78105, "loss": 0.486, "lr": 4.07665319727177e-06, "epoch": 1.7726137891300173, "percentage": 35.45, "elapsed_time": "1:13:21", "remaining_time": "2:13:33", "throughput": 19795.88, "total_tokens": 87123776} +{"current_steps": 27695, "total_steps": 78105, "loss": 0.4676, "lr": 4.076219611166886e-06, "epoch": 1.772933871070994, "percentage": 35.46, "elapsed_time": "1:13:21", "remaining_time": "2:13:32", "throughput": 19796.51, "total_tokens": 87140544} +{"current_steps": 27700, "total_steps": 78105, "loss": 0.4814, "lr": 4.075785946353805e-06, "epoch": 1.773253953011971, "percentage": 35.47, "elapsed_time": "1:13:22", "remaining_time": "2:13:31", "throughput": 19797.15, "total_tokens": 87157440} +{"current_steps": 27705, "total_steps": 78105, "loss": 0.3255, "lr": 4.075352202854181e-06, "epoch": 1.773574034952948, "percentage": 35.47, "elapsed_time": "1:13:23", "remaining_time": "2:13:30", "throughput": 19797.65, "total_tokens": 87172608} +{"current_steps": 27710, "total_steps": 78105, "loss": 0.296, "lr": 4.0749183806896745e-06, "epoch": 1.7738941168939248, "percentage": 35.48, "elapsed_time": "1:13:23", "remaining_time": "2:13:29", "throughput": 19798.33, "total_tokens": 87189888} +{"current_steps": 27715, "total_steps": 78105, "loss": 0.3075, "lr": 4.074484479881947e-06, "epoch": 1.7742141988349016, "percentage": 35.48, "elapsed_time": "1:13:24", "remaining_time": "2:13:28", "throughput": 19799.07, "total_tokens": 87207680} +{"current_steps": 27720, "total_steps": 78105, "loss": 0.2536, "lr": 4.0740505004526655e-06, "epoch": 1.7745342807758786, "percentage": 35.49, "elapsed_time": "1:13:25", "remaining_time": "2:13:27", "throughput": 19799.65, "total_tokens": 87223936} +{"current_steps": 27725, "total_steps": 78105, "loss": 0.3287, "lr": 4.073616442423502e-06, "epoch": 1.7748543627168556, "percentage": 35.5, "elapsed_time": "1:13:25", "remaining_time": "2:13:26", "throughput": 19800.08, "total_tokens": 87238656} +{"current_steps": 27730, "total_steps": 78105, "loss": 0.2594, "lr": 4.073182305816129e-06, "epoch": 1.7751744446578324, "percentage": 35.5, "elapsed_time": "1:13:26", "remaining_time": "2:13:25", "throughput": 19800.62, "total_tokens": 87254784} +{"current_steps": 27735, "total_steps": 78105, "loss": 0.2666, "lr": 4.072748090652226e-06, "epoch": 1.7754945265988091, "percentage": 35.51, "elapsed_time": "1:13:27", "remaining_time": "2:13:24", "throughput": 19801.21, "total_tokens": 87271424} +{"current_steps": 27740, "total_steps": 78105, "loss": 0.4311, "lr": 4.072313796953476e-06, "epoch": 1.7758146085397861, "percentage": 35.52, "elapsed_time": "1:13:28", "remaining_time": "2:13:23", "throughput": 19801.77, "total_tokens": 87287488} +{"current_steps": 27745, "total_steps": 78105, "loss": 0.2449, "lr": 4.071879424741565e-06, "epoch": 1.7761346904807631, "percentage": 35.52, "elapsed_time": "1:13:28", "remaining_time": "2:13:22", "throughput": 19802.26, "total_tokens": 87302784} +{"current_steps": 27750, "total_steps": 78105, "loss": 0.2954, "lr": 4.071444974038182e-06, "epoch": 1.77645477242174, "percentage": 35.53, "elapsed_time": "1:13:29", "remaining_time": "2:13:21", "throughput": 19802.8, "total_tokens": 87318848} +{"current_steps": 27755, "total_steps": 78105, "loss": 0.3946, "lr": 4.071010444865024e-06, "epoch": 1.7767748543627169, "percentage": 35.54, "elapsed_time": "1:13:30", "remaining_time": "2:13:20", "throughput": 19803.28, "total_tokens": 87334080} +{"current_steps": 27760, "total_steps": 78105, "loss": 0.3141, "lr": 4.070575837243786e-06, "epoch": 1.7770949363036936, "percentage": 35.54, "elapsed_time": "1:13:30", "remaining_time": "2:13:19", "throughput": 19803.78, "total_tokens": 87349504} +{"current_steps": 27765, "total_steps": 78105, "loss": 0.3426, "lr": 4.070141151196173e-06, "epoch": 1.7774150182446706, "percentage": 35.55, "elapsed_time": "1:13:31", "remaining_time": "2:13:18", "throughput": 19804.34, "total_tokens": 87365632} +{"current_steps": 27770, "total_steps": 78105, "loss": 0.391, "lr": 4.069706386743888e-06, "epoch": 1.7777351001856476, "percentage": 35.55, "elapsed_time": "1:13:32", "remaining_time": "2:13:17", "throughput": 19804.84, "total_tokens": 87381248} +{"current_steps": 27775, "total_steps": 78105, "loss": 0.3247, "lr": 4.0692715439086435e-06, "epoch": 1.7780551821266244, "percentage": 35.56, "elapsed_time": "1:13:32", "remaining_time": "2:13:16", "throughput": 19805.34, "total_tokens": 87396608} +{"current_steps": 27780, "total_steps": 78105, "loss": 0.3074, "lr": 4.068836622712151e-06, "epoch": 1.7783752640676012, "percentage": 35.57, "elapsed_time": "1:13:33", "remaining_time": "2:13:15", "throughput": 19805.83, "total_tokens": 87411840} +{"current_steps": 27785, "total_steps": 78105, "loss": 0.3801, "lr": 4.068401623176131e-06, "epoch": 1.7786953460085781, "percentage": 35.57, "elapsed_time": "1:13:34", "remaining_time": "2:13:14", "throughput": 19806.45, "total_tokens": 87428416} +{"current_steps": 27790, "total_steps": 78105, "loss": 0.2938, "lr": 4.067966545322302e-06, "epoch": 1.7790154279495551, "percentage": 35.58, "elapsed_time": "1:13:34", "remaining_time": "2:13:13", "throughput": 19806.9, "total_tokens": 87443776} +{"current_steps": 27795, "total_steps": 78105, "loss": 0.4359, "lr": 4.067531389172392e-06, "epoch": 1.7793355098905321, "percentage": 35.59, "elapsed_time": "1:13:35", "remaining_time": "2:13:12", "throughput": 19807.63, "total_tokens": 87461056} +{"current_steps": 27800, "total_steps": 78105, "loss": 0.2891, "lr": 4.067096154748129e-06, "epoch": 1.779655591831509, "percentage": 35.59, "elapsed_time": "1:13:36", "remaining_time": "2:13:11", "throughput": 19808.23, "total_tokens": 87477248} +{"current_steps": 27805, "total_steps": 78105, "loss": 0.3999, "lr": 4.066660842071246e-06, "epoch": 1.7799756737724857, "percentage": 35.6, "elapsed_time": "1:13:36", "remaining_time": "2:13:10", "throughput": 19808.91, "total_tokens": 87494912} +{"current_steps": 27810, "total_steps": 78105, "loss": 0.3158, "lr": 4.066225451163482e-06, "epoch": 1.7802957557134627, "percentage": 35.61, "elapsed_time": "1:13:37", "remaining_time": "2:13:09", "throughput": 19809.55, "total_tokens": 87511872} +{"current_steps": 27815, "total_steps": 78105, "loss": 0.4262, "lr": 4.065789982046576e-06, "epoch": 1.7806158376544396, "percentage": 35.61, "elapsed_time": "1:13:38", "remaining_time": "2:13:08", "throughput": 19810.08, "total_tokens": 87527488} +{"current_steps": 27820, "total_steps": 78105, "loss": 0.4085, "lr": 4.065354434742276e-06, "epoch": 1.7809359195954164, "percentage": 35.62, "elapsed_time": "1:13:39", "remaining_time": "2:13:07", "throughput": 19810.59, "total_tokens": 87543168} +{"current_steps": 27825, "total_steps": 78105, "loss": 0.2856, "lr": 4.064918809272328e-06, "epoch": 1.7812560015363932, "percentage": 35.63, "elapsed_time": "1:13:39", "remaining_time": "2:13:06", "throughput": 19811.16, "total_tokens": 87559424} +{"current_steps": 27830, "total_steps": 78105, "loss": 0.4188, "lr": 4.064483105658486e-06, "epoch": 1.7815760834773702, "percentage": 35.63, "elapsed_time": "1:13:40", "remaining_time": "2:13:05", "throughput": 19811.72, "total_tokens": 87575360} +{"current_steps": 27835, "total_steps": 78105, "loss": 0.3875, "lr": 4.064047323922506e-06, "epoch": 1.7818961654183472, "percentage": 35.64, "elapsed_time": "1:13:41", "remaining_time": "2:13:04", "throughput": 19812.3, "total_tokens": 87591680} +{"current_steps": 27840, "total_steps": 78105, "loss": 0.4363, "lr": 4.063611464086151e-06, "epoch": 1.782216247359324, "percentage": 35.64, "elapsed_time": "1:13:41", "remaining_time": "2:13:03", "throughput": 19812.78, "total_tokens": 87607168} +{"current_steps": 27845, "total_steps": 78105, "loss": 0.3222, "lr": 4.0631755261711835e-06, "epoch": 1.782536329300301, "percentage": 35.65, "elapsed_time": "1:13:42", "remaining_time": "2:13:02", "throughput": 19813.24, "total_tokens": 87622080} +{"current_steps": 27850, "total_steps": 78105, "loss": 0.4569, "lr": 4.0627395101993725e-06, "epoch": 1.7828564112412777, "percentage": 35.66, "elapsed_time": "1:13:43", "remaining_time": "2:13:01", "throughput": 19813.81, "total_tokens": 87638464} +{"current_steps": 27855, "total_steps": 78105, "loss": 0.2934, "lr": 4.0623034161924905e-06, "epoch": 1.7831764931822547, "percentage": 35.66, "elapsed_time": "1:13:43", "remaining_time": "2:13:00", "throughput": 19814.4, "total_tokens": 87654784} +{"current_steps": 27860, "total_steps": 78105, "loss": 0.4392, "lr": 4.061867244172313e-06, "epoch": 1.7834965751232317, "percentage": 35.67, "elapsed_time": "1:13:44", "remaining_time": "2:12:59", "throughput": 19814.83, "total_tokens": 87669248} +{"current_steps": 27865, "total_steps": 78105, "loss": 0.3091, "lr": 4.061430994160621e-06, "epoch": 1.7838166570642084, "percentage": 35.68, "elapsed_time": "1:13:45", "remaining_time": "2:12:58", "throughput": 19815.34, "total_tokens": 87684864} +{"current_steps": 27870, "total_steps": 78105, "loss": 0.3996, "lr": 4.060994666179199e-06, "epoch": 1.7841367390051852, "percentage": 35.68, "elapsed_time": "1:13:45", "remaining_time": "2:12:57", "throughput": 19815.83, "total_tokens": 87700480} +{"current_steps": 27875, "total_steps": 78105, "loss": 0.4457, "lr": 4.0605582602498336e-06, "epoch": 1.7844568209461622, "percentage": 35.69, "elapsed_time": "1:13:46", "remaining_time": "2:12:56", "throughput": 19816.34, "total_tokens": 87716160} +{"current_steps": 27880, "total_steps": 78105, "loss": 0.2791, "lr": 4.060121776394318e-06, "epoch": 1.7847769028871392, "percentage": 35.7, "elapsed_time": "1:13:47", "remaining_time": "2:12:55", "throughput": 19816.88, "total_tokens": 87732160} +{"current_steps": 27885, "total_steps": 78105, "loss": 0.4283, "lr": 4.0596852146344465e-06, "epoch": 1.785096984828116, "percentage": 35.7, "elapsed_time": "1:13:47", "remaining_time": "2:12:54", "throughput": 19817.38, "total_tokens": 87747968} +{"current_steps": 27890, "total_steps": 78105, "loss": 0.3028, "lr": 4.05924857499202e-06, "epoch": 1.7854170667690927, "percentage": 35.71, "elapsed_time": "1:13:48", "remaining_time": "2:12:53", "throughput": 19817.88, "total_tokens": 87763456} +{"current_steps": 27895, "total_steps": 78105, "loss": 0.226, "lr": 4.0588118574888415e-06, "epoch": 1.7857371487100697, "percentage": 35.71, "elapsed_time": "1:13:49", "remaining_time": "2:12:52", "throughput": 19818.39, "total_tokens": 87778752} +{"current_steps": 27900, "total_steps": 78105, "loss": 0.2787, "lr": 4.0583750621467175e-06, "epoch": 1.7860572306510467, "percentage": 35.72, "elapsed_time": "1:13:49", "remaining_time": "2:12:51", "throughput": 19818.9, "total_tokens": 87794496} +{"current_steps": 27905, "total_steps": 78105, "loss": 0.243, "lr": 4.0579381889874615e-06, "epoch": 1.7863773125920237, "percentage": 35.73, "elapsed_time": "1:13:50", "remaining_time": "2:12:50", "throughput": 19819.42, "total_tokens": 87810368} +{"current_steps": 27910, "total_steps": 78105, "loss": 0.3199, "lr": 4.057501238032886e-06, "epoch": 1.7866973945330005, "percentage": 35.73, "elapsed_time": "1:13:51", "remaining_time": "2:12:49", "throughput": 19820.03, "total_tokens": 87827136} +{"current_steps": 27915, "total_steps": 78105, "loss": 0.3267, "lr": 4.057064209304813e-06, "epoch": 1.7870174764739772, "percentage": 35.74, "elapsed_time": "1:13:51", "remaining_time": "2:12:48", "throughput": 19820.76, "total_tokens": 87844800} +{"current_steps": 27920, "total_steps": 78105, "loss": 0.2677, "lr": 4.056627102825062e-06, "epoch": 1.7873375584149542, "percentage": 35.75, "elapsed_time": "1:13:53", "remaining_time": "2:12:48", "throughput": 19822.02, "total_tokens": 87873472} +{"current_steps": 27925, "total_steps": 78105, "loss": 0.2775, "lr": 4.056189918615464e-06, "epoch": 1.7876576403559312, "percentage": 35.75, "elapsed_time": "1:13:53", "remaining_time": "2:12:47", "throughput": 19822.46, "total_tokens": 87888448} +{"current_steps": 27930, "total_steps": 78105, "loss": 0.3773, "lr": 4.0557526566978454e-06, "epoch": 1.787977722296908, "percentage": 35.76, "elapsed_time": "1:13:54", "remaining_time": "2:12:46", "throughput": 19822.99, "total_tokens": 87904128} +{"current_steps": 27935, "total_steps": 78105, "loss": 0.3589, "lr": 4.055315317094044e-06, "epoch": 1.7882978042378848, "percentage": 35.77, "elapsed_time": "1:13:55", "remaining_time": "2:12:45", "throughput": 19823.57, "total_tokens": 87920384} +{"current_steps": 27940, "total_steps": 78105, "loss": 0.2476, "lr": 4.054877899825896e-06, "epoch": 1.7886178861788617, "percentage": 35.77, "elapsed_time": "1:13:55", "remaining_time": "2:12:44", "throughput": 19824.04, "total_tokens": 87935616} +{"current_steps": 27945, "total_steps": 78105, "loss": 0.3289, "lr": 4.054440404915245e-06, "epoch": 1.7889379681198387, "percentage": 35.78, "elapsed_time": "1:13:56", "remaining_time": "2:12:43", "throughput": 19824.53, "total_tokens": 87951424} +{"current_steps": 27950, "total_steps": 78105, "loss": 0.2972, "lr": 4.054002832383936e-06, "epoch": 1.7892580500608157, "percentage": 35.79, "elapsed_time": "1:13:57", "remaining_time": "2:12:42", "throughput": 19825.06, "total_tokens": 87967296} +{"current_steps": 27955, "total_steps": 78105, "loss": 0.5008, "lr": 4.053565182253822e-06, "epoch": 1.7895781320017925, "percentage": 35.79, "elapsed_time": "1:13:57", "remaining_time": "2:12:41", "throughput": 19825.47, "total_tokens": 87982144} +{"current_steps": 27960, "total_steps": 78105, "loss": 0.4279, "lr": 4.053127454546755e-06, "epoch": 1.7898982139427693, "percentage": 35.8, "elapsed_time": "1:13:58", "remaining_time": "2:12:40", "throughput": 19825.91, "total_tokens": 87997120} +{"current_steps": 27965, "total_steps": 78105, "loss": 0.3353, "lr": 4.0526896492845914e-06, "epoch": 1.7902182958837463, "percentage": 35.8, "elapsed_time": "1:13:59", "remaining_time": "2:12:39", "throughput": 19826.37, "total_tokens": 88012288} +{"current_steps": 27970, "total_steps": 78105, "loss": 0.3642, "lr": 4.0522517664891944e-06, "epoch": 1.7905383778247232, "percentage": 35.81, "elapsed_time": "1:13:59", "remaining_time": "2:12:38", "throughput": 19826.85, "total_tokens": 88027776} +{"current_steps": 27975, "total_steps": 78105, "loss": 0.2299, "lr": 4.051813806182431e-06, "epoch": 1.7908584597657, "percentage": 35.82, "elapsed_time": "1:14:00", "remaining_time": "2:12:37", "throughput": 19827.35, "total_tokens": 88042816} +{"current_steps": 27980, "total_steps": 78105, "loss": 0.2514, "lr": 4.051375768386168e-06, "epoch": 1.7911785417066768, "percentage": 35.82, "elapsed_time": "1:14:01", "remaining_time": "2:12:36", "throughput": 19827.8, "total_tokens": 88058112} +{"current_steps": 27985, "total_steps": 78105, "loss": 0.323, "lr": 4.050937653122281e-06, "epoch": 1.7914986236476538, "percentage": 35.83, "elapsed_time": "1:14:01", "remaining_time": "2:12:35", "throughput": 19828.39, "total_tokens": 88074496} +{"current_steps": 27990, "total_steps": 78105, "loss": 0.3806, "lr": 4.0504994604126446e-06, "epoch": 1.7918187055886308, "percentage": 35.84, "elapsed_time": "1:14:02", "remaining_time": "2:12:34", "throughput": 19828.93, "total_tokens": 88090496} +{"current_steps": 27995, "total_steps": 78105, "loss": 0.4742, "lr": 4.0500611902791435e-06, "epoch": 1.7921387875296075, "percentage": 35.84, "elapsed_time": "1:14:03", "remaining_time": "2:12:33", "throughput": 19829.41, "total_tokens": 88105920} +{"current_steps": 28000, "total_steps": 78105, "loss": 0.5227, "lr": 4.049622842743659e-06, "epoch": 1.7924588694705843, "percentage": 35.85, "elapsed_time": "1:14:03", "remaining_time": "2:12:32", "throughput": 19829.92, "total_tokens": 88121600} +{"current_steps": 28005, "total_steps": 78105, "loss": 0.3019, "lr": 4.049184417828081e-06, "epoch": 1.7927789514115613, "percentage": 35.86, "elapsed_time": "1:14:04", "remaining_time": "2:12:31", "throughput": 19830.44, "total_tokens": 88137216} +{"current_steps": 28010, "total_steps": 78105, "loss": 0.346, "lr": 4.048745915554303e-06, "epoch": 1.7930990333525383, "percentage": 35.86, "elapsed_time": "1:14:05", "remaining_time": "2:12:30", "throughput": 19830.94, "total_tokens": 88152832} +{"current_steps": 28015, "total_steps": 78105, "loss": 0.4016, "lr": 4.048307335944221e-06, "epoch": 1.7934191152935153, "percentage": 35.87, "elapsed_time": "1:14:05", "remaining_time": "2:12:29", "throughput": 19831.5, "total_tokens": 88169024} +{"current_steps": 28020, "total_steps": 78105, "loss": 0.2495, "lr": 4.047868679019736e-06, "epoch": 1.793739197234492, "percentage": 35.87, "elapsed_time": "1:14:06", "remaining_time": "2:12:28", "throughput": 19831.97, "total_tokens": 88184064} +{"current_steps": 28025, "total_steps": 78105, "loss": 0.4761, "lr": 4.047429944802752e-06, "epoch": 1.7940592791754688, "percentage": 35.88, "elapsed_time": "1:14:07", "remaining_time": "2:12:27", "throughput": 19832.39, "total_tokens": 88198848} +{"current_steps": 28030, "total_steps": 78105, "loss": 0.4764, "lr": 4.046991133315177e-06, "epoch": 1.7943793611164458, "percentage": 35.89, "elapsed_time": "1:14:07", "remaining_time": "2:12:26", "throughput": 19832.88, "total_tokens": 88214144} +{"current_steps": 28035, "total_steps": 78105, "loss": 0.316, "lr": 4.046552244578922e-06, "epoch": 1.7946994430574228, "percentage": 35.89, "elapsed_time": "1:14:08", "remaining_time": "2:12:25", "throughput": 19833.46, "total_tokens": 88230208} +{"current_steps": 28040, "total_steps": 78105, "loss": 0.2648, "lr": 4.046113278615904e-06, "epoch": 1.7950195249983996, "percentage": 35.9, "elapsed_time": "1:14:09", "remaining_time": "2:12:24", "throughput": 19833.94, "total_tokens": 88245440} +{"current_steps": 28045, "total_steps": 78105, "loss": 0.298, "lr": 4.045674235448042e-06, "epoch": 1.7953396069393763, "percentage": 35.91, "elapsed_time": "1:14:09", "remaining_time": "2:12:22", "throughput": 19834.38, "total_tokens": 88260224} +{"current_steps": 28050, "total_steps": 78105, "loss": 0.3008, "lr": 4.04523511509726e-06, "epoch": 1.7956596888803533, "percentage": 35.91, "elapsed_time": "1:14:10", "remaining_time": "2:12:21", "throughput": 19834.93, "total_tokens": 88276032} +{"current_steps": 28055, "total_steps": 78105, "loss": 0.2457, "lr": 4.044795917585485e-06, "epoch": 1.7959797708213303, "percentage": 35.92, "elapsed_time": "1:14:11", "remaining_time": "2:12:20", "throughput": 19835.39, "total_tokens": 88290816} +{"current_steps": 28060, "total_steps": 78105, "loss": 0.3286, "lr": 4.044356642934649e-06, "epoch": 1.7962998527623073, "percentage": 35.93, "elapsed_time": "1:14:12", "remaining_time": "2:12:20", "throughput": 19833.56, "total_tokens": 88306112} +{"current_steps": 28065, "total_steps": 78105, "loss": 0.2843, "lr": 4.043917291166686e-06, "epoch": 1.796619934703284, "percentage": 35.93, "elapsed_time": "1:14:13", "remaining_time": "2:12:19", "throughput": 19833.61, "total_tokens": 88322112} +{"current_steps": 28070, "total_steps": 78105, "loss": 0.4775, "lr": 4.043477862303535e-06, "epoch": 1.7969400166442608, "percentage": 35.94, "elapsed_time": "1:14:13", "remaining_time": "2:12:19", "throughput": 19834.19, "total_tokens": 88338688} +{"current_steps": 28075, "total_steps": 78105, "loss": 0.3853, "lr": 4.0430383563671395e-06, "epoch": 1.7972600985852378, "percentage": 35.95, "elapsed_time": "1:14:14", "remaining_time": "2:12:18", "throughput": 19834.82, "total_tokens": 88355584} +{"current_steps": 28080, "total_steps": 78105, "loss": 0.4064, "lr": 4.042598773379447e-06, "epoch": 1.7975801805262148, "percentage": 35.95, "elapsed_time": "1:14:15", "remaining_time": "2:12:17", "throughput": 19835.32, "total_tokens": 88371328} +{"current_steps": 28085, "total_steps": 78105, "loss": 0.358, "lr": 4.042159113362406e-06, "epoch": 1.7979002624671916, "percentage": 35.96, "elapsed_time": "1:14:15", "remaining_time": "2:12:16", "throughput": 19835.96, "total_tokens": 88388608} +{"current_steps": 28090, "total_steps": 78105, "loss": 0.369, "lr": 4.041719376337971e-06, "epoch": 1.7982203444081684, "percentage": 35.96, "elapsed_time": "1:14:16", "remaining_time": "2:12:15", "throughput": 19836.43, "total_tokens": 88404096} +{"current_steps": 28095, "total_steps": 78105, "loss": 0.276, "lr": 4.041279562328102e-06, "epoch": 1.7985404263491453, "percentage": 35.97, "elapsed_time": "1:14:17", "remaining_time": "2:12:14", "throughput": 19836.87, "total_tokens": 88418752} +{"current_steps": 28100, "total_steps": 78105, "loss": 0.4765, "lr": 4.040839671354759e-06, "epoch": 1.7988605082901223, "percentage": 35.98, "elapsed_time": "1:14:17", "remaining_time": "2:12:13", "throughput": 19837.4, "total_tokens": 88434752} +{"current_steps": 28105, "total_steps": 78105, "loss": 0.3977, "lr": 4.04039970343991e-06, "epoch": 1.799180590231099, "percentage": 35.98, "elapsed_time": "1:14:18", "remaining_time": "2:12:12", "throughput": 19837.89, "total_tokens": 88450304} +{"current_steps": 28110, "total_steps": 78105, "loss": 0.3332, "lr": 4.039959658605522e-06, "epoch": 1.799500672172076, "percentage": 35.99, "elapsed_time": "1:14:19", "remaining_time": "2:12:11", "throughput": 19838.4, "total_tokens": 88465984} +{"current_steps": 28115, "total_steps": 78105, "loss": 0.264, "lr": 4.039519536873571e-06, "epoch": 1.7998207541130529, "percentage": 36.0, "elapsed_time": "1:14:19", "remaining_time": "2:12:10", "throughput": 19838.83, "total_tokens": 88480576} +{"current_steps": 28120, "total_steps": 78105, "loss": 0.2865, "lr": 4.039079338266033e-06, "epoch": 1.8001408360540299, "percentage": 36.0, "elapsed_time": "1:14:20", "remaining_time": "2:12:09", "throughput": 19839.38, "total_tokens": 88497024} +{"current_steps": 28125, "total_steps": 78105, "loss": 0.3221, "lr": 4.038639062804889e-06, "epoch": 1.8004609179950068, "percentage": 36.01, "elapsed_time": "1:14:21", "remaining_time": "2:12:08", "throughput": 19839.86, "total_tokens": 88512448} +{"current_steps": 28130, "total_steps": 78105, "loss": 0.2948, "lr": 4.038198710512126e-06, "epoch": 1.8007809999359836, "percentage": 36.02, "elapsed_time": "1:14:22", "remaining_time": "2:12:07", "throughput": 19840.37, "total_tokens": 88528192} +{"current_steps": 28135, "total_steps": 78105, "loss": 0.2623, "lr": 4.0377582814097305e-06, "epoch": 1.8011010818769604, "percentage": 36.02, "elapsed_time": "1:14:22", "remaining_time": "2:12:06", "throughput": 19840.83, "total_tokens": 88543424} +{"current_steps": 28140, "total_steps": 78105, "loss": 0.2644, "lr": 4.037317775519697e-06, "epoch": 1.8014211638179374, "percentage": 36.03, "elapsed_time": "1:14:23", "remaining_time": "2:12:05", "throughput": 19841.31, "total_tokens": 88558784} +{"current_steps": 28145, "total_steps": 78105, "loss": 0.2883, "lr": 4.036877192864021e-06, "epoch": 1.8017412457589144, "percentage": 36.03, "elapsed_time": "1:14:23", "remaining_time": "2:12:04", "throughput": 19841.71, "total_tokens": 88573376} +{"current_steps": 28150, "total_steps": 78105, "loss": 0.3588, "lr": 4.036436533464703e-06, "epoch": 1.8020613276998911, "percentage": 36.04, "elapsed_time": "1:14:24", "remaining_time": "2:12:03", "throughput": 19842.24, "total_tokens": 88589504} +{"current_steps": 28155, "total_steps": 78105, "loss": 0.4704, "lr": 4.035995797343748e-06, "epoch": 1.802381409640868, "percentage": 36.05, "elapsed_time": "1:14:25", "remaining_time": "2:12:02", "throughput": 19842.74, "total_tokens": 88605184} +{"current_steps": 28160, "total_steps": 78105, "loss": 0.3376, "lr": 4.035554984523163e-06, "epoch": 1.802701491581845, "percentage": 36.05, "elapsed_time": "1:14:26", "remaining_time": "2:12:01", "throughput": 19843.27, "total_tokens": 88620928} +{"current_steps": 28165, "total_steps": 78105, "loss": 0.4225, "lr": 4.035114095024963e-06, "epoch": 1.8030215735228219, "percentage": 36.06, "elapsed_time": "1:14:26", "remaining_time": "2:12:00", "throughput": 19843.91, "total_tokens": 88637568} +{"current_steps": 28170, "total_steps": 78105, "loss": 0.257, "lr": 4.034673128871159e-06, "epoch": 1.8033416554637989, "percentage": 36.07, "elapsed_time": "1:14:27", "remaining_time": "2:11:59", "throughput": 19844.7, "total_tokens": 88656128} +{"current_steps": 28175, "total_steps": 78105, "loss": 0.4035, "lr": 4.034232086083772e-06, "epoch": 1.8036617374047756, "percentage": 36.07, "elapsed_time": "1:14:28", "remaining_time": "2:11:58", "throughput": 19845.19, "total_tokens": 88671360} +{"current_steps": 28180, "total_steps": 78105, "loss": 0.3485, "lr": 4.033790966684828e-06, "epoch": 1.8039818193457524, "percentage": 36.08, "elapsed_time": "1:14:28", "remaining_time": "2:11:57", "throughput": 19845.67, "total_tokens": 88686784} +{"current_steps": 28185, "total_steps": 78105, "loss": 0.3891, "lr": 4.033349770696351e-06, "epoch": 1.8043019012867294, "percentage": 36.09, "elapsed_time": "1:14:29", "remaining_time": "2:11:56", "throughput": 19846.26, "total_tokens": 88703616} +{"current_steps": 28190, "total_steps": 78105, "loss": 0.451, "lr": 4.032908498140373e-06, "epoch": 1.8046219832277064, "percentage": 36.09, "elapsed_time": "1:14:30", "remaining_time": "2:11:55", "throughput": 19846.74, "total_tokens": 88719168} +{"current_steps": 28195, "total_steps": 78105, "loss": 0.3382, "lr": 4.0324671490389306e-06, "epoch": 1.8049420651686832, "percentage": 36.1, "elapsed_time": "1:14:30", "remaining_time": "2:11:54", "throughput": 19847.17, "total_tokens": 88734080} +{"current_steps": 28200, "total_steps": 78105, "loss": 0.2028, "lr": 4.03202572341406e-06, "epoch": 1.80526214710966, "percentage": 36.11, "elapsed_time": "1:14:31", "remaining_time": "2:11:53", "throughput": 19847.7, "total_tokens": 88750464} +{"current_steps": 28205, "total_steps": 78105, "loss": 0.4104, "lr": 4.031584221287806e-06, "epoch": 1.805582229050637, "percentage": 36.11, "elapsed_time": "1:14:32", "remaining_time": "2:11:52", "throughput": 19848.32, "total_tokens": 88767616} +{"current_steps": 28210, "total_steps": 78105, "loss": 0.2934, "lr": 4.031142642682213e-06, "epoch": 1.805902310991614, "percentage": 36.12, "elapsed_time": "1:14:32", "remaining_time": "2:11:51", "throughput": 19848.84, "total_tokens": 88783552} +{"current_steps": 28215, "total_steps": 78105, "loss": 0.4233, "lr": 4.030700987619332e-06, "epoch": 1.806222392932591, "percentage": 36.12, "elapsed_time": "1:14:33", "remaining_time": "2:11:50", "throughput": 19849.27, "total_tokens": 88798464} +{"current_steps": 28220, "total_steps": 78105, "loss": 0.2509, "lr": 4.030259256121217e-06, "epoch": 1.8065424748735677, "percentage": 36.13, "elapsed_time": "1:14:34", "remaining_time": "2:11:49", "throughput": 19849.68, "total_tokens": 88813376} +{"current_steps": 28225, "total_steps": 78105, "loss": 0.3985, "lr": 4.029817448209926e-06, "epoch": 1.8068625568145444, "percentage": 36.14, "elapsed_time": "1:14:34", "remaining_time": "2:11:48", "throughput": 19850.19, "total_tokens": 88829184} +{"current_steps": 28230, "total_steps": 78105, "loss": 0.2694, "lr": 4.029375563907519e-06, "epoch": 1.8071826387555214, "percentage": 36.14, "elapsed_time": "1:14:35", "remaining_time": "2:11:47", "throughput": 19850.64, "total_tokens": 88844160} +{"current_steps": 28235, "total_steps": 78105, "loss": 0.2542, "lr": 4.0289336032360635e-06, "epoch": 1.8075027206964984, "percentage": 36.15, "elapsed_time": "1:14:36", "remaining_time": "2:11:46", "throughput": 19851.21, "total_tokens": 88860288} +{"current_steps": 28240, "total_steps": 78105, "loss": 0.2856, "lr": 4.028491566217626e-06, "epoch": 1.8078228026374752, "percentage": 36.16, "elapsed_time": "1:14:36", "remaining_time": "2:11:45", "throughput": 19851.65, "total_tokens": 88875264} +{"current_steps": 28245, "total_steps": 78105, "loss": 0.4487, "lr": 4.028049452874283e-06, "epoch": 1.808142884578452, "percentage": 36.16, "elapsed_time": "1:14:37", "remaining_time": "2:11:44", "throughput": 19852.12, "total_tokens": 88890816} +{"current_steps": 28250, "total_steps": 78105, "loss": 0.2714, "lr": 4.027607263228109e-06, "epoch": 1.808462966519429, "percentage": 36.17, "elapsed_time": "1:14:38", "remaining_time": "2:11:43", "throughput": 19852.65, "total_tokens": 88906816} +{"current_steps": 28255, "total_steps": 78105, "loss": 0.2811, "lr": 4.0271649973011864e-06, "epoch": 1.808783048460406, "percentage": 36.18, "elapsed_time": "1:14:39", "remaining_time": "2:11:42", "throughput": 19852.96, "total_tokens": 88922496} +{"current_steps": 28260, "total_steps": 78105, "loss": 0.4157, "lr": 4.026722655115598e-06, "epoch": 1.8091031304013827, "percentage": 36.18, "elapsed_time": "1:14:39", "remaining_time": "2:11:41", "throughput": 19853.54, "total_tokens": 88939072} +{"current_steps": 28265, "total_steps": 78105, "loss": 0.2673, "lr": 4.026280236693433e-06, "epoch": 1.8094232123423597, "percentage": 36.19, "elapsed_time": "1:14:40", "remaining_time": "2:11:40", "throughput": 19854.02, "total_tokens": 88954432} +{"current_steps": 28270, "total_steps": 78105, "loss": 0.3804, "lr": 4.025837742056782e-06, "epoch": 1.8097432942833365, "percentage": 36.19, "elapsed_time": "1:14:41", "remaining_time": "2:11:39", "throughput": 19854.46, "total_tokens": 88969408} +{"current_steps": 28275, "total_steps": 78105, "loss": 0.3138, "lr": 4.025395171227742e-06, "epoch": 1.8100633762243135, "percentage": 36.2, "elapsed_time": "1:14:41", "remaining_time": "2:11:38", "throughput": 19854.98, "total_tokens": 88985280} +{"current_steps": 28280, "total_steps": 78105, "loss": 0.1757, "lr": 4.024952524228413e-06, "epoch": 1.8103834581652904, "percentage": 36.21, "elapsed_time": "1:14:42", "remaining_time": "2:11:37", "throughput": 19855.58, "total_tokens": 89001664} +{"current_steps": 28285, "total_steps": 78105, "loss": 0.2732, "lr": 4.024509801080899e-06, "epoch": 1.8107035401062672, "percentage": 36.21, "elapsed_time": "1:14:43", "remaining_time": "2:11:36", "throughput": 19855.98, "total_tokens": 89016320} +{"current_steps": 28290, "total_steps": 78105, "loss": 0.384, "lr": 4.024067001807305e-06, "epoch": 1.811023622047244, "percentage": 36.22, "elapsed_time": "1:14:43", "remaining_time": "2:11:35", "throughput": 19856.48, "total_tokens": 89032000} +{"current_steps": 28295, "total_steps": 78105, "loss": 0.2746, "lr": 4.023624126429743e-06, "epoch": 1.811343703988221, "percentage": 36.23, "elapsed_time": "1:14:44", "remaining_time": "2:11:34", "throughput": 19856.93, "total_tokens": 89047104} +{"current_steps": 28300, "total_steps": 78105, "loss": 0.3309, "lr": 4.02318117497033e-06, "epoch": 1.811663785929198, "percentage": 36.23, "elapsed_time": "1:14:45", "remaining_time": "2:11:33", "throughput": 19857.49, "total_tokens": 89063552} +{"current_steps": 28305, "total_steps": 78105, "loss": 0.2325, "lr": 4.022738147451183e-06, "epoch": 1.8119838678701747, "percentage": 36.24, "elapsed_time": "1:14:45", "remaining_time": "2:11:32", "throughput": 19858.08, "total_tokens": 89080320} +{"current_steps": 28310, "total_steps": 78105, "loss": 0.453, "lr": 4.022295043894424e-06, "epoch": 1.8123039498111515, "percentage": 36.25, "elapsed_time": "1:14:46", "remaining_time": "2:11:31", "throughput": 19858.77, "total_tokens": 89098112} +{"current_steps": 28315, "total_steps": 78105, "loss": 0.263, "lr": 4.02185186432218e-06, "epoch": 1.8126240317521285, "percentage": 36.25, "elapsed_time": "1:14:47", "remaining_time": "2:11:30", "throughput": 19859.2, "total_tokens": 89113088} +{"current_steps": 28320, "total_steps": 78105, "loss": 0.3333, "lr": 4.021408608756581e-06, "epoch": 1.8129441136931055, "percentage": 36.26, "elapsed_time": "1:14:47", "remaining_time": "2:11:29", "throughput": 19859.62, "total_tokens": 89128192} +{"current_steps": 28325, "total_steps": 78105, "loss": 0.3234, "lr": 4.020965277219761e-06, "epoch": 1.8132641956340825, "percentage": 36.27, "elapsed_time": "1:14:48", "remaining_time": "2:11:28", "throughput": 19860.15, "total_tokens": 89144256} +{"current_steps": 28330, "total_steps": 78105, "loss": 0.3372, "lr": 4.020521869733858e-06, "epoch": 1.8135842775750592, "percentage": 36.27, "elapsed_time": "1:14:49", "remaining_time": "2:11:27", "throughput": 19860.63, "total_tokens": 89159744} +{"current_steps": 28335, "total_steps": 78105, "loss": 0.3835, "lr": 4.020078386321011e-06, "epoch": 1.813904359516036, "percentage": 36.28, "elapsed_time": "1:14:49", "remaining_time": "2:11:26", "throughput": 19861.1, "total_tokens": 89175424} +{"current_steps": 28340, "total_steps": 78105, "loss": 0.4467, "lr": 4.019634827003369e-06, "epoch": 1.814224441457013, "percentage": 36.28, "elapsed_time": "1:14:50", "remaining_time": "2:11:25", "throughput": 19861.65, "total_tokens": 89191552} +{"current_steps": 28345, "total_steps": 78105, "loss": 0.3148, "lr": 4.019191191803078e-06, "epoch": 1.81454452339799, "percentage": 36.29, "elapsed_time": "1:14:51", "remaining_time": "2:11:24", "throughput": 19862.25, "total_tokens": 89208384} +{"current_steps": 28350, "total_steps": 78105, "loss": 0.3173, "lr": 4.0187474807422935e-06, "epoch": 1.8148646053389668, "percentage": 36.3, "elapsed_time": "1:14:52", "remaining_time": "2:11:23", "throughput": 19862.71, "total_tokens": 89223488} +{"current_steps": 28355, "total_steps": 78105, "loss": 0.2967, "lr": 4.0183036938431695e-06, "epoch": 1.8151846872799435, "percentage": 36.3, "elapsed_time": "1:14:52", "remaining_time": "2:11:22", "throughput": 19863.17, "total_tokens": 89238528} +{"current_steps": 28360, "total_steps": 78105, "loss": 0.3506, "lr": 4.017859831127868e-06, "epoch": 1.8155047692209205, "percentage": 36.31, "elapsed_time": "1:14:53", "remaining_time": "2:11:21", "throughput": 19863.7, "total_tokens": 89254592} +{"current_steps": 28365, "total_steps": 78105, "loss": 0.2924, "lr": 4.017415892618552e-06, "epoch": 1.8158248511618975, "percentage": 36.32, "elapsed_time": "1:14:54", "remaining_time": "2:11:20", "throughput": 19864.19, "total_tokens": 89269824} +{"current_steps": 28370, "total_steps": 78105, "loss": 0.3246, "lr": 4.0169718783373915e-06, "epoch": 1.8161449331028743, "percentage": 36.32, "elapsed_time": "1:14:54", "remaining_time": "2:11:19", "throughput": 19864.74, "total_tokens": 89285632} +{"current_steps": 28375, "total_steps": 78105, "loss": 0.3985, "lr": 4.0165277883065565e-06, "epoch": 1.8164650150438513, "percentage": 36.33, "elapsed_time": "1:14:55", "remaining_time": "2:11:18", "throughput": 19865.24, "total_tokens": 89301248} +{"current_steps": 28380, "total_steps": 78105, "loss": 0.4508, "lr": 4.016083622548222e-06, "epoch": 1.816785096984828, "percentage": 36.34, "elapsed_time": "1:14:56", "remaining_time": "2:11:17", "throughput": 19865.81, "total_tokens": 89317504} +{"current_steps": 28385, "total_steps": 78105, "loss": 0.457, "lr": 4.01563938108457e-06, "epoch": 1.817105178925805, "percentage": 36.34, "elapsed_time": "1:14:56", "remaining_time": "2:11:16", "throughput": 19866.34, "total_tokens": 89333632} +{"current_steps": 28390, "total_steps": 78105, "loss": 0.2177, "lr": 4.015195063937781e-06, "epoch": 1.817425260866782, "percentage": 36.35, "elapsed_time": "1:14:57", "remaining_time": "2:11:15", "throughput": 19866.8, "total_tokens": 89349184} +{"current_steps": 28395, "total_steps": 78105, "loss": 0.3487, "lr": 4.014750671130044e-06, "epoch": 1.8177453428077588, "percentage": 36.35, "elapsed_time": "1:14:58", "remaining_time": "2:11:14", "throughput": 19867.35, "total_tokens": 89365248} +{"current_steps": 28400, "total_steps": 78105, "loss": 0.2793, "lr": 4.014306202683548e-06, "epoch": 1.8180654247487356, "percentage": 36.36, "elapsed_time": "1:14:58", "remaining_time": "2:11:13", "throughput": 19867.87, "total_tokens": 89381056} +{"current_steps": 28405, "total_steps": 78105, "loss": 0.2725, "lr": 4.013861658620487e-06, "epoch": 1.8183855066897125, "percentage": 36.37, "elapsed_time": "1:14:59", "remaining_time": "2:11:12", "throughput": 19868.39, "total_tokens": 89396800} +{"current_steps": 28410, "total_steps": 78105, "loss": 0.403, "lr": 4.013417038963061e-06, "epoch": 1.8187055886306895, "percentage": 36.37, "elapsed_time": "1:15:00", "remaining_time": "2:11:11", "throughput": 19868.91, "total_tokens": 89412992} +{"current_steps": 28415, "total_steps": 78105, "loss": 0.2654, "lr": 4.012972343733471e-06, "epoch": 1.8190256705716663, "percentage": 36.38, "elapsed_time": "1:15:00", "remaining_time": "2:11:10", "throughput": 19869.45, "total_tokens": 89429184} +{"current_steps": 28420, "total_steps": 78105, "loss": 0.2509, "lr": 4.012527572953923e-06, "epoch": 1.819345752512643, "percentage": 36.39, "elapsed_time": "1:15:01", "remaining_time": "2:11:09", "throughput": 19870.0, "total_tokens": 89445312} +{"current_steps": 28425, "total_steps": 78105, "loss": 0.3218, "lr": 4.012082726646627e-06, "epoch": 1.81966583445362, "percentage": 36.39, "elapsed_time": "1:15:02", "remaining_time": "2:11:08", "throughput": 19870.46, "total_tokens": 89460160} +{"current_steps": 28430, "total_steps": 78105, "loss": 0.333, "lr": 4.011637804833795e-06, "epoch": 1.819985916394597, "percentage": 36.4, "elapsed_time": "1:15:02", "remaining_time": "2:11:07", "throughput": 19870.9, "total_tokens": 89475200} +{"current_steps": 28435, "total_steps": 78105, "loss": 0.4607, "lr": 4.011192807537645e-06, "epoch": 1.820305998335574, "percentage": 36.41, "elapsed_time": "1:15:03", "remaining_time": "2:11:06", "throughput": 19871.39, "total_tokens": 89490944} +{"current_steps": 28440, "total_steps": 78105, "loss": 0.3291, "lr": 4.010747734780398e-06, "epoch": 1.8206260802765508, "percentage": 36.41, "elapsed_time": "1:15:04", "remaining_time": "2:11:05", "throughput": 19871.8, "total_tokens": 89505600} +{"current_steps": 28445, "total_steps": 78105, "loss": 0.348, "lr": 4.0103025865842785e-06, "epoch": 1.8209461622175276, "percentage": 36.42, "elapsed_time": "1:15:04", "remaining_time": "2:11:04", "throughput": 19872.23, "total_tokens": 89520704} +{"current_steps": 28450, "total_steps": 78105, "loss": 0.3568, "lr": 4.009857362971514e-06, "epoch": 1.8212662441585046, "percentage": 36.43, "elapsed_time": "1:15:05", "remaining_time": "2:11:03", "throughput": 19872.78, "total_tokens": 89537152} +{"current_steps": 28455, "total_steps": 78105, "loss": 0.31, "lr": 4.009412063964338e-06, "epoch": 1.8215863260994816, "percentage": 36.43, "elapsed_time": "1:15:06", "remaining_time": "2:11:02", "throughput": 19873.55, "total_tokens": 89556032} +{"current_steps": 28460, "total_steps": 78105, "loss": 0.2255, "lr": 4.008966689584985e-06, "epoch": 1.8219064080404583, "percentage": 36.44, "elapsed_time": "1:15:06", "remaining_time": "2:11:01", "throughput": 19873.96, "total_tokens": 89570880} +{"current_steps": 28465, "total_steps": 78105, "loss": 0.3559, "lr": 4.008521239855697e-06, "epoch": 1.822226489981435, "percentage": 36.44, "elapsed_time": "1:15:07", "remaining_time": "2:11:00", "throughput": 19874.43, "total_tokens": 89586048} +{"current_steps": 28470, "total_steps": 78105, "loss": 0.277, "lr": 4.0080757147987135e-06, "epoch": 1.822546571922412, "percentage": 36.45, "elapsed_time": "1:15:08", "remaining_time": "2:10:59", "throughput": 19874.98, "total_tokens": 89602240} +{"current_steps": 28475, "total_steps": 78105, "loss": 0.3515, "lr": 4.0076301144362865e-06, "epoch": 1.822866653863389, "percentage": 36.46, "elapsed_time": "1:15:08", "remaining_time": "2:10:58", "throughput": 19875.45, "total_tokens": 89617600} +{"current_steps": 28480, "total_steps": 78105, "loss": 0.371, "lr": 4.007184438790663e-06, "epoch": 1.823186735804366, "percentage": 36.46, "elapsed_time": "1:15:09", "remaining_time": "2:10:57", "throughput": 19875.95, "total_tokens": 89633344} +{"current_steps": 28485, "total_steps": 78105, "loss": 0.2706, "lr": 4.0067386878840995e-06, "epoch": 1.8235068177453428, "percentage": 36.47, "elapsed_time": "1:15:10", "remaining_time": "2:10:56", "throughput": 19876.43, "total_tokens": 89648832} +{"current_steps": 28490, "total_steps": 78105, "loss": 0.3214, "lr": 4.006292861738855e-06, "epoch": 1.8238268996863196, "percentage": 36.48, "elapsed_time": "1:15:10", "remaining_time": "2:10:55", "throughput": 19876.92, "total_tokens": 89664256} +{"current_steps": 28495, "total_steps": 78105, "loss": 0.3532, "lr": 4.00584696037719e-06, "epoch": 1.8241469816272966, "percentage": 36.48, "elapsed_time": "1:15:11", "remaining_time": "2:10:54", "throughput": 19877.38, "total_tokens": 89679488} +{"current_steps": 28500, "total_steps": 78105, "loss": 0.4889, "lr": 4.005400983821373e-06, "epoch": 1.8244670635682736, "percentage": 36.49, "elapsed_time": "1:15:12", "remaining_time": "2:10:53", "throughput": 19877.87, "total_tokens": 89695488} +{"current_steps": 28505, "total_steps": 78105, "loss": 0.3353, "lr": 4.00495493209367e-06, "epoch": 1.8247871455092504, "percentage": 36.5, "elapsed_time": "1:15:12", "remaining_time": "2:10:52", "throughput": 19878.29, "total_tokens": 89710592} +{"current_steps": 28510, "total_steps": 78105, "loss": 0.2648, "lr": 4.0045088052163585e-06, "epoch": 1.8251072274502271, "percentage": 36.5, "elapsed_time": "1:15:13", "remaining_time": "2:10:51", "throughput": 19878.74, "total_tokens": 89725760} +{"current_steps": 28515, "total_steps": 78105, "loss": 0.3814, "lr": 4.004062603211714e-06, "epoch": 1.8254273093912041, "percentage": 36.51, "elapsed_time": "1:15:14", "remaining_time": "2:10:50", "throughput": 19879.29, "total_tokens": 89741696} +{"current_steps": 28520, "total_steps": 78105, "loss": 0.2767, "lr": 4.003616326102015e-06, "epoch": 1.8257473913321811, "percentage": 36.51, "elapsed_time": "1:15:15", "remaining_time": "2:10:49", "throughput": 19879.79, "total_tokens": 89757376} +{"current_steps": 28525, "total_steps": 78105, "loss": 0.4397, "lr": 4.003169973909551e-06, "epoch": 1.8260674732731579, "percentage": 36.52, "elapsed_time": "1:15:15", "remaining_time": "2:10:48", "throughput": 19880.44, "total_tokens": 89774592} +{"current_steps": 28530, "total_steps": 78105, "loss": 0.2701, "lr": 4.002723546656608e-06, "epoch": 1.8263875552141349, "percentage": 36.53, "elapsed_time": "1:15:16", "remaining_time": "2:10:47", "throughput": 19880.97, "total_tokens": 89790784} +{"current_steps": 28535, "total_steps": 78105, "loss": 0.2188, "lr": 4.002277044365478e-06, "epoch": 1.8267076371551116, "percentage": 36.53, "elapsed_time": "1:15:17", "remaining_time": "2:10:46", "throughput": 19881.45, "total_tokens": 89806208} +{"current_steps": 28540, "total_steps": 78105, "loss": 0.2906, "lr": 4.001830467058458e-06, "epoch": 1.8270277190960886, "percentage": 36.54, "elapsed_time": "1:15:17", "remaining_time": "2:10:45", "throughput": 19882.03, "total_tokens": 89822592} +{"current_steps": 28545, "total_steps": 78105, "loss": 0.494, "lr": 4.001383814757847e-06, "epoch": 1.8273478010370656, "percentage": 36.55, "elapsed_time": "1:15:18", "remaining_time": "2:10:44", "throughput": 19882.58, "total_tokens": 89838656} +{"current_steps": 28550, "total_steps": 78105, "loss": 0.2168, "lr": 4.000937087485948e-06, "epoch": 1.8276678829780424, "percentage": 36.55, "elapsed_time": "1:15:19", "remaining_time": "2:10:43", "throughput": 19883.08, "total_tokens": 89854272} +{"current_steps": 28555, "total_steps": 78105, "loss": 0.3996, "lr": 4.00049028526507e-06, "epoch": 1.8279879649190192, "percentage": 36.56, "elapsed_time": "1:15:19", "remaining_time": "2:10:42", "throughput": 19883.53, "total_tokens": 89869504} +{"current_steps": 28560, "total_steps": 78105, "loss": 0.3058, "lr": 4.000043408117523e-06, "epoch": 1.8283080468599961, "percentage": 36.57, "elapsed_time": "1:15:20", "remaining_time": "2:10:41", "throughput": 19884.03, "total_tokens": 89884928} +{"current_steps": 28565, "total_steps": 78105, "loss": 0.3295, "lr": 3.999596456065621e-06, "epoch": 1.8286281288009731, "percentage": 36.57, "elapsed_time": "1:15:21", "remaining_time": "2:10:40", "throughput": 19884.54, "total_tokens": 89901056} +{"current_steps": 28570, "total_steps": 78105, "loss": 0.371, "lr": 3.999149429131683e-06, "epoch": 1.82894821074195, "percentage": 36.58, "elapsed_time": "1:15:21", "remaining_time": "2:10:40", "throughput": 19885.16, "total_tokens": 89917952} +{"current_steps": 28575, "total_steps": 78105, "loss": 0.3834, "lr": 3.998702327338031e-06, "epoch": 1.8292682926829267, "percentage": 36.59, "elapsed_time": "1:15:22", "remaining_time": "2:10:39", "throughput": 19885.64, "total_tokens": 89932992} +{"current_steps": 28580, "total_steps": 78105, "loss": 0.31, "lr": 3.998255150706993e-06, "epoch": 1.8295883746239037, "percentage": 36.59, "elapsed_time": "1:15:23", "remaining_time": "2:10:37", "throughput": 19886.09, "total_tokens": 89948096} +{"current_steps": 28585, "total_steps": 78105, "loss": 0.2606, "lr": 3.9978078992608955e-06, "epoch": 1.8299084565648807, "percentage": 36.6, "elapsed_time": "1:15:23", "remaining_time": "2:10:36", "throughput": 19886.62, "total_tokens": 89963776} +{"current_steps": 28590, "total_steps": 78105, "loss": 0.3278, "lr": 3.997360573022073e-06, "epoch": 1.8302285385058576, "percentage": 36.6, "elapsed_time": "1:15:24", "remaining_time": "2:10:35", "throughput": 19887.03, "total_tokens": 89978368} +{"current_steps": 28595, "total_steps": 78105, "loss": 0.2976, "lr": 3.996913172012864e-06, "epoch": 1.8305486204468344, "percentage": 36.61, "elapsed_time": "1:15:25", "remaining_time": "2:10:34", "throughput": 19887.48, "total_tokens": 89993792} +{"current_steps": 28600, "total_steps": 78105, "loss": 0.3669, "lr": 3.996465696255607e-06, "epoch": 1.8308687023878112, "percentage": 36.62, "elapsed_time": "1:15:25", "remaining_time": "2:10:33", "throughput": 19887.96, "total_tokens": 90009088} +{"current_steps": 28605, "total_steps": 78105, "loss": 0.3186, "lr": 3.996018145772649e-06, "epoch": 1.8311887843287882, "percentage": 36.62, "elapsed_time": "1:15:26", "remaining_time": "2:10:32", "throughput": 19888.44, "total_tokens": 90024576} +{"current_steps": 28610, "total_steps": 78105, "loss": 0.385, "lr": 3.9955705205863375e-06, "epoch": 1.8315088662697652, "percentage": 36.63, "elapsed_time": "1:15:27", "remaining_time": "2:10:31", "throughput": 19888.96, "total_tokens": 90040064} +{"current_steps": 28615, "total_steps": 78105, "loss": 0.3319, "lr": 3.995122820719023e-06, "epoch": 1.831828948210742, "percentage": 36.64, "elapsed_time": "1:15:27", "remaining_time": "2:10:30", "throughput": 19889.5, "total_tokens": 90056384} +{"current_steps": 28620, "total_steps": 78105, "loss": 0.334, "lr": 3.994675046193064e-06, "epoch": 1.8321490301517187, "percentage": 36.64, "elapsed_time": "1:15:28", "remaining_time": "2:10:29", "throughput": 19890.02, "total_tokens": 90072192} +{"current_steps": 28625, "total_steps": 78105, "loss": 0.3245, "lr": 3.994227197030818e-06, "epoch": 1.8324691120926957, "percentage": 36.65, "elapsed_time": "1:15:29", "remaining_time": "2:10:28", "throughput": 19890.53, "total_tokens": 90087936} +{"current_steps": 28630, "total_steps": 78105, "loss": 0.3607, "lr": 3.993779273254649e-06, "epoch": 1.8327891940336727, "percentage": 36.66, "elapsed_time": "1:15:29", "remaining_time": "2:10:27", "throughput": 19891.04, "total_tokens": 90103616} +{"current_steps": 28635, "total_steps": 78105, "loss": 0.2726, "lr": 3.993331274886923e-06, "epoch": 1.8331092759746497, "percentage": 36.66, "elapsed_time": "1:15:30", "remaining_time": "2:10:26", "throughput": 19891.5, "total_tokens": 90118656} +{"current_steps": 28640, "total_steps": 78105, "loss": 0.2995, "lr": 3.992883201950013e-06, "epoch": 1.8334293579156264, "percentage": 36.67, "elapsed_time": "1:15:31", "remaining_time": "2:10:26", "throughput": 19892.2, "total_tokens": 90136384} +{"current_steps": 28645, "total_steps": 78105, "loss": 0.3335, "lr": 3.992435054466291e-06, "epoch": 1.8337494398566032, "percentage": 36.67, "elapsed_time": "1:15:31", "remaining_time": "2:10:25", "throughput": 19892.62, "total_tokens": 90151424} +{"current_steps": 28650, "total_steps": 78105, "loss": 0.4017, "lr": 3.991986832458138e-06, "epoch": 1.8340695217975802, "percentage": 36.68, "elapsed_time": "1:15:32", "remaining_time": "2:10:24", "throughput": 19893.14, "total_tokens": 90167360} +{"current_steps": 28655, "total_steps": 78105, "loss": 0.4695, "lr": 3.991538535947933e-06, "epoch": 1.8343896037385572, "percentage": 36.69, "elapsed_time": "1:15:33", "remaining_time": "2:10:23", "throughput": 19893.67, "total_tokens": 90183296} +{"current_steps": 28660, "total_steps": 78105, "loss": 0.2858, "lr": 3.991090164958062e-06, "epoch": 1.834709685679534, "percentage": 36.69, "elapsed_time": "1:15:33", "remaining_time": "2:10:22", "throughput": 19894.16, "total_tokens": 90199040} +{"current_steps": 28665, "total_steps": 78105, "loss": 0.4025, "lr": 3.990641719510916e-06, "epoch": 1.8350297676205107, "percentage": 36.7, "elapsed_time": "1:15:34", "remaining_time": "2:10:21", "throughput": 19894.64, "total_tokens": 90214656} +{"current_steps": 28670, "total_steps": 78105, "loss": 0.3675, "lr": 3.990193199628886e-06, "epoch": 1.8353498495614877, "percentage": 36.71, "elapsed_time": "1:15:35", "remaining_time": "2:10:20", "throughput": 19895.13, "total_tokens": 90230336} +{"current_steps": 28675, "total_steps": 78105, "loss": 0.2516, "lr": 3.9897446053343705e-06, "epoch": 1.8356699315024647, "percentage": 36.71, "elapsed_time": "1:15:35", "remaining_time": "2:10:19", "throughput": 19895.58, "total_tokens": 90245248} +{"current_steps": 28680, "total_steps": 78105, "loss": 0.3023, "lr": 3.989295936649769e-06, "epoch": 1.8359900134434415, "percentage": 36.72, "elapsed_time": "1:15:36", "remaining_time": "2:10:18", "throughput": 19896.06, "total_tokens": 90260608} +{"current_steps": 28685, "total_steps": 78105, "loss": 0.3535, "lr": 3.988847193597486e-06, "epoch": 1.8363100953844183, "percentage": 36.73, "elapsed_time": "1:15:37", "remaining_time": "2:10:17", "throughput": 19896.59, "total_tokens": 90276672} +{"current_steps": 28690, "total_steps": 78105, "loss": 0.5439, "lr": 3.988398376199929e-06, "epoch": 1.8366301773253952, "percentage": 36.73, "elapsed_time": "1:15:37", "remaining_time": "2:10:16", "throughput": 19897.03, "total_tokens": 90291840} +{"current_steps": 28695, "total_steps": 78105, "loss": 0.2631, "lr": 3.9879494844795095e-06, "epoch": 1.8369502592663722, "percentage": 36.74, "elapsed_time": "1:15:38", "remaining_time": "2:10:15", "throughput": 19897.59, "total_tokens": 90308352} +{"current_steps": 28700, "total_steps": 78105, "loss": 0.2444, "lr": 3.987500518458644e-06, "epoch": 1.8372703412073492, "percentage": 36.75, "elapsed_time": "1:15:39", "remaining_time": "2:10:14", "throughput": 19898.07, "total_tokens": 90323840} +{"current_steps": 28705, "total_steps": 78105, "loss": 0.3744, "lr": 3.98705147815975e-06, "epoch": 1.837590423148326, "percentage": 36.75, "elapsed_time": "1:15:40", "remaining_time": "2:10:13", "throughput": 19898.62, "total_tokens": 90339968} +{"current_steps": 28710, "total_steps": 78105, "loss": 0.359, "lr": 3.986602363605251e-06, "epoch": 1.8379105050893028, "percentage": 36.76, "elapsed_time": "1:15:40", "remaining_time": "2:10:12", "throughput": 19899.12, "total_tokens": 90355328} +{"current_steps": 28715, "total_steps": 78105, "loss": 0.2048, "lr": 3.986153174817574e-06, "epoch": 1.8382305870302798, "percentage": 36.76, "elapsed_time": "1:15:41", "remaining_time": "2:10:11", "throughput": 19899.56, "total_tokens": 90370240} +{"current_steps": 28720, "total_steps": 78105, "loss": 0.3128, "lr": 3.985703911819149e-06, "epoch": 1.8385506689712567, "percentage": 36.77, "elapsed_time": "1:15:42", "remaining_time": "2:10:10", "throughput": 19900.37, "total_tokens": 90389696} +{"current_steps": 28725, "total_steps": 78105, "loss": 0.3714, "lr": 3.985254574632408e-06, "epoch": 1.8388707509122335, "percentage": 36.78, "elapsed_time": "1:15:42", "remaining_time": "2:10:09", "throughput": 19900.82, "total_tokens": 90405184} +{"current_steps": 28730, "total_steps": 78105, "loss": 0.3571, "lr": 3.984805163279791e-06, "epoch": 1.8391908328532103, "percentage": 36.78, "elapsed_time": "1:15:43", "remaining_time": "2:10:08", "throughput": 19901.48, "total_tokens": 90422528} +{"current_steps": 28735, "total_steps": 78105, "loss": 0.2416, "lr": 3.984355677783738e-06, "epoch": 1.8395109147941873, "percentage": 36.79, "elapsed_time": "1:15:44", "remaining_time": "2:10:07", "throughput": 19902.06, "total_tokens": 90439552} +{"current_steps": 28740, "total_steps": 78105, "loss": 0.4798, "lr": 3.983906118166694e-06, "epoch": 1.8398309967351643, "percentage": 36.8, "elapsed_time": "1:15:44", "remaining_time": "2:10:06", "throughput": 19902.47, "total_tokens": 90454336} +{"current_steps": 28745, "total_steps": 78105, "loss": 0.2643, "lr": 3.983456484451108e-06, "epoch": 1.8401510786761412, "percentage": 36.8, "elapsed_time": "1:15:45", "remaining_time": "2:10:05", "throughput": 19902.99, "total_tokens": 90470336} +{"current_steps": 28750, "total_steps": 78105, "loss": 0.3677, "lr": 3.983006776659432e-06, "epoch": 1.840471160617118, "percentage": 36.81, "elapsed_time": "1:15:46", "remaining_time": "2:10:04", "throughput": 19903.49, "total_tokens": 90486144} +{"current_steps": 28755, "total_steps": 78105, "loss": 0.3843, "lr": 3.982556994814122e-06, "epoch": 1.8407912425580948, "percentage": 36.82, "elapsed_time": "1:15:46", "remaining_time": "2:10:03", "throughput": 19903.96, "total_tokens": 90501504} +{"current_steps": 28760, "total_steps": 78105, "loss": 0.366, "lr": 3.982107138937638e-06, "epoch": 1.8411113244990718, "percentage": 36.82, "elapsed_time": "1:15:47", "remaining_time": "2:10:02", "throughput": 19904.45, "total_tokens": 90517632} +{"current_steps": 28765, "total_steps": 78105, "loss": 0.4233, "lr": 3.9816572090524445e-06, "epoch": 1.8414314064400488, "percentage": 36.83, "elapsed_time": "1:15:48", "remaining_time": "2:10:01", "throughput": 19904.97, "total_tokens": 90533440} +{"current_steps": 28770, "total_steps": 78105, "loss": 0.4106, "lr": 3.981207205181006e-06, "epoch": 1.8417514883810255, "percentage": 36.84, "elapsed_time": "1:15:48", "remaining_time": "2:10:00", "throughput": 19905.36, "total_tokens": 90547840} +{"current_steps": 28775, "total_steps": 78105, "loss": 0.3703, "lr": 3.980757127345796e-06, "epoch": 1.8420715703220023, "percentage": 36.84, "elapsed_time": "1:15:49", "remaining_time": "2:09:59", "throughput": 19905.77, "total_tokens": 90562560} +{"current_steps": 28780, "total_steps": 78105, "loss": 0.2585, "lr": 3.980306975569288e-06, "epoch": 1.8423916522629793, "percentage": 36.85, "elapsed_time": "1:15:50", "remaining_time": "2:09:58", "throughput": 19906.22, "total_tokens": 90577664} +{"current_steps": 28785, "total_steps": 78105, "loss": 0.3285, "lr": 3.9798567498739605e-06, "epoch": 1.8427117342039563, "percentage": 36.85, "elapsed_time": "1:15:50", "remaining_time": "2:09:57", "throughput": 19906.68, "total_tokens": 90593024} +{"current_steps": 28790, "total_steps": 78105, "loss": 0.3128, "lr": 3.979406450282295e-06, "epoch": 1.843031816144933, "percentage": 36.86, "elapsed_time": "1:15:51", "remaining_time": "2:09:56", "throughput": 19907.1, "total_tokens": 90607616} +{"current_steps": 28795, "total_steps": 78105, "loss": 0.2937, "lr": 3.978956076816778e-06, "epoch": 1.84335189808591, "percentage": 36.87, "elapsed_time": "1:15:52", "remaining_time": "2:09:55", "throughput": 19907.5, "total_tokens": 90622528} +{"current_steps": 28800, "total_steps": 78105, "loss": 0.5434, "lr": 3.978505629499897e-06, "epoch": 1.8436719800268868, "percentage": 36.87, "elapsed_time": "1:15:52", "remaining_time": "2:09:54", "throughput": 19907.98, "total_tokens": 90638272} +{"current_steps": 28805, "total_steps": 78105, "loss": 0.2492, "lr": 3.9780551083541465e-06, "epoch": 1.8439920619678638, "percentage": 36.88, "elapsed_time": "1:15:53", "remaining_time": "2:09:53", "throughput": 19908.49, "total_tokens": 90654080} +{"current_steps": 28810, "total_steps": 78105, "loss": 0.3155, "lr": 3.9776045134020234e-06, "epoch": 1.8443121439088408, "percentage": 36.89, "elapsed_time": "1:15:54", "remaining_time": "2:09:52", "throughput": 19908.93, "total_tokens": 90668992} +{"current_steps": 28815, "total_steps": 78105, "loss": 0.3145, "lr": 3.977153844666026e-06, "epoch": 1.8446322258498176, "percentage": 36.89, "elapsed_time": "1:15:54", "remaining_time": "2:09:51", "throughput": 19909.42, "total_tokens": 90684736} +{"current_steps": 28820, "total_steps": 78105, "loss": 0.3716, "lr": 3.976703102168661e-06, "epoch": 1.8449523077907943, "percentage": 36.9, "elapsed_time": "1:15:55", "remaining_time": "2:09:50", "throughput": 19909.91, "total_tokens": 90700416} +{"current_steps": 28825, "total_steps": 78105, "loss": 0.3012, "lr": 3.976252285932435e-06, "epoch": 1.8452723897317713, "percentage": 36.91, "elapsed_time": "1:15:56", "remaining_time": "2:09:49", "throughput": 19910.38, "total_tokens": 90715648} +{"current_steps": 28830, "total_steps": 78105, "loss": 0.3135, "lr": 3.975801395979859e-06, "epoch": 1.8455924716727483, "percentage": 36.91, "elapsed_time": "1:15:56", "remaining_time": "2:09:48", "throughput": 19910.81, "total_tokens": 90730432} +{"current_steps": 28835, "total_steps": 78105, "loss": 0.4524, "lr": 3.975350432333449e-06, "epoch": 1.845912553613725, "percentage": 36.92, "elapsed_time": "1:15:57", "remaining_time": "2:09:47", "throughput": 19911.3, "total_tokens": 90746240} +{"current_steps": 28840, "total_steps": 78105, "loss": 0.301, "lr": 3.974899395015722e-06, "epoch": 1.8462326355547019, "percentage": 36.92, "elapsed_time": "1:15:58", "remaining_time": "2:09:46", "throughput": 19911.71, "total_tokens": 90760768} +{"current_steps": 28845, "total_steps": 78105, "loss": 0.3463, "lr": 3.974448284049202e-06, "epoch": 1.8465527174956788, "percentage": 36.93, "elapsed_time": "1:15:58", "remaining_time": "2:09:45", "throughput": 19912.23, "total_tokens": 90776896} +{"current_steps": 28850, "total_steps": 78105, "loss": 0.2872, "lr": 3.973997099456416e-06, "epoch": 1.8468727994366558, "percentage": 36.94, "elapsed_time": "1:15:59", "remaining_time": "2:09:44", "throughput": 19912.71, "total_tokens": 90792512} +{"current_steps": 28855, "total_steps": 78105, "loss": 0.272, "lr": 3.973545841259892e-06, "epoch": 1.8471928813776328, "percentage": 36.94, "elapsed_time": "1:16:00", "remaining_time": "2:09:43", "throughput": 19913.29, "total_tokens": 90808960} +{"current_steps": 28860, "total_steps": 78105, "loss": 0.2062, "lr": 3.973094509482164e-06, "epoch": 1.8475129633186096, "percentage": 36.95, "elapsed_time": "1:16:00", "remaining_time": "2:09:42", "throughput": 19913.7, "total_tokens": 90823744} +{"current_steps": 28865, "total_steps": 78105, "loss": 0.3953, "lr": 3.972643104145769e-06, "epoch": 1.8478330452595864, "percentage": 36.96, "elapsed_time": "1:16:01", "remaining_time": "2:09:41", "throughput": 19914.3, "total_tokens": 90840576} +{"current_steps": 28870, "total_steps": 78105, "loss": 0.2605, "lr": 3.972191625273248e-06, "epoch": 1.8481531272005634, "percentage": 36.96, "elapsed_time": "1:16:02", "remaining_time": "2:09:40", "throughput": 19914.75, "total_tokens": 90855872} +{"current_steps": 28875, "total_steps": 78105, "loss": 0.3846, "lr": 3.971740072887146e-06, "epoch": 1.8484732091415403, "percentage": 36.97, "elapsed_time": "1:16:02", "remaining_time": "2:09:39", "throughput": 19915.18, "total_tokens": 90871040} +{"current_steps": 28880, "total_steps": 78105, "loss": 0.239, "lr": 3.971288447010011e-06, "epoch": 1.848793291082517, "percentage": 36.98, "elapsed_time": "1:16:03", "remaining_time": "2:09:38", "throughput": 19915.71, "total_tokens": 90887360} +{"current_steps": 28885, "total_steps": 78105, "loss": 0.3381, "lr": 3.970836747664394e-06, "epoch": 1.8491133730234939, "percentage": 36.98, "elapsed_time": "1:16:04", "remaining_time": "2:09:37", "throughput": 19916.14, "total_tokens": 90902464} +{"current_steps": 28890, "total_steps": 78105, "loss": 0.33, "lr": 3.970384974872851e-06, "epoch": 1.8494334549644709, "percentage": 36.99, "elapsed_time": "1:16:04", "remaining_time": "2:09:36", "throughput": 19916.52, "total_tokens": 90917056} +{"current_steps": 28895, "total_steps": 78105, "loss": 0.4705, "lr": 3.969933128657942e-06, "epoch": 1.8497535369054479, "percentage": 37.0, "elapsed_time": "1:16:05", "remaining_time": "2:09:35", "throughput": 19916.97, "total_tokens": 90932160} +{"current_steps": 28900, "total_steps": 78105, "loss": 0.3002, "lr": 3.9694812090422294e-06, "epoch": 1.8500736188464248, "percentage": 37.0, "elapsed_time": "1:16:06", "remaining_time": "2:09:34", "throughput": 19917.43, "total_tokens": 90947328} +{"current_steps": 28905, "total_steps": 78105, "loss": 0.3778, "lr": 3.969029216048279e-06, "epoch": 1.8503937007874016, "percentage": 37.01, "elapsed_time": "1:16:06", "remaining_time": "2:09:33", "throughput": 19917.93, "total_tokens": 90962880} +{"current_steps": 28910, "total_steps": 78105, "loss": 0.313, "lr": 3.968577149698661e-06, "epoch": 1.8507137827283784, "percentage": 37.01, "elapsed_time": "1:16:07", "remaining_time": "2:09:32", "throughput": 19918.44, "total_tokens": 90978944} +{"current_steps": 28915, "total_steps": 78105, "loss": 0.3712, "lr": 3.9681250100159505e-06, "epoch": 1.8510338646693554, "percentage": 37.02, "elapsed_time": "1:16:08", "remaining_time": "2:09:31", "throughput": 19918.89, "total_tokens": 90994368} +{"current_steps": 28920, "total_steps": 78105, "loss": 0.4533, "lr": 3.967672797022724e-06, "epoch": 1.8513539466103324, "percentage": 37.03, "elapsed_time": "1:16:08", "remaining_time": "2:09:30", "throughput": 19919.48, "total_tokens": 91011072} +{"current_steps": 28925, "total_steps": 78105, "loss": 0.2506, "lr": 3.967220510741562e-06, "epoch": 1.8516740285513091, "percentage": 37.03, "elapsed_time": "1:16:09", "remaining_time": "2:09:29", "throughput": 19919.96, "total_tokens": 91026688} +{"current_steps": 28930, "total_steps": 78105, "loss": 0.3414, "lr": 3.966768151195051e-06, "epoch": 1.851994110492286, "percentage": 37.04, "elapsed_time": "1:16:10", "remaining_time": "2:09:28", "throughput": 19920.41, "total_tokens": 91042176} +{"current_steps": 28935, "total_steps": 78105, "loss": 0.2907, "lr": 3.966315718405779e-06, "epoch": 1.852314192433263, "percentage": 37.05, "elapsed_time": "1:16:10", "remaining_time": "2:09:27", "throughput": 19920.88, "total_tokens": 91057600} +{"current_steps": 28940, "total_steps": 78105, "loss": 0.3664, "lr": 3.965863212396337e-06, "epoch": 1.8526342743742399, "percentage": 37.05, "elapsed_time": "1:16:11", "remaining_time": "2:09:26", "throughput": 19921.38, "total_tokens": 91073152} +{"current_steps": 28945, "total_steps": 78105, "loss": 0.3997, "lr": 3.965410633189321e-06, "epoch": 1.8529543563152167, "percentage": 37.06, "elapsed_time": "1:16:12", "remaining_time": "2:09:25", "throughput": 19921.85, "total_tokens": 91088896} +{"current_steps": 28950, "total_steps": 78105, "loss": 0.4065, "lr": 3.964957980807332e-06, "epoch": 1.8532744382561934, "percentage": 37.07, "elapsed_time": "1:16:12", "remaining_time": "2:09:24", "throughput": 19922.33, "total_tokens": 91104256} +{"current_steps": 28955, "total_steps": 78105, "loss": 0.3937, "lr": 3.9645052552729725e-06, "epoch": 1.8535945201971704, "percentage": 37.07, "elapsed_time": "1:16:13", "remaining_time": "2:09:23", "throughput": 19922.75, "total_tokens": 91119488} +{"current_steps": 28960, "total_steps": 78105, "loss": 0.3684, "lr": 3.964052456608848e-06, "epoch": 1.8539146021381474, "percentage": 37.08, "elapsed_time": "1:16:14", "remaining_time": "2:09:22", "throughput": 19923.2, "total_tokens": 91134848} +{"current_steps": 28965, "total_steps": 78105, "loss": 0.2936, "lr": 3.9635995848375706e-06, "epoch": 1.8542346840791244, "percentage": 37.08, "elapsed_time": "1:16:15", "remaining_time": "2:09:21", "throughput": 19923.76, "total_tokens": 91151488} +{"current_steps": 28970, "total_steps": 78105, "loss": 0.3816, "lr": 3.963146639981753e-06, "epoch": 1.8545547660201012, "percentage": 37.09, "elapsed_time": "1:16:15", "remaining_time": "2:09:20", "throughput": 19924.2, "total_tokens": 91166912} +{"current_steps": 28975, "total_steps": 78105, "loss": 0.3236, "lr": 3.962693622064013e-06, "epoch": 1.854874847961078, "percentage": 37.1, "elapsed_time": "1:16:16", "remaining_time": "2:09:19", "throughput": 19924.72, "total_tokens": 91183360} +{"current_steps": 28980, "total_steps": 78105, "loss": 0.4398, "lr": 3.962240531106973e-06, "epoch": 1.855194929902055, "percentage": 37.1, "elapsed_time": "1:16:17", "remaining_time": "2:09:18", "throughput": 19925.17, "total_tokens": 91198592} +{"current_steps": 28985, "total_steps": 78105, "loss": 0.2922, "lr": 3.961787367133258e-06, "epoch": 1.855515011843032, "percentage": 37.11, "elapsed_time": "1:16:17", "remaining_time": "2:09:17", "throughput": 19925.59, "total_tokens": 91213696} +{"current_steps": 28990, "total_steps": 78105, "loss": 0.2227, "lr": 3.9613341301654954e-06, "epoch": 1.8558350937840087, "percentage": 37.12, "elapsed_time": "1:16:18", "remaining_time": "2:09:16", "throughput": 19926.0, "total_tokens": 91228672} +{"current_steps": 28995, "total_steps": 78105, "loss": 0.3278, "lr": 3.960880820226318e-06, "epoch": 1.8561551757249855, "percentage": 37.12, "elapsed_time": "1:16:19", "remaining_time": "2:09:15", "throughput": 19926.47, "total_tokens": 91244160} +{"current_steps": 29000, "total_steps": 78105, "loss": 0.2866, "lr": 3.960427437338362e-06, "epoch": 1.8564752576659624, "percentage": 37.13, "elapsed_time": "1:16:19", "remaining_time": "2:09:14", "throughput": 19926.94, "total_tokens": 91259904} +{"current_steps": 29005, "total_steps": 78105, "loss": 0.2409, "lr": 3.9599739815242665e-06, "epoch": 1.8567953396069394, "percentage": 37.14, "elapsed_time": "1:16:20", "remaining_time": "2:09:13", "throughput": 19927.36, "total_tokens": 91274816} +{"current_steps": 29010, "total_steps": 78105, "loss": 0.3317, "lr": 3.959520452806675e-06, "epoch": 1.8571154215479164, "percentage": 37.14, "elapsed_time": "1:16:21", "remaining_time": "2:09:12", "throughput": 19927.87, "total_tokens": 91290624} +{"current_steps": 29015, "total_steps": 78105, "loss": 0.2801, "lr": 3.9590668512082355e-06, "epoch": 1.8574355034888932, "percentage": 37.15, "elapsed_time": "1:16:21", "remaining_time": "2:09:11", "throughput": 19928.45, "total_tokens": 91307264} +{"current_steps": 29020, "total_steps": 78105, "loss": 0.3701, "lr": 3.958613176751597e-06, "epoch": 1.85775558542987, "percentage": 37.16, "elapsed_time": "1:16:22", "remaining_time": "2:09:10", "throughput": 19928.86, "total_tokens": 91322112} +{"current_steps": 29025, "total_steps": 78105, "loss": 0.3043, "lr": 3.9581594294594135e-06, "epoch": 1.858075667370847, "percentage": 37.16, "elapsed_time": "1:16:23", "remaining_time": "2:09:09", "throughput": 19929.41, "total_tokens": 91338112} +{"current_steps": 29030, "total_steps": 78105, "loss": 0.4275, "lr": 3.957705609354345e-06, "epoch": 1.858395749311824, "percentage": 37.17, "elapsed_time": "1:16:23", "remaining_time": "2:09:08", "throughput": 19929.87, "total_tokens": 91353792} +{"current_steps": 29035, "total_steps": 78105, "loss": 0.5796, "lr": 3.9572517164590495e-06, "epoch": 1.8587158312528007, "percentage": 37.17, "elapsed_time": "1:16:24", "remaining_time": "2:09:07", "throughput": 19930.33, "total_tokens": 91369216} +{"current_steps": 29040, "total_steps": 78105, "loss": 0.3269, "lr": 3.956797750796195e-06, "epoch": 1.8590359131937775, "percentage": 37.18, "elapsed_time": "1:16:25", "remaining_time": "2:09:06", "throughput": 19930.73, "total_tokens": 91384000} +{"current_steps": 29045, "total_steps": 78105, "loss": 0.3778, "lr": 3.956343712388448e-06, "epoch": 1.8593559951347545, "percentage": 37.19, "elapsed_time": "1:16:25", "remaining_time": "2:09:05", "throughput": 19931.27, "total_tokens": 91400064} +{"current_steps": 29050, "total_steps": 78105, "loss": 0.3251, "lr": 3.955889601258483e-06, "epoch": 1.8596760770757315, "percentage": 37.19, "elapsed_time": "1:16:26", "remaining_time": "2:09:04", "throughput": 19931.71, "total_tokens": 91415616} +{"current_steps": 29055, "total_steps": 78105, "loss": 0.3249, "lr": 3.955435417428973e-06, "epoch": 1.8599961590167082, "percentage": 37.2, "elapsed_time": "1:16:27", "remaining_time": "2:09:03", "throughput": 19932.28, "total_tokens": 91432192} +{"current_steps": 29060, "total_steps": 78105, "loss": 0.483, "lr": 3.954981160922601e-06, "epoch": 1.8603162409576852, "percentage": 37.21, "elapsed_time": "1:16:27", "remaining_time": "2:09:02", "throughput": 19932.75, "total_tokens": 91447808} +{"current_steps": 29065, "total_steps": 78105, "loss": 0.2738, "lr": 3.954526831762048e-06, "epoch": 1.860636322898662, "percentage": 37.21, "elapsed_time": "1:16:28", "remaining_time": "2:09:01", "throughput": 19933.19, "total_tokens": 91462912} +{"current_steps": 29070, "total_steps": 78105, "loss": 0.3437, "lr": 3.954072429970002e-06, "epoch": 1.860956404839639, "percentage": 37.22, "elapsed_time": "1:16:29", "remaining_time": "2:09:00", "throughput": 19933.66, "total_tokens": 91478656} +{"current_steps": 29075, "total_steps": 78105, "loss": 0.2974, "lr": 3.953617955569151e-06, "epoch": 1.861276486780616, "percentage": 37.23, "elapsed_time": "1:16:29", "remaining_time": "2:08:59", "throughput": 19934.11, "total_tokens": 91493824} +{"current_steps": 29080, "total_steps": 78105, "loss": 0.5277, "lr": 3.953163408582193e-06, "epoch": 1.8615965687215927, "percentage": 37.23, "elapsed_time": "1:16:30", "remaining_time": "2:08:58", "throughput": 19934.57, "total_tokens": 91509184} +{"current_steps": 29085, "total_steps": 78105, "loss": 0.2569, "lr": 3.952708789031822e-06, "epoch": 1.8619166506625695, "percentage": 37.24, "elapsed_time": "1:16:31", "remaining_time": "2:08:57", "throughput": 19935.05, "total_tokens": 91524992} +{"current_steps": 29090, "total_steps": 78105, "loss": 0.3735, "lr": 3.952254096940742e-06, "epoch": 1.8622367326035465, "percentage": 37.24, "elapsed_time": "1:16:31", "remaining_time": "2:08:56", "throughput": 19935.51, "total_tokens": 91540800} +{"current_steps": 29095, "total_steps": 78105, "loss": 0.2988, "lr": 3.951799332331656e-06, "epoch": 1.8625568145445235, "percentage": 37.25, "elapsed_time": "1:16:32", "remaining_time": "2:08:55", "throughput": 19935.95, "total_tokens": 91555904} +{"current_steps": 29100, "total_steps": 78105, "loss": 0.3661, "lr": 3.951344495227275e-06, "epoch": 1.8628768964855003, "percentage": 37.26, "elapsed_time": "1:16:33", "remaining_time": "2:08:54", "throughput": 19936.37, "total_tokens": 91570944} +{"current_steps": 29105, "total_steps": 78105, "loss": 0.3106, "lr": 3.950889585650308e-06, "epoch": 1.863196978426477, "percentage": 37.26, "elapsed_time": "1:16:33", "remaining_time": "2:08:53", "throughput": 19936.82, "total_tokens": 91586432} +{"current_steps": 29110, "total_steps": 78105, "loss": 0.3557, "lr": 3.950434603623474e-06, "epoch": 1.863517060367454, "percentage": 37.27, "elapsed_time": "1:16:34", "remaining_time": "2:08:52", "throughput": 19937.23, "total_tokens": 91601216} +{"current_steps": 29115, "total_steps": 78105, "loss": 0.3512, "lr": 3.949979549169489e-06, "epoch": 1.863837142308431, "percentage": 37.28, "elapsed_time": "1:16:35", "remaining_time": "2:08:52", "throughput": 19937.78, "total_tokens": 91617792} +{"current_steps": 29120, "total_steps": 78105, "loss": 0.2518, "lr": 3.9495244223110795e-06, "epoch": 1.864157224249408, "percentage": 37.28, "elapsed_time": "1:16:35", "remaining_time": "2:08:51", "throughput": 19938.28, "total_tokens": 91633600} +{"current_steps": 29125, "total_steps": 78105, "loss": 0.2783, "lr": 3.94906922307097e-06, "epoch": 1.8644773061903848, "percentage": 37.29, "elapsed_time": "1:16:36", "remaining_time": "2:08:50", "throughput": 19938.75, "total_tokens": 91649408} +{"current_steps": 29130, "total_steps": 78105, "loss": 0.2948, "lr": 3.948613951471892e-06, "epoch": 1.8647973881313615, "percentage": 37.3, "elapsed_time": "1:16:37", "remaining_time": "2:08:49", "throughput": 19939.25, "total_tokens": 91665280} +{"current_steps": 29135, "total_steps": 78105, "loss": 0.2225, "lr": 3.948158607536579e-06, "epoch": 1.8651174700723385, "percentage": 37.3, "elapsed_time": "1:16:37", "remaining_time": "2:08:48", "throughput": 19939.74, "total_tokens": 91681152} +{"current_steps": 29140, "total_steps": 78105, "loss": 0.3353, "lr": 3.947703191287768e-06, "epoch": 1.8654375520133155, "percentage": 37.31, "elapsed_time": "1:16:38", "remaining_time": "2:08:47", "throughput": 19940.22, "total_tokens": 91696768} +{"current_steps": 29145, "total_steps": 78105, "loss": 0.4083, "lr": 3.9472477027482e-06, "epoch": 1.8657576339542923, "percentage": 37.32, "elapsed_time": "1:16:39", "remaining_time": "2:08:46", "throughput": 19940.79, "total_tokens": 91713344} +{"current_steps": 29150, "total_steps": 78105, "loss": 0.3764, "lr": 3.946792141940621e-06, "epoch": 1.866077715895269, "percentage": 37.32, "elapsed_time": "1:16:39", "remaining_time": "2:08:45", "throughput": 19941.28, "total_tokens": 91728768} +{"current_steps": 29155, "total_steps": 78105, "loss": 0.1963, "lr": 3.946336508887778e-06, "epoch": 1.866397797836246, "percentage": 37.33, "elapsed_time": "1:16:40", "remaining_time": "2:08:44", "throughput": 19941.77, "total_tokens": 91744320} +{"current_steps": 29160, "total_steps": 78105, "loss": 0.4213, "lr": 3.945880803612423e-06, "epoch": 1.866717879777223, "percentage": 37.33, "elapsed_time": "1:16:41", "remaining_time": "2:08:43", "throughput": 19942.21, "total_tokens": 91759616} +{"current_steps": 29165, "total_steps": 78105, "loss": 0.3482, "lr": 3.945425026137313e-06, "epoch": 1.8670379617182, "percentage": 37.34, "elapsed_time": "1:16:41", "remaining_time": "2:08:42", "throughput": 19942.7, "total_tokens": 91775040} +{"current_steps": 29170, "total_steps": 78105, "loss": 0.3791, "lr": 3.9449691764852045e-06, "epoch": 1.8673580436591768, "percentage": 37.35, "elapsed_time": "1:16:42", "remaining_time": "2:08:41", "throughput": 19943.06, "total_tokens": 91789568} +{"current_steps": 29175, "total_steps": 78105, "loss": 0.3363, "lr": 3.944513254678863e-06, "epoch": 1.8676781256001536, "percentage": 37.35, "elapsed_time": "1:16:43", "remaining_time": "2:08:40", "throughput": 19943.69, "total_tokens": 91807232} +{"current_steps": 29180, "total_steps": 78105, "loss": 0.3427, "lr": 3.944057260741054e-06, "epoch": 1.8679982075411306, "percentage": 37.36, "elapsed_time": "1:16:43", "remaining_time": "2:08:39", "throughput": 19944.17, "total_tokens": 91822848} +{"current_steps": 29185, "total_steps": 78105, "loss": 0.3246, "lr": 3.943601194694547e-06, "epoch": 1.8683182894821075, "percentage": 37.37, "elapsed_time": "1:16:44", "remaining_time": "2:08:38", "throughput": 19944.66, "total_tokens": 91838080} +{"current_steps": 29190, "total_steps": 78105, "loss": 0.3411, "lr": 3.943145056562116e-06, "epoch": 1.8686383714230843, "percentage": 37.37, "elapsed_time": "1:16:45", "remaining_time": "2:08:37", "throughput": 19945.06, "total_tokens": 91852672} +{"current_steps": 29195, "total_steps": 78105, "loss": 0.2123, "lr": 3.942688846366537e-06, "epoch": 1.868958453364061, "percentage": 37.38, "elapsed_time": "1:16:45", "remaining_time": "2:08:36", "throughput": 19945.49, "total_tokens": 91867520} +{"current_steps": 29200, "total_steps": 78105, "loss": 0.411, "lr": 3.942232564130592e-06, "epoch": 1.869278535305038, "percentage": 37.39, "elapsed_time": "1:16:46", "remaining_time": "2:08:35", "throughput": 19945.88, "total_tokens": 91882368} +{"current_steps": 29205, "total_steps": 78105, "loss": 0.2532, "lr": 3.941776209877066e-06, "epoch": 1.869598617246015, "percentage": 37.39, "elapsed_time": "1:16:47", "remaining_time": "2:08:34", "throughput": 19946.28, "total_tokens": 91897216} +{"current_steps": 29210, "total_steps": 78105, "loss": 0.2938, "lr": 3.941319783628745e-06, "epoch": 1.8699186991869918, "percentage": 37.4, "elapsed_time": "1:16:47", "remaining_time": "2:08:33", "throughput": 19946.8, "total_tokens": 91913088} +{"current_steps": 29215, "total_steps": 78105, "loss": 0.3412, "lr": 3.940863285408423e-06, "epoch": 1.8702387811279686, "percentage": 37.4, "elapsed_time": "1:16:48", "remaining_time": "2:08:32", "throughput": 19947.35, "total_tokens": 91929152} +{"current_steps": 29220, "total_steps": 78105, "loss": 0.2966, "lr": 3.940406715238892e-06, "epoch": 1.8705588630689456, "percentage": 37.41, "elapsed_time": "1:16:49", "remaining_time": "2:08:31", "throughput": 19947.79, "total_tokens": 91944256} +{"current_steps": 29225, "total_steps": 78105, "loss": 0.2381, "lr": 3.939950073142954e-06, "epoch": 1.8708789450099226, "percentage": 37.42, "elapsed_time": "1:16:49", "remaining_time": "2:08:30", "throughput": 19948.39, "total_tokens": 91961216} +{"current_steps": 29230, "total_steps": 78105, "loss": 0.4507, "lr": 3.939493359143408e-06, "epoch": 1.8711990269508996, "percentage": 37.42, "elapsed_time": "1:16:50", "remaining_time": "2:08:29", "throughput": 19948.8, "total_tokens": 91976384} +{"current_steps": 29235, "total_steps": 78105, "loss": 0.2847, "lr": 3.939036573263063e-06, "epoch": 1.8715191088918763, "percentage": 37.43, "elapsed_time": "1:16:51", "remaining_time": "2:08:28", "throughput": 19949.32, "total_tokens": 91992512} +{"current_steps": 29240, "total_steps": 78105, "loss": 0.2979, "lr": 3.938579715524726e-06, "epoch": 1.871839190832853, "percentage": 37.44, "elapsed_time": "1:16:51", "remaining_time": "2:08:27", "throughput": 19949.72, "total_tokens": 92007360} +{"current_steps": 29245, "total_steps": 78105, "loss": 0.2641, "lr": 3.9381227859512125e-06, "epoch": 1.87215927277383, "percentage": 37.44, "elapsed_time": "1:16:52", "remaining_time": "2:08:26", "throughput": 19950.2, "total_tokens": 92023040} +{"current_steps": 29250, "total_steps": 78105, "loss": 0.3321, "lr": 3.9376657845653386e-06, "epoch": 1.872479354714807, "percentage": 37.45, "elapsed_time": "1:16:53", "remaining_time": "2:08:25", "throughput": 19950.7, "total_tokens": 92038912} +{"current_steps": 29255, "total_steps": 78105, "loss": 0.2838, "lr": 3.9372087113899224e-06, "epoch": 1.8727994366557839, "percentage": 37.46, "elapsed_time": "1:16:53", "remaining_time": "2:08:24", "throughput": 19951.14, "total_tokens": 92054080} +{"current_steps": 29260, "total_steps": 78105, "loss": 0.2504, "lr": 3.93675156644779e-06, "epoch": 1.8731195185967606, "percentage": 37.46, "elapsed_time": "1:16:54", "remaining_time": "2:08:23", "throughput": 19951.58, "total_tokens": 92069312} +{"current_steps": 29265, "total_steps": 78105, "loss": 0.3596, "lr": 3.936294349761768e-06, "epoch": 1.8734396005377376, "percentage": 37.47, "elapsed_time": "1:16:55", "remaining_time": "2:08:22", "throughput": 19952.22, "total_tokens": 92086528} +{"current_steps": 29270, "total_steps": 78105, "loss": 0.4362, "lr": 3.935837061354687e-06, "epoch": 1.8737596824787146, "percentage": 37.48, "elapsed_time": "1:16:56", "remaining_time": "2:08:21", "throughput": 19952.71, "total_tokens": 92102336} +{"current_steps": 29275, "total_steps": 78105, "loss": 0.5054, "lr": 3.935379701249383e-06, "epoch": 1.8740797644196916, "percentage": 37.48, "elapsed_time": "1:16:56", "remaining_time": "2:08:20", "throughput": 19953.49, "total_tokens": 92121344} +{"current_steps": 29280, "total_steps": 78105, "loss": 0.2932, "lr": 3.9349222694686926e-06, "epoch": 1.8743998463606684, "percentage": 37.49, "elapsed_time": "1:16:57", "remaining_time": "2:08:19", "throughput": 19953.98, "total_tokens": 92136896} +{"current_steps": 29285, "total_steps": 78105, "loss": 0.3226, "lr": 3.93446476603546e-06, "epoch": 1.8747199283016451, "percentage": 37.49, "elapsed_time": "1:16:58", "remaining_time": "2:08:18", "throughput": 19954.33, "total_tokens": 92151168} +{"current_steps": 29290, "total_steps": 78105, "loss": 0.2129, "lr": 3.934007190972527e-06, "epoch": 1.8750400102426221, "percentage": 37.5, "elapsed_time": "1:16:58", "remaining_time": "2:08:17", "throughput": 19954.79, "total_tokens": 92166848} +{"current_steps": 29295, "total_steps": 78105, "loss": 0.3391, "lr": 3.933549544302745e-06, "epoch": 1.8753600921835991, "percentage": 37.51, "elapsed_time": "1:16:59", "remaining_time": "2:08:16", "throughput": 19955.28, "total_tokens": 92182400} +{"current_steps": 29300, "total_steps": 78105, "loss": 0.3735, "lr": 3.9330918260489656e-06, "epoch": 1.8756801741245759, "percentage": 37.51, "elapsed_time": "1:17:00", "remaining_time": "2:08:15", "throughput": 19955.72, "total_tokens": 92197824} +{"current_steps": 29305, "total_steps": 78105, "loss": 0.3093, "lr": 3.932634036234045e-06, "epoch": 1.8760002560655527, "percentage": 37.52, "elapsed_time": "1:17:00", "remaining_time": "2:08:14", "throughput": 19956.14, "total_tokens": 92212992} +{"current_steps": 29310, "total_steps": 78105, "loss": 0.3101, "lr": 3.932176174880843e-06, "epoch": 1.8763203380065296, "percentage": 37.53, "elapsed_time": "1:17:01", "remaining_time": "2:08:13", "throughput": 19956.64, "total_tokens": 92228864} +{"current_steps": 29315, "total_steps": 78105, "loss": 0.3471, "lr": 3.931718242012223e-06, "epoch": 1.8766404199475066, "percentage": 37.53, "elapsed_time": "1:17:02", "remaining_time": "2:08:12", "throughput": 19957.17, "total_tokens": 92244800} +{"current_steps": 29320, "total_steps": 78105, "loss": 0.2309, "lr": 3.931260237651052e-06, "epoch": 1.8769605018884834, "percentage": 37.54, "elapsed_time": "1:17:02", "remaining_time": "2:08:11", "throughput": 19957.57, "total_tokens": 92259648} +{"current_steps": 29325, "total_steps": 78105, "loss": 0.332, "lr": 3.930802161820199e-06, "epoch": 1.8772805838294604, "percentage": 37.55, "elapsed_time": "1:17:03", "remaining_time": "2:08:10", "throughput": 19958.02, "total_tokens": 92275136} +{"current_steps": 29330, "total_steps": 78105, "loss": 0.4056, "lr": 3.93034401454254e-06, "epoch": 1.8776006657704372, "percentage": 37.55, "elapsed_time": "1:17:04", "remaining_time": "2:08:09", "throughput": 19958.5, "total_tokens": 92290752} +{"current_steps": 29335, "total_steps": 78105, "loss": 0.3318, "lr": 3.92988579584095e-06, "epoch": 1.8779207477114142, "percentage": 37.56, "elapsed_time": "1:17:04", "remaining_time": "2:08:08", "throughput": 19958.92, "total_tokens": 92305728} +{"current_steps": 29340, "total_steps": 78105, "loss": 0.338, "lr": 3.929427505738312e-06, "epoch": 1.8782408296523911, "percentage": 37.56, "elapsed_time": "1:17:05", "remaining_time": "2:08:07", "throughput": 19959.37, "total_tokens": 92321088} +{"current_steps": 29345, "total_steps": 78105, "loss": 0.359, "lr": 3.92896914425751e-06, "epoch": 1.878560911593368, "percentage": 37.57, "elapsed_time": "1:17:06", "remaining_time": "2:08:06", "throughput": 19959.85, "total_tokens": 92336512} +{"current_steps": 29350, "total_steps": 78105, "loss": 0.437, "lr": 3.928510711421431e-06, "epoch": 1.8788809935343447, "percentage": 37.58, "elapsed_time": "1:17:06", "remaining_time": "2:08:05", "throughput": 19960.31, "total_tokens": 92352128} +{"current_steps": 29355, "total_steps": 78105, "loss": 0.3809, "lr": 3.9280522072529694e-06, "epoch": 1.8792010754753217, "percentage": 37.58, "elapsed_time": "1:17:07", "remaining_time": "2:08:04", "throughput": 19960.77, "total_tokens": 92367616} +{"current_steps": 29360, "total_steps": 78105, "loss": 0.3917, "lr": 3.927593631775019e-06, "epoch": 1.8795211574162987, "percentage": 37.59, "elapsed_time": "1:17:08", "remaining_time": "2:08:03", "throughput": 19961.23, "total_tokens": 92383104} +{"current_steps": 29365, "total_steps": 78105, "loss": 0.3139, "lr": 3.927134985010478e-06, "epoch": 1.8798412393572754, "percentage": 37.6, "elapsed_time": "1:17:08", "remaining_time": "2:08:02", "throughput": 19961.79, "total_tokens": 92399552} +{"current_steps": 29370, "total_steps": 78105, "loss": 0.292, "lr": 3.92667626698225e-06, "epoch": 1.8801613212982522, "percentage": 37.6, "elapsed_time": "1:17:09", "remaining_time": "2:08:01", "throughput": 19962.2, "total_tokens": 92414272} +{"current_steps": 29375, "total_steps": 78105, "loss": 0.4266, "lr": 3.92621747771324e-06, "epoch": 1.8804814032392292, "percentage": 37.61, "elapsed_time": "1:17:10", "remaining_time": "2:08:00", "throughput": 19962.69, "total_tokens": 92430464} +{"current_steps": 29380, "total_steps": 78105, "loss": 0.2903, "lr": 3.925758617226358e-06, "epoch": 1.8808014851802062, "percentage": 37.62, "elapsed_time": "1:17:10", "remaining_time": "2:07:59", "throughput": 19963.09, "total_tokens": 92445248} +{"current_steps": 29385, "total_steps": 78105, "loss": 0.3035, "lr": 3.925299685544518e-06, "epoch": 1.8811215671211832, "percentage": 37.62, "elapsed_time": "1:17:11", "remaining_time": "2:07:58", "throughput": 19963.6, "total_tokens": 92461376} +{"current_steps": 29390, "total_steps": 78105, "loss": 0.2456, "lr": 3.924840682690636e-06, "epoch": 1.88144164906216, "percentage": 37.63, "elapsed_time": "1:17:12", "remaining_time": "2:07:57", "throughput": 19964.05, "total_tokens": 92476736} +{"current_steps": 29395, "total_steps": 78105, "loss": 0.3066, "lr": 3.924381608687631e-06, "epoch": 1.8817617310031367, "percentage": 37.64, "elapsed_time": "1:17:12", "remaining_time": "2:07:56", "throughput": 19964.43, "total_tokens": 92491392} +{"current_steps": 29400, "total_steps": 78105, "loss": 0.4081, "lr": 3.923922463558428e-06, "epoch": 1.8820818129441137, "percentage": 37.64, "elapsed_time": "1:17:13", "remaining_time": "2:07:55", "throughput": 19964.82, "total_tokens": 92506240} +{"current_steps": 29405, "total_steps": 78105, "loss": 0.3408, "lr": 3.9234632473259535e-06, "epoch": 1.8824018948850907, "percentage": 37.65, "elapsed_time": "1:17:14", "remaining_time": "2:07:55", "throughput": 19965.28, "total_tokens": 92522240} +{"current_steps": 29410, "total_steps": 78105, "loss": 0.3953, "lr": 3.92300396001314e-06, "epoch": 1.8827219768260675, "percentage": 37.65, "elapsed_time": "1:17:14", "remaining_time": "2:07:54", "throughput": 19965.85, "total_tokens": 92538688} +{"current_steps": 29415, "total_steps": 78105, "loss": 0.2775, "lr": 3.922544601642921e-06, "epoch": 1.8830420587670442, "percentage": 37.66, "elapsed_time": "1:17:15", "remaining_time": "2:07:53", "throughput": 19966.23, "total_tokens": 92553664} +{"current_steps": 29420, "total_steps": 78105, "loss": 0.3444, "lr": 3.922085172238234e-06, "epoch": 1.8833621407080212, "percentage": 37.67, "elapsed_time": "1:17:16", "remaining_time": "2:07:52", "throughput": 19966.68, "total_tokens": 92569152} +{"current_steps": 29425, "total_steps": 78105, "loss": 0.3184, "lr": 3.921625671822021e-06, "epoch": 1.8836822226489982, "percentage": 37.67, "elapsed_time": "1:17:16", "remaining_time": "2:07:51", "throughput": 19967.12, "total_tokens": 92584320} +{"current_steps": 29430, "total_steps": 78105, "loss": 0.3453, "lr": 3.9211661004172265e-06, "epoch": 1.8840023045899752, "percentage": 37.68, "elapsed_time": "1:17:17", "remaining_time": "2:07:50", "throughput": 19967.64, "total_tokens": 92600576} +{"current_steps": 29435, "total_steps": 78105, "loss": 0.3237, "lr": 3.9207064580468e-06, "epoch": 1.884322386530952, "percentage": 37.69, "elapsed_time": "1:17:18", "remaining_time": "2:07:49", "throughput": 19968.12, "total_tokens": 92616384} +{"current_steps": 29440, "total_steps": 78105, "loss": 0.3214, "lr": 3.9202467447336935e-06, "epoch": 1.8846424684719287, "percentage": 37.69, "elapsed_time": "1:17:18", "remaining_time": "2:07:48", "throughput": 19968.72, "total_tokens": 92633216} +{"current_steps": 29445, "total_steps": 78105, "loss": 0.4061, "lr": 3.919786960500862e-06, "epoch": 1.8849625504129057, "percentage": 37.7, "elapsed_time": "1:17:19", "remaining_time": "2:07:47", "throughput": 19969.33, "total_tokens": 92650368} +{"current_steps": 29450, "total_steps": 78105, "loss": 0.3836, "lr": 3.919327105371264e-06, "epoch": 1.8852826323538827, "percentage": 37.71, "elapsed_time": "1:17:20", "remaining_time": "2:07:46", "throughput": 19969.95, "total_tokens": 92667520} +{"current_steps": 29455, "total_steps": 78105, "loss": 0.3477, "lr": 3.918867179367865e-06, "epoch": 1.8856027142948595, "percentage": 37.71, "elapsed_time": "1:17:21", "remaining_time": "2:07:45", "throughput": 19970.44, "total_tokens": 92683520} +{"current_steps": 29460, "total_steps": 78105, "loss": 0.3697, "lr": 3.918407182513629e-06, "epoch": 1.8859227962358363, "percentage": 37.72, "elapsed_time": "1:17:21", "remaining_time": "2:07:44", "throughput": 19971.09, "total_tokens": 92700928} +{"current_steps": 29465, "total_steps": 78105, "loss": 0.3807, "lr": 3.917947114831526e-06, "epoch": 1.8862428781768132, "percentage": 37.72, "elapsed_time": "1:17:22", "remaining_time": "2:07:43", "throughput": 19971.56, "total_tokens": 92716672} +{"current_steps": 29470, "total_steps": 78105, "loss": 0.2563, "lr": 3.91748697634453e-06, "epoch": 1.8865629601177902, "percentage": 37.73, "elapsed_time": "1:17:23", "remaining_time": "2:07:42", "throughput": 19972.11, "total_tokens": 92732928} +{"current_steps": 29475, "total_steps": 78105, "loss": 0.3888, "lr": 3.917026767075619e-06, "epoch": 1.886883042058767, "percentage": 37.74, "elapsed_time": "1:17:23", "remaining_time": "2:07:41", "throughput": 19972.54, "total_tokens": 92748224} +{"current_steps": 29480, "total_steps": 78105, "loss": 0.3139, "lr": 3.916566487047772e-06, "epoch": 1.8872031239997438, "percentage": 37.74, "elapsed_time": "1:17:24", "remaining_time": "2:07:40", "throughput": 19973.04, "total_tokens": 92764224} +{"current_steps": 29485, "total_steps": 78105, "loss": 0.3486, "lr": 3.916106136283973e-06, "epoch": 1.8875232059407208, "percentage": 37.75, "elapsed_time": "1:17:25", "remaining_time": "2:07:39", "throughput": 19973.5, "total_tokens": 92779776} +{"current_steps": 29490, "total_steps": 78105, "loss": 0.3538, "lr": 3.915645714807209e-06, "epoch": 1.8878432878816978, "percentage": 37.76, "elapsed_time": "1:17:25", "remaining_time": "2:07:38", "throughput": 19973.91, "total_tokens": 92795072} +{"current_steps": 29495, "total_steps": 78105, "loss": 0.1942, "lr": 3.9151852226404714e-06, "epoch": 1.8881633698226747, "percentage": 37.76, "elapsed_time": "1:17:26", "remaining_time": "2:07:37", "throughput": 19974.38, "total_tokens": 92810432} +{"current_steps": 29500, "total_steps": 78105, "loss": 0.3684, "lr": 3.914724659806757e-06, "epoch": 1.8884834517636515, "percentage": 37.77, "elapsed_time": "1:17:27", "remaining_time": "2:07:36", "throughput": 19974.86, "total_tokens": 92826048} +{"current_steps": 29505, "total_steps": 78105, "loss": 0.3988, "lr": 3.914264026329062e-06, "epoch": 1.8888035337046283, "percentage": 37.78, "elapsed_time": "1:17:27", "remaining_time": "2:07:35", "throughput": 19975.41, "total_tokens": 92842624} +{"current_steps": 29510, "total_steps": 78105, "loss": 0.4192, "lr": 3.9138033222303876e-06, "epoch": 1.8891236156456053, "percentage": 37.78, "elapsed_time": "1:17:28", "remaining_time": "2:07:34", "throughput": 19975.91, "total_tokens": 92859008} +{"current_steps": 29515, "total_steps": 78105, "loss": 0.2912, "lr": 3.913342547533739e-06, "epoch": 1.8894436975865823, "percentage": 37.79, "elapsed_time": "1:17:29", "remaining_time": "2:07:33", "throughput": 19976.34, "total_tokens": 92874560} +{"current_steps": 29520, "total_steps": 78105, "loss": 0.3068, "lr": 3.9128817022621255e-06, "epoch": 1.889763779527559, "percentage": 37.8, "elapsed_time": "1:17:29", "remaining_time": "2:07:32", "throughput": 19976.75, "total_tokens": 92889472} +{"current_steps": 29525, "total_steps": 78105, "loss": 0.2897, "lr": 3.91242078643856e-06, "epoch": 1.8900838614685358, "percentage": 37.8, "elapsed_time": "1:17:30", "remaining_time": "2:07:31", "throughput": 19977.19, "total_tokens": 92904640} +{"current_steps": 29530, "total_steps": 78105, "loss": 0.5306, "lr": 3.911959800086057e-06, "epoch": 1.8904039434095128, "percentage": 37.81, "elapsed_time": "1:17:31", "remaining_time": "2:07:30", "throughput": 19977.57, "total_tokens": 92919296} +{"current_steps": 29535, "total_steps": 78105, "loss": 0.3709, "lr": 3.911498743227636e-06, "epoch": 1.8907240253504898, "percentage": 37.81, "elapsed_time": "1:17:31", "remaining_time": "2:07:29", "throughput": 19978.0, "total_tokens": 92934656} +{"current_steps": 29540, "total_steps": 78105, "loss": 0.3673, "lr": 3.911037615886319e-06, "epoch": 1.8910441072914668, "percentage": 37.82, "elapsed_time": "1:17:32", "remaining_time": "2:07:28", "throughput": 19978.48, "total_tokens": 92950336} +{"current_steps": 29545, "total_steps": 78105, "loss": 0.3713, "lr": 3.910576418085135e-06, "epoch": 1.8913641892324435, "percentage": 37.83, "elapsed_time": "1:17:33", "remaining_time": "2:07:27", "throughput": 19978.94, "total_tokens": 92965696} +{"current_steps": 29550, "total_steps": 78105, "loss": 0.2579, "lr": 3.910115149847112e-06, "epoch": 1.8916842711734203, "percentage": 37.83, "elapsed_time": "1:17:33", "remaining_time": "2:07:26", "throughput": 19979.39, "total_tokens": 92981184} +{"current_steps": 29555, "total_steps": 78105, "loss": 0.2935, "lr": 3.909653811195283e-06, "epoch": 1.8920043531143973, "percentage": 37.84, "elapsed_time": "1:17:34", "remaining_time": "2:07:25", "throughput": 19979.9, "total_tokens": 92997056} +{"current_steps": 29560, "total_steps": 78105, "loss": 0.4026, "lr": 3.909192402152685e-06, "epoch": 1.8923244350553743, "percentage": 37.85, "elapsed_time": "1:17:35", "remaining_time": "2:07:25", "throughput": 19980.36, "total_tokens": 93012992} +{"current_steps": 29565, "total_steps": 78105, "loss": 0.3713, "lr": 3.90873092274236e-06, "epoch": 1.892644516996351, "percentage": 37.85, "elapsed_time": "1:17:35", "remaining_time": "2:07:24", "throughput": 19980.95, "total_tokens": 93030016} +{"current_steps": 29570, "total_steps": 78105, "loss": 0.4132, "lr": 3.908269372987349e-06, "epoch": 1.8929645989373278, "percentage": 37.86, "elapsed_time": "1:17:36", "remaining_time": "2:07:23", "throughput": 19981.51, "total_tokens": 93046592} +{"current_steps": 29575, "total_steps": 78105, "loss": 0.3479, "lr": 3.907807752910701e-06, "epoch": 1.8932846808783048, "percentage": 37.87, "elapsed_time": "1:17:37", "remaining_time": "2:07:22", "throughput": 19982.02, "total_tokens": 93062528} +{"current_steps": 29580, "total_steps": 78105, "loss": 0.3162, "lr": 3.907346062535467e-06, "epoch": 1.8936047628192818, "percentage": 37.87, "elapsed_time": "1:17:37", "remaining_time": "2:07:21", "throughput": 19982.51, "total_tokens": 93078464} +{"current_steps": 29585, "total_steps": 78105, "loss": 0.2174, "lr": 3.906884301884702e-06, "epoch": 1.8939248447602586, "percentage": 37.88, "elapsed_time": "1:17:38", "remaining_time": "2:07:20", "throughput": 19982.86, "total_tokens": 93092928} +{"current_steps": 29590, "total_steps": 78105, "loss": 0.2567, "lr": 3.906422470981462e-06, "epoch": 1.8942449267012356, "percentage": 37.88, "elapsed_time": "1:17:39", "remaining_time": "2:07:19", "throughput": 19983.28, "total_tokens": 93107648} +{"current_steps": 29595, "total_steps": 78105, "loss": 0.3699, "lr": 3.905960569848811e-06, "epoch": 1.8945650086422123, "percentage": 37.89, "elapsed_time": "1:17:39", "remaining_time": "2:07:18", "throughput": 19983.78, "total_tokens": 93123584} +{"current_steps": 29600, "total_steps": 78105, "loss": 0.3734, "lr": 3.905498598509812e-06, "epoch": 1.8948850905831893, "percentage": 37.9, "elapsed_time": "1:17:40", "remaining_time": "2:07:17", "throughput": 19984.25, "total_tokens": 93139328} +{"current_steps": 29605, "total_steps": 78105, "loss": 0.2258, "lr": 3.905036556987534e-06, "epoch": 1.8952051725241663, "percentage": 37.9, "elapsed_time": "1:17:41", "remaining_time": "2:07:16", "throughput": 19984.69, "total_tokens": 93154304} +{"current_steps": 29610, "total_steps": 78105, "loss": 0.3358, "lr": 3.904574445305048e-06, "epoch": 1.895525254465143, "percentage": 37.91, "elapsed_time": "1:17:41", "remaining_time": "2:07:15", "throughput": 19985.12, "total_tokens": 93169344} +{"current_steps": 29615, "total_steps": 78105, "loss": 0.2756, "lr": 3.904112263485431e-06, "epoch": 1.8958453364061199, "percentage": 37.92, "elapsed_time": "1:17:42", "remaining_time": "2:07:14", "throughput": 19985.57, "total_tokens": 93184704} +{"current_steps": 29620, "total_steps": 78105, "loss": 0.3054, "lr": 3.903650011551762e-06, "epoch": 1.8961654183470968, "percentage": 37.92, "elapsed_time": "1:17:43", "remaining_time": "2:07:13", "throughput": 19985.93, "total_tokens": 93199232} +{"current_steps": 29625, "total_steps": 78105, "loss": 0.2537, "lr": 3.903187689527121e-06, "epoch": 1.8964855002880738, "percentage": 37.93, "elapsed_time": "1:17:43", "remaining_time": "2:07:12", "throughput": 19986.43, "total_tokens": 93215552} +{"current_steps": 29630, "total_steps": 78105, "loss": 0.3309, "lr": 3.902725297434598e-06, "epoch": 1.8968055822290506, "percentage": 37.94, "elapsed_time": "1:17:44", "remaining_time": "2:07:11", "throughput": 19986.91, "total_tokens": 93231232} +{"current_steps": 29635, "total_steps": 78105, "loss": 0.3343, "lr": 3.902262835297278e-06, "epoch": 1.8971256641700274, "percentage": 37.94, "elapsed_time": "1:17:45", "remaining_time": "2:07:10", "throughput": 19987.33, "total_tokens": 93246464} +{"current_steps": 29640, "total_steps": 78105, "loss": 0.4265, "lr": 3.9018003031382565e-06, "epoch": 1.8974457461110044, "percentage": 37.95, "elapsed_time": "1:17:45", "remaining_time": "2:07:09", "throughput": 19987.87, "total_tokens": 93262976} +{"current_steps": 29645, "total_steps": 78105, "loss": 0.41, "lr": 3.9013377009806306e-06, "epoch": 1.8977658280519814, "percentage": 37.96, "elapsed_time": "1:17:46", "remaining_time": "2:07:08", "throughput": 19988.28, "total_tokens": 93278208} +{"current_steps": 29650, "total_steps": 78105, "loss": 0.3079, "lr": 3.900875028847499e-06, "epoch": 1.8980859099929583, "percentage": 37.96, "elapsed_time": "1:17:47", "remaining_time": "2:07:07", "throughput": 19988.91, "total_tokens": 93295744} +{"current_steps": 29655, "total_steps": 78105, "loss": 0.2987, "lr": 3.900412286761965e-06, "epoch": 1.898405991933935, "percentage": 37.97, "elapsed_time": "1:17:48", "remaining_time": "2:07:06", "throughput": 19989.25, "total_tokens": 93310016} +{"current_steps": 29660, "total_steps": 78105, "loss": 0.2464, "lr": 3.899949474747136e-06, "epoch": 1.8987260738749119, "percentage": 37.97, "elapsed_time": "1:17:48", "remaining_time": "2:07:05", "throughput": 19989.73, "total_tokens": 93326016} +{"current_steps": 29665, "total_steps": 78105, "loss": 0.3691, "lr": 3.899486592826122e-06, "epoch": 1.8990461558158889, "percentage": 37.98, "elapsed_time": "1:17:49", "remaining_time": "2:07:04", "throughput": 19990.26, "total_tokens": 93342272} +{"current_steps": 29670, "total_steps": 78105, "loss": 0.2123, "lr": 3.899023641022038e-06, "epoch": 1.8993662377568659, "percentage": 37.99, "elapsed_time": "1:17:50", "remaining_time": "2:07:03", "throughput": 19990.69, "total_tokens": 93357248} +{"current_steps": 29675, "total_steps": 78105, "loss": 0.2229, "lr": 3.898560619358001e-06, "epoch": 1.8996863196978426, "percentage": 37.99, "elapsed_time": "1:17:50", "remaining_time": "2:07:02", "throughput": 19991.14, "total_tokens": 93372480} +{"current_steps": 29680, "total_steps": 78105, "loss": 0.3734, "lr": 3.89809752785713e-06, "epoch": 1.9000064016388194, "percentage": 38.0, "elapsed_time": "1:17:51", "remaining_time": "2:07:01", "throughput": 19991.72, "total_tokens": 93389376} +{"current_steps": 29685, "total_steps": 78105, "loss": 0.2921, "lr": 3.897634366542552e-06, "epoch": 1.9003264835797964, "percentage": 38.01, "elapsed_time": "1:17:52", "remaining_time": "2:07:00", "throughput": 19992.24, "total_tokens": 93405568} +{"current_steps": 29690, "total_steps": 78105, "loss": 0.2887, "lr": 3.897171135437393e-06, "epoch": 1.9006465655207734, "percentage": 38.01, "elapsed_time": "1:17:52", "remaining_time": "2:06:59", "throughput": 19992.64, "total_tokens": 93420224} +{"current_steps": 29695, "total_steps": 78105, "loss": 0.3296, "lr": 3.896707834564785e-06, "epoch": 1.9009666474617504, "percentage": 38.02, "elapsed_time": "1:17:53", "remaining_time": "2:06:58", "throughput": 19993.08, "total_tokens": 93435904} +{"current_steps": 29700, "total_steps": 78105, "loss": 0.3328, "lr": 3.896244463947863e-06, "epoch": 1.9012867294027271, "percentage": 38.03, "elapsed_time": "1:17:54", "remaining_time": "2:06:57", "throughput": 19993.44, "total_tokens": 93450240} +{"current_steps": 29705, "total_steps": 78105, "loss": 0.2082, "lr": 3.895781023609766e-06, "epoch": 1.901606811343704, "percentage": 38.03, "elapsed_time": "1:17:54", "remaining_time": "2:06:56", "throughput": 19993.87, "total_tokens": 93465536} +{"current_steps": 29710, "total_steps": 78105, "loss": 0.3424, "lr": 3.8953175135736345e-06, "epoch": 1.901926893284681, "percentage": 38.04, "elapsed_time": "1:17:55", "remaining_time": "2:06:55", "throughput": 19994.26, "total_tokens": 93480704} +{"current_steps": 29715, "total_steps": 78105, "loss": 0.4787, "lr": 3.894853933862614e-06, "epoch": 1.9022469752256579, "percentage": 38.04, "elapsed_time": "1:17:56", "remaining_time": "2:06:54", "throughput": 19994.72, "total_tokens": 93496128} +{"current_steps": 29720, "total_steps": 78105, "loss": 0.2776, "lr": 3.894390284499855e-06, "epoch": 1.9025670571666347, "percentage": 38.05, "elapsed_time": "1:17:56", "remaining_time": "2:06:53", "throughput": 19995.14, "total_tokens": 93511232} +{"current_steps": 29725, "total_steps": 78105, "loss": 0.4588, "lr": 3.893926565508507e-06, "epoch": 1.9028871391076114, "percentage": 38.06, "elapsed_time": "1:17:57", "remaining_time": "2:06:52", "throughput": 19995.63, "total_tokens": 93526848} +{"current_steps": 29730, "total_steps": 78105, "loss": 0.3266, "lr": 3.893462776911727e-06, "epoch": 1.9032072210485884, "percentage": 38.06, "elapsed_time": "1:17:58", "remaining_time": "2:06:51", "throughput": 19996.02, "total_tokens": 93541504} +{"current_steps": 29735, "total_steps": 78105, "loss": 0.3062, "lr": 3.8929989187326745e-06, "epoch": 1.9035273029895654, "percentage": 38.07, "elapsed_time": "1:17:58", "remaining_time": "2:06:50", "throughput": 19996.49, "total_tokens": 93557056} +{"current_steps": 29740, "total_steps": 78105, "loss": 0.4412, "lr": 3.892534990994512e-06, "epoch": 1.9038473849305422, "percentage": 38.08, "elapsed_time": "1:17:59", "remaining_time": "2:06:49", "throughput": 19997.08, "total_tokens": 93574016} +{"current_steps": 29745, "total_steps": 78105, "loss": 0.3652, "lr": 3.892070993720405e-06, "epoch": 1.904167466871519, "percentage": 38.08, "elapsed_time": "1:18:00", "remaining_time": "2:06:48", "throughput": 19997.59, "total_tokens": 93590208} +{"current_steps": 29750, "total_steps": 78105, "loss": 0.3777, "lr": 3.891606926933524e-06, "epoch": 1.904487548812496, "percentage": 38.09, "elapsed_time": "1:18:00", "remaining_time": "2:06:48", "throughput": 19998.11, "total_tokens": 93606592} +{"current_steps": 29755, "total_steps": 78105, "loss": 0.3252, "lr": 3.891142790657041e-06, "epoch": 1.904807630753473, "percentage": 38.1, "elapsed_time": "1:18:01", "remaining_time": "2:06:47", "throughput": 19998.54, "total_tokens": 93621632} +{"current_steps": 29760, "total_steps": 78105, "loss": 0.2774, "lr": 3.890678584914134e-06, "epoch": 1.90512771269445, "percentage": 38.1, "elapsed_time": "1:18:02", "remaining_time": "2:06:46", "throughput": 19999.16, "total_tokens": 93638912} +{"current_steps": 29765, "total_steps": 78105, "loss": 0.4059, "lr": 3.890214309727983e-06, "epoch": 1.9054477946354267, "percentage": 38.11, "elapsed_time": "1:18:02", "remaining_time": "2:06:45", "throughput": 19999.71, "total_tokens": 93655552} +{"current_steps": 29770, "total_steps": 78105, "loss": 0.3385, "lr": 3.8897499651217695e-06, "epoch": 1.9057678765764035, "percentage": 38.12, "elapsed_time": "1:18:03", "remaining_time": "2:06:44", "throughput": 20000.11, "total_tokens": 93670400} +{"current_steps": 29775, "total_steps": 78105, "loss": 0.3391, "lr": 3.889285551118682e-06, "epoch": 1.9060879585173804, "percentage": 38.12, "elapsed_time": "1:18:04", "remaining_time": "2:06:43", "throughput": 20000.52, "total_tokens": 93686016} +{"current_steps": 29780, "total_steps": 78105, "loss": 0.2084, "lr": 3.8888210677419105e-06, "epoch": 1.9064080404583574, "percentage": 38.13, "elapsed_time": "1:18:04", "remaining_time": "2:06:42", "throughput": 20000.81, "total_tokens": 93699840} +{"current_steps": 29785, "total_steps": 78105, "loss": 0.2909, "lr": 3.888356515014649e-06, "epoch": 1.9067281223993342, "percentage": 38.13, "elapsed_time": "1:18:05", "remaining_time": "2:06:41", "throughput": 20001.26, "total_tokens": 93715456} +{"current_steps": 29790, "total_steps": 78105, "loss": 0.436, "lr": 3.887891892960095e-06, "epoch": 1.907048204340311, "percentage": 38.14, "elapsed_time": "1:18:06", "remaining_time": "2:06:40", "throughput": 20001.68, "total_tokens": 93730368} +{"current_steps": 29795, "total_steps": 78105, "loss": 0.3454, "lr": 3.8874272016014494e-06, "epoch": 1.907368286281288, "percentage": 38.15, "elapsed_time": "1:18:06", "remaining_time": "2:06:39", "throughput": 20002.09, "total_tokens": 93745536} +{"current_steps": 29800, "total_steps": 78105, "loss": 0.3915, "lr": 3.886962440961916e-06, "epoch": 1.907688368222265, "percentage": 38.15, "elapsed_time": "1:18:07", "remaining_time": "2:06:38", "throughput": 20002.6, "total_tokens": 93761664} +{"current_steps": 29805, "total_steps": 78105, "loss": 0.2676, "lr": 3.886497611064703e-06, "epoch": 1.908008450163242, "percentage": 38.16, "elapsed_time": "1:18:08", "remaining_time": "2:06:37", "throughput": 20003.22, "total_tokens": 93778880} +{"current_steps": 29810, "total_steps": 78105, "loss": 0.313, "lr": 3.886032711933021e-06, "epoch": 1.9083285321042187, "percentage": 38.17, "elapsed_time": "1:18:08", "remaining_time": "2:06:36", "throughput": 20003.75, "total_tokens": 93795456} +{"current_steps": 29815, "total_steps": 78105, "loss": 0.3767, "lr": 3.885567743590084e-06, "epoch": 1.9086486140451955, "percentage": 38.17, "elapsed_time": "1:18:09", "remaining_time": "2:06:35", "throughput": 20004.2, "total_tokens": 93810944} +{"current_steps": 29820, "total_steps": 78105, "loss": 0.3712, "lr": 3.885102706059112e-06, "epoch": 1.9089686959861725, "percentage": 38.18, "elapsed_time": "1:18:10", "remaining_time": "2:06:34", "throughput": 20004.65, "total_tokens": 93826432} +{"current_steps": 29825, "total_steps": 78105, "loss": 0.4187, "lr": 3.884637599363325e-06, "epoch": 1.9092887779271495, "percentage": 38.19, "elapsed_time": "1:18:10", "remaining_time": "2:06:33", "throughput": 20005.19, "total_tokens": 93843200} +{"current_steps": 29830, "total_steps": 78105, "loss": 0.4812, "lr": 3.8841724235259495e-06, "epoch": 1.9096088598681262, "percentage": 38.19, "elapsed_time": "1:18:11", "remaining_time": "2:06:32", "throughput": 20005.9, "total_tokens": 93861760} +{"current_steps": 29835, "total_steps": 78105, "loss": 0.3444, "lr": 3.883707178570213e-06, "epoch": 1.909928941809103, "percentage": 38.2, "elapsed_time": "1:18:12", "remaining_time": "2:06:31", "throughput": 20006.33, "total_tokens": 93877440} +{"current_steps": 29840, "total_steps": 78105, "loss": 0.3317, "lr": 3.883241864519347e-06, "epoch": 1.91024902375008, "percentage": 38.2, "elapsed_time": "1:18:13", "remaining_time": "2:06:30", "throughput": 20006.76, "total_tokens": 93892672} +{"current_steps": 29845, "total_steps": 78105, "loss": 0.2844, "lr": 3.882776481396588e-06, "epoch": 1.910569105691057, "percentage": 38.21, "elapsed_time": "1:18:13", "remaining_time": "2:06:29", "throughput": 20007.17, "total_tokens": 93907776} +{"current_steps": 29850, "total_steps": 78105, "loss": 0.4965, "lr": 3.882311029225173e-06, "epoch": 1.9108891876320337, "percentage": 38.22, "elapsed_time": "1:18:14", "remaining_time": "2:06:28", "throughput": 20007.59, "total_tokens": 93922880} +{"current_steps": 29855, "total_steps": 78105, "loss": 0.4514, "lr": 3.881845508028346e-06, "epoch": 1.9112092695730107, "percentage": 38.22, "elapsed_time": "1:18:15", "remaining_time": "2:06:27", "throughput": 20008.06, "total_tokens": 93938752} +{"current_steps": 29860, "total_steps": 78105, "loss": 0.4591, "lr": 3.8813799178293525e-06, "epoch": 1.9115293515139875, "percentage": 38.23, "elapsed_time": "1:18:15", "remaining_time": "2:06:26", "throughput": 20008.55, "total_tokens": 93954752} +{"current_steps": 29865, "total_steps": 78105, "loss": 0.3295, "lr": 3.8809142586514406e-06, "epoch": 1.9118494334549645, "percentage": 38.24, "elapsed_time": "1:18:16", "remaining_time": "2:06:25", "throughput": 20008.99, "total_tokens": 93970240} +{"current_steps": 29870, "total_steps": 78105, "loss": 0.2836, "lr": 3.880448530517864e-06, "epoch": 1.9121695153959415, "percentage": 38.24, "elapsed_time": "1:18:17", "remaining_time": "2:06:24", "throughput": 20009.4, "total_tokens": 93985152} +{"current_steps": 29875, "total_steps": 78105, "loss": 0.5027, "lr": 3.879982733451878e-06, "epoch": 1.9124895973369183, "percentage": 38.25, "elapsed_time": "1:18:17", "remaining_time": "2:06:23", "throughput": 20009.82, "total_tokens": 94000832} +{"current_steps": 29880, "total_steps": 78105, "loss": 0.2764, "lr": 3.8795168674767425e-06, "epoch": 1.912809679277895, "percentage": 38.26, "elapsed_time": "1:18:18", "remaining_time": "2:06:23", "throughput": 20010.28, "total_tokens": 94016640} +{"current_steps": 29885, "total_steps": 78105, "loss": 0.3356, "lr": 3.87905093261572e-06, "epoch": 1.913129761218872, "percentage": 38.26, "elapsed_time": "1:18:19", "remaining_time": "2:06:22", "throughput": 20010.86, "total_tokens": 94033472} +{"current_steps": 29890, "total_steps": 78105, "loss": 0.3717, "lr": 3.878584928892077e-06, "epoch": 1.913449843159849, "percentage": 38.27, "elapsed_time": "1:18:19", "remaining_time": "2:06:21", "throughput": 20011.46, "total_tokens": 94050432} +{"current_steps": 29895, "total_steps": 78105, "loss": 0.3477, "lr": 3.878118856329084e-06, "epoch": 1.9137699251008258, "percentage": 38.28, "elapsed_time": "1:18:20", "remaining_time": "2:06:20", "throughput": 20011.87, "total_tokens": 94065536} +{"current_steps": 29900, "total_steps": 78105, "loss": 0.3671, "lr": 3.877652714950014e-06, "epoch": 1.9140900070418025, "percentage": 38.28, "elapsed_time": "1:18:21", "remaining_time": "2:06:19", "throughput": 20012.26, "total_tokens": 94080256} +{"current_steps": 29905, "total_steps": 78105, "loss": 0.2348, "lr": 3.877186504778143e-06, "epoch": 1.9144100889827795, "percentage": 38.29, "elapsed_time": "1:18:21", "remaining_time": "2:06:18", "throughput": 20012.69, "total_tokens": 94095424} +{"current_steps": 29910, "total_steps": 78105, "loss": 0.2735, "lr": 3.876720225836751e-06, "epoch": 1.9147301709237565, "percentage": 38.29, "elapsed_time": "1:18:22", "remaining_time": "2:06:17", "throughput": 20013.11, "total_tokens": 94110464} +{"current_steps": 29915, "total_steps": 78105, "loss": 0.2435, "lr": 3.876253878149122e-06, "epoch": 1.9150502528647335, "percentage": 38.3, "elapsed_time": "1:18:23", "remaining_time": "2:06:16", "throughput": 20013.63, "total_tokens": 94126976} +{"current_steps": 29920, "total_steps": 78105, "loss": 0.2175, "lr": 3.875787461738544e-06, "epoch": 1.9153703348057103, "percentage": 38.31, "elapsed_time": "1:18:23", "remaining_time": "2:06:15", "throughput": 20014.17, "total_tokens": 94143616} +{"current_steps": 29925, "total_steps": 78105, "loss": 0.2355, "lr": 3.875320976628305e-06, "epoch": 1.915690416746687, "percentage": 38.31, "elapsed_time": "1:18:24", "remaining_time": "2:06:14", "throughput": 20014.67, "total_tokens": 94159744} +{"current_steps": 29930, "total_steps": 78105, "loss": 0.3534, "lr": 3.874854422841701e-06, "epoch": 1.916010498687664, "percentage": 38.32, "elapsed_time": "1:18:25", "remaining_time": "2:06:13", "throughput": 20015.19, "total_tokens": 94176064} +{"current_steps": 29935, "total_steps": 78105, "loss": 0.3364, "lr": 3.874387800402028e-06, "epoch": 1.916330580628641, "percentage": 38.33, "elapsed_time": "1:18:25", "remaining_time": "2:06:12", "throughput": 20015.62, "total_tokens": 94191424} +{"current_steps": 29940, "total_steps": 78105, "loss": 0.3102, "lr": 3.873921109332587e-06, "epoch": 1.9166506625696178, "percentage": 38.33, "elapsed_time": "1:18:26", "remaining_time": "2:06:11", "throughput": 20016.12, "total_tokens": 94207552} +{"current_steps": 29945, "total_steps": 78105, "loss": 0.3507, "lr": 3.873454349656683e-06, "epoch": 1.9169707445105946, "percentage": 38.34, "elapsed_time": "1:18:27", "remaining_time": "2:06:10", "throughput": 20016.53, "total_tokens": 94222784} +{"current_steps": 29950, "total_steps": 78105, "loss": 0.3056, "lr": 3.8729875213976235e-06, "epoch": 1.9172908264515716, "percentage": 38.35, "elapsed_time": "1:18:27", "remaining_time": "2:06:09", "throughput": 20017.01, "total_tokens": 94238784} +{"current_steps": 29955, "total_steps": 78105, "loss": 0.2813, "lr": 3.8725206245787175e-06, "epoch": 1.9176109083925486, "percentage": 38.35, "elapsed_time": "1:18:28", "remaining_time": "2:06:08", "throughput": 20017.5, "total_tokens": 94254912} +{"current_steps": 29960, "total_steps": 78105, "loss": 0.3429, "lr": 3.872053659223281e-06, "epoch": 1.9179309903335255, "percentage": 38.36, "elapsed_time": "1:18:29", "remaining_time": "2:06:07", "throughput": 20018.0, "total_tokens": 94270912} +{"current_steps": 29965, "total_steps": 78105, "loss": 0.2434, "lr": 3.871586625354632e-06, "epoch": 1.9182510722745023, "percentage": 38.37, "elapsed_time": "1:18:30", "remaining_time": "2:06:06", "throughput": 20018.51, "total_tokens": 94287296} +{"current_steps": 29970, "total_steps": 78105, "loss": 0.3745, "lr": 3.871119522996091e-06, "epoch": 1.918571154215479, "percentage": 38.37, "elapsed_time": "1:18:30", "remaining_time": "2:06:05", "throughput": 20019.06, "total_tokens": 94303744} +{"current_steps": 29975, "total_steps": 78105, "loss": 0.3355, "lr": 3.870652352170983e-06, "epoch": 1.918891236156456, "percentage": 38.38, "elapsed_time": "1:18:31", "remaining_time": "2:06:04", "throughput": 20019.5, "total_tokens": 94319552} +{"current_steps": 29980, "total_steps": 78105, "loss": 0.3076, "lr": 3.870185112902636e-06, "epoch": 1.919211318097433, "percentage": 38.38, "elapsed_time": "1:18:32", "remaining_time": "2:06:03", "throughput": 20020.0, "total_tokens": 94335616} +{"current_steps": 29985, "total_steps": 78105, "loss": 0.2829, "lr": 3.869717805214381e-06, "epoch": 1.9195314000384098, "percentage": 38.39, "elapsed_time": "1:18:32", "remaining_time": "2:06:03", "throughput": 20020.42, "total_tokens": 94351168} +{"current_steps": 29990, "total_steps": 78105, "loss": 0.3977, "lr": 3.869250429129553e-06, "epoch": 1.9198514819793866, "percentage": 38.4, "elapsed_time": "1:18:33", "remaining_time": "2:06:02", "throughput": 20020.9, "total_tokens": 94367104} +{"current_steps": 29995, "total_steps": 78105, "loss": 0.2625, "lr": 3.8687829846714915e-06, "epoch": 1.9201715639203636, "percentage": 38.4, "elapsed_time": "1:18:34", "remaining_time": "2:06:01", "throughput": 20021.3, "total_tokens": 94382720} +{"current_steps": 30000, "total_steps": 78105, "loss": 0.3499, "lr": 3.868315471863537e-06, "epoch": 1.9204916458613406, "percentage": 38.41, "elapsed_time": "1:18:34", "remaining_time": "2:06:00", "throughput": 20021.77, "total_tokens": 94398528} +{"current_steps": 30005, "total_steps": 78105, "loss": 0.353, "lr": 3.8678478907290365e-06, "epoch": 1.9208117278023173, "percentage": 38.42, "elapsed_time": "1:18:35", "remaining_time": "2:05:59", "throughput": 20022.19, "total_tokens": 94413568} +{"current_steps": 30010, "total_steps": 78105, "loss": 0.295, "lr": 3.8673802412913355e-06, "epoch": 1.9211318097432943, "percentage": 38.42, "elapsed_time": "1:18:36", "remaining_time": "2:05:58", "throughput": 20022.67, "total_tokens": 94429376} +{"current_steps": 30015, "total_steps": 78105, "loss": 0.3622, "lr": 3.8669125235737894e-06, "epoch": 1.921451891684271, "percentage": 38.43, "elapsed_time": "1:18:36", "remaining_time": "2:05:57", "throughput": 20023.11, "total_tokens": 94444608} +{"current_steps": 30020, "total_steps": 78105, "loss": 0.2796, "lr": 3.86644473759975e-06, "epoch": 1.921771973625248, "percentage": 38.44, "elapsed_time": "1:18:37", "remaining_time": "2:05:56", "throughput": 20023.55, "total_tokens": 94460224} +{"current_steps": 30025, "total_steps": 78105, "loss": 0.3065, "lr": 3.8659768833925795e-06, "epoch": 1.922092055566225, "percentage": 38.44, "elapsed_time": "1:18:38", "remaining_time": "2:05:55", "throughput": 20023.99, "total_tokens": 94476032} +{"current_steps": 30030, "total_steps": 78105, "loss": 0.2967, "lr": 3.865508960975638e-06, "epoch": 1.9224121375072019, "percentage": 38.45, "elapsed_time": "1:18:38", "remaining_time": "2:05:54", "throughput": 20024.24, "total_tokens": 94491264} +{"current_steps": 30035, "total_steps": 78105, "loss": 0.4575, "lr": 3.865040970372291e-06, "epoch": 1.9227322194481786, "percentage": 38.45, "elapsed_time": "1:18:39", "remaining_time": "2:05:53", "throughput": 20024.62, "total_tokens": 94505920} +{"current_steps": 30040, "total_steps": 78105, "loss": 0.3527, "lr": 3.86457291160591e-06, "epoch": 1.9230523013891556, "percentage": 38.46, "elapsed_time": "1:18:40", "remaining_time": "2:05:52", "throughput": 20025.05, "total_tokens": 94521216} +{"current_steps": 30045, "total_steps": 78105, "loss": 0.4377, "lr": 3.864104784699864e-06, "epoch": 1.9233723833301326, "percentage": 38.47, "elapsed_time": "1:18:40", "remaining_time": "2:05:51", "throughput": 20025.44, "total_tokens": 94536064} +{"current_steps": 30050, "total_steps": 78105, "loss": 0.3294, "lr": 3.86363658967753e-06, "epoch": 1.9236924652711094, "percentage": 38.47, "elapsed_time": "1:18:41", "remaining_time": "2:05:50", "throughput": 20025.88, "total_tokens": 94551808} +{"current_steps": 30055, "total_steps": 78105, "loss": 0.2252, "lr": 3.863168326562289e-06, "epoch": 1.9240125472120861, "percentage": 38.48, "elapsed_time": "1:18:42", "remaining_time": "2:05:49", "throughput": 20026.29, "total_tokens": 94567104} +{"current_steps": 30060, "total_steps": 78105, "loss": 0.3768, "lr": 3.862699995377521e-06, "epoch": 1.9243326291530631, "percentage": 38.49, "elapsed_time": "1:18:42", "remaining_time": "2:05:48", "throughput": 20027.0, "total_tokens": 94585536} +{"current_steps": 30065, "total_steps": 78105, "loss": 0.296, "lr": 3.862231596146614e-06, "epoch": 1.9246527110940401, "percentage": 38.49, "elapsed_time": "1:18:43", "remaining_time": "2:05:47", "throughput": 20027.36, "total_tokens": 94600448} +{"current_steps": 30070, "total_steps": 78105, "loss": 0.42, "lr": 3.861763128892957e-06, "epoch": 1.9249727930350171, "percentage": 38.5, "elapsed_time": "1:18:44", "remaining_time": "2:05:46", "throughput": 20027.8, "total_tokens": 94615872} +{"current_steps": 30075, "total_steps": 78105, "loss": 0.4731, "lr": 3.861294593639942e-06, "epoch": 1.9252928749759939, "percentage": 38.51, "elapsed_time": "1:18:44", "remaining_time": "2:05:45", "throughput": 20028.31, "total_tokens": 94631744} +{"current_steps": 30080, "total_steps": 78105, "loss": 0.3117, "lr": 3.860825990410966e-06, "epoch": 1.9256129569169707, "percentage": 38.51, "elapsed_time": "1:18:45", "remaining_time": "2:05:44", "throughput": 20028.77, "total_tokens": 94647552} +{"current_steps": 30085, "total_steps": 78105, "loss": 0.2447, "lr": 3.860357319229428e-06, "epoch": 1.9259330388579476, "percentage": 38.52, "elapsed_time": "1:18:46", "remaining_time": "2:05:43", "throughput": 20029.32, "total_tokens": 94664384} +{"current_steps": 30090, "total_steps": 78105, "loss": 0.3645, "lr": 3.859888580118732e-06, "epoch": 1.9262531207989246, "percentage": 38.53, "elapsed_time": "1:18:46", "remaining_time": "2:05:42", "throughput": 20029.68, "total_tokens": 94679296} +{"current_steps": 30095, "total_steps": 78105, "loss": 0.303, "lr": 3.859419773102283e-06, "epoch": 1.9265732027399014, "percentage": 38.53, "elapsed_time": "1:18:47", "remaining_time": "2:05:41", "throughput": 20030.19, "total_tokens": 94695424} +{"current_steps": 30100, "total_steps": 78105, "loss": 0.3463, "lr": 3.858950898203491e-06, "epoch": 1.9268932846808782, "percentage": 38.54, "elapsed_time": "1:18:48", "remaining_time": "2:05:40", "throughput": 20030.7, "total_tokens": 94711872} +{"current_steps": 30105, "total_steps": 78105, "loss": 0.2347, "lr": 3.858481955445771e-06, "epoch": 1.9272133666218552, "percentage": 38.54, "elapsed_time": "1:18:49", "remaining_time": "2:05:40", "throughput": 20031.21, "total_tokens": 94728320} +{"current_steps": 30110, "total_steps": 78105, "loss": 0.4611, "lr": 3.8580129448525375e-06, "epoch": 1.9275334485628322, "percentage": 38.55, "elapsed_time": "1:18:49", "remaining_time": "2:05:39", "throughput": 20031.6, "total_tokens": 94743488} +{"current_steps": 30115, "total_steps": 78105, "loss": 0.2032, "lr": 3.857543866447211e-06, "epoch": 1.927853530503809, "percentage": 38.56, "elapsed_time": "1:18:50", "remaining_time": "2:05:38", "throughput": 20032.03, "total_tokens": 94758912} +{"current_steps": 30120, "total_steps": 78105, "loss": 0.267, "lr": 3.857074720253215e-06, "epoch": 1.928173612444786, "percentage": 38.56, "elapsed_time": "1:18:51", "remaining_time": "2:05:37", "throughput": 20032.47, "total_tokens": 94774720} +{"current_steps": 30125, "total_steps": 78105, "loss": 0.2482, "lr": 3.856605506293977e-06, "epoch": 1.9284936943857627, "percentage": 38.57, "elapsed_time": "1:18:51", "remaining_time": "2:05:36", "throughput": 20032.99, "total_tokens": 94791296} +{"current_steps": 30130, "total_steps": 78105, "loss": 0.4299, "lr": 3.856136224592926e-06, "epoch": 1.9288137763267397, "percentage": 38.58, "elapsed_time": "1:18:52", "remaining_time": "2:05:35", "throughput": 20033.56, "total_tokens": 94808320} +{"current_steps": 30135, "total_steps": 78105, "loss": 0.3139, "lr": 3.855666875173496e-06, "epoch": 1.9291338582677167, "percentage": 38.58, "elapsed_time": "1:18:53", "remaining_time": "2:05:34", "throughput": 20034.02, "total_tokens": 94824192} +{"current_steps": 30140, "total_steps": 78105, "loss": 0.3475, "lr": 3.855197458059122e-06, "epoch": 1.9294539402086934, "percentage": 38.59, "elapsed_time": "1:18:53", "remaining_time": "2:05:33", "throughput": 20034.51, "total_tokens": 94839808} +{"current_steps": 30145, "total_steps": 78105, "loss": 0.2858, "lr": 3.854727973273248e-06, "epoch": 1.9297740221496702, "percentage": 38.6, "elapsed_time": "1:18:54", "remaining_time": "2:05:32", "throughput": 20034.96, "total_tokens": 94855424} +{"current_steps": 30150, "total_steps": 78105, "loss": 0.2617, "lr": 3.854258420839315e-06, "epoch": 1.9300941040906472, "percentage": 38.6, "elapsed_time": "1:18:57", "remaining_time": "2:05:34", "throughput": 20026.69, "total_tokens": 94872000} +{"current_steps": 30155, "total_steps": 78105, "loss": 0.3139, "lr": 3.85378880078077e-06, "epoch": 1.9304141860316242, "percentage": 38.61, "elapsed_time": "1:18:57", "remaining_time": "2:05:33", "throughput": 20027.23, "total_tokens": 94888512} +{"current_steps": 30160, "total_steps": 78105, "loss": 0.3184, "lr": 3.853319113121064e-06, "epoch": 1.930734267972601, "percentage": 38.61, "elapsed_time": "1:18:58", "remaining_time": "2:05:33", "throughput": 20027.68, "total_tokens": 94904640} +{"current_steps": 30165, "total_steps": 78105, "loss": 0.3605, "lr": 3.85284935788365e-06, "epoch": 1.9310543499135777, "percentage": 38.62, "elapsed_time": "1:18:59", "remaining_time": "2:05:32", "throughput": 20028.19, "total_tokens": 94920384} +{"current_steps": 30170, "total_steps": 78105, "loss": 0.3403, "lr": 3.852379535091987e-06, "epoch": 1.9313744318545547, "percentage": 38.63, "elapsed_time": "1:18:59", "remaining_time": "2:05:31", "throughput": 20028.6, "total_tokens": 94935424} +{"current_steps": 30175, "total_steps": 78105, "loss": 0.4861, "lr": 3.851909644769534e-06, "epoch": 1.9316945137955317, "percentage": 38.63, "elapsed_time": "1:19:00", "remaining_time": "2:05:30", "throughput": 20029.02, "total_tokens": 94950656} +{"current_steps": 30180, "total_steps": 78105, "loss": 0.2875, "lr": 3.851439686939755e-06, "epoch": 1.9320145957365087, "percentage": 38.64, "elapsed_time": "1:19:01", "remaining_time": "2:05:29", "throughput": 20029.51, "total_tokens": 94966976} +{"current_steps": 30185, "total_steps": 78105, "loss": 0.273, "lr": 3.8509696616261175e-06, "epoch": 1.9323346776774855, "percentage": 38.65, "elapsed_time": "1:19:02", "remaining_time": "2:05:28", "throughput": 20029.96, "total_tokens": 94982336} +{"current_steps": 30190, "total_steps": 78105, "loss": 0.4421, "lr": 3.850499568852092e-06, "epoch": 1.9326547596184622, "percentage": 38.65, "elapsed_time": "1:19:02", "remaining_time": "2:05:27", "throughput": 20030.39, "total_tokens": 94997568} +{"current_steps": 30195, "total_steps": 78105, "loss": 0.384, "lr": 3.850029408641153e-06, "epoch": 1.9329748415594392, "percentage": 38.66, "elapsed_time": "1:19:03", "remaining_time": "2:05:26", "throughput": 20030.8, "total_tokens": 95012608} +{"current_steps": 30200, "total_steps": 78105, "loss": 0.423, "lr": 3.849559181016777e-06, "epoch": 1.9332949235004162, "percentage": 38.67, "elapsed_time": "1:19:04", "remaining_time": "2:05:25", "throughput": 20031.26, "total_tokens": 95028544} +{"current_steps": 30205, "total_steps": 78105, "loss": 0.3546, "lr": 3.849088886002445e-06, "epoch": 1.933615005441393, "percentage": 38.67, "elapsed_time": "1:19:04", "remaining_time": "2:05:24", "throughput": 20031.66, "total_tokens": 95043648} +{"current_steps": 30210, "total_steps": 78105, "loss": 0.4762, "lr": 3.848618523621642e-06, "epoch": 1.9339350873823697, "percentage": 38.68, "elapsed_time": "1:19:05", "remaining_time": "2:05:23", "throughput": 20032.06, "total_tokens": 95059136} +{"current_steps": 30215, "total_steps": 78105, "loss": 0.3033, "lr": 3.8481480938978536e-06, "epoch": 1.9342551693233467, "percentage": 38.69, "elapsed_time": "1:19:06", "remaining_time": "2:05:22", "throughput": 20032.54, "total_tokens": 95075456} +{"current_steps": 30220, "total_steps": 78105, "loss": 0.2361, "lr": 3.847677596854572e-06, "epoch": 1.9345752512643237, "percentage": 38.69, "elapsed_time": "1:19:06", "remaining_time": "2:05:21", "throughput": 20032.98, "total_tokens": 95090944} +{"current_steps": 30225, "total_steps": 78105, "loss": 0.4519, "lr": 3.84720703251529e-06, "epoch": 1.9348953332053007, "percentage": 38.7, "elapsed_time": "1:19:08", "remaining_time": "2:05:21", "throughput": 20030.46, "total_tokens": 95106560} +{"current_steps": 30230, "total_steps": 78105, "loss": 0.2382, "lr": 3.846736400903507e-06, "epoch": 1.9352154151462775, "percentage": 38.7, "elapsed_time": "1:19:09", "remaining_time": "2:05:21", "throughput": 20027.5, "total_tokens": 95122560} +{"current_steps": 30235, "total_steps": 78105, "loss": 0.2277, "lr": 3.846265702042724e-06, "epoch": 1.9355354970872543, "percentage": 38.71, "elapsed_time": "1:19:10", "remaining_time": "2:05:20", "throughput": 20027.87, "total_tokens": 95137344} +{"current_steps": 30240, "total_steps": 78105, "loss": 0.3356, "lr": 3.845794935956443e-06, "epoch": 1.9358555790282312, "percentage": 38.72, "elapsed_time": "1:19:10", "remaining_time": "2:05:19", "throughput": 20028.35, "total_tokens": 95153216} +{"current_steps": 30245, "total_steps": 78105, "loss": 0.2712, "lr": 3.845324102668173e-06, "epoch": 1.9361756609692082, "percentage": 38.72, "elapsed_time": "1:19:11", "remaining_time": "2:05:19", "throughput": 20028.79, "total_tokens": 95169088} +{"current_steps": 30250, "total_steps": 78105, "loss": 0.3282, "lr": 3.844853202201425e-06, "epoch": 1.936495742910185, "percentage": 38.73, "elapsed_time": "1:19:13", "remaining_time": "2:05:20", "throughput": 20023.49, "total_tokens": 95184256} +{"current_steps": 30255, "total_steps": 78105, "loss": 0.3786, "lr": 3.844382234579714e-06, "epoch": 1.9368158248511618, "percentage": 38.74, "elapsed_time": "1:19:14", "remaining_time": "2:05:19", "throughput": 20023.98, "total_tokens": 95200640} +{"current_steps": 30260, "total_steps": 78105, "loss": 0.2359, "lr": 3.843911199826556e-06, "epoch": 1.9371359067921388, "percentage": 38.74, "elapsed_time": "1:19:14", "remaining_time": "2:05:18", "throughput": 20024.32, "total_tokens": 95215232} +{"current_steps": 30265, "total_steps": 78105, "loss": 0.3188, "lr": 3.843440097965473e-06, "epoch": 1.9374559887331158, "percentage": 38.75, "elapsed_time": "1:19:15", "remaining_time": "2:05:17", "throughput": 20024.72, "total_tokens": 95230208} +{"current_steps": 30270, "total_steps": 78105, "loss": 0.4323, "lr": 3.84296892901999e-06, "epoch": 1.9377760706740925, "percentage": 38.76, "elapsed_time": "1:19:16", "remaining_time": "2:05:16", "throughput": 20025.1, "total_tokens": 95245056} +{"current_steps": 30275, "total_steps": 78105, "loss": 0.278, "lr": 3.842497693013632e-06, "epoch": 1.9380961526150695, "percentage": 38.76, "elapsed_time": "1:19:16", "remaining_time": "2:05:15", "throughput": 20025.55, "total_tokens": 95260992} +{"current_steps": 30280, "total_steps": 78105, "loss": 0.2226, "lr": 3.842026389969933e-06, "epoch": 1.9384162345560463, "percentage": 38.77, "elapsed_time": "1:19:17", "remaining_time": "2:05:14", "throughput": 20026.08, "total_tokens": 95277696} +{"current_steps": 30285, "total_steps": 78105, "loss": 0.2805, "lr": 3.841555019912427e-06, "epoch": 1.9387363164970233, "percentage": 38.77, "elapsed_time": "1:19:18", "remaining_time": "2:05:13", "throughput": 20026.45, "total_tokens": 95292352} +{"current_steps": 30290, "total_steps": 78105, "loss": 0.4224, "lr": 3.841083582864651e-06, "epoch": 1.9390563984380003, "percentage": 38.78, "elapsed_time": "1:19:19", "remaining_time": "2:05:12", "throughput": 20026.94, "total_tokens": 95308224} +{"current_steps": 30295, "total_steps": 78105, "loss": 0.2287, "lr": 3.840612078850146e-06, "epoch": 1.939376480378977, "percentage": 38.79, "elapsed_time": "1:19:19", "remaining_time": "2:05:11", "throughput": 20027.39, "total_tokens": 95323840} +{"current_steps": 30300, "total_steps": 78105, "loss": 0.3425, "lr": 3.840140507892457e-06, "epoch": 1.9396965623199538, "percentage": 38.79, "elapsed_time": "1:19:20", "remaining_time": "2:05:10", "throughput": 20027.76, "total_tokens": 95338624} +{"current_steps": 30305, "total_steps": 78105, "loss": 0.2687, "lr": 3.839668870015131e-06, "epoch": 1.9400166442609308, "percentage": 38.8, "elapsed_time": "1:19:20", "remaining_time": "2:05:09", "throughput": 20028.17, "total_tokens": 95353920} +{"current_steps": 30310, "total_steps": 78105, "loss": 0.2283, "lr": 3.8391971652417206e-06, "epoch": 1.9403367262019078, "percentage": 38.81, "elapsed_time": "1:19:21", "remaining_time": "2:05:08", "throughput": 20028.54, "total_tokens": 95368704} +{"current_steps": 30315, "total_steps": 78105, "loss": 0.2701, "lr": 3.838725393595779e-06, "epoch": 1.9406568081428845, "percentage": 38.81, "elapsed_time": "1:19:22", "remaining_time": "2:05:07", "throughput": 20028.93, "total_tokens": 95384192} +{"current_steps": 30320, "total_steps": 78105, "loss": 0.3183, "lr": 3.838253555100865e-06, "epoch": 1.9409768900838613, "percentage": 38.82, "elapsed_time": "1:19:22", "remaining_time": "2:05:06", "throughput": 20029.28, "total_tokens": 95399040} +{"current_steps": 30325, "total_steps": 78105, "loss": 0.353, "lr": 3.837781649780539e-06, "epoch": 1.9412969720248383, "percentage": 38.83, "elapsed_time": "1:19:23", "remaining_time": "2:05:05", "throughput": 20029.74, "total_tokens": 95414784} +{"current_steps": 30330, "total_steps": 78105, "loss": 0.3793, "lr": 3.837309677658365e-06, "epoch": 1.9416170539658153, "percentage": 38.83, "elapsed_time": "1:19:24", "remaining_time": "2:05:04", "throughput": 20030.26, "total_tokens": 95431552} +{"current_steps": 30335, "total_steps": 78105, "loss": 0.3414, "lr": 3.836837638757911e-06, "epoch": 1.9419371359067923, "percentage": 38.84, "elapsed_time": "1:19:25", "remaining_time": "2:05:03", "throughput": 20030.96, "total_tokens": 95450112} +{"current_steps": 30340, "total_steps": 78105, "loss": 0.4655, "lr": 3.83636553310275e-06, "epoch": 1.942257217847769, "percentage": 38.85, "elapsed_time": "1:19:25", "remaining_time": "2:05:02", "throughput": 20031.37, "total_tokens": 95465344} +{"current_steps": 30345, "total_steps": 78105, "loss": 0.269, "lr": 3.835893360716454e-06, "epoch": 1.9425772997887458, "percentage": 38.85, "elapsed_time": "1:19:26", "remaining_time": "2:05:01", "throughput": 20031.75, "total_tokens": 95480256} +{"current_steps": 30350, "total_steps": 78105, "loss": 0.2237, "lr": 3.835421121622603e-06, "epoch": 1.9428973817297228, "percentage": 38.86, "elapsed_time": "1:19:27", "remaining_time": "2:05:00", "throughput": 20032.2, "total_tokens": 95495744} +{"current_steps": 30355, "total_steps": 78105, "loss": 0.3931, "lr": 3.8349488158447765e-06, "epoch": 1.9432174636706998, "percentage": 38.86, "elapsed_time": "1:19:27", "remaining_time": "2:05:00", "throughput": 20032.74, "total_tokens": 95512704} +{"current_steps": 30360, "total_steps": 78105, "loss": 0.3496, "lr": 3.83447644340656e-06, "epoch": 1.9435375456116766, "percentage": 38.87, "elapsed_time": "1:19:28", "remaining_time": "2:04:59", "throughput": 20033.22, "total_tokens": 95528832} +{"current_steps": 30365, "total_steps": 78105, "loss": 0.3195, "lr": 3.834004004331541e-06, "epoch": 1.9438576275526533, "percentage": 38.88, "elapsed_time": "1:19:29", "remaining_time": "2:04:58", "throughput": 20033.55, "total_tokens": 95543360} +{"current_steps": 30370, "total_steps": 78105, "loss": 0.3097, "lr": 3.83353149864331e-06, "epoch": 1.9441777094936303, "percentage": 38.88, "elapsed_time": "1:19:29", "remaining_time": "2:04:57", "throughput": 20033.95, "total_tokens": 95558720} +{"current_steps": 30375, "total_steps": 78105, "loss": 0.3635, "lr": 3.833058926365463e-06, "epoch": 1.9444977914346073, "percentage": 38.89, "elapsed_time": "1:19:30", "remaining_time": "2:04:56", "throughput": 20034.4, "total_tokens": 95574592} +{"current_steps": 30380, "total_steps": 78105, "loss": 0.3275, "lr": 3.8325862875215956e-06, "epoch": 1.944817873375584, "percentage": 38.9, "elapsed_time": "1:19:31", "remaining_time": "2:04:55", "throughput": 20034.82, "total_tokens": 95589952} +{"current_steps": 30385, "total_steps": 78105, "loss": 0.3504, "lr": 3.8321135821353114e-06, "epoch": 1.945137955316561, "percentage": 38.9, "elapsed_time": "1:19:31", "remaining_time": "2:04:54", "throughput": 20035.35, "total_tokens": 95606784} +{"current_steps": 30390, "total_steps": 78105, "loss": 0.4305, "lr": 3.831640810230213e-06, "epoch": 1.9454580372575379, "percentage": 38.91, "elapsed_time": "1:19:32", "remaining_time": "2:04:53", "throughput": 20035.78, "total_tokens": 95622464} +{"current_steps": 30395, "total_steps": 78105, "loss": 0.2161, "lr": 3.831167971829909e-06, "epoch": 1.9457781191985148, "percentage": 38.92, "elapsed_time": "1:19:33", "remaining_time": "2:04:52", "throughput": 20036.34, "total_tokens": 95639744} +{"current_steps": 30400, "total_steps": 78105, "loss": 0.3016, "lr": 3.8306950669580105e-06, "epoch": 1.9460982011394918, "percentage": 38.92, "elapsed_time": "1:19:34", "remaining_time": "2:04:51", "throughput": 20036.84, "total_tokens": 95656320} +{"current_steps": 30405, "total_steps": 78105, "loss": 0.2999, "lr": 3.8302220956381305e-06, "epoch": 1.9464182830804686, "percentage": 38.93, "elapsed_time": "1:19:34", "remaining_time": "2:04:50", "throughput": 20037.23, "total_tokens": 95671552} +{"current_steps": 30410, "total_steps": 78105, "loss": 0.3643, "lr": 3.829749057893889e-06, "epoch": 1.9467383650214454, "percentage": 38.93, "elapsed_time": "1:19:35", "remaining_time": "2:04:49", "throughput": 20037.68, "total_tokens": 95687360} +{"current_steps": 30415, "total_steps": 78105, "loss": 0.3055, "lr": 3.829275953748906e-06, "epoch": 1.9470584469624224, "percentage": 38.94, "elapsed_time": "1:19:36", "remaining_time": "2:04:48", "throughput": 20038.11, "total_tokens": 95702848} +{"current_steps": 30420, "total_steps": 78105, "loss": 0.2595, "lr": 3.828802783226805e-06, "epoch": 1.9473785289033994, "percentage": 38.95, "elapsed_time": "1:19:36", "remaining_time": "2:04:47", "throughput": 20038.61, "total_tokens": 95719040} +{"current_steps": 30425, "total_steps": 78105, "loss": 0.3429, "lr": 3.828329546351216e-06, "epoch": 1.9476986108443761, "percentage": 38.95, "elapsed_time": "1:19:37", "remaining_time": "2:04:46", "throughput": 20039.1, "total_tokens": 95735360} +{"current_steps": 30430, "total_steps": 78105, "loss": 0.3531, "lr": 3.827856243145768e-06, "epoch": 1.948018692785353, "percentage": 38.96, "elapsed_time": "1:19:38", "remaining_time": "2:04:45", "throughput": 20039.6, "total_tokens": 95751616} +{"current_steps": 30435, "total_steps": 78105, "loss": 0.5013, "lr": 3.827382873634095e-06, "epoch": 1.9483387747263299, "percentage": 38.97, "elapsed_time": "1:19:38", "remaining_time": "2:04:44", "throughput": 20040.02, "total_tokens": 95767168} +{"current_steps": 30440, "total_steps": 78105, "loss": 0.372, "lr": 3.826909437839836e-06, "epoch": 1.9486588566673069, "percentage": 38.97, "elapsed_time": "1:19:39", "remaining_time": "2:04:44", "throughput": 20040.4, "total_tokens": 95782464} +{"current_steps": 30445, "total_steps": 78105, "loss": 0.399, "lr": 3.82643593578663e-06, "epoch": 1.9489789386082839, "percentage": 38.98, "elapsed_time": "1:19:40", "remaining_time": "2:04:43", "throughput": 20040.83, "total_tokens": 95798400} +{"current_steps": 30450, "total_steps": 78105, "loss": 0.3005, "lr": 3.825962367498124e-06, "epoch": 1.9492990205492606, "percentage": 38.99, "elapsed_time": "1:19:40", "remaining_time": "2:04:42", "throughput": 20041.33, "total_tokens": 95814912} +{"current_steps": 30455, "total_steps": 78105, "loss": 0.2929, "lr": 3.825488732997963e-06, "epoch": 1.9496191024902374, "percentage": 38.99, "elapsed_time": "1:19:41", "remaining_time": "2:04:41", "throughput": 20041.8, "total_tokens": 95831232} +{"current_steps": 30460, "total_steps": 78105, "loss": 0.3082, "lr": 3.8250150323098e-06, "epoch": 1.9499391844312144, "percentage": 39.0, "elapsed_time": "1:19:42", "remaining_time": "2:04:40", "throughput": 20042.26, "total_tokens": 95847232} +{"current_steps": 30465, "total_steps": 78105, "loss": 0.223, "lr": 3.824541265457287e-06, "epoch": 1.9502592663721914, "percentage": 39.01, "elapsed_time": "1:19:42", "remaining_time": "2:04:39", "throughput": 20042.72, "total_tokens": 95863040} +{"current_steps": 30470, "total_steps": 78105, "loss": 0.3478, "lr": 3.824067432464083e-06, "epoch": 1.9505793483131681, "percentage": 39.01, "elapsed_time": "1:19:43", "remaining_time": "2:04:38", "throughput": 20043.13, "total_tokens": 95879040} +{"current_steps": 30475, "total_steps": 78105, "loss": 0.3021, "lr": 3.823593533353849e-06, "epoch": 1.950899430254145, "percentage": 39.02, "elapsed_time": "1:19:44", "remaining_time": "2:04:37", "throughput": 20043.61, "total_tokens": 95895360} +{"current_steps": 30480, "total_steps": 78105, "loss": 0.3454, "lr": 3.823119568150247e-06, "epoch": 1.951219512195122, "percentage": 39.02, "elapsed_time": "1:19:45", "remaining_time": "2:04:36", "throughput": 20044.11, "total_tokens": 95911872} +{"current_steps": 30485, "total_steps": 78105, "loss": 0.4653, "lr": 3.822645536876946e-06, "epoch": 1.951539594136099, "percentage": 39.03, "elapsed_time": "1:19:45", "remaining_time": "2:04:35", "throughput": 20044.59, "total_tokens": 95928256} +{"current_steps": 30490, "total_steps": 78105, "loss": 0.3173, "lr": 3.822171439557617e-06, "epoch": 1.951859676077076, "percentage": 39.04, "elapsed_time": "1:19:46", "remaining_time": "2:04:34", "throughput": 20045.23, "total_tokens": 95946560} +{"current_steps": 30495, "total_steps": 78105, "loss": 0.3329, "lr": 3.8216972762159325e-06, "epoch": 1.9521797580180527, "percentage": 39.04, "elapsed_time": "1:19:47", "remaining_time": "2:04:34", "throughput": 20045.76, "total_tokens": 95963520} +{"current_steps": 30500, "total_steps": 78105, "loss": 0.2395, "lr": 3.821223046875571e-06, "epoch": 1.9524998399590294, "percentage": 39.05, "elapsed_time": "1:19:47", "remaining_time": "2:04:33", "throughput": 20046.21, "total_tokens": 95979200} +{"current_steps": 30505, "total_steps": 78105, "loss": 0.228, "lr": 3.8207487515602115e-06, "epoch": 1.9528199219000064, "percentage": 39.06, "elapsed_time": "1:19:48", "remaining_time": "2:04:32", "throughput": 20046.61, "total_tokens": 95994368} +{"current_steps": 30510, "total_steps": 78105, "loss": 0.3221, "lr": 3.82027439029354e-06, "epoch": 1.9531400038409834, "percentage": 39.06, "elapsed_time": "1:19:49", "remaining_time": "2:04:31", "throughput": 20047.27, "total_tokens": 96012992} +{"current_steps": 30515, "total_steps": 78105, "loss": 0.4123, "lr": 3.819799963099242e-06, "epoch": 1.9534600857819602, "percentage": 39.07, "elapsed_time": "1:19:49", "remaining_time": "2:04:30", "throughput": 20047.67, "total_tokens": 96028288} +{"current_steps": 30520, "total_steps": 78105, "loss": 0.4322, "lr": 3.819325470001008e-06, "epoch": 1.953780167722937, "percentage": 39.08, "elapsed_time": "1:19:50", "remaining_time": "2:04:29", "throughput": 20048.18, "total_tokens": 96044992} +{"current_steps": 30525, "total_steps": 78105, "loss": 0.291, "lr": 3.818850911022534e-06, "epoch": 1.954100249663914, "percentage": 39.08, "elapsed_time": "1:19:51", "remaining_time": "2:04:28", "throughput": 20048.59, "total_tokens": 96060544} +{"current_steps": 30530, "total_steps": 78105, "loss": 0.3012, "lr": 3.818376286187513e-06, "epoch": 1.954420331604891, "percentage": 39.09, "elapsed_time": "1:19:52", "remaining_time": "2:04:27", "throughput": 20049.01, "total_tokens": 96076416} +{"current_steps": 30535, "total_steps": 78105, "loss": 0.3354, "lr": 3.817901595519648e-06, "epoch": 1.9547404135458677, "percentage": 39.09, "elapsed_time": "1:19:52", "remaining_time": "2:04:26", "throughput": 20049.35, "total_tokens": 96091136} +{"current_steps": 30540, "total_steps": 78105, "loss": 0.2579, "lr": 3.817426839042643e-06, "epoch": 1.9550604954868447, "percentage": 39.1, "elapsed_time": "1:19:53", "remaining_time": "2:04:25", "throughput": 20049.73, "total_tokens": 96106304} +{"current_steps": 30545, "total_steps": 78105, "loss": 0.2388, "lr": 3.816952016780203e-06, "epoch": 1.9553805774278215, "percentage": 39.11, "elapsed_time": "1:19:54", "remaining_time": "2:04:24", "throughput": 20050.18, "total_tokens": 96122624} +{"current_steps": 30550, "total_steps": 78105, "loss": 0.3569, "lr": 3.8164771287560394e-06, "epoch": 1.9557006593687984, "percentage": 39.11, "elapsed_time": "1:19:54", "remaining_time": "2:04:23", "throughput": 20050.64, "total_tokens": 96138688} +{"current_steps": 30555, "total_steps": 78105, "loss": 0.3053, "lr": 3.816002174993865e-06, "epoch": 1.9560207413097754, "percentage": 39.12, "elapsed_time": "1:19:55", "remaining_time": "2:04:22", "throughput": 20051.08, "total_tokens": 96154560} +{"current_steps": 30560, "total_steps": 78105, "loss": 0.2488, "lr": 3.815527155517397e-06, "epoch": 1.9563408232507522, "percentage": 39.13, "elapsed_time": "1:19:56", "remaining_time": "2:04:21", "throughput": 20051.52, "total_tokens": 96170624} +{"current_steps": 30565, "total_steps": 78105, "loss": 0.4322, "lr": 3.815052070350355e-06, "epoch": 1.956660905191729, "percentage": 39.13, "elapsed_time": "1:19:56", "remaining_time": "2:04:20", "throughput": 20051.94, "total_tokens": 96185984} +{"current_steps": 30570, "total_steps": 78105, "loss": 0.3333, "lr": 3.8145769195164628e-06, "epoch": 1.956980987132706, "percentage": 39.14, "elapsed_time": "1:19:57", "remaining_time": "2:04:19", "throughput": 20052.44, "total_tokens": 96202368} +{"current_steps": 30575, "total_steps": 78105, "loss": 0.2573, "lr": 3.8141017030394454e-06, "epoch": 1.957301069073683, "percentage": 39.15, "elapsed_time": "1:19:58", "remaining_time": "2:04:19", "throughput": 20052.94, "total_tokens": 96218816} +{"current_steps": 30580, "total_steps": 78105, "loss": 0.3146, "lr": 3.813626420943035e-06, "epoch": 1.9576211510146597, "percentage": 39.15, "elapsed_time": "1:19:58", "remaining_time": "2:04:18", "throughput": 20053.28, "total_tokens": 96233472} +{"current_steps": 30585, "total_steps": 78105, "loss": 0.3778, "lr": 3.8131510732509636e-06, "epoch": 1.9579412329556365, "percentage": 39.16, "elapsed_time": "1:19:59", "remaining_time": "2:04:17", "throughput": 20053.62, "total_tokens": 96248128} +{"current_steps": 30590, "total_steps": 78105, "loss": 0.3476, "lr": 3.8126756599869673e-06, "epoch": 1.9582613148966135, "percentage": 39.17, "elapsed_time": "1:20:00", "remaining_time": "2:04:16", "throughput": 20053.99, "total_tokens": 96263296} +{"current_steps": 30595, "total_steps": 78105, "loss": 0.2856, "lr": 3.812200181174786e-06, "epoch": 1.9585813968375905, "percentage": 39.17, "elapsed_time": "1:20:00", "remaining_time": "2:04:15", "throughput": 20054.38, "total_tokens": 96278400} +{"current_steps": 30600, "total_steps": 78105, "loss": 0.3761, "lr": 3.8117246368381627e-06, "epoch": 1.9589014787785675, "percentage": 39.18, "elapsed_time": "1:20:01", "remaining_time": "2:04:14", "throughput": 20054.8, "total_tokens": 96294080} +{"current_steps": 30605, "total_steps": 78105, "loss": 0.271, "lr": 3.811249027000844e-06, "epoch": 1.9592215607195442, "percentage": 39.18, "elapsed_time": "1:20:02", "remaining_time": "2:04:13", "throughput": 20055.11, "total_tokens": 96308416} +{"current_steps": 30610, "total_steps": 78105, "loss": 0.2707, "lr": 3.8107733516865787e-06, "epoch": 1.959541642660521, "percentage": 39.19, "elapsed_time": "1:20:02", "remaining_time": "2:04:12", "throughput": 20055.57, "total_tokens": 96324672} +{"current_steps": 30615, "total_steps": 78105, "loss": 0.5187, "lr": 3.810297610919119e-06, "epoch": 1.959861724601498, "percentage": 39.2, "elapsed_time": "1:20:03", "remaining_time": "2:04:11", "throughput": 20056.0, "total_tokens": 96340416} +{"current_steps": 30620, "total_steps": 78105, "loss": 0.3842, "lr": 3.8098218047222225e-06, "epoch": 1.960181806542475, "percentage": 39.2, "elapsed_time": "1:20:04", "remaining_time": "2:04:10", "throughput": 20056.4, "total_tokens": 96355584} +{"current_steps": 30625, "total_steps": 78105, "loss": 0.4289, "lr": 3.8093459331196474e-06, "epoch": 1.9605018884834517, "percentage": 39.21, "elapsed_time": "1:20:04", "remaining_time": "2:04:09", "throughput": 20056.79, "total_tokens": 96370752} +{"current_steps": 30630, "total_steps": 78105, "loss": 0.3577, "lr": 3.8088699961351573e-06, "epoch": 1.9608219704244285, "percentage": 39.22, "elapsed_time": "1:20:05", "remaining_time": "2:04:08", "throughput": 20057.32, "total_tokens": 96387136} +{"current_steps": 30635, "total_steps": 78105, "loss": 0.3082, "lr": 3.8083939937925157e-06, "epoch": 1.9611420523654055, "percentage": 39.22, "elapsed_time": "1:20:06", "remaining_time": "2:04:07", "throughput": 20057.73, "total_tokens": 96402560} +{"current_steps": 30640, "total_steps": 78105, "loss": 0.3179, "lr": 3.8079179261154942e-06, "epoch": 1.9614621343063825, "percentage": 39.23, "elapsed_time": "1:20:06", "remaining_time": "2:04:06", "throughput": 20058.32, "total_tokens": 96420288} +{"current_steps": 30645, "total_steps": 78105, "loss": 0.2698, "lr": 3.807441793127864e-06, "epoch": 1.9617822162473595, "percentage": 39.24, "elapsed_time": "1:20:07", "remaining_time": "2:04:05", "throughput": 20058.83, "total_tokens": 96436800} +{"current_steps": 30650, "total_steps": 78105, "loss": 0.2348, "lr": 3.8069655948534006e-06, "epoch": 1.9621022981883363, "percentage": 39.24, "elapsed_time": "1:20:08", "remaining_time": "2:04:04", "throughput": 20059.24, "total_tokens": 96451968} +{"current_steps": 30655, "total_steps": 78105, "loss": 0.4277, "lr": 3.8064893313158834e-06, "epoch": 1.962422380129313, "percentage": 39.25, "elapsed_time": "1:20:09", "remaining_time": "2:04:03", "throughput": 20059.69, "total_tokens": 96467840} +{"current_steps": 30660, "total_steps": 78105, "loss": 0.5217, "lr": 3.8060130025390946e-06, "epoch": 1.96274246207029, "percentage": 39.25, "elapsed_time": "1:20:09", "remaining_time": "2:04:02", "throughput": 20060.15, "total_tokens": 96483392} +{"current_steps": 30665, "total_steps": 78105, "loss": 0.2882, "lr": 3.805536608546819e-06, "epoch": 1.963062544011267, "percentage": 39.26, "elapsed_time": "1:20:10", "remaining_time": "2:04:01", "throughput": 20060.67, "total_tokens": 96500032} +{"current_steps": 30670, "total_steps": 78105, "loss": 0.5469, "lr": 3.805060149362846e-06, "epoch": 1.9633826259522438, "percentage": 39.27, "elapsed_time": "1:20:11", "remaining_time": "2:04:00", "throughput": 20061.13, "total_tokens": 96515904} +{"current_steps": 30675, "total_steps": 78105, "loss": 0.2334, "lr": 3.804583625010966e-06, "epoch": 1.9637027078932205, "percentage": 39.27, "elapsed_time": "1:20:11", "remaining_time": "2:03:59", "throughput": 20061.53, "total_tokens": 96531264} +{"current_steps": 30680, "total_steps": 78105, "loss": 0.3117, "lr": 3.804107035514975e-06, "epoch": 1.9640227898341975, "percentage": 39.28, "elapsed_time": "1:20:12", "remaining_time": "2:03:59", "throughput": 20061.94, "total_tokens": 96546816} +{"current_steps": 30685, "total_steps": 78105, "loss": 0.3487, "lr": 3.803630380898672e-06, "epoch": 1.9643428717751745, "percentage": 39.29, "elapsed_time": "1:20:13", "remaining_time": "2:03:58", "throughput": 20062.3, "total_tokens": 96561408} +{"current_steps": 30690, "total_steps": 78105, "loss": 0.3256, "lr": 3.803153661185858e-06, "epoch": 1.9646629537161513, "percentage": 39.29, "elapsed_time": "1:20:13", "remaining_time": "2:03:57", "throughput": 20062.78, "total_tokens": 96577152} +{"current_steps": 30695, "total_steps": 78105, "loss": 0.3654, "lr": 3.802676876400338e-06, "epoch": 1.964983035657128, "percentage": 39.3, "elapsed_time": "1:20:14", "remaining_time": "2:03:56", "throughput": 20063.23, "total_tokens": 96592960} +{"current_steps": 30700, "total_steps": 78105, "loss": 0.5017, "lr": 3.802200026565921e-06, "epoch": 1.965303117598105, "percentage": 39.31, "elapsed_time": "1:20:15", "remaining_time": "2:03:55", "throughput": 20063.62, "total_tokens": 96608192} +{"current_steps": 30705, "total_steps": 78105, "loss": 0.284, "lr": 3.8017231117064166e-06, "epoch": 1.965623199539082, "percentage": 39.31, "elapsed_time": "1:20:15", "remaining_time": "2:03:54", "throughput": 20064.08, "total_tokens": 96623680} +{"current_steps": 30710, "total_steps": 78105, "loss": 0.3915, "lr": 3.8012461318456407e-06, "epoch": 1.965943281480059, "percentage": 39.32, "elapsed_time": "1:20:16", "remaining_time": "2:03:53", "throughput": 20064.48, "total_tokens": 96638912} +{"current_steps": 30715, "total_steps": 78105, "loss": 0.3317, "lr": 3.800769087007411e-06, "epoch": 1.9662633634210358, "percentage": 39.33, "elapsed_time": "1:20:17", "remaining_time": "2:03:52", "throughput": 20065.02, "total_tokens": 96655744} +{"current_steps": 30720, "total_steps": 78105, "loss": 0.2268, "lr": 3.8002919772155477e-06, "epoch": 1.9665834453620126, "percentage": 39.33, "elapsed_time": "1:20:17", "remaining_time": "2:03:51", "throughput": 20065.47, "total_tokens": 96671616} +{"current_steps": 30725, "total_steps": 78105, "loss": 0.3062, "lr": 3.7998148024938775e-06, "epoch": 1.9669035273029896, "percentage": 39.34, "elapsed_time": "1:20:18", "remaining_time": "2:03:50", "throughput": 20065.86, "total_tokens": 96686976} +{"current_steps": 30730, "total_steps": 78105, "loss": 0.3562, "lr": 3.799337562866226e-06, "epoch": 1.9672236092439666, "percentage": 39.34, "elapsed_time": "1:20:19", "remaining_time": "2:03:49", "throughput": 20066.39, "total_tokens": 96703744} +{"current_steps": 30735, "total_steps": 78105, "loss": 0.4155, "lr": 3.7988602583564236e-06, "epoch": 1.9675436911849433, "percentage": 39.35, "elapsed_time": "1:20:19", "remaining_time": "2:03:48", "throughput": 20066.81, "total_tokens": 96719488} +{"current_steps": 30740, "total_steps": 78105, "loss": 0.3041, "lr": 3.7983828889883067e-06, "epoch": 1.96786377312592, "percentage": 39.36, "elapsed_time": "1:20:20", "remaining_time": "2:03:47", "throughput": 20067.37, "total_tokens": 96736448} +{"current_steps": 30745, "total_steps": 78105, "loss": 0.3438, "lr": 3.797905454785711e-06, "epoch": 1.968183855066897, "percentage": 39.36, "elapsed_time": "1:20:21", "remaining_time": "2:03:46", "throughput": 20067.85, "total_tokens": 96752960} +{"current_steps": 30750, "total_steps": 78105, "loss": 0.4202, "lr": 3.797427955772477e-06, "epoch": 1.968503937007874, "percentage": 39.37, "elapsed_time": "1:20:21", "remaining_time": "2:03:45", "throughput": 20068.22, "total_tokens": 96768192} +{"current_steps": 30755, "total_steps": 78105, "loss": 0.2469, "lr": 3.7969503919724493e-06, "epoch": 1.968824018948851, "percentage": 39.38, "elapsed_time": "1:20:22", "remaining_time": "2:03:44", "throughput": 20068.6, "total_tokens": 96783296} +{"current_steps": 30760, "total_steps": 78105, "loss": 0.3581, "lr": 3.796472763409475e-06, "epoch": 1.9691441008898278, "percentage": 39.38, "elapsed_time": "1:20:23", "remaining_time": "2:03:43", "throughput": 20069.05, "total_tokens": 96798976} +{"current_steps": 30765, "total_steps": 78105, "loss": 0.2204, "lr": 3.795995070107403e-06, "epoch": 1.9694641828308046, "percentage": 39.39, "elapsed_time": "1:20:23", "remaining_time": "2:03:42", "throughput": 20069.46, "total_tokens": 96814464} +{"current_steps": 30770, "total_steps": 78105, "loss": 0.2773, "lr": 3.795517312090089e-06, "epoch": 1.9697842647717816, "percentage": 39.4, "elapsed_time": "1:20:24", "remaining_time": "2:03:41", "throughput": 20069.82, "total_tokens": 96829120} +{"current_steps": 30775, "total_steps": 78105, "loss": 0.3547, "lr": 3.795039489381388e-06, "epoch": 1.9701043467127586, "percentage": 39.4, "elapsed_time": "1:20:25", "remaining_time": "2:03:40", "throughput": 20070.27, "total_tokens": 96844672} +{"current_steps": 30780, "total_steps": 78105, "loss": 0.3898, "lr": 3.79456160200516e-06, "epoch": 1.9704244286537353, "percentage": 39.41, "elapsed_time": "1:20:25", "remaining_time": "2:03:40", "throughput": 20070.73, "total_tokens": 96860544} +{"current_steps": 30785, "total_steps": 78105, "loss": 0.4881, "lr": 3.7940836499852697e-06, "epoch": 1.9707445105947121, "percentage": 39.41, "elapsed_time": "1:20:26", "remaining_time": "2:03:39", "throughput": 20071.15, "total_tokens": 96875968} +{"current_steps": 30790, "total_steps": 78105, "loss": 0.4323, "lr": 3.793605633345582e-06, "epoch": 1.971064592535689, "percentage": 39.42, "elapsed_time": "1:20:27", "remaining_time": "2:03:38", "throughput": 20071.62, "total_tokens": 96891904} +{"current_steps": 30795, "total_steps": 78105, "loss": 0.2951, "lr": 3.793127552109966e-06, "epoch": 1.971384674476666, "percentage": 39.43, "elapsed_time": "1:20:27", "remaining_time": "2:03:37", "throughput": 20072.05, "total_tokens": 96907840} +{"current_steps": 30800, "total_steps": 78105, "loss": 0.5639, "lr": 3.7926494063022978e-06, "epoch": 1.9717047564176429, "percentage": 39.43, "elapsed_time": "1:20:28", "remaining_time": "2:03:36", "throughput": 20072.45, "total_tokens": 96923072} +{"current_steps": 30805, "total_steps": 78105, "loss": 0.441, "lr": 3.79217119594645e-06, "epoch": 1.9720248383586199, "percentage": 39.44, "elapsed_time": "1:20:29", "remaining_time": "2:03:35", "throughput": 20072.83, "total_tokens": 96937792} +{"current_steps": 30810, "total_steps": 78105, "loss": 0.2818, "lr": 3.7916929210663037e-06, "epoch": 1.9723449202995966, "percentage": 39.45, "elapsed_time": "1:20:29", "remaining_time": "2:03:34", "throughput": 20073.21, "total_tokens": 96952960} +{"current_steps": 30815, "total_steps": 78105, "loss": 0.3657, "lr": 3.7912145816857414e-06, "epoch": 1.9726650022405736, "percentage": 39.45, "elapsed_time": "1:20:30", "remaining_time": "2:03:33", "throughput": 20073.72, "total_tokens": 96969792} +{"current_steps": 30820, "total_steps": 78105, "loss": 0.269, "lr": 3.790736177828648e-06, "epoch": 1.9729850841815506, "percentage": 39.46, "elapsed_time": "1:20:31", "remaining_time": "2:03:32", "throughput": 20074.3, "total_tokens": 96987072} +{"current_steps": 30825, "total_steps": 78105, "loss": 0.2769, "lr": 3.7902577095189137e-06, "epoch": 1.9733051661225274, "percentage": 39.47, "elapsed_time": "1:20:32", "remaining_time": "2:03:31", "throughput": 20074.83, "total_tokens": 97003968} +{"current_steps": 30830, "total_steps": 78105, "loss": 0.213, "lr": 3.78977917678043e-06, "epoch": 1.9736252480635041, "percentage": 39.47, "elapsed_time": "1:20:32", "remaining_time": "2:03:30", "throughput": 20075.25, "total_tokens": 97019456} +{"current_steps": 30835, "total_steps": 78105, "loss": 0.2732, "lr": 3.7893005796370924e-06, "epoch": 1.9739453300044811, "percentage": 39.48, "elapsed_time": "1:20:33", "remaining_time": "2:03:29", "throughput": 20075.64, "total_tokens": 97034432} +{"current_steps": 30840, "total_steps": 78105, "loss": 0.2402, "lr": 3.7888219181127995e-06, "epoch": 1.9742654119454581, "percentage": 39.49, "elapsed_time": "1:20:34", "remaining_time": "2:03:28", "throughput": 20076.17, "total_tokens": 97051392} +{"current_steps": 30845, "total_steps": 78105, "loss": 0.3779, "lr": 3.7883431922314532e-06, "epoch": 1.974585493886435, "percentage": 39.49, "elapsed_time": "1:20:34", "remaining_time": "2:03:27", "throughput": 20076.64, "total_tokens": 97067648} +{"current_steps": 30850, "total_steps": 78105, "loss": 0.4301, "lr": 3.7878644020169585e-06, "epoch": 1.9749055758274117, "percentage": 39.5, "elapsed_time": "1:20:35", "remaining_time": "2:03:26", "throughput": 20077.03, "total_tokens": 97082624} +{"current_steps": 30855, "total_steps": 78105, "loss": 0.423, "lr": 3.787385547493224e-06, "epoch": 1.9752256577683887, "percentage": 39.5, "elapsed_time": "1:20:36", "remaining_time": "2:03:25", "throughput": 20077.45, "total_tokens": 97098240} +{"current_steps": 30860, "total_steps": 78105, "loss": 0.4389, "lr": 3.7869066286841612e-06, "epoch": 1.9755457397093656, "percentage": 39.51, "elapsed_time": "1:20:36", "remaining_time": "2:03:25", "throughput": 20078.05, "total_tokens": 97115584} +{"current_steps": 30865, "total_steps": 78105, "loss": 0.2542, "lr": 3.786427645613684e-06, "epoch": 1.9758658216503426, "percentage": 39.52, "elapsed_time": "1:20:37", "remaining_time": "2:03:24", "throughput": 20078.45, "total_tokens": 97130432} +{"current_steps": 30870, "total_steps": 78105, "loss": 0.3285, "lr": 3.785948598305711e-06, "epoch": 1.9761859035913194, "percentage": 39.52, "elapsed_time": "1:20:38", "remaining_time": "2:03:23", "throughput": 20078.87, "total_tokens": 97145728} +{"current_steps": 30875, "total_steps": 78105, "loss": 0.3806, "lr": 3.7854694867841637e-06, "epoch": 1.9765059855322962, "percentage": 39.53, "elapsed_time": "1:20:38", "remaining_time": "2:03:22", "throughput": 20079.28, "total_tokens": 97161152} +{"current_steps": 30880, "total_steps": 78105, "loss": 0.2684, "lr": 3.784990311072966e-06, "epoch": 1.9768260674732732, "percentage": 39.54, "elapsed_time": "1:20:39", "remaining_time": "2:03:21", "throughput": 20079.66, "total_tokens": 97176128} +{"current_steps": 30885, "total_steps": 78105, "loss": 0.3563, "lr": 3.784511071196045e-06, "epoch": 1.9771461494142502, "percentage": 39.54, "elapsed_time": "1:20:40", "remaining_time": "2:03:20", "throughput": 20080.01, "total_tokens": 97190912} +{"current_steps": 30890, "total_steps": 78105, "loss": 0.2885, "lr": 3.784031767177332e-06, "epoch": 1.977466231355227, "percentage": 39.55, "elapsed_time": "1:20:40", "remaining_time": "2:03:19", "throughput": 20080.57, "total_tokens": 97208128} +{"current_steps": 30895, "total_steps": 78105, "loss": 0.3819, "lr": 3.783552399040761e-06, "epoch": 1.9777863132962037, "percentage": 39.56, "elapsed_time": "1:20:41", "remaining_time": "2:03:18", "throughput": 20080.97, "total_tokens": 97223744} +{"current_steps": 30900, "total_steps": 78105, "loss": 0.3393, "lr": 3.7830729668102694e-06, "epoch": 1.9781063952371807, "percentage": 39.56, "elapsed_time": "1:20:42", "remaining_time": "2:03:17", "throughput": 20081.44, "total_tokens": 97239872} +{"current_steps": 30905, "total_steps": 78105, "loss": 0.5342, "lr": 3.782593470509796e-06, "epoch": 1.9784264771781577, "percentage": 39.57, "elapsed_time": "1:20:42", "remaining_time": "2:03:16", "throughput": 20081.91, "total_tokens": 97255936} +{"current_steps": 30910, "total_steps": 78105, "loss": 0.2545, "lr": 3.7821139101632864e-06, "epoch": 1.9787465591191347, "percentage": 39.57, "elapsed_time": "1:20:43", "remaining_time": "2:03:15", "throughput": 20082.33, "total_tokens": 97271424} +{"current_steps": 30915, "total_steps": 78105, "loss": 0.4111, "lr": 3.7816342857946864e-06, "epoch": 1.9790666410601114, "percentage": 39.58, "elapsed_time": "1:20:44", "remaining_time": "2:03:14", "throughput": 20082.69, "total_tokens": 97286144} +{"current_steps": 30920, "total_steps": 78105, "loss": 0.4032, "lr": 3.7811545974279454e-06, "epoch": 1.9793867230010882, "percentage": 39.59, "elapsed_time": "1:20:44", "remaining_time": "2:03:13", "throughput": 20082.97, "total_tokens": 97300160} +{"current_steps": 30925, "total_steps": 78105, "loss": 0.3373, "lr": 3.780674845087017e-06, "epoch": 1.9797068049420652, "percentage": 39.59, "elapsed_time": "1:20:45", "remaining_time": "2:03:12", "throughput": 20083.31, "total_tokens": 97314816} +{"current_steps": 30930, "total_steps": 78105, "loss": 0.3002, "lr": 3.780195028795858e-06, "epoch": 1.9800268868830422, "percentage": 39.6, "elapsed_time": "1:20:46", "remaining_time": "2:03:11", "throughput": 20083.71, "total_tokens": 97329984} +{"current_steps": 30935, "total_steps": 78105, "loss": 0.311, "lr": 3.7797151485784277e-06, "epoch": 1.980346968824019, "percentage": 39.61, "elapsed_time": "1:20:46", "remaining_time": "2:03:10", "throughput": 20084.15, "total_tokens": 97345792} +{"current_steps": 30940, "total_steps": 78105, "loss": 0.322, "lr": 3.779235204458689e-06, "epoch": 1.9806670507649957, "percentage": 39.61, "elapsed_time": "1:20:47", "remaining_time": "2:03:09", "throughput": 20084.57, "total_tokens": 97361536} +{"current_steps": 30945, "total_steps": 78105, "loss": 0.2427, "lr": 3.7787551964606066e-06, "epoch": 1.9809871327059727, "percentage": 39.62, "elapsed_time": "1:20:48", "remaining_time": "2:03:08", "throughput": 20085.02, "total_tokens": 97377472} +{"current_steps": 30950, "total_steps": 78105, "loss": 0.4856, "lr": 3.7782751246081513e-06, "epoch": 1.9813072146469497, "percentage": 39.63, "elapsed_time": "1:20:48", "remaining_time": "2:03:07", "throughput": 20085.64, "total_tokens": 97395200} +{"current_steps": 30955, "total_steps": 78105, "loss": 0.3891, "lr": 3.7777949889252942e-06, "epoch": 1.9816272965879265, "percentage": 39.63, "elapsed_time": "1:20:49", "remaining_time": "2:03:06", "throughput": 20086.04, "total_tokens": 97410560} +{"current_steps": 30960, "total_steps": 78105, "loss": 0.2833, "lr": 3.7773147894360106e-06, "epoch": 1.9819473785289032, "percentage": 39.64, "elapsed_time": "1:20:50", "remaining_time": "2:03:05", "throughput": 20086.35, "total_tokens": 97424960} +{"current_steps": 30965, "total_steps": 78105, "loss": 0.3072, "lr": 3.7768345261642804e-06, "epoch": 1.9822674604698802, "percentage": 39.65, "elapsed_time": "1:20:50", "remaining_time": "2:03:04", "throughput": 20086.78, "total_tokens": 97440768} +{"current_steps": 30970, "total_steps": 78105, "loss": 0.3888, "lr": 3.776354199134085e-06, "epoch": 1.9825875424108572, "percentage": 39.65, "elapsed_time": "1:20:51", "remaining_time": "2:03:04", "throughput": 20087.25, "total_tokens": 97456960} +{"current_steps": 30975, "total_steps": 78105, "loss": 0.2775, "lr": 3.7758738083694084e-06, "epoch": 1.9829076243518342, "percentage": 39.66, "elapsed_time": "1:20:52", "remaining_time": "2:03:03", "throughput": 20087.59, "total_tokens": 97471424} +{"current_steps": 30980, "total_steps": 78105, "loss": 0.3476, "lr": 3.77539335389424e-06, "epoch": 1.983227706292811, "percentage": 39.66, "elapsed_time": "1:20:52", "remaining_time": "2:03:02", "throughput": 20087.99, "total_tokens": 97486784} +{"current_steps": 30985, "total_steps": 78105, "loss": 0.2606, "lr": 3.7749128357325706e-06, "epoch": 1.9835477882337877, "percentage": 39.67, "elapsed_time": "1:20:53", "remaining_time": "2:03:01", "throughput": 20088.46, "total_tokens": 97503104} +{"current_steps": 30990, "total_steps": 78105, "loss": 0.4286, "lr": 3.7744322539083956e-06, "epoch": 1.9838678701747647, "percentage": 39.68, "elapsed_time": "1:20:54", "remaining_time": "2:03:00", "throughput": 20088.92, "total_tokens": 97519296} +{"current_steps": 30995, "total_steps": 78105, "loss": 0.3691, "lr": 3.7739516084457104e-06, "epoch": 1.9841879521157417, "percentage": 39.68, "elapsed_time": "1:20:55", "remaining_time": "2:02:59", "throughput": 20089.34, "total_tokens": 97534656} +{"current_steps": 31000, "total_steps": 78105, "loss": 0.3158, "lr": 3.7734708993685194e-06, "epoch": 1.9845080340567185, "percentage": 39.69, "elapsed_time": "1:20:56", "remaining_time": "2:02:59", "throughput": 20090.54, "total_tokens": 97564672} +{"current_steps": 31005, "total_steps": 78105, "loss": 0.3386, "lr": 3.7729901267008246e-06, "epoch": 1.9848281159976953, "percentage": 39.7, "elapsed_time": "1:20:56", "remaining_time": "2:02:58", "throughput": 20090.96, "total_tokens": 97579904} +{"current_steps": 31010, "total_steps": 78105, "loss": 0.2765, "lr": 3.7725092904666337e-06, "epoch": 1.9851481979386723, "percentage": 39.7, "elapsed_time": "1:20:57", "remaining_time": "2:02:57", "throughput": 20091.37, "total_tokens": 97595520} +{"current_steps": 31015, "total_steps": 78105, "loss": 0.2711, "lr": 3.772028390689957e-06, "epoch": 1.9854682798796492, "percentage": 39.71, "elapsed_time": "1:20:58", "remaining_time": "2:02:56", "throughput": 20091.82, "total_tokens": 97611392} +{"current_steps": 31020, "total_steps": 78105, "loss": 0.4288, "lr": 3.771547427394807e-06, "epoch": 1.9857883618206262, "percentage": 39.72, "elapsed_time": "1:20:58", "remaining_time": "2:02:55", "throughput": 20092.28, "total_tokens": 97627328} +{"current_steps": 31025, "total_steps": 78105, "loss": 0.3723, "lr": 3.771066400605203e-06, "epoch": 1.986108443761603, "percentage": 39.72, "elapsed_time": "1:20:59", "remaining_time": "2:02:54", "throughput": 20092.63, "total_tokens": 97641984} +{"current_steps": 31030, "total_steps": 78105, "loss": 0.3042, "lr": 3.770585310345164e-06, "epoch": 1.9864285257025798, "percentage": 39.73, "elapsed_time": "1:21:00", "remaining_time": "2:02:53", "throughput": 20093.07, "total_tokens": 97657856} +{"current_steps": 31035, "total_steps": 78105, "loss": 0.2281, "lr": 3.7701041566387123e-06, "epoch": 1.9867486076435568, "percentage": 39.73, "elapsed_time": "1:21:00", "remaining_time": "2:02:52", "throughput": 20093.52, "total_tokens": 97673792} +{"current_steps": 31040, "total_steps": 78105, "loss": 0.3068, "lr": 3.769622939509875e-06, "epoch": 1.9870686895845338, "percentage": 39.74, "elapsed_time": "1:21:02", "remaining_time": "2:02:52", "throughput": 20091.69, "total_tokens": 97688768} +{"current_steps": 31045, "total_steps": 78105, "loss": 0.2365, "lr": 3.769141658982681e-06, "epoch": 1.9873887715255105, "percentage": 39.75, "elapsed_time": "1:21:02", "remaining_time": "2:02:51", "throughput": 20092.14, "total_tokens": 97704768} +{"current_steps": 31050, "total_steps": 78105, "loss": 0.3298, "lr": 3.768660315081163e-06, "epoch": 1.9877088534664873, "percentage": 39.75, "elapsed_time": "1:21:03", "remaining_time": "2:02:50", "throughput": 20092.49, "total_tokens": 97719360} +{"current_steps": 31055, "total_steps": 78105, "loss": 0.3928, "lr": 3.7681789078293575e-06, "epoch": 1.9880289354074643, "percentage": 39.76, "elapsed_time": "1:21:04", "remaining_time": "2:02:49", "throughput": 20092.88, "total_tokens": 97734528} +{"current_steps": 31060, "total_steps": 78105, "loss": 0.3649, "lr": 3.767697437251303e-06, "epoch": 1.9883490173484413, "percentage": 39.77, "elapsed_time": "1:21:04", "remaining_time": "2:02:48", "throughput": 20093.29, "total_tokens": 97749760} +{"current_steps": 31065, "total_steps": 78105, "loss": 0.4059, "lr": 3.7672159033710422e-06, "epoch": 1.988669099289418, "percentage": 39.77, "elapsed_time": "1:21:05", "remaining_time": "2:02:47", "throughput": 20093.79, "total_tokens": 97766144} +{"current_steps": 31070, "total_steps": 78105, "loss": 0.2611, "lr": 3.7667343062126193e-06, "epoch": 1.988989181230395, "percentage": 39.78, "elapsed_time": "1:21:06", "remaining_time": "2:02:46", "throughput": 20094.34, "total_tokens": 97783104} +{"current_steps": 31075, "total_steps": 78105, "loss": 0.3792, "lr": 3.766252645800083e-06, "epoch": 1.9893092631713718, "percentage": 39.79, "elapsed_time": "1:21:06", "remaining_time": "2:02:45", "throughput": 20094.75, "total_tokens": 97798528} +{"current_steps": 31080, "total_steps": 78105, "loss": 0.2394, "lr": 3.7657709221574856e-06, "epoch": 1.9896293451123488, "percentage": 39.79, "elapsed_time": "1:21:07", "remaining_time": "2:02:44", "throughput": 20095.13, "total_tokens": 97813376} +{"current_steps": 31085, "total_steps": 78105, "loss": 0.2987, "lr": 3.7652891353088814e-06, "epoch": 1.9899494270533258, "percentage": 39.8, "elapsed_time": "1:21:08", "remaining_time": "2:02:43", "throughput": 20095.49, "total_tokens": 97828480} +{"current_steps": 31090, "total_steps": 78105, "loss": 0.3523, "lr": 3.764807285278329e-06, "epoch": 1.9902695089943025, "percentage": 39.81, "elapsed_time": "1:21:08", "remaining_time": "2:02:42", "throughput": 20095.98, "total_tokens": 97844864} +{"current_steps": 31095, "total_steps": 78105, "loss": 0.3876, "lr": 3.7643253720898882e-06, "epoch": 1.9905895909352793, "percentage": 39.81, "elapsed_time": "1:21:09", "remaining_time": "2:02:41", "throughput": 20096.37, "total_tokens": 97859648} +{"current_steps": 31100, "total_steps": 78105, "loss": 0.4626, "lr": 3.763843395767624e-06, "epoch": 1.9909096728762563, "percentage": 39.82, "elapsed_time": "1:21:10", "remaining_time": "2:02:40", "throughput": 20096.79, "total_tokens": 97875136} +{"current_steps": 31105, "total_steps": 78105, "loss": 0.2729, "lr": 3.763361356335604e-06, "epoch": 1.9912297548172333, "percentage": 39.82, "elapsed_time": "1:21:10", "remaining_time": "2:02:39", "throughput": 20097.37, "total_tokens": 97892352} +{"current_steps": 31110, "total_steps": 78105, "loss": 0.286, "lr": 3.762879253817898e-06, "epoch": 1.99154983675821, "percentage": 39.83, "elapsed_time": "1:21:11", "remaining_time": "2:02:39", "throughput": 20097.8, "total_tokens": 97907712} +{"current_steps": 31115, "total_steps": 78105, "loss": 0.3811, "lr": 3.7623970882385806e-06, "epoch": 1.9918699186991868, "percentage": 39.84, "elapsed_time": "1:21:12", "remaining_time": "2:02:38", "throughput": 20098.24, "total_tokens": 97923456} +{"current_steps": 31120, "total_steps": 78105, "loss": 0.4266, "lr": 3.7619148596217283e-06, "epoch": 1.9921900006401638, "percentage": 39.84, "elapsed_time": "1:21:12", "remaining_time": "2:02:37", "throughput": 20098.68, "total_tokens": 97939136} +{"current_steps": 31125, "total_steps": 78105, "loss": 0.227, "lr": 3.761432567991421e-06, "epoch": 1.9925100825811408, "percentage": 39.85, "elapsed_time": "1:21:13", "remaining_time": "2:02:36", "throughput": 20099.07, "total_tokens": 97954368} +{"current_steps": 31130, "total_steps": 78105, "loss": 0.2321, "lr": 3.760950213371742e-06, "epoch": 1.9928301645221178, "percentage": 39.86, "elapsed_time": "1:21:14", "remaining_time": "2:02:35", "throughput": 20099.52, "total_tokens": 97970240} +{"current_steps": 31135, "total_steps": 78105, "loss": 0.3824, "lr": 3.7604677957867763e-06, "epoch": 1.9931502464630946, "percentage": 39.86, "elapsed_time": "1:21:14", "remaining_time": "2:02:34", "throughput": 20099.95, "total_tokens": 97986304} +{"current_steps": 31140, "total_steps": 78105, "loss": 0.3487, "lr": 3.7599853152606157e-06, "epoch": 1.9934703284040713, "percentage": 39.87, "elapsed_time": "1:21:15", "remaining_time": "2:02:33", "throughput": 20100.41, "total_tokens": 98002880} +{"current_steps": 31145, "total_steps": 78105, "loss": 0.245, "lr": 3.759502771817351e-06, "epoch": 1.9937904103450483, "percentage": 39.88, "elapsed_time": "1:21:16", "remaining_time": "2:02:32", "throughput": 20100.79, "total_tokens": 98017920} +{"current_steps": 31150, "total_steps": 78105, "loss": 0.3172, "lr": 3.759020165481079e-06, "epoch": 1.9941104922860253, "percentage": 39.88, "elapsed_time": "1:21:17", "remaining_time": "2:02:31", "throughput": 20101.24, "total_tokens": 98033792} +{"current_steps": 31155, "total_steps": 78105, "loss": 0.5168, "lr": 3.758537496275897e-06, "epoch": 1.994430574227002, "percentage": 39.89, "elapsed_time": "1:21:17", "remaining_time": "2:02:30", "throughput": 20101.71, "total_tokens": 98049920} +{"current_steps": 31160, "total_steps": 78105, "loss": 0.4145, "lr": 3.758054764225908e-06, "epoch": 1.9947506561679789, "percentage": 39.9, "elapsed_time": "1:21:18", "remaining_time": "2:02:29", "throughput": 20102.15, "total_tokens": 98065536} +{"current_steps": 31165, "total_steps": 78105, "loss": 0.3843, "lr": 3.7575719693552165e-06, "epoch": 1.9950707381089559, "percentage": 39.9, "elapsed_time": "1:21:19", "remaining_time": "2:02:28", "throughput": 20102.57, "total_tokens": 98080832} +{"current_steps": 31170, "total_steps": 78105, "loss": 0.3953, "lr": 3.7570891116879326e-06, "epoch": 1.9953908200499328, "percentage": 39.91, "elapsed_time": "1:21:19", "remaining_time": "2:02:27", "throughput": 20102.97, "total_tokens": 98096192} +{"current_steps": 31175, "total_steps": 78105, "loss": 0.2898, "lr": 3.7566061912481657e-06, "epoch": 1.9957109019909098, "percentage": 39.91, "elapsed_time": "1:21:20", "remaining_time": "2:02:26", "throughput": 20103.35, "total_tokens": 98111232} +{"current_steps": 31180, "total_steps": 78105, "loss": 0.2436, "lr": 3.756123208060031e-06, "epoch": 1.9960309839318866, "percentage": 39.92, "elapsed_time": "1:21:20", "remaining_time": "2:02:25", "throughput": 20103.73, "total_tokens": 98126208} +{"current_steps": 31185, "total_steps": 78105, "loss": 0.2302, "lr": 3.7556401621476466e-06, "epoch": 1.9963510658728634, "percentage": 39.93, "elapsed_time": "1:21:21", "remaining_time": "2:02:24", "throughput": 20104.17, "total_tokens": 98142272} +{"current_steps": 31190, "total_steps": 78105, "loss": 0.2753, "lr": 3.7551570535351334e-06, "epoch": 1.9966711478138404, "percentage": 39.93, "elapsed_time": "1:21:22", "remaining_time": "2:02:23", "throughput": 20104.56, "total_tokens": 98157120} +{"current_steps": 31195, "total_steps": 78105, "loss": 0.2842, "lr": 3.7546738822466134e-06, "epoch": 1.9969912297548174, "percentage": 39.94, "elapsed_time": "1:21:23", "remaining_time": "2:02:22", "throughput": 20105.02, "total_tokens": 98173248} +{"current_steps": 31200, "total_steps": 78105, "loss": 0.2516, "lr": 3.754190648306216e-06, "epoch": 1.9973113116957941, "percentage": 39.95, "elapsed_time": "1:21:23", "remaining_time": "2:02:22", "throughput": 20105.53, "total_tokens": 98190080} +{"current_steps": 31205, "total_steps": 78105, "loss": 0.3013, "lr": 3.75370735173807e-06, "epoch": 1.997631393636771, "percentage": 39.95, "elapsed_time": "1:21:24", "remaining_time": "2:02:21", "throughput": 20105.99, "total_tokens": 98206400} +{"current_steps": 31210, "total_steps": 78105, "loss": 0.3755, "lr": 3.7532239925663094e-06, "epoch": 1.9979514755777479, "percentage": 39.96, "elapsed_time": "1:21:25", "remaining_time": "2:02:20", "throughput": 20106.39, "total_tokens": 98221312} +{"current_steps": 31215, "total_steps": 78105, "loss": 0.3245, "lr": 3.7527405708150707e-06, "epoch": 1.9982715575187249, "percentage": 39.97, "elapsed_time": "1:21:25", "remaining_time": "2:02:19", "throughput": 20106.79, "total_tokens": 98236672} +{"current_steps": 31220, "total_steps": 78105, "loss": 0.3046, "lr": 3.752257086508493e-06, "epoch": 1.9985916394597016, "percentage": 39.97, "elapsed_time": "1:21:26", "remaining_time": "2:02:18", "throughput": 20107.22, "total_tokens": 98252224} +{"current_steps": 31225, "total_steps": 78105, "loss": 0.3517, "lr": 3.7517735396707184e-06, "epoch": 1.9989117214006784, "percentage": 39.98, "elapsed_time": "1:21:27", "remaining_time": "2:02:17", "throughput": 20107.62, "total_tokens": 98267776} +{"current_steps": 31230, "total_steps": 78105, "loss": 0.2957, "lr": 3.7512899303258943e-06, "epoch": 1.9992318033416554, "percentage": 39.98, "elapsed_time": "1:21:27", "remaining_time": "2:02:16", "throughput": 20108.16, "total_tokens": 98284672} +{"current_steps": 31235, "total_steps": 78105, "loss": 0.3742, "lr": 3.7508062584981686e-06, "epoch": 1.9995518852826324, "percentage": 39.99, "elapsed_time": "1:21:28", "remaining_time": "2:02:15", "throughput": 20108.69, "total_tokens": 98301696} +{"current_steps": 31240, "total_steps": 78105, "loss": 0.3667, "lr": 3.7503225242116937e-06, "epoch": 1.9998719672236094, "percentage": 40.0, "elapsed_time": "1:21:29", "remaining_time": "2:02:14", "throughput": 20109.1, "total_tokens": 98317056} +{"current_steps": 31245, "total_steps": 78105, "loss": 0.2658, "lr": 3.7498387274906253e-06, "epoch": 2.000192049164586, "percentage": 40.0, "elapsed_time": "1:21:29", "remaining_time": "2:02:13", "throughput": 20109.11, "total_tokens": 98332416} +{"current_steps": 31248, "total_steps": 78105, "eval_loss": 0.4481422007083893, "epoch": 2.0003840983291723, "percentage": 40.01, "elapsed_time": "1:22:21", "remaining_time": "2:03:29", "throughput": 19901.81, "total_tokens": 98341056} +{"current_steps": 31250, "total_steps": 78105, "loss": 0.1909, "lr": 3.7493548683591198e-06, "epoch": 2.000512131105563, "percentage": 40.01, "elapsed_time": "1:22:53", "remaining_time": "2:04:17", "throughput": 19772.43, "total_tokens": 98346752} +{"current_steps": 31255, "total_steps": 78105, "loss": 0.2203, "lr": 3.7488709468413405e-06, "epoch": 2.00083221304654, "percentage": 40.02, "elapsed_time": "1:22:54", "remaining_time": "2:04:16", "throughput": 19772.94, "total_tokens": 98362944} +{"current_steps": 31260, "total_steps": 78105, "loss": 0.2375, "lr": 3.748386962961451e-06, "epoch": 2.001152294987517, "percentage": 40.02, "elapsed_time": "1:22:55", "remaining_time": "2:04:15", "throughput": 19773.38, "total_tokens": 98378112} +{"current_steps": 31265, "total_steps": 78105, "loss": 0.1562, "lr": 3.7479029167436193e-06, "epoch": 2.001472376928494, "percentage": 40.03, "elapsed_time": "1:22:55", "remaining_time": "2:04:14", "throughput": 19773.82, "total_tokens": 98393536} +{"current_steps": 31270, "total_steps": 78105, "loss": 0.2236, "lr": 3.747418808212016e-06, "epoch": 2.0017924588694704, "percentage": 40.04, "elapsed_time": "1:22:56", "remaining_time": "2:04:13", "throughput": 19774.26, "total_tokens": 98409088} +{"current_steps": 31275, "total_steps": 78105, "loss": 0.1775, "lr": 3.7469346373908145e-06, "epoch": 2.0021125408104474, "percentage": 40.04, "elapsed_time": "1:22:57", "remaining_time": "2:04:12", "throughput": 19774.73, "total_tokens": 98424768} +{"current_steps": 31280, "total_steps": 78105, "loss": 0.1839, "lr": 3.7464504043041925e-06, "epoch": 2.0024326227514244, "percentage": 40.05, "elapsed_time": "1:22:57", "remaining_time": "2:04:11", "throughput": 19775.15, "total_tokens": 98439872} +{"current_steps": 31285, "total_steps": 78105, "loss": 0.167, "lr": 3.74596610897633e-06, "epoch": 2.0027527046924014, "percentage": 40.06, "elapsed_time": "1:22:58", "remaining_time": "2:04:10", "throughput": 19775.6, "total_tokens": 98455296} +{"current_steps": 31290, "total_steps": 78105, "loss": 0.211, "lr": 3.74548175143141e-06, "epoch": 2.003072786633378, "percentage": 40.06, "elapsed_time": "1:22:59", "remaining_time": "2:04:09", "throughput": 19776.09, "total_tokens": 98471360} +{"current_steps": 31295, "total_steps": 78105, "loss": 0.1803, "lr": 3.7449973316936195e-06, "epoch": 2.003392868574355, "percentage": 40.07, "elapsed_time": "1:23:00", "remaining_time": "2:04:08", "throughput": 19776.58, "total_tokens": 98487360} +{"current_steps": 31300, "total_steps": 78105, "loss": 0.2111, "lr": 3.7445128497871463e-06, "epoch": 2.003712950515332, "percentage": 40.07, "elapsed_time": "1:23:00", "remaining_time": "2:04:07", "throughput": 19777.02, "total_tokens": 98503104} +{"current_steps": 31305, "total_steps": 78105, "loss": 0.1779, "lr": 3.7440283057361844e-06, "epoch": 2.004033032456309, "percentage": 40.08, "elapsed_time": "1:23:01", "remaining_time": "2:04:07", "throughput": 19777.54, "total_tokens": 98519744} +{"current_steps": 31310, "total_steps": 78105, "loss": 0.176, "lr": 3.7435436995649282e-06, "epoch": 2.004353114397286, "percentage": 40.09, "elapsed_time": "1:23:02", "remaining_time": "2:04:06", "throughput": 19778.16, "total_tokens": 98537408} +{"current_steps": 31315, "total_steps": 78105, "loss": 0.1705, "lr": 3.7430590312975774e-06, "epoch": 2.0046731963382625, "percentage": 40.09, "elapsed_time": "1:23:02", "remaining_time": "2:04:05", "throughput": 19778.68, "total_tokens": 98553728} +{"current_steps": 31320, "total_steps": 78105, "loss": 0.2459, "lr": 3.742574300958334e-06, "epoch": 2.0049932782792395, "percentage": 40.1, "elapsed_time": "1:23:03", "remaining_time": "2:04:04", "throughput": 19779.23, "total_tokens": 98570432} +{"current_steps": 31325, "total_steps": 78105, "loss": 0.2012, "lr": 3.7420895085714014e-06, "epoch": 2.0053133602202164, "percentage": 40.11, "elapsed_time": "1:23:04", "remaining_time": "2:04:03", "throughput": 19779.77, "total_tokens": 98586816} +{"current_steps": 31330, "total_steps": 78105, "loss": 0.2318, "lr": 3.7416046541609892e-06, "epoch": 2.0056334421611934, "percentage": 40.11, "elapsed_time": "1:23:04", "remaining_time": "2:04:02", "throughput": 19780.23, "total_tokens": 98602432} +{"current_steps": 31335, "total_steps": 78105, "loss": 0.1981, "lr": 3.741119737751307e-06, "epoch": 2.00595352410217, "percentage": 40.12, "elapsed_time": "1:23:05", "remaining_time": "2:04:01", "throughput": 19780.66, "total_tokens": 98617664} +{"current_steps": 31340, "total_steps": 78105, "loss": 0.2127, "lr": 3.74063475936657e-06, "epoch": 2.006273606043147, "percentage": 40.13, "elapsed_time": "1:23:06", "remaining_time": "2:04:00", "throughput": 19781.1, "total_tokens": 98632896} +{"current_steps": 31345, "total_steps": 78105, "loss": 0.1451, "lr": 3.7401497190309955e-06, "epoch": 2.006593687984124, "percentage": 40.13, "elapsed_time": "1:23:06", "remaining_time": "2:03:59", "throughput": 19781.56, "total_tokens": 98648768} +{"current_steps": 31350, "total_steps": 78105, "loss": 0.144, "lr": 3.7396646167688043e-06, "epoch": 2.006913769925101, "percentage": 40.14, "elapsed_time": "1:23:07", "remaining_time": "2:03:58", "throughput": 19781.98, "total_tokens": 98664000} +{"current_steps": 31355, "total_steps": 78105, "loss": 0.3766, "lr": 3.7391794526042192e-06, "epoch": 2.0072338518660775, "percentage": 40.14, "elapsed_time": "1:23:08", "remaining_time": "2:03:57", "throughput": 19782.38, "total_tokens": 98678912} +{"current_steps": 31360, "total_steps": 78105, "loss": 0.2094, "lr": 3.7386942265614667e-06, "epoch": 2.0075539338070545, "percentage": 40.15, "elapsed_time": "1:23:08", "remaining_time": "2:03:56", "throughput": 19782.83, "total_tokens": 98694144} +{"current_steps": 31365, "total_steps": 78105, "loss": 0.2508, "lr": 3.738208938664776e-06, "epoch": 2.0078740157480315, "percentage": 40.16, "elapsed_time": "1:23:09", "remaining_time": "2:03:55", "throughput": 19783.25, "total_tokens": 98709376} +{"current_steps": 31370, "total_steps": 78105, "loss": 0.1977, "lr": 3.737723588938381e-06, "epoch": 2.0081940976890085, "percentage": 40.16, "elapsed_time": "1:23:10", "remaining_time": "2:03:54", "throughput": 19783.7, "total_tokens": 98724864} +{"current_steps": 31375, "total_steps": 78105, "loss": 0.1377, "lr": 3.737238177406518e-06, "epoch": 2.0085141796299855, "percentage": 40.17, "elapsed_time": "1:23:10", "remaining_time": "2:03:53", "throughput": 19784.13, "total_tokens": 98740160} +{"current_steps": 31380, "total_steps": 78105, "loss": 0.1949, "lr": 3.736752704093424e-06, "epoch": 2.008834261570962, "percentage": 40.18, "elapsed_time": "1:23:11", "remaining_time": "2:03:52", "throughput": 19784.54, "total_tokens": 98755456} +{"current_steps": 31385, "total_steps": 78105, "loss": 0.2044, "lr": 3.736267169023342e-06, "epoch": 2.009154343511939, "percentage": 40.18, "elapsed_time": "1:23:12", "remaining_time": "2:03:51", "throughput": 19785.19, "total_tokens": 98773504} +{"current_steps": 31390, "total_steps": 78105, "loss": 0.1996, "lr": 3.735781572220517e-06, "epoch": 2.009474425452916, "percentage": 40.19, "elapsed_time": "1:23:12", "remaining_time": "2:03:50", "throughput": 19785.65, "total_tokens": 98789120} +{"current_steps": 31395, "total_steps": 78105, "loss": 0.2712, "lr": 3.735295913709197e-06, "epoch": 2.009794507393893, "percentage": 40.2, "elapsed_time": "1:23:13", "remaining_time": "2:03:49", "throughput": 19786.19, "total_tokens": 98805440} +{"current_steps": 31400, "total_steps": 78105, "loss": 0.1854, "lr": 3.734810193513634e-06, "epoch": 2.0101145893348695, "percentage": 40.2, "elapsed_time": "1:23:14", "remaining_time": "2:03:48", "throughput": 19786.68, "total_tokens": 98821824} +{"current_steps": 31405, "total_steps": 78105, "loss": 0.2738, "lr": 3.7343244116580816e-06, "epoch": 2.0104346712758465, "percentage": 40.21, "elapsed_time": "1:23:15", "remaining_time": "2:03:47", "throughput": 19787.12, "total_tokens": 98837184} +{"current_steps": 31410, "total_steps": 78105, "loss": 0.2292, "lr": 3.7338385681667976e-06, "epoch": 2.0107547532168235, "percentage": 40.22, "elapsed_time": "1:23:15", "remaining_time": "2:03:46", "throughput": 19787.57, "total_tokens": 98852800} +{"current_steps": 31415, "total_steps": 78105, "loss": 0.2003, "lr": 3.7333526630640425e-06, "epoch": 2.0110748351578005, "percentage": 40.22, "elapsed_time": "1:23:16", "remaining_time": "2:03:45", "throughput": 19788.01, "total_tokens": 98868480} +{"current_steps": 31420, "total_steps": 78105, "loss": 0.1576, "lr": 3.732866696374079e-06, "epoch": 2.0113949170987775, "percentage": 40.23, "elapsed_time": "1:23:17", "remaining_time": "2:03:44", "throughput": 19788.38, "total_tokens": 98883072} +{"current_steps": 31425, "total_steps": 78105, "loss": 0.1872, "lr": 3.732380668121175e-06, "epoch": 2.011714999039754, "percentage": 40.23, "elapsed_time": "1:23:17", "remaining_time": "2:03:43", "throughput": 19788.8, "total_tokens": 98898560} +{"current_steps": 31430, "total_steps": 78105, "loss": 0.2457, "lr": 3.7318945783296002e-06, "epoch": 2.012035080980731, "percentage": 40.24, "elapsed_time": "1:23:18", "remaining_time": "2:03:42", "throughput": 19789.22, "total_tokens": 98914112} +{"current_steps": 31435, "total_steps": 78105, "loss": 0.1851, "lr": 3.731408427023626e-06, "epoch": 2.012355162921708, "percentage": 40.25, "elapsed_time": "1:23:19", "remaining_time": "2:03:41", "throughput": 19789.75, "total_tokens": 98930752} +{"current_steps": 31440, "total_steps": 78105, "loss": 0.1586, "lr": 3.730922214227529e-06, "epoch": 2.012675244862685, "percentage": 40.25, "elapsed_time": "1:23:19", "remaining_time": "2:03:40", "throughput": 19790.18, "total_tokens": 98945920} +{"current_steps": 31445, "total_steps": 78105, "loss": 0.1758, "lr": 3.730435939965589e-06, "epoch": 2.0129953268036616, "percentage": 40.26, "elapsed_time": "1:23:20", "remaining_time": "2:03:39", "throughput": 19790.71, "total_tokens": 98962560} +{"current_steps": 31450, "total_steps": 78105, "loss": 0.1546, "lr": 3.729949604262087e-06, "epoch": 2.0133154087446385, "percentage": 40.27, "elapsed_time": "1:23:21", "remaining_time": "2:03:39", "throughput": 19791.2, "total_tokens": 98978560} +{"current_steps": 31455, "total_steps": 78105, "loss": 0.1061, "lr": 3.7294632071413076e-06, "epoch": 2.0136354906856155, "percentage": 40.27, "elapsed_time": "1:23:21", "remaining_time": "2:03:38", "throughput": 19791.57, "total_tokens": 98993408} +{"current_steps": 31460, "total_steps": 78105, "loss": 0.123, "lr": 3.7289767486275406e-06, "epoch": 2.0139555726265925, "percentage": 40.28, "elapsed_time": "1:23:22", "remaining_time": "2:03:37", "throughput": 19792.12, "total_tokens": 99010112} +{"current_steps": 31465, "total_steps": 78105, "loss": 0.2356, "lr": 3.7284902287450765e-06, "epoch": 2.0142756545675695, "percentage": 40.29, "elapsed_time": "1:23:23", "remaining_time": "2:03:36", "throughput": 19792.65, "total_tokens": 99026880} +{"current_steps": 31470, "total_steps": 78105, "loss": 0.1587, "lr": 3.7280036475182083e-06, "epoch": 2.014595736508546, "percentage": 40.29, "elapsed_time": "1:23:23", "remaining_time": "2:03:35", "throughput": 19793.17, "total_tokens": 99043328} +{"current_steps": 31475, "total_steps": 78105, "loss": 0.2923, "lr": 3.7275170049712352e-06, "epoch": 2.014915818449523, "percentage": 40.3, "elapsed_time": "1:23:24", "remaining_time": "2:03:34", "throughput": 19793.75, "total_tokens": 99060544} +{"current_steps": 31480, "total_steps": 78105, "loss": 0.2537, "lr": 3.7270303011284557e-06, "epoch": 2.0152359003905, "percentage": 40.3, "elapsed_time": "1:23:25", "remaining_time": "2:03:33", "throughput": 19794.14, "total_tokens": 99075136} +{"current_steps": 31485, "total_steps": 78105, "loss": 0.2203, "lr": 3.7265435360141757e-06, "epoch": 2.015555982331477, "percentage": 40.31, "elapsed_time": "1:23:25", "remaining_time": "2:03:32", "throughput": 19794.61, "total_tokens": 99091200} +{"current_steps": 31490, "total_steps": 78105, "loss": 0.0922, "lr": 3.7260567096527e-06, "epoch": 2.0158760642724536, "percentage": 40.32, "elapsed_time": "1:23:26", "remaining_time": "2:03:31", "throughput": 19795.05, "total_tokens": 99106880} +{"current_steps": 31495, "total_steps": 78105, "loss": 0.2122, "lr": 3.7255698220683386e-06, "epoch": 2.0161961462134306, "percentage": 40.32, "elapsed_time": "1:23:27", "remaining_time": "2:03:30", "throughput": 19795.51, "total_tokens": 99122624} +{"current_steps": 31500, "total_steps": 78105, "loss": 0.2027, "lr": 3.7250828732854037e-06, "epoch": 2.0165162281544076, "percentage": 40.33, "elapsed_time": "1:23:27", "remaining_time": "2:03:29", "throughput": 19795.87, "total_tokens": 99137088} +{"current_steps": 31505, "total_steps": 78105, "loss": 0.2087, "lr": 3.7245958633282107e-06, "epoch": 2.0168363100953846, "percentage": 40.34, "elapsed_time": "1:23:28", "remaining_time": "2:03:28", "throughput": 19796.28, "total_tokens": 99152448} +{"current_steps": 31510, "total_steps": 78105, "loss": 0.2042, "lr": 3.7241087922210796e-06, "epoch": 2.017156392036361, "percentage": 40.34, "elapsed_time": "1:23:29", "remaining_time": "2:03:27", "throughput": 19796.71, "total_tokens": 99168128} +{"current_steps": 31515, "total_steps": 78105, "loss": 0.218, "lr": 3.7236216599883317e-06, "epoch": 2.017476473977338, "percentage": 40.35, "elapsed_time": "1:23:30", "remaining_time": "2:03:26", "throughput": 19797.13, "total_tokens": 99183808} +{"current_steps": 31520, "total_steps": 78105, "loss": 0.1838, "lr": 3.7231344666542917e-06, "epoch": 2.017796555918315, "percentage": 40.36, "elapsed_time": "1:23:30", "remaining_time": "2:03:25", "throughput": 19797.49, "total_tokens": 99198528} +{"current_steps": 31525, "total_steps": 78105, "loss": 0.2434, "lr": 3.7226472122432877e-06, "epoch": 2.018116637859292, "percentage": 40.36, "elapsed_time": "1:23:31", "remaining_time": "2:03:24", "throughput": 19797.81, "total_tokens": 99212864} +{"current_steps": 31530, "total_steps": 78105, "loss": 0.209, "lr": 3.72215989677965e-06, "epoch": 2.018436719800269, "percentage": 40.37, "elapsed_time": "1:23:31", "remaining_time": "2:03:23", "throughput": 19798.21, "total_tokens": 99228224} +{"current_steps": 31535, "total_steps": 78105, "loss": 0.193, "lr": 3.721672520287713e-06, "epoch": 2.0187568017412456, "percentage": 40.38, "elapsed_time": "1:23:32", "remaining_time": "2:03:22", "throughput": 19798.69, "total_tokens": 99244736} +{"current_steps": 31540, "total_steps": 78105, "loss": 0.264, "lr": 3.721185082791814e-06, "epoch": 2.0190768836822226, "percentage": 40.38, "elapsed_time": "1:23:33", "remaining_time": "2:03:21", "throughput": 19799.13, "total_tokens": 99260224} +{"current_steps": 31545, "total_steps": 78105, "loss": 0.1281, "lr": 3.720697584316293e-06, "epoch": 2.0193969656231996, "percentage": 40.39, "elapsed_time": "1:23:34", "remaining_time": "2:03:20", "throughput": 19799.56, "total_tokens": 99275968} +{"current_steps": 31550, "total_steps": 78105, "loss": 0.2161, "lr": 3.7202100248854932e-06, "epoch": 2.0197170475641766, "percentage": 40.39, "elapsed_time": "1:23:34", "remaining_time": "2:03:19", "throughput": 19800.03, "total_tokens": 99292032} +{"current_steps": 31555, "total_steps": 78105, "loss": 0.1193, "lr": 3.719722404523761e-06, "epoch": 2.020037129505153, "percentage": 40.4, "elapsed_time": "1:23:35", "remaining_time": "2:03:18", "throughput": 19800.51, "total_tokens": 99308288} +{"current_steps": 31560, "total_steps": 78105, "loss": 0.1895, "lr": 3.719234723255444e-06, "epoch": 2.02035721144613, "percentage": 40.41, "elapsed_time": "1:23:36", "remaining_time": "2:03:17", "throughput": 19800.99, "total_tokens": 99324480} +{"current_steps": 31565, "total_steps": 78105, "loss": 0.1933, "lr": 3.718746981104896e-06, "epoch": 2.020677293387107, "percentage": 40.41, "elapsed_time": "1:23:36", "remaining_time": "2:03:16", "throughput": 19801.56, "total_tokens": 99341696} +{"current_steps": 31570, "total_steps": 78105, "loss": 0.1834, "lr": 3.7182591780964715e-06, "epoch": 2.020997375328084, "percentage": 40.42, "elapsed_time": "1:23:37", "remaining_time": "2:03:15", "throughput": 19802.0, "total_tokens": 99356992} +{"current_steps": 31575, "total_steps": 78105, "loss": 0.1159, "lr": 3.7177713142545306e-06, "epoch": 2.021317457269061, "percentage": 40.43, "elapsed_time": "1:23:38", "remaining_time": "2:03:15", "throughput": 19802.51, "total_tokens": 99373248} +{"current_steps": 31580, "total_steps": 78105, "loss": 0.1897, "lr": 3.7172833896034332e-06, "epoch": 2.0216375392100376, "percentage": 40.43, "elapsed_time": "1:23:38", "remaining_time": "2:03:14", "throughput": 19802.9, "total_tokens": 99388160} +{"current_steps": 31585, "total_steps": 78105, "loss": 0.1918, "lr": 3.7167954041675435e-06, "epoch": 2.0219576211510146, "percentage": 40.44, "elapsed_time": "1:23:39", "remaining_time": "2:03:13", "throughput": 19803.43, "total_tokens": 99404800} +{"current_steps": 31590, "total_steps": 78105, "loss": 0.2138, "lr": 3.7163073579712294e-06, "epoch": 2.0222777030919916, "percentage": 40.45, "elapsed_time": "1:23:40", "remaining_time": "2:03:12", "throughput": 19803.9, "total_tokens": 99420736} +{"current_steps": 31595, "total_steps": 78105, "loss": 0.247, "lr": 3.715819251038861e-06, "epoch": 2.0225977850329686, "percentage": 40.45, "elapsed_time": "1:23:40", "remaining_time": "2:03:11", "throughput": 19804.33, "total_tokens": 99436288} +{"current_steps": 31600, "total_steps": 78105, "loss": 0.229, "lr": 3.715331083394813e-06, "epoch": 2.022917866973945, "percentage": 40.46, "elapsed_time": "1:23:41", "remaining_time": "2:03:10", "throughput": 19804.77, "total_tokens": 99451968} +{"current_steps": 31605, "total_steps": 78105, "loss": 0.219, "lr": 3.7148428550634607e-06, "epoch": 2.023237948914922, "percentage": 40.46, "elapsed_time": "1:23:42", "remaining_time": "2:03:09", "throughput": 19805.22, "total_tokens": 99467648} +{"current_steps": 31610, "total_steps": 78105, "loss": 0.174, "lr": 3.7143545660691845e-06, "epoch": 2.023558030855899, "percentage": 40.47, "elapsed_time": "1:23:42", "remaining_time": "2:03:08", "throughput": 19805.63, "total_tokens": 99482624} +{"current_steps": 31615, "total_steps": 78105, "loss": 0.316, "lr": 3.713866216436366e-06, "epoch": 2.023878112796876, "percentage": 40.48, "elapsed_time": "1:23:43", "remaining_time": "2:03:07", "throughput": 19806.06, "total_tokens": 99498240} +{"current_steps": 31620, "total_steps": 78105, "loss": 0.2307, "lr": 3.7133778061893915e-06, "epoch": 2.0241981947378527, "percentage": 40.48, "elapsed_time": "1:23:44", "remaining_time": "2:03:06", "throughput": 19806.5, "total_tokens": 99513536} +{"current_steps": 31625, "total_steps": 78105, "loss": 0.193, "lr": 3.7128893353526495e-06, "epoch": 2.0245182766788297, "percentage": 40.49, "elapsed_time": "1:23:45", "remaining_time": "2:03:05", "throughput": 19807.09, "total_tokens": 99530816} +{"current_steps": 31630, "total_steps": 78105, "loss": 0.224, "lr": 3.712400803950532e-06, "epoch": 2.0248383586198067, "percentage": 40.5, "elapsed_time": "1:23:45", "remaining_time": "2:03:04", "throughput": 19807.55, "total_tokens": 99546688} +{"current_steps": 31635, "total_steps": 78105, "loss": 0.1821, "lr": 3.7119122120074334e-06, "epoch": 2.0251584405607836, "percentage": 40.5, "elapsed_time": "1:23:46", "remaining_time": "2:03:03", "throughput": 19808.09, "total_tokens": 99563072} +{"current_steps": 31640, "total_steps": 78105, "loss": 0.172, "lr": 3.7114235595477522e-06, "epoch": 2.0254785225017606, "percentage": 40.51, "elapsed_time": "1:23:47", "remaining_time": "2:03:02", "throughput": 19808.55, "total_tokens": 99578432} +{"current_steps": 31645, "total_steps": 78105, "loss": 0.2526, "lr": 3.7109348465958884e-06, "epoch": 2.025798604442737, "percentage": 40.52, "elapsed_time": "1:23:47", "remaining_time": "2:03:01", "throughput": 19809.02, "total_tokens": 99594048} +{"current_steps": 31650, "total_steps": 78105, "loss": 0.1652, "lr": 3.710446073176245e-06, "epoch": 2.026118686383714, "percentage": 40.52, "elapsed_time": "1:23:48", "remaining_time": "2:03:00", "throughput": 19809.56, "total_tokens": 99610496} +{"current_steps": 31655, "total_steps": 78105, "loss": 0.2609, "lr": 3.7099572393132303e-06, "epoch": 2.026438768324691, "percentage": 40.53, "elapsed_time": "1:23:49", "remaining_time": "2:02:59", "throughput": 19810.06, "total_tokens": 99626432} +{"current_steps": 31660, "total_steps": 78105, "loss": 0.1763, "lr": 3.709468345031254e-06, "epoch": 2.026758850265668, "percentage": 40.54, "elapsed_time": "1:23:49", "remaining_time": "2:02:58", "throughput": 19810.41, "total_tokens": 99641088} +{"current_steps": 31665, "total_steps": 78105, "loss": 0.2441, "lr": 3.7089793903547276e-06, "epoch": 2.0270789322066447, "percentage": 40.54, "elapsed_time": "1:23:50", "remaining_time": "2:02:57", "throughput": 19810.85, "total_tokens": 99656448} +{"current_steps": 31670, "total_steps": 78105, "loss": 0.2303, "lr": 3.708490375308068e-06, "epoch": 2.0273990141476217, "percentage": 40.55, "elapsed_time": "1:23:51", "remaining_time": "2:02:56", "throughput": 19811.26, "total_tokens": 99671424} +{"current_steps": 31675, "total_steps": 78105, "loss": 0.2227, "lr": 3.7080012999156943e-06, "epoch": 2.0277190960885987, "percentage": 40.55, "elapsed_time": "1:23:51", "remaining_time": "2:02:55", "throughput": 19811.64, "total_tokens": 99685952} +{"current_steps": 31680, "total_steps": 78105, "loss": 0.1612, "lr": 3.7075121642020273e-06, "epoch": 2.0280391780295757, "percentage": 40.56, "elapsed_time": "1:23:52", "remaining_time": "2:02:54", "throughput": 19812.1, "total_tokens": 99701760} +{"current_steps": 31685, "total_steps": 78105, "loss": 0.1307, "lr": 3.7070229681914927e-06, "epoch": 2.0283592599705527, "percentage": 40.57, "elapsed_time": "1:23:53", "remaining_time": "2:02:53", "throughput": 19812.51, "total_tokens": 99717056} +{"current_steps": 31690, "total_steps": 78105, "loss": 0.2032, "lr": 3.7065337119085182e-06, "epoch": 2.028679341911529, "percentage": 40.57, "elapsed_time": "1:23:53", "remaining_time": "2:02:52", "throughput": 19813.0, "total_tokens": 99733184} +{"current_steps": 31695, "total_steps": 78105, "loss": 0.2394, "lr": 3.706044395377535e-06, "epoch": 2.028999423852506, "percentage": 40.58, "elapsed_time": "1:23:54", "remaining_time": "2:02:51", "throughput": 19813.57, "total_tokens": 99750464} +{"current_steps": 31700, "total_steps": 78105, "loss": 0.2861, "lr": 3.7055550186229765e-06, "epoch": 2.029319505793483, "percentage": 40.59, "elapsed_time": "1:23:55", "remaining_time": "2:02:50", "throughput": 19813.99, "total_tokens": 99765504} +{"current_steps": 31705, "total_steps": 78105, "loss": 0.26, "lr": 3.7050655816692804e-06, "epoch": 2.02963958773446, "percentage": 40.59, "elapsed_time": "1:23:55", "remaining_time": "2:02:49", "throughput": 19814.46, "total_tokens": 99781376} +{"current_steps": 31710, "total_steps": 78105, "loss": 0.2029, "lr": 3.7045760845408853e-06, "epoch": 2.0299596696754367, "percentage": 40.6, "elapsed_time": "1:23:56", "remaining_time": "2:02:48", "throughput": 19815.06, "total_tokens": 99798528} +{"current_steps": 31715, "total_steps": 78105, "loss": 0.2184, "lr": 3.7040865272622352e-06, "epoch": 2.0302797516164137, "percentage": 40.61, "elapsed_time": "1:23:57", "remaining_time": "2:02:47", "throughput": 19815.5, "total_tokens": 99813952} +{"current_steps": 31720, "total_steps": 78105, "loss": 0.2437, "lr": 3.7035969098577764e-06, "epoch": 2.0305998335573907, "percentage": 40.61, "elapsed_time": "1:23:57", "remaining_time": "2:02:46", "throughput": 19815.85, "total_tokens": 99828480} +{"current_steps": 31725, "total_steps": 78105, "loss": 0.2714, "lr": 3.7031072323519567e-06, "epoch": 2.0309199154983677, "percentage": 40.62, "elapsed_time": "1:23:58", "remaining_time": "2:02:45", "throughput": 19816.35, "total_tokens": 99844608} +{"current_steps": 31730, "total_steps": 78105, "loss": 0.2792, "lr": 3.7026174947692284e-06, "epoch": 2.0312399974393447, "percentage": 40.62, "elapsed_time": "1:23:59", "remaining_time": "2:02:45", "throughput": 19816.81, "total_tokens": 99860608} +{"current_steps": 31735, "total_steps": 78105, "loss": 0.2103, "lr": 3.702127697134047e-06, "epoch": 2.0315600793803212, "percentage": 40.63, "elapsed_time": "1:23:59", "remaining_time": "2:02:44", "throughput": 19817.34, "total_tokens": 99877248} +{"current_steps": 31740, "total_steps": 78105, "loss": 0.2149, "lr": 3.7016378394708694e-06, "epoch": 2.0318801613212982, "percentage": 40.64, "elapsed_time": "1:24:00", "remaining_time": "2:02:43", "throughput": 19817.91, "total_tokens": 99894016} +{"current_steps": 31745, "total_steps": 78105, "loss": 0.2266, "lr": 3.701147921804158e-06, "epoch": 2.032200243262275, "percentage": 40.64, "elapsed_time": "1:24:01", "remaining_time": "2:02:42", "throughput": 19818.39, "total_tokens": 99909888} +{"current_steps": 31750, "total_steps": 78105, "loss": 0.1853, "lr": 3.7006579441583756e-06, "epoch": 2.032520325203252, "percentage": 40.65, "elapsed_time": "1:24:01", "remaining_time": "2:02:41", "throughput": 19818.9, "total_tokens": 99926272} +{"current_steps": 31755, "total_steps": 78105, "loss": 0.2258, "lr": 3.7001679065579893e-06, "epoch": 2.0328404071442288, "percentage": 40.66, "elapsed_time": "1:24:02", "remaining_time": "2:02:40", "throughput": 19819.39, "total_tokens": 99942464} +{"current_steps": 31760, "total_steps": 78105, "loss": 0.2136, "lr": 3.69967780902747e-06, "epoch": 2.0331604890852057, "percentage": 40.66, "elapsed_time": "1:24:03", "remaining_time": "2:02:39", "throughput": 19819.73, "total_tokens": 99956544} +{"current_steps": 31765, "total_steps": 78105, "loss": 0.1381, "lr": 3.699187651591288e-06, "epoch": 2.0334805710261827, "percentage": 40.67, "elapsed_time": "1:24:03", "remaining_time": "2:02:38", "throughput": 19820.17, "total_tokens": 99972032} +{"current_steps": 31770, "total_steps": 78105, "loss": 0.2557, "lr": 3.698697434273923e-06, "epoch": 2.0338006529671597, "percentage": 40.68, "elapsed_time": "1:24:04", "remaining_time": "2:02:37", "throughput": 19820.61, "total_tokens": 99987520} +{"current_steps": 31775, "total_steps": 78105, "loss": 0.2953, "lr": 3.6982071570998512e-06, "epoch": 2.0341207349081363, "percentage": 40.68, "elapsed_time": "1:24:05", "remaining_time": "2:02:36", "throughput": 19821.03, "total_tokens": 100003008} +{"current_steps": 31780, "total_steps": 78105, "loss": 0.1572, "lr": 3.6977168200935554e-06, "epoch": 2.0344408168491133, "percentage": 40.69, "elapsed_time": "1:24:06", "remaining_time": "2:02:36", "throughput": 19819.64, "total_tokens": 100019136} +{"current_steps": 31785, "total_steps": 78105, "loss": 0.2309, "lr": 3.6972264232795198e-06, "epoch": 2.0347608987900903, "percentage": 40.7, "elapsed_time": "1:24:07", "remaining_time": "2:02:35", "throughput": 19820.1, "total_tokens": 100034688} +{"current_steps": 31790, "total_steps": 78105, "loss": 0.2005, "lr": 3.6967359666822333e-06, "epoch": 2.0350809807310672, "percentage": 40.7, "elapsed_time": "1:24:07", "remaining_time": "2:02:34", "throughput": 19820.47, "total_tokens": 100049536} +{"current_steps": 31795, "total_steps": 78105, "loss": 0.1672, "lr": 3.696245450326186e-06, "epoch": 2.0354010626720442, "percentage": 40.71, "elapsed_time": "1:24:08", "remaining_time": "2:02:33", "throughput": 19820.91, "total_tokens": 100064768} +{"current_steps": 31800, "total_steps": 78105, "loss": 0.1763, "lr": 3.6957548742358706e-06, "epoch": 2.035721144613021, "percentage": 40.71, "elapsed_time": "1:24:09", "remaining_time": "2:02:32", "throughput": 19821.35, "total_tokens": 100080448} +{"current_steps": 31805, "total_steps": 78105, "loss": 0.1021, "lr": 3.6952642384357867e-06, "epoch": 2.0360412265539978, "percentage": 40.72, "elapsed_time": "1:24:09", "remaining_time": "2:02:31", "throughput": 19821.74, "total_tokens": 100095296} +{"current_steps": 31810, "total_steps": 78105, "loss": 0.1704, "lr": 3.694773542950433e-06, "epoch": 2.0363613084949748, "percentage": 40.73, "elapsed_time": "1:24:10", "remaining_time": "2:02:30", "throughput": 19822.27, "total_tokens": 100111936} +{"current_steps": 31815, "total_steps": 78105, "loss": 0.1592, "lr": 3.694282787804311e-06, "epoch": 2.0366813904359518, "percentage": 40.73, "elapsed_time": "1:24:11", "remaining_time": "2:02:29", "throughput": 19822.72, "total_tokens": 100127296} +{"current_steps": 31820, "total_steps": 78105, "loss": 0.256, "lr": 3.693791973021928e-06, "epoch": 2.0370014723769283, "percentage": 40.74, "elapsed_time": "1:24:11", "remaining_time": "2:02:28", "throughput": 19823.15, "total_tokens": 100142464} +{"current_steps": 31825, "total_steps": 78105, "loss": 0.2572, "lr": 3.693301098627791e-06, "epoch": 2.0373215543179053, "percentage": 40.75, "elapsed_time": "1:24:12", "remaining_time": "2:02:27", "throughput": 19823.59, "total_tokens": 100157952} +{"current_steps": 31830, "total_steps": 78105, "loss": 0.1787, "lr": 3.692810164646414e-06, "epoch": 2.0376416362588823, "percentage": 40.75, "elapsed_time": "1:24:13", "remaining_time": "2:02:26", "throughput": 19824.02, "total_tokens": 100173312} +{"current_steps": 31835, "total_steps": 78105, "loss": 0.2623, "lr": 3.6923191711023097e-06, "epoch": 2.0379617181998593, "percentage": 40.76, "elapsed_time": "1:24:14", "remaining_time": "2:02:25", "throughput": 19823.1, "total_tokens": 100188352} +{"current_steps": 31840, "total_steps": 78105, "loss": 0.2514, "lr": 3.6918281180199977e-06, "epoch": 2.0382818001408363, "percentage": 40.77, "elapsed_time": "1:24:14", "remaining_time": "2:02:24", "throughput": 19823.54, "total_tokens": 100204032} +{"current_steps": 31845, "total_steps": 78105, "loss": 0.1957, "lr": 3.6913370054239967e-06, "epoch": 2.038601882081813, "percentage": 40.77, "elapsed_time": "1:24:15", "remaining_time": "2:02:23", "throughput": 19823.96, "total_tokens": 100219008} +{"current_steps": 31850, "total_steps": 78105, "loss": 0.2899, "lr": 3.690845833338831e-06, "epoch": 2.03892196402279, "percentage": 40.78, "elapsed_time": "1:24:16", "remaining_time": "2:02:22", "throughput": 19824.35, "total_tokens": 100233856} +{"current_steps": 31855, "total_steps": 78105, "loss": 0.2437, "lr": 3.6903546017890275e-06, "epoch": 2.039242045963767, "percentage": 40.78, "elapsed_time": "1:24:16", "remaining_time": "2:02:21", "throughput": 19824.7, "total_tokens": 100248256} +{"current_steps": 31860, "total_steps": 78105, "loss": 0.1458, "lr": 3.6898633107991143e-06, "epoch": 2.039562127904744, "percentage": 40.79, "elapsed_time": "1:24:17", "remaining_time": "2:02:20", "throughput": 19825.12, "total_tokens": 100263616} +{"current_steps": 31865, "total_steps": 78105, "loss": 0.1635, "lr": 3.689371960393627e-06, "epoch": 2.0398822098457203, "percentage": 40.8, "elapsed_time": "1:24:18", "remaining_time": "2:02:19", "throughput": 19825.6, "total_tokens": 100279744} +{"current_steps": 31870, "total_steps": 78105, "loss": 0.1918, "lr": 3.688880550597098e-06, "epoch": 2.0402022917866973, "percentage": 40.8, "elapsed_time": "1:24:18", "remaining_time": "2:02:18", "throughput": 19826.07, "total_tokens": 100295552} +{"current_steps": 31875, "total_steps": 78105, "loss": 0.2051, "lr": 3.6883890814340678e-06, "epoch": 2.0405223737276743, "percentage": 40.81, "elapsed_time": "1:24:19", "remaining_time": "2:02:17", "throughput": 19826.49, "total_tokens": 100310912} +{"current_steps": 31880, "total_steps": 78105, "loss": 0.2058, "lr": 3.687897552929076e-06, "epoch": 2.0408424556686513, "percentage": 40.82, "elapsed_time": "1:24:20", "remaining_time": "2:02:17", "throughput": 19826.91, "total_tokens": 100326464} +{"current_steps": 31885, "total_steps": 78105, "loss": 0.2004, "lr": 3.687405965106669e-06, "epoch": 2.041162537609628, "percentage": 40.82, "elapsed_time": "1:24:20", "remaining_time": "2:02:16", "throughput": 19827.3, "total_tokens": 100341568} +{"current_steps": 31890, "total_steps": 78105, "loss": 0.1228, "lr": 3.6869143179913923e-06, "epoch": 2.041482619550605, "percentage": 40.83, "elapsed_time": "1:24:21", "remaining_time": "2:02:15", "throughput": 19827.8, "total_tokens": 100357696} +{"current_steps": 31895, "total_steps": 78105, "loss": 0.2019, "lr": 3.6864226116077977e-06, "epoch": 2.041802701491582, "percentage": 40.84, "elapsed_time": "1:24:22", "remaining_time": "2:02:14", "throughput": 19828.25, "total_tokens": 100373248} +{"current_steps": 31900, "total_steps": 78105, "loss": 0.1433, "lr": 3.685930845980438e-06, "epoch": 2.042122783432559, "percentage": 40.84, "elapsed_time": "1:24:22", "remaining_time": "2:02:13", "throughput": 19828.7, "total_tokens": 100388672} +{"current_steps": 31905, "total_steps": 78105, "loss": 0.2607, "lr": 3.685439021133868e-06, "epoch": 2.042442865373536, "percentage": 40.85, "elapsed_time": "1:24:23", "remaining_time": "2:02:12", "throughput": 19829.25, "total_tokens": 100405376} +{"current_steps": 31910, "total_steps": 78105, "loss": 0.248, "lr": 3.6849471370926483e-06, "epoch": 2.0427629473145124, "percentage": 40.86, "elapsed_time": "1:24:24", "remaining_time": "2:02:11", "throughput": 19829.71, "total_tokens": 100421376} +{"current_steps": 31915, "total_steps": 78105, "loss": 0.1678, "lr": 3.68445519388134e-06, "epoch": 2.0430830292554893, "percentage": 40.86, "elapsed_time": "1:24:24", "remaining_time": "2:02:10", "throughput": 19830.08, "total_tokens": 100436096} +{"current_steps": 31920, "total_steps": 78105, "loss": 0.2534, "lr": 3.68396319152451e-06, "epoch": 2.0434031111964663, "percentage": 40.87, "elapsed_time": "1:24:25", "remaining_time": "2:02:09", "throughput": 19830.45, "total_tokens": 100450752} +{"current_steps": 31925, "total_steps": 78105, "loss": 0.302, "lr": 3.6834711300467254e-06, "epoch": 2.0437231931374433, "percentage": 40.87, "elapsed_time": "1:24:26", "remaining_time": "2:02:08", "throughput": 19830.99, "total_tokens": 100467392} +{"current_steps": 31930, "total_steps": 78105, "loss": 0.1205, "lr": 3.682979009472557e-06, "epoch": 2.04404327507842, "percentage": 40.88, "elapsed_time": "1:24:26", "remaining_time": "2:02:07", "throughput": 19831.48, "total_tokens": 100483712} +{"current_steps": 31935, "total_steps": 78105, "loss": 0.269, "lr": 3.6824868298265782e-06, "epoch": 2.044363357019397, "percentage": 40.89, "elapsed_time": "1:24:27", "remaining_time": "2:02:06", "throughput": 19831.97, "total_tokens": 100499648} +{"current_steps": 31940, "total_steps": 78105, "loss": 0.2446, "lr": 3.6819945911333666e-06, "epoch": 2.044683438960374, "percentage": 40.89, "elapsed_time": "1:24:28", "remaining_time": "2:02:05", "throughput": 19832.4, "total_tokens": 100515008} +{"current_steps": 31945, "total_steps": 78105, "loss": 0.1921, "lr": 3.681502293417502e-06, "epoch": 2.045003520901351, "percentage": 40.9, "elapsed_time": "1:24:28", "remaining_time": "2:02:04", "throughput": 19832.78, "total_tokens": 100529664} +{"current_steps": 31950, "total_steps": 78105, "loss": 0.2747, "lr": 3.681009936703567e-06, "epoch": 2.045323602842328, "percentage": 40.91, "elapsed_time": "1:24:29", "remaining_time": "2:02:03", "throughput": 19833.21, "total_tokens": 100545088} +{"current_steps": 31955, "total_steps": 78105, "loss": 0.209, "lr": 3.680517521016148e-06, "epoch": 2.0456436847833044, "percentage": 40.91, "elapsed_time": "1:24:30", "remaining_time": "2:02:02", "throughput": 19833.64, "total_tokens": 100560576} +{"current_steps": 31960, "total_steps": 78105, "loss": 0.2114, "lr": 3.680025046379833e-06, "epoch": 2.0459637667242814, "percentage": 40.92, "elapsed_time": "1:24:30", "remaining_time": "2:02:01", "throughput": 19834.13, "total_tokens": 100576640} +{"current_steps": 31965, "total_steps": 78105, "loss": 0.2232, "lr": 3.679532512819213e-06, "epoch": 2.0462838486652584, "percentage": 40.93, "elapsed_time": "1:24:31", "remaining_time": "2:02:00", "throughput": 19834.5, "total_tokens": 100591360} +{"current_steps": 31970, "total_steps": 78105, "loss": 0.1649, "lr": 3.6790399203588834e-06, "epoch": 2.0466039306062354, "percentage": 40.93, "elapsed_time": "1:24:32", "remaining_time": "2:01:59", "throughput": 19834.93, "total_tokens": 100606912} +{"current_steps": 31975, "total_steps": 78105, "loss": 0.2078, "lr": 3.6785472690234415e-06, "epoch": 2.046924012547212, "percentage": 40.94, "elapsed_time": "1:24:32", "remaining_time": "2:01:58", "throughput": 19835.45, "total_tokens": 100623552} +{"current_steps": 31980, "total_steps": 78105, "loss": 0.2281, "lr": 3.6780545588374883e-06, "epoch": 2.047244094488189, "percentage": 40.94, "elapsed_time": "1:24:33", "remaining_time": "2:01:57", "throughput": 19835.86, "total_tokens": 100638720} +{"current_steps": 31985, "total_steps": 78105, "loss": 0.2779, "lr": 3.6775617898256267e-06, "epoch": 2.047564176429166, "percentage": 40.95, "elapsed_time": "1:24:34", "remaining_time": "2:01:56", "throughput": 19836.32, "total_tokens": 100654592} +{"current_steps": 31990, "total_steps": 78105, "loss": 0.2415, "lr": 3.6770689620124626e-06, "epoch": 2.047884258370143, "percentage": 40.96, "elapsed_time": "1:24:34", "remaining_time": "2:01:55", "throughput": 19836.76, "total_tokens": 100670336} +{"current_steps": 31995, "total_steps": 78105, "loss": 0.1341, "lr": 3.6765760754226053e-06, "epoch": 2.04820434031112, "percentage": 40.96, "elapsed_time": "1:24:35", "remaining_time": "2:01:54", "throughput": 19837.18, "total_tokens": 100685696} +{"current_steps": 32000, "total_steps": 78105, "loss": 0.1803, "lr": 3.6760831300806687e-06, "epoch": 2.0485244222520964, "percentage": 40.97, "elapsed_time": "1:24:36", "remaining_time": "2:01:53", "throughput": 19837.53, "total_tokens": 100700352} +{"current_steps": 32005, "total_steps": 78105, "loss": 0.2749, "lr": 3.675590126011266e-06, "epoch": 2.0488445041930734, "percentage": 40.98, "elapsed_time": "1:24:36", "remaining_time": "2:01:52", "throughput": 19838.04, "total_tokens": 100716544} +{"current_steps": 32010, "total_steps": 78105, "loss": 0.2017, "lr": 3.6750970632390156e-06, "epoch": 2.0491645861340504, "percentage": 40.98, "elapsed_time": "1:24:37", "remaining_time": "2:01:51", "throughput": 19838.48, "total_tokens": 100732096} +{"current_steps": 32015, "total_steps": 78105, "loss": 0.2598, "lr": 3.6746039417885387e-06, "epoch": 2.0494846680750274, "percentage": 40.99, "elapsed_time": "1:24:38", "remaining_time": "2:01:50", "throughput": 19838.96, "total_tokens": 100747968} +{"current_steps": 32020, "total_steps": 78105, "loss": 0.1603, "lr": 3.674110761684459e-06, "epoch": 2.049804750016004, "percentage": 41.0, "elapsed_time": "1:24:38", "remaining_time": "2:01:49", "throughput": 19839.41, "total_tokens": 100763648} +{"current_steps": 32025, "total_steps": 78105, "loss": 0.2008, "lr": 3.6736175229514036e-06, "epoch": 2.050124831956981, "percentage": 41.0, "elapsed_time": "1:24:39", "remaining_time": "2:01:49", "throughput": 19840.2, "total_tokens": 100783488} +{"current_steps": 32030, "total_steps": 78105, "loss": 0.2693, "lr": 3.6731242256140033e-06, "epoch": 2.050444913897958, "percentage": 41.01, "elapsed_time": "1:24:40", "remaining_time": "2:01:48", "throughput": 19840.7, "total_tokens": 100799552} +{"current_steps": 32035, "total_steps": 78105, "loss": 0.273, "lr": 3.672630869696888e-06, "epoch": 2.050764995838935, "percentage": 41.02, "elapsed_time": "1:24:41", "remaining_time": "2:01:47", "throughput": 19841.23, "total_tokens": 100816192} +{"current_steps": 32040, "total_steps": 78105, "loss": 0.1789, "lr": 3.6721374552246962e-06, "epoch": 2.0510850777799114, "percentage": 41.02, "elapsed_time": "1:24:41", "remaining_time": "2:01:46", "throughput": 19841.68, "total_tokens": 100831680} +{"current_steps": 32045, "total_steps": 78105, "loss": 0.2565, "lr": 3.6716439822220656e-06, "epoch": 2.0514051597208884, "percentage": 41.03, "elapsed_time": "1:24:42", "remaining_time": "2:01:45", "throughput": 19842.04, "total_tokens": 100846400} +{"current_steps": 32050, "total_steps": 78105, "loss": 0.1367, "lr": 3.671150450713637e-06, "epoch": 2.0517252416618654, "percentage": 41.03, "elapsed_time": "1:24:43", "remaining_time": "2:01:44", "throughput": 19842.49, "total_tokens": 100862400} +{"current_steps": 32055, "total_steps": 78105, "loss": 0.2691, "lr": 3.670656860724055e-06, "epoch": 2.0520453236028424, "percentage": 41.04, "elapsed_time": "1:24:43", "remaining_time": "2:01:43", "throughput": 19842.88, "total_tokens": 100877312} +{"current_steps": 32060, "total_steps": 78105, "loss": 0.1851, "lr": 3.6701632122779674e-06, "epoch": 2.0523654055438194, "percentage": 41.05, "elapsed_time": "1:24:44", "remaining_time": "2:01:42", "throughput": 19843.29, "total_tokens": 100892416} +{"current_steps": 32065, "total_steps": 78105, "loss": 0.1442, "lr": 3.669669505400024e-06, "epoch": 2.052685487484796, "percentage": 41.05, "elapsed_time": "1:24:45", "remaining_time": "2:01:41", "throughput": 19843.73, "total_tokens": 100908032} +{"current_steps": 32070, "total_steps": 78105, "loss": 0.2362, "lr": 3.6691757401148785e-06, "epoch": 2.053005569425773, "percentage": 41.06, "elapsed_time": "1:24:45", "remaining_time": "2:01:40", "throughput": 19844.24, "total_tokens": 100924352} +{"current_steps": 32075, "total_steps": 78105, "loss": 0.1598, "lr": 3.668681916447186e-06, "epoch": 2.05332565136675, "percentage": 41.07, "elapsed_time": "1:24:46", "remaining_time": "2:01:39", "throughput": 19844.72, "total_tokens": 100940032} +{"current_steps": 32080, "total_steps": 78105, "loss": 0.2386, "lr": 3.668188034421606e-06, "epoch": 2.053645733307727, "percentage": 41.07, "elapsed_time": "1:24:47", "remaining_time": "2:01:38", "throughput": 19845.29, "total_tokens": 100957376} +{"current_steps": 32085, "total_steps": 78105, "loss": 0.2209, "lr": 3.6676940940628007e-06, "epoch": 2.0539658152487035, "percentage": 41.08, "elapsed_time": "1:24:47", "remaining_time": "2:01:37", "throughput": 19845.68, "total_tokens": 100972544} +{"current_steps": 32090, "total_steps": 78105, "loss": 0.1616, "lr": 3.6672000953954346e-06, "epoch": 2.0542858971896805, "percentage": 41.09, "elapsed_time": "1:24:48", "remaining_time": "2:01:36", "throughput": 19846.15, "total_tokens": 100988224} +{"current_steps": 32095, "total_steps": 78105, "loss": 0.1884, "lr": 3.666706038444175e-06, "epoch": 2.0546059791306575, "percentage": 41.09, "elapsed_time": "1:24:49", "remaining_time": "2:01:35", "throughput": 19846.6, "total_tokens": 101003968} +{"current_steps": 32100, "total_steps": 78105, "loss": 0.3679, "lr": 3.6662119232336933e-06, "epoch": 2.0549260610716344, "percentage": 41.1, "elapsed_time": "1:24:49", "remaining_time": "2:01:34", "throughput": 19846.94, "total_tokens": 101018368} +{"current_steps": 32105, "total_steps": 78105, "loss": 0.1868, "lr": 3.6657177497886633e-06, "epoch": 2.0552461430126114, "percentage": 41.1, "elapsed_time": "1:24:50", "remaining_time": "2:01:33", "throughput": 19847.3, "total_tokens": 101032896} +{"current_steps": 32110, "total_steps": 78105, "loss": 0.1823, "lr": 3.6652235181337605e-06, "epoch": 2.055566224953588, "percentage": 41.11, "elapsed_time": "1:24:51", "remaining_time": "2:01:32", "throughput": 19847.85, "total_tokens": 101049792} +{"current_steps": 32115, "total_steps": 78105, "loss": 0.1792, "lr": 3.664729228293664e-06, "epoch": 2.055886306894565, "percentage": 41.12, "elapsed_time": "1:24:51", "remaining_time": "2:01:31", "throughput": 19848.31, "total_tokens": 101065600} +{"current_steps": 32120, "total_steps": 78105, "loss": 0.1921, "lr": 3.6642348802930576e-06, "epoch": 2.056206388835542, "percentage": 41.12, "elapsed_time": "1:24:52", "remaining_time": "2:01:30", "throughput": 19848.74, "total_tokens": 101081088} +{"current_steps": 32125, "total_steps": 78105, "loss": 0.209, "lr": 3.663740474156625e-06, "epoch": 2.056526470776519, "percentage": 41.13, "elapsed_time": "1:24:53", "remaining_time": "2:01:29", "throughput": 19849.15, "total_tokens": 101096704} +{"current_steps": 32130, "total_steps": 78105, "loss": 0.1708, "lr": 3.6632460099090555e-06, "epoch": 2.0568465527174955, "percentage": 41.14, "elapsed_time": "1:24:53", "remaining_time": "2:01:28", "throughput": 19849.58, "total_tokens": 101111936} +{"current_steps": 32135, "total_steps": 78105, "loss": 0.3031, "lr": 3.6627514875750386e-06, "epoch": 2.0571666346584725, "percentage": 41.14, "elapsed_time": "1:24:54", "remaining_time": "2:01:27", "throughput": 19850.07, "total_tokens": 101127744} +{"current_steps": 32140, "total_steps": 78105, "loss": 0.1409, "lr": 3.662256907179269e-06, "epoch": 2.0574867165994495, "percentage": 41.15, "elapsed_time": "1:24:55", "remaining_time": "2:01:26", "throughput": 19850.59, "total_tokens": 101143872} +{"current_steps": 32145, "total_steps": 78105, "loss": 0.2047, "lr": 3.6617622687464446e-06, "epoch": 2.0578067985404265, "percentage": 41.16, "elapsed_time": "1:24:55", "remaining_time": "2:01:26", "throughput": 19851.07, "total_tokens": 101160000} +{"current_steps": 32150, "total_steps": 78105, "loss": 0.3062, "lr": 3.6612675723012626e-06, "epoch": 2.058126880481403, "percentage": 41.16, "elapsed_time": "1:24:56", "remaining_time": "2:01:25", "throughput": 19851.55, "total_tokens": 101176256} +{"current_steps": 32155, "total_steps": 78105, "loss": 0.1899, "lr": 3.660772817868427e-06, "epoch": 2.05844696242238, "percentage": 41.17, "elapsed_time": "1:24:57", "remaining_time": "2:01:24", "throughput": 19852.02, "total_tokens": 101192192} +{"current_steps": 32160, "total_steps": 78105, "loss": 0.173, "lr": 3.660278005472643e-06, "epoch": 2.058767044363357, "percentage": 41.18, "elapsed_time": "1:24:58", "remaining_time": "2:01:23", "throughput": 19852.49, "total_tokens": 101208512} +{"current_steps": 32165, "total_steps": 78105, "loss": 0.1725, "lr": 3.6597831351386203e-06, "epoch": 2.059087126304334, "percentage": 41.18, "elapsed_time": "1:24:58", "remaining_time": "2:01:22", "throughput": 19852.96, "total_tokens": 101224384} +{"current_steps": 32170, "total_steps": 78105, "loss": 0.1945, "lr": 3.6592882068910673e-06, "epoch": 2.059407208245311, "percentage": 41.19, "elapsed_time": "1:24:59", "remaining_time": "2:01:21", "throughput": 19853.36, "total_tokens": 101239360} +{"current_steps": 32175, "total_steps": 78105, "loss": 0.1768, "lr": 3.6587932207547004e-06, "epoch": 2.0597272901862875, "percentage": 41.19, "elapsed_time": "1:25:00", "remaining_time": "2:01:20", "throughput": 19853.78, "total_tokens": 101254528} +{"current_steps": 32180, "total_steps": 78105, "loss": 0.2859, "lr": 3.658298176754237e-06, "epoch": 2.0600473721272645, "percentage": 41.2, "elapsed_time": "1:25:00", "remaining_time": "2:01:19", "throughput": 19854.22, "total_tokens": 101270144} +{"current_steps": 32185, "total_steps": 78105, "loss": 0.2116, "lr": 3.657803074914395e-06, "epoch": 2.0603674540682415, "percentage": 41.21, "elapsed_time": "1:25:01", "remaining_time": "2:01:18", "throughput": 19854.63, "total_tokens": 101284992} +{"current_steps": 32190, "total_steps": 78105, "loss": 0.2396, "lr": 3.6573079152598982e-06, "epoch": 2.0606875360092185, "percentage": 41.21, "elapsed_time": "1:25:02", "remaining_time": "2:01:17", "throughput": 19855.21, "total_tokens": 101302784} +{"current_steps": 32195, "total_steps": 78105, "loss": 0.2634, "lr": 3.656812697815472e-06, "epoch": 2.061007617950195, "percentage": 41.22, "elapsed_time": "1:25:02", "remaining_time": "2:01:16", "throughput": 19855.73, "total_tokens": 101319552} +{"current_steps": 32200, "total_steps": 78105, "loss": 0.1979, "lr": 3.656317422605846e-06, "epoch": 2.061327699891172, "percentage": 41.23, "elapsed_time": "1:25:03", "remaining_time": "2:01:15", "throughput": 19856.12, "total_tokens": 101334272} +{"current_steps": 32205, "total_steps": 78105, "loss": 0.1825, "lr": 3.655822089655751e-06, "epoch": 2.061647781832149, "percentage": 41.23, "elapsed_time": "1:25:04", "remaining_time": "2:01:14", "throughput": 19856.59, "total_tokens": 101350208} +{"current_steps": 32210, "total_steps": 78105, "loss": 0.1583, "lr": 3.6553266989899207e-06, "epoch": 2.061967863773126, "percentage": 41.24, "elapsed_time": "1:25:04", "remaining_time": "2:01:13", "throughput": 19856.95, "total_tokens": 101364928} +{"current_steps": 32215, "total_steps": 78105, "loss": 0.1841, "lr": 3.6548312506330934e-06, "epoch": 2.062287945714103, "percentage": 41.25, "elapsed_time": "1:25:05", "remaining_time": "2:01:12", "throughput": 19857.54, "total_tokens": 101382208} +{"current_steps": 32220, "total_steps": 78105, "loss": 0.2441, "lr": 3.6543357446100085e-06, "epoch": 2.0626080276550796, "percentage": 41.25, "elapsed_time": "1:25:06", "remaining_time": "2:01:11", "throughput": 19858.02, "total_tokens": 101398400} +{"current_steps": 32225, "total_steps": 78105, "loss": 0.2626, "lr": 3.653840180945409e-06, "epoch": 2.0629281095960565, "percentage": 41.26, "elapsed_time": "1:25:06", "remaining_time": "2:01:10", "throughput": 19858.55, "total_tokens": 101414720} +{"current_steps": 32230, "total_steps": 78105, "loss": 0.1868, "lr": 3.6533445596640414e-06, "epoch": 2.0632481915370335, "percentage": 41.26, "elapsed_time": "1:25:07", "remaining_time": "2:01:09", "throughput": 19858.93, "total_tokens": 101429440} +{"current_steps": 32235, "total_steps": 78105, "loss": 0.2584, "lr": 3.6528488807906543e-06, "epoch": 2.0635682734780105, "percentage": 41.27, "elapsed_time": "1:25:08", "remaining_time": "2:01:08", "throughput": 19859.34, "total_tokens": 101444608} +{"current_steps": 32240, "total_steps": 78105, "loss": 0.2329, "lr": 3.652353144349999e-06, "epoch": 2.063888355418987, "percentage": 41.28, "elapsed_time": "1:25:08", "remaining_time": "2:01:07", "throughput": 19859.84, "total_tokens": 101460800} +{"current_steps": 32245, "total_steps": 78105, "loss": 0.2626, "lr": 3.65185735036683e-06, "epoch": 2.064208437359964, "percentage": 41.28, "elapsed_time": "1:25:09", "remaining_time": "2:01:07", "throughput": 19860.38, "total_tokens": 101477888} +{"current_steps": 32250, "total_steps": 78105, "loss": 0.1905, "lr": 3.651361498865904e-06, "epoch": 2.064528519300941, "percentage": 41.29, "elapsed_time": "1:25:10", "remaining_time": "2:01:06", "throughput": 19860.82, "total_tokens": 101493632} +{"current_steps": 32255, "total_steps": 78105, "loss": 0.1524, "lr": 3.6508655898719824e-06, "epoch": 2.064848601241918, "percentage": 41.3, "elapsed_time": "1:25:10", "remaining_time": "2:01:05", "throughput": 19861.28, "total_tokens": 101509760} +{"current_steps": 32260, "total_steps": 78105, "loss": 0.2483, "lr": 3.650369623409828e-06, "epoch": 2.065168683182895, "percentage": 41.3, "elapsed_time": "1:25:11", "remaining_time": "2:01:04", "throughput": 19861.87, "total_tokens": 101527104} +{"current_steps": 32265, "total_steps": 78105, "loss": 0.1585, "lr": 3.6498735995042066e-06, "epoch": 2.0654887651238716, "percentage": 41.31, "elapsed_time": "1:25:12", "remaining_time": "2:01:03", "throughput": 19862.31, "total_tokens": 101543040} +{"current_steps": 32270, "total_steps": 78105, "loss": 0.2534, "lr": 3.6493775181798866e-06, "epoch": 2.0658088470648486, "percentage": 41.32, "elapsed_time": "1:25:13", "remaining_time": "2:01:02", "throughput": 19862.92, "total_tokens": 101561024} +{"current_steps": 32275, "total_steps": 78105, "loss": 0.2004, "lr": 3.6488813794616412e-06, "epoch": 2.0661289290058256, "percentage": 41.32, "elapsed_time": "1:25:13", "remaining_time": "2:01:01", "throughput": 19863.35, "total_tokens": 101576448} +{"current_steps": 32280, "total_steps": 78105, "loss": 0.2001, "lr": 3.6483851833742435e-06, "epoch": 2.0664490109468026, "percentage": 41.33, "elapsed_time": "1:25:14", "remaining_time": "2:01:00", "throughput": 19863.79, "total_tokens": 101592000} +{"current_steps": 32285, "total_steps": 78105, "loss": 0.2706, "lr": 3.6478889299424713e-06, "epoch": 2.066769092887779, "percentage": 41.34, "elapsed_time": "1:25:15", "remaining_time": "2:00:59", "throughput": 19864.29, "total_tokens": 101608192} +{"current_steps": 32290, "total_steps": 78105, "loss": 0.2167, "lr": 3.647392619191106e-06, "epoch": 2.067089174828756, "percentage": 41.34, "elapsed_time": "1:25:15", "remaining_time": "2:00:58", "throughput": 19864.78, "total_tokens": 101624320} +{"current_steps": 32295, "total_steps": 78105, "loss": 0.2007, "lr": 3.6468962511449293e-06, "epoch": 2.067409256769733, "percentage": 41.35, "elapsed_time": "1:25:16", "remaining_time": "2:00:57", "throughput": 19865.24, "total_tokens": 101640128} +{"current_steps": 32300, "total_steps": 78105, "loss": 0.2372, "lr": 3.646399825828727e-06, "epoch": 2.06772933871071, "percentage": 41.35, "elapsed_time": "1:25:17", "remaining_time": "2:00:56", "throughput": 19865.66, "total_tokens": 101655808} +{"current_steps": 32305, "total_steps": 78105, "loss": 0.2353, "lr": 3.6459033432672898e-06, "epoch": 2.0680494206516866, "percentage": 41.36, "elapsed_time": "1:25:17", "remaining_time": "2:00:55", "throughput": 19866.0, "total_tokens": 101670080} +{"current_steps": 32310, "total_steps": 78105, "loss": 0.2576, "lr": 3.645406803485407e-06, "epoch": 2.0683695025926636, "percentage": 41.37, "elapsed_time": "1:25:18", "remaining_time": "2:00:54", "throughput": 19866.43, "total_tokens": 101685184} +{"current_steps": 32315, "total_steps": 78105, "loss": 0.2226, "lr": 3.6449102065078767e-06, "epoch": 2.0686895845336406, "percentage": 41.37, "elapsed_time": "1:25:19", "remaining_time": "2:00:53", "throughput": 19866.9, "total_tokens": 101701184} +{"current_steps": 32320, "total_steps": 78105, "loss": 0.3052, "lr": 3.6444135523594933e-06, "epoch": 2.0690096664746176, "percentage": 41.38, "elapsed_time": "1:25:19", "remaining_time": "2:00:52", "throughput": 19867.47, "total_tokens": 101718592} +{"current_steps": 32325, "total_steps": 78105, "loss": 0.2081, "lr": 3.6439168410650576e-06, "epoch": 2.0693297484155946, "percentage": 41.39, "elapsed_time": "1:25:20", "remaining_time": "2:00:51", "throughput": 19867.89, "total_tokens": 101733632} +{"current_steps": 32330, "total_steps": 78105, "loss": 0.2453, "lr": 3.643420072649374e-06, "epoch": 2.069649830356571, "percentage": 41.39, "elapsed_time": "1:25:21", "remaining_time": "2:00:50", "throughput": 19868.42, "total_tokens": 101750400} +{"current_steps": 32335, "total_steps": 78105, "loss": 0.1596, "lr": 3.6429232471372478e-06, "epoch": 2.069969912297548, "percentage": 41.4, "elapsed_time": "1:25:21", "remaining_time": "2:00:49", "throughput": 19868.87, "total_tokens": 101765888} +{"current_steps": 32340, "total_steps": 78105, "loss": 0.1673, "lr": 3.6424263645534873e-06, "epoch": 2.070289994238525, "percentage": 41.41, "elapsed_time": "1:25:22", "remaining_time": "2:00:49", "throughput": 19869.29, "total_tokens": 101781376} +{"current_steps": 32345, "total_steps": 78105, "loss": 0.1531, "lr": 3.6419294249229053e-06, "epoch": 2.070610076179502, "percentage": 41.41, "elapsed_time": "1:25:23", "remaining_time": "2:00:48", "throughput": 19869.79, "total_tokens": 101797760} +{"current_steps": 32350, "total_steps": 78105, "loss": 0.1952, "lr": 3.641432428270316e-06, "epoch": 2.0709301581204786, "percentage": 41.42, "elapsed_time": "1:25:23", "remaining_time": "2:00:47", "throughput": 19870.25, "total_tokens": 101813760} +{"current_steps": 32355, "total_steps": 78105, "loss": 0.3066, "lr": 3.6409353746205367e-06, "epoch": 2.0712502400614556, "percentage": 41.43, "elapsed_time": "1:25:24", "remaining_time": "2:00:46", "throughput": 19870.76, "total_tokens": 101830144} +{"current_steps": 32360, "total_steps": 78105, "loss": 0.4124, "lr": 3.6404382639983883e-06, "epoch": 2.0715703220024326, "percentage": 41.43, "elapsed_time": "1:25:25", "remaining_time": "2:00:45", "throughput": 19871.31, "total_tokens": 101846848} +{"current_steps": 32365, "total_steps": 78105, "loss": 0.2141, "lr": 3.639941096428692e-06, "epoch": 2.0718904039434096, "percentage": 41.44, "elapsed_time": "1:25:25", "remaining_time": "2:00:44", "throughput": 19871.74, "total_tokens": 101861888} +{"current_steps": 32370, "total_steps": 78105, "loss": 0.2322, "lr": 3.6394438719362758e-06, "epoch": 2.0722104858843866, "percentage": 41.44, "elapsed_time": "1:25:26", "remaining_time": "2:00:43", "throughput": 19872.2, "total_tokens": 101877696} +{"current_steps": 32375, "total_steps": 78105, "loss": 0.1493, "lr": 3.638946590545968e-06, "epoch": 2.072530567825363, "percentage": 41.45, "elapsed_time": "1:25:27", "remaining_time": "2:00:42", "throughput": 19872.57, "total_tokens": 101892544} +{"current_steps": 32380, "total_steps": 78105, "loss": 0.2721, "lr": 3.6384492522826e-06, "epoch": 2.07285064976634, "percentage": 41.46, "elapsed_time": "1:25:27", "remaining_time": "2:00:41", "throughput": 19873.02, "total_tokens": 101908416} +{"current_steps": 32385, "total_steps": 78105, "loss": 0.2413, "lr": 3.6379518571710053e-06, "epoch": 2.073170731707317, "percentage": 41.46, "elapsed_time": "1:25:28", "remaining_time": "2:00:40", "throughput": 19873.44, "total_tokens": 101923712} +{"current_steps": 32390, "total_steps": 78105, "loss": 0.1763, "lr": 3.637454405236023e-06, "epoch": 2.073490813648294, "percentage": 41.47, "elapsed_time": "1:25:29", "remaining_time": "2:00:39", "throughput": 19873.93, "total_tokens": 101939968} +{"current_steps": 32395, "total_steps": 78105, "loss": 0.1755, "lr": 3.636956896502493e-06, "epoch": 2.0738108955892707, "percentage": 41.48, "elapsed_time": "1:25:29", "remaining_time": "2:00:38", "throughput": 19874.29, "total_tokens": 101954816} +{"current_steps": 32400, "total_steps": 78105, "loss": 0.2567, "lr": 3.636459330995257e-06, "epoch": 2.0741309775302477, "percentage": 41.48, "elapsed_time": "1:25:30", "remaining_time": "2:00:37", "throughput": 19874.69, "total_tokens": 101970048} +{"current_steps": 32405, "total_steps": 78105, "loss": 0.2265, "lr": 3.635961708739162e-06, "epoch": 2.0744510594712247, "percentage": 41.49, "elapsed_time": "1:25:31", "remaining_time": "2:00:36", "throughput": 19875.09, "total_tokens": 101985344} +{"current_steps": 32410, "total_steps": 78105, "loss": 0.2665, "lr": 3.635464029759056e-06, "epoch": 2.0747711414122016, "percentage": 41.5, "elapsed_time": "1:25:32", "remaining_time": "2:00:35", "throughput": 19875.49, "total_tokens": 102001088} +{"current_steps": 32415, "total_steps": 78105, "loss": 0.3026, "lr": 3.634966294079791e-06, "epoch": 2.075091223353178, "percentage": 41.5, "elapsed_time": "1:25:32", "remaining_time": "2:00:34", "throughput": 19876.0, "total_tokens": 102018048} +{"current_steps": 32420, "total_steps": 78105, "loss": 0.2463, "lr": 3.6344685017262216e-06, "epoch": 2.075411305294155, "percentage": 41.51, "elapsed_time": "1:25:33", "remaining_time": "2:00:33", "throughput": 19876.43, "total_tokens": 102033344} +{"current_steps": 32425, "total_steps": 78105, "loss": 0.249, "lr": 3.633970652723203e-06, "epoch": 2.075731387235132, "percentage": 41.51, "elapsed_time": "1:25:34", "remaining_time": "2:00:32", "throughput": 19876.8, "total_tokens": 102048448} +{"current_steps": 32430, "total_steps": 78105, "loss": 0.2439, "lr": 3.6334727470955978e-06, "epoch": 2.076051469176109, "percentage": 41.52, "elapsed_time": "1:25:34", "remaining_time": "2:00:31", "throughput": 19877.37, "total_tokens": 102065792} +{"current_steps": 32435, "total_steps": 78105, "loss": 0.1378, "lr": 3.6329747848682674e-06, "epoch": 2.076371551117086, "percentage": 41.53, "elapsed_time": "1:25:35", "remaining_time": "2:00:30", "throughput": 19877.74, "total_tokens": 102080704} +{"current_steps": 32440, "total_steps": 78105, "loss": 0.3276, "lr": 3.632476766066078e-06, "epoch": 2.0766916330580627, "percentage": 41.53, "elapsed_time": "1:25:36", "remaining_time": "2:00:29", "throughput": 19878.19, "total_tokens": 102096512} +{"current_steps": 32445, "total_steps": 78105, "loss": 0.2661, "lr": 3.6319786907138973e-06, "epoch": 2.0770117149990397, "percentage": 41.54, "elapsed_time": "1:25:36", "remaining_time": "2:00:29", "throughput": 19878.62, "total_tokens": 102112320} +{"current_steps": 32450, "total_steps": 78105, "loss": 0.1498, "lr": 3.631480558836597e-06, "epoch": 2.0773317969400167, "percentage": 41.55, "elapsed_time": "1:25:37", "remaining_time": "2:00:28", "throughput": 19879.05, "total_tokens": 102127936} +{"current_steps": 32455, "total_steps": 78105, "loss": 0.2003, "lr": 3.6309823704590506e-06, "epoch": 2.0776518788809937, "percentage": 41.55, "elapsed_time": "1:25:38", "remaining_time": "2:00:27", "throughput": 19879.47, "total_tokens": 102143552} +{"current_steps": 32460, "total_steps": 78105, "loss": 0.2259, "lr": 3.6304841256061373e-06, "epoch": 2.07797196082197, "percentage": 41.56, "elapsed_time": "1:25:38", "remaining_time": "2:00:26", "throughput": 19879.91, "total_tokens": 102159104} +{"current_steps": 32465, "total_steps": 78105, "loss": 0.2576, "lr": 3.629985824302734e-06, "epoch": 2.078292042762947, "percentage": 41.57, "elapsed_time": "1:25:39", "remaining_time": "2:00:25", "throughput": 19880.37, "total_tokens": 102174848} +{"current_steps": 32470, "total_steps": 78105, "loss": 0.1689, "lr": 3.6294874665737257e-06, "epoch": 2.078612124703924, "percentage": 41.57, "elapsed_time": "1:25:40", "remaining_time": "2:00:24", "throughput": 19880.9, "total_tokens": 102191360} +{"current_steps": 32475, "total_steps": 78105, "loss": 0.216, "lr": 3.6289890524439956e-06, "epoch": 2.078932206644901, "percentage": 41.58, "elapsed_time": "1:25:40", "remaining_time": "2:00:23", "throughput": 19881.27, "total_tokens": 102206016} +{"current_steps": 32480, "total_steps": 78105, "loss": 0.2358, "lr": 3.628490581938433e-06, "epoch": 2.079252288585878, "percentage": 41.59, "elapsed_time": "1:25:41", "remaining_time": "2:00:22", "throughput": 19881.68, "total_tokens": 102221312} +{"current_steps": 32485, "total_steps": 78105, "loss": 0.1419, "lr": 3.627992055081929e-06, "epoch": 2.0795723705268547, "percentage": 41.59, "elapsed_time": "1:25:42", "remaining_time": "2:00:21", "throughput": 19882.1, "total_tokens": 102236928} +{"current_steps": 32490, "total_steps": 78105, "loss": 0.2382, "lr": 3.6274934718993776e-06, "epoch": 2.0798924524678317, "percentage": 41.6, "elapsed_time": "1:25:42", "remaining_time": "2:00:20", "throughput": 19882.5, "total_tokens": 102252160} +{"current_steps": 32495, "total_steps": 78105, "loss": 0.2546, "lr": 3.626994832415675e-06, "epoch": 2.0802125344088087, "percentage": 41.6, "elapsed_time": "1:25:43", "remaining_time": "2:00:19", "throughput": 19882.93, "total_tokens": 102267328} +{"current_steps": 32500, "total_steps": 78105, "loss": 0.2345, "lr": 3.62649613665572e-06, "epoch": 2.0805326163497857, "percentage": 41.61, "elapsed_time": "1:25:44", "remaining_time": "2:00:18", "throughput": 19883.38, "total_tokens": 102283136} +{"current_steps": 32505, "total_steps": 78105, "loss": 0.2405, "lr": 3.6259973846444162e-06, "epoch": 2.0808526982907622, "percentage": 41.62, "elapsed_time": "1:25:44", "remaining_time": "2:00:17", "throughput": 19883.79, "total_tokens": 102298560} +{"current_steps": 32510, "total_steps": 78105, "loss": 0.171, "lr": 3.6254985764066688e-06, "epoch": 2.0811727802317392, "percentage": 41.62, "elapsed_time": "1:25:45", "remaining_time": "2:00:16", "throughput": 19884.2, "total_tokens": 102313728} +{"current_steps": 32515, "total_steps": 78105, "loss": 0.2459, "lr": 3.6249997119673835e-06, "epoch": 2.0814928621727162, "percentage": 41.63, "elapsed_time": "1:25:46", "remaining_time": "2:00:15", "throughput": 19884.59, "total_tokens": 102329088} +{"current_steps": 32520, "total_steps": 78105, "loss": 0.2137, "lr": 3.6245007913514736e-06, "epoch": 2.081812944113693, "percentage": 41.64, "elapsed_time": "1:25:46", "remaining_time": "2:00:14", "throughput": 19885.03, "total_tokens": 102344768} +{"current_steps": 32525, "total_steps": 78105, "loss": 0.3477, "lr": 3.6240018145838513e-06, "epoch": 2.08213302605467, "percentage": 41.64, "elapsed_time": "1:25:47", "remaining_time": "2:00:13", "throughput": 19885.47, "total_tokens": 102360256} +{"current_steps": 32530, "total_steps": 78105, "loss": 0.1191, "lr": 3.623502781689433e-06, "epoch": 2.0824531079956468, "percentage": 41.65, "elapsed_time": "1:25:48", "remaining_time": "2:00:12", "throughput": 19885.96, "total_tokens": 102376640} +{"current_steps": 32535, "total_steps": 78105, "loss": 0.1723, "lr": 3.623003692693138e-06, "epoch": 2.0827731899366237, "percentage": 41.66, "elapsed_time": "1:25:48", "remaining_time": "2:00:11", "throughput": 19886.36, "total_tokens": 102391744} +{"current_steps": 32540, "total_steps": 78105, "loss": 0.2269, "lr": 3.6225045476198876e-06, "epoch": 2.0830932718776007, "percentage": 41.66, "elapsed_time": "1:25:49", "remaining_time": "2:00:10", "throughput": 19886.82, "total_tokens": 102407936} +{"current_steps": 32545, "total_steps": 78105, "loss": 0.1461, "lr": 3.6220053464946074e-06, "epoch": 2.0834133538185777, "percentage": 41.67, "elapsed_time": "1:25:50", "remaining_time": "2:00:09", "throughput": 19887.27, "total_tokens": 102423744} +{"current_steps": 32550, "total_steps": 78105, "loss": 0.2772, "lr": 3.621506089342225e-06, "epoch": 2.0837334357595543, "percentage": 41.67, "elapsed_time": "1:25:50", "remaining_time": "2:00:08", "throughput": 19887.79, "total_tokens": 102440384} +{"current_steps": 32555, "total_steps": 78105, "loss": 0.2889, "lr": 3.62100677618767e-06, "epoch": 2.0840535177005313, "percentage": 41.68, "elapsed_time": "1:25:51", "remaining_time": "2:00:07", "throughput": 19888.14, "total_tokens": 102454784} +{"current_steps": 32560, "total_steps": 78105, "loss": 0.1941, "lr": 3.6205074070558747e-06, "epoch": 2.0843735996415083, "percentage": 41.69, "elapsed_time": "1:25:52", "remaining_time": "2:00:06", "throughput": 19888.63, "total_tokens": 102471232} +{"current_steps": 32565, "total_steps": 78105, "loss": 0.1543, "lr": 3.620007981971777e-06, "epoch": 2.0846936815824852, "percentage": 41.69, "elapsed_time": "1:25:52", "remaining_time": "2:00:05", "throughput": 19888.99, "total_tokens": 102485824} +{"current_steps": 32570, "total_steps": 78105, "loss": 0.1995, "lr": 3.6195085009603136e-06, "epoch": 2.085013763523462, "percentage": 41.7, "elapsed_time": "1:25:53", "remaining_time": "2:00:05", "throughput": 19889.34, "total_tokens": 102500928} +{"current_steps": 32575, "total_steps": 78105, "loss": 0.2033, "lr": 3.6190089640464283e-06, "epoch": 2.085333845464439, "percentage": 41.71, "elapsed_time": "1:25:54", "remaining_time": "2:00:04", "throughput": 19889.77, "total_tokens": 102516608} +{"current_steps": 32580, "total_steps": 78105, "loss": 0.2386, "lr": 3.6185093712550636e-06, "epoch": 2.0856539274054158, "percentage": 41.71, "elapsed_time": "1:25:54", "remaining_time": "2:00:03", "throughput": 19890.3, "total_tokens": 102533440} +{"current_steps": 32585, "total_steps": 78105, "loss": 0.1653, "lr": 3.6180097226111667e-06, "epoch": 2.0859740093463928, "percentage": 41.72, "elapsed_time": "1:25:55", "remaining_time": "2:00:02", "throughput": 19890.86, "total_tokens": 102550464} +{"current_steps": 32590, "total_steps": 78105, "loss": 0.1673, "lr": 3.6175100181396882e-06, "epoch": 2.0862940912873698, "percentage": 41.73, "elapsed_time": "1:25:56", "remaining_time": "2:00:01", "throughput": 19891.33, "total_tokens": 102566848} +{"current_steps": 32595, "total_steps": 78105, "loss": 0.1834, "lr": 3.6170102578655797e-06, "epoch": 2.0866141732283463, "percentage": 41.73, "elapsed_time": "1:25:57", "remaining_time": "2:00:00", "throughput": 19891.87, "total_tokens": 102583744} +{"current_steps": 32600, "total_steps": 78105, "loss": 0.167, "lr": 3.6165104418137976e-06, "epoch": 2.0869342551693233, "percentage": 41.74, "elapsed_time": "1:25:57", "remaining_time": "1:59:59", "throughput": 19892.26, "total_tokens": 102598848} +{"current_steps": 32605, "total_steps": 78105, "loss": 0.1778, "lr": 3.6160105700092994e-06, "epoch": 2.0872543371103003, "percentage": 41.75, "elapsed_time": "1:25:58", "remaining_time": "1:59:58", "throughput": 19892.7, "total_tokens": 102614464} +{"current_steps": 32610, "total_steps": 78105, "loss": 0.316, "lr": 3.615510642477047e-06, "epoch": 2.0875744190512773, "percentage": 41.75, "elapsed_time": "1:25:59", "remaining_time": "1:59:57", "throughput": 19893.08, "total_tokens": 102629632} +{"current_steps": 32615, "total_steps": 78105, "loss": 0.205, "lr": 3.615010659242003e-06, "epoch": 2.087894500992254, "percentage": 41.76, "elapsed_time": "1:25:59", "remaining_time": "1:59:56", "throughput": 19893.59, "total_tokens": 102646528} +{"current_steps": 32620, "total_steps": 78105, "loss": 0.2306, "lr": 3.614510620329135e-06, "epoch": 2.088214582933231, "percentage": 41.76, "elapsed_time": "1:26:00", "remaining_time": "1:59:55", "throughput": 19894.01, "total_tokens": 102662144} +{"current_steps": 32625, "total_steps": 78105, "loss": 0.254, "lr": 3.6140105257634117e-06, "epoch": 2.088534664874208, "percentage": 41.77, "elapsed_time": "1:26:01", "remaining_time": "1:59:54", "throughput": 19894.45, "total_tokens": 102677760} +{"current_steps": 32630, "total_steps": 78105, "loss": 0.2597, "lr": 3.613510375569805e-06, "epoch": 2.088854746815185, "percentage": 41.78, "elapsed_time": "1:26:01", "remaining_time": "1:59:53", "throughput": 19895.02, "total_tokens": 102695232} +{"current_steps": 32635, "total_steps": 78105, "loss": 0.1964, "lr": 3.6130101697732906e-06, "epoch": 2.089174828756162, "percentage": 41.78, "elapsed_time": "1:26:02", "remaining_time": "1:59:52", "throughput": 19895.43, "total_tokens": 102710464} +{"current_steps": 32640, "total_steps": 78105, "loss": 0.262, "lr": 3.612509908398846e-06, "epoch": 2.0894949106971383, "percentage": 41.79, "elapsed_time": "1:26:03", "remaining_time": "1:59:51", "throughput": 19895.88, "total_tokens": 102726528} +{"current_steps": 32645, "total_steps": 78105, "loss": 0.228, "lr": 3.6120095914714514e-06, "epoch": 2.0898149926381153, "percentage": 41.8, "elapsed_time": "1:26:03", "remaining_time": "1:59:50", "throughput": 19896.33, "total_tokens": 102742208} +{"current_steps": 32650, "total_steps": 78105, "loss": 0.2535, "lr": 3.61150921901609e-06, "epoch": 2.0901350745790923, "percentage": 41.8, "elapsed_time": "1:26:04", "remaining_time": "1:59:50", "throughput": 19896.79, "total_tokens": 102758144} +{"current_steps": 32655, "total_steps": 78105, "loss": 0.337, "lr": 3.6110087910577474e-06, "epoch": 2.0904551565200693, "percentage": 41.81, "elapsed_time": "1:26:05", "remaining_time": "1:59:49", "throughput": 19897.19, "total_tokens": 102773440} +{"current_steps": 32660, "total_steps": 78105, "loss": 0.3579, "lr": 3.610508307621413e-06, "epoch": 2.090775238461046, "percentage": 41.82, "elapsed_time": "1:26:05", "remaining_time": "1:59:48", "throughput": 19897.68, "total_tokens": 102789824} +{"current_steps": 32665, "total_steps": 78105, "loss": 0.3105, "lr": 3.6100077687320783e-06, "epoch": 2.091095320402023, "percentage": 41.82, "elapsed_time": "1:26:06", "remaining_time": "1:59:47", "throughput": 19898.33, "total_tokens": 102808000} +{"current_steps": 32670, "total_steps": 78105, "loss": 0.2647, "lr": 3.6095071744147377e-06, "epoch": 2.091415402343, "percentage": 41.83, "elapsed_time": "1:26:07", "remaining_time": "1:59:46", "throughput": 19898.79, "total_tokens": 102824064} +{"current_steps": 32675, "total_steps": 78105, "loss": 0.2256, "lr": 3.6090065246943875e-06, "epoch": 2.091735484283977, "percentage": 41.83, "elapsed_time": "1:26:08", "remaining_time": "1:59:45", "throughput": 19899.35, "total_tokens": 102841408} +{"current_steps": 32680, "total_steps": 78105, "loss": 0.2509, "lr": 3.608505819596028e-06, "epoch": 2.0920555662249534, "percentage": 41.84, "elapsed_time": "1:26:08", "remaining_time": "1:59:44", "throughput": 19899.76, "total_tokens": 102856768} +{"current_steps": 32685, "total_steps": 78105, "loss": 0.2774, "lr": 3.608005059144662e-06, "epoch": 2.0923756481659304, "percentage": 41.85, "elapsed_time": "1:26:09", "remaining_time": "1:59:43", "throughput": 19900.22, "total_tokens": 102872640} +{"current_steps": 32690, "total_steps": 78105, "loss": 0.1289, "lr": 3.6075042433652953e-06, "epoch": 2.0926957301069073, "percentage": 41.85, "elapsed_time": "1:26:10", "remaining_time": "1:59:42", "throughput": 19900.68, "total_tokens": 102888640} +{"current_steps": 32695, "total_steps": 78105, "loss": 0.2728, "lr": 3.607003372282935e-06, "epoch": 2.0930158120478843, "percentage": 41.86, "elapsed_time": "1:26:10", "remaining_time": "1:59:41", "throughput": 19900.97, "total_tokens": 102902400} +{"current_steps": 32700, "total_steps": 78105, "loss": 0.2277, "lr": 3.606502445922593e-06, "epoch": 2.0933358939888613, "percentage": 41.87, "elapsed_time": "1:26:11", "remaining_time": "1:59:40", "throughput": 19901.3, "total_tokens": 102916992} +{"current_steps": 32705, "total_steps": 78105, "loss": 0.1895, "lr": 3.6060014643092815e-06, "epoch": 2.093655975929838, "percentage": 41.87, "elapsed_time": "1:26:12", "remaining_time": "1:59:39", "throughput": 19901.7, "total_tokens": 102932352} +{"current_steps": 32710, "total_steps": 78105, "loss": 0.1889, "lr": 3.6055004274680183e-06, "epoch": 2.093976057870815, "percentage": 41.88, "elapsed_time": "1:26:12", "remaining_time": "1:59:38", "throughput": 19902.09, "total_tokens": 102947392} +{"current_steps": 32715, "total_steps": 78105, "loss": 0.2022, "lr": 3.6049993354238223e-06, "epoch": 2.094296139811792, "percentage": 41.89, "elapsed_time": "1:26:13", "remaining_time": "1:59:37", "throughput": 19902.59, "total_tokens": 102963520} +{"current_steps": 32720, "total_steps": 78105, "loss": 0.2681, "lr": 3.604498188201715e-06, "epoch": 2.094616221752769, "percentage": 41.89, "elapsed_time": "1:26:14", "remaining_time": "1:59:36", "throughput": 19902.96, "total_tokens": 102978432} +{"current_steps": 32725, "total_steps": 78105, "loss": 0.2638, "lr": 3.6039969858267208e-06, "epoch": 2.0949363036937454, "percentage": 41.9, "elapsed_time": "1:26:14", "remaining_time": "1:59:35", "throughput": 19903.38, "total_tokens": 102993856} +{"current_steps": 32730, "total_steps": 78105, "loss": 0.174, "lr": 3.6034957283238686e-06, "epoch": 2.0952563856347224, "percentage": 41.91, "elapsed_time": "1:26:15", "remaining_time": "1:59:34", "throughput": 19903.77, "total_tokens": 103008768} +{"current_steps": 32735, "total_steps": 78105, "loss": 0.2459, "lr": 3.6029944157181868e-06, "epoch": 2.0955764675756994, "percentage": 41.91, "elapsed_time": "1:26:16", "remaining_time": "1:59:33", "throughput": 19904.27, "total_tokens": 103025408} +{"current_steps": 32740, "total_steps": 78105, "loss": 0.1956, "lr": 3.6024930480347097e-06, "epoch": 2.0958965495166764, "percentage": 41.92, "elapsed_time": "1:26:16", "remaining_time": "1:59:32", "throughput": 19904.68, "total_tokens": 103040640} +{"current_steps": 32745, "total_steps": 78105, "loss": 0.1956, "lr": 3.6019916252984715e-06, "epoch": 2.0962166314576534, "percentage": 41.92, "elapsed_time": "1:26:17", "remaining_time": "1:59:31", "throughput": 19905.12, "total_tokens": 103056192} +{"current_steps": 32750, "total_steps": 78105, "loss": 0.1755, "lr": 3.6014901475345126e-06, "epoch": 2.09653671339863, "percentage": 41.93, "elapsed_time": "1:26:18", "remaining_time": "1:59:30", "throughput": 19905.51, "total_tokens": 103071552} +{"current_steps": 32755, "total_steps": 78105, "loss": 0.1854, "lr": 3.600988614767873e-06, "epoch": 2.096856795339607, "percentage": 41.94, "elapsed_time": "1:26:18", "remaining_time": "1:59:30", "throughput": 19905.98, "total_tokens": 103087936} +{"current_steps": 32760, "total_steps": 78105, "loss": 0.2523, "lr": 3.6004870270235968e-06, "epoch": 2.097176877280584, "percentage": 41.94, "elapsed_time": "1:26:19", "remaining_time": "1:59:29", "throughput": 19906.43, "total_tokens": 103104384} +{"current_steps": 32765, "total_steps": 78105, "loss": 0.1531, "lr": 3.599985384326731e-06, "epoch": 2.097496959221561, "percentage": 41.95, "elapsed_time": "1:26:20", "remaining_time": "1:59:28", "throughput": 19906.87, "total_tokens": 103119936} +{"current_steps": 32770, "total_steps": 78105, "loss": 0.1785, "lr": 3.599483686702324e-06, "epoch": 2.0978170411625374, "percentage": 41.96, "elapsed_time": "1:26:20", "remaining_time": "1:59:27", "throughput": 19907.31, "total_tokens": 103135744} +{"current_steps": 32775, "total_steps": 78105, "loss": 0.2357, "lr": 3.5989819341754295e-06, "epoch": 2.0981371231035144, "percentage": 41.96, "elapsed_time": "1:26:21", "remaining_time": "1:59:26", "throughput": 19907.84, "total_tokens": 103152704} +{"current_steps": 32780, "total_steps": 78105, "loss": 0.2085, "lr": 3.598480126771101e-06, "epoch": 2.0984572050444914, "percentage": 41.97, "elapsed_time": "1:26:22", "remaining_time": "1:59:25", "throughput": 19908.19, "total_tokens": 103167360} +{"current_steps": 32785, "total_steps": 78105, "loss": 0.2125, "lr": 3.597978264514397e-06, "epoch": 2.0987772869854684, "percentage": 41.98, "elapsed_time": "1:26:22", "remaining_time": "1:59:24", "throughput": 19908.57, "total_tokens": 103182336} +{"current_steps": 32790, "total_steps": 78105, "loss": 0.2493, "lr": 3.5974763474303776e-06, "epoch": 2.0990973689264454, "percentage": 41.98, "elapsed_time": "1:26:23", "remaining_time": "1:59:23", "throughput": 19909.01, "total_tokens": 103198336} +{"current_steps": 32795, "total_steps": 78105, "loss": 0.1539, "lr": 3.5969743755441055e-06, "epoch": 2.099417450867422, "percentage": 41.99, "elapsed_time": "1:26:24", "remaining_time": "1:59:22", "throughput": 19909.49, "total_tokens": 103214272} +{"current_steps": 32800, "total_steps": 78105, "loss": 0.1957, "lr": 3.5964723488806473e-06, "epoch": 2.099737532808399, "percentage": 41.99, "elapsed_time": "1:26:24", "remaining_time": "1:59:21", "throughput": 19910.21, "total_tokens": 103233792} +{"current_steps": 32805, "total_steps": 78105, "loss": 0.2853, "lr": 3.5959702674650717e-06, "epoch": 2.100057614749376, "percentage": 42.0, "elapsed_time": "1:26:25", "remaining_time": "1:59:20", "throughput": 19910.72, "total_tokens": 103250368} +{"current_steps": 32810, "total_steps": 78105, "loss": 0.2134, "lr": 3.595468131322449e-06, "epoch": 2.100377696690353, "percentage": 42.01, "elapsed_time": "1:26:26", "remaining_time": "1:59:19", "throughput": 19911.23, "total_tokens": 103266560} +{"current_steps": 32815, "total_steps": 78105, "loss": 0.1418, "lr": 3.594965940477855e-06, "epoch": 2.1006977786313294, "percentage": 42.01, "elapsed_time": "1:26:27", "remaining_time": "1:59:18", "throughput": 19911.79, "total_tokens": 103283456} +{"current_steps": 32820, "total_steps": 78105, "loss": 0.2374, "lr": 3.5944636949563644e-06, "epoch": 2.1010178605723064, "percentage": 42.02, "elapsed_time": "1:26:27", "remaining_time": "1:59:18", "throughput": 19912.36, "total_tokens": 103300864} +{"current_steps": 32825, "total_steps": 78105, "loss": 0.1968, "lr": 3.5939613947830576e-06, "epoch": 2.1013379425132834, "percentage": 42.03, "elapsed_time": "1:26:28", "remaining_time": "1:59:17", "throughput": 19912.73, "total_tokens": 103315648} +{"current_steps": 32830, "total_steps": 78105, "loss": 0.2184, "lr": 3.593459039983017e-06, "epoch": 2.1016580244542604, "percentage": 42.03, "elapsed_time": "1:26:29", "remaining_time": "1:59:16", "throughput": 19913.12, "total_tokens": 103331136} +{"current_steps": 32835, "total_steps": 78105, "loss": 0.2968, "lr": 3.5929566305813286e-06, "epoch": 2.101978106395237, "percentage": 42.04, "elapsed_time": "1:26:29", "remaining_time": "1:59:15", "throughput": 19913.5, "total_tokens": 103346304} +{"current_steps": 32840, "total_steps": 78105, "loss": 0.1904, "lr": 3.592454166603079e-06, "epoch": 2.102298188336214, "percentage": 42.05, "elapsed_time": "1:26:30", "remaining_time": "1:59:14", "throughput": 19913.88, "total_tokens": 103361600} +{"current_steps": 32845, "total_steps": 78105, "loss": 0.1331, "lr": 3.5919516480733573e-06, "epoch": 2.102618270277191, "percentage": 42.05, "elapsed_time": "1:26:31", "remaining_time": "1:59:13", "throughput": 19914.26, "total_tokens": 103376768} +{"current_steps": 32850, "total_steps": 78105, "loss": 0.2789, "lr": 3.5914490750172594e-06, "epoch": 2.102938352218168, "percentage": 42.06, "elapsed_time": "1:26:31", "remaining_time": "1:59:12", "throughput": 19914.7, "total_tokens": 103392384} +{"current_steps": 32855, "total_steps": 78105, "loss": 0.2005, "lr": 3.5909464474598786e-06, "epoch": 2.103258434159145, "percentage": 42.07, "elapsed_time": "1:26:32", "remaining_time": "1:59:11", "throughput": 19915.13, "total_tokens": 103408320} +{"current_steps": 32860, "total_steps": 78105, "loss": 0.3303, "lr": 3.5904437654263157e-06, "epoch": 2.1035785161001215, "percentage": 42.07, "elapsed_time": "1:26:33", "remaining_time": "1:59:10", "throughput": 19915.52, "total_tokens": 103423360} +{"current_steps": 32865, "total_steps": 78105, "loss": 0.2978, "lr": 3.589941028941671e-06, "epoch": 2.1038985980410985, "percentage": 42.08, "elapsed_time": "1:26:33", "remaining_time": "1:59:09", "throughput": 19915.96, "total_tokens": 103439424} +{"current_steps": 32870, "total_steps": 78105, "loss": 0.1799, "lr": 3.5894382380310483e-06, "epoch": 2.1042186799820755, "percentage": 42.08, "elapsed_time": "1:26:34", "remaining_time": "1:59:08", "throughput": 19916.35, "total_tokens": 103454848} +{"current_steps": 32875, "total_steps": 78105, "loss": 0.2245, "lr": 3.5889353927195546e-06, "epoch": 2.1045387619230524, "percentage": 42.09, "elapsed_time": "1:26:35", "remaining_time": "1:59:07", "throughput": 19916.81, "total_tokens": 103470656} +{"current_steps": 32880, "total_steps": 78105, "loss": 0.2617, "lr": 3.5884324930323e-06, "epoch": 2.104858843864029, "percentage": 42.1, "elapsed_time": "1:26:35", "remaining_time": "1:59:06", "throughput": 19917.25, "total_tokens": 103486464} +{"current_steps": 32885, "total_steps": 78105, "loss": 0.2073, "lr": 3.5879295389943957e-06, "epoch": 2.105178925805006, "percentage": 42.1, "elapsed_time": "1:26:36", "remaining_time": "1:59:05", "throughput": 19917.64, "total_tokens": 103501312} +{"current_steps": 32890, "total_steps": 78105, "loss": 0.2908, "lr": 3.5874265306309574e-06, "epoch": 2.105499007745983, "percentage": 42.11, "elapsed_time": "1:26:37", "remaining_time": "1:59:04", "throughput": 19918.1, "total_tokens": 103517376} +{"current_steps": 32895, "total_steps": 78105, "loss": 0.1572, "lr": 3.5869234679671024e-06, "epoch": 2.10581908968696, "percentage": 42.12, "elapsed_time": "1:26:37", "remaining_time": "1:59:03", "throughput": 19918.55, "total_tokens": 103532928} +{"current_steps": 32900, "total_steps": 78105, "loss": 0.2208, "lr": 3.5864203510279503e-06, "epoch": 2.106139171627937, "percentage": 42.12, "elapsed_time": "1:26:38", "remaining_time": "1:59:02", "throughput": 19918.94, "total_tokens": 103548096} +{"current_steps": 32905, "total_steps": 78105, "loss": 0.1649, "lr": 3.5859171798386257e-06, "epoch": 2.1064592535689135, "percentage": 42.13, "elapsed_time": "1:26:39", "remaining_time": "1:59:01", "throughput": 19919.48, "total_tokens": 103564736} +{"current_steps": 32910, "total_steps": 78105, "loss": 0.1681, "lr": 3.585413954424252e-06, "epoch": 2.1067793355098905, "percentage": 42.14, "elapsed_time": "1:26:39", "remaining_time": "1:59:00", "throughput": 19919.92, "total_tokens": 103580800} +{"current_steps": 32915, "total_steps": 78105, "loss": 0.2048, "lr": 3.5849106748099606e-06, "epoch": 2.1070994174508675, "percentage": 42.14, "elapsed_time": "1:26:40", "remaining_time": "1:58:59", "throughput": 19920.33, "total_tokens": 103596480} +{"current_steps": 32920, "total_steps": 78105, "loss": 0.1806, "lr": 3.58440734102088e-06, "epoch": 2.1074194993918445, "percentage": 42.15, "elapsed_time": "1:26:41", "remaining_time": "1:58:59", "throughput": 19920.77, "total_tokens": 103612416} +{"current_steps": 32925, "total_steps": 78105, "loss": 0.1449, "lr": 3.583903953082146e-06, "epoch": 2.107739581332821, "percentage": 42.15, "elapsed_time": "1:26:41", "remaining_time": "1:58:58", "throughput": 19921.13, "total_tokens": 103627648} +{"current_steps": 32930, "total_steps": 78105, "loss": 0.2911, "lr": 3.5834005110188953e-06, "epoch": 2.108059663273798, "percentage": 42.16, "elapsed_time": "1:26:42", "remaining_time": "1:58:57", "throughput": 19921.67, "total_tokens": 103644736} +{"current_steps": 32935, "total_steps": 78105, "loss": 0.3524, "lr": 3.582897014856265e-06, "epoch": 2.108379745214775, "percentage": 42.17, "elapsed_time": "1:26:43", "remaining_time": "1:58:56", "throughput": 19922.21, "total_tokens": 103661760} +{"current_steps": 32940, "total_steps": 78105, "loss": 0.3149, "lr": 3.5823934646193986e-06, "epoch": 2.108699827155752, "percentage": 42.17, "elapsed_time": "1:26:43", "remaining_time": "1:58:55", "throughput": 19922.62, "total_tokens": 103677184} +{"current_steps": 32945, "total_steps": 78105, "loss": 0.1633, "lr": 3.5818898603334413e-06, "epoch": 2.1090199090967285, "percentage": 42.18, "elapsed_time": "1:26:44", "remaining_time": "1:58:54", "throughput": 19923.1, "total_tokens": 103693632} +{"current_steps": 32950, "total_steps": 78105, "loss": 0.12, "lr": 3.581386202023539e-06, "epoch": 2.1093399910377055, "percentage": 42.19, "elapsed_time": "1:26:45", "remaining_time": "1:58:53", "throughput": 19923.47, "total_tokens": 103708800} +{"current_steps": 32955, "total_steps": 78105, "loss": 0.1988, "lr": 3.580882489714843e-06, "epoch": 2.1096600729786825, "percentage": 42.19, "elapsed_time": "1:26:46", "remaining_time": "1:58:52", "throughput": 19924.1, "total_tokens": 103726656} +{"current_steps": 32960, "total_steps": 78105, "loss": 0.2024, "lr": 3.5803787234325057e-06, "epoch": 2.1099801549196595, "percentage": 42.2, "elapsed_time": "1:26:46", "remaining_time": "1:58:51", "throughput": 19924.56, "total_tokens": 103742848} +{"current_steps": 32965, "total_steps": 78105, "loss": 0.2063, "lr": 3.579874903201681e-06, "epoch": 2.1103002368606365, "percentage": 42.21, "elapsed_time": "1:26:47", "remaining_time": "1:58:50", "throughput": 19924.96, "total_tokens": 103758464} +{"current_steps": 32970, "total_steps": 78105, "loss": 0.2494, "lr": 3.57937102904753e-06, "epoch": 2.110620318801613, "percentage": 42.21, "elapsed_time": "1:26:48", "remaining_time": "1:58:49", "throughput": 19925.56, "total_tokens": 103776128} +{"current_steps": 32975, "total_steps": 78105, "loss": 0.26, "lr": 3.578867100995211e-06, "epoch": 2.11094040074259, "percentage": 42.22, "elapsed_time": "1:26:48", "remaining_time": "1:58:48", "throughput": 19925.96, "total_tokens": 103791168} +{"current_steps": 32980, "total_steps": 78105, "loss": 0.1586, "lr": 3.578363119069889e-06, "epoch": 2.111260482683567, "percentage": 42.23, "elapsed_time": "1:26:49", "remaining_time": "1:58:47", "throughput": 19926.45, "total_tokens": 103807808} +{"current_steps": 32985, "total_steps": 78105, "loss": 0.2688, "lr": 3.5778590832967305e-06, "epoch": 2.111580564624544, "percentage": 42.23, "elapsed_time": "1:26:50", "remaining_time": "1:58:47", "throughput": 19926.84, "total_tokens": 103823040} +{"current_steps": 32990, "total_steps": 78105, "loss": 0.2284, "lr": 3.577354993700903e-06, "epoch": 2.1119006465655206, "percentage": 42.24, "elapsed_time": "1:26:50", "remaining_time": "1:58:46", "throughput": 19927.17, "total_tokens": 103837632} +{"current_steps": 32995, "total_steps": 78105, "loss": 0.2555, "lr": 3.5768508503075795e-06, "epoch": 2.1122207285064976, "percentage": 42.24, "elapsed_time": "1:26:51", "remaining_time": "1:58:45", "throughput": 19927.59, "total_tokens": 103853440} +{"current_steps": 33000, "total_steps": 78105, "loss": 0.2679, "lr": 3.5763466531419328e-06, "epoch": 2.1125408104474745, "percentage": 42.25, "elapsed_time": "1:26:52", "remaining_time": "1:58:44", "throughput": 19928.01, "total_tokens": 103869120} +{"current_steps": 33005, "total_steps": 78105, "loss": 0.315, "lr": 3.5758424022291415e-06, "epoch": 2.1128608923884515, "percentage": 42.26, "elapsed_time": "1:26:52", "remaining_time": "1:58:43", "throughput": 19928.41, "total_tokens": 103884608} +{"current_steps": 33010, "total_steps": 78105, "loss": 0.1961, "lr": 3.575338097594384e-06, "epoch": 2.1131809743294285, "percentage": 42.26, "elapsed_time": "1:26:53", "remaining_time": "1:58:42", "throughput": 19928.92, "total_tokens": 103901056} +{"current_steps": 33015, "total_steps": 78105, "loss": 0.2865, "lr": 3.5748337392628434e-06, "epoch": 2.113501056270405, "percentage": 42.27, "elapsed_time": "1:26:54", "remaining_time": "1:58:41", "throughput": 19929.31, "total_tokens": 103916160} +{"current_steps": 33020, "total_steps": 78105, "loss": 0.1648, "lr": 3.5743293272597045e-06, "epoch": 2.113821138211382, "percentage": 42.28, "elapsed_time": "1:26:54", "remaining_time": "1:58:40", "throughput": 19929.71, "total_tokens": 103931328} +{"current_steps": 33025, "total_steps": 78105, "loss": 0.1693, "lr": 3.5738248616101545e-06, "epoch": 2.114141220152359, "percentage": 42.28, "elapsed_time": "1:26:55", "remaining_time": "1:58:39", "throughput": 19930.15, "total_tokens": 103947392} +{"current_steps": 33030, "total_steps": 78105, "loss": 0.1568, "lr": 3.573320342339384e-06, "epoch": 2.114461302093336, "percentage": 42.29, "elapsed_time": "1:26:56", "remaining_time": "1:58:38", "throughput": 19930.53, "total_tokens": 103962496} +{"current_steps": 33035, "total_steps": 78105, "loss": 0.1916, "lr": 3.5728157694725875e-06, "epoch": 2.1147813840343126, "percentage": 42.3, "elapsed_time": "1:26:56", "remaining_time": "1:58:37", "throughput": 19931.0, "total_tokens": 103978688} +{"current_steps": 33040, "total_steps": 78105, "loss": 0.194, "lr": 3.5723111430349587e-06, "epoch": 2.1151014659752896, "percentage": 42.3, "elapsed_time": "1:26:57", "remaining_time": "1:58:36", "throughput": 19931.49, "total_tokens": 103995392} +{"current_steps": 33045, "total_steps": 78105, "loss": 0.3004, "lr": 3.5718064630516975e-06, "epoch": 2.1154215479162666, "percentage": 42.31, "elapsed_time": "1:26:58", "remaining_time": "1:58:35", "throughput": 19931.96, "total_tokens": 104011648} +{"current_steps": 33050, "total_steps": 78105, "loss": 0.2046, "lr": 3.571301729548004e-06, "epoch": 2.1157416298572436, "percentage": 42.31, "elapsed_time": "1:26:59", "remaining_time": "1:58:34", "throughput": 19932.41, "total_tokens": 104027392} +{"current_steps": 33055, "total_steps": 78105, "loss": 0.1911, "lr": 3.570796942549082e-06, "epoch": 2.1160617117982206, "percentage": 42.32, "elapsed_time": "1:26:59", "remaining_time": "1:58:33", "throughput": 19932.84, "total_tokens": 104043008} +{"current_steps": 33060, "total_steps": 78105, "loss": 0.2238, "lr": 3.5702921020801385e-06, "epoch": 2.116381793739197, "percentage": 42.33, "elapsed_time": "1:27:00", "remaining_time": "1:58:32", "throughput": 19933.36, "total_tokens": 104059968} +{"current_steps": 33065, "total_steps": 78105, "loss": 0.2035, "lr": 3.5697872081663826e-06, "epoch": 2.116701875680174, "percentage": 42.33, "elapsed_time": "1:27:01", "remaining_time": "1:58:31", "throughput": 19933.76, "total_tokens": 104075456} +{"current_steps": 33070, "total_steps": 78105, "loss": 0.2817, "lr": 3.569282260833026e-06, "epoch": 2.117021957621151, "percentage": 42.34, "elapsed_time": "1:27:01", "remaining_time": "1:58:31", "throughput": 19934.2, "total_tokens": 104091264} +{"current_steps": 33075, "total_steps": 78105, "loss": 0.15, "lr": 3.568777260105283e-06, "epoch": 2.117342039562128, "percentage": 42.35, "elapsed_time": "1:27:02", "remaining_time": "1:58:30", "throughput": 19934.63, "total_tokens": 104107008} +{"current_steps": 33080, "total_steps": 78105, "loss": 0.2618, "lr": 3.5682722060083695e-06, "epoch": 2.1176621215031046, "percentage": 42.35, "elapsed_time": "1:27:03", "remaining_time": "1:58:29", "throughput": 19935.01, "total_tokens": 104122304} +{"current_steps": 33085, "total_steps": 78105, "loss": 0.1627, "lr": 3.5677670985675072e-06, "epoch": 2.1179822034440816, "percentage": 42.36, "elapsed_time": "1:27:03", "remaining_time": "1:58:28", "throughput": 19935.42, "total_tokens": 104137600} +{"current_steps": 33090, "total_steps": 78105, "loss": 0.3528, "lr": 3.5672619378079183e-06, "epoch": 2.1183022853850586, "percentage": 42.37, "elapsed_time": "1:27:04", "remaining_time": "1:58:27", "throughput": 19935.87, "total_tokens": 104153344} +{"current_steps": 33095, "total_steps": 78105, "loss": 0.2339, "lr": 3.566756723754827e-06, "epoch": 2.1186223673260356, "percentage": 42.37, "elapsed_time": "1:27:05", "remaining_time": "1:58:26", "throughput": 19936.17, "total_tokens": 104167680} +{"current_steps": 33100, "total_steps": 78105, "loss": 0.23, "lr": 3.5662514564334616e-06, "epoch": 2.118942449267012, "percentage": 42.38, "elapsed_time": "1:27:05", "remaining_time": "1:58:25", "throughput": 19936.52, "total_tokens": 104182464} +{"current_steps": 33105, "total_steps": 78105, "loss": 0.2296, "lr": 3.5657461358690526e-06, "epoch": 2.119262531207989, "percentage": 42.39, "elapsed_time": "1:27:06", "remaining_time": "1:58:24", "throughput": 19936.93, "total_tokens": 104197760} +{"current_steps": 33110, "total_steps": 78105, "loss": 0.2549, "lr": 3.5652407620868323e-06, "epoch": 2.119582613148966, "percentage": 42.39, "elapsed_time": "1:27:07", "remaining_time": "1:58:23", "throughput": 19937.48, "total_tokens": 104215168} +{"current_steps": 33115, "total_steps": 78105, "loss": 0.187, "lr": 3.5647353351120374e-06, "epoch": 2.119902695089943, "percentage": 42.4, "elapsed_time": "1:27:07", "remaining_time": "1:58:22", "throughput": 19937.91, "total_tokens": 104230720} +{"current_steps": 33120, "total_steps": 78105, "loss": 0.1922, "lr": 3.5642298549699054e-06, "epoch": 2.12022277703092, "percentage": 42.4, "elapsed_time": "1:27:08", "remaining_time": "1:58:21", "throughput": 19938.32, "total_tokens": 104246144} +{"current_steps": 33125, "total_steps": 78105, "loss": 0.2326, "lr": 3.563724321685678e-06, "epoch": 2.1205428589718966, "percentage": 42.41, "elapsed_time": "1:27:09", "remaining_time": "1:58:20", "throughput": 19938.75, "total_tokens": 104261568} +{"current_steps": 33130, "total_steps": 78105, "loss": 0.1748, "lr": 3.563218735284599e-06, "epoch": 2.1208629409128736, "percentage": 42.42, "elapsed_time": "1:27:09", "remaining_time": "1:58:19", "throughput": 19939.28, "total_tokens": 104278528} +{"current_steps": 33135, "total_steps": 78105, "loss": 0.1412, "lr": 3.5627130957919144e-06, "epoch": 2.1211830228538506, "percentage": 42.42, "elapsed_time": "1:27:10", "remaining_time": "1:58:18", "throughput": 19939.68, "total_tokens": 104293760} +{"current_steps": 33140, "total_steps": 78105, "loss": 0.2032, "lr": 3.562207403232873e-06, "epoch": 2.1215031047948276, "percentage": 42.43, "elapsed_time": "1:27:11", "remaining_time": "1:58:17", "throughput": 19940.06, "total_tokens": 104308928} +{"current_steps": 33145, "total_steps": 78105, "loss": 0.223, "lr": 3.561701657632726e-06, "epoch": 2.121823186735804, "percentage": 42.44, "elapsed_time": "1:27:12", "remaining_time": "1:58:17", "throughput": 19939.55, "total_tokens": 104324032} +{"current_steps": 33150, "total_steps": 78105, "loss": 0.3532, "lr": 3.5611958590167295e-06, "epoch": 2.122143268676781, "percentage": 42.44, "elapsed_time": "1:27:12", "remaining_time": "1:58:16", "throughput": 19939.88, "total_tokens": 104338816} +{"current_steps": 33155, "total_steps": 78105, "loss": 0.334, "lr": 3.560690007410139e-06, "epoch": 2.122463350617758, "percentage": 42.45, "elapsed_time": "1:27:13", "remaining_time": "1:58:15", "throughput": 19940.27, "total_tokens": 104353984} +{"current_steps": 33160, "total_steps": 78105, "loss": 0.2286, "lr": 3.560184102838215e-06, "epoch": 2.122783432558735, "percentage": 42.46, "elapsed_time": "1:27:13", "remaining_time": "1:58:14", "throughput": 19940.67, "total_tokens": 104369408} +{"current_steps": 33165, "total_steps": 78105, "loss": 0.2311, "lr": 3.5596781453262193e-06, "epoch": 2.123103514499712, "percentage": 42.46, "elapsed_time": "1:27:14", "remaining_time": "1:58:13", "throughput": 19941.08, "total_tokens": 104384832} +{"current_steps": 33170, "total_steps": 78105, "loss": 0.2808, "lr": 3.5591721348994157e-06, "epoch": 2.1234235964406887, "percentage": 42.47, "elapsed_time": "1:27:15", "remaining_time": "1:58:12", "throughput": 19941.44, "total_tokens": 104400000} +{"current_steps": 33175, "total_steps": 78105, "loss": 0.273, "lr": 3.5586660715830727e-06, "epoch": 2.1237436783816657, "percentage": 42.47, "elapsed_time": "1:27:16", "remaining_time": "1:58:11", "throughput": 19941.95, "total_tokens": 104416704} +{"current_steps": 33180, "total_steps": 78105, "loss": 0.1704, "lr": 3.5581599554024615e-06, "epoch": 2.1240637603226427, "percentage": 42.48, "elapsed_time": "1:27:16", "remaining_time": "1:58:10", "throughput": 19942.4, "total_tokens": 104432640} +{"current_steps": 33185, "total_steps": 78105, "loss": 0.2523, "lr": 3.557653786382853e-06, "epoch": 2.1243838422636196, "percentage": 42.49, "elapsed_time": "1:27:17", "remaining_time": "1:58:09", "throughput": 19943.04, "total_tokens": 104450880} +{"current_steps": 33190, "total_steps": 78105, "loss": 0.2722, "lr": 3.5571475645495244e-06, "epoch": 2.124703924204596, "percentage": 42.49, "elapsed_time": "1:27:18", "remaining_time": "1:58:08", "throughput": 19943.44, "total_tokens": 104466240} +{"current_steps": 33195, "total_steps": 78105, "loss": 0.2074, "lr": 3.556641289927752e-06, "epoch": 2.125024006145573, "percentage": 42.5, "elapsed_time": "1:27:18", "remaining_time": "1:58:07", "throughput": 19943.78, "total_tokens": 104481024} +{"current_steps": 33200, "total_steps": 78105, "loss": 0.1555, "lr": 3.556134962542818e-06, "epoch": 2.12534408808655, "percentage": 42.51, "elapsed_time": "1:27:19", "remaining_time": "1:58:06", "throughput": 19944.4, "total_tokens": 104498880} +{"current_steps": 33205, "total_steps": 78105, "loss": 0.2333, "lr": 3.5556285824200043e-06, "epoch": 2.125664170027527, "percentage": 42.51, "elapsed_time": "1:27:20", "remaining_time": "1:58:05", "throughput": 19944.85, "total_tokens": 104515008} +{"current_steps": 33210, "total_steps": 78105, "loss": 0.2938, "lr": 3.5551221495845987e-06, "epoch": 2.1259842519685037, "percentage": 42.52, "elapsed_time": "1:27:20", "remaining_time": "1:58:04", "throughput": 19945.26, "total_tokens": 104530368} +{"current_steps": 33215, "total_steps": 78105, "loss": 0.2368, "lr": 3.5546156640618888e-06, "epoch": 2.1263043339094807, "percentage": 42.53, "elapsed_time": "1:27:21", "remaining_time": "1:58:03", "throughput": 19945.76, "total_tokens": 104546944} +{"current_steps": 33220, "total_steps": 78105, "loss": 0.2845, "lr": 3.5541091258771652e-06, "epoch": 2.1266244158504577, "percentage": 42.53, "elapsed_time": "1:27:22", "remaining_time": "1:58:02", "throughput": 19946.09, "total_tokens": 104561344} +{"current_steps": 33225, "total_steps": 78105, "loss": 0.273, "lr": 3.5536025350557235e-06, "epoch": 2.1269444977914347, "percentage": 42.54, "elapsed_time": "1:27:22", "remaining_time": "1:58:01", "throughput": 19946.49, "total_tokens": 104576512} +{"current_steps": 33230, "total_steps": 78105, "loss": 0.2434, "lr": 3.5530958916228584e-06, "epoch": 2.1272645797324117, "percentage": 42.55, "elapsed_time": "1:27:23", "remaining_time": "1:58:01", "throughput": 19946.84, "total_tokens": 104591744} +{"current_steps": 33235, "total_steps": 78105, "loss": 0.1655, "lr": 3.5525891956038705e-06, "epoch": 2.1275846616733882, "percentage": 42.55, "elapsed_time": "1:27:24", "remaining_time": "1:58:00", "throughput": 19947.33, "total_tokens": 104608256} +{"current_steps": 33240, "total_steps": 78105, "loss": 0.1644, "lr": 3.5520824470240606e-06, "epoch": 2.127904743614365, "percentage": 42.56, "elapsed_time": "1:27:24", "remaining_time": "1:57:59", "throughput": 19947.89, "total_tokens": 104625536} +{"current_steps": 33245, "total_steps": 78105, "loss": 0.147, "lr": 3.5515756459087334e-06, "epoch": 2.128224825555342, "percentage": 42.56, "elapsed_time": "1:27:25", "remaining_time": "1:57:58", "throughput": 19948.19, "total_tokens": 104639680} +{"current_steps": 33250, "total_steps": 78105, "loss": 0.2699, "lr": 3.5510687922831966e-06, "epoch": 2.128544907496319, "percentage": 42.57, "elapsed_time": "1:27:26", "remaining_time": "1:57:57", "throughput": 19948.66, "total_tokens": 104656000} +{"current_steps": 33255, "total_steps": 78105, "loss": 0.1568, "lr": 3.5505618861727574e-06, "epoch": 2.1288649894372957, "percentage": 42.58, "elapsed_time": "1:27:26", "remaining_time": "1:57:56", "throughput": 19949.08, "total_tokens": 104671744} +{"current_steps": 33260, "total_steps": 78105, "loss": 0.1426, "lr": 3.5500549276027306e-06, "epoch": 2.1291850713782727, "percentage": 42.58, "elapsed_time": "1:27:27", "remaining_time": "1:57:55", "throughput": 19949.43, "total_tokens": 104686144} +{"current_steps": 33265, "total_steps": 78105, "loss": 0.1269, "lr": 3.5495479165984307e-06, "epoch": 2.1295051533192497, "percentage": 42.59, "elapsed_time": "1:27:28", "remaining_time": "1:57:54", "throughput": 19949.88, "total_tokens": 104702208} +{"current_steps": 33270, "total_steps": 78105, "loss": 0.2102, "lr": 3.549040853185175e-06, "epoch": 2.1298252352602267, "percentage": 42.6, "elapsed_time": "1:27:28", "remaining_time": "1:57:53", "throughput": 19950.37, "total_tokens": 104718784} +{"current_steps": 33275, "total_steps": 78105, "loss": 0.2664, "lr": 3.5485337373882827e-06, "epoch": 2.1301453172012037, "percentage": 42.6, "elapsed_time": "1:27:29", "remaining_time": "1:57:52", "throughput": 19950.84, "total_tokens": 104734848} +{"current_steps": 33280, "total_steps": 78105, "loss": 0.1545, "lr": 3.5480265692330775e-06, "epoch": 2.1304653991421802, "percentage": 42.61, "elapsed_time": "1:27:30", "remaining_time": "1:57:51", "throughput": 19951.19, "total_tokens": 104749696} +{"current_steps": 33285, "total_steps": 78105, "loss": 0.1183, "lr": 3.547519348744884e-06, "epoch": 2.1307854810831572, "percentage": 42.62, "elapsed_time": "1:27:30", "remaining_time": "1:57:50", "throughput": 19951.6, "total_tokens": 104765376} +{"current_steps": 33290, "total_steps": 78105, "loss": 0.1367, "lr": 3.5470120759490305e-06, "epoch": 2.1311055630241342, "percentage": 42.62, "elapsed_time": "1:27:31", "remaining_time": "1:57:49", "throughput": 19952.03, "total_tokens": 104781056} +{"current_steps": 33295, "total_steps": 78105, "loss": 0.1776, "lr": 3.5465047508708485e-06, "epoch": 2.131425644965111, "percentage": 42.63, "elapsed_time": "1:27:32", "remaining_time": "1:57:48", "throughput": 19952.46, "total_tokens": 104796864} +{"current_steps": 33300, "total_steps": 78105, "loss": 0.1945, "lr": 3.5459973735356697e-06, "epoch": 2.1317457269060878, "percentage": 42.63, "elapsed_time": "1:27:33", "remaining_time": "1:57:47", "throughput": 19952.99, "total_tokens": 104813888} +{"current_steps": 33305, "total_steps": 78105, "loss": 0.1501, "lr": 3.5454899439688306e-06, "epoch": 2.1320658088470648, "percentage": 42.64, "elapsed_time": "1:27:33", "remaining_time": "1:57:46", "throughput": 19953.43, "total_tokens": 104829504} +{"current_steps": 33310, "total_steps": 78105, "loss": 0.2541, "lr": 3.5449824621956685e-06, "epoch": 2.1323858907880417, "percentage": 42.65, "elapsed_time": "1:27:34", "remaining_time": "1:57:46", "throughput": 19953.84, "total_tokens": 104845312} +{"current_steps": 33315, "total_steps": 78105, "loss": 0.2544, "lr": 3.5444749282415266e-06, "epoch": 2.1327059727290187, "percentage": 42.65, "elapsed_time": "1:27:35", "remaining_time": "1:57:45", "throughput": 19954.34, "total_tokens": 104861888} +{"current_steps": 33320, "total_steps": 78105, "loss": 0.1929, "lr": 3.543967342131747e-06, "epoch": 2.1330260546699957, "percentage": 42.66, "elapsed_time": "1:27:35", "remaining_time": "1:57:44", "throughput": 19954.78, "total_tokens": 104877888} +{"current_steps": 33325, "total_steps": 78105, "loss": 0.1913, "lr": 3.543459703891675e-06, "epoch": 2.1333461366109723, "percentage": 42.67, "elapsed_time": "1:27:36", "remaining_time": "1:57:43", "throughput": 19955.24, "total_tokens": 104893888} +{"current_steps": 33330, "total_steps": 78105, "loss": 0.2014, "lr": 3.542952013546662e-06, "epoch": 2.1336662185519493, "percentage": 42.67, "elapsed_time": "1:27:37", "remaining_time": "1:57:42", "throughput": 19955.68, "total_tokens": 104909952} +{"current_steps": 33335, "total_steps": 78105, "loss": 0.2067, "lr": 3.5424442711220582e-06, "epoch": 2.1339863004929263, "percentage": 42.68, "elapsed_time": "1:27:37", "remaining_time": "1:57:41", "throughput": 19956.1, "total_tokens": 104925120} +{"current_steps": 33340, "total_steps": 78105, "loss": 0.2051, "lr": 3.541936476643216e-06, "epoch": 2.1343063824339032, "percentage": 42.69, "elapsed_time": "1:27:38", "remaining_time": "1:57:40", "throughput": 19956.53, "total_tokens": 104941056} +{"current_steps": 33345, "total_steps": 78105, "loss": 0.2205, "lr": 3.5414286301354943e-06, "epoch": 2.13462646437488, "percentage": 42.69, "elapsed_time": "1:27:39", "remaining_time": "1:57:39", "throughput": 19956.85, "total_tokens": 104955392} +{"current_steps": 33350, "total_steps": 78105, "loss": 0.3438, "lr": 3.540920731624251e-06, "epoch": 2.134946546315857, "percentage": 42.7, "elapsed_time": "1:27:39", "remaining_time": "1:57:38", "throughput": 19957.27, "total_tokens": 104971072} +{"current_steps": 33355, "total_steps": 78105, "loss": 0.2247, "lr": 3.5404127811348486e-06, "epoch": 2.1352666282568338, "percentage": 42.71, "elapsed_time": "1:27:40", "remaining_time": "1:57:37", "throughput": 19957.71, "total_tokens": 104987072} +{"current_steps": 33360, "total_steps": 78105, "loss": 0.3341, "lr": 3.5399047786926514e-06, "epoch": 2.1355867101978108, "percentage": 42.71, "elapsed_time": "1:27:41", "remaining_time": "1:57:36", "throughput": 19958.09, "total_tokens": 105002176} +{"current_steps": 33365, "total_steps": 78105, "loss": 0.239, "lr": 3.5393967243230265e-06, "epoch": 2.1359067921387878, "percentage": 42.72, "elapsed_time": "1:27:41", "remaining_time": "1:57:35", "throughput": 19958.53, "total_tokens": 105018048} +{"current_steps": 33370, "total_steps": 78105, "loss": 0.1765, "lr": 3.538888618051342e-06, "epoch": 2.1362268740797643, "percentage": 42.72, "elapsed_time": "1:27:42", "remaining_time": "1:57:34", "throughput": 19958.9, "total_tokens": 105032960} +{"current_steps": 33375, "total_steps": 78105, "loss": 0.2707, "lr": 3.5383804599029725e-06, "epoch": 2.1365469560207413, "percentage": 42.73, "elapsed_time": "1:27:43", "remaining_time": "1:57:33", "throughput": 19959.44, "total_tokens": 105050176} +{"current_steps": 33380, "total_steps": 78105, "loss": 0.154, "lr": 3.5378722499032904e-06, "epoch": 2.1368670379617183, "percentage": 42.74, "elapsed_time": "1:27:43", "remaining_time": "1:57:32", "throughput": 19959.83, "total_tokens": 105065280} +{"current_steps": 33385, "total_steps": 78105, "loss": 0.2655, "lr": 3.537363988077675e-06, "epoch": 2.1371871199026953, "percentage": 42.74, "elapsed_time": "1:27:44", "remaining_time": "1:57:31", "throughput": 19960.21, "total_tokens": 105080448} +{"current_steps": 33390, "total_steps": 78105, "loss": 0.2169, "lr": 3.5368556744515057e-06, "epoch": 2.137507201843672, "percentage": 42.75, "elapsed_time": "1:27:45", "remaining_time": "1:57:30", "throughput": 19960.65, "total_tokens": 105096256} +{"current_steps": 33395, "total_steps": 78105, "loss": 0.2677, "lr": 3.536347309050164e-06, "epoch": 2.137827283784649, "percentage": 42.76, "elapsed_time": "1:27:45", "remaining_time": "1:57:30", "throughput": 19961.11, "total_tokens": 105112704} +{"current_steps": 33400, "total_steps": 78105, "loss": 0.1674, "lr": 3.535838891899036e-06, "epoch": 2.138147365725626, "percentage": 42.76, "elapsed_time": "1:27:46", "remaining_time": "1:57:29", "throughput": 19961.58, "total_tokens": 105129024} +{"current_steps": 33405, "total_steps": 78105, "loss": 0.2576, "lr": 3.5353304230235096e-06, "epoch": 2.138467447666603, "percentage": 42.77, "elapsed_time": "1:27:47", "remaining_time": "1:57:28", "throughput": 19962.02, "total_tokens": 105144832} +{"current_steps": 33410, "total_steps": 78105, "loss": 0.1551, "lr": 3.534821902448975e-06, "epoch": 2.1387875296075793, "percentage": 42.78, "elapsed_time": "1:27:47", "remaining_time": "1:57:27", "throughput": 19962.4, "total_tokens": 105160256} +{"current_steps": 33415, "total_steps": 78105, "loss": 0.174, "lr": 3.534313330200825e-06, "epoch": 2.1391076115485563, "percentage": 42.78, "elapsed_time": "1:27:48", "remaining_time": "1:57:26", "throughput": 19962.83, "total_tokens": 105175424} +{"current_steps": 33420, "total_steps": 78105, "loss": 0.3091, "lr": 3.533804706304454e-06, "epoch": 2.1394276934895333, "percentage": 42.79, "elapsed_time": "1:27:49", "remaining_time": "1:57:25", "throughput": 19963.22, "total_tokens": 105190464} +{"current_steps": 33425, "total_steps": 78105, "loss": 0.2082, "lr": 3.5332960307852604e-06, "epoch": 2.1397477754305103, "percentage": 42.79, "elapsed_time": "1:27:49", "remaining_time": "1:57:24", "throughput": 19963.69, "total_tokens": 105206912} +{"current_steps": 33430, "total_steps": 78105, "loss": 0.0997, "lr": 3.5327873036686466e-06, "epoch": 2.1400678573714873, "percentage": 42.8, "elapsed_time": "1:27:50", "remaining_time": "1:57:23", "throughput": 19964.08, "total_tokens": 105222336} +{"current_steps": 33435, "total_steps": 78105, "loss": 0.14, "lr": 3.532278524980013e-06, "epoch": 2.140387939312464, "percentage": 42.81, "elapsed_time": "1:27:51", "remaining_time": "1:57:22", "throughput": 19964.4, "total_tokens": 105236544} +{"current_steps": 33440, "total_steps": 78105, "loss": 0.2127, "lr": 3.531769694744768e-06, "epoch": 2.140708021253441, "percentage": 42.81, "elapsed_time": "1:27:51", "remaining_time": "1:57:21", "throughput": 19964.82, "total_tokens": 105252096} +{"current_steps": 33445, "total_steps": 78105, "loss": 0.18, "lr": 3.5312608129883186e-06, "epoch": 2.141028103194418, "percentage": 42.82, "elapsed_time": "1:27:52", "remaining_time": "1:57:20", "throughput": 19965.27, "total_tokens": 105268224} +{"current_steps": 33450, "total_steps": 78105, "loss": 0.2038, "lr": 3.5307518797360756e-06, "epoch": 2.141348185135395, "percentage": 42.83, "elapsed_time": "1:27:53", "remaining_time": "1:57:19", "throughput": 19965.86, "total_tokens": 105285824} +{"current_steps": 33455, "total_steps": 78105, "loss": 0.2503, "lr": 3.5302428950134525e-06, "epoch": 2.1416682670763714, "percentage": 42.83, "elapsed_time": "1:27:53", "remaining_time": "1:57:18", "throughput": 19966.28, "total_tokens": 105301504} +{"current_steps": 33460, "total_steps": 78105, "loss": 0.2744, "lr": 3.529733858845866e-06, "epoch": 2.1419883490173484, "percentage": 42.84, "elapsed_time": "1:27:54", "remaining_time": "1:57:17", "throughput": 19966.75, "total_tokens": 105317952} +{"current_steps": 33465, "total_steps": 78105, "loss": 0.1624, "lr": 3.5292247712587336e-06, "epoch": 2.1423084309583253, "percentage": 42.85, "elapsed_time": "1:27:55", "remaining_time": "1:57:16", "throughput": 19967.17, "total_tokens": 105333440} +{"current_steps": 33470, "total_steps": 78105, "loss": 0.2341, "lr": 3.528715632277478e-06, "epoch": 2.1426285128993023, "percentage": 42.85, "elapsed_time": "1:27:56", "remaining_time": "1:57:16", "throughput": 19967.71, "total_tokens": 105350720} +{"current_steps": 33475, "total_steps": 78105, "loss": 0.2384, "lr": 3.5282064419275217e-06, "epoch": 2.142948594840279, "percentage": 42.86, "elapsed_time": "1:27:56", "remaining_time": "1:57:15", "throughput": 19968.09, "total_tokens": 105366144} +{"current_steps": 33480, "total_steps": 78105, "loss": 0.2245, "lr": 3.527697200234291e-06, "epoch": 2.143268676781256, "percentage": 42.87, "elapsed_time": "1:27:57", "remaining_time": "1:57:14", "throughput": 19968.54, "total_tokens": 105382656} +{"current_steps": 33485, "total_steps": 78105, "loss": 0.2075, "lr": 3.5271879072232158e-06, "epoch": 2.143588758722233, "percentage": 42.87, "elapsed_time": "1:27:58", "remaining_time": "1:57:13", "throughput": 19968.92, "total_tokens": 105398016} +{"current_steps": 33490, "total_steps": 78105, "loss": 0.1965, "lr": 3.526678562919727e-06, "epoch": 2.14390884066321, "percentage": 42.88, "elapsed_time": "1:27:58", "remaining_time": "1:57:12", "throughput": 19969.34, "total_tokens": 105413696} +{"current_steps": 33495, "total_steps": 78105, "loss": 0.2447, "lr": 3.526169167349258e-06, "epoch": 2.144228922604187, "percentage": 42.88, "elapsed_time": "1:27:59", "remaining_time": "1:57:11", "throughput": 19969.8, "total_tokens": 105429760} +{"current_steps": 33500, "total_steps": 78105, "loss": 0.1897, "lr": 3.5256597205372463e-06, "epoch": 2.1445490045451634, "percentage": 42.89, "elapsed_time": "1:28:00", "remaining_time": "1:57:10", "throughput": 19970.27, "total_tokens": 105446272} +{"current_steps": 33505, "total_steps": 78105, "loss": 0.1563, "lr": 3.5251502225091305e-06, "epoch": 2.1448690864861404, "percentage": 42.9, "elapsed_time": "1:28:00", "remaining_time": "1:57:09", "throughput": 19970.75, "total_tokens": 105462400} +{"current_steps": 33510, "total_steps": 78105, "loss": 0.1698, "lr": 3.5246406732903525e-06, "epoch": 2.1451891684271174, "percentage": 42.9, "elapsed_time": "1:28:01", "remaining_time": "1:57:08", "throughput": 19971.23, "total_tokens": 105478912} +{"current_steps": 33515, "total_steps": 78105, "loss": 0.1394, "lr": 3.524131072906356e-06, "epoch": 2.1455092503680944, "percentage": 42.91, "elapsed_time": "1:28:02", "remaining_time": "1:57:07", "throughput": 19971.75, "total_tokens": 105495936} +{"current_steps": 33520, "total_steps": 78105, "loss": 0.1046, "lr": 3.523621421382589e-06, "epoch": 2.145829332309071, "percentage": 42.92, "elapsed_time": "1:28:02", "remaining_time": "1:57:06", "throughput": 19972.14, "total_tokens": 105511296} +{"current_steps": 33525, "total_steps": 78105, "loss": 0.1298, "lr": 3.5231117187444998e-06, "epoch": 2.146149414250048, "percentage": 42.92, "elapsed_time": "1:28:03", "remaining_time": "1:57:05", "throughput": 19972.51, "total_tokens": 105526592} +{"current_steps": 33530, "total_steps": 78105, "loss": 0.2161, "lr": 3.52260196501754e-06, "epoch": 2.146469496191025, "percentage": 42.93, "elapsed_time": "1:28:04", "remaining_time": "1:57:04", "throughput": 19972.94, "total_tokens": 105542464} +{"current_steps": 33535, "total_steps": 78105, "loss": 0.2425, "lr": 3.522092160227165e-06, "epoch": 2.146789578132002, "percentage": 42.94, "elapsed_time": "1:28:04", "remaining_time": "1:57:03", "throughput": 19973.25, "total_tokens": 105557056} +{"current_steps": 33540, "total_steps": 78105, "loss": 0.2174, "lr": 3.521582304398832e-06, "epoch": 2.147109660072979, "percentage": 42.94, "elapsed_time": "1:28:05", "remaining_time": "1:57:03", "throughput": 19973.65, "total_tokens": 105572224} +{"current_steps": 33545, "total_steps": 78105, "loss": 0.2682, "lr": 3.5210723975579992e-06, "epoch": 2.1474297420139554, "percentage": 42.95, "elapsed_time": "1:28:06", "remaining_time": "1:57:02", "throughput": 19974.24, "total_tokens": 105590016} +{"current_steps": 33550, "total_steps": 78105, "loss": 0.1951, "lr": 3.5205624397301297e-06, "epoch": 2.1477498239549324, "percentage": 42.95, "elapsed_time": "1:28:07", "remaining_time": "1:57:01", "throughput": 19974.75, "total_tokens": 105606656} +{"current_steps": 33555, "total_steps": 78105, "loss": 0.2774, "lr": 3.520052430940687e-06, "epoch": 2.1480699058959094, "percentage": 42.96, "elapsed_time": "1:28:07", "remaining_time": "1:57:00", "throughput": 19975.17, "total_tokens": 105622464} +{"current_steps": 33560, "total_steps": 78105, "loss": 0.1945, "lr": 3.51954237121514e-06, "epoch": 2.1483899878368864, "percentage": 42.97, "elapsed_time": "1:28:08", "remaining_time": "1:56:59", "throughput": 19975.55, "total_tokens": 105637504} +{"current_steps": 33565, "total_steps": 78105, "loss": 0.2432, "lr": 3.519032260578958e-06, "epoch": 2.148710069777863, "percentage": 42.97, "elapsed_time": "1:28:08", "remaining_time": "1:56:58", "throughput": 19975.89, "total_tokens": 105652160} +{"current_steps": 33570, "total_steps": 78105, "loss": 0.1757, "lr": 3.5185220990576123e-06, "epoch": 2.14903015171884, "percentage": 42.98, "elapsed_time": "1:28:09", "remaining_time": "1:56:57", "throughput": 19976.29, "total_tokens": 105668160} +{"current_steps": 33575, "total_steps": 78105, "loss": 0.209, "lr": 3.518011886676578e-06, "epoch": 2.149350233659817, "percentage": 42.99, "elapsed_time": "1:28:10", "remaining_time": "1:56:56", "throughput": 19976.66, "total_tokens": 105682944} +{"current_steps": 33580, "total_steps": 78105, "loss": 0.2531, "lr": 3.5175016234613334e-06, "epoch": 2.149670315600794, "percentage": 42.99, "elapsed_time": "1:28:11", "remaining_time": "1:56:55", "throughput": 19977.09, "total_tokens": 105698816} +{"current_steps": 33585, "total_steps": 78105, "loss": 0.1416, "lr": 3.5169913094373575e-06, "epoch": 2.149990397541771, "percentage": 43.0, "elapsed_time": "1:28:11", "remaining_time": "1:56:54", "throughput": 19977.44, "total_tokens": 105713728} +{"current_steps": 33590, "total_steps": 78105, "loss": 0.2043, "lr": 3.516480944630133e-06, "epoch": 2.1503104794827474, "percentage": 43.01, "elapsed_time": "1:28:12", "remaining_time": "1:56:53", "throughput": 19977.84, "total_tokens": 105729344} +{"current_steps": 33595, "total_steps": 78105, "loss": 0.1679, "lr": 3.515970529065145e-06, "epoch": 2.1506305614237244, "percentage": 43.01, "elapsed_time": "1:28:13", "remaining_time": "1:56:52", "throughput": 19978.4, "total_tokens": 105746560} +{"current_steps": 33600, "total_steps": 78105, "loss": 0.2972, "lr": 3.5154600627678792e-06, "epoch": 2.1509506433647014, "percentage": 43.02, "elapsed_time": "1:28:13", "remaining_time": "1:56:51", "throughput": 19978.97, "total_tokens": 105764480} +{"current_steps": 33605, "total_steps": 78105, "loss": 0.1771, "lr": 3.514949545763828e-06, "epoch": 2.1512707253056784, "percentage": 43.03, "elapsed_time": "1:28:14", "remaining_time": "1:56:51", "throughput": 19979.45, "total_tokens": 105780992} +{"current_steps": 33610, "total_steps": 78105, "loss": 0.3079, "lr": 3.514438978078484e-06, "epoch": 2.151590807246655, "percentage": 43.03, "elapsed_time": "1:28:15", "remaining_time": "1:56:50", "throughput": 19979.87, "total_tokens": 105796928} +{"current_steps": 33615, "total_steps": 78105, "loss": 0.2153, "lr": 3.513928359737341e-06, "epoch": 2.151910889187632, "percentage": 43.04, "elapsed_time": "1:28:15", "remaining_time": "1:56:49", "throughput": 19980.25, "total_tokens": 105811712} +{"current_steps": 33620, "total_steps": 78105, "loss": 0.1891, "lr": 3.5134176907658966e-06, "epoch": 2.152230971128609, "percentage": 43.04, "elapsed_time": "1:28:16", "remaining_time": "1:56:48", "throughput": 19980.79, "total_tokens": 105828800} +{"current_steps": 33625, "total_steps": 78105, "loss": 0.2186, "lr": 3.512906971189652e-06, "epoch": 2.152551053069586, "percentage": 43.05, "elapsed_time": "1:28:17", "remaining_time": "1:56:47", "throughput": 19981.34, "total_tokens": 105846272} +{"current_steps": 33630, "total_steps": 78105, "loss": 0.2392, "lr": 3.512396201034109e-06, "epoch": 2.152871135010563, "percentage": 43.06, "elapsed_time": "1:28:17", "remaining_time": "1:56:46", "throughput": 19981.82, "total_tokens": 105862848} +{"current_steps": 33635, "total_steps": 78105, "loss": 0.2205, "lr": 3.5118853803247733e-06, "epoch": 2.1531912169515395, "percentage": 43.06, "elapsed_time": "1:28:18", "remaining_time": "1:56:45", "throughput": 19982.15, "total_tokens": 105877440} +{"current_steps": 33640, "total_steps": 78105, "loss": 0.3131, "lr": 3.5113745090871526e-06, "epoch": 2.1535112988925165, "percentage": 43.07, "elapsed_time": "1:28:19", "remaining_time": "1:56:44", "throughput": 19982.56, "total_tokens": 105892928} +{"current_steps": 33645, "total_steps": 78105, "loss": 0.1858, "lr": 3.5108635873467565e-06, "epoch": 2.1538313808334935, "percentage": 43.08, "elapsed_time": "1:28:19", "remaining_time": "1:56:43", "throughput": 19982.93, "total_tokens": 105908160} +{"current_steps": 33650, "total_steps": 78105, "loss": 0.2167, "lr": 3.510352615129099e-06, "epoch": 2.1541514627744704, "percentage": 43.08, "elapsed_time": "1:28:20", "remaining_time": "1:56:42", "throughput": 19983.32, "total_tokens": 105923392} +{"current_steps": 33655, "total_steps": 78105, "loss": 0.1931, "lr": 3.509841592459694e-06, "epoch": 2.154471544715447, "percentage": 43.09, "elapsed_time": "1:28:21", "remaining_time": "1:56:41", "throughput": 19983.75, "total_tokens": 105939136} +{"current_steps": 33660, "total_steps": 78105, "loss": 0.1777, "lr": 3.5093305193640597e-06, "epoch": 2.154791626656424, "percentage": 43.1, "elapsed_time": "1:28:21", "remaining_time": "1:56:40", "throughput": 19984.2, "total_tokens": 105955584} +{"current_steps": 33665, "total_steps": 78105, "loss": 0.3062, "lr": 3.5088193958677174e-06, "epoch": 2.155111708597401, "percentage": 43.1, "elapsed_time": "1:28:22", "remaining_time": "1:56:39", "throughput": 19984.61, "total_tokens": 105971200} +{"current_steps": 33670, "total_steps": 78105, "loss": 0.1523, "lr": 3.5083082219961883e-06, "epoch": 2.155431790538378, "percentage": 43.11, "elapsed_time": "1:28:23", "remaining_time": "1:56:38", "throughput": 19985.02, "total_tokens": 105986816} +{"current_steps": 33675, "total_steps": 78105, "loss": 0.1629, "lr": 3.5077969977749993e-06, "epoch": 2.1557518724793545, "percentage": 43.12, "elapsed_time": "1:28:24", "remaining_time": "1:56:37", "throughput": 19985.47, "total_tokens": 106003072} +{"current_steps": 33680, "total_steps": 78105, "loss": 0.2281, "lr": 3.507285723229678e-06, "epoch": 2.1560719544203315, "percentage": 43.12, "elapsed_time": "1:28:24", "remaining_time": "1:56:37", "throughput": 19985.9, "total_tokens": 106019136} +{"current_steps": 33685, "total_steps": 78105, "loss": 0.1908, "lr": 3.5067743983857538e-06, "epoch": 2.1563920363613085, "percentage": 43.13, "elapsed_time": "1:28:25", "remaining_time": "1:56:36", "throughput": 19986.28, "total_tokens": 106034176} +{"current_steps": 33690, "total_steps": 78105, "loss": 0.2205, "lr": 3.5062630232687606e-06, "epoch": 2.1567121183022855, "percentage": 43.13, "elapsed_time": "1:28:26", "remaining_time": "1:56:35", "throughput": 19986.69, "total_tokens": 106050176} +{"current_steps": 33695, "total_steps": 78105, "loss": 0.2417, "lr": 3.5057515979042335e-06, "epoch": 2.1570322002432625, "percentage": 43.14, "elapsed_time": "1:28:26", "remaining_time": "1:56:34", "throughput": 19987.14, "total_tokens": 106066432} +{"current_steps": 33700, "total_steps": 78105, "loss": 0.2782, "lr": 3.50524012231771e-06, "epoch": 2.157352282184239, "percentage": 43.15, "elapsed_time": "1:28:27", "remaining_time": "1:56:33", "throughput": 19987.55, "total_tokens": 106082240} +{"current_steps": 33705, "total_steps": 78105, "loss": 0.2555, "lr": 3.504728596534731e-06, "epoch": 2.157672364125216, "percentage": 43.15, "elapsed_time": "1:28:28", "remaining_time": "1:56:32", "throughput": 19987.88, "total_tokens": 106097088} +{"current_steps": 33710, "total_steps": 78105, "loss": 0.1407, "lr": 3.5042170205808403e-06, "epoch": 2.157992446066193, "percentage": 43.16, "elapsed_time": "1:28:28", "remaining_time": "1:56:31", "throughput": 19988.24, "total_tokens": 106112064} +{"current_steps": 33715, "total_steps": 78105, "loss": 0.2465, "lr": 3.503705394481581e-06, "epoch": 2.15831252800717, "percentage": 43.17, "elapsed_time": "1:28:29", "remaining_time": "1:56:30", "throughput": 19988.66, "total_tokens": 106128000} +{"current_steps": 33720, "total_steps": 78105, "loss": 0.1645, "lr": 3.5031937182625023e-06, "epoch": 2.1586326099481465, "percentage": 43.17, "elapsed_time": "1:28:30", "remaining_time": "1:56:29", "throughput": 19988.99, "total_tokens": 106142720} +{"current_steps": 33725, "total_steps": 78105, "loss": 0.2709, "lr": 3.5026819919491563e-06, "epoch": 2.1589526918891235, "percentage": 43.18, "elapsed_time": "1:28:30", "remaining_time": "1:56:28", "throughput": 19989.36, "total_tokens": 106158144} +{"current_steps": 33730, "total_steps": 78105, "loss": 0.1932, "lr": 3.502170215567092e-06, "epoch": 2.1592727738301005, "percentage": 43.19, "elapsed_time": "1:28:31", "remaining_time": "1:56:27", "throughput": 19990.06, "total_tokens": 106177984} +{"current_steps": 33735, "total_steps": 78105, "loss": 0.2208, "lr": 3.501658389141869e-06, "epoch": 2.1595928557710775, "percentage": 43.19, "elapsed_time": "1:28:32", "remaining_time": "1:56:26", "throughput": 19990.46, "total_tokens": 106193472} +{"current_steps": 33740, "total_steps": 78105, "loss": 0.1614, "lr": 3.501146512699044e-06, "epoch": 2.159912937712054, "percentage": 43.2, "elapsed_time": "1:28:32", "remaining_time": "1:56:25", "throughput": 19990.82, "total_tokens": 106208640} +{"current_steps": 33745, "total_steps": 78105, "loss": 0.2378, "lr": 3.5006345862641755e-06, "epoch": 2.160233019653031, "percentage": 43.2, "elapsed_time": "1:28:33", "remaining_time": "1:56:25", "throughput": 19991.26, "total_tokens": 106224448} +{"current_steps": 33750, "total_steps": 78105, "loss": 0.1367, "lr": 3.5001226098628288e-06, "epoch": 2.160553101594008, "percentage": 43.21, "elapsed_time": "1:28:34", "remaining_time": "1:56:24", "throughput": 19991.69, "total_tokens": 106240448} +{"current_steps": 33755, "total_steps": 78105, "loss": 0.2617, "lr": 3.499610583520568e-06, "epoch": 2.160873183534985, "percentage": 43.22, "elapsed_time": "1:28:34", "remaining_time": "1:56:23", "throughput": 19992.07, "total_tokens": 106256128} +{"current_steps": 33760, "total_steps": 78105, "loss": 0.1985, "lr": 3.4990985072629617e-06, "epoch": 2.161193265475962, "percentage": 43.22, "elapsed_time": "1:28:35", "remaining_time": "1:56:22", "throughput": 19992.4, "total_tokens": 106270848} +{"current_steps": 33765, "total_steps": 78105, "loss": 0.216, "lr": 3.49858638111558e-06, "epoch": 2.1615133474169386, "percentage": 43.23, "elapsed_time": "1:28:36", "remaining_time": "1:56:21", "throughput": 19992.8, "total_tokens": 106286528} +{"current_steps": 33770, "total_steps": 78105, "loss": 0.2746, "lr": 3.4980742051039957e-06, "epoch": 2.1618334293579156, "percentage": 43.24, "elapsed_time": "1:28:36", "remaining_time": "1:56:20", "throughput": 19993.17, "total_tokens": 106301568} +{"current_steps": 33775, "total_steps": 78105, "loss": 0.1873, "lr": 3.4975619792537846e-06, "epoch": 2.1621535112988925, "percentage": 43.24, "elapsed_time": "1:28:37", "remaining_time": "1:56:19", "throughput": 19993.53, "total_tokens": 106316416} +{"current_steps": 33780, "total_steps": 78105, "loss": 0.3015, "lr": 3.4970497035905244e-06, "epoch": 2.1624735932398695, "percentage": 43.25, "elapsed_time": "1:28:38", "remaining_time": "1:56:18", "throughput": 19993.93, "total_tokens": 106331968} +{"current_steps": 33785, "total_steps": 78105, "loss": 0.2405, "lr": 3.496537378139795e-06, "epoch": 2.162793675180846, "percentage": 43.26, "elapsed_time": "1:28:38", "remaining_time": "1:56:17", "throughput": 19994.35, "total_tokens": 106347840} +{"current_steps": 33790, "total_steps": 78105, "loss": 0.296, "lr": 3.4960250029271795e-06, "epoch": 2.163113757121823, "percentage": 43.26, "elapsed_time": "1:28:39", "remaining_time": "1:56:16", "throughput": 19994.31, "total_tokens": 106364096} +{"current_steps": 33795, "total_steps": 78105, "loss": 0.2345, "lr": 3.4955125779782646e-06, "epoch": 2.1634338390628, "percentage": 43.27, "elapsed_time": "1:28:40", "remaining_time": "1:56:15", "throughput": 19994.65, "total_tokens": 106379520} +{"current_steps": 33800, "total_steps": 78105, "loss": 0.1912, "lr": 3.495000103318636e-06, "epoch": 2.163753921003777, "percentage": 43.28, "elapsed_time": "1:28:41", "remaining_time": "1:56:14", "throughput": 19995.01, "total_tokens": 106394240} +{"current_steps": 33805, "total_steps": 78105, "loss": 0.1327, "lr": 3.4944875789738852e-06, "epoch": 2.164074002944754, "percentage": 43.28, "elapsed_time": "1:28:41", "remaining_time": "1:56:13", "throughput": 19995.45, "total_tokens": 106410304} +{"current_steps": 33810, "total_steps": 78105, "loss": 0.1797, "lr": 3.4939750049696053e-06, "epoch": 2.1643940848857306, "percentage": 43.29, "elapsed_time": "1:28:42", "remaining_time": "1:56:12", "throughput": 19995.79, "total_tokens": 106425088} +{"current_steps": 33815, "total_steps": 78105, "loss": 0.1659, "lr": 3.4934623813313905e-06, "epoch": 2.1647141668267076, "percentage": 43.29, "elapsed_time": "1:28:43", "remaining_time": "1:56:12", "throughput": 19996.1, "total_tokens": 106440512} +{"current_steps": 33820, "total_steps": 78105, "loss": 0.2589, "lr": 3.4929497080848396e-06, "epoch": 2.1650342487676846, "percentage": 43.3, "elapsed_time": "1:28:43", "remaining_time": "1:56:11", "throughput": 19996.55, "total_tokens": 106456704} +{"current_steps": 33825, "total_steps": 78105, "loss": 0.2504, "lr": 3.4924369852555524e-06, "epoch": 2.1653543307086616, "percentage": 43.31, "elapsed_time": "1:28:44", "remaining_time": "1:56:10", "throughput": 19996.9, "total_tokens": 106471488} +{"current_steps": 33830, "total_steps": 78105, "loss": 0.1923, "lr": 3.491924212869131e-06, "epoch": 2.165674412649638, "percentage": 43.31, "elapsed_time": "1:28:45", "remaining_time": "1:56:09", "throughput": 19997.32, "total_tokens": 106487488} +{"current_steps": 33835, "total_steps": 78105, "loss": 0.1552, "lr": 3.4914113909511813e-06, "epoch": 2.165994494590615, "percentage": 43.32, "elapsed_time": "1:28:45", "remaining_time": "1:56:08", "throughput": 19997.74, "total_tokens": 106503232} +{"current_steps": 33840, "total_steps": 78105, "loss": 0.2846, "lr": 3.4908985195273116e-06, "epoch": 2.166314576531592, "percentage": 43.33, "elapsed_time": "1:28:46", "remaining_time": "1:56:07", "throughput": 19998.11, "total_tokens": 106518464} +{"current_steps": 33845, "total_steps": 78105, "loss": 0.1956, "lr": 3.49038559862313e-06, "epoch": 2.166634658472569, "percentage": 43.33, "elapsed_time": "1:28:47", "remaining_time": "1:56:06", "throughput": 19998.56, "total_tokens": 106534720} +{"current_steps": 33850, "total_steps": 78105, "loss": 0.2281, "lr": 3.489872628264251e-06, "epoch": 2.166954740413546, "percentage": 43.34, "elapsed_time": "1:28:47", "remaining_time": "1:56:05", "throughput": 19999.12, "total_tokens": 106552128} +{"current_steps": 33855, "total_steps": 78105, "loss": 0.383, "lr": 3.489359608476289e-06, "epoch": 2.1672748223545226, "percentage": 43.35, "elapsed_time": "1:28:48", "remaining_time": "1:56:04", "throughput": 19999.37, "total_tokens": 106568192} +{"current_steps": 33860, "total_steps": 78105, "loss": 0.1301, "lr": 3.4888465392848613e-06, "epoch": 2.1675949042954996, "percentage": 43.35, "elapsed_time": "1:28:49", "remaining_time": "1:56:03", "throughput": 19999.74, "total_tokens": 106583296} +{"current_steps": 33865, "total_steps": 78105, "loss": 0.2674, "lr": 3.4883334207155885e-06, "epoch": 2.1679149862364766, "percentage": 43.36, "elapsed_time": "1:28:49", "remaining_time": "1:56:02", "throughput": 20000.11, "total_tokens": 106598464} +{"current_steps": 33870, "total_steps": 78105, "loss": 0.1899, "lr": 3.4878202527940926e-06, "epoch": 2.1682350681774536, "percentage": 43.36, "elapsed_time": "1:28:50", "remaining_time": "1:56:01", "throughput": 20000.58, "total_tokens": 106614464} +{"current_steps": 33875, "total_steps": 78105, "loss": 0.1777, "lr": 3.4873070355459986e-06, "epoch": 2.16855515011843, "percentage": 43.37, "elapsed_time": "1:28:51", "remaining_time": "1:56:00", "throughput": 20000.97, "total_tokens": 106630016} +{"current_steps": 33880, "total_steps": 78105, "loss": 0.1854, "lr": 3.4867937689969337e-06, "epoch": 2.168875232059407, "percentage": 43.38, "elapsed_time": "1:28:51", "remaining_time": "1:55:59", "throughput": 20001.39, "total_tokens": 106645952} +{"current_steps": 33885, "total_steps": 78105, "loss": 0.2888, "lr": 3.486280453172528e-06, "epoch": 2.169195314000384, "percentage": 43.38, "elapsed_time": "1:28:52", "remaining_time": "1:55:59", "throughput": 20001.9, "total_tokens": 106662784} +{"current_steps": 33890, "total_steps": 78105, "loss": 0.2578, "lr": 3.485767088098414e-06, "epoch": 2.169515395941361, "percentage": 43.39, "elapsed_time": "1:28:53", "remaining_time": "1:55:58", "throughput": 20002.34, "total_tokens": 106679104} +{"current_steps": 33895, "total_steps": 78105, "loss": 0.1939, "lr": 3.4852536738002266e-06, "epoch": 2.169835477882338, "percentage": 43.4, "elapsed_time": "1:28:54", "remaining_time": "1:55:57", "throughput": 20002.73, "total_tokens": 106694592} +{"current_steps": 33900, "total_steps": 78105, "loss": 0.1252, "lr": 3.4847402103036027e-06, "epoch": 2.1701555598233147, "percentage": 43.4, "elapsed_time": "1:28:54", "remaining_time": "1:55:56", "throughput": 20003.11, "total_tokens": 106710272} +{"current_steps": 33905, "total_steps": 78105, "loss": 0.1469, "lr": 3.4842266976341814e-06, "epoch": 2.1704756417642916, "percentage": 43.41, "elapsed_time": "1:28:55", "remaining_time": "1:55:55", "throughput": 20003.56, "total_tokens": 106726272} +{"current_steps": 33910, "total_steps": 78105, "loss": 0.2561, "lr": 3.483713135817606e-06, "epoch": 2.1707957237052686, "percentage": 43.42, "elapsed_time": "1:28:55", "remaining_time": "1:55:54", "throughput": 20003.84, "total_tokens": 106740352} +{"current_steps": 33915, "total_steps": 78105, "loss": 0.2273, "lr": 3.4831995248795204e-06, "epoch": 2.1711158056462456, "percentage": 43.42, "elapsed_time": "1:28:56", "remaining_time": "1:55:53", "throughput": 20004.29, "total_tokens": 106756416} +{"current_steps": 33920, "total_steps": 78105, "loss": 0.2186, "lr": 3.482685864845572e-06, "epoch": 2.171435887587222, "percentage": 43.43, "elapsed_time": "1:28:57", "remaining_time": "1:55:52", "throughput": 20004.62, "total_tokens": 106771072} +{"current_steps": 33925, "total_steps": 78105, "loss": 0.2672, "lr": 3.4821721557414094e-06, "epoch": 2.171755969528199, "percentage": 43.44, "elapsed_time": "1:28:57", "remaining_time": "1:55:51", "throughput": 20005.06, "total_tokens": 106786816} +{"current_steps": 33930, "total_steps": 78105, "loss": 0.354, "lr": 3.4816583975926865e-06, "epoch": 2.172076051469176, "percentage": 43.44, "elapsed_time": "1:28:58", "remaining_time": "1:55:50", "throughput": 20005.43, "total_tokens": 106802048} +{"current_steps": 33935, "total_steps": 78105, "loss": 0.1862, "lr": 3.4811445904250557e-06, "epoch": 2.172396133410153, "percentage": 43.45, "elapsed_time": "1:28:59", "remaining_time": "1:55:49", "throughput": 20005.83, "total_tokens": 106817344} +{"current_steps": 33940, "total_steps": 78105, "loss": 0.1708, "lr": 3.480630734264175e-06, "epoch": 2.1727162153511297, "percentage": 43.45, "elapsed_time": "1:28:59", "remaining_time": "1:55:48", "throughput": 20006.26, "total_tokens": 106833408} +{"current_steps": 33945, "total_steps": 78105, "loss": 0.2142, "lr": 3.480116829135703e-06, "epoch": 2.1730362972921067, "percentage": 43.46, "elapsed_time": "1:29:00", "remaining_time": "1:55:47", "throughput": 20006.69, "total_tokens": 106849344} +{"current_steps": 33950, "total_steps": 78105, "loss": 0.1494, "lr": 3.479602875065302e-06, "epoch": 2.1733563792330837, "percentage": 43.47, "elapsed_time": "1:29:01", "remaining_time": "1:55:46", "throughput": 20007.1, "total_tokens": 106865088} +{"current_steps": 33955, "total_steps": 78105, "loss": 0.1881, "lr": 3.4790888720786357e-06, "epoch": 2.1736764611740607, "percentage": 43.47, "elapsed_time": "1:29:02", "remaining_time": "1:55:45", "throughput": 20007.5, "total_tokens": 106880576} +{"current_steps": 33960, "total_steps": 78105, "loss": 0.2463, "lr": 3.4785748202013712e-06, "epoch": 2.1739965431150376, "percentage": 43.48, "elapsed_time": "1:29:02", "remaining_time": "1:55:45", "throughput": 20007.94, "total_tokens": 106896768} +{"current_steps": 33965, "total_steps": 78105, "loss": 0.3227, "lr": 3.4780607194591775e-06, "epoch": 2.174316625056014, "percentage": 43.49, "elapsed_time": "1:29:03", "remaining_time": "1:55:44", "throughput": 20008.27, "total_tokens": 106911360} +{"current_steps": 33970, "total_steps": 78105, "loss": 0.1808, "lr": 3.477546569877726e-06, "epoch": 2.174636706996991, "percentage": 43.49, "elapsed_time": "1:29:04", "remaining_time": "1:55:43", "throughput": 20008.69, "total_tokens": 106927424} +{"current_steps": 33975, "total_steps": 78105, "loss": 0.2348, "lr": 3.4770323714826903e-06, "epoch": 2.174956788937968, "percentage": 43.5, "elapsed_time": "1:29:04", "remaining_time": "1:55:42", "throughput": 20009.05, "total_tokens": 106942720} +{"current_steps": 33980, "total_steps": 78105, "loss": 0.2581, "lr": 3.4765181242997474e-06, "epoch": 2.175276870878945, "percentage": 43.51, "elapsed_time": "1:29:05", "remaining_time": "1:55:41", "throughput": 20009.53, "total_tokens": 106959680} +{"current_steps": 33985, "total_steps": 78105, "loss": 0.1787, "lr": 3.476003828354576e-06, "epoch": 2.1755969528199217, "percentage": 43.51, "elapsed_time": "1:29:06", "remaining_time": "1:55:40", "throughput": 20009.9, "total_tokens": 106974464} +{"current_steps": 33990, "total_steps": 78105, "loss": 0.1541, "lr": 3.4754894836728576e-06, "epoch": 2.1759170347608987, "percentage": 43.52, "elapsed_time": "1:29:06", "remaining_time": "1:55:39", "throughput": 20010.27, "total_tokens": 106989312} +{"current_steps": 33995, "total_steps": 78105, "loss": 0.1986, "lr": 3.474975090280275e-06, "epoch": 2.1762371167018757, "percentage": 43.52, "elapsed_time": "1:29:07", "remaining_time": "1:55:38", "throughput": 20010.65, "total_tokens": 107004480} +{"current_steps": 34000, "total_steps": 78105, "loss": 0.1989, "lr": 3.4744606482025145e-06, "epoch": 2.1765571986428527, "percentage": 43.53, "elapsed_time": "1:29:08", "remaining_time": "1:55:37", "throughput": 20010.99, "total_tokens": 107019456} +{"current_steps": 34005, "total_steps": 78105, "loss": 0.2142, "lr": 3.4739461574652654e-06, "epoch": 2.1768772805838292, "percentage": 43.54, "elapsed_time": "1:29:08", "remaining_time": "1:55:36", "throughput": 20011.4, "total_tokens": 107035200} +{"current_steps": 34010, "total_steps": 78105, "loss": 0.3719, "lr": 3.4734316180942186e-06, "epoch": 2.1771973625248062, "percentage": 43.54, "elapsed_time": "1:29:09", "remaining_time": "1:55:35", "throughput": 20011.78, "total_tokens": 107050624} +{"current_steps": 34015, "total_steps": 78105, "loss": 0.1449, "lr": 3.4729170301150673e-06, "epoch": 2.177517444465783, "percentage": 43.55, "elapsed_time": "1:29:10", "remaining_time": "1:55:34", "throughput": 20012.2, "total_tokens": 107066752} +{"current_steps": 34020, "total_steps": 78105, "loss": 0.2014, "lr": 3.472402393553507e-06, "epoch": 2.17783752640676, "percentage": 43.56, "elapsed_time": "1:29:10", "remaining_time": "1:55:33", "throughput": 20012.47, "total_tokens": 107081088} +{"current_steps": 34025, "total_steps": 78105, "loss": 0.2346, "lr": 3.4718877084352352e-06, "epoch": 2.178157608347737, "percentage": 43.56, "elapsed_time": "1:29:11", "remaining_time": "1:55:32", "throughput": 20012.84, "total_tokens": 107096512} +{"current_steps": 34030, "total_steps": 78105, "loss": 0.242, "lr": 3.471372974785955e-06, "epoch": 2.1784776902887137, "percentage": 43.57, "elapsed_time": "1:29:12", "remaining_time": "1:55:31", "throughput": 20013.26, "total_tokens": 107112512} +{"current_steps": 34035, "total_steps": 78105, "loss": 0.2467, "lr": 3.470858192631368e-06, "epoch": 2.1787977722296907, "percentage": 43.58, "elapsed_time": "1:29:12", "remaining_time": "1:55:30", "throughput": 20013.65, "total_tokens": 107127872} +{"current_steps": 34040, "total_steps": 78105, "loss": 0.1246, "lr": 3.4703433619971794e-06, "epoch": 2.1791178541706677, "percentage": 43.58, "elapsed_time": "1:29:13", "remaining_time": "1:55:30", "throughput": 20014.04, "total_tokens": 107143104} +{"current_steps": 34045, "total_steps": 78105, "loss": 0.2645, "lr": 3.469828482909098e-06, "epoch": 2.1794379361116447, "percentage": 43.59, "elapsed_time": "1:29:14", "remaining_time": "1:55:29", "throughput": 20014.42, "total_tokens": 107158272} +{"current_steps": 34050, "total_steps": 78105, "loss": 0.2499, "lr": 3.4693135553928337e-06, "epoch": 2.1797580180526213, "percentage": 43.6, "elapsed_time": "1:29:14", "remaining_time": "1:55:28", "throughput": 20014.81, "total_tokens": 107173696} +{"current_steps": 34055, "total_steps": 78105, "loss": 0.2194, "lr": 3.4687985794740993e-06, "epoch": 2.1800780999935983, "percentage": 43.6, "elapsed_time": "1:29:15", "remaining_time": "1:55:27", "throughput": 20015.23, "total_tokens": 107189696} +{"current_steps": 34060, "total_steps": 78105, "loss": 0.2478, "lr": 3.4682835551786097e-06, "epoch": 2.1803981819345752, "percentage": 43.61, "elapsed_time": "1:29:16", "remaining_time": "1:55:26", "throughput": 20015.65, "total_tokens": 107205184} +{"current_steps": 34065, "total_steps": 78105, "loss": 0.2267, "lr": 3.4677684825320835e-06, "epoch": 2.1807182638755522, "percentage": 43.61, "elapsed_time": "1:29:16", "remaining_time": "1:55:25", "throughput": 20016.11, "total_tokens": 107221824} +{"current_steps": 34070, "total_steps": 78105, "loss": 0.2264, "lr": 3.46725336156024e-06, "epoch": 2.1810383458165292, "percentage": 43.62, "elapsed_time": "1:29:17", "remaining_time": "1:55:24", "throughput": 20016.63, "total_tokens": 107238976} +{"current_steps": 34075, "total_steps": 78105, "loss": 0.2012, "lr": 3.466738192288802e-06, "epoch": 2.1813584277575058, "percentage": 43.63, "elapsed_time": "1:29:18", "remaining_time": "1:55:23", "throughput": 20017.15, "total_tokens": 107256128} +{"current_steps": 34080, "total_steps": 78105, "loss": 0.23, "lr": 3.466222974743494e-06, "epoch": 2.1816785096984828, "percentage": 43.63, "elapsed_time": "1:29:18", "remaining_time": "1:55:22", "throughput": 20017.56, "total_tokens": 107271680} +{"current_steps": 34085, "total_steps": 78105, "loss": 0.1558, "lr": 3.465707708950044e-06, "epoch": 2.1819985916394598, "percentage": 43.64, "elapsed_time": "1:29:19", "remaining_time": "1:55:21", "throughput": 20018.08, "total_tokens": 107288960} +{"current_steps": 34090, "total_steps": 78105, "loss": 0.1799, "lr": 3.4651923949341804e-06, "epoch": 2.1823186735804367, "percentage": 43.65, "elapsed_time": "1:29:20", "remaining_time": "1:55:20", "throughput": 20018.54, "total_tokens": 107305280} +{"current_steps": 34095, "total_steps": 78105, "loss": 0.3084, "lr": 3.4646770327216367e-06, "epoch": 2.1826387555214133, "percentage": 43.65, "elapsed_time": "1:29:20", "remaining_time": "1:55:19", "throughput": 20018.91, "total_tokens": 107320704} +{"current_steps": 34100, "total_steps": 78105, "loss": 0.2897, "lr": 3.4641616223381467e-06, "epoch": 2.1829588374623903, "percentage": 43.66, "elapsed_time": "1:29:21", "remaining_time": "1:55:19", "throughput": 20019.3, "total_tokens": 107336256} +{"current_steps": 34105, "total_steps": 78105, "loss": 0.1803, "lr": 3.4636461638094476e-06, "epoch": 2.1832789194033673, "percentage": 43.67, "elapsed_time": "1:29:22", "remaining_time": "1:55:18", "throughput": 20019.67, "total_tokens": 107351872} +{"current_steps": 34110, "total_steps": 78105, "loss": 0.1921, "lr": 3.463130657161279e-06, "epoch": 2.1835990013443443, "percentage": 43.67, "elapsed_time": "1:29:22", "remaining_time": "1:55:17", "throughput": 20020.04, "total_tokens": 107367040} +{"current_steps": 34115, "total_steps": 78105, "loss": 0.2382, "lr": 3.462615102419381e-06, "epoch": 2.1839190832853212, "percentage": 43.68, "elapsed_time": "1:29:23", "remaining_time": "1:55:16", "throughput": 20020.44, "total_tokens": 107382656} +{"current_steps": 34120, "total_steps": 78105, "loss": 0.2082, "lr": 3.4620994996094993e-06, "epoch": 2.184239165226298, "percentage": 43.68, "elapsed_time": "1:29:24", "remaining_time": "1:55:15", "throughput": 20020.86, "total_tokens": 107398720} +{"current_steps": 34125, "total_steps": 78105, "loss": 0.1904, "lr": 3.4615838487573803e-06, "epoch": 2.184559247167275, "percentage": 43.69, "elapsed_time": "1:29:24", "remaining_time": "1:55:14", "throughput": 20021.17, "total_tokens": 107412864} +{"current_steps": 34130, "total_steps": 78105, "loss": 0.1471, "lr": 3.461068149888773e-06, "epoch": 2.1848793291082518, "percentage": 43.7, "elapsed_time": "1:29:25", "remaining_time": "1:55:13", "throughput": 20021.52, "total_tokens": 107427776} +{"current_steps": 34135, "total_steps": 78105, "loss": 0.249, "lr": 3.460552403029427e-06, "epoch": 2.1851994110492288, "percentage": 43.7, "elapsed_time": "1:29:26", "remaining_time": "1:55:12", "throughput": 20021.88, "total_tokens": 107443072} +{"current_steps": 34140, "total_steps": 78105, "loss": 0.2319, "lr": 3.4600366082050977e-06, "epoch": 2.1855194929902053, "percentage": 43.71, "elapsed_time": "1:29:26", "remaining_time": "1:55:11", "throughput": 20022.24, "total_tokens": 107457920} +{"current_steps": 34145, "total_steps": 78105, "loss": 0.351, "lr": 3.459520765441541e-06, "epoch": 2.1858395749311823, "percentage": 43.72, "elapsed_time": "1:29:27", "remaining_time": "1:55:10", "throughput": 20022.65, "total_tokens": 107473600} +{"current_steps": 34150, "total_steps": 78105, "loss": 0.3121, "lr": 3.4590048747645154e-06, "epoch": 2.1861596568721593, "percentage": 43.72, "elapsed_time": "1:29:28", "remaining_time": "1:55:09", "throughput": 20023.03, "total_tokens": 107488832} +{"current_steps": 34155, "total_steps": 78105, "loss": 0.2794, "lr": 3.458488936199782e-06, "epoch": 2.1864797388131363, "percentage": 43.73, "elapsed_time": "1:29:28", "remaining_time": "1:55:08", "throughput": 20023.52, "total_tokens": 107505920} +{"current_steps": 34160, "total_steps": 78105, "loss": 0.1562, "lr": 3.4579729497731037e-06, "epoch": 2.1867998207541133, "percentage": 43.74, "elapsed_time": "1:29:29", "remaining_time": "1:55:07", "throughput": 20023.97, "total_tokens": 107522624} +{"current_steps": 34165, "total_steps": 78105, "loss": 0.1315, "lr": 3.4574569155102455e-06, "epoch": 2.18711990269509, "percentage": 43.74, "elapsed_time": "1:29:30", "remaining_time": "1:55:06", "throughput": 20024.46, "total_tokens": 107539136} +{"current_steps": 34170, "total_steps": 78105, "loss": 0.1624, "lr": 3.4569408334369768e-06, "epoch": 2.187439984636067, "percentage": 43.75, "elapsed_time": "1:29:31", "remaining_time": "1:55:06", "throughput": 20024.85, "total_tokens": 107554944} +{"current_steps": 34175, "total_steps": 78105, "loss": 0.2167, "lr": 3.4564247035790664e-06, "epoch": 2.187760066577044, "percentage": 43.76, "elapsed_time": "1:29:31", "remaining_time": "1:55:05", "throughput": 20025.31, "total_tokens": 107571456} +{"current_steps": 34180, "total_steps": 78105, "loss": 0.2237, "lr": 3.4559085259622893e-06, "epoch": 2.188080148518021, "percentage": 43.76, "elapsed_time": "1:29:32", "remaining_time": "1:55:04", "throughput": 20025.69, "total_tokens": 107586880} +{"current_steps": 34185, "total_steps": 78105, "loss": 0.3577, "lr": 3.4553923006124192e-06, "epoch": 2.1884002304589973, "percentage": 43.77, "elapsed_time": "1:29:33", "remaining_time": "1:55:03", "throughput": 20026.07, "total_tokens": 107602560} +{"current_steps": 34190, "total_steps": 78105, "loss": 0.2601, "lr": 3.454876027555234e-06, "epoch": 2.1887203123999743, "percentage": 43.77, "elapsed_time": "1:29:33", "remaining_time": "1:55:02", "throughput": 20026.46, "total_tokens": 107618112} +{"current_steps": 34195, "total_steps": 78105, "loss": 0.1799, "lr": 3.454359706816513e-06, "epoch": 2.1890403943409513, "percentage": 43.78, "elapsed_time": "1:29:34", "remaining_time": "1:55:01", "throughput": 20027.07, "total_tokens": 107636288} +{"current_steps": 34200, "total_steps": 78105, "loss": 0.2103, "lr": 3.4538433384220403e-06, "epoch": 2.1893604762819283, "percentage": 43.79, "elapsed_time": "1:29:35", "remaining_time": "1:55:00", "throughput": 20027.48, "total_tokens": 107652032} +{"current_steps": 34205, "total_steps": 78105, "loss": 0.2011, "lr": 3.453326922397599e-06, "epoch": 2.189680558222905, "percentage": 43.79, "elapsed_time": "1:29:35", "remaining_time": "1:54:59", "throughput": 20027.87, "total_tokens": 107667328} +{"current_steps": 34210, "total_steps": 78105, "loss": 0.1632, "lr": 3.452810458768977e-06, "epoch": 2.190000640163882, "percentage": 43.8, "elapsed_time": "1:29:36", "remaining_time": "1:54:58", "throughput": 20028.32, "total_tokens": 107683904} +{"current_steps": 34215, "total_steps": 78105, "loss": 0.1922, "lr": 3.452293947561964e-06, "epoch": 2.190320722104859, "percentage": 43.81, "elapsed_time": "1:29:37", "remaining_time": "1:54:57", "throughput": 20028.71, "total_tokens": 107699520} +{"current_steps": 34220, "total_steps": 78105, "loss": 0.2186, "lr": 3.4517773888023522e-06, "epoch": 2.190640804045836, "percentage": 43.81, "elapsed_time": "1:29:37", "remaining_time": "1:54:56", "throughput": 20029.09, "total_tokens": 107715008} +{"current_steps": 34225, "total_steps": 78105, "loss": 0.1719, "lr": 3.4512607825159343e-06, "epoch": 2.190960885986813, "percentage": 43.82, "elapsed_time": "1:29:38", "remaining_time": "1:54:55", "throughput": 20029.43, "total_tokens": 107729984} +{"current_steps": 34230, "total_steps": 78105, "loss": 0.1818, "lr": 3.4507441287285083e-06, "epoch": 2.1912809679277894, "percentage": 43.83, "elapsed_time": "1:29:39", "remaining_time": "1:54:55", "throughput": 20029.85, "total_tokens": 107746176} +{"current_steps": 34235, "total_steps": 78105, "loss": 0.2136, "lr": 3.4502274274658724e-06, "epoch": 2.1916010498687664, "percentage": 43.83, "elapsed_time": "1:29:39", "remaining_time": "1:54:54", "throughput": 20030.27, "total_tokens": 107761664} +{"current_steps": 34240, "total_steps": 78105, "loss": 0.2131, "lr": 3.4497106787538286e-06, "epoch": 2.1919211318097434, "percentage": 43.84, "elapsed_time": "1:29:40", "remaining_time": "1:54:53", "throughput": 20030.76, "total_tokens": 107778240} +{"current_steps": 34245, "total_steps": 78105, "loss": 0.1925, "lr": 3.4491938826181803e-06, "epoch": 2.1922412137507203, "percentage": 43.84, "elapsed_time": "1:29:41", "remaining_time": "1:54:52", "throughput": 20031.11, "total_tokens": 107793216} +{"current_steps": 34250, "total_steps": 78105, "loss": 0.1419, "lr": 3.448677039084734e-06, "epoch": 2.192561295691697, "percentage": 43.85, "elapsed_time": "1:29:41", "remaining_time": "1:54:51", "throughput": 20031.5, "total_tokens": 107808768} +{"current_steps": 34255, "total_steps": 78105, "loss": 0.156, "lr": 3.448160148179297e-06, "epoch": 2.192881377632674, "percentage": 43.86, "elapsed_time": "1:29:42", "remaining_time": "1:54:50", "throughput": 20031.82, "total_tokens": 107823808} +{"current_steps": 34260, "total_steps": 78105, "loss": 0.5304, "lr": 3.4476432099276813e-06, "epoch": 2.193201459573651, "percentage": 43.86, "elapsed_time": "1:29:43", "remaining_time": "1:54:49", "throughput": 20032.16, "total_tokens": 107838912} +{"current_steps": 34265, "total_steps": 78105, "loss": 0.323, "lr": 3.4471262243556995e-06, "epoch": 2.193521541514628, "percentage": 43.87, "elapsed_time": "1:29:43", "remaining_time": "1:54:48", "throughput": 20032.58, "total_tokens": 107854784} +{"current_steps": 34270, "total_steps": 78105, "loss": 0.1791, "lr": 3.4466091914891685e-06, "epoch": 2.1938416234556044, "percentage": 43.88, "elapsed_time": "1:29:44", "remaining_time": "1:54:47", "throughput": 20032.96, "total_tokens": 107870208} +{"current_steps": 34275, "total_steps": 78105, "loss": 0.1536, "lr": 3.4460921113539046e-06, "epoch": 2.1941617053965814, "percentage": 43.88, "elapsed_time": "1:29:45", "remaining_time": "1:54:46", "throughput": 20033.35, "total_tokens": 107886144} +{"current_steps": 34280, "total_steps": 78105, "loss": 0.2229, "lr": 3.445574983975729e-06, "epoch": 2.1944817873375584, "percentage": 43.89, "elapsed_time": "1:29:46", "remaining_time": "1:54:45", "throughput": 20033.8, "total_tokens": 107902720} +{"current_steps": 34285, "total_steps": 78105, "loss": 0.2088, "lr": 3.445057809380463e-06, "epoch": 2.1948018692785354, "percentage": 43.9, "elapsed_time": "1:29:46", "remaining_time": "1:54:44", "throughput": 20034.08, "total_tokens": 107917056} +{"current_steps": 34290, "total_steps": 78105, "loss": 0.1265, "lr": 3.4445405875939337e-06, "epoch": 2.1951219512195124, "percentage": 43.9, "elapsed_time": "1:29:47", "remaining_time": "1:54:43", "throughput": 20034.42, "total_tokens": 107931968} +{"current_steps": 34295, "total_steps": 78105, "loss": 0.1757, "lr": 3.4440233186419668e-06, "epoch": 2.195442033160489, "percentage": 43.91, "elapsed_time": "1:29:48", "remaining_time": "1:54:42", "throughput": 20034.86, "total_tokens": 107948288} +{"current_steps": 34300, "total_steps": 78105, "loss": 0.1888, "lr": 3.4435060025503932e-06, "epoch": 2.195762115101466, "percentage": 43.92, "elapsed_time": "1:29:48", "remaining_time": "1:54:41", "throughput": 20035.23, "total_tokens": 107963584} +{"current_steps": 34305, "total_steps": 78105, "loss": 0.2399, "lr": 3.442988639345044e-06, "epoch": 2.196082197042443, "percentage": 43.92, "elapsed_time": "1:29:49", "remaining_time": "1:54:41", "throughput": 20035.69, "total_tokens": 107980096} +{"current_steps": 34310, "total_steps": 78105, "loss": 0.2176, "lr": 3.4424712290517535e-06, "epoch": 2.19640227898342, "percentage": 43.93, "elapsed_time": "1:29:50", "remaining_time": "1:54:40", "throughput": 20035.99, "total_tokens": 107994816} +{"current_steps": 34315, "total_steps": 78105, "loss": 0.2193, "lr": 3.4419537716963597e-06, "epoch": 2.1967223609243964, "percentage": 43.93, "elapsed_time": "1:29:50", "remaining_time": "1:54:39", "throughput": 20036.32, "total_tokens": 108010048} +{"current_steps": 34320, "total_steps": 78105, "loss": 0.2667, "lr": 3.4414362673047008e-06, "epoch": 2.1970424428653734, "percentage": 43.94, "elapsed_time": "1:29:51", "remaining_time": "1:54:38", "throughput": 20036.72, "total_tokens": 108025984} +{"current_steps": 34325, "total_steps": 78105, "loss": 0.1844, "lr": 3.440918715902618e-06, "epoch": 2.1973625248063504, "percentage": 43.95, "elapsed_time": "1:29:52", "remaining_time": "1:54:37", "throughput": 20037.08, "total_tokens": 108041344} +{"current_steps": 34330, "total_steps": 78105, "loss": 0.2452, "lr": 3.4404011175159564e-06, "epoch": 2.1976826067473274, "percentage": 43.95, "elapsed_time": "1:29:52", "remaining_time": "1:54:36", "throughput": 20037.57, "total_tokens": 108057984} +{"current_steps": 34335, "total_steps": 78105, "loss": 0.2462, "lr": 3.439883472170561e-06, "epoch": 2.1980026886883044, "percentage": 43.96, "elapsed_time": "1:29:53", "remaining_time": "1:54:35", "throughput": 20037.91, "total_tokens": 108073024} +{"current_steps": 34340, "total_steps": 78105, "loss": 0.1926, "lr": 3.4393657798922807e-06, "epoch": 2.198322770629281, "percentage": 43.97, "elapsed_time": "1:29:54", "remaining_time": "1:54:34", "throughput": 20038.36, "total_tokens": 108089536} +{"current_steps": 34345, "total_steps": 78105, "loss": 0.1936, "lr": 3.4388480407069665e-06, "epoch": 2.198642852570258, "percentage": 43.97, "elapsed_time": "1:29:54", "remaining_time": "1:54:33", "throughput": 20038.77, "total_tokens": 108105536} +{"current_steps": 34350, "total_steps": 78105, "loss": 0.1931, "lr": 3.4383302546404713e-06, "epoch": 2.198962934511235, "percentage": 43.98, "elapsed_time": "1:29:55", "remaining_time": "1:54:32", "throughput": 20039.31, "total_tokens": 108123200} +{"current_steps": 34355, "total_steps": 78105, "loss": 0.2237, "lr": 3.4378124217186503e-06, "epoch": 2.199283016452212, "percentage": 43.99, "elapsed_time": "1:29:56", "remaining_time": "1:54:31", "throughput": 20039.75, "total_tokens": 108139456} +{"current_steps": 34360, "total_steps": 78105, "loss": 0.2067, "lr": 3.4372945419673626e-06, "epoch": 2.1996030983931885, "percentage": 43.99, "elapsed_time": "1:29:56", "remaining_time": "1:54:31", "throughput": 20040.19, "total_tokens": 108155648} +{"current_steps": 34365, "total_steps": 78105, "loss": 0.1792, "lr": 3.436776615412467e-06, "epoch": 2.1999231803341655, "percentage": 44.0, "elapsed_time": "1:29:57", "remaining_time": "1:54:30", "throughput": 20040.61, "total_tokens": 108171712} +{"current_steps": 34370, "total_steps": 78105, "loss": 0.2654, "lr": 3.436258642079827e-06, "epoch": 2.2002432622751424, "percentage": 44.0, "elapsed_time": "1:29:58", "remaining_time": "1:54:29", "throughput": 20041.04, "total_tokens": 108188032} +{"current_steps": 34375, "total_steps": 78105, "loss": 0.2347, "lr": 3.435740621995307e-06, "epoch": 2.2005633442161194, "percentage": 44.01, "elapsed_time": "1:29:58", "remaining_time": "1:54:28", "throughput": 20041.35, "total_tokens": 108202432} +{"current_steps": 34380, "total_steps": 78105, "loss": 0.1676, "lr": 3.4352225551847747e-06, "epoch": 2.2008834261570964, "percentage": 44.02, "elapsed_time": "1:29:59", "remaining_time": "1:54:27", "throughput": 20041.68, "total_tokens": 108217536} +{"current_steps": 34385, "total_steps": 78105, "loss": 0.2502, "lr": 3.434704441674099e-06, "epoch": 2.201203508098073, "percentage": 44.02, "elapsed_time": "1:30:00", "remaining_time": "1:54:26", "throughput": 20042.26, "total_tokens": 108235072} +{"current_steps": 34390, "total_steps": 78105, "loss": 0.2983, "lr": 3.434186281489153e-06, "epoch": 2.20152359003905, "percentage": 44.03, "elapsed_time": "1:30:01", "remaining_time": "1:54:25", "throughput": 20042.68, "total_tokens": 108250752} +{"current_steps": 34395, "total_steps": 78105, "loss": 0.1947, "lr": 3.43366807465581e-06, "epoch": 2.201843671980027, "percentage": 44.04, "elapsed_time": "1:30:01", "remaining_time": "1:54:24", "throughput": 20043.08, "total_tokens": 108266304} +{"current_steps": 34400, "total_steps": 78105, "loss": 0.1995, "lr": 3.433149821199946e-06, "epoch": 2.202163753921004, "percentage": 44.04, "elapsed_time": "1:30:02", "remaining_time": "1:54:23", "throughput": 20043.51, "total_tokens": 108282304} +{"current_steps": 34405, "total_steps": 78105, "loss": 0.1841, "lr": 3.4326315211474408e-06, "epoch": 2.2024838358619805, "percentage": 44.05, "elapsed_time": "1:30:03", "remaining_time": "1:54:22", "throughput": 20043.93, "total_tokens": 108298176} +{"current_steps": 34410, "total_steps": 78105, "loss": 0.1742, "lr": 3.432113174524175e-06, "epoch": 2.2028039178029575, "percentage": 44.06, "elapsed_time": "1:30:03", "remaining_time": "1:54:21", "throughput": 20044.28, "total_tokens": 108313152} +{"current_steps": 34415, "total_steps": 78105, "loss": 0.1972, "lr": 3.4315947813560337e-06, "epoch": 2.2031239997439345, "percentage": 44.06, "elapsed_time": "1:30:04", "remaining_time": "1:54:20", "throughput": 20044.63, "total_tokens": 108328448} +{"current_steps": 34420, "total_steps": 78105, "loss": 0.1578, "lr": 3.4310763416689003e-06, "epoch": 2.2034440816849115, "percentage": 44.07, "elapsed_time": "1:30:05", "remaining_time": "1:54:19", "throughput": 20045.04, "total_tokens": 108344128} +{"current_steps": 34425, "total_steps": 78105, "loss": 0.2849, "lr": 3.4305578554886647e-06, "epoch": 2.2037641636258885, "percentage": 44.08, "elapsed_time": "1:30:05", "remaining_time": "1:54:19", "throughput": 20045.47, "total_tokens": 108360192} +{"current_steps": 34430, "total_steps": 78105, "loss": 0.187, "lr": 3.4300393228412167e-06, "epoch": 2.204084245566865, "percentage": 44.08, "elapsed_time": "1:30:06", "remaining_time": "1:54:18", "throughput": 20045.8, "total_tokens": 108374784} +{"current_steps": 34435, "total_steps": 78105, "loss": 0.1217, "lr": 3.4295207437524485e-06, "epoch": 2.204404327507842, "percentage": 44.09, "elapsed_time": "1:30:07", "remaining_time": "1:54:17", "throughput": 20046.17, "total_tokens": 108390144} +{"current_steps": 34440, "total_steps": 78105, "loss": 0.2001, "lr": 3.429002118248257e-06, "epoch": 2.204724409448819, "percentage": 44.09, "elapsed_time": "1:30:07", "remaining_time": "1:54:16", "throughput": 20046.5, "total_tokens": 108405056} +{"current_steps": 34445, "total_steps": 78105, "loss": 0.2279, "lr": 3.428483446354539e-06, "epoch": 2.205044491389796, "percentage": 44.1, "elapsed_time": "1:30:08", "remaining_time": "1:54:15", "throughput": 20046.86, "total_tokens": 108420160} +{"current_steps": 34450, "total_steps": 78105, "loss": 0.1814, "lr": 3.427964728097194e-06, "epoch": 2.2053645733307725, "percentage": 44.11, "elapsed_time": "1:30:08", "remaining_time": "1:54:14", "throughput": 20047.17, "total_tokens": 108434560} +{"current_steps": 34455, "total_steps": 78105, "loss": 0.2064, "lr": 3.4274459635021236e-06, "epoch": 2.2056846552717495, "percentage": 44.11, "elapsed_time": "1:30:09", "remaining_time": "1:54:13", "throughput": 20047.48, "total_tokens": 108449216} +{"current_steps": 34460, "total_steps": 78105, "loss": 0.3005, "lr": 3.426927152595232e-06, "epoch": 2.2060047372127265, "percentage": 44.12, "elapsed_time": "1:30:10", "remaining_time": "1:54:12", "throughput": 20047.84, "total_tokens": 108464768} +{"current_steps": 34465, "total_steps": 78105, "loss": 0.2335, "lr": 3.4264082954024275e-06, "epoch": 2.2063248191537035, "percentage": 44.13, "elapsed_time": "1:30:10", "remaining_time": "1:54:11", "throughput": 20048.15, "total_tokens": 108479296} +{"current_steps": 34470, "total_steps": 78105, "loss": 0.3266, "lr": 3.4258893919496182e-06, "epoch": 2.20664490109468, "percentage": 44.13, "elapsed_time": "1:30:11", "remaining_time": "1:54:10", "throughput": 20048.48, "total_tokens": 108493888} +{"current_steps": 34475, "total_steps": 78105, "loss": 0.1955, "lr": 3.4253704422627153e-06, "epoch": 2.206964983035657, "percentage": 44.14, "elapsed_time": "1:30:12", "remaining_time": "1:54:09", "throughput": 20048.85, "total_tokens": 108509248} +{"current_steps": 34480, "total_steps": 78105, "loss": 0.2876, "lr": 3.424851446367633e-06, "epoch": 2.207285064976634, "percentage": 44.15, "elapsed_time": "1:30:12", "remaining_time": "1:54:08", "throughput": 20049.38, "total_tokens": 108526464} +{"current_steps": 34485, "total_steps": 78105, "loss": 0.2008, "lr": 3.424332404290286e-06, "epoch": 2.207605146917611, "percentage": 44.15, "elapsed_time": "1:30:13", "remaining_time": "1:54:07", "throughput": 20049.8, "total_tokens": 108542208} +{"current_steps": 34490, "total_steps": 78105, "loss": 0.232, "lr": 3.4238133160565934e-06, "epoch": 2.207925228858588, "percentage": 44.16, "elapsed_time": "1:30:14", "remaining_time": "1:54:06", "throughput": 20050.16, "total_tokens": 108557056} +{"current_steps": 34495, "total_steps": 78105, "loss": 0.1912, "lr": 3.4232941816924764e-06, "epoch": 2.2082453107995645, "percentage": 44.16, "elapsed_time": "1:30:14", "remaining_time": "1:54:05", "throughput": 20050.55, "total_tokens": 108572608} +{"current_steps": 34500, "total_steps": 78105, "loss": 0.2744, "lr": 3.422775001223857e-06, "epoch": 2.2085653927405415, "percentage": 44.17, "elapsed_time": "1:30:15", "remaining_time": "1:54:04", "throughput": 20050.96, "total_tokens": 108588544} +{"current_steps": 34505, "total_steps": 78105, "loss": 0.1612, "lr": 3.4222557746766604e-06, "epoch": 2.2088854746815185, "percentage": 44.18, "elapsed_time": "1:30:16", "remaining_time": "1:54:03", "throughput": 20051.42, "total_tokens": 108604800} +{"current_steps": 34510, "total_steps": 78105, "loss": 0.1753, "lr": 3.421736502076815e-06, "epoch": 2.2092055566224955, "percentage": 44.18, "elapsed_time": "1:30:16", "remaining_time": "1:54:03", "throughput": 20051.76, "total_tokens": 108619776} +{"current_steps": 34515, "total_steps": 78105, "loss": 0.153, "lr": 3.421217183450249e-06, "epoch": 2.209525638563472, "percentage": 44.19, "elapsed_time": "1:30:17", "remaining_time": "1:54:02", "throughput": 20052.18, "total_tokens": 108636032} +{"current_steps": 34520, "total_steps": 78105, "loss": 0.2578, "lr": 3.4206978188228955e-06, "epoch": 2.209845720504449, "percentage": 44.2, "elapsed_time": "1:30:18", "remaining_time": "1:54:01", "throughput": 20052.54, "total_tokens": 108651520} +{"current_steps": 34525, "total_steps": 78105, "loss": 0.1968, "lr": 3.420178408220689e-06, "epoch": 2.210165802445426, "percentage": 44.2, "elapsed_time": "1:30:19", "remaining_time": "1:54:00", "throughput": 20053.11, "total_tokens": 108669440} +{"current_steps": 34530, "total_steps": 78105, "loss": 0.2407, "lr": 3.4196589516695655e-06, "epoch": 2.210485884386403, "percentage": 44.21, "elapsed_time": "1:30:19", "remaining_time": "1:53:59", "throughput": 20053.57, "total_tokens": 108685952} +{"current_steps": 34535, "total_steps": 78105, "loss": 0.2009, "lr": 3.419139449195465e-06, "epoch": 2.2108059663273796, "percentage": 44.22, "elapsed_time": "1:30:20", "remaining_time": "1:53:58", "throughput": 20053.92, "total_tokens": 108701440} +{"current_steps": 34540, "total_steps": 78105, "loss": 0.1482, "lr": 3.418619900824327e-06, "epoch": 2.2111260482683566, "percentage": 44.22, "elapsed_time": "1:30:21", "remaining_time": "1:53:57", "throughput": 20054.42, "total_tokens": 108718656} +{"current_steps": 34545, "total_steps": 78105, "loss": 0.3677, "lr": 3.4181003065820963e-06, "epoch": 2.2114461302093336, "percentage": 44.23, "elapsed_time": "1:30:21", "remaining_time": "1:53:56", "throughput": 20054.93, "total_tokens": 108735680} +{"current_steps": 34550, "total_steps": 78105, "loss": 0.1548, "lr": 3.4175806664947186e-06, "epoch": 2.2117662121503106, "percentage": 44.24, "elapsed_time": "1:30:22", "remaining_time": "1:53:55", "throughput": 20055.33, "total_tokens": 108751488} +{"current_steps": 34555, "total_steps": 78105, "loss": 0.2066, "lr": 3.417060980588142e-06, "epoch": 2.2120862940912875, "percentage": 44.24, "elapsed_time": "1:30:23", "remaining_time": "1:53:54", "throughput": 20055.67, "total_tokens": 108766272} +{"current_steps": 34560, "total_steps": 78105, "loss": 0.2848, "lr": 3.4165412488883173e-06, "epoch": 2.212406376032264, "percentage": 44.25, "elapsed_time": "1:30:23", "remaining_time": "1:53:54", "throughput": 20056.04, "total_tokens": 108781632} +{"current_steps": 34565, "total_steps": 78105, "loss": 0.1604, "lr": 3.416021471421196e-06, "epoch": 2.212726457973241, "percentage": 44.25, "elapsed_time": "1:30:24", "remaining_time": "1:53:53", "throughput": 20056.45, "total_tokens": 108797376} +{"current_steps": 34570, "total_steps": 78105, "loss": 0.1568, "lr": 3.415501648212734e-06, "epoch": 2.213046539914218, "percentage": 44.26, "elapsed_time": "1:30:25", "remaining_time": "1:53:52", "throughput": 20056.97, "total_tokens": 108814656} +{"current_steps": 34575, "total_steps": 78105, "loss": 0.2778, "lr": 3.414981779288888e-06, "epoch": 2.213366621855195, "percentage": 44.27, "elapsed_time": "1:30:25", "remaining_time": "1:53:51", "throughput": 20057.34, "total_tokens": 108829888} +{"current_steps": 34580, "total_steps": 78105, "loss": 0.2578, "lr": 3.4144618646756187e-06, "epoch": 2.2136867037961716, "percentage": 44.27, "elapsed_time": "1:30:26", "remaining_time": "1:53:50", "throughput": 20057.72, "total_tokens": 108845376} +{"current_steps": 34585, "total_steps": 78105, "loss": 0.1357, "lr": 3.413941904398887e-06, "epoch": 2.2140067857371486, "percentage": 44.28, "elapsed_time": "1:30:27", "remaining_time": "1:53:49", "throughput": 20058.11, "total_tokens": 108861056} +{"current_steps": 34590, "total_steps": 78105, "loss": 0.1876, "lr": 3.413421898484657e-06, "epoch": 2.2143268676781256, "percentage": 44.29, "elapsed_time": "1:30:27", "remaining_time": "1:53:48", "throughput": 20058.46, "total_tokens": 108876096} +{"current_steps": 34595, "total_steps": 78105, "loss": 0.2488, "lr": 3.412901846958895e-06, "epoch": 2.2146469496191026, "percentage": 44.29, "elapsed_time": "1:30:28", "remaining_time": "1:53:47", "throughput": 20058.9, "total_tokens": 108892288} +{"current_steps": 34600, "total_steps": 78105, "loss": 0.3183, "lr": 3.41238174984757e-06, "epoch": 2.2149670315600796, "percentage": 44.3, "elapsed_time": "1:30:29", "remaining_time": "1:53:46", "throughput": 20059.25, "total_tokens": 108907456} +{"current_steps": 34605, "total_steps": 78105, "loss": 0.1888, "lr": 3.4118616071766526e-06, "epoch": 2.215287113501056, "percentage": 44.31, "elapsed_time": "1:30:29", "remaining_time": "1:53:45", "throughput": 20059.72, "total_tokens": 108924224} +{"current_steps": 34610, "total_steps": 78105, "loss": 0.1472, "lr": 3.411341418972116e-06, "epoch": 2.215607195442033, "percentage": 44.31, "elapsed_time": "1:30:30", "remaining_time": "1:53:44", "throughput": 20060.12, "total_tokens": 108939776} +{"current_steps": 34615, "total_steps": 78105, "loss": 0.2511, "lr": 3.410821185259937e-06, "epoch": 2.21592727738301, "percentage": 44.32, "elapsed_time": "1:30:31", "remaining_time": "1:53:43", "throughput": 20060.44, "total_tokens": 108954560} +{"current_steps": 34620, "total_steps": 78105, "loss": 0.1248, "lr": 3.4103009060660917e-06, "epoch": 2.216247359323987, "percentage": 44.32, "elapsed_time": "1:30:31", "remaining_time": "1:53:42", "throughput": 20060.85, "total_tokens": 108970304} +{"current_steps": 34625, "total_steps": 78105, "loss": 0.2162, "lr": 3.4097805814165608e-06, "epoch": 2.2165674412649636, "percentage": 44.33, "elapsed_time": "1:30:32", "remaining_time": "1:53:41", "throughput": 20061.17, "total_tokens": 108984832} +{"current_steps": 34630, "total_steps": 78105, "loss": 0.1608, "lr": 3.4092602113373265e-06, "epoch": 2.2168875232059406, "percentage": 44.34, "elapsed_time": "1:30:33", "remaining_time": "1:53:41", "throughput": 20061.57, "total_tokens": 109000576} +{"current_steps": 34635, "total_steps": 78105, "loss": 0.2242, "lr": 3.4087397958543735e-06, "epoch": 2.2172076051469176, "percentage": 44.34, "elapsed_time": "1:30:34", "remaining_time": "1:53:40", "throughput": 20062.01, "total_tokens": 109017152} +{"current_steps": 34640, "total_steps": 78105, "loss": 0.2537, "lr": 3.4082193349936885e-06, "epoch": 2.2175276870878946, "percentage": 44.35, "elapsed_time": "1:30:34", "remaining_time": "1:53:39", "throughput": 20062.36, "total_tokens": 109032448} +{"current_steps": 34645, "total_steps": 78105, "loss": 0.2881, "lr": 3.407698828781261e-06, "epoch": 2.2178477690288716, "percentage": 44.36, "elapsed_time": "1:30:35", "remaining_time": "1:53:38", "throughput": 20062.69, "total_tokens": 109047296} +{"current_steps": 34650, "total_steps": 78105, "loss": 0.2061, "lr": 3.4071782772430816e-06, "epoch": 2.218167850969848, "percentage": 44.36, "elapsed_time": "1:30:35", "remaining_time": "1:53:37", "throughput": 20063.04, "total_tokens": 109062528} +{"current_steps": 34655, "total_steps": 78105, "loss": 0.2075, "lr": 3.4066576804051445e-06, "epoch": 2.218487932910825, "percentage": 44.37, "elapsed_time": "1:30:36", "remaining_time": "1:53:36", "throughput": 20063.37, "total_tokens": 109077376} +{"current_steps": 34660, "total_steps": 78105, "loss": 0.1857, "lr": 3.406137038293445e-06, "epoch": 2.218808014851802, "percentage": 44.38, "elapsed_time": "1:30:37", "remaining_time": "1:53:35", "throughput": 20063.79, "total_tokens": 109093824} +{"current_steps": 34665, "total_steps": 78105, "loss": 0.2564, "lr": 3.4056163509339816e-06, "epoch": 2.219128096792779, "percentage": 44.38, "elapsed_time": "1:30:38", "remaining_time": "1:53:34", "throughput": 20064.3, "total_tokens": 109111104} +{"current_steps": 34670, "total_steps": 78105, "loss": 0.1952, "lr": 3.4050956183527556e-06, "epoch": 2.2194481787337557, "percentage": 44.39, "elapsed_time": "1:30:38", "remaining_time": "1:53:33", "throughput": 20064.66, "total_tokens": 109126528} +{"current_steps": 34675, "total_steps": 78105, "loss": 0.1603, "lr": 3.4045748405757683e-06, "epoch": 2.2197682606747327, "percentage": 44.4, "elapsed_time": "1:30:39", "remaining_time": "1:53:32", "throughput": 20065.07, "total_tokens": 109142464} +{"current_steps": 34680, "total_steps": 78105, "loss": 0.2068, "lr": 3.404054017629026e-06, "epoch": 2.2200883426157096, "percentage": 44.4, "elapsed_time": "1:30:40", "remaining_time": "1:53:31", "throughput": 20065.45, "total_tokens": 109158144} +{"current_steps": 34685, "total_steps": 78105, "loss": 0.146, "lr": 3.403533149538535e-06, "epoch": 2.2204084245566866, "percentage": 44.41, "elapsed_time": "1:30:40", "remaining_time": "1:53:30", "throughput": 20065.87, "total_tokens": 109173952} +{"current_steps": 34690, "total_steps": 78105, "loss": 0.1901, "lr": 3.4030122363303037e-06, "epoch": 2.2207285064976636, "percentage": 44.41, "elapsed_time": "1:30:41", "remaining_time": "1:53:30", "throughput": 20066.3, "total_tokens": 109190144} +{"current_steps": 34695, "total_steps": 78105, "loss": 0.3357, "lr": 3.4024912780303457e-06, "epoch": 2.22104858843864, "percentage": 44.42, "elapsed_time": "1:30:42", "remaining_time": "1:53:29", "throughput": 20066.71, "total_tokens": 109206080} +{"current_steps": 34700, "total_steps": 78105, "loss": 0.1982, "lr": 3.4019702746646745e-06, "epoch": 2.221368670379617, "percentage": 44.43, "elapsed_time": "1:30:42", "remaining_time": "1:53:28", "throughput": 20067.13, "total_tokens": 109222528} +{"current_steps": 34705, "total_steps": 78105, "loss": 0.2625, "lr": 3.401449226259306e-06, "epoch": 2.221688752320594, "percentage": 44.43, "elapsed_time": "1:30:43", "remaining_time": "1:53:27", "throughput": 20067.46, "total_tokens": 109237376} +{"current_steps": 34710, "total_steps": 78105, "loss": 0.2085, "lr": 3.4009281328402583e-06, "epoch": 2.222008834261571, "percentage": 44.44, "elapsed_time": "1:30:44", "remaining_time": "1:53:26", "throughput": 20067.85, "total_tokens": 109253120} +{"current_steps": 34715, "total_steps": 78105, "loss": 0.2405, "lr": 3.4004069944335515e-06, "epoch": 2.2223289162025477, "percentage": 44.45, "elapsed_time": "1:30:44", "remaining_time": "1:53:25", "throughput": 20068.27, "total_tokens": 109269184} +{"current_steps": 34720, "total_steps": 78105, "loss": 0.213, "lr": 3.39988581106521e-06, "epoch": 2.2226489981435247, "percentage": 44.45, "elapsed_time": "1:30:45", "remaining_time": "1:53:24", "throughput": 20068.59, "total_tokens": 109284096} +{"current_steps": 34725, "total_steps": 78105, "loss": 0.3387, "lr": 3.399364582761259e-06, "epoch": 2.2229690800845017, "percentage": 44.46, "elapsed_time": "1:30:46", "remaining_time": "1:53:23", "throughput": 20068.93, "total_tokens": 109299200} +{"current_steps": 34730, "total_steps": 78105, "loss": 0.1595, "lr": 3.3988433095477252e-06, "epoch": 2.2232891620254787, "percentage": 44.47, "elapsed_time": "1:30:46", "remaining_time": "1:53:22", "throughput": 20069.29, "total_tokens": 109314176} +{"current_steps": 34735, "total_steps": 78105, "loss": 0.3225, "lr": 3.398321991450639e-06, "epoch": 2.223609243966455, "percentage": 44.47, "elapsed_time": "1:30:47", "remaining_time": "1:53:21", "throughput": 20069.62, "total_tokens": 109329344} +{"current_steps": 34740, "total_steps": 78105, "loss": 0.2605, "lr": 3.3978006284960306e-06, "epoch": 2.223929325907432, "percentage": 44.48, "elapsed_time": "1:30:48", "remaining_time": "1:53:20", "throughput": 20070.01, "total_tokens": 109345280} +{"current_steps": 34745, "total_steps": 78105, "loss": 0.1708, "lr": 3.397279220709936e-06, "epoch": 2.224249407848409, "percentage": 44.48, "elapsed_time": "1:30:48", "remaining_time": "1:53:19", "throughput": 20070.45, "total_tokens": 109361664} +{"current_steps": 34750, "total_steps": 78105, "loss": 0.2029, "lr": 3.3967577681183906e-06, "epoch": 2.224569489789386, "percentage": 44.49, "elapsed_time": "1:30:49", "remaining_time": "1:53:19", "throughput": 20070.84, "total_tokens": 109377216} +{"current_steps": 34755, "total_steps": 78105, "loss": 0.304, "lr": 3.396236270747433e-06, "epoch": 2.224889571730363, "percentage": 44.5, "elapsed_time": "1:30:50", "remaining_time": "1:53:18", "throughput": 20071.26, "total_tokens": 109393216} +{"current_steps": 34760, "total_steps": 78105, "loss": 0.213, "lr": 3.395714728623105e-06, "epoch": 2.2252096536713397, "percentage": 44.5, "elapsed_time": "1:30:50", "remaining_time": "1:53:17", "throughput": 20071.68, "total_tokens": 109409536} +{"current_steps": 34765, "total_steps": 78105, "loss": 0.1936, "lr": 3.395193141771449e-06, "epoch": 2.2255297356123167, "percentage": 44.51, "elapsed_time": "1:30:51", "remaining_time": "1:53:16", "throughput": 20072.2, "total_tokens": 109426624} +{"current_steps": 34770, "total_steps": 78105, "loss": 0.2805, "lr": 3.39467151021851e-06, "epoch": 2.2258498175532937, "percentage": 44.52, "elapsed_time": "1:30:52", "remaining_time": "1:53:15", "throughput": 20072.66, "total_tokens": 109443328} +{"current_steps": 34775, "total_steps": 78105, "loss": 0.1941, "lr": 3.3941498339903357e-06, "epoch": 2.2261698994942707, "percentage": 44.52, "elapsed_time": "1:30:53", "remaining_time": "1:53:14", "throughput": 20073.06, "total_tokens": 109459456} +{"current_steps": 34780, "total_steps": 78105, "loss": 0.1622, "lr": 3.3936281131129762e-06, "epoch": 2.2264899814352472, "percentage": 44.53, "elapsed_time": "1:30:53", "remaining_time": "1:53:13", "throughput": 20073.49, "total_tokens": 109475968} +{"current_steps": 34785, "total_steps": 78105, "loss": 0.1761, "lr": 3.3931063476124845e-06, "epoch": 2.2268100633762242, "percentage": 44.54, "elapsed_time": "1:30:54", "remaining_time": "1:53:12", "throughput": 20073.92, "total_tokens": 109492160} +{"current_steps": 34790, "total_steps": 78105, "loss": 0.2776, "lr": 3.392584537514913e-06, "epoch": 2.227130145317201, "percentage": 44.54, "elapsed_time": "1:30:55", "remaining_time": "1:53:11", "throughput": 20074.2, "total_tokens": 109506560} +{"current_steps": 34795, "total_steps": 78105, "loss": 0.1537, "lr": 3.392062682846319e-06, "epoch": 2.227450227258178, "percentage": 44.55, "elapsed_time": "1:30:55", "remaining_time": "1:53:10", "throughput": 20074.56, "total_tokens": 109521600} +{"current_steps": 34800, "total_steps": 78105, "loss": 0.2741, "lr": 3.391540783632762e-06, "epoch": 2.2277703091991548, "percentage": 44.56, "elapsed_time": "1:30:56", "remaining_time": "1:53:09", "throughput": 20074.83, "total_tokens": 109535616} +{"current_steps": 34805, "total_steps": 78105, "loss": 0.2724, "lr": 3.391018839900301e-06, "epoch": 2.2280903911401317, "percentage": 44.56, "elapsed_time": "1:30:57", "remaining_time": "1:53:08", "throughput": 20075.25, "total_tokens": 109551488} +{"current_steps": 34810, "total_steps": 78105, "loss": 0.2132, "lr": 3.3904968516750002e-06, "epoch": 2.2284104730811087, "percentage": 44.57, "elapsed_time": "1:30:57", "remaining_time": "1:53:08", "throughput": 20075.58, "total_tokens": 109566592} +{"current_steps": 34815, "total_steps": 78105, "loss": 0.283, "lr": 3.3899748189829263e-06, "epoch": 2.2287305550220857, "percentage": 44.57, "elapsed_time": "1:30:58", "remaining_time": "1:53:07", "throughput": 20075.97, "total_tokens": 109581952} +{"current_steps": 34820, "total_steps": 78105, "loss": 0.1845, "lr": 3.389452741850145e-06, "epoch": 2.2290506369630627, "percentage": 44.58, "elapsed_time": "1:30:59", "remaining_time": "1:53:06", "throughput": 20076.35, "total_tokens": 109597632} +{"current_steps": 34825, "total_steps": 78105, "loss": 0.1912, "lr": 3.3889306203027266e-06, "epoch": 2.2293707189040393, "percentage": 44.59, "elapsed_time": "1:30:59", "remaining_time": "1:53:05", "throughput": 20076.74, "total_tokens": 109613248} +{"current_steps": 34830, "total_steps": 78105, "loss": 0.2176, "lr": 3.388408454366743e-06, "epoch": 2.2296908008450163, "percentage": 44.59, "elapsed_time": "1:31:00", "remaining_time": "1:53:04", "throughput": 20077.15, "total_tokens": 109628992} +{"current_steps": 34835, "total_steps": 78105, "loss": 0.1843, "lr": 3.387886244068269e-06, "epoch": 2.2300108827859932, "percentage": 44.6, "elapsed_time": "1:31:01", "remaining_time": "1:53:03", "throughput": 20077.52, "total_tokens": 109644352} +{"current_steps": 34840, "total_steps": 78105, "loss": 0.2576, "lr": 3.387363989433381e-06, "epoch": 2.2303309647269702, "percentage": 44.61, "elapsed_time": "1:31:01", "remaining_time": "1:53:02", "throughput": 20077.97, "total_tokens": 109660736} +{"current_steps": 34845, "total_steps": 78105, "loss": 0.2127, "lr": 3.3868416904881575e-06, "epoch": 2.230651046667947, "percentage": 44.61, "elapsed_time": "1:31:02", "remaining_time": "1:53:01", "throughput": 20078.28, "total_tokens": 109675648} +{"current_steps": 34850, "total_steps": 78105, "loss": 0.3412, "lr": 3.3863193472586797e-06, "epoch": 2.2309711286089238, "percentage": 44.62, "elapsed_time": "1:31:03", "remaining_time": "1:53:00", "throughput": 20078.66, "total_tokens": 109691264} +{"current_steps": 34855, "total_steps": 78105, "loss": 0.2122, "lr": 3.38579695977103e-06, "epoch": 2.2312912105499008, "percentage": 44.63, "elapsed_time": "1:31:03", "remaining_time": "1:52:59", "throughput": 20079.15, "total_tokens": 109708544} +{"current_steps": 34860, "total_steps": 78105, "loss": 0.2122, "lr": 3.3852745280512933e-06, "epoch": 2.2316112924908778, "percentage": 44.63, "elapsed_time": "1:31:04", "remaining_time": "1:52:58", "throughput": 20079.52, "total_tokens": 109723968} +{"current_steps": 34865, "total_steps": 78105, "loss": 0.3039, "lr": 3.384752052125559e-06, "epoch": 2.2319313744318547, "percentage": 44.64, "elapsed_time": "1:31:05", "remaining_time": "1:52:57", "throughput": 20079.9, "total_tokens": 109739456} +{"current_steps": 34870, "total_steps": 78105, "loss": 0.2535, "lr": 3.384229532019915e-06, "epoch": 2.2322514563728313, "percentage": 44.65, "elapsed_time": "1:31:05", "remaining_time": "1:52:57", "throughput": 20080.29, "total_tokens": 109755200} +{"current_steps": 34875, "total_steps": 78105, "loss": 0.176, "lr": 3.383706967760455e-06, "epoch": 2.2325715383138083, "percentage": 44.65, "elapsed_time": "1:31:06", "remaining_time": "1:52:56", "throughput": 20080.66, "total_tokens": 109770944} +{"current_steps": 34880, "total_steps": 78105, "loss": 0.1979, "lr": 3.3831843593732715e-06, "epoch": 2.2328916202547853, "percentage": 44.66, "elapsed_time": "1:31:07", "remaining_time": "1:52:55", "throughput": 20081.07, "total_tokens": 109786624} +{"current_steps": 34885, "total_steps": 78105, "loss": 0.2653, "lr": 3.382661706884461e-06, "epoch": 2.2332117021957623, "percentage": 44.66, "elapsed_time": "1:31:07", "remaining_time": "1:52:54", "throughput": 20081.41, "total_tokens": 109801344} +{"current_steps": 34890, "total_steps": 78105, "loss": 0.3099, "lr": 3.382139010320123e-06, "epoch": 2.233531784136739, "percentage": 44.67, "elapsed_time": "1:31:08", "remaining_time": "1:52:53", "throughput": 20081.87, "total_tokens": 109818048} +{"current_steps": 34895, "total_steps": 78105, "loss": 0.1537, "lr": 3.381616269706357e-06, "epoch": 2.233851866077716, "percentage": 44.68, "elapsed_time": "1:31:09", "remaining_time": "1:52:52", "throughput": 20082.38, "total_tokens": 109835328} +{"current_steps": 34900, "total_steps": 78105, "loss": 0.1815, "lr": 3.381093485069267e-06, "epoch": 2.234171948018693, "percentage": 44.68, "elapsed_time": "1:31:09", "remaining_time": "1:52:51", "throughput": 20082.85, "total_tokens": 109852160} +{"current_steps": 34905, "total_steps": 78105, "loss": 0.2513, "lr": 3.3805706564349583e-06, "epoch": 2.23449202995967, "percentage": 44.69, "elapsed_time": "1:31:10", "remaining_time": "1:52:50", "throughput": 20083.29, "total_tokens": 109868352} +{"current_steps": 34910, "total_steps": 78105, "loss": 0.2768, "lr": 3.3800477838295375e-06, "epoch": 2.2348121119006468, "percentage": 44.7, "elapsed_time": "1:31:11", "remaining_time": "1:52:49", "throughput": 20083.62, "total_tokens": 109883200} +{"current_steps": 34915, "total_steps": 78105, "loss": 0.1918, "lr": 3.379524867279115e-06, "epoch": 2.2351321938416233, "percentage": 44.7, "elapsed_time": "1:31:11", "remaining_time": "1:52:48", "throughput": 20084.01, "total_tokens": 109898944} +{"current_steps": 34920, "total_steps": 78105, "loss": 0.2025, "lr": 3.3790019068098e-06, "epoch": 2.2354522757826003, "percentage": 44.71, "elapsed_time": "1:31:12", "remaining_time": "1:52:47", "throughput": 20084.36, "total_tokens": 109913920} +{"current_steps": 34925, "total_steps": 78105, "loss": 0.2429, "lr": 3.37847890244771e-06, "epoch": 2.2357723577235773, "percentage": 44.72, "elapsed_time": "1:31:13", "remaining_time": "1:52:47", "throughput": 20084.85, "total_tokens": 109931008} +{"current_steps": 34930, "total_steps": 78105, "loss": 0.1842, "lr": 3.3779558542189595e-06, "epoch": 2.2360924396645543, "percentage": 44.72, "elapsed_time": "1:31:13", "remaining_time": "1:52:46", "throughput": 20085.22, "total_tokens": 109946368} +{"current_steps": 34935, "total_steps": 78105, "loss": 0.2256, "lr": 3.377432762149666e-06, "epoch": 2.236412521605531, "percentage": 44.73, "elapsed_time": "1:31:14", "remaining_time": "1:52:45", "throughput": 20085.53, "total_tokens": 109960960} +{"current_steps": 34940, "total_steps": 78105, "loss": 0.1823, "lr": 3.376909626265951e-06, "epoch": 2.236732603546508, "percentage": 44.73, "elapsed_time": "1:31:15", "remaining_time": "1:52:44", "throughput": 20086.05, "total_tokens": 109978368} +{"current_steps": 34945, "total_steps": 78105, "loss": 0.215, "lr": 3.3763864465939367e-06, "epoch": 2.237052685487485, "percentage": 44.74, "elapsed_time": "1:31:16", "remaining_time": "1:52:43", "throughput": 20086.47, "total_tokens": 109994304} +{"current_steps": 34950, "total_steps": 78105, "loss": 0.234, "lr": 3.375863223159749e-06, "epoch": 2.237372767428462, "percentage": 44.75, "elapsed_time": "1:31:16", "remaining_time": "1:52:42", "throughput": 20086.8, "total_tokens": 110009280} +{"current_steps": 34955, "total_steps": 78105, "loss": 0.2539, "lr": 3.3753399559895127e-06, "epoch": 2.237692849369439, "percentage": 44.75, "elapsed_time": "1:31:17", "remaining_time": "1:52:41", "throughput": 20087.34, "total_tokens": 110026816} +{"current_steps": 34960, "total_steps": 78105, "loss": 0.2392, "lr": 3.3748166451093596e-06, "epoch": 2.2380129313104153, "percentage": 44.76, "elapsed_time": "1:31:18", "remaining_time": "1:52:40", "throughput": 20087.7, "total_tokens": 110042240} +{"current_steps": 34965, "total_steps": 78105, "loss": 0.259, "lr": 3.3742932905454194e-06, "epoch": 2.2383330132513923, "percentage": 44.77, "elapsed_time": "1:31:18", "remaining_time": "1:52:39", "throughput": 20088.04, "total_tokens": 110057600} +{"current_steps": 34970, "total_steps": 78105, "loss": 0.1604, "lr": 3.3737698923238273e-06, "epoch": 2.2386530951923693, "percentage": 44.77, "elapsed_time": "1:31:19", "remaining_time": "1:52:38", "throughput": 20088.46, "total_tokens": 110073856} +{"current_steps": 34975, "total_steps": 78105, "loss": 0.2358, "lr": 3.3732464504707164e-06, "epoch": 2.2389731771333463, "percentage": 44.78, "elapsed_time": "1:31:20", "remaining_time": "1:52:37", "throughput": 20088.83, "total_tokens": 110089536} +{"current_steps": 34980, "total_steps": 78105, "loss": 0.3496, "lr": 3.3727229650122274e-06, "epoch": 2.239293259074323, "percentage": 44.79, "elapsed_time": "1:31:20", "remaining_time": "1:52:37", "throughput": 20089.24, "total_tokens": 110105536} +{"current_steps": 34985, "total_steps": 78105, "loss": 0.2086, "lr": 3.3721994359745e-06, "epoch": 2.2396133410153, "percentage": 44.79, "elapsed_time": "1:31:21", "remaining_time": "1:52:36", "throughput": 20089.68, "total_tokens": 110121920} +{"current_steps": 34990, "total_steps": 78105, "loss": 0.2684, "lr": 3.371675863383675e-06, "epoch": 2.239933422956277, "percentage": 44.8, "elapsed_time": "1:31:22", "remaining_time": "1:52:35", "throughput": 20090.01, "total_tokens": 110136704} +{"current_steps": 34995, "total_steps": 78105, "loss": 0.204, "lr": 3.371152247265898e-06, "epoch": 2.240253504897254, "percentage": 44.81, "elapsed_time": "1:31:22", "remaining_time": "1:52:34", "throughput": 20090.46, "total_tokens": 110153472} +{"current_steps": 35000, "total_steps": 78105, "loss": 0.184, "lr": 3.370628587647315e-06, "epoch": 2.2405735868382304, "percentage": 44.81, "elapsed_time": "1:31:23", "remaining_time": "1:52:33", "throughput": 20090.82, "total_tokens": 110168768} +{"current_steps": 35005, "total_steps": 78105, "loss": 0.2724, "lr": 3.3701048845540757e-06, "epoch": 2.2408936687792074, "percentage": 44.82, "elapsed_time": "1:31:24", "remaining_time": "1:52:32", "throughput": 20091.27, "total_tokens": 110185216} +{"current_steps": 35010, "total_steps": 78105, "loss": 0.219, "lr": 3.369581138012331e-06, "epoch": 2.2412137507201844, "percentage": 44.82, "elapsed_time": "1:31:24", "remaining_time": "1:52:31", "throughput": 20091.65, "total_tokens": 110200576} +{"current_steps": 35015, "total_steps": 78105, "loss": 0.2744, "lr": 3.369057348048233e-06, "epoch": 2.2415338326611614, "percentage": 44.83, "elapsed_time": "1:31:25", "remaining_time": "1:52:30", "throughput": 20092.02, "total_tokens": 110216192} +{"current_steps": 35020, "total_steps": 78105, "loss": 0.1438, "lr": 3.3685335146879384e-06, "epoch": 2.2418539146021383, "percentage": 44.84, "elapsed_time": "1:31:26", "remaining_time": "1:52:29", "throughput": 20092.37, "total_tokens": 110231296} +{"current_steps": 35025, "total_steps": 78105, "loss": 0.1885, "lr": 3.3680096379576037e-06, "epoch": 2.242173996543115, "percentage": 44.84, "elapsed_time": "1:31:26", "remaining_time": "1:52:28", "throughput": 20092.7, "total_tokens": 110246144} +{"current_steps": 35030, "total_steps": 78105, "loss": 0.1477, "lr": 3.367485717883389e-06, "epoch": 2.242494078484092, "percentage": 44.85, "elapsed_time": "1:31:27", "remaining_time": "1:52:27", "throughput": 20093.06, "total_tokens": 110261440} +{"current_steps": 35035, "total_steps": 78105, "loss": 0.2131, "lr": 3.3669617544914562e-06, "epoch": 2.242814160425069, "percentage": 44.86, "elapsed_time": "1:31:28", "remaining_time": "1:52:26", "throughput": 20093.39, "total_tokens": 110276416} +{"current_steps": 35040, "total_steps": 78105, "loss": 0.1741, "lr": 3.36643774780797e-06, "epoch": 2.243134242366046, "percentage": 44.86, "elapsed_time": "1:31:28", "remaining_time": "1:52:25", "throughput": 20093.69, "total_tokens": 110290944} +{"current_steps": 35045, "total_steps": 78105, "loss": 0.263, "lr": 3.3659136978590946e-06, "epoch": 2.2434543243070224, "percentage": 44.87, "elapsed_time": "1:31:29", "remaining_time": "1:52:24", "throughput": 20094.05, "total_tokens": 110306048} +{"current_steps": 35050, "total_steps": 78105, "loss": 0.2051, "lr": 3.365389604671e-06, "epoch": 2.2437744062479994, "percentage": 44.88, "elapsed_time": "1:31:30", "remaining_time": "1:52:24", "throughput": 20094.44, "total_tokens": 110322240} +{"current_steps": 35055, "total_steps": 78105, "loss": 0.2442, "lr": 3.364865468269857e-06, "epoch": 2.2440944881889764, "percentage": 44.88, "elapsed_time": "1:31:30", "remaining_time": "1:52:23", "throughput": 20094.89, "total_tokens": 110338560} +{"current_steps": 35060, "total_steps": 78105, "loss": 0.2658, "lr": 3.364341288681836e-06, "epoch": 2.2444145701299534, "percentage": 44.89, "elapsed_time": "1:31:31", "remaining_time": "1:52:22", "throughput": 20095.24, "total_tokens": 110353728} +{"current_steps": 35065, "total_steps": 78105, "loss": 0.3488, "lr": 3.3638170659331138e-06, "epoch": 2.24473465207093, "percentage": 44.89, "elapsed_time": "1:31:32", "remaining_time": "1:52:21", "throughput": 20095.65, "total_tokens": 110369472} +{"current_steps": 35070, "total_steps": 78105, "loss": 0.2753, "lr": 3.3632928000498667e-06, "epoch": 2.245054734011907, "percentage": 44.9, "elapsed_time": "1:31:32", "remaining_time": "1:52:20", "throughput": 20096.0, "total_tokens": 110384832} +{"current_steps": 35075, "total_steps": 78105, "loss": 0.2681, "lr": 3.3627684910582737e-06, "epoch": 2.245374815952884, "percentage": 44.91, "elapsed_time": "1:31:33", "remaining_time": "1:52:19", "throughput": 20096.34, "total_tokens": 110399872} +{"current_steps": 35080, "total_steps": 78105, "loss": 0.2491, "lr": 3.3622441389845166e-06, "epoch": 2.245694897893861, "percentage": 44.91, "elapsed_time": "1:31:34", "remaining_time": "1:52:18", "throughput": 20096.77, "total_tokens": 110416064} +{"current_steps": 35085, "total_steps": 78105, "loss": 0.1607, "lr": 3.3617197438547787e-06, "epoch": 2.246014979834838, "percentage": 44.92, "elapsed_time": "1:31:34", "remaining_time": "1:52:17", "throughput": 20097.18, "total_tokens": 110432064} +{"current_steps": 35090, "total_steps": 78105, "loss": 0.2389, "lr": 3.361195305695245e-06, "epoch": 2.2463350617758144, "percentage": 44.93, "elapsed_time": "1:31:35", "remaining_time": "1:52:16", "throughput": 20097.59, "total_tokens": 110448256} +{"current_steps": 35095, "total_steps": 78105, "loss": 0.2655, "lr": 3.3606708245321035e-06, "epoch": 2.2466551437167914, "percentage": 44.93, "elapsed_time": "1:31:36", "remaining_time": "1:52:15", "throughput": 20098.01, "total_tokens": 110464576} +{"current_steps": 35100, "total_steps": 78105, "loss": 0.2452, "lr": 3.360146300391545e-06, "epoch": 2.2469752256577684, "percentage": 44.94, "elapsed_time": "1:31:36", "remaining_time": "1:52:14", "throughput": 20098.34, "total_tokens": 110479680} +{"current_steps": 35105, "total_steps": 78105, "loss": 0.3262, "lr": 3.3596217332997594e-06, "epoch": 2.2472953075987454, "percentage": 44.95, "elapsed_time": "1:31:37", "remaining_time": "1:52:14", "throughput": 20098.72, "total_tokens": 110495296} +{"current_steps": 35110, "total_steps": 78105, "loss": 0.2149, "lr": 3.359097123282943e-06, "epoch": 2.247615389539722, "percentage": 44.95, "elapsed_time": "1:31:38", "remaining_time": "1:52:13", "throughput": 20099.16, "total_tokens": 110511232} +{"current_steps": 35115, "total_steps": 78105, "loss": 0.241, "lr": 3.3585724703672894e-06, "epoch": 2.247935471480699, "percentage": 44.96, "elapsed_time": "1:31:38", "remaining_time": "1:52:12", "throughput": 20099.56, "total_tokens": 110527232} +{"current_steps": 35120, "total_steps": 78105, "loss": 0.2237, "lr": 3.358047774579e-06, "epoch": 2.248255553421676, "percentage": 44.97, "elapsed_time": "1:31:39", "remaining_time": "1:52:11", "throughput": 20099.9, "total_tokens": 110542464} +{"current_steps": 35125, "total_steps": 78105, "loss": 0.2935, "lr": 3.3575230359442747e-06, "epoch": 2.248575635362653, "percentage": 44.97, "elapsed_time": "1:31:40", "remaining_time": "1:52:10", "throughput": 20100.3, "total_tokens": 110558336} +{"current_steps": 35130, "total_steps": 78105, "loss": 0.3091, "lr": 3.3569982544893144e-06, "epoch": 2.24889571730363, "percentage": 44.98, "elapsed_time": "1:31:41", "remaining_time": "1:52:09", "throughput": 20100.64, "total_tokens": 110573632} +{"current_steps": 35135, "total_steps": 78105, "loss": 0.2674, "lr": 3.356473430240326e-06, "epoch": 2.2492157992446065, "percentage": 44.98, "elapsed_time": "1:31:41", "remaining_time": "1:52:08", "throughput": 20101.05, "total_tokens": 110590080} +{"current_steps": 35140, "total_steps": 78105, "loss": 0.2304, "lr": 3.3559485632235155e-06, "epoch": 2.2495358811855835, "percentage": 44.99, "elapsed_time": "1:31:42", "remaining_time": "1:52:07", "throughput": 20101.52, "total_tokens": 110606464} +{"current_steps": 35145, "total_steps": 78105, "loss": 0.1817, "lr": 3.355423653465093e-06, "epoch": 2.2498559631265604, "percentage": 45.0, "elapsed_time": "1:31:43", "remaining_time": "1:52:06", "throughput": 20101.85, "total_tokens": 110621376} +{"current_steps": 35150, "total_steps": 78105, "loss": 0.2663, "lr": 3.3548987009912676e-06, "epoch": 2.2501760450675374, "percentage": 45.0, "elapsed_time": "1:31:43", "remaining_time": "1:52:05", "throughput": 20102.23, "total_tokens": 110637184} +{"current_steps": 35154, "total_steps": 78105, "eval_loss": 0.5257287621498108, "epoch": 2.2504321106203187, "percentage": 45.01, "elapsed_time": "1:32:35", "remaining_time": "1:53:07", "throughput": 19918.1, "total_tokens": 110649216} +{"current_steps": 35155, "total_steps": 78105, "loss": 0.2498, "lr": 3.354373705828255e-06, "epoch": 2.250496127008514, "percentage": 45.01, "elapsed_time": "1:33:17", "remaining_time": "1:53:58", "throughput": 19767.82, "total_tokens": 110652352} +{"current_steps": 35160, "total_steps": 78105, "loss": 0.2259, "lr": 3.3538486680022695e-06, "epoch": 2.250816208949491, "percentage": 45.02, "elapsed_time": "1:33:18", "remaining_time": "1:53:57", "throughput": 19768.15, "total_tokens": 110667328} +{"current_steps": 35165, "total_steps": 78105, "loss": 0.2664, "lr": 3.3533235875395293e-06, "epoch": 2.251136290890468, "percentage": 45.02, "elapsed_time": "1:33:18", "remaining_time": "1:53:56", "throughput": 19768.5, "total_tokens": 110682432} +{"current_steps": 35170, "total_steps": 78105, "loss": 0.1614, "lr": 3.3527984644662538e-06, "epoch": 2.251456372831445, "percentage": 45.03, "elapsed_time": "1:33:19", "remaining_time": "1:53:55", "throughput": 19768.88, "total_tokens": 110697856} +{"current_steps": 35175, "total_steps": 78105, "loss": 0.2603, "lr": 3.352273298808665e-06, "epoch": 2.251776454772422, "percentage": 45.04, "elapsed_time": "1:33:20", "remaining_time": "1:53:54", "throughput": 19769.24, "total_tokens": 110712832} +{"current_steps": 35180, "total_steps": 78105, "loss": 0.1891, "lr": 3.351748090592987e-06, "epoch": 2.2520965367133985, "percentage": 45.04, "elapsed_time": "1:33:20", "remaining_time": "1:53:53", "throughput": 19769.57, "total_tokens": 110727552} +{"current_steps": 35185, "total_steps": 78105, "loss": 0.2633, "lr": 3.351222839845446e-06, "epoch": 2.2524166186543755, "percentage": 45.05, "elapsed_time": "1:33:21", "remaining_time": "1:53:53", "throughput": 19770.01, "total_tokens": 110743680} +{"current_steps": 35190, "total_steps": 78105, "loss": 0.1996, "lr": 3.3506975465922697e-06, "epoch": 2.2527367005953525, "percentage": 45.05, "elapsed_time": "1:33:22", "remaining_time": "1:53:52", "throughput": 19770.54, "total_tokens": 110760512} +{"current_steps": 35195, "total_steps": 78105, "loss": 0.2406, "lr": 3.3501722108596896e-06, "epoch": 2.2530567825363295, "percentage": 45.06, "elapsed_time": "1:33:22", "remaining_time": "1:53:51", "throughput": 19770.93, "total_tokens": 110775616} +{"current_steps": 35200, "total_steps": 78105, "loss": 0.1835, "lr": 3.3496468326739377e-06, "epoch": 2.253376864477306, "percentage": 45.07, "elapsed_time": "1:33:23", "remaining_time": "1:53:50", "throughput": 19771.33, "total_tokens": 110790912} +{"current_steps": 35205, "total_steps": 78105, "loss": 0.2762, "lr": 3.349121412061248e-06, "epoch": 2.253696946418283, "percentage": 45.07, "elapsed_time": "1:33:24", "remaining_time": "1:53:49", "throughput": 19771.73, "total_tokens": 110806208} +{"current_steps": 35210, "total_steps": 78105, "loss": 0.2097, "lr": 3.3485959490478587e-06, "epoch": 2.25401702835926, "percentage": 45.08, "elapsed_time": "1:33:24", "remaining_time": "1:53:48", "throughput": 19772.14, "total_tokens": 110822080} +{"current_steps": 35215, "total_steps": 78105, "loss": 0.3238, "lr": 3.3480704436600083e-06, "epoch": 2.254337110300237, "percentage": 45.09, "elapsed_time": "1:33:25", "remaining_time": "1:53:47", "throughput": 19772.63, "total_tokens": 110838720} +{"current_steps": 35220, "total_steps": 78105, "loss": 0.2346, "lr": 3.3475448959239364e-06, "epoch": 2.254657192241214, "percentage": 45.09, "elapsed_time": "1:33:26", "remaining_time": "1:53:46", "throughput": 19773.13, "total_tokens": 110855488} +{"current_steps": 35225, "total_steps": 78105, "loss": 0.321, "lr": 3.3470193058658874e-06, "epoch": 2.2549772741821905, "percentage": 45.1, "elapsed_time": "1:33:27", "remaining_time": "1:53:45", "throughput": 19773.52, "total_tokens": 110870784} +{"current_steps": 35230, "total_steps": 78105, "loss": 0.2382, "lr": 3.346493673512106e-06, "epoch": 2.2552973561231675, "percentage": 45.11, "elapsed_time": "1:33:27", "remaining_time": "1:53:44", "throughput": 19773.9, "total_tokens": 110886016} +{"current_steps": 35235, "total_steps": 78105, "loss": 0.1633, "lr": 3.34596799888884e-06, "epoch": 2.2556174380641445, "percentage": 45.11, "elapsed_time": "1:33:28", "remaining_time": "1:53:43", "throughput": 19774.3, "total_tokens": 110901568} +{"current_steps": 35240, "total_steps": 78105, "loss": 0.1878, "lr": 3.345442282022339e-06, "epoch": 2.2559375200051215, "percentage": 45.12, "elapsed_time": "1:33:29", "remaining_time": "1:53:42", "throughput": 19774.72, "total_tokens": 110917056} +{"current_steps": 35245, "total_steps": 78105, "loss": 0.1487, "lr": 3.344916522938853e-06, "epoch": 2.256257601946098, "percentage": 45.13, "elapsed_time": "1:33:29", "remaining_time": "1:53:41", "throughput": 19775.16, "total_tokens": 110932992} +{"current_steps": 35250, "total_steps": 78105, "loss": 0.2015, "lr": 3.3443907216646378e-06, "epoch": 2.256577683887075, "percentage": 45.13, "elapsed_time": "1:33:30", "remaining_time": "1:53:40", "throughput": 19775.55, "total_tokens": 110947904} +{"current_steps": 35255, "total_steps": 78105, "loss": 0.1884, "lr": 3.3438648782259487e-06, "epoch": 2.256897765828052, "percentage": 45.14, "elapsed_time": "1:33:31", "remaining_time": "1:53:39", "throughput": 19775.95, "total_tokens": 110963392} +{"current_steps": 35260, "total_steps": 78105, "loss": 0.2113, "lr": 3.343338992649042e-06, "epoch": 2.257217847769029, "percentage": 45.14, "elapsed_time": "1:33:31", "remaining_time": "1:53:38", "throughput": 19776.35, "total_tokens": 110979200} +{"current_steps": 35265, "total_steps": 78105, "loss": 0.2611, "lr": 3.342813064960179e-06, "epoch": 2.257537929710006, "percentage": 45.15, "elapsed_time": "1:33:32", "remaining_time": "1:53:38", "throughput": 19776.85, "total_tokens": 110996544} +{"current_steps": 35270, "total_steps": 78105, "loss": 0.1967, "lr": 3.3422870951856222e-06, "epoch": 2.2578580116509825, "percentage": 45.16, "elapsed_time": "1:33:33", "remaining_time": "1:53:37", "throughput": 19777.34, "total_tokens": 111013376} +{"current_steps": 35275, "total_steps": 78105, "loss": 0.1391, "lr": 3.3417610833516346e-06, "epoch": 2.2581780935919595, "percentage": 45.16, "elapsed_time": "1:33:33", "remaining_time": "1:53:36", "throughput": 19777.68, "total_tokens": 111028608} +{"current_steps": 35280, "total_steps": 78105, "loss": 0.214, "lr": 3.3412350294844836e-06, "epoch": 2.2584981755329365, "percentage": 45.17, "elapsed_time": "1:33:34", "remaining_time": "1:53:35", "throughput": 19778.06, "total_tokens": 111044480} +{"current_steps": 35285, "total_steps": 78105, "loss": 0.1921, "lr": 3.340708933610437e-06, "epoch": 2.2588182574739135, "percentage": 45.18, "elapsed_time": "1:33:35", "remaining_time": "1:53:34", "throughput": 19778.43, "total_tokens": 111059840} +{"current_steps": 35290, "total_steps": 78105, "loss": 0.1358, "lr": 3.340182795755765e-06, "epoch": 2.25913833941489, "percentage": 45.18, "elapsed_time": "1:33:35", "remaining_time": "1:53:33", "throughput": 19778.73, "total_tokens": 111074368} +{"current_steps": 35295, "total_steps": 78105, "loss": 0.3094, "lr": 3.3396566159467404e-06, "epoch": 2.259458421355867, "percentage": 45.19, "elapsed_time": "1:33:36", "remaining_time": "1:53:32", "throughput": 19779.18, "total_tokens": 111090432} +{"current_steps": 35300, "total_steps": 78105, "loss": 0.1856, "lr": 3.339130394209639e-06, "epoch": 2.259778503296844, "percentage": 45.2, "elapsed_time": "1:33:37", "remaining_time": "1:53:31", "throughput": 19779.59, "total_tokens": 111105920} +{"current_steps": 35305, "total_steps": 78105, "loss": 0.3488, "lr": 3.338604130570735e-06, "epoch": 2.260098585237821, "percentage": 45.2, "elapsed_time": "1:33:37", "remaining_time": "1:53:30", "throughput": 19779.97, "total_tokens": 111121216} +{"current_steps": 35310, "total_steps": 78105, "loss": 0.1892, "lr": 3.3380778250563097e-06, "epoch": 2.2604186671787976, "percentage": 45.21, "elapsed_time": "1:33:38", "remaining_time": "1:53:29", "throughput": 19780.38, "total_tokens": 111136896} +{"current_steps": 35315, "total_steps": 78105, "loss": 0.1966, "lr": 3.337551477692643e-06, "epoch": 2.2607387491197746, "percentage": 45.21, "elapsed_time": "1:33:39", "remaining_time": "1:53:28", "throughput": 19780.84, "total_tokens": 111153152} +{"current_steps": 35320, "total_steps": 78105, "loss": 0.3031, "lr": 3.3370250885060183e-06, "epoch": 2.2610588310607516, "percentage": 45.22, "elapsed_time": "1:33:39", "remaining_time": "1:53:27", "throughput": 19781.31, "total_tokens": 111169728} +{"current_steps": 35325, "total_steps": 78105, "loss": 0.1913, "lr": 3.336498657522721e-06, "epoch": 2.2613789130017286, "percentage": 45.23, "elapsed_time": "1:33:40", "remaining_time": "1:53:26", "throughput": 19781.81, "total_tokens": 111186816} +{"current_steps": 35330, "total_steps": 78105, "loss": 0.3084, "lr": 3.3359721847690375e-06, "epoch": 2.261698994942705, "percentage": 45.23, "elapsed_time": "1:33:41", "remaining_time": "1:53:25", "throughput": 19782.3, "total_tokens": 111203136} +{"current_steps": 35335, "total_steps": 78105, "loss": 0.2074, "lr": 3.335445670271258e-06, "epoch": 2.262019076883682, "percentage": 45.24, "elapsed_time": "1:33:42", "remaining_time": "1:53:24", "throughput": 19782.69, "total_tokens": 111218880} +{"current_steps": 35340, "total_steps": 78105, "loss": 0.2325, "lr": 3.334919114055673e-06, "epoch": 2.262339158824659, "percentage": 45.25, "elapsed_time": "1:33:42", "remaining_time": "1:53:24", "throughput": 19783.11, "total_tokens": 111234624} +{"current_steps": 35345, "total_steps": 78105, "loss": 0.2509, "lr": 3.3343925161485757e-06, "epoch": 2.262659240765636, "percentage": 45.25, "elapsed_time": "1:33:43", "remaining_time": "1:53:23", "throughput": 19783.62, "total_tokens": 111251712} +{"current_steps": 35350, "total_steps": 78105, "loss": 0.1662, "lr": 3.3338658765762635e-06, "epoch": 2.262979322706613, "percentage": 45.26, "elapsed_time": "1:33:44", "remaining_time": "1:53:22", "throughput": 19783.92, "total_tokens": 111266176} +{"current_steps": 35355, "total_steps": 78105, "loss": 0.1905, "lr": 3.3333391953650326e-06, "epoch": 2.2632994046475896, "percentage": 45.27, "elapsed_time": "1:33:44", "remaining_time": "1:53:21", "throughput": 19784.31, "total_tokens": 111281472} +{"current_steps": 35360, "total_steps": 78105, "loss": 0.1882, "lr": 3.3328124725411825e-06, "epoch": 2.2636194865885666, "percentage": 45.27, "elapsed_time": "1:33:45", "remaining_time": "1:53:20", "throughput": 19784.71, "total_tokens": 111297024} +{"current_steps": 35365, "total_steps": 78105, "loss": 0.3215, "lr": 3.3322857081310158e-06, "epoch": 2.2639395685295436, "percentage": 45.28, "elapsed_time": "1:33:46", "remaining_time": "1:53:19", "throughput": 19785.07, "total_tokens": 111311936} +{"current_steps": 35370, "total_steps": 78105, "loss": 0.2659, "lr": 3.3317589021608353e-06, "epoch": 2.2642596504705206, "percentage": 45.29, "elapsed_time": "1:33:46", "remaining_time": "1:53:18", "throughput": 19785.55, "total_tokens": 111328576} +{"current_steps": 35375, "total_steps": 78105, "loss": 0.1803, "lr": 3.3312320546569486e-06, "epoch": 2.264579732411497, "percentage": 45.29, "elapsed_time": "1:33:47", "remaining_time": "1:53:17", "throughput": 19785.95, "total_tokens": 111344000} +{"current_steps": 35380, "total_steps": 78105, "loss": 0.2751, "lr": 3.3307051656456624e-06, "epoch": 2.264899814352474, "percentage": 45.3, "elapsed_time": "1:33:48", "remaining_time": "1:53:16", "throughput": 19786.38, "total_tokens": 111359936} +{"current_steps": 35385, "total_steps": 78105, "loss": 0.1414, "lr": 3.3301782351532874e-06, "epoch": 2.265219896293451, "percentage": 45.3, "elapsed_time": "1:33:48", "remaining_time": "1:53:15", "throughput": 19786.76, "total_tokens": 111375488} +{"current_steps": 35390, "total_steps": 78105, "loss": 0.2949, "lr": 3.329651263206136e-06, "epoch": 2.265539978234428, "percentage": 45.31, "elapsed_time": "1:33:49", "remaining_time": "1:53:14", "throughput": 19787.19, "total_tokens": 111391552} +{"current_steps": 35395, "total_steps": 78105, "loss": 0.2348, "lr": 3.3291242498305215e-06, "epoch": 2.265860060175405, "percentage": 45.32, "elapsed_time": "1:33:50", "remaining_time": "1:53:13", "throughput": 19787.55, "total_tokens": 111406976} +{"current_steps": 35400, "total_steps": 78105, "loss": 0.2822, "lr": 3.328597195052761e-06, "epoch": 2.2661801421163816, "percentage": 45.32, "elapsed_time": "1:33:50", "remaining_time": "1:53:12", "throughput": 19788.0, "total_tokens": 111423360} +{"current_steps": 35405, "total_steps": 78105, "loss": 0.2325, "lr": 3.328070098899172e-06, "epoch": 2.2665002240573586, "percentage": 45.33, "elapsed_time": "1:33:51", "remaining_time": "1:53:11", "throughput": 19788.38, "total_tokens": 111438272} +{"current_steps": 35410, "total_steps": 78105, "loss": 0.2491, "lr": 3.327542961396076e-06, "epoch": 2.2668203059983356, "percentage": 45.34, "elapsed_time": "1:33:52", "remaining_time": "1:53:10", "throughput": 19788.71, "total_tokens": 111453120} +{"current_steps": 35415, "total_steps": 78105, "loss": 0.2223, "lr": 3.327015782569795e-06, "epoch": 2.2671403879393126, "percentage": 45.34, "elapsed_time": "1:33:52", "remaining_time": "1:53:09", "throughput": 19789.1, "total_tokens": 111468608} +{"current_steps": 35420, "total_steps": 78105, "loss": 0.1699, "lr": 3.3264885624466537e-06, "epoch": 2.267460469880289, "percentage": 45.35, "elapsed_time": "1:33:53", "remaining_time": "1:53:08", "throughput": 19789.43, "total_tokens": 111483200} +{"current_steps": 35425, "total_steps": 78105, "loss": 0.2718, "lr": 3.325961301052978e-06, "epoch": 2.267780551821266, "percentage": 45.36, "elapsed_time": "1:33:54", "remaining_time": "1:53:08", "throughput": 19789.82, "total_tokens": 111499328} +{"current_steps": 35430, "total_steps": 78105, "loss": 0.1949, "lr": 3.325433998415098e-06, "epoch": 2.268100633762243, "percentage": 45.36, "elapsed_time": "1:33:54", "remaining_time": "1:53:07", "throughput": 19790.21, "total_tokens": 111514880} +{"current_steps": 35435, "total_steps": 78105, "loss": 0.2217, "lr": 3.3249066545593428e-06, "epoch": 2.26842071570322, "percentage": 45.37, "elapsed_time": "1:33:55", "remaining_time": "1:53:06", "throughput": 19790.54, "total_tokens": 111529920} +{"current_steps": 35440, "total_steps": 78105, "loss": 0.231, "lr": 3.324379269512047e-06, "epoch": 2.268740797644197, "percentage": 45.37, "elapsed_time": "1:33:56", "remaining_time": "1:53:05", "throughput": 19791.17, "total_tokens": 111548736} +{"current_steps": 35445, "total_steps": 78105, "loss": 0.171, "lr": 3.323851843299544e-06, "epoch": 2.2690608795851737, "percentage": 45.38, "elapsed_time": "1:33:56", "remaining_time": "1:53:04", "throughput": 19791.55, "total_tokens": 111564032} +{"current_steps": 35450, "total_steps": 78105, "loss": 0.1846, "lr": 3.3233243759481717e-06, "epoch": 2.2693809615261507, "percentage": 45.39, "elapsed_time": "1:33:57", "remaining_time": "1:53:03", "throughput": 19792.07, "total_tokens": 111581184} +{"current_steps": 35455, "total_steps": 78105, "loss": 0.1711, "lr": 3.322796867484268e-06, "epoch": 2.2697010434671276, "percentage": 45.39, "elapsed_time": "1:33:58", "remaining_time": "1:53:02", "throughput": 19792.42, "total_tokens": 111596416} +{"current_steps": 35460, "total_steps": 78105, "loss": 0.2638, "lr": 3.3222693179341743e-06, "epoch": 2.2700211254081046, "percentage": 45.4, "elapsed_time": "1:33:58", "remaining_time": "1:53:01", "throughput": 19792.75, "total_tokens": 111611136} +{"current_steps": 35465, "total_steps": 78105, "loss": 0.2347, "lr": 3.3217417273242346e-06, "epoch": 2.270341207349081, "percentage": 45.41, "elapsed_time": "1:33:59", "remaining_time": "1:53:00", "throughput": 19793.08, "total_tokens": 111625920} +{"current_steps": 35470, "total_steps": 78105, "loss": 0.2005, "lr": 3.321214095680793e-06, "epoch": 2.270661289290058, "percentage": 45.41, "elapsed_time": "1:34:00", "remaining_time": "1:52:59", "throughput": 19793.47, "total_tokens": 111641536} +{"current_steps": 35475, "total_steps": 78105, "loss": 0.2397, "lr": 3.3206864230301974e-06, "epoch": 2.270981371231035, "percentage": 45.42, "elapsed_time": "1:34:01", "remaining_time": "1:52:58", "throughput": 19793.86, "total_tokens": 111657216} +{"current_steps": 35480, "total_steps": 78105, "loss": 0.213, "lr": 3.320158709398796e-06, "epoch": 2.271301453172012, "percentage": 45.43, "elapsed_time": "1:34:01", "remaining_time": "1:52:57", "throughput": 19794.21, "total_tokens": 111672192} +{"current_steps": 35485, "total_steps": 78105, "loss": 0.2734, "lr": 3.319630954812941e-06, "epoch": 2.271621535112989, "percentage": 45.43, "elapsed_time": "1:34:02", "remaining_time": "1:52:56", "throughput": 19794.58, "total_tokens": 111687488} +{"current_steps": 35490, "total_steps": 78105, "loss": 0.2439, "lr": 3.319103159298985e-06, "epoch": 2.2719416170539657, "percentage": 45.44, "elapsed_time": "1:34:02", "remaining_time": "1:52:55", "throughput": 19794.96, "total_tokens": 111702976} +{"current_steps": 35495, "total_steps": 78105, "loss": 0.2182, "lr": 3.318575322883284e-06, "epoch": 2.2722616989949427, "percentage": 45.45, "elapsed_time": "1:34:03", "remaining_time": "1:52:54", "throughput": 19795.37, "total_tokens": 111718592} +{"current_steps": 35500, "total_steps": 78105, "loss": 0.181, "lr": 3.3180474455921958e-06, "epoch": 2.2725817809359197, "percentage": 45.45, "elapsed_time": "1:34:04", "remaining_time": "1:52:54", "throughput": 19795.76, "total_tokens": 111734592} +{"current_steps": 35505, "total_steps": 78105, "loss": 0.2401, "lr": 3.3175195274520786e-06, "epoch": 2.2729018628768967, "percentage": 45.46, "elapsed_time": "1:34:05", "remaining_time": "1:52:53", "throughput": 19796.28, "total_tokens": 111752064} +{"current_steps": 35510, "total_steps": 78105, "loss": 0.2504, "lr": 3.316991568489295e-06, "epoch": 2.273221944817873, "percentage": 45.46, "elapsed_time": "1:34:05", "remaining_time": "1:52:52", "throughput": 19796.62, "total_tokens": 111766976} +{"current_steps": 35515, "total_steps": 78105, "loss": 0.1993, "lr": 3.3164635687302084e-06, "epoch": 2.27354202675885, "percentage": 45.47, "elapsed_time": "1:34:06", "remaining_time": "1:52:51", "throughput": 19797.02, "total_tokens": 111782848} +{"current_steps": 35520, "total_steps": 78105, "loss": 0.2055, "lr": 3.315935528201183e-06, "epoch": 2.273862108699827, "percentage": 45.48, "elapsed_time": "1:34:07", "remaining_time": "1:52:50", "throughput": 19797.46, "total_tokens": 111799296} +{"current_steps": 35525, "total_steps": 78105, "loss": 0.1978, "lr": 3.3154074469285876e-06, "epoch": 2.274182190640804, "percentage": 45.48, "elapsed_time": "1:34:07", "remaining_time": "1:52:49", "throughput": 19797.87, "total_tokens": 111815424} +{"current_steps": 35530, "total_steps": 78105, "loss": 0.236, "lr": 3.314879324938792e-06, "epoch": 2.274502272581781, "percentage": 45.49, "elapsed_time": "1:34:08", "remaining_time": "1:52:48", "throughput": 19798.33, "total_tokens": 111831936} +{"current_steps": 35535, "total_steps": 78105, "loss": 0.2359, "lr": 3.314351162258167e-06, "epoch": 2.2748223545227577, "percentage": 45.5, "elapsed_time": "1:34:09", "remaining_time": "1:52:47", "throughput": 19798.64, "total_tokens": 111846848} +{"current_steps": 35540, "total_steps": 78105, "loss": 0.1994, "lr": 3.3138229589130867e-06, "epoch": 2.2751424364637347, "percentage": 45.5, "elapsed_time": "1:34:09", "remaining_time": "1:52:46", "throughput": 19799.05, "total_tokens": 111862784} +{"current_steps": 35545, "total_steps": 78105, "loss": 0.2478, "lr": 3.313294714929927e-06, "epoch": 2.2754625184047117, "percentage": 45.51, "elapsed_time": "1:34:10", "remaining_time": "1:52:45", "throughput": 19799.55, "total_tokens": 111879872} +{"current_steps": 35550, "total_steps": 78105, "loss": 0.2394, "lr": 3.3127664303350653e-06, "epoch": 2.2757826003456887, "percentage": 45.52, "elapsed_time": "1:34:11", "remaining_time": "1:52:44", "throughput": 19799.93, "total_tokens": 111895360} +{"current_steps": 35555, "total_steps": 78105, "loss": 0.1685, "lr": 3.312238105154882e-06, "epoch": 2.2761026822866652, "percentage": 45.52, "elapsed_time": "1:34:11", "remaining_time": "1:52:43", "throughput": 19800.36, "total_tokens": 111911296} +{"current_steps": 35560, "total_steps": 78105, "loss": 0.184, "lr": 3.3117097394157583e-06, "epoch": 2.2764227642276422, "percentage": 45.53, "elapsed_time": "1:34:12", "remaining_time": "1:52:43", "throughput": 19800.94, "total_tokens": 111929024} +{"current_steps": 35565, "total_steps": 78105, "loss": 0.2587, "lr": 3.311181333144079e-06, "epoch": 2.276742846168619, "percentage": 45.53, "elapsed_time": "1:34:13", "remaining_time": "1:52:42", "throughput": 19801.35, "total_tokens": 111944704} +{"current_steps": 35570, "total_steps": 78105, "loss": 0.2375, "lr": 3.3106528863662286e-06, "epoch": 2.277062928109596, "percentage": 45.54, "elapsed_time": "1:34:14", "remaining_time": "1:52:41", "throughput": 19801.84, "total_tokens": 111961600} +{"current_steps": 35575, "total_steps": 78105, "loss": 0.1761, "lr": 3.310124399108595e-06, "epoch": 2.2773830100505728, "percentage": 45.55, "elapsed_time": "1:34:14", "remaining_time": "1:52:40", "throughput": 19802.25, "total_tokens": 111976960} +{"current_steps": 35580, "total_steps": 78105, "loss": 0.1755, "lr": 3.3095958713975697e-06, "epoch": 2.2777030919915497, "percentage": 45.55, "elapsed_time": "1:34:15", "remaining_time": "1:52:39", "throughput": 19802.68, "total_tokens": 111992576} +{"current_steps": 35585, "total_steps": 78105, "loss": 0.1489, "lr": 3.3090673032595435e-06, "epoch": 2.2780231739325267, "percentage": 45.56, "elapsed_time": "1:34:16", "remaining_time": "1:52:38", "throughput": 19803.09, "total_tokens": 112008128} +{"current_steps": 35590, "total_steps": 78105, "loss": 0.176, "lr": 3.3085386947209103e-06, "epoch": 2.2783432558735037, "percentage": 45.57, "elapsed_time": "1:34:16", "remaining_time": "1:52:37", "throughput": 19803.47, "total_tokens": 112023424} +{"current_steps": 35595, "total_steps": 78105, "loss": 0.1759, "lr": 3.3080100458080655e-06, "epoch": 2.2786633378144803, "percentage": 45.57, "elapsed_time": "1:34:17", "remaining_time": "1:52:36", "throughput": 19803.99, "total_tokens": 112040768} +{"current_steps": 35600, "total_steps": 78105, "loss": 0.1716, "lr": 3.3074813565474084e-06, "epoch": 2.2789834197554573, "percentage": 45.58, "elapsed_time": "1:34:18", "remaining_time": "1:52:35", "throughput": 19804.39, "total_tokens": 112056448} +{"current_steps": 35605, "total_steps": 78105, "loss": 0.1691, "lr": 3.3069526269653386e-06, "epoch": 2.2793035016964343, "percentage": 45.59, "elapsed_time": "1:34:18", "remaining_time": "1:52:34", "throughput": 19804.76, "total_tokens": 112071808} +{"current_steps": 35610, "total_steps": 78105, "loss": 0.2409, "lr": 3.3064238570882573e-06, "epoch": 2.2796235836374112, "percentage": 45.59, "elapsed_time": "1:34:19", "remaining_time": "1:52:33", "throughput": 19805.2, "total_tokens": 112087680} +{"current_steps": 35615, "total_steps": 78105, "loss": 0.1783, "lr": 3.3058950469425694e-06, "epoch": 2.2799436655783882, "percentage": 45.6, "elapsed_time": "1:34:20", "remaining_time": "1:52:32", "throughput": 19805.67, "total_tokens": 112104384} +{"current_steps": 35620, "total_steps": 78105, "loss": 0.163, "lr": 3.3053661965546807e-06, "epoch": 2.280263747519365, "percentage": 45.61, "elapsed_time": "1:34:20", "remaining_time": "1:52:31", "throughput": 19806.13, "total_tokens": 112120768} +{"current_steps": 35625, "total_steps": 78105, "loss": 0.2691, "lr": 3.3048373059509993e-06, "epoch": 2.2805838294603418, "percentage": 45.61, "elapsed_time": "1:34:21", "remaining_time": "1:52:30", "throughput": 19806.53, "total_tokens": 112136384} +{"current_steps": 35630, "total_steps": 78105, "loss": 0.2316, "lr": 3.304308375157935e-06, "epoch": 2.2809039114013188, "percentage": 45.62, "elapsed_time": "1:34:22", "remaining_time": "1:52:30", "throughput": 19806.91, "total_tokens": 112151552} +{"current_steps": 35635, "total_steps": 78105, "loss": 0.2818, "lr": 3.3037794042018995e-06, "epoch": 2.2812239933422958, "percentage": 45.62, "elapsed_time": "1:34:22", "remaining_time": "1:52:29", "throughput": 19807.28, "total_tokens": 112166848} +{"current_steps": 35640, "total_steps": 78105, "loss": 0.1925, "lr": 3.303250393109307e-06, "epoch": 2.2815440752832723, "percentage": 45.63, "elapsed_time": "1:34:23", "remaining_time": "1:52:28", "throughput": 19807.66, "total_tokens": 112182336} +{"current_steps": 35645, "total_steps": 78105, "loss": 0.2369, "lr": 3.302721341906575e-06, "epoch": 2.2818641572242493, "percentage": 45.64, "elapsed_time": "1:34:24", "remaining_time": "1:52:27", "throughput": 19808.05, "total_tokens": 112198272} +{"current_steps": 35650, "total_steps": 78105, "loss": 0.2024, "lr": 3.30219225062012e-06, "epoch": 2.2821842391652263, "percentage": 45.64, "elapsed_time": "1:34:24", "remaining_time": "1:52:26", "throughput": 19808.41, "total_tokens": 112213376} +{"current_steps": 35655, "total_steps": 78105, "loss": 0.2214, "lr": 3.301663119276362e-06, "epoch": 2.2825043211062033, "percentage": 45.65, "elapsed_time": "1:34:25", "remaining_time": "1:52:25", "throughput": 19808.73, "total_tokens": 112228032} +{"current_steps": 35660, "total_steps": 78105, "loss": 0.2142, "lr": 3.3011339479017242e-06, "epoch": 2.2828244030471803, "percentage": 45.66, "elapsed_time": "1:34:26", "remaining_time": "1:52:24", "throughput": 19809.09, "total_tokens": 112242752} +{"current_steps": 35665, "total_steps": 78105, "loss": 0.2561, "lr": 3.300604736522629e-06, "epoch": 2.283144484988157, "percentage": 45.66, "elapsed_time": "1:34:26", "remaining_time": "1:52:23", "throughput": 19809.53, "total_tokens": 112258880} +{"current_steps": 35670, "total_steps": 78105, "loss": 0.2027, "lr": 3.300075485165504e-06, "epoch": 2.283464566929134, "percentage": 45.67, "elapsed_time": "1:34:27", "remaining_time": "1:52:22", "throughput": 19810.0, "total_tokens": 112275136} +{"current_steps": 35675, "total_steps": 78105, "loss": 0.3039, "lr": 3.2995461938567773e-06, "epoch": 2.283784648870111, "percentage": 45.68, "elapsed_time": "1:34:28", "remaining_time": "1:52:21", "throughput": 19810.39, "total_tokens": 112290752} +{"current_steps": 35680, "total_steps": 78105, "loss": 0.1981, "lr": 3.299016862622878e-06, "epoch": 2.284104730811088, "percentage": 45.68, "elapsed_time": "1:34:28", "remaining_time": "1:52:20", "throughput": 19810.76, "total_tokens": 112305856} +{"current_steps": 35685, "total_steps": 78105, "loss": 0.1854, "lr": 3.298487491490238e-06, "epoch": 2.2844248127520643, "percentage": 45.69, "elapsed_time": "1:34:29", "remaining_time": "1:52:19", "throughput": 19811.2, "total_tokens": 112321984} +{"current_steps": 35690, "total_steps": 78105, "loss": 0.1716, "lr": 3.2979580804852923e-06, "epoch": 2.2847448946930413, "percentage": 45.69, "elapsed_time": "1:34:30", "remaining_time": "1:52:18", "throughput": 19811.6, "total_tokens": 112337280} +{"current_steps": 35695, "total_steps": 78105, "loss": 0.229, "lr": 3.2974286296344763e-06, "epoch": 2.2850649766340183, "percentage": 45.7, "elapsed_time": "1:34:31", "remaining_time": "1:52:17", "throughput": 19812.13, "total_tokens": 112354688} +{"current_steps": 35700, "total_steps": 78105, "loss": 0.1905, "lr": 3.2968991389642286e-06, "epoch": 2.2853850585749953, "percentage": 45.71, "elapsed_time": "1:34:31", "remaining_time": "1:52:16", "throughput": 19812.47, "total_tokens": 112369472} +{"current_steps": 35705, "total_steps": 78105, "loss": 0.217, "lr": 3.2963696085009884e-06, "epoch": 2.2857051405159723, "percentage": 45.71, "elapsed_time": "1:34:32", "remaining_time": "1:52:15", "throughput": 19812.92, "total_tokens": 112386048} +{"current_steps": 35710, "total_steps": 78105, "loss": 0.2465, "lr": 3.2958400382711984e-06, "epoch": 2.286025222456949, "percentage": 45.72, "elapsed_time": "1:34:33", "remaining_time": "1:52:15", "throughput": 19813.29, "total_tokens": 112401536} +{"current_steps": 35715, "total_steps": 78105, "loss": 0.1618, "lr": 3.2953104283013016e-06, "epoch": 2.286345304397926, "percentage": 45.73, "elapsed_time": "1:34:33", "remaining_time": "1:52:14", "throughput": 19813.67, "total_tokens": 112416896} +{"current_steps": 35720, "total_steps": 78105, "loss": 0.1625, "lr": 3.294780778617745e-06, "epoch": 2.286665386338903, "percentage": 45.73, "elapsed_time": "1:34:34", "remaining_time": "1:52:13", "throughput": 19814.09, "total_tokens": 112432704} +{"current_steps": 35725, "total_steps": 78105, "loss": 0.1489, "lr": 3.294251089246976e-06, "epoch": 2.28698546827988, "percentage": 45.74, "elapsed_time": "1:34:35", "remaining_time": "1:52:12", "throughput": 19814.47, "total_tokens": 112448320} +{"current_steps": 35730, "total_steps": 78105, "loss": 0.248, "lr": 3.293721360215445e-06, "epoch": 2.2873055502208564, "percentage": 45.75, "elapsed_time": "1:34:35", "remaining_time": "1:52:11", "throughput": 19814.9, "total_tokens": 112464448} +{"current_steps": 35735, "total_steps": 78105, "loss": 0.1548, "lr": 3.293191591549604e-06, "epoch": 2.2876256321618333, "percentage": 45.75, "elapsed_time": "1:34:36", "remaining_time": "1:52:10", "throughput": 19815.32, "total_tokens": 112480384} +{"current_steps": 35740, "total_steps": 78105, "loss": 0.2323, "lr": 3.2926617832759055e-06, "epoch": 2.2879457141028103, "percentage": 45.76, "elapsed_time": "1:34:37", "remaining_time": "1:52:09", "throughput": 19815.62, "total_tokens": 112494592} +{"current_steps": 35745, "total_steps": 78105, "loss": 0.2985, "lr": 3.2921319354208074e-06, "epoch": 2.2882657960437873, "percentage": 45.77, "elapsed_time": "1:34:37", "remaining_time": "1:52:08", "throughput": 19815.99, "total_tokens": 112509568} +{"current_steps": 35750, "total_steps": 78105, "loss": 0.232, "lr": 3.291602048010766e-06, "epoch": 2.2885858779847643, "percentage": 45.77, "elapsed_time": "1:34:38", "remaining_time": "1:52:07", "throughput": 19816.38, "total_tokens": 112525568} +{"current_steps": 35755, "total_steps": 78105, "loss": 0.3065, "lr": 3.291072121072242e-06, "epoch": 2.288905959925741, "percentage": 45.78, "elapsed_time": "1:34:39", "remaining_time": "1:52:06", "throughput": 19816.79, "total_tokens": 112541376} +{"current_steps": 35760, "total_steps": 78105, "loss": 0.1949, "lr": 3.2905421546316963e-06, "epoch": 2.289226041866718, "percentage": 45.78, "elapsed_time": "1:34:39", "remaining_time": "1:52:05", "throughput": 19817.33, "total_tokens": 112558848} +{"current_steps": 35765, "total_steps": 78105, "loss": 0.2198, "lr": 3.290012148715593e-06, "epoch": 2.289546123807695, "percentage": 45.79, "elapsed_time": "1:34:40", "remaining_time": "1:52:04", "throughput": 19817.74, "total_tokens": 112574656} +{"current_steps": 35770, "total_steps": 78105, "loss": 0.3517, "lr": 3.289482103350398e-06, "epoch": 2.289866205748672, "percentage": 45.8, "elapsed_time": "1:34:41", "remaining_time": "1:52:03", "throughput": 19818.05, "total_tokens": 112588928} +{"current_steps": 35775, "total_steps": 78105, "loss": 0.1504, "lr": 3.2889520185625802e-06, "epoch": 2.2901862876896484, "percentage": 45.8, "elapsed_time": "1:34:41", "remaining_time": "1:52:02", "throughput": 19818.48, "total_tokens": 112604864} +{"current_steps": 35780, "total_steps": 78105, "loss": 0.2392, "lr": 3.288421894378606e-06, "epoch": 2.2905063696306254, "percentage": 45.81, "elapsed_time": "1:34:42", "remaining_time": "1:52:01", "throughput": 19818.82, "total_tokens": 112619840} +{"current_steps": 35785, "total_steps": 78105, "loss": 0.2417, "lr": 3.2878917308249514e-06, "epoch": 2.2908264515716024, "percentage": 45.82, "elapsed_time": "1:34:43", "remaining_time": "1:52:00", "throughput": 19819.17, "total_tokens": 112634624} +{"current_steps": 35790, "total_steps": 78105, "loss": 0.3495, "lr": 3.2873615279280868e-06, "epoch": 2.2911465335125794, "percentage": 45.82, "elapsed_time": "1:34:43", "remaining_time": "1:52:00", "throughput": 19819.55, "total_tokens": 112650112} +{"current_steps": 35795, "total_steps": 78105, "loss": 0.2281, "lr": 3.2868312857144887e-06, "epoch": 2.2914666154535563, "percentage": 45.83, "elapsed_time": "1:34:44", "remaining_time": "1:51:59", "throughput": 19819.86, "total_tokens": 112664576} +{"current_steps": 35800, "total_steps": 78105, "loss": 0.2212, "lr": 3.2863010042106347e-06, "epoch": 2.291786697394533, "percentage": 45.84, "elapsed_time": "1:34:45", "remaining_time": "1:51:58", "throughput": 19820.25, "total_tokens": 112680064} +{"current_steps": 35805, "total_steps": 78105, "loss": 0.2435, "lr": 3.2857706834430037e-06, "epoch": 2.29210677933551, "percentage": 45.84, "elapsed_time": "1:34:45", "remaining_time": "1:51:57", "throughput": 19820.69, "total_tokens": 112696576} +{"current_steps": 35810, "total_steps": 78105, "loss": 0.1967, "lr": 3.285240323438078e-06, "epoch": 2.292426861276487, "percentage": 45.85, "elapsed_time": "1:34:46", "remaining_time": "1:51:56", "throughput": 19821.01, "total_tokens": 112711232} +{"current_steps": 35815, "total_steps": 78105, "loss": 0.2464, "lr": 3.284709924222341e-06, "epoch": 2.292746943217464, "percentage": 45.85, "elapsed_time": "1:34:47", "remaining_time": "1:51:55", "throughput": 19821.45, "total_tokens": 112727424} +{"current_steps": 35820, "total_steps": 78105, "loss": 0.2852, "lr": 3.2841794858222783e-06, "epoch": 2.2930670251584404, "percentage": 45.86, "elapsed_time": "1:34:47", "remaining_time": "1:51:54", "throughput": 19821.96, "total_tokens": 112744576} +{"current_steps": 35825, "total_steps": 78105, "loss": 0.1911, "lr": 3.2836490082643756e-06, "epoch": 2.2933871070994174, "percentage": 45.87, "elapsed_time": "1:34:48", "remaining_time": "1:51:53", "throughput": 19822.36, "total_tokens": 112760192} +{"current_steps": 35830, "total_steps": 78105, "loss": 0.2908, "lr": 3.2831184915751233e-06, "epoch": 2.2937071890403944, "percentage": 45.87, "elapsed_time": "1:34:49", "remaining_time": "1:51:52", "throughput": 19822.72, "total_tokens": 112775168} +{"current_steps": 35835, "total_steps": 78105, "loss": 0.1631, "lr": 3.2825879357810136e-06, "epoch": 2.2940272709813714, "percentage": 45.88, "elapsed_time": "1:34:49", "remaining_time": "1:51:51", "throughput": 19823.09, "total_tokens": 112790912} +{"current_steps": 35840, "total_steps": 78105, "loss": 0.1283, "lr": 3.2820573409085383e-06, "epoch": 2.2943473529223484, "percentage": 45.89, "elapsed_time": "1:34:50", "remaining_time": "1:51:50", "throughput": 19823.48, "total_tokens": 112806144} +{"current_steps": 35845, "total_steps": 78105, "loss": 0.2341, "lr": 3.281526706984193e-06, "epoch": 2.294667434863325, "percentage": 45.89, "elapsed_time": "1:34:51", "remaining_time": "1:51:49", "throughput": 19823.93, "total_tokens": 112822592} +{"current_steps": 35850, "total_steps": 78105, "loss": 0.1831, "lr": 3.280996034034475e-06, "epoch": 2.294987516804302, "percentage": 45.9, "elapsed_time": "1:34:51", "remaining_time": "1:51:48", "throughput": 19824.34, "total_tokens": 112838592} +{"current_steps": 35855, "total_steps": 78105, "loss": 0.1915, "lr": 3.2804653220858828e-06, "epoch": 2.295307598745279, "percentage": 45.91, "elapsed_time": "1:34:52", "remaining_time": "1:51:47", "throughput": 19824.75, "total_tokens": 112854336} +{"current_steps": 35860, "total_steps": 78105, "loss": 0.2168, "lr": 3.2799345711649176e-06, "epoch": 2.2956276806862554, "percentage": 45.91, "elapsed_time": "1:34:53", "remaining_time": "1:51:46", "throughput": 19825.12, "total_tokens": 112869568} +{"current_steps": 35865, "total_steps": 78105, "loss": 0.2558, "lr": 3.2794037812980823e-06, "epoch": 2.2959477626272324, "percentage": 45.92, "elapsed_time": "1:34:53", "remaining_time": "1:51:46", "throughput": 19825.53, "total_tokens": 112885440} +{"current_steps": 35870, "total_steps": 78105, "loss": 0.1851, "lr": 3.278872952511882e-06, "epoch": 2.2962678445682094, "percentage": 45.93, "elapsed_time": "1:34:54", "remaining_time": "1:51:45", "throughput": 19825.84, "total_tokens": 112899968} +{"current_steps": 35875, "total_steps": 78105, "loss": 0.2553, "lr": 3.278342084832824e-06, "epoch": 2.2965879265091864, "percentage": 45.93, "elapsed_time": "1:34:55", "remaining_time": "1:51:44", "throughput": 19826.26, "total_tokens": 112915904} +{"current_steps": 35880, "total_steps": 78105, "loss": 0.2898, "lr": 3.2778111782874168e-06, "epoch": 2.2969080084501634, "percentage": 45.94, "elapsed_time": "1:34:55", "remaining_time": "1:51:43", "throughput": 19826.67, "total_tokens": 112932288} +{"current_steps": 35885, "total_steps": 78105, "loss": 0.2115, "lr": 3.2772802329021704e-06, "epoch": 2.29722809039114, "percentage": 45.94, "elapsed_time": "1:34:56", "remaining_time": "1:51:42", "throughput": 19827.14, "total_tokens": 112948672} +{"current_steps": 35890, "total_steps": 78105, "loss": 0.2639, "lr": 3.2767492487035975e-06, "epoch": 2.297548172332117, "percentage": 45.95, "elapsed_time": "1:34:57", "remaining_time": "1:51:41", "throughput": 19827.52, "total_tokens": 112964544} +{"current_steps": 35895, "total_steps": 78105, "loss": 0.1864, "lr": 3.276218225718213e-06, "epoch": 2.297868254273094, "percentage": 45.96, "elapsed_time": "1:34:58", "remaining_time": "1:51:40", "throughput": 19827.91, "total_tokens": 112980032} +{"current_steps": 35900, "total_steps": 78105, "loss": 0.2037, "lr": 3.2756871639725335e-06, "epoch": 2.298188336214071, "percentage": 45.96, "elapsed_time": "1:34:58", "remaining_time": "1:51:39", "throughput": 19828.34, "total_tokens": 112996224} +{"current_steps": 35905, "total_steps": 78105, "loss": 0.237, "lr": 3.2751560634930777e-06, "epoch": 2.2985084181550475, "percentage": 45.97, "elapsed_time": "1:34:59", "remaining_time": "1:51:38", "throughput": 19828.7, "total_tokens": 113011328} +{"current_steps": 35910, "total_steps": 78105, "loss": 0.2893, "lr": 3.274624924306366e-06, "epoch": 2.2988285000960245, "percentage": 45.98, "elapsed_time": "1:35:00", "remaining_time": "1:51:37", "throughput": 19829.14, "total_tokens": 113027392} +{"current_steps": 35915, "total_steps": 78105, "loss": 0.2912, "lr": 3.27409374643892e-06, "epoch": 2.2991485820370015, "percentage": 45.98, "elapsed_time": "1:35:00", "remaining_time": "1:51:36", "throughput": 19829.62, "total_tokens": 113044032} +{"current_steps": 35920, "total_steps": 78105, "loss": 0.2751, "lr": 3.2735625299172645e-06, "epoch": 2.2994686639779784, "percentage": 45.99, "elapsed_time": "1:35:01", "remaining_time": "1:51:35", "throughput": 19829.98, "total_tokens": 113059072} +{"current_steps": 35925, "total_steps": 78105, "loss": 0.2352, "lr": 3.273031274767926e-06, "epoch": 2.2997887459189554, "percentage": 46.0, "elapsed_time": "1:35:02", "remaining_time": "1:51:34", "throughput": 19830.37, "total_tokens": 113074880} +{"current_steps": 35930, "total_steps": 78105, "loss": 0.1902, "lr": 3.2724999810174318e-06, "epoch": 2.300108827859932, "percentage": 46.0, "elapsed_time": "1:35:02", "remaining_time": "1:51:33", "throughput": 19830.76, "total_tokens": 113090624} +{"current_steps": 35935, "total_steps": 78105, "loss": 0.2567, "lr": 3.2719686486923126e-06, "epoch": 2.300428909800909, "percentage": 46.01, "elapsed_time": "1:35:03", "remaining_time": "1:51:33", "throughput": 19831.17, "total_tokens": 113106368} +{"current_steps": 35940, "total_steps": 78105, "loss": 0.196, "lr": 3.2714372778191e-06, "epoch": 2.300748991741886, "percentage": 46.01, "elapsed_time": "1:35:04", "remaining_time": "1:51:32", "throughput": 19831.53, "total_tokens": 113121472} +{"current_steps": 35945, "total_steps": 78105, "loss": 0.3752, "lr": 3.270905868424328e-06, "epoch": 2.301069073682863, "percentage": 46.02, "elapsed_time": "1:35:04", "remaining_time": "1:51:31", "throughput": 19831.88, "total_tokens": 113136448} +{"current_steps": 35950, "total_steps": 78105, "loss": 0.206, "lr": 3.2703744205345316e-06, "epoch": 2.3013891556238395, "percentage": 46.03, "elapsed_time": "1:35:05", "remaining_time": "1:51:30", "throughput": 19832.3, "total_tokens": 113152512} +{"current_steps": 35955, "total_steps": 78105, "loss": 0.1289, "lr": 3.2698429341762507e-06, "epoch": 2.3017092375648165, "percentage": 46.03, "elapsed_time": "1:35:06", "remaining_time": "1:51:29", "throughput": 19832.64, "total_tokens": 113167296} +{"current_steps": 35960, "total_steps": 78105, "loss": 0.2085, "lr": 3.2693114093760233e-06, "epoch": 2.3020293195057935, "percentage": 46.04, "elapsed_time": "1:35:06", "remaining_time": "1:51:28", "throughput": 19833.07, "total_tokens": 113183552} +{"current_steps": 35965, "total_steps": 78105, "loss": 0.2432, "lr": 3.268779846160391e-06, "epoch": 2.3023494014467705, "percentage": 46.05, "elapsed_time": "1:35:07", "remaining_time": "1:51:27", "throughput": 19833.49, "total_tokens": 113199424} +{"current_steps": 35970, "total_steps": 78105, "loss": 0.1899, "lr": 3.2682482445558975e-06, "epoch": 2.3026694833877475, "percentage": 46.05, "elapsed_time": "1:35:08", "remaining_time": "1:51:26", "throughput": 19833.81, "total_tokens": 113214336} +{"current_steps": 35975, "total_steps": 78105, "loss": 0.2147, "lr": 3.267716604589089e-06, "epoch": 2.302989565328724, "percentage": 46.06, "elapsed_time": "1:35:08", "remaining_time": "1:51:25", "throughput": 19834.2, "total_tokens": 113229824} +{"current_steps": 35980, "total_steps": 78105, "loss": 0.1596, "lr": 3.2671849262865114e-06, "epoch": 2.303309647269701, "percentage": 46.07, "elapsed_time": "1:35:09", "remaining_time": "1:51:24", "throughput": 19834.61, "total_tokens": 113246016} +{"current_steps": 35985, "total_steps": 78105, "loss": 0.1786, "lr": 3.266653209674715e-06, "epoch": 2.303629729210678, "percentage": 46.07, "elapsed_time": "1:35:10", "remaining_time": "1:51:23", "throughput": 19834.98, "total_tokens": 113261376} +{"current_steps": 35990, "total_steps": 78105, "loss": 0.2039, "lr": 3.2661214547802512e-06, "epoch": 2.303949811151655, "percentage": 46.08, "elapsed_time": "1:35:10", "remaining_time": "1:51:22", "throughput": 19835.33, "total_tokens": 113276480} +{"current_steps": 35995, "total_steps": 78105, "loss": 0.1284, "lr": 3.2655896616296722e-06, "epoch": 2.3042698930926315, "percentage": 46.09, "elapsed_time": "1:35:11", "remaining_time": "1:51:21", "throughput": 19835.72, "total_tokens": 113292352} +{"current_steps": 36000, "total_steps": 78105, "loss": 0.1803, "lr": 3.265057830249533e-06, "epoch": 2.3045899750336085, "percentage": 46.09, "elapsed_time": "1:35:12", "remaining_time": "1:51:20", "throughput": 19836.13, "total_tokens": 113308160} +{"current_steps": 36005, "total_steps": 78105, "loss": 0.2459, "lr": 3.2645259606663904e-06, "epoch": 2.3049100569745855, "percentage": 46.1, "elapsed_time": "1:35:12", "remaining_time": "1:51:19", "throughput": 19836.52, "total_tokens": 113323584} +{"current_steps": 36010, "total_steps": 78105, "loss": 0.1979, "lr": 3.263994052906804e-06, "epoch": 2.3052301389155625, "percentage": 46.1, "elapsed_time": "1:35:13", "remaining_time": "1:51:19", "throughput": 19836.9, "total_tokens": 113338880} +{"current_steps": 36015, "total_steps": 78105, "loss": 0.3052, "lr": 3.2634621069973344e-06, "epoch": 2.3055502208565395, "percentage": 46.11, "elapsed_time": "1:35:14", "remaining_time": "1:51:18", "throughput": 19837.31, "total_tokens": 113354944} +{"current_steps": 36020, "total_steps": 78105, "loss": 0.1532, "lr": 3.262930122964544e-06, "epoch": 2.305870302797516, "percentage": 46.12, "elapsed_time": "1:35:14", "remaining_time": "1:51:17", "throughput": 19837.71, "total_tokens": 113370432} +{"current_steps": 36025, "total_steps": 78105, "loss": 0.1774, "lr": 3.262398100834997e-06, "epoch": 2.306190384738493, "percentage": 46.12, "elapsed_time": "1:35:15", "remaining_time": "1:51:16", "throughput": 19838.04, "total_tokens": 113385408} +{"current_steps": 36030, "total_steps": 78105, "loss": 0.2313, "lr": 3.26186604063526e-06, "epoch": 2.30651046667947, "percentage": 46.13, "elapsed_time": "1:35:16", "remaining_time": "1:51:15", "throughput": 19838.51, "total_tokens": 113401920} +{"current_steps": 36035, "total_steps": 78105, "loss": 0.2411, "lr": 3.261333942391901e-06, "epoch": 2.306830548620447, "percentage": 46.14, "elapsed_time": "1:35:16", "remaining_time": "1:51:14", "throughput": 19838.85, "total_tokens": 113416768} +{"current_steps": 36040, "total_steps": 78105, "loss": 0.1825, "lr": 3.2608018061314906e-06, "epoch": 2.3071506305614236, "percentage": 46.14, "elapsed_time": "1:35:17", "remaining_time": "1:51:13", "throughput": 19839.26, "total_tokens": 113432896} +{"current_steps": 36045, "total_steps": 78105, "loss": 0.1812, "lr": 3.2602696318806017e-06, "epoch": 2.3074707125024005, "percentage": 46.15, "elapsed_time": "1:35:18", "remaining_time": "1:51:12", "throughput": 19839.72, "total_tokens": 113449472} +{"current_steps": 36050, "total_steps": 78105, "loss": 0.129, "lr": 3.259737419665807e-06, "epoch": 2.3077907944433775, "percentage": 46.16, "elapsed_time": "1:35:18", "remaining_time": "1:51:11", "throughput": 19840.17, "total_tokens": 113465728} +{"current_steps": 36055, "total_steps": 78105, "loss": 0.2606, "lr": 3.2592051695136828e-06, "epoch": 2.3081108763843545, "percentage": 46.16, "elapsed_time": "1:35:19", "remaining_time": "1:51:10", "throughput": 19840.58, "total_tokens": 113481216} +{"current_steps": 36060, "total_steps": 78105, "loss": 0.1819, "lr": 3.2586728814508066e-06, "epoch": 2.3084309583253315, "percentage": 46.17, "elapsed_time": "1:35:20", "remaining_time": "1:51:09", "throughput": 19840.95, "total_tokens": 113496640} +{"current_steps": 36065, "total_steps": 78105, "loss": 0.2935, "lr": 3.258140555503759e-06, "epoch": 2.308751040266308, "percentage": 46.18, "elapsed_time": "1:35:20", "remaining_time": "1:51:08", "throughput": 19841.27, "total_tokens": 113511296} +{"current_steps": 36070, "total_steps": 78105, "loss": 0.2085, "lr": 3.2576081916991208e-06, "epoch": 2.309071122207285, "percentage": 46.18, "elapsed_time": "1:35:21", "remaining_time": "1:51:07", "throughput": 19841.63, "total_tokens": 113526400} +{"current_steps": 36075, "total_steps": 78105, "loss": 0.1803, "lr": 3.257075790063476e-06, "epoch": 2.309391204148262, "percentage": 46.19, "elapsed_time": "1:35:22", "remaining_time": "1:51:06", "throughput": 19842.0, "total_tokens": 113541824} +{"current_steps": 36080, "total_steps": 78105, "loss": 0.3065, "lr": 3.2565433506234095e-06, "epoch": 2.309711286089239, "percentage": 46.19, "elapsed_time": "1:35:22", "remaining_time": "1:51:05", "throughput": 19842.42, "total_tokens": 113557952} +{"current_steps": 36085, "total_steps": 78105, "loss": 0.1975, "lr": 3.256010873405509e-06, "epoch": 2.3100313680302156, "percentage": 46.2, "elapsed_time": "1:35:23", "remaining_time": "1:51:05", "throughput": 19842.79, "total_tokens": 113573312} +{"current_steps": 36090, "total_steps": 78105, "loss": 0.1953, "lr": 3.2554783584363635e-06, "epoch": 2.3103514499711926, "percentage": 46.21, "elapsed_time": "1:35:24", "remaining_time": "1:51:04", "throughput": 19843.19, "total_tokens": 113589120} +{"current_steps": 36095, "total_steps": 78105, "loss": 0.2128, "lr": 3.2549458057425633e-06, "epoch": 2.3106715319121696, "percentage": 46.21, "elapsed_time": "1:35:24", "remaining_time": "1:51:03", "throughput": 19843.56, "total_tokens": 113604224} +{"current_steps": 36100, "total_steps": 78105, "loss": 0.1691, "lr": 3.2544132153507024e-06, "epoch": 2.3109916138531466, "percentage": 46.22, "elapsed_time": "1:35:25", "remaining_time": "1:51:02", "throughput": 19843.93, "total_tokens": 113619520} +{"current_steps": 36105, "total_steps": 78105, "loss": 0.2388, "lr": 3.2538805872873753e-06, "epoch": 2.3113116957941235, "percentage": 46.23, "elapsed_time": "1:35:26", "remaining_time": "1:51:01", "throughput": 19844.28, "total_tokens": 113634560} +{"current_steps": 36110, "total_steps": 78105, "loss": 0.2767, "lr": 3.2533479215791786e-06, "epoch": 2.3116317777351, "percentage": 46.23, "elapsed_time": "1:35:26", "remaining_time": "1:51:00", "throughput": 19844.68, "total_tokens": 113650176} +{"current_steps": 36115, "total_steps": 78105, "loss": 0.2197, "lr": 3.2528152182527105e-06, "epoch": 2.311951859676077, "percentage": 46.24, "elapsed_time": "1:35:27", "remaining_time": "1:50:59", "throughput": 19845.08, "total_tokens": 113665856} +{"current_steps": 36120, "total_steps": 78105, "loss": 0.2403, "lr": 3.2522824773345713e-06, "epoch": 2.312271941617054, "percentage": 46.25, "elapsed_time": "1:35:28", "remaining_time": "1:50:58", "throughput": 19845.45, "total_tokens": 113680896} +{"current_steps": 36125, "total_steps": 78105, "loss": 0.2539, "lr": 3.251749698851363e-06, "epoch": 2.3125920235580306, "percentage": 46.25, "elapsed_time": "1:35:28", "remaining_time": "1:50:57", "throughput": 19845.76, "total_tokens": 113695680} +{"current_steps": 36130, "total_steps": 78105, "loss": 0.234, "lr": 3.251216882829692e-06, "epoch": 2.3129121054990076, "percentage": 46.26, "elapsed_time": "1:35:29", "remaining_time": "1:50:56", "throughput": 19846.2, "total_tokens": 113711872} +{"current_steps": 36135, "total_steps": 78105, "loss": 0.2084, "lr": 3.250684029296162e-06, "epoch": 2.3132321874399846, "percentage": 46.26, "elapsed_time": "1:35:30", "remaining_time": "1:50:55", "throughput": 19846.63, "total_tokens": 113728000} +{"current_steps": 36140, "total_steps": 78105, "loss": 0.136, "lr": 3.250151138277382e-06, "epoch": 2.3135522693809616, "percentage": 46.27, "elapsed_time": "1:35:31", "remaining_time": "1:50:54", "throughput": 19847.02, "total_tokens": 113743488} +{"current_steps": 36145, "total_steps": 78105, "loss": 0.2399, "lr": 3.2496182097999617e-06, "epoch": 2.3138723513219386, "percentage": 46.28, "elapsed_time": "1:35:31", "remaining_time": "1:50:53", "throughput": 19847.39, "total_tokens": 113758784} +{"current_steps": 36150, "total_steps": 78105, "loss": 0.2673, "lr": 3.2490852438905123e-06, "epoch": 2.314192433262915, "percentage": 46.28, "elapsed_time": "1:35:32", "remaining_time": "1:50:52", "throughput": 19847.82, "total_tokens": 113775040} +{"current_steps": 36155, "total_steps": 78105, "loss": 0.2352, "lr": 3.2485522405756475e-06, "epoch": 2.314512515203892, "percentage": 46.29, "elapsed_time": "1:35:33", "remaining_time": "1:50:51", "throughput": 19848.26, "total_tokens": 113791168} +{"current_steps": 36160, "total_steps": 78105, "loss": 0.1866, "lr": 3.2480191998819836e-06, "epoch": 2.314832597144869, "percentage": 46.3, "elapsed_time": "1:35:33", "remaining_time": "1:50:51", "throughput": 19848.68, "total_tokens": 113807104} +{"current_steps": 36165, "total_steps": 78105, "loss": 0.2246, "lr": 3.247486121836137e-06, "epoch": 2.315152679085846, "percentage": 46.3, "elapsed_time": "1:35:34", "remaining_time": "1:50:50", "throughput": 19849.1, "total_tokens": 113823232} +{"current_steps": 36170, "total_steps": 78105, "loss": 0.2279, "lr": 3.246953006464727e-06, "epoch": 2.3154727610268226, "percentage": 46.31, "elapsed_time": "1:35:35", "remaining_time": "1:50:49", "throughput": 19849.44, "total_tokens": 113837888} +{"current_steps": 36175, "total_steps": 78105, "loss": 0.2344, "lr": 3.2464198537943736e-06, "epoch": 2.3157928429677996, "percentage": 46.32, "elapsed_time": "1:35:35", "remaining_time": "1:50:48", "throughput": 19849.82, "total_tokens": 113853184} +{"current_steps": 36180, "total_steps": 78105, "loss": 0.2208, "lr": 3.245886663851701e-06, "epoch": 2.3161129249087766, "percentage": 46.32, "elapsed_time": "1:35:36", "remaining_time": "1:50:47", "throughput": 19850.23, "total_tokens": 113868864} +{"current_steps": 36185, "total_steps": 78105, "loss": 0.3125, "lr": 3.2453534366633343e-06, "epoch": 2.3164330068497536, "percentage": 46.33, "elapsed_time": "1:35:37", "remaining_time": "1:50:46", "throughput": 19850.57, "total_tokens": 113883904} +{"current_steps": 36190, "total_steps": 78105, "loss": 0.2061, "lr": 3.244820172255899e-06, "epoch": 2.3167530887907306, "percentage": 46.34, "elapsed_time": "1:35:37", "remaining_time": "1:50:45", "throughput": 19850.97, "total_tokens": 113899584} +{"current_steps": 36195, "total_steps": 78105, "loss": 0.2645, "lr": 3.2442868706560237e-06, "epoch": 2.317073170731707, "percentage": 46.34, "elapsed_time": "1:35:38", "remaining_time": "1:50:44", "throughput": 19851.29, "total_tokens": 113914432} +{"current_steps": 36200, "total_steps": 78105, "loss": 0.2573, "lr": 3.2437535318903395e-06, "epoch": 2.317393252672684, "percentage": 46.35, "elapsed_time": "1:35:39", "remaining_time": "1:50:43", "throughput": 19851.66, "total_tokens": 113929856} +{"current_steps": 36205, "total_steps": 78105, "loss": 0.1858, "lr": 3.243220155985478e-06, "epoch": 2.317713334613661, "percentage": 46.35, "elapsed_time": "1:35:39", "remaining_time": "1:50:42", "throughput": 19852.04, "total_tokens": 113945280} +{"current_steps": 36210, "total_steps": 78105, "loss": 0.1597, "lr": 3.2426867429680725e-06, "epoch": 2.318033416554638, "percentage": 46.36, "elapsed_time": "1:35:40", "remaining_time": "1:50:41", "throughput": 19852.46, "total_tokens": 113961408} +{"current_steps": 36215, "total_steps": 78105, "loss": 0.1967, "lr": 3.2421532928647605e-06, "epoch": 2.3183534984956147, "percentage": 46.37, "elapsed_time": "1:35:41", "remaining_time": "1:50:40", "throughput": 19852.81, "total_tokens": 113976384} +{"current_steps": 36220, "total_steps": 78105, "loss": 0.2269, "lr": 3.241619805702178e-06, "epoch": 2.3186735804365917, "percentage": 46.37, "elapsed_time": "1:35:41", "remaining_time": "1:50:39", "throughput": 19853.16, "total_tokens": 113991680} +{"current_steps": 36225, "total_steps": 78105, "loss": 0.2183, "lr": 3.2410862815069656e-06, "epoch": 2.3189936623775687, "percentage": 46.38, "elapsed_time": "1:35:42", "remaining_time": "1:50:38", "throughput": 19853.64, "total_tokens": 114008512} +{"current_steps": 36230, "total_steps": 78105, "loss": 0.2233, "lr": 3.2405527203057645e-06, "epoch": 2.3193137443185456, "percentage": 46.39, "elapsed_time": "1:35:43", "remaining_time": "1:50:38", "throughput": 19854.33, "total_tokens": 114028032} +{"current_steps": 36235, "total_steps": 78105, "loss": 0.2322, "lr": 3.2400191221252173e-06, "epoch": 2.3196338262595226, "percentage": 46.39, "elapsed_time": "1:35:43", "remaining_time": "1:50:37", "throughput": 19854.73, "total_tokens": 114044032} +{"current_steps": 36240, "total_steps": 78105, "loss": 0.184, "lr": 3.23948548699197e-06, "epoch": 2.319953908200499, "percentage": 46.4, "elapsed_time": "1:35:44", "remaining_time": "1:50:36", "throughput": 19855.06, "total_tokens": 114058816} +{"current_steps": 36245, "total_steps": 78105, "loss": 0.2354, "lr": 3.2389518149326702e-06, "epoch": 2.320273990141476, "percentage": 46.41, "elapsed_time": "1:35:45", "remaining_time": "1:50:35", "throughput": 19855.44, "total_tokens": 114074368} +{"current_steps": 36250, "total_steps": 78105, "loss": 0.1831, "lr": 3.2384181059739656e-06, "epoch": 2.320594072082453, "percentage": 46.41, "elapsed_time": "1:35:45", "remaining_time": "1:50:34", "throughput": 19855.8, "total_tokens": 114089728} +{"current_steps": 36255, "total_steps": 78105, "loss": 0.1526, "lr": 3.237884360142507e-06, "epoch": 2.32091415402343, "percentage": 46.42, "elapsed_time": "1:35:46", "remaining_time": "1:50:33", "throughput": 19856.22, "total_tokens": 114105664} +{"current_steps": 36260, "total_steps": 78105, "loss": 0.1939, "lr": 3.2373505774649464e-06, "epoch": 2.3212342359644067, "percentage": 46.42, "elapsed_time": "1:35:47", "remaining_time": "1:50:32", "throughput": 19856.62, "total_tokens": 114121408} +{"current_steps": 36265, "total_steps": 78105, "loss": 0.1495, "lr": 3.2368167579679386e-06, "epoch": 2.3215543179053837, "percentage": 46.43, "elapsed_time": "1:35:47", "remaining_time": "1:50:31", "throughput": 19857.01, "total_tokens": 114136960} +{"current_steps": 36270, "total_steps": 78105, "loss": 0.231, "lr": 3.2362829016781405e-06, "epoch": 2.3218743998463607, "percentage": 46.44, "elapsed_time": "1:35:48", "remaining_time": "1:50:30", "throughput": 19857.47, "total_tokens": 114153216} +{"current_steps": 36275, "total_steps": 78105, "loss": 0.2567, "lr": 3.2357490086222093e-06, "epoch": 2.3221944817873377, "percentage": 46.44, "elapsed_time": "1:35:49", "remaining_time": "1:50:29", "throughput": 19857.9, "total_tokens": 114169280} +{"current_steps": 36280, "total_steps": 78105, "loss": 0.2084, "lr": 3.2352150788268056e-06, "epoch": 2.3225145637283147, "percentage": 46.45, "elapsed_time": "1:35:50", "remaining_time": "1:50:28", "throughput": 19858.37, "total_tokens": 114185728} +{"current_steps": 36285, "total_steps": 78105, "loss": 0.1672, "lr": 3.2346811123185896e-06, "epoch": 2.322834645669291, "percentage": 46.46, "elapsed_time": "1:35:50", "remaining_time": "1:50:27", "throughput": 19858.79, "total_tokens": 114201920} +{"current_steps": 36290, "total_steps": 78105, "loss": 0.2174, "lr": 3.2341471091242256e-06, "epoch": 2.323154727610268, "percentage": 46.46, "elapsed_time": "1:35:51", "remaining_time": "1:50:27", "throughput": 19859.26, "total_tokens": 114218688} +{"current_steps": 36295, "total_steps": 78105, "loss": 0.2327, "lr": 3.2336130692703794e-06, "epoch": 2.323474809551245, "percentage": 46.47, "elapsed_time": "1:35:52", "remaining_time": "1:50:26", "throughput": 19859.74, "total_tokens": 114235520} +{"current_steps": 36300, "total_steps": 78105, "loss": 0.2098, "lr": 3.233078992783717e-06, "epoch": 2.323794891492222, "percentage": 46.48, "elapsed_time": "1:35:52", "remaining_time": "1:50:25", "throughput": 19860.09, "total_tokens": 114250304} +{"current_steps": 36305, "total_steps": 78105, "loss": 0.2274, "lr": 3.2325448796909085e-06, "epoch": 2.3241149734331987, "percentage": 46.48, "elapsed_time": "1:35:53", "remaining_time": "1:50:24", "throughput": 19860.44, "total_tokens": 114265536} +{"current_steps": 36310, "total_steps": 78105, "loss": 0.2361, "lr": 3.232010730018625e-06, "epoch": 2.3244350553741757, "percentage": 46.49, "elapsed_time": "1:35:54", "remaining_time": "1:50:23", "throughput": 19860.9, "total_tokens": 114281984} +{"current_steps": 36315, "total_steps": 78105, "loss": 0.245, "lr": 3.2314765437935386e-06, "epoch": 2.3247551373151527, "percentage": 46.5, "elapsed_time": "1:35:54", "remaining_time": "1:50:22", "throughput": 19861.37, "total_tokens": 114299008} +{"current_steps": 36320, "total_steps": 78105, "loss": 0.3888, "lr": 3.230942321042323e-06, "epoch": 2.3250752192561297, "percentage": 46.5, "elapsed_time": "1:35:55", "remaining_time": "1:50:21", "throughput": 19861.75, "total_tokens": 114314560} +{"current_steps": 36325, "total_steps": 78105, "loss": 0.1697, "lr": 3.2304080617916556e-06, "epoch": 2.3253953011971067, "percentage": 46.51, "elapsed_time": "1:35:56", "remaining_time": "1:50:20", "throughput": 19862.18, "total_tokens": 114330752} +{"current_steps": 36330, "total_steps": 78105, "loss": 0.1558, "lr": 3.229873766068214e-06, "epoch": 2.3257153831380832, "percentage": 46.51, "elapsed_time": "1:35:56", "remaining_time": "1:50:19", "throughput": 19862.6, "total_tokens": 114346880} +{"current_steps": 36335, "total_steps": 78105, "loss": 0.198, "lr": 3.229339433898678e-06, "epoch": 2.3260354650790602, "percentage": 46.52, "elapsed_time": "1:35:57", "remaining_time": "1:50:18", "throughput": 19863.03, "total_tokens": 114363200} +{"current_steps": 36340, "total_steps": 78105, "loss": 0.2137, "lr": 3.2288050653097307e-06, "epoch": 2.326355547020037, "percentage": 46.53, "elapsed_time": "1:35:58", "remaining_time": "1:50:17", "throughput": 19863.46, "total_tokens": 114379392} +{"current_steps": 36345, "total_steps": 78105, "loss": 0.1978, "lr": 3.2282706603280545e-06, "epoch": 2.326675628961014, "percentage": 46.53, "elapsed_time": "1:35:58", "remaining_time": "1:50:16", "throughput": 19863.81, "total_tokens": 114394752} +{"current_steps": 36350, "total_steps": 78105, "loss": 0.2727, "lr": 3.2277362189803342e-06, "epoch": 2.3269957109019908, "percentage": 46.54, "elapsed_time": "1:35:59", "remaining_time": "1:50:16", "throughput": 19864.19, "total_tokens": 114410560} +{"current_steps": 36355, "total_steps": 78105, "loss": 0.303, "lr": 3.2272017412932573e-06, "epoch": 2.3273157928429677, "percentage": 46.55, "elapsed_time": "1:36:00", "remaining_time": "1:50:15", "throughput": 19864.57, "total_tokens": 114426432} +{"current_steps": 36360, "total_steps": 78105, "loss": 0.191, "lr": 3.226667227293514e-06, "epoch": 2.3276358747839447, "percentage": 46.55, "elapsed_time": "1:36:00", "remaining_time": "1:50:14", "throughput": 19864.96, "total_tokens": 114441792} +{"current_steps": 36365, "total_steps": 78105, "loss": 0.2371, "lr": 3.226132677007795e-06, "epoch": 2.3279559567249217, "percentage": 46.56, "elapsed_time": "1:36:01", "remaining_time": "1:50:13", "throughput": 19865.45, "total_tokens": 114458816} +{"current_steps": 36370, "total_steps": 78105, "loss": 0.2465, "lr": 3.2255980904627914e-06, "epoch": 2.3282760386658987, "percentage": 46.57, "elapsed_time": "1:36:02", "remaining_time": "1:50:12", "throughput": 19865.84, "total_tokens": 114474240} +{"current_steps": 36375, "total_steps": 78105, "loss": 0.1747, "lr": 3.2250634676851992e-06, "epoch": 2.3285961206068753, "percentage": 46.57, "elapsed_time": "1:36:03", "remaining_time": "1:50:11", "throughput": 19866.36, "total_tokens": 114491456} +{"current_steps": 36380, "total_steps": 78105, "loss": 0.2339, "lr": 3.224528808701714e-06, "epoch": 2.3289162025478523, "percentage": 46.58, "elapsed_time": "1:36:03", "remaining_time": "1:50:10", "throughput": 19866.74, "total_tokens": 114507200} +{"current_steps": 36385, "total_steps": 78105, "loss": 0.2379, "lr": 3.223994113539034e-06, "epoch": 2.3292362844888292, "percentage": 46.58, "elapsed_time": "1:36:04", "remaining_time": "1:50:09", "throughput": 19867.21, "total_tokens": 114523904} +{"current_steps": 36390, "total_steps": 78105, "loss": 0.1407, "lr": 3.2234593822238598e-06, "epoch": 2.329556366429806, "percentage": 46.59, "elapsed_time": "1:36:05", "remaining_time": "1:50:08", "throughput": 19867.63, "total_tokens": 114539840} +{"current_steps": 36395, "total_steps": 78105, "loss": 0.1382, "lr": 3.222924614782892e-06, "epoch": 2.329876448370783, "percentage": 46.6, "elapsed_time": "1:36:05", "remaining_time": "1:50:07", "throughput": 19867.99, "total_tokens": 114555072} +{"current_steps": 36400, "total_steps": 78105, "loss": 0.213, "lr": 3.2223898112428344e-06, "epoch": 2.3301965303117598, "percentage": 46.6, "elapsed_time": "1:36:06", "remaining_time": "1:50:06", "throughput": 19868.34, "total_tokens": 114570176} +{"current_steps": 36405, "total_steps": 78105, "loss": 0.2016, "lr": 3.2218549716303914e-06, "epoch": 2.3305166122527368, "percentage": 46.61, "elapsed_time": "1:36:07", "remaining_time": "1:50:05", "throughput": 19868.73, "total_tokens": 114585728} +{"current_steps": 36410, "total_steps": 78105, "loss": 0.2206, "lr": 3.2213200959722724e-06, "epoch": 2.3308366941937138, "percentage": 46.62, "elapsed_time": "1:36:07", "remaining_time": "1:50:05", "throughput": 19869.15, "total_tokens": 114601792} +{"current_steps": 36415, "total_steps": 78105, "loss": 0.1338, "lr": 3.2207851842951844e-06, "epoch": 2.3311567761346903, "percentage": 46.62, "elapsed_time": "1:36:08", "remaining_time": "1:50:04", "throughput": 19869.57, "total_tokens": 114617664} +{"current_steps": 36420, "total_steps": 78105, "loss": 0.2276, "lr": 3.2202502366258394e-06, "epoch": 2.3314768580756673, "percentage": 46.63, "elapsed_time": "1:36:09", "remaining_time": "1:50:03", "throughput": 19869.87, "total_tokens": 114632000} +{"current_steps": 36425, "total_steps": 78105, "loss": 0.2008, "lr": 3.2197152529909486e-06, "epoch": 2.3317969400166443, "percentage": 46.64, "elapsed_time": "1:36:09", "remaining_time": "1:50:02", "throughput": 19870.27, "total_tokens": 114647872} +{"current_steps": 36430, "total_steps": 78105, "loss": 0.2457, "lr": 3.2191802334172275e-06, "epoch": 2.3321170219576213, "percentage": 46.64, "elapsed_time": "1:36:10", "remaining_time": "1:50:01", "throughput": 19870.63, "total_tokens": 114663168} +{"current_steps": 36435, "total_steps": 78105, "loss": 0.1498, "lr": 3.2186451779313914e-06, "epoch": 2.332437103898598, "percentage": 46.65, "elapsed_time": "1:36:11", "remaining_time": "1:50:00", "throughput": 19870.95, "total_tokens": 114677824} +{"current_steps": 36440, "total_steps": 78105, "loss": 0.2776, "lr": 3.218110086560158e-06, "epoch": 2.332757185839575, "percentage": 46.66, "elapsed_time": "1:36:11", "remaining_time": "1:49:59", "throughput": 19871.26, "total_tokens": 114692672} +{"current_steps": 36445, "total_steps": 78105, "loss": 0.3512, "lr": 3.217574959330248e-06, "epoch": 2.333077267780552, "percentage": 46.66, "elapsed_time": "1:36:12", "remaining_time": "1:49:58", "throughput": 19871.61, "total_tokens": 114707968} +{"current_steps": 36450, "total_steps": 78105, "loss": 0.1954, "lr": 3.2170397962683818e-06, "epoch": 2.333397349721529, "percentage": 46.67, "elapsed_time": "1:36:13", "remaining_time": "1:49:57", "throughput": 19871.99, "total_tokens": 114723712} +{"current_steps": 36455, "total_steps": 78105, "loss": 0.272, "lr": 3.2165045974012832e-06, "epoch": 2.333717431662506, "percentage": 46.67, "elapsed_time": "1:36:13", "remaining_time": "1:49:56", "throughput": 19872.33, "total_tokens": 114738880} +{"current_steps": 36460, "total_steps": 78105, "loss": 0.2548, "lr": 3.2159693627556777e-06, "epoch": 2.3340375136034823, "percentage": 46.68, "elapsed_time": "1:36:14", "remaining_time": "1:49:55", "throughput": 19872.64, "total_tokens": 114753472} +{"current_steps": 36465, "total_steps": 78105, "loss": 0.1387, "lr": 3.2154340923582904e-06, "epoch": 2.3343575955444593, "percentage": 46.69, "elapsed_time": "1:36:15", "remaining_time": "1:49:54", "throughput": 19872.95, "total_tokens": 114768000} +{"current_steps": 36470, "total_steps": 78105, "loss": 0.3848, "lr": 3.214898786235852e-06, "epoch": 2.3346776774854363, "percentage": 46.69, "elapsed_time": "1:36:15", "remaining_time": "1:49:53", "throughput": 19872.78, "total_tokens": 114784000} +{"current_steps": 36475, "total_steps": 78105, "loss": 0.2352, "lr": 3.214363444415091e-06, "epoch": 2.3349977594264133, "percentage": 46.7, "elapsed_time": "1:36:16", "remaining_time": "1:49:52", "throughput": 19873.13, "total_tokens": 114798848} +{"current_steps": 36480, "total_steps": 78105, "loss": 0.259, "lr": 3.2138280669227416e-06, "epoch": 2.33531784136739, "percentage": 46.71, "elapsed_time": "1:36:17", "remaining_time": "1:49:52", "throughput": 19873.44, "total_tokens": 114813632} +{"current_steps": 36485, "total_steps": 78105, "loss": 0.2253, "lr": 3.213292653785537e-06, "epoch": 2.335637923308367, "percentage": 46.71, "elapsed_time": "1:36:17", "remaining_time": "1:49:51", "throughput": 19873.86, "total_tokens": 114830208} +{"current_steps": 36490, "total_steps": 78105, "loss": 0.2324, "lr": 3.212757205030212e-06, "epoch": 2.335958005249344, "percentage": 46.72, "elapsed_time": "1:36:18", "remaining_time": "1:49:50", "throughput": 19874.31, "total_tokens": 114846784} +{"current_steps": 36495, "total_steps": 78105, "loss": 0.2594, "lr": 3.2122217206835043e-06, "epoch": 2.336278087190321, "percentage": 46.73, "elapsed_time": "1:36:19", "remaining_time": "1:49:49", "throughput": 19874.66, "total_tokens": 114861888} +{"current_steps": 36500, "total_steps": 78105, "loss": 0.1692, "lr": 3.2116862007721544e-06, "epoch": 2.336598169131298, "percentage": 46.73, "elapsed_time": "1:36:19", "remaining_time": "1:49:48", "throughput": 19875.04, "total_tokens": 114877504} +{"current_steps": 36505, "total_steps": 78105, "loss": 0.2188, "lr": 3.2111506453229024e-06, "epoch": 2.3369182510722744, "percentage": 46.74, "elapsed_time": "1:36:20", "remaining_time": "1:49:47", "throughput": 19875.33, "total_tokens": 114891840} +{"current_steps": 36510, "total_steps": 78105, "loss": 0.3845, "lr": 3.210615054362492e-06, "epoch": 2.3372383330132513, "percentage": 46.74, "elapsed_time": "1:36:21", "remaining_time": "1:49:46", "throughput": 19875.68, "total_tokens": 114906944} +{"current_steps": 36515, "total_steps": 78105, "loss": 0.1854, "lr": 3.2100794279176674e-06, "epoch": 2.3375584149542283, "percentage": 46.75, "elapsed_time": "1:36:21", "remaining_time": "1:49:45", "throughput": 19876.03, "total_tokens": 114922176} +{"current_steps": 36520, "total_steps": 78105, "loss": 0.2246, "lr": 3.2095437660151734e-06, "epoch": 2.3378784968952053, "percentage": 46.76, "elapsed_time": "1:36:22", "remaining_time": "1:49:44", "throughput": 19876.45, "total_tokens": 114938432} +{"current_steps": 36525, "total_steps": 78105, "loss": 0.318, "lr": 3.2090080686817605e-06, "epoch": 2.338198578836182, "percentage": 46.76, "elapsed_time": "1:36:23", "remaining_time": "1:49:43", "throughput": 19876.76, "total_tokens": 114953152} +{"current_steps": 36530, "total_steps": 78105, "loss": 0.221, "lr": 3.208472335944177e-06, "epoch": 2.338518660777159, "percentage": 46.77, "elapsed_time": "1:36:23", "remaining_time": "1:49:42", "throughput": 19877.2, "total_tokens": 114969344} +{"current_steps": 36535, "total_steps": 78105, "loss": 0.1831, "lr": 3.2079365678291767e-06, "epoch": 2.338838742718136, "percentage": 46.78, "elapsed_time": "1:36:24", "remaining_time": "1:49:41", "throughput": 19877.58, "total_tokens": 114984960} +{"current_steps": 36540, "total_steps": 78105, "loss": 0.2345, "lr": 3.207400764363511e-06, "epoch": 2.339158824659113, "percentage": 46.78, "elapsed_time": "1:36:25", "remaining_time": "1:49:40", "throughput": 19877.96, "total_tokens": 115000576} +{"current_steps": 36545, "total_steps": 78105, "loss": 0.1756, "lr": 3.206864925573936e-06, "epoch": 2.33947890660009, "percentage": 46.79, "elapsed_time": "1:36:25", "remaining_time": "1:49:39", "throughput": 19878.3, "total_tokens": 115015552} +{"current_steps": 36550, "total_steps": 78105, "loss": 0.1685, "lr": 3.206329051487208e-06, "epoch": 2.3397989885410664, "percentage": 46.8, "elapsed_time": "1:36:26", "remaining_time": "1:49:39", "throughput": 19878.61, "total_tokens": 115030016} +{"current_steps": 36555, "total_steps": 78105, "loss": 0.1446, "lr": 3.205793142130087e-06, "epoch": 2.3401190704820434, "percentage": 46.8, "elapsed_time": "1:36:27", "remaining_time": "1:49:38", "throughput": 19878.9, "total_tokens": 115044416} +{"current_steps": 36560, "total_steps": 78105, "loss": 0.3622, "lr": 3.205257197529333e-06, "epoch": 2.3404391524230204, "percentage": 46.81, "elapsed_time": "1:36:27", "remaining_time": "1:49:37", "throughput": 19879.28, "total_tokens": 115059840} +{"current_steps": 36565, "total_steps": 78105, "loss": 0.1435, "lr": 3.2047212177117078e-06, "epoch": 2.3407592343639974, "percentage": 46.82, "elapsed_time": "1:36:28", "remaining_time": "1:49:36", "throughput": 19879.69, "total_tokens": 115075840} +{"current_steps": 36570, "total_steps": 78105, "loss": 0.22, "lr": 3.2041852027039755e-06, "epoch": 2.341079316304974, "percentage": 46.82, "elapsed_time": "1:36:29", "remaining_time": "1:49:35", "throughput": 19880.08, "total_tokens": 115091648} +{"current_steps": 36575, "total_steps": 78105, "loss": 0.2323, "lr": 3.203649152532902e-06, "epoch": 2.341399398245951, "percentage": 46.83, "elapsed_time": "1:36:30", "remaining_time": "1:49:34", "throughput": 19880.56, "total_tokens": 115108608} +{"current_steps": 36580, "total_steps": 78105, "loss": 0.1537, "lr": 3.2031130672252552e-06, "epoch": 2.341719480186928, "percentage": 46.83, "elapsed_time": "1:36:30", "remaining_time": "1:49:33", "throughput": 19880.98, "total_tokens": 115124800} +{"current_steps": 36585, "total_steps": 78105, "loss": 0.2829, "lr": 3.2025769468078044e-06, "epoch": 2.342039562127905, "percentage": 46.84, "elapsed_time": "1:36:31", "remaining_time": "1:49:32", "throughput": 19881.33, "total_tokens": 115140160} +{"current_steps": 36590, "total_steps": 78105, "loss": 0.227, "lr": 3.20204079130732e-06, "epoch": 2.342359644068882, "percentage": 46.85, "elapsed_time": "1:36:32", "remaining_time": "1:49:31", "throughput": 19881.71, "total_tokens": 115156096} +{"current_steps": 36595, "total_steps": 78105, "loss": 0.2746, "lr": 3.2015046007505757e-06, "epoch": 2.3426797260098584, "percentage": 46.85, "elapsed_time": "1:36:32", "remaining_time": "1:49:30", "throughput": 19882.02, "total_tokens": 115170816} +{"current_steps": 36600, "total_steps": 78105, "loss": 0.2314, "lr": 3.2009683751643457e-06, "epoch": 2.3429998079508354, "percentage": 46.86, "elapsed_time": "1:36:33", "remaining_time": "1:49:29", "throughput": 19882.36, "total_tokens": 115185536} +{"current_steps": 36605, "total_steps": 78105, "loss": 0.1881, "lr": 3.2004321145754065e-06, "epoch": 2.3433198898918124, "percentage": 46.87, "elapsed_time": "1:36:34", "remaining_time": "1:49:28", "throughput": 19882.72, "total_tokens": 115201024} +{"current_steps": 36610, "total_steps": 78105, "loss": 0.2162, "lr": 3.1998958190105352e-06, "epoch": 2.3436399718327894, "percentage": 46.87, "elapsed_time": "1:36:34", "remaining_time": "1:49:27", "throughput": 19883.05, "total_tokens": 115215744} +{"current_steps": 36615, "total_steps": 78105, "loss": 0.254, "lr": 3.1993594884965123e-06, "epoch": 2.343960053773766, "percentage": 46.88, "elapsed_time": "1:36:35", "remaining_time": "1:49:27", "throughput": 19883.5, "total_tokens": 115232704} +{"current_steps": 36620, "total_steps": 78105, "loss": 0.2069, "lr": 3.1988231230601198e-06, "epoch": 2.344280135714743, "percentage": 46.89, "elapsed_time": "1:36:36", "remaining_time": "1:49:26", "throughput": 19883.88, "total_tokens": 115248064} +{"current_steps": 36625, "total_steps": 78105, "loss": 0.223, "lr": 3.198286722728141e-06, "epoch": 2.34460021765572, "percentage": 46.89, "elapsed_time": "1:36:36", "remaining_time": "1:49:25", "throughput": 19884.26, "total_tokens": 115263296} +{"current_steps": 36630, "total_steps": 78105, "loss": 0.2543, "lr": 3.1977502875273605e-06, "epoch": 2.344920299596697, "percentage": 46.9, "elapsed_time": "1:36:37", "remaining_time": "1:49:24", "throughput": 19884.71, "total_tokens": 115279872} +{"current_steps": 36635, "total_steps": 78105, "loss": 0.1653, "lr": 3.1972138174845635e-06, "epoch": 2.345240381537674, "percentage": 46.9, "elapsed_time": "1:36:38", "remaining_time": "1:49:23", "throughput": 19885.08, "total_tokens": 115294976} +{"current_steps": 36640, "total_steps": 78105, "loss": 0.1667, "lr": 3.1966773126265415e-06, "epoch": 2.3455604634786504, "percentage": 46.91, "elapsed_time": "1:36:38", "remaining_time": "1:49:22", "throughput": 19885.46, "total_tokens": 115310784} +{"current_steps": 36645, "total_steps": 78105, "loss": 0.2562, "lr": 3.196140772980083e-06, "epoch": 2.3458805454196274, "percentage": 46.92, "elapsed_time": "1:36:39", "remaining_time": "1:49:21", "throughput": 19885.84, "total_tokens": 115326592} +{"current_steps": 36650, "total_steps": 78105, "loss": 0.2537, "lr": 3.19560419857198e-06, "epoch": 2.3462006273606044, "percentage": 46.92, "elapsed_time": "1:36:40", "remaining_time": "1:49:20", "throughput": 19886.34, "total_tokens": 115343616} +{"current_steps": 36655, "total_steps": 78105, "loss": 0.1709, "lr": 3.1950675894290275e-06, "epoch": 2.346520709301581, "percentage": 46.93, "elapsed_time": "1:36:40", "remaining_time": "1:49:19", "throughput": 19886.71, "total_tokens": 115358656} +{"current_steps": 36660, "total_steps": 78105, "loss": 0.3394, "lr": 3.1945309455780195e-06, "epoch": 2.346840791242558, "percentage": 46.94, "elapsed_time": "1:36:41", "remaining_time": "1:49:18", "throughput": 19887.17, "total_tokens": 115375296} +{"current_steps": 36665, "total_steps": 78105, "loss": 0.2417, "lr": 3.1939942670457536e-06, "epoch": 2.347160873183535, "percentage": 46.94, "elapsed_time": "1:36:42", "remaining_time": "1:49:17", "throughput": 19887.57, "total_tokens": 115390976} +{"current_steps": 36670, "total_steps": 78105, "loss": 0.2483, "lr": 3.193457553859029e-06, "epoch": 2.347480955124512, "percentage": 46.95, "elapsed_time": "1:36:42", "remaining_time": "1:49:16", "throughput": 19887.97, "total_tokens": 115406592} +{"current_steps": 36675, "total_steps": 78105, "loss": 0.2299, "lr": 3.1929208060446465e-06, "epoch": 2.347801037065489, "percentage": 46.96, "elapsed_time": "1:36:43", "remaining_time": "1:49:15", "throughput": 19888.29, "total_tokens": 115421376} +{"current_steps": 36680, "total_steps": 78105, "loss": 0.2541, "lr": 3.1923840236294085e-06, "epoch": 2.3481211190064655, "percentage": 46.96, "elapsed_time": "1:36:44", "remaining_time": "1:49:14", "throughput": 19888.61, "total_tokens": 115436096} +{"current_steps": 36685, "total_steps": 78105, "loss": 0.2223, "lr": 3.1918472066401185e-06, "epoch": 2.3484412009474425, "percentage": 46.97, "elapsed_time": "1:36:44", "remaining_time": "1:49:14", "throughput": 19889.04, "total_tokens": 115452672} +{"current_steps": 36690, "total_steps": 78105, "loss": 0.2102, "lr": 3.191310355103583e-06, "epoch": 2.3487612828884195, "percentage": 46.98, "elapsed_time": "1:36:45", "remaining_time": "1:49:13", "throughput": 19889.45, "total_tokens": 115468544} +{"current_steps": 36695, "total_steps": 78105, "loss": 0.3369, "lr": 3.1907734690466086e-06, "epoch": 2.3490813648293964, "percentage": 46.98, "elapsed_time": "1:36:46", "remaining_time": "1:49:12", "throughput": 19889.82, "total_tokens": 115483904} +{"current_steps": 36700, "total_steps": 78105, "loss": 0.2031, "lr": 3.190236548496005e-06, "epoch": 2.349401446770373, "percentage": 46.99, "elapsed_time": "1:36:46", "remaining_time": "1:49:11", "throughput": 19890.24, "total_tokens": 115500032} +{"current_steps": 36705, "total_steps": 78105, "loss": 0.2442, "lr": 3.189699593478584e-06, "epoch": 2.34972152871135, "percentage": 46.99, "elapsed_time": "1:36:47", "remaining_time": "1:49:10", "throughput": 19890.62, "total_tokens": 115515712} +{"current_steps": 36710, "total_steps": 78105, "loss": 0.2571, "lr": 3.1891626040211582e-06, "epoch": 2.350041610652327, "percentage": 47.0, "elapsed_time": "1:36:48", "remaining_time": "1:49:09", "throughput": 19891.01, "total_tokens": 115531200} +{"current_steps": 36715, "total_steps": 78105, "loss": 0.1583, "lr": 3.1886255801505418e-06, "epoch": 2.350361692593304, "percentage": 47.01, "elapsed_time": "1:36:48", "remaining_time": "1:49:08", "throughput": 19891.4, "total_tokens": 115547072} +{"current_steps": 36720, "total_steps": 78105, "loss": 0.2909, "lr": 3.1880885218935505e-06, "epoch": 2.350681774534281, "percentage": 47.01, "elapsed_time": "1:36:49", "remaining_time": "1:49:07", "throughput": 19891.78, "total_tokens": 115562560} +{"current_steps": 36725, "total_steps": 78105, "loss": 0.1459, "lr": 3.1875514292770015e-06, "epoch": 2.3510018564752575, "percentage": 47.02, "elapsed_time": "1:36:50", "remaining_time": "1:49:06", "throughput": 19892.14, "total_tokens": 115577856} +{"current_steps": 36730, "total_steps": 78105, "loss": 0.2942, "lr": 3.1870143023277168e-06, "epoch": 2.3513219384162345, "percentage": 47.03, "elapsed_time": "1:36:50", "remaining_time": "1:49:05", "throughput": 19892.5, "total_tokens": 115593152} +{"current_steps": 36735, "total_steps": 78105, "loss": 0.2979, "lr": 3.186477141072516e-06, "epoch": 2.3516420203572115, "percentage": 47.03, "elapsed_time": "1:36:51", "remaining_time": "1:49:04", "throughput": 19892.88, "total_tokens": 115609024} +{"current_steps": 36740, "total_steps": 78105, "loss": 0.2339, "lr": 3.1859399455382228e-06, "epoch": 2.3519621022981885, "percentage": 47.04, "elapsed_time": "1:36:52", "remaining_time": "1:49:03", "throughput": 19893.27, "total_tokens": 115625024} +{"current_steps": 36745, "total_steps": 78105, "loss": 0.2626, "lr": 3.1854027157516615e-06, "epoch": 2.352282184239165, "percentage": 47.05, "elapsed_time": "1:36:52", "remaining_time": "1:49:03", "throughput": 19893.64, "total_tokens": 115640512} +{"current_steps": 36750, "total_steps": 78105, "loss": 0.1688, "lr": 3.1848654517396583e-06, "epoch": 2.352602266180142, "percentage": 47.05, "elapsed_time": "1:36:53", "remaining_time": "1:49:02", "throughput": 19894.12, "total_tokens": 115657920} +{"current_steps": 36755, "total_steps": 78105, "loss": 0.278, "lr": 3.184328153529042e-06, "epoch": 2.352922348121119, "percentage": 47.06, "elapsed_time": "1:36:54", "remaining_time": "1:49:01", "throughput": 19894.47, "total_tokens": 115672896} +{"current_steps": 36760, "total_steps": 78105, "loss": 0.2092, "lr": 3.183790821146642e-06, "epoch": 2.353242430062096, "percentage": 47.06, "elapsed_time": "1:36:55", "remaining_time": "1:49:00", "throughput": 19895.0, "total_tokens": 115690624} +{"current_steps": 36765, "total_steps": 78105, "loss": 0.2352, "lr": 3.1832534546192908e-06, "epoch": 2.353562512003073, "percentage": 47.07, "elapsed_time": "1:36:55", "remaining_time": "1:48:59", "throughput": 19895.38, "total_tokens": 115706624} +{"current_steps": 36770, "total_steps": 78105, "loss": 0.1923, "lr": 3.18271605397382e-06, "epoch": 2.3538825939440495, "percentage": 47.08, "elapsed_time": "1:36:56", "remaining_time": "1:48:58", "throughput": 19895.69, "total_tokens": 115721344} +{"current_steps": 36775, "total_steps": 78105, "loss": 0.1588, "lr": 3.182178619237066e-06, "epoch": 2.3542026758850265, "percentage": 47.08, "elapsed_time": "1:36:57", "remaining_time": "1:48:57", "throughput": 19896.04, "total_tokens": 115736576} +{"current_steps": 36780, "total_steps": 78105, "loss": 0.224, "lr": 3.181641150435866e-06, "epoch": 2.3545227578260035, "percentage": 47.09, "elapsed_time": "1:36:57", "remaining_time": "1:48:56", "throughput": 19896.47, "total_tokens": 115752896} +{"current_steps": 36785, "total_steps": 78105, "loss": 0.1854, "lr": 3.1811036475970563e-06, "epoch": 2.3548428397669805, "percentage": 47.1, "elapsed_time": "1:36:58", "remaining_time": "1:48:55", "throughput": 19896.85, "total_tokens": 115768320} +{"current_steps": 36790, "total_steps": 78105, "loss": 0.241, "lr": 3.180566110747479e-06, "epoch": 2.355162921707957, "percentage": 47.1, "elapsed_time": "1:36:59", "remaining_time": "1:48:54", "throughput": 19897.29, "total_tokens": 115784640} +{"current_steps": 36795, "total_steps": 78105, "loss": 0.2999, "lr": 3.1800285399139747e-06, "epoch": 2.355483003648934, "percentage": 47.11, "elapsed_time": "1:36:59", "remaining_time": "1:48:53", "throughput": 19897.6, "total_tokens": 115799424} +{"current_steps": 36800, "total_steps": 78105, "loss": 0.1685, "lr": 3.1794909351233873e-06, "epoch": 2.355803085589911, "percentage": 47.12, "elapsed_time": "1:37:00", "remaining_time": "1:48:53", "throughput": 19898.08, "total_tokens": 115816384} +{"current_steps": 36805, "total_steps": 78105, "loss": 0.2579, "lr": 3.178953296402562e-06, "epoch": 2.356123167530888, "percentage": 47.12, "elapsed_time": "1:37:01", "remaining_time": "1:48:52", "throughput": 19898.45, "total_tokens": 115831808} +{"current_steps": 36810, "total_steps": 78105, "loss": 0.2363, "lr": 3.1784156237783453e-06, "epoch": 2.356443249471865, "percentage": 47.13, "elapsed_time": "1:37:01", "remaining_time": "1:48:51", "throughput": 19898.84, "total_tokens": 115847552} +{"current_steps": 36815, "total_steps": 78105, "loss": 0.2009, "lr": 3.1778779172775866e-06, "epoch": 2.3567633314128416, "percentage": 47.14, "elapsed_time": "1:37:02", "remaining_time": "1:48:50", "throughput": 19899.27, "total_tokens": 115863808} +{"current_steps": 36820, "total_steps": 78105, "loss": 0.2429, "lr": 3.1773401769271357e-06, "epoch": 2.3570834133538185, "percentage": 47.14, "elapsed_time": "1:37:03", "remaining_time": "1:48:49", "throughput": 19899.72, "total_tokens": 115879936} +{"current_steps": 36825, "total_steps": 78105, "loss": 0.2146, "lr": 3.1768024027538435e-06, "epoch": 2.3574034952947955, "percentage": 47.15, "elapsed_time": "1:37:03", "remaining_time": "1:48:48", "throughput": 19900.07, "total_tokens": 115894912} +{"current_steps": 36830, "total_steps": 78105, "loss": 0.154, "lr": 3.1762645947845656e-06, "epoch": 2.3577235772357725, "percentage": 47.15, "elapsed_time": "1:37:04", "remaining_time": "1:48:47", "throughput": 19900.47, "total_tokens": 115910784} +{"current_steps": 36835, "total_steps": 78105, "loss": 0.1963, "lr": 3.175726753046156e-06, "epoch": 2.358043659176749, "percentage": 47.16, "elapsed_time": "1:37:05", "remaining_time": "1:48:46", "throughput": 19900.94, "total_tokens": 115927424} +{"current_steps": 36840, "total_steps": 78105, "loss": 0.2708, "lr": 3.1751888775654715e-06, "epoch": 2.358363741117726, "percentage": 47.17, "elapsed_time": "1:37:05", "remaining_time": "1:48:45", "throughput": 19901.39, "total_tokens": 115943936} +{"current_steps": 36845, "total_steps": 78105, "loss": 0.2317, "lr": 3.1746509683693727e-06, "epoch": 2.358683823058703, "percentage": 47.17, "elapsed_time": "1:37:06", "remaining_time": "1:48:44", "throughput": 19901.89, "total_tokens": 115961152} +{"current_steps": 36850, "total_steps": 78105, "loss": 0.2335, "lr": 3.174113025484718e-06, "epoch": 2.35900390499968, "percentage": 47.18, "elapsed_time": "1:37:07", "remaining_time": "1:48:43", "throughput": 19902.29, "total_tokens": 115977344} +{"current_steps": 36855, "total_steps": 78105, "loss": 0.1963, "lr": 3.1735750489383703e-06, "epoch": 2.359323986940657, "percentage": 47.19, "elapsed_time": "1:37:08", "remaining_time": "1:48:43", "throughput": 19902.7, "total_tokens": 115993472} +{"current_steps": 36860, "total_steps": 78105, "loss": 0.1885, "lr": 3.173037038757193e-06, "epoch": 2.3596440688816336, "percentage": 47.19, "elapsed_time": "1:37:08", "remaining_time": "1:48:42", "throughput": 19903.04, "total_tokens": 116008640} +{"current_steps": 36865, "total_steps": 78105, "loss": 0.1878, "lr": 3.172498994968052e-06, "epoch": 2.3599641508226106, "percentage": 47.2, "elapsed_time": "1:37:09", "remaining_time": "1:48:41", "throughput": 19903.51, "total_tokens": 116025280} +{"current_steps": 36870, "total_steps": 78105, "loss": 0.1902, "lr": 3.1719609175978133e-06, "epoch": 2.3602842327635876, "percentage": 47.21, "elapsed_time": "1:37:10", "remaining_time": "1:48:40", "throughput": 19903.83, "total_tokens": 116040064} +{"current_steps": 36875, "total_steps": 78105, "loss": 0.1829, "lr": 3.1714228066733477e-06, "epoch": 2.3606043147045646, "percentage": 47.21, "elapsed_time": "1:37:10", "remaining_time": "1:48:39", "throughput": 19904.3, "total_tokens": 116057088} +{"current_steps": 36880, "total_steps": 78105, "loss": 0.2213, "lr": 3.170884662221523e-06, "epoch": 2.360924396645541, "percentage": 47.22, "elapsed_time": "1:37:11", "remaining_time": "1:48:38", "throughput": 19904.69, "total_tokens": 116072960} +{"current_steps": 36885, "total_steps": 78105, "loss": 0.272, "lr": 3.1703464842692138e-06, "epoch": 2.361244478586518, "percentage": 47.22, "elapsed_time": "1:37:12", "remaining_time": "1:48:37", "throughput": 19905.07, "total_tokens": 116088704} +{"current_steps": 36890, "total_steps": 78105, "loss": 0.1445, "lr": 3.169808272843293e-06, "epoch": 2.361564560527495, "percentage": 47.23, "elapsed_time": "1:37:12", "remaining_time": "1:48:36", "throughput": 19905.54, "total_tokens": 116105664} +{"current_steps": 36895, "total_steps": 78105, "loss": 0.2431, "lr": 3.169270027970635e-06, "epoch": 2.361884642468472, "percentage": 47.24, "elapsed_time": "1:37:13", "remaining_time": "1:48:35", "throughput": 19905.89, "total_tokens": 116121024} +{"current_steps": 36900, "total_steps": 78105, "loss": 0.2349, "lr": 3.168731749678119e-06, "epoch": 2.362204724409449, "percentage": 47.24, "elapsed_time": "1:37:14", "remaining_time": "1:48:34", "throughput": 19906.21, "total_tokens": 116135808} +{"current_steps": 36905, "total_steps": 78105, "loss": 0.2027, "lr": 3.168193437992622e-06, "epoch": 2.3625248063504256, "percentage": 47.25, "elapsed_time": "1:37:14", "remaining_time": "1:48:33", "throughput": 19906.55, "total_tokens": 116150528} +{"current_steps": 36910, "total_steps": 78105, "loss": 0.192, "lr": 3.1676550929410256e-06, "epoch": 2.3628448882914026, "percentage": 47.26, "elapsed_time": "1:37:15", "remaining_time": "1:48:32", "throughput": 19906.87, "total_tokens": 116165312} +{"current_steps": 36915, "total_steps": 78105, "loss": 0.1879, "lr": 3.1671167145502114e-06, "epoch": 2.3631649702323796, "percentage": 47.26, "elapsed_time": "1:37:16", "remaining_time": "1:48:31", "throughput": 19907.21, "total_tokens": 116180416} +{"current_steps": 36920, "total_steps": 78105, "loss": 0.2415, "lr": 3.1665783028470636e-06, "epoch": 2.363485052173356, "percentage": 47.27, "elapsed_time": "1:37:16", "remaining_time": "1:48:31", "throughput": 19907.6, "total_tokens": 116196288} +{"current_steps": 36925, "total_steps": 78105, "loss": 0.2012, "lr": 3.1660398578584665e-06, "epoch": 2.363805134114333, "percentage": 47.28, "elapsed_time": "1:37:17", "remaining_time": "1:48:30", "throughput": 19907.94, "total_tokens": 116211072} +{"current_steps": 36930, "total_steps": 78105, "loss": 0.2828, "lr": 3.1655013796113092e-06, "epoch": 2.36412521605531, "percentage": 47.28, "elapsed_time": "1:37:18", "remaining_time": "1:48:29", "throughput": 19908.26, "total_tokens": 116225856} +{"current_steps": 36935, "total_steps": 78105, "loss": 0.2462, "lr": 3.1649628681324795e-06, "epoch": 2.364445297996287, "percentage": 47.29, "elapsed_time": "1:37:18", "remaining_time": "1:48:28", "throughput": 19908.58, "total_tokens": 116240448} +{"current_steps": 36940, "total_steps": 78105, "loss": 0.273, "lr": 3.164424323448867e-06, "epoch": 2.364765379937264, "percentage": 47.3, "elapsed_time": "1:37:19", "remaining_time": "1:48:27", "throughput": 19908.99, "total_tokens": 116256448} +{"current_steps": 36945, "total_steps": 78105, "loss": 0.2831, "lr": 3.163885745587365e-06, "epoch": 2.3650854618782406, "percentage": 47.3, "elapsed_time": "1:37:20", "remaining_time": "1:48:26", "throughput": 19909.39, "total_tokens": 116272640} +{"current_steps": 36950, "total_steps": 78105, "loss": 0.2958, "lr": 3.1633471345748672e-06, "epoch": 2.3654055438192176, "percentage": 47.31, "elapsed_time": "1:37:20", "remaining_time": "1:48:25", "throughput": 19909.83, "total_tokens": 116288896} +{"current_steps": 36955, "total_steps": 78105, "loss": 0.1791, "lr": 3.1628084904382685e-06, "epoch": 2.3657256257601946, "percentage": 47.31, "elapsed_time": "1:37:21", "remaining_time": "1:48:24", "throughput": 19910.18, "total_tokens": 116304000} +{"current_steps": 36960, "total_steps": 78105, "loss": 0.2435, "lr": 3.162269813204466e-06, "epoch": 2.3660457077011716, "percentage": 47.32, "elapsed_time": "1:37:22", "remaining_time": "1:48:23", "throughput": 19910.64, "total_tokens": 116320896} +{"current_steps": 36965, "total_steps": 78105, "loss": 0.1587, "lr": 3.1617311029003593e-06, "epoch": 2.366365789642148, "percentage": 47.33, "elapsed_time": "1:37:22", "remaining_time": "1:48:22", "throughput": 19911.06, "total_tokens": 116337088} +{"current_steps": 36970, "total_steps": 78105, "loss": 0.2713, "lr": 3.161192359552848e-06, "epoch": 2.366685871583125, "percentage": 47.33, "elapsed_time": "1:37:23", "remaining_time": "1:48:21", "throughput": 19911.36, "total_tokens": 116351488} +{"current_steps": 36975, "total_steps": 78105, "loss": 0.3221, "lr": 3.160653583188834e-06, "epoch": 2.367005953524102, "percentage": 47.34, "elapsed_time": "1:37:24", "remaining_time": "1:48:20", "throughput": 19911.73, "total_tokens": 116366976} +{"current_steps": 36980, "total_steps": 78105, "loss": 0.1425, "lr": 3.1601147738352212e-06, "epoch": 2.367326035465079, "percentage": 47.35, "elapsed_time": "1:37:24", "remaining_time": "1:48:20", "throughput": 19912.23, "total_tokens": 116384384} +{"current_steps": 36985, "total_steps": 78105, "loss": 0.2174, "lr": 3.1595759315189155e-06, "epoch": 2.367646117406056, "percentage": 47.35, "elapsed_time": "1:37:25", "remaining_time": "1:48:19", "throughput": 19912.61, "total_tokens": 116400384} +{"current_steps": 36990, "total_steps": 78105, "loss": 0.2824, "lr": 3.1590370562668233e-06, "epoch": 2.3679661993470327, "percentage": 47.36, "elapsed_time": "1:37:26", "remaining_time": "1:48:18", "throughput": 19912.92, "total_tokens": 116414976} +{"current_steps": 36995, "total_steps": 78105, "loss": 0.2902, "lr": 3.158498148105853e-06, "epoch": 2.3682862812880097, "percentage": 47.37, "elapsed_time": "1:37:26", "remaining_time": "1:48:17", "throughput": 19913.26, "total_tokens": 116430336} +{"current_steps": 37000, "total_steps": 78105, "loss": 0.1279, "lr": 3.157959207062915e-06, "epoch": 2.3686063632289867, "percentage": 47.37, "elapsed_time": "1:37:27", "remaining_time": "1:48:16", "throughput": 19913.65, "total_tokens": 116445888} +{"current_steps": 37005, "total_steps": 78105, "loss": 0.2083, "lr": 3.1574202331649216e-06, "epoch": 2.3689264451699636, "percentage": 47.38, "elapsed_time": "1:37:28", "remaining_time": "1:48:15", "throughput": 19913.98, "total_tokens": 116460736} +{"current_steps": 37010, "total_steps": 78105, "loss": 0.2319, "lr": 3.156881226438786e-06, "epoch": 2.36924652711094, "percentage": 47.38, "elapsed_time": "1:37:28", "remaining_time": "1:48:14", "throughput": 19914.37, "total_tokens": 116475904} +{"current_steps": 37015, "total_steps": 78105, "loss": 0.1774, "lr": 3.1563421869114236e-06, "epoch": 2.369566609051917, "percentage": 47.39, "elapsed_time": "1:37:29", "remaining_time": "1:48:13", "throughput": 19914.8, "total_tokens": 116492352} +{"current_steps": 37020, "total_steps": 78105, "loss": 0.1879, "lr": 3.155803114609751e-06, "epoch": 2.369886690992894, "percentage": 47.4, "elapsed_time": "1:37:30", "remaining_time": "1:48:12", "throughput": 19915.14, "total_tokens": 116507520} +{"current_steps": 37025, "total_steps": 78105, "loss": 0.1764, "lr": 3.155264009560688e-06, "epoch": 2.370206772933871, "percentage": 47.4, "elapsed_time": "1:37:30", "remaining_time": "1:48:11", "throughput": 19915.54, "total_tokens": 116523712} +{"current_steps": 37030, "total_steps": 78105, "loss": 0.229, "lr": 3.1547248717911522e-06, "epoch": 2.370526854874848, "percentage": 47.41, "elapsed_time": "1:37:31", "remaining_time": "1:48:10", "throughput": 19915.87, "total_tokens": 116538624} +{"current_steps": 37035, "total_steps": 78105, "loss": 0.216, "lr": 3.1541857013280676e-06, "epoch": 2.3708469368158247, "percentage": 47.42, "elapsed_time": "1:37:32", "remaining_time": "1:48:09", "throughput": 19916.28, "total_tokens": 116554688} +{"current_steps": 37040, "total_steps": 78105, "loss": 0.2766, "lr": 3.153646498198355e-06, "epoch": 2.3711670187568017, "percentage": 47.42, "elapsed_time": "1:37:32", "remaining_time": "1:48:08", "throughput": 19916.7, "total_tokens": 116570944} +{"current_steps": 37045, "total_steps": 78105, "loss": 0.2411, "lr": 3.153107262428943e-06, "epoch": 2.3714871006977787, "percentage": 47.43, "elapsed_time": "1:37:33", "remaining_time": "1:48:08", "throughput": 19917.07, "total_tokens": 116586496} +{"current_steps": 37050, "total_steps": 78105, "loss": 0.284, "lr": 3.1525679940467548e-06, "epoch": 2.3718071826387557, "percentage": 47.44, "elapsed_time": "1:37:34", "remaining_time": "1:48:07", "throughput": 19917.42, "total_tokens": 116601792} +{"current_steps": 37055, "total_steps": 78105, "loss": 0.1597, "lr": 3.15202869307872e-06, "epoch": 2.372127264579732, "percentage": 47.44, "elapsed_time": "1:37:34", "remaining_time": "1:48:06", "throughput": 19917.75, "total_tokens": 116616768} +{"current_steps": 37060, "total_steps": 78105, "loss": 0.3351, "lr": 3.1514893595517694e-06, "epoch": 2.372447346520709, "percentage": 47.45, "elapsed_time": "1:37:35", "remaining_time": "1:48:05", "throughput": 19918.18, "total_tokens": 116632960} +{"current_steps": 37065, "total_steps": 78105, "loss": 0.1726, "lr": 3.150949993492833e-06, "epoch": 2.372767428461686, "percentage": 47.46, "elapsed_time": "1:37:36", "remaining_time": "1:48:04", "throughput": 19918.57, "total_tokens": 116648640} +{"current_steps": 37070, "total_steps": 78105, "loss": 0.3061, "lr": 3.150410594928845e-06, "epoch": 2.373087510402663, "percentage": 47.46, "elapsed_time": "1:37:36", "remaining_time": "1:48:03", "throughput": 19919.0, "total_tokens": 116665088} +{"current_steps": 37075, "total_steps": 78105, "loss": 0.1316, "lr": 3.1498711638867395e-06, "epoch": 2.37340759234364, "percentage": 47.47, "elapsed_time": "1:37:37", "remaining_time": "1:48:02", "throughput": 19919.4, "total_tokens": 116681088} +{"current_steps": 37080, "total_steps": 78105, "loss": 0.2597, "lr": 3.149331700393454e-06, "epoch": 2.3737276742846167, "percentage": 47.47, "elapsed_time": "1:37:38", "remaining_time": "1:48:01", "throughput": 19919.73, "total_tokens": 116695936} +{"current_steps": 37085, "total_steps": 78105, "loss": 0.2362, "lr": 3.1487922044759244e-06, "epoch": 2.3740477562255937, "percentage": 47.48, "elapsed_time": "1:37:38", "remaining_time": "1:48:00", "throughput": 19920.12, "total_tokens": 116711488} +{"current_steps": 37090, "total_steps": 78105, "loss": 0.1329, "lr": 3.148252676161093e-06, "epoch": 2.3743678381665707, "percentage": 47.49, "elapsed_time": "1:37:39", "remaining_time": "1:47:59", "throughput": 19920.53, "total_tokens": 116727360} +{"current_steps": 37095, "total_steps": 78105, "loss": 0.2023, "lr": 3.1477131154758976e-06, "epoch": 2.3746879201075477, "percentage": 47.49, "elapsed_time": "1:37:40", "remaining_time": "1:47:58", "throughput": 19920.94, "total_tokens": 116743616} +{"current_steps": 37100, "total_steps": 78105, "loss": 0.3928, "lr": 3.1471735224472842e-06, "epoch": 2.3750080020485242, "percentage": 47.5, "elapsed_time": "1:37:41", "remaining_time": "1:47:57", "throughput": 19921.28, "total_tokens": 116758784} +{"current_steps": 37105, "total_steps": 78105, "loss": 0.2498, "lr": 3.1466338971021963e-06, "epoch": 2.3753280839895012, "percentage": 47.51, "elapsed_time": "1:37:41", "remaining_time": "1:47:56", "throughput": 19921.65, "total_tokens": 116774144} +{"current_steps": 37110, "total_steps": 78105, "loss": 0.2343, "lr": 3.1460942394675798e-06, "epoch": 2.3756481659304782, "percentage": 47.51, "elapsed_time": "1:37:42", "remaining_time": "1:47:56", "throughput": 19922.04, "total_tokens": 116790336} +{"current_steps": 37115, "total_steps": 78105, "loss": 0.1273, "lr": 3.1455545495703816e-06, "epoch": 2.375968247871455, "percentage": 47.52, "elapsed_time": "1:37:43", "remaining_time": "1:47:55", "throughput": 19922.5, "total_tokens": 116806976} +{"current_steps": 37120, "total_steps": 78105, "loss": 0.312, "lr": 3.1450148274375523e-06, "epoch": 2.376288329812432, "percentage": 47.53, "elapsed_time": "1:37:43", "remaining_time": "1:47:54", "throughput": 19922.88, "total_tokens": 116822912} +{"current_steps": 37125, "total_steps": 78105, "loss": 0.2116, "lr": 3.1444750730960415e-06, "epoch": 2.3766084117534088, "percentage": 47.53, "elapsed_time": "1:37:44", "remaining_time": "1:47:53", "throughput": 19923.24, "total_tokens": 116838272} +{"current_steps": 37130, "total_steps": 78105, "loss": 0.1827, "lr": 3.1439352865728035e-06, "epoch": 2.3769284936943857, "percentage": 47.54, "elapsed_time": "1:37:45", "remaining_time": "1:47:52", "throughput": 19923.69, "total_tokens": 116855040} +{"current_steps": 37135, "total_steps": 78105, "loss": 0.2978, "lr": 3.1433954678947916e-06, "epoch": 2.3772485756353627, "percentage": 47.54, "elapsed_time": "1:37:45", "remaining_time": "1:47:51", "throughput": 19924.01, "total_tokens": 116869888} +{"current_steps": 37140, "total_steps": 78105, "loss": 0.2941, "lr": 3.1428556170889607e-06, "epoch": 2.3775686575763397, "percentage": 47.55, "elapsed_time": "1:37:46", "remaining_time": "1:47:50", "throughput": 19924.33, "total_tokens": 116884736} +{"current_steps": 37145, "total_steps": 78105, "loss": 0.2497, "lr": 3.1423157341822693e-06, "epoch": 2.3778887395173163, "percentage": 47.56, "elapsed_time": "1:37:47", "remaining_time": "1:47:49", "throughput": 19924.68, "total_tokens": 116900032} +{"current_steps": 37150, "total_steps": 78105, "loss": 0.1966, "lr": 3.141775819201675e-06, "epoch": 2.3782088214582933, "percentage": 47.56, "elapsed_time": "1:37:47", "remaining_time": "1:47:48", "throughput": 19925.06, "total_tokens": 116915712} +{"current_steps": 37155, "total_steps": 78105, "loss": 0.3116, "lr": 3.14123587217414e-06, "epoch": 2.3785289033992703, "percentage": 47.57, "elapsed_time": "1:37:48", "remaining_time": "1:47:47", "throughput": 19925.38, "total_tokens": 116930560} +{"current_steps": 37160, "total_steps": 78105, "loss": 0.181, "lr": 3.140695893126625e-06, "epoch": 2.3788489853402472, "percentage": 47.58, "elapsed_time": "1:37:49", "remaining_time": "1:47:46", "throughput": 19925.76, "total_tokens": 116946496} +{"current_steps": 37165, "total_steps": 78105, "loss": 0.1874, "lr": 3.140155882086095e-06, "epoch": 2.3791690672812242, "percentage": 47.58, "elapsed_time": "1:37:49", "remaining_time": "1:47:46", "throughput": 19926.13, "total_tokens": 116962112} +{"current_steps": 37170, "total_steps": 78105, "loss": 0.2267, "lr": 3.1396158390795143e-06, "epoch": 2.379489149222201, "percentage": 47.59, "elapsed_time": "1:37:50", "remaining_time": "1:47:45", "throughput": 19926.61, "total_tokens": 116978880} +{"current_steps": 37175, "total_steps": 78105, "loss": 0.3445, "lr": 3.1390757641338497e-06, "epoch": 2.3798092311631778, "percentage": 47.6, "elapsed_time": "1:37:51", "remaining_time": "1:47:44", "throughput": 19926.95, "total_tokens": 116993920} +{"current_steps": 37180, "total_steps": 78105, "loss": 0.3064, "lr": 3.138535657276071e-06, "epoch": 2.3801293131041548, "percentage": 47.6, "elapsed_time": "1:37:51", "remaining_time": "1:47:43", "throughput": 19927.29, "total_tokens": 117009280} +{"current_steps": 37185, "total_steps": 78105, "loss": 0.1395, "lr": 3.1379955185331474e-06, "epoch": 2.3804493950451313, "percentage": 47.61, "elapsed_time": "1:37:52", "remaining_time": "1:47:42", "throughput": 19927.62, "total_tokens": 117024128} +{"current_steps": 37190, "total_steps": 78105, "loss": 0.175, "lr": 3.137455347932051e-06, "epoch": 2.3807694769861083, "percentage": 47.62, "elapsed_time": "1:37:53", "remaining_time": "1:47:41", "throughput": 19927.93, "total_tokens": 117039104} +{"current_steps": 37195, "total_steps": 78105, "loss": 0.2272, "lr": 3.1369151454997547e-06, "epoch": 2.3810895589270853, "percentage": 47.62, "elapsed_time": "1:37:53", "remaining_time": "1:47:40", "throughput": 19928.28, "total_tokens": 117054016} +{"current_steps": 37200, "total_steps": 78105, "loss": 0.3674, "lr": 3.1363749112632336e-06, "epoch": 2.3814096408680623, "percentage": 47.63, "elapsed_time": "1:37:54", "remaining_time": "1:47:39", "throughput": 19928.62, "total_tokens": 117068992} +{"current_steps": 37205, "total_steps": 78105, "loss": 0.1852, "lr": 3.1358346452494647e-06, "epoch": 2.3817297228090393, "percentage": 47.63, "elapsed_time": "1:37:55", "remaining_time": "1:47:38", "throughput": 19929.05, "total_tokens": 117085312} +{"current_steps": 37210, "total_steps": 78105, "loss": 0.1488, "lr": 3.1352943474854246e-06, "epoch": 2.382049804750016, "percentage": 47.64, "elapsed_time": "1:37:55", "remaining_time": "1:47:37", "throughput": 19929.38, "total_tokens": 117100288} +{"current_steps": 37215, "total_steps": 78105, "loss": 0.262, "lr": 3.1347540179980945e-06, "epoch": 2.382369886690993, "percentage": 47.65, "elapsed_time": "1:37:56", "remaining_time": "1:47:36", "throughput": 19929.77, "total_tokens": 117116352} +{"current_steps": 37220, "total_steps": 78105, "loss": 0.1892, "lr": 3.1342136568144556e-06, "epoch": 2.38268996863197, "percentage": 47.65, "elapsed_time": "1:37:57", "remaining_time": "1:47:35", "throughput": 19930.17, "total_tokens": 117132160} +{"current_steps": 37225, "total_steps": 78105, "loss": 0.2432, "lr": 3.1336732639614894e-06, "epoch": 2.383010050572947, "percentage": 47.66, "elapsed_time": "1:37:57", "remaining_time": "1:47:34", "throughput": 19930.51, "total_tokens": 117147456} +{"current_steps": 37230, "total_steps": 78105, "loss": 0.319, "lr": 3.133132839466181e-06, "epoch": 2.3833301325139233, "percentage": 47.67, "elapsed_time": "1:37:58", "remaining_time": "1:47:34", "throughput": 19930.87, "total_tokens": 117163008} +{"current_steps": 37235, "total_steps": 78105, "loss": 0.1343, "lr": 3.1325923833555165e-06, "epoch": 2.3836502144549003, "percentage": 47.67, "elapsed_time": "1:37:59", "remaining_time": "1:47:33", "throughput": 19931.25, "total_tokens": 117178752} +{"current_steps": 37240, "total_steps": 78105, "loss": 0.1841, "lr": 3.1320518956564833e-06, "epoch": 2.3839702963958773, "percentage": 47.68, "elapsed_time": "1:37:59", "remaining_time": "1:47:32", "throughput": 19931.63, "total_tokens": 117194304} +{"current_steps": 37245, "total_steps": 78105, "loss": 0.2999, "lr": 3.1315113763960707e-06, "epoch": 2.3842903783368543, "percentage": 47.69, "elapsed_time": "1:38:00", "remaining_time": "1:47:31", "throughput": 19931.94, "total_tokens": 117209408} +{"current_steps": 37250, "total_steps": 78105, "loss": 0.1687, "lr": 3.13097082560127e-06, "epoch": 2.3846104602778313, "percentage": 47.69, "elapsed_time": "1:38:01", "remaining_time": "1:47:30", "throughput": 19932.36, "total_tokens": 117225408} +{"current_steps": 37255, "total_steps": 78105, "loss": 0.3064, "lr": 3.1304302432990728e-06, "epoch": 2.384930542218808, "percentage": 47.7, "elapsed_time": "1:38:01", "remaining_time": "1:47:29", "throughput": 19932.66, "total_tokens": 117240128} +{"current_steps": 37260, "total_steps": 78105, "loss": 0.2303, "lr": 3.1298896295164727e-06, "epoch": 2.385250624159785, "percentage": 47.71, "elapsed_time": "1:38:02", "remaining_time": "1:47:28", "throughput": 19933.04, "total_tokens": 117255808} +{"current_steps": 37265, "total_steps": 78105, "loss": 0.1998, "lr": 3.1293489842804654e-06, "epoch": 2.385570706100762, "percentage": 47.71, "elapsed_time": "1:38:03", "remaining_time": "1:47:27", "throughput": 19933.57, "total_tokens": 117273600} +{"current_steps": 37270, "total_steps": 78105, "loss": 0.2555, "lr": 3.128808307618048e-06, "epoch": 2.385890788041739, "percentage": 47.72, "elapsed_time": "1:38:04", "remaining_time": "1:47:27", "throughput": 19932.79, "total_tokens": 117291136} +{"current_steps": 37275, "total_steps": 78105, "loss": 0.262, "lr": 3.1282675995562195e-06, "epoch": 2.3862108699827154, "percentage": 47.72, "elapsed_time": "1:38:05", "remaining_time": "1:47:26", "throughput": 19933.12, "total_tokens": 117306560} +{"current_steps": 37280, "total_steps": 78105, "loss": 0.3019, "lr": 3.1277268601219797e-06, "epoch": 2.3865309519236924, "percentage": 47.73, "elapsed_time": "1:38:05", "remaining_time": "1:47:25", "throughput": 19933.5, "total_tokens": 117322176} +{"current_steps": 37285, "total_steps": 78105, "loss": 0.3054, "lr": 3.12718608934233e-06, "epoch": 2.3868510338646693, "percentage": 47.74, "elapsed_time": "1:38:06", "remaining_time": "1:47:24", "throughput": 19933.96, "total_tokens": 117339072} +{"current_steps": 37290, "total_steps": 78105, "loss": 0.1775, "lr": 3.126645287244273e-06, "epoch": 2.3871711158056463, "percentage": 47.74, "elapsed_time": "1:38:07", "remaining_time": "1:47:24", "throughput": 19934.87, "total_tokens": 117367936} +{"current_steps": 37295, "total_steps": 78105, "loss": 0.2964, "lr": 3.126104453854815e-06, "epoch": 2.3874911977466233, "percentage": 47.75, "elapsed_time": "1:38:08", "remaining_time": "1:47:23", "throughput": 19935.26, "total_tokens": 117383680} +{"current_steps": 37300, "total_steps": 78105, "loss": 0.2318, "lr": 3.1255635892009617e-06, "epoch": 2.3878112796876, "percentage": 47.76, "elapsed_time": "1:38:08", "remaining_time": "1:47:22", "throughput": 19935.68, "total_tokens": 117399744} +{"current_steps": 37305, "total_steps": 78105, "loss": 0.2093, "lr": 3.1250226933097216e-06, "epoch": 2.388131361628577, "percentage": 47.76, "elapsed_time": "1:38:10", "remaining_time": "1:47:21", "throughput": 19934.58, "total_tokens": 117415872} +{"current_steps": 37310, "total_steps": 78105, "loss": 0.2072, "lr": 3.1244817662081038e-06, "epoch": 2.388451443569554, "percentage": 47.77, "elapsed_time": "1:38:10", "remaining_time": "1:47:20", "throughput": 19934.95, "total_tokens": 117430976} +{"current_steps": 37315, "total_steps": 78105, "loss": 0.2696, "lr": 3.12394080792312e-06, "epoch": 2.388771525510531, "percentage": 47.78, "elapsed_time": "1:38:11", "remaining_time": "1:47:20", "throughput": 19935.35, "total_tokens": 117447040} +{"current_steps": 37320, "total_steps": 78105, "loss": 0.3026, "lr": 3.1233998184817813e-06, "epoch": 2.3890916074515074, "percentage": 47.78, "elapsed_time": "1:38:12", "remaining_time": "1:47:19", "throughput": 19935.69, "total_tokens": 117462144} +{"current_steps": 37325, "total_steps": 78105, "loss": 0.2848, "lr": 3.1228587979111027e-06, "epoch": 2.3894116893924844, "percentage": 47.79, "elapsed_time": "1:38:12", "remaining_time": "1:47:18", "throughput": 19936.02, "total_tokens": 117477056} +{"current_steps": 37330, "total_steps": 78105, "loss": 0.26, "lr": 3.1223177462381005e-06, "epoch": 2.3897317713334614, "percentage": 47.79, "elapsed_time": "1:38:13", "remaining_time": "1:47:17", "throughput": 19936.36, "total_tokens": 117492224} +{"current_steps": 37335, "total_steps": 78105, "loss": 0.3487, "lr": 3.1217766634897918e-06, "epoch": 2.3900518532744384, "percentage": 47.8, "elapsed_time": "1:38:14", "remaining_time": "1:47:16", "throughput": 19936.69, "total_tokens": 117507456} +{"current_steps": 37340, "total_steps": 78105, "loss": 0.2269, "lr": 3.121235549693195e-06, "epoch": 2.3903719352154154, "percentage": 47.81, "elapsed_time": "1:38:14", "remaining_time": "1:47:15", "throughput": 19936.98, "total_tokens": 117521728} +{"current_steps": 37345, "total_steps": 78105, "loss": 0.2479, "lr": 3.1206944048753307e-06, "epoch": 2.390692017156392, "percentage": 47.81, "elapsed_time": "1:38:15", "remaining_time": "1:47:14", "throughput": 19937.3, "total_tokens": 117536640} +{"current_steps": 37350, "total_steps": 78105, "loss": 0.1789, "lr": 3.1201532290632207e-06, "epoch": 2.391012099097369, "percentage": 47.82, "elapsed_time": "1:38:15", "remaining_time": "1:47:13", "throughput": 19937.68, "total_tokens": 117552512} +{"current_steps": 37355, "total_steps": 78105, "loss": 0.2537, "lr": 3.119612022283889e-06, "epoch": 2.391332181038346, "percentage": 47.83, "elapsed_time": "1:38:16", "remaining_time": "1:47:12", "throughput": 19938.0, "total_tokens": 117567424} +{"current_steps": 37360, "total_steps": 78105, "loss": 0.28, "lr": 3.1190707845643605e-06, "epoch": 2.391652262979323, "percentage": 47.83, "elapsed_time": "1:38:17", "remaining_time": "1:47:11", "throughput": 19938.37, "total_tokens": 117583424} +{"current_steps": 37365, "total_steps": 78105, "loss": 0.1772, "lr": 3.1185295159316608e-06, "epoch": 2.3919723449202994, "percentage": 47.84, "elapsed_time": "1:38:18", "remaining_time": "1:47:10", "throughput": 19938.76, "total_tokens": 117599424} +{"current_steps": 37370, "total_steps": 78105, "loss": 0.2561, "lr": 3.1179882164128202e-06, "epoch": 2.3922924268612764, "percentage": 47.85, "elapsed_time": "1:38:18", "remaining_time": "1:47:09", "throughput": 19939.11, "total_tokens": 117614720} +{"current_steps": 37375, "total_steps": 78105, "loss": 0.177, "lr": 3.117446886034866e-06, "epoch": 2.3926125088022534, "percentage": 47.85, "elapsed_time": "1:38:19", "remaining_time": "1:47:08", "throughput": 19939.42, "total_tokens": 117629504} +{"current_steps": 37380, "total_steps": 78105, "loss": 0.1672, "lr": 3.116905524824831e-06, "epoch": 2.3929325907432304, "percentage": 47.86, "elapsed_time": "1:38:20", "remaining_time": "1:47:08", "throughput": 19939.8, "total_tokens": 117645760} +{"current_steps": 37385, "total_steps": 78105, "loss": 0.2088, "lr": 3.1163641328097472e-06, "epoch": 2.3932526726842074, "percentage": 47.87, "elapsed_time": "1:38:20", "remaining_time": "1:47:07", "throughput": 19940.13, "total_tokens": 117660480} +{"current_steps": 37390, "total_steps": 78105, "loss": 0.265, "lr": 3.115822710016649e-06, "epoch": 2.393572754625184, "percentage": 47.87, "elapsed_time": "1:38:21", "remaining_time": "1:47:06", "throughput": 19940.5, "total_tokens": 117676224} +{"current_steps": 37395, "total_steps": 78105, "loss": 0.3185, "lr": 3.115281256472573e-06, "epoch": 2.393892836566161, "percentage": 47.88, "elapsed_time": "1:38:22", "remaining_time": "1:47:05", "throughput": 19940.82, "total_tokens": 117690880} +{"current_steps": 37400, "total_steps": 78105, "loss": 0.2079, "lr": 3.1147397722045558e-06, "epoch": 2.394212918507138, "percentage": 47.88, "elapsed_time": "1:38:22", "remaining_time": "1:47:04", "throughput": 19941.16, "total_tokens": 117706240} +{"current_steps": 37405, "total_steps": 78105, "loss": 0.2328, "lr": 3.1141982572396355e-06, "epoch": 2.394533000448115, "percentage": 47.89, "elapsed_time": "1:38:23", "remaining_time": "1:47:03", "throughput": 19941.49, "total_tokens": 117721536} +{"current_steps": 37410, "total_steps": 78105, "loss": 0.273, "lr": 3.1136567116048545e-06, "epoch": 2.3948530823890914, "percentage": 47.9, "elapsed_time": "1:38:23", "remaining_time": "1:47:02", "throughput": 19941.78, "total_tokens": 117735936} +{"current_steps": 37415, "total_steps": 78105, "loss": 0.2035, "lr": 3.1131151353272527e-06, "epoch": 2.3951731643300684, "percentage": 47.9, "elapsed_time": "1:38:24", "remaining_time": "1:47:01", "throughput": 19942.23, "total_tokens": 117752384} +{"current_steps": 37420, "total_steps": 78105, "loss": 0.157, "lr": 3.1125735284338754e-06, "epoch": 2.3954932462710454, "percentage": 47.91, "elapsed_time": "1:38:25", "remaining_time": "1:47:00", "throughput": 19942.62, "total_tokens": 117768128} +{"current_steps": 37425, "total_steps": 78105, "loss": 0.2292, "lr": 3.112031890951767e-06, "epoch": 2.3958133282120224, "percentage": 47.92, "elapsed_time": "1:38:26", "remaining_time": "1:46:59", "throughput": 19943.02, "total_tokens": 117784128} +{"current_steps": 37430, "total_steps": 78105, "loss": 0.1662, "lr": 3.1114902229079737e-06, "epoch": 2.3961334101529994, "percentage": 47.92, "elapsed_time": "1:38:26", "remaining_time": "1:46:58", "throughput": 19943.32, "total_tokens": 117798848} +{"current_steps": 37435, "total_steps": 78105, "loss": 0.195, "lr": 3.110948524329544e-06, "epoch": 2.396453492093976, "percentage": 47.93, "elapsed_time": "1:38:27", "remaining_time": "1:46:57", "throughput": 19943.68, "total_tokens": 117814720} +{"current_steps": 37440, "total_steps": 78105, "loss": 0.225, "lr": 3.1104067952435267e-06, "epoch": 2.396773574034953, "percentage": 47.94, "elapsed_time": "1:38:28", "remaining_time": "1:46:56", "throughput": 19944.02, "total_tokens": 117830016} +{"current_steps": 37445, "total_steps": 78105, "loss": 0.1745, "lr": 3.1098650356769745e-06, "epoch": 2.39709365597593, "percentage": 47.94, "elapsed_time": "1:38:28", "remaining_time": "1:46:56", "throughput": 19944.44, "total_tokens": 117846272} +{"current_steps": 37450, "total_steps": 78105, "loss": 0.1957, "lr": 3.1093232456569384e-06, "epoch": 2.3974137379169065, "percentage": 47.95, "elapsed_time": "1:38:29", "remaining_time": "1:46:55", "throughput": 19944.74, "total_tokens": 117860736} +{"current_steps": 37455, "total_steps": 78105, "loss": 0.2226, "lr": 3.108781425210473e-06, "epoch": 2.3977338198578835, "percentage": 47.95, "elapsed_time": "1:38:29", "remaining_time": "1:46:54", "throughput": 19945.02, "total_tokens": 117875008} +{"current_steps": 37460, "total_steps": 78105, "loss": 0.2238, "lr": 3.1082395743646353e-06, "epoch": 2.3980539017988605, "percentage": 47.96, "elapsed_time": "1:38:30", "remaining_time": "1:46:53", "throughput": 19945.55, "total_tokens": 117892736} +{"current_steps": 37465, "total_steps": 78105, "loss": 0.2795, "lr": 3.1076976931464803e-06, "epoch": 2.3983739837398375, "percentage": 47.97, "elapsed_time": "1:38:31", "remaining_time": "1:46:52", "throughput": 19945.93, "total_tokens": 117908352} +{"current_steps": 37470, "total_steps": 78105, "loss": 0.1588, "lr": 3.107155781583068e-06, "epoch": 2.3986940656808144, "percentage": 47.97, "elapsed_time": "1:38:32", "remaining_time": "1:46:51", "throughput": 19946.34, "total_tokens": 117924672} +{"current_steps": 37475, "total_steps": 78105, "loss": 0.1354, "lr": 3.106613839701459e-06, "epoch": 2.399014147621791, "percentage": 47.98, "elapsed_time": "1:38:32", "remaining_time": "1:46:50", "throughput": 19946.7, "total_tokens": 117940160} +{"current_steps": 37480, "total_steps": 78105, "loss": 0.2088, "lr": 3.106071867528715e-06, "epoch": 2.399334229562768, "percentage": 47.99, "elapsed_time": "1:38:33", "remaining_time": "1:46:49", "throughput": 19947.04, "total_tokens": 117955136} +{"current_steps": 37485, "total_steps": 78105, "loss": 0.2431, "lr": 3.105529865091898e-06, "epoch": 2.399654311503745, "percentage": 47.99, "elapsed_time": "1:38:34", "remaining_time": "1:46:48", "throughput": 19947.36, "total_tokens": 117969856} +{"current_steps": 37490, "total_steps": 78105, "loss": 0.1642, "lr": 3.1049878324180745e-06, "epoch": 2.399974393444722, "percentage": 48.0, "elapsed_time": "1:38:34", "remaining_time": "1:46:47", "throughput": 19947.66, "total_tokens": 117984704} +{"current_steps": 37495, "total_steps": 78105, "loss": 0.2134, "lr": 3.104445769534309e-06, "epoch": 2.4002944753856985, "percentage": 48.01, "elapsed_time": "1:38:35", "remaining_time": "1:46:46", "throughput": 19948.02, "total_tokens": 118000128} +{"current_steps": 37500, "total_steps": 78105, "loss": 0.1757, "lr": 3.1039036764676704e-06, "epoch": 2.4006145573266755, "percentage": 48.01, "elapsed_time": "1:38:36", "remaining_time": "1:46:45", "throughput": 19948.45, "total_tokens": 118016640} +{"current_steps": 37505, "total_steps": 78105, "loss": 0.3412, "lr": 3.1033615532452283e-06, "epoch": 2.4009346392676525, "percentage": 48.02, "elapsed_time": "1:38:36", "remaining_time": "1:46:45", "throughput": 19948.79, "total_tokens": 118031872} +{"current_steps": 37510, "total_steps": 78105, "loss": 0.2326, "lr": 3.1028193998940525e-06, "epoch": 2.4012547212086295, "percentage": 48.03, "elapsed_time": "1:38:37", "remaining_time": "1:46:44", "throughput": 19949.11, "total_tokens": 118046848} +{"current_steps": 37515, "total_steps": 78105, "loss": 0.2026, "lr": 3.1022772164412168e-06, "epoch": 2.4015748031496065, "percentage": 48.03, "elapsed_time": "1:38:38", "remaining_time": "1:46:43", "throughput": 19949.51, "total_tokens": 118062912} +{"current_steps": 37520, "total_steps": 78105, "loss": 0.214, "lr": 3.101735002913793e-06, "epoch": 2.401894885090583, "percentage": 48.04, "elapsed_time": "1:38:38", "remaining_time": "1:46:42", "throughput": 19949.99, "total_tokens": 118080128} +{"current_steps": 37525, "total_steps": 78105, "loss": 0.2351, "lr": 3.101192759338858e-06, "epoch": 2.40221496703156, "percentage": 48.04, "elapsed_time": "1:38:39", "remaining_time": "1:46:41", "throughput": 19950.36, "total_tokens": 118096064} +{"current_steps": 37530, "total_steps": 78105, "loss": 0.1285, "lr": 3.100650485743487e-06, "epoch": 2.402535048972537, "percentage": 48.05, "elapsed_time": "1:38:40", "remaining_time": "1:46:40", "throughput": 19950.84, "total_tokens": 118113152} +{"current_steps": 37535, "total_steps": 78105, "loss": 0.3104, "lr": 3.1001081821547605e-06, "epoch": 2.402855130913514, "percentage": 48.06, "elapsed_time": "1:38:40", "remaining_time": "1:46:39", "throughput": 19951.16, "total_tokens": 118128000} +{"current_steps": 37540, "total_steps": 78105, "loss": 0.2372, "lr": 3.0995658485997575e-06, "epoch": 2.4031752128544905, "percentage": 48.06, "elapsed_time": "1:38:41", "remaining_time": "1:46:38", "throughput": 19951.7, "total_tokens": 118145728} +{"current_steps": 37545, "total_steps": 78105, "loss": 0.1751, "lr": 3.099023485105559e-06, "epoch": 2.4034952947954675, "percentage": 48.07, "elapsed_time": "1:38:42", "remaining_time": "1:46:37", "throughput": 19952.06, "total_tokens": 118161408} +{"current_steps": 37550, "total_steps": 78105, "loss": 0.239, "lr": 3.0984810916992475e-06, "epoch": 2.4038153767364445, "percentage": 48.08, "elapsed_time": "1:38:42", "remaining_time": "1:46:36", "throughput": 19952.42, "total_tokens": 118176896} +{"current_steps": 37555, "total_steps": 78105, "loss": 0.2408, "lr": 3.097938668407907e-06, "epoch": 2.4041354586774215, "percentage": 48.08, "elapsed_time": "1:38:43", "remaining_time": "1:46:35", "throughput": 19952.71, "total_tokens": 118191360} +{"current_steps": 37560, "total_steps": 78105, "loss": 0.2011, "lr": 3.0973962152586247e-06, "epoch": 2.4044555406183985, "percentage": 48.09, "elapsed_time": "1:38:44", "remaining_time": "1:46:35", "throughput": 19953.14, "total_tokens": 118207936} +{"current_steps": 37565, "total_steps": 78105, "loss": 0.2181, "lr": 3.096853732278487e-06, "epoch": 2.404775622559375, "percentage": 48.1, "elapsed_time": "1:38:44", "remaining_time": "1:46:34", "throughput": 19953.51, "total_tokens": 118223744} +{"current_steps": 37570, "total_steps": 78105, "loss": 0.1763, "lr": 3.0963112194945834e-06, "epoch": 2.405095704500352, "percentage": 48.1, "elapsed_time": "1:38:45", "remaining_time": "1:46:33", "throughput": 19953.91, "total_tokens": 118240064} +{"current_steps": 37575, "total_steps": 78105, "loss": 0.1374, "lr": 3.095768676934003e-06, "epoch": 2.405415786441329, "percentage": 48.11, "elapsed_time": "1:38:46", "remaining_time": "1:46:32", "throughput": 19954.25, "total_tokens": 118255424} +{"current_steps": 37580, "total_steps": 78105, "loss": 0.1365, "lr": 3.0952261046238375e-06, "epoch": 2.405735868382306, "percentage": 48.11, "elapsed_time": "1:38:46", "remaining_time": "1:46:31", "throughput": 19954.59, "total_tokens": 118270464} +{"current_steps": 37585, "total_steps": 78105, "loss": 0.178, "lr": 3.094683502591181e-06, "epoch": 2.4060559503232826, "percentage": 48.12, "elapsed_time": "1:38:47", "remaining_time": "1:46:30", "throughput": 19954.95, "total_tokens": 118285888} +{"current_steps": 37590, "total_steps": 78105, "loss": 0.2249, "lr": 3.0941408708631282e-06, "epoch": 2.4063760322642596, "percentage": 48.13, "elapsed_time": "1:38:48", "remaining_time": "1:46:29", "throughput": 19955.46, "total_tokens": 118303616} +{"current_steps": 37595, "total_steps": 78105, "loss": 0.1757, "lr": 3.0935982094667754e-06, "epoch": 2.4066961142052365, "percentage": 48.13, "elapsed_time": "1:38:49", "remaining_time": "1:46:28", "throughput": 19955.86, "total_tokens": 118319936} +{"current_steps": 37600, "total_steps": 78105, "loss": 0.2439, "lr": 3.09305551842922e-06, "epoch": 2.4070161961462135, "percentage": 48.14, "elapsed_time": "1:38:49", "remaining_time": "1:46:27", "throughput": 19956.16, "total_tokens": 118334848} +{"current_steps": 37605, "total_steps": 78105, "loss": 0.2724, "lr": 3.0925127977775608e-06, "epoch": 2.4073362780871905, "percentage": 48.15, "elapsed_time": "1:38:50", "remaining_time": "1:46:26", "throughput": 19956.51, "total_tokens": 118350592} +{"current_steps": 37610, "total_steps": 78105, "loss": 0.2555, "lr": 3.0919700475388985e-06, "epoch": 2.407656360028167, "percentage": 48.15, "elapsed_time": "1:38:51", "remaining_time": "1:46:26", "throughput": 19956.97, "total_tokens": 118367488} +{"current_steps": 37615, "total_steps": 78105, "loss": 0.1574, "lr": 3.091427267740336e-06, "epoch": 2.407976441969144, "percentage": 48.16, "elapsed_time": "1:38:51", "remaining_time": "1:46:25", "throughput": 19957.32, "total_tokens": 118382720} +{"current_steps": 37620, "total_steps": 78105, "loss": 0.2141, "lr": 3.0908844584089764e-06, "epoch": 2.408296523910121, "percentage": 48.17, "elapsed_time": "1:38:52", "remaining_time": "1:46:24", "throughput": 19957.71, "total_tokens": 118399040} +{"current_steps": 37625, "total_steps": 78105, "loss": 0.1975, "lr": 3.090341619571925e-06, "epoch": 2.408616605851098, "percentage": 48.17, "elapsed_time": "1:38:53", "remaining_time": "1:46:23", "throughput": 19958.01, "total_tokens": 118413824} +{"current_steps": 37630, "total_steps": 78105, "loss": 0.2963, "lr": 3.089798751256288e-06, "epoch": 2.4089366877920746, "percentage": 48.18, "elapsed_time": "1:38:53", "remaining_time": "1:46:22", "throughput": 19958.42, "total_tokens": 118430208} +{"current_steps": 37635, "total_steps": 78105, "loss": 0.1646, "lr": 3.089255853489173e-06, "epoch": 2.4092567697330516, "percentage": 48.19, "elapsed_time": "1:38:54", "remaining_time": "1:46:21", "throughput": 19958.75, "total_tokens": 118445248} +{"current_steps": 37640, "total_steps": 78105, "loss": 0.1595, "lr": 3.08871292629769e-06, "epoch": 2.4095768516740286, "percentage": 48.19, "elapsed_time": "1:38:55", "remaining_time": "1:46:20", "throughput": 19959.37, "total_tokens": 118464896} +{"current_steps": 37645, "total_steps": 78105, "loss": 0.1514, "lr": 3.0881699697089494e-06, "epoch": 2.4098969336150056, "percentage": 48.2, "elapsed_time": "1:38:55", "remaining_time": "1:46:19", "throughput": 19959.74, "total_tokens": 118480768} +{"current_steps": 37650, "total_steps": 78105, "loss": 0.2708, "lr": 3.087626983750065e-06, "epoch": 2.4102170155559826, "percentage": 48.2, "elapsed_time": "1:38:56", "remaining_time": "1:46:18", "throughput": 19960.02, "total_tokens": 118495360} +{"current_steps": 37655, "total_steps": 78105, "loss": 0.2412, "lr": 3.0870839684481502e-06, "epoch": 2.410537097496959, "percentage": 48.21, "elapsed_time": "1:38:57", "remaining_time": "1:46:18", "throughput": 19960.36, "total_tokens": 118510848} +{"current_steps": 37660, "total_steps": 78105, "loss": 0.2465, "lr": 3.0865409238303196e-06, "epoch": 2.410857179437936, "percentage": 48.22, "elapsed_time": "1:38:57", "remaining_time": "1:46:17", "throughput": 19960.71, "total_tokens": 118526336} +{"current_steps": 37665, "total_steps": 78105, "loss": 0.1733, "lr": 3.085997849923691e-06, "epoch": 2.411177261378913, "percentage": 48.22, "elapsed_time": "1:38:58", "remaining_time": "1:46:16", "throughput": 19961.03, "total_tokens": 118541312} +{"current_steps": 37670, "total_steps": 78105, "loss": 0.2339, "lr": 3.085454746755381e-06, "epoch": 2.41149734331989, "percentage": 48.23, "elapsed_time": "1:38:59", "remaining_time": "1:46:15", "throughput": 19961.55, "total_tokens": 118558912} +{"current_steps": 37675, "total_steps": 78105, "loss": 0.3016, "lr": 3.084911614352511e-06, "epoch": 2.4118174252608666, "percentage": 48.24, "elapsed_time": "1:39:00", "remaining_time": "1:46:14", "throughput": 19961.87, "total_tokens": 118573952} +{"current_steps": 37680, "total_steps": 78105, "loss": 0.1975, "lr": 3.084368452742202e-06, "epoch": 2.4121375072018436, "percentage": 48.24, "elapsed_time": "1:39:00", "remaining_time": "1:46:13", "throughput": 19962.24, "total_tokens": 118589376} +{"current_steps": 37685, "total_steps": 78105, "loss": 0.1254, "lr": 3.0838252619515764e-06, "epoch": 2.4124575891428206, "percentage": 48.25, "elapsed_time": "1:39:01", "remaining_time": "1:46:12", "throughput": 19962.57, "total_tokens": 118604608} +{"current_steps": 37690, "total_steps": 78105, "loss": 0.2196, "lr": 3.0832820420077576e-06, "epoch": 2.4127776710837976, "percentage": 48.26, "elapsed_time": "1:39:01", "remaining_time": "1:46:11", "throughput": 19962.87, "total_tokens": 118619200} +{"current_steps": 37695, "total_steps": 78105, "loss": 0.1882, "lr": 3.0827387929378716e-06, "epoch": 2.4130977530247746, "percentage": 48.26, "elapsed_time": "1:39:02", "remaining_time": "1:46:10", "throughput": 19963.28, "total_tokens": 118635264} +{"current_steps": 37700, "total_steps": 78105, "loss": 0.3483, "lr": 3.082195514769046e-06, "epoch": 2.413417834965751, "percentage": 48.27, "elapsed_time": "1:39:03", "remaining_time": "1:46:09", "throughput": 19963.63, "total_tokens": 118651008} +{"current_steps": 37705, "total_steps": 78105, "loss": 0.2173, "lr": 3.0816522075284093e-06, "epoch": 2.413737916906728, "percentage": 48.27, "elapsed_time": "1:39:04", "remaining_time": "1:46:08", "throughput": 19964.05, "total_tokens": 118667328} +{"current_steps": 37710, "total_steps": 78105, "loss": 0.2104, "lr": 3.081108871243091e-06, "epoch": 2.414057998847705, "percentage": 48.28, "elapsed_time": "1:39:04", "remaining_time": "1:46:07", "throughput": 19964.39, "total_tokens": 118682560} +{"current_steps": 37715, "total_steps": 78105, "loss": 0.1763, "lr": 3.080565505940223e-06, "epoch": 2.4143780807886817, "percentage": 48.29, "elapsed_time": "1:39:05", "remaining_time": "1:46:07", "throughput": 19964.82, "total_tokens": 118699136} +{"current_steps": 37720, "total_steps": 78105, "loss": 0.2768, "lr": 3.080022111646937e-06, "epoch": 2.4146981627296586, "percentage": 48.29, "elapsed_time": "1:39:06", "remaining_time": "1:46:06", "throughput": 19965.25, "total_tokens": 118715712} +{"current_steps": 37725, "total_steps": 78105, "loss": 0.2486, "lr": 3.0794786883903675e-06, "epoch": 2.4150182446706356, "percentage": 48.3, "elapsed_time": "1:39:06", "remaining_time": "1:46:05", "throughput": 19965.59, "total_tokens": 118731072} +{"current_steps": 37730, "total_steps": 78105, "loss": 0.3058, "lr": 3.0789352361976515e-06, "epoch": 2.4153383266116126, "percentage": 48.31, "elapsed_time": "1:39:07", "remaining_time": "1:46:04", "throughput": 19965.99, "total_tokens": 118747008} +{"current_steps": 37735, "total_steps": 78105, "loss": 0.3504, "lr": 3.078391755095925e-06, "epoch": 2.4156584085525896, "percentage": 48.31, "elapsed_time": "1:39:08", "remaining_time": "1:46:03", "throughput": 19966.33, "total_tokens": 118762368} +{"current_steps": 37740, "total_steps": 78105, "loss": 0.1283, "lr": 3.0778482451123276e-06, "epoch": 2.415978490493566, "percentage": 48.32, "elapsed_time": "1:39:08", "remaining_time": "1:46:02", "throughput": 19966.74, "total_tokens": 118778688} +{"current_steps": 37745, "total_steps": 78105, "loss": 0.2192, "lr": 3.0773047062739987e-06, "epoch": 2.416298572434543, "percentage": 48.33, "elapsed_time": "1:39:09", "remaining_time": "1:46:01", "throughput": 19967.09, "total_tokens": 118794496} +{"current_steps": 37750, "total_steps": 78105, "loss": 0.3876, "lr": 3.076761138608079e-06, "epoch": 2.41661865437552, "percentage": 48.33, "elapsed_time": "1:39:10", "remaining_time": "1:46:00", "throughput": 19967.39, "total_tokens": 118809088} +{"current_steps": 37755, "total_steps": 78105, "loss": 0.1543, "lr": 3.076217542141713e-06, "epoch": 2.416938736316497, "percentage": 48.34, "elapsed_time": "1:39:10", "remaining_time": "1:45:59", "throughput": 19967.76, "total_tokens": 118824704} +{"current_steps": 37760, "total_steps": 78105, "loss": 0.21, "lr": 3.0756739169020434e-06, "epoch": 2.4172588182574737, "percentage": 48.35, "elapsed_time": "1:39:11", "remaining_time": "1:45:58", "throughput": 19968.15, "total_tokens": 118840704} +{"current_steps": 37765, "total_steps": 78105, "loss": 0.3376, "lr": 3.0751302629162176e-06, "epoch": 2.4175789001984507, "percentage": 48.35, "elapsed_time": "1:39:12", "remaining_time": "1:45:58", "throughput": 19968.57, "total_tokens": 118857088} +{"current_steps": 37770, "total_steps": 78105, "loss": 0.206, "lr": 3.0745865802113828e-06, "epoch": 2.4178989821394277, "percentage": 48.36, "elapsed_time": "1:39:12", "remaining_time": "1:45:57", "throughput": 19968.98, "total_tokens": 118873344} +{"current_steps": 37775, "total_steps": 78105, "loss": 0.2648, "lr": 3.0740428688146862e-06, "epoch": 2.4182190640804047, "percentage": 48.36, "elapsed_time": "1:39:13", "remaining_time": "1:45:56", "throughput": 19969.38, "total_tokens": 118889600} +{"current_steps": 37780, "total_steps": 78105, "loss": 0.2679, "lr": 3.0734991287532796e-06, "epoch": 2.4185391460213816, "percentage": 48.37, "elapsed_time": "1:39:14", "remaining_time": "1:45:55", "throughput": 19969.75, "total_tokens": 118905408} +{"current_steps": 37785, "total_steps": 78105, "loss": 0.233, "lr": 3.072955360054313e-06, "epoch": 2.418859227962358, "percentage": 48.38, "elapsed_time": "1:39:14", "remaining_time": "1:45:54", "throughput": 19970.14, "total_tokens": 118921408} +{"current_steps": 37790, "total_steps": 78105, "loss": 0.2814, "lr": 3.0724115627449403e-06, "epoch": 2.419179309903335, "percentage": 48.38, "elapsed_time": "1:39:15", "remaining_time": "1:45:53", "throughput": 19970.65, "total_tokens": 118939136} +{"current_steps": 37795, "total_steps": 78105, "loss": 0.2321, "lr": 3.0718677368523164e-06, "epoch": 2.419499391844312, "percentage": 48.39, "elapsed_time": "1:39:16", "remaining_time": "1:45:52", "throughput": 19971.05, "total_tokens": 118955200} +{"current_steps": 37800, "total_steps": 78105, "loss": 0.196, "lr": 3.0713238824035957e-06, "epoch": 2.419819473785289, "percentage": 48.4, "elapsed_time": "1:39:17", "remaining_time": "1:45:51", "throughput": 19971.43, "total_tokens": 118970624} +{"current_steps": 37805, "total_steps": 78105, "loss": 0.1438, "lr": 3.0707799994259367e-06, "epoch": 2.4201395557262657, "percentage": 48.4, "elapsed_time": "1:39:17", "remaining_time": "1:45:50", "throughput": 19971.84, "total_tokens": 118987328} +{"current_steps": 37810, "total_steps": 78105, "loss": 0.1631, "lr": 3.070236087946497e-06, "epoch": 2.4204596376672427, "percentage": 48.41, "elapsed_time": "1:39:18", "remaining_time": "1:45:50", "throughput": 19972.18, "total_tokens": 119002624} +{"current_steps": 37815, "total_steps": 78105, "loss": 0.3156, "lr": 3.0696921479924376e-06, "epoch": 2.4207797196082197, "percentage": 48.42, "elapsed_time": "1:39:19", "remaining_time": "1:45:49", "throughput": 19972.54, "total_tokens": 119018240} +{"current_steps": 37820, "total_steps": 78105, "loss": 0.1863, "lr": 3.06914817959092e-06, "epoch": 2.4210998015491967, "percentage": 48.42, "elapsed_time": "1:39:19", "remaining_time": "1:45:48", "throughput": 19972.96, "total_tokens": 119034560} +{"current_steps": 37825, "total_steps": 78105, "loss": 0.2396, "lr": 3.0686041827691066e-06, "epoch": 2.4214198834901737, "percentage": 48.43, "elapsed_time": "1:39:20", "remaining_time": "1:45:47", "throughput": 19973.43, "total_tokens": 119051712} +{"current_steps": 37830, "total_steps": 78105, "loss": 0.1676, "lr": 3.0680601575541625e-06, "epoch": 2.42173996543115, "percentage": 48.43, "elapsed_time": "1:39:21", "remaining_time": "1:45:46", "throughput": 19973.86, "total_tokens": 119067968} +{"current_steps": 37835, "total_steps": 78105, "loss": 0.2777, "lr": 3.0675161039732526e-06, "epoch": 2.422060047372127, "percentage": 48.44, "elapsed_time": "1:39:21", "remaining_time": "1:45:45", "throughput": 19974.2, "total_tokens": 119083200} +{"current_steps": 37840, "total_steps": 78105, "loss": 0.1946, "lr": 3.0669720220535447e-06, "epoch": 2.422380129313104, "percentage": 48.45, "elapsed_time": "1:39:22", "remaining_time": "1:45:44", "throughput": 19974.45, "total_tokens": 119097408} +{"current_steps": 37845, "total_steps": 78105, "loss": 0.2237, "lr": 3.0664279118222074e-06, "epoch": 2.422700211254081, "percentage": 48.45, "elapsed_time": "1:39:23", "remaining_time": "1:45:43", "throughput": 19974.76, "total_tokens": 119112448} +{"current_steps": 37850, "total_steps": 78105, "loss": 0.1967, "lr": 3.065883773306411e-06, "epoch": 2.4230202931950577, "percentage": 48.46, "elapsed_time": "1:39:23", "remaining_time": "1:45:42", "throughput": 19975.12, "total_tokens": 119127680} +{"current_steps": 37855, "total_steps": 78105, "loss": 0.1469, "lr": 3.065339606533326e-06, "epoch": 2.4233403751360347, "percentage": 48.47, "elapsed_time": "1:39:24", "remaining_time": "1:45:41", "throughput": 19975.49, "total_tokens": 119143232} +{"current_steps": 37860, "total_steps": 78105, "loss": 0.2169, "lr": 3.064795411530126e-06, "epoch": 2.4236604570770117, "percentage": 48.47, "elapsed_time": "1:39:25", "remaining_time": "1:45:40", "throughput": 19975.82, "total_tokens": 119158400} +{"current_steps": 37865, "total_steps": 78105, "loss": 0.164, "lr": 3.0642511883239846e-06, "epoch": 2.4239805390179887, "percentage": 48.48, "elapsed_time": "1:39:25", "remaining_time": "1:45:39", "throughput": 19976.16, "total_tokens": 119173696} +{"current_steps": 37870, "total_steps": 78105, "loss": 0.3497, "lr": 3.063706936942078e-06, "epoch": 2.4243006209589657, "percentage": 48.49, "elapsed_time": "1:39:26", "remaining_time": "1:45:39", "throughput": 19976.55, "total_tokens": 119189568} +{"current_steps": 37875, "total_steps": 78105, "loss": 0.2508, "lr": 3.063162657411583e-06, "epoch": 2.4246207028999422, "percentage": 48.49, "elapsed_time": "1:39:27", "remaining_time": "1:45:38", "throughput": 19976.89, "total_tokens": 119204992} +{"current_steps": 37880, "total_steps": 78105, "loss": 0.2146, "lr": 3.062618349759679e-06, "epoch": 2.4249407848409192, "percentage": 48.5, "elapsed_time": "1:39:27", "remaining_time": "1:45:37", "throughput": 19977.24, "total_tokens": 119220224} +{"current_steps": 37885, "total_steps": 78105, "loss": 0.2724, "lr": 3.062074014013545e-06, "epoch": 2.4252608667818962, "percentage": 48.51, "elapsed_time": "1:39:28", "remaining_time": "1:45:36", "throughput": 19977.51, "total_tokens": 119234432} +{"current_steps": 37890, "total_steps": 78105, "loss": 0.1117, "lr": 3.0615296502003627e-06, "epoch": 2.425580948722873, "percentage": 48.51, "elapsed_time": "1:39:29", "remaining_time": "1:45:35", "throughput": 19977.82, "total_tokens": 119249088} +{"current_steps": 37895, "total_steps": 78105, "loss": 0.1979, "lr": 3.060985258347314e-06, "epoch": 2.4259010306638498, "percentage": 48.52, "elapsed_time": "1:39:29", "remaining_time": "1:45:34", "throughput": 19978.15, "total_tokens": 119264064} +{"current_steps": 37900, "total_steps": 78105, "loss": 0.2217, "lr": 3.0604408384815836e-06, "epoch": 2.4262211126048268, "percentage": 48.52, "elapsed_time": "1:39:30", "remaining_time": "1:45:33", "throughput": 19978.45, "total_tokens": 119278592} +{"current_steps": 37905, "total_steps": 78105, "loss": 0.2541, "lr": 3.0598963906303576e-06, "epoch": 2.4265411945458037, "percentage": 48.53, "elapsed_time": "1:39:31", "remaining_time": "1:45:32", "throughput": 19978.74, "total_tokens": 119293440} +{"current_steps": 37910, "total_steps": 78105, "loss": 0.2013, "lr": 3.0593519148208217e-06, "epoch": 2.4268612764867807, "percentage": 48.54, "elapsed_time": "1:39:31", "remaining_time": "1:45:31", "throughput": 19979.07, "total_tokens": 119308544} +{"current_steps": 37915, "total_steps": 78105, "loss": 0.2045, "lr": 3.058807411080165e-06, "epoch": 2.4271813584277577, "percentage": 48.54, "elapsed_time": "1:39:32", "remaining_time": "1:45:30", "throughput": 19979.49, "total_tokens": 119324928} +{"current_steps": 37920, "total_steps": 78105, "loss": 0.3166, "lr": 3.0582628794355775e-06, "epoch": 2.4275014403687343, "percentage": 48.55, "elapsed_time": "1:39:33", "remaining_time": "1:45:29", "throughput": 19979.8, "total_tokens": 119339712} +{"current_steps": 37925, "total_steps": 78105, "loss": 0.2321, "lr": 3.057718319914248e-06, "epoch": 2.4278215223097113, "percentage": 48.56, "elapsed_time": "1:39:33", "remaining_time": "1:45:28", "throughput": 19980.21, "total_tokens": 119356096} +{"current_steps": 37930, "total_steps": 78105, "loss": 0.1764, "lr": 3.057173732543372e-06, "epoch": 2.4281416042506883, "percentage": 48.56, "elapsed_time": "1:39:34", "remaining_time": "1:45:28", "throughput": 19980.67, "total_tokens": 119373056} +{"current_steps": 37935, "total_steps": 78105, "loss": 0.2173, "lr": 3.056629117350141e-06, "epoch": 2.4284616861916652, "percentage": 48.57, "elapsed_time": "1:39:35", "remaining_time": "1:45:27", "throughput": 19981.04, "total_tokens": 119388672} +{"current_steps": 37940, "total_steps": 78105, "loss": 0.3087, "lr": 3.056084474361752e-06, "epoch": 2.428781768132642, "percentage": 48.58, "elapsed_time": "1:39:35", "remaining_time": "1:45:26", "throughput": 19981.37, "total_tokens": 119404032} +{"current_steps": 37945, "total_steps": 78105, "loss": 0.1567, "lr": 3.0555398036054006e-06, "epoch": 2.429101850073619, "percentage": 48.58, "elapsed_time": "1:39:36", "remaining_time": "1:45:25", "throughput": 19981.63, "total_tokens": 119418368} +{"current_steps": 37950, "total_steps": 78105, "loss": 0.2722, "lr": 3.054995105108286e-06, "epoch": 2.4294219320145958, "percentage": 48.59, "elapsed_time": "1:39:37", "remaining_time": "1:45:24", "throughput": 19981.94, "total_tokens": 119433088} +{"current_steps": 37955, "total_steps": 78105, "loss": 0.2747, "lr": 3.0544503788976054e-06, "epoch": 2.4297420139555728, "percentage": 48.59, "elapsed_time": "1:39:37", "remaining_time": "1:45:23", "throughput": 19982.28, "total_tokens": 119448320} +{"current_steps": 37960, "total_steps": 78105, "loss": 0.2972, "lr": 3.0539056250005617e-06, "epoch": 2.4300620958965498, "percentage": 48.6, "elapsed_time": "1:39:38", "remaining_time": "1:45:22", "throughput": 19982.64, "total_tokens": 119463872} +{"current_steps": 37965, "total_steps": 78105, "loss": 0.1852, "lr": 3.0533608434443562e-06, "epoch": 2.4303821778375263, "percentage": 48.61, "elapsed_time": "1:39:39", "remaining_time": "1:45:21", "throughput": 19983.05, "total_tokens": 119480064} +{"current_steps": 37970, "total_steps": 78105, "loss": 0.1684, "lr": 3.0528160342561925e-06, "epoch": 2.4307022597785033, "percentage": 48.61, "elapsed_time": "1:39:39", "remaining_time": "1:45:20", "throughput": 19983.36, "total_tokens": 119495168} +{"current_steps": 37975, "total_steps": 78105, "loss": 0.2119, "lr": 3.0522711974632758e-06, "epoch": 2.4310223417194803, "percentage": 48.62, "elapsed_time": "1:39:40", "remaining_time": "1:45:19", "throughput": 19983.71, "total_tokens": 119510720} +{"current_steps": 37980, "total_steps": 78105, "loss": 0.2815, "lr": 3.0517263330928116e-06, "epoch": 2.431342423660457, "percentage": 48.63, "elapsed_time": "1:39:41", "remaining_time": "1:45:18", "throughput": 19984.08, "total_tokens": 119526336} +{"current_steps": 37985, "total_steps": 78105, "loss": 0.2965, "lr": 3.0511814411720083e-06, "epoch": 2.431662505601434, "percentage": 48.63, "elapsed_time": "1:39:41", "remaining_time": "1:45:17", "throughput": 19984.44, "total_tokens": 119541952} +{"current_steps": 37990, "total_steps": 78105, "loss": 0.2018, "lr": 3.0506365217280742e-06, "epoch": 2.431982587542411, "percentage": 48.64, "elapsed_time": "1:39:42", "remaining_time": "1:45:17", "throughput": 19984.77, "total_tokens": 119557248} +{"current_steps": 37995, "total_steps": 78105, "loss": 0.2062, "lr": 3.0500915747882213e-06, "epoch": 2.432302669483388, "percentage": 48.65, "elapsed_time": "1:39:43", "remaining_time": "1:45:16", "throughput": 19985.02, "total_tokens": 119571456} +{"current_steps": 38000, "total_steps": 78105, "loss": 0.1656, "lr": 3.0495466003796603e-06, "epoch": 2.432622751424365, "percentage": 48.65, "elapsed_time": "1:39:43", "remaining_time": "1:45:15", "throughput": 19985.38, "total_tokens": 119587456} +{"current_steps": 38005, "total_steps": 78105, "loss": 0.354, "lr": 3.0490015985296053e-06, "epoch": 2.4329428333653413, "percentage": 48.66, "elapsed_time": "1:39:44", "remaining_time": "1:45:14", "throughput": 19985.74, "total_tokens": 119603264} +{"current_steps": 38010, "total_steps": 78105, "loss": 0.1856, "lr": 3.048456569265269e-06, "epoch": 2.4332629153063183, "percentage": 48.67, "elapsed_time": "1:39:45", "remaining_time": "1:45:13", "throughput": 19986.04, "total_tokens": 119618048} +{"current_steps": 38015, "total_steps": 78105, "loss": 0.3026, "lr": 3.047911512613869e-06, "epoch": 2.4335829972472953, "percentage": 48.67, "elapsed_time": "1:39:45", "remaining_time": "1:45:12", "throughput": 19986.44, "total_tokens": 119634432} +{"current_steps": 38020, "total_steps": 78105, "loss": 0.2709, "lr": 3.0473664286026216e-06, "epoch": 2.4339030791882723, "percentage": 48.68, "elapsed_time": "1:39:46", "remaining_time": "1:45:11", "throughput": 19986.79, "total_tokens": 119649984} +{"current_steps": 38025, "total_steps": 78105, "loss": 0.233, "lr": 3.0468213172587465e-06, "epoch": 2.434223161129249, "percentage": 48.68, "elapsed_time": "1:39:47", "remaining_time": "1:45:10", "throughput": 19987.17, "total_tokens": 119666176} +{"current_steps": 38030, "total_steps": 78105, "loss": 0.2561, "lr": 3.0462761786094624e-06, "epoch": 2.434543243070226, "percentage": 48.69, "elapsed_time": "1:39:47", "remaining_time": "1:45:09", "throughput": 19987.43, "total_tokens": 119680384} +{"current_steps": 38035, "total_steps": 78105, "loss": 0.2369, "lr": 3.0457310126819917e-06, "epoch": 2.434863325011203, "percentage": 48.7, "elapsed_time": "1:39:48", "remaining_time": "1:45:08", "throughput": 19987.77, "total_tokens": 119696000} +{"current_steps": 38040, "total_steps": 78105, "loss": 0.4032, "lr": 3.0451858195035564e-06, "epoch": 2.43518340695218, "percentage": 48.7, "elapsed_time": "1:39:49", "remaining_time": "1:45:07", "throughput": 19988.18, "total_tokens": 119712192} +{"current_steps": 38045, "total_steps": 78105, "loss": 0.1391, "lr": 3.044640599101382e-06, "epoch": 2.435503488893157, "percentage": 48.71, "elapsed_time": "1:39:49", "remaining_time": "1:45:07", "throughput": 19988.7, "total_tokens": 119730368} +{"current_steps": 38050, "total_steps": 78105, "loss": 0.224, "lr": 3.0440953515026916e-06, "epoch": 2.4358235708341334, "percentage": 48.72, "elapsed_time": "1:39:50", "remaining_time": "1:45:06", "throughput": 19989.13, "total_tokens": 119747136} +{"current_steps": 38055, "total_steps": 78105, "loss": 0.2364, "lr": 3.043550076734715e-06, "epoch": 2.4361436527751104, "percentage": 48.72, "elapsed_time": "1:39:51", "remaining_time": "1:45:05", "throughput": 19989.53, "total_tokens": 119763392} +{"current_steps": 38060, "total_steps": 78105, "loss": 0.2292, "lr": 3.043004774824678e-06, "epoch": 2.4364637347160873, "percentage": 48.73, "elapsed_time": "1:39:51", "remaining_time": "1:45:04", "throughput": 19989.87, "total_tokens": 119778944} +{"current_steps": 38065, "total_steps": 78105, "loss": 0.159, "lr": 3.0424594457998115e-06, "epoch": 2.4367838166570643, "percentage": 48.74, "elapsed_time": "1:39:52", "remaining_time": "1:45:03", "throughput": 19990.27, "total_tokens": 119795520} +{"current_steps": 38070, "total_steps": 78105, "loss": 0.1652, "lr": 3.0419140896873446e-06, "epoch": 2.437103898598041, "percentage": 48.74, "elapsed_time": "1:39:53", "remaining_time": "1:45:02", "throughput": 19990.59, "total_tokens": 119810688} +{"current_steps": 38075, "total_steps": 78105, "loss": 0.3335, "lr": 3.041368706514512e-06, "epoch": 2.437423980539018, "percentage": 48.75, "elapsed_time": "1:39:53", "remaining_time": "1:45:01", "throughput": 19990.88, "total_tokens": 119825280} +{"current_steps": 38080, "total_steps": 78105, "loss": 0.3407, "lr": 3.040823296308546e-06, "epoch": 2.437744062479995, "percentage": 48.75, "elapsed_time": "1:39:54", "remaining_time": "1:45:00", "throughput": 19991.2, "total_tokens": 119840384} +{"current_steps": 38085, "total_steps": 78105, "loss": 0.2037, "lr": 3.0402778590966814e-06, "epoch": 2.438064144420972, "percentage": 48.76, "elapsed_time": "1:39:55", "remaining_time": "1:44:59", "throughput": 19991.5, "total_tokens": 119855168} +{"current_steps": 38090, "total_steps": 78105, "loss": 0.2166, "lr": 3.039732394906155e-06, "epoch": 2.438384226361949, "percentage": 48.77, "elapsed_time": "1:39:55", "remaining_time": "1:44:58", "throughput": 19991.84, "total_tokens": 119870464} +{"current_steps": 38095, "total_steps": 78105, "loss": 0.1906, "lr": 3.039186903764203e-06, "epoch": 2.4387043083029254, "percentage": 48.77, "elapsed_time": "1:39:56", "remaining_time": "1:44:58", "throughput": 19992.15, "total_tokens": 119885696} +{"current_steps": 38100, "total_steps": 78105, "loss": 0.2295, "lr": 3.0386413856980666e-06, "epoch": 2.4390243902439024, "percentage": 48.78, "elapsed_time": "1:39:57", "remaining_time": "1:44:57", "throughput": 19992.53, "total_tokens": 119901312} +{"current_steps": 38105, "total_steps": 78105, "loss": 0.249, "lr": 3.038095840734985e-06, "epoch": 2.4393444721848794, "percentage": 48.79, "elapsed_time": "1:39:57", "remaining_time": "1:44:56", "throughput": 19992.89, "total_tokens": 119916992} +{"current_steps": 38110, "total_steps": 78105, "loss": 0.2908, "lr": 3.037550268902199e-06, "epoch": 2.4396645541258564, "percentage": 48.79, "elapsed_time": "1:39:58", "remaining_time": "1:44:55", "throughput": 19993.28, "total_tokens": 119932928} +{"current_steps": 38115, "total_steps": 78105, "loss": 0.2406, "lr": 3.037004670226954e-06, "epoch": 2.439984636066833, "percentage": 48.8, "elapsed_time": "1:39:59", "remaining_time": "1:44:54", "throughput": 19993.67, "total_tokens": 119948992} +{"current_steps": 38120, "total_steps": 78105, "loss": 0.2393, "lr": 3.0364590447364923e-06, "epoch": 2.44030471800781, "percentage": 48.81, "elapsed_time": "1:39:59", "remaining_time": "1:44:53", "throughput": 19993.96, "total_tokens": 119963712} +{"current_steps": 38125, "total_steps": 78105, "loss": 0.1609, "lr": 3.03591339245806e-06, "epoch": 2.440624799948787, "percentage": 48.81, "elapsed_time": "1:40:00", "remaining_time": "1:44:52", "throughput": 19994.27, "total_tokens": 119978752} +{"current_steps": 38130, "total_steps": 78105, "loss": 0.2331, "lr": 3.0353677134189042e-06, "epoch": 2.440944881889764, "percentage": 48.82, "elapsed_time": "1:40:01", "remaining_time": "1:44:51", "throughput": 19994.62, "total_tokens": 119994432} +{"current_steps": 38135, "total_steps": 78105, "loss": 0.2935, "lr": 3.0348220076462743e-06, "epoch": 2.441264963830741, "percentage": 48.83, "elapsed_time": "1:40:02", "remaining_time": "1:44:50", "throughput": 19995.03, "total_tokens": 120010816} +{"current_steps": 38140, "total_steps": 78105, "loss": 0.1801, "lr": 3.034276275167419e-06, "epoch": 2.4415850457717174, "percentage": 48.83, "elapsed_time": "1:40:02", "remaining_time": "1:44:49", "throughput": 19995.39, "total_tokens": 120026368} +{"current_steps": 38145, "total_steps": 78105, "loss": 0.1317, "lr": 3.033730516009589e-06, "epoch": 2.4419051277126944, "percentage": 48.84, "elapsed_time": "1:40:03", "remaining_time": "1:44:49", "throughput": 19995.73, "total_tokens": 120041920} +{"current_steps": 38150, "total_steps": 78105, "loss": 0.2711, "lr": 3.0331847302000373e-06, "epoch": 2.4422252096536714, "percentage": 48.84, "elapsed_time": "1:40:04", "remaining_time": "1:44:48", "throughput": 19996.15, "total_tokens": 120057984} +{"current_steps": 38155, "total_steps": 78105, "loss": 0.2212, "lr": 3.0326389177660164e-06, "epoch": 2.4425452915946484, "percentage": 48.85, "elapsed_time": "1:40:04", "remaining_time": "1:44:47", "throughput": 19996.59, "total_tokens": 120075456} +{"current_steps": 38160, "total_steps": 78105, "loss": 0.2365, "lr": 3.0320930787347835e-06, "epoch": 2.442865373535625, "percentage": 48.86, "elapsed_time": "1:40:05", "remaining_time": "1:44:46", "throughput": 19996.95, "total_tokens": 120091328} +{"current_steps": 38165, "total_steps": 78105, "loss": 0.2407, "lr": 3.0315472131335934e-06, "epoch": 2.443185455476602, "percentage": 48.86, "elapsed_time": "1:40:06", "remaining_time": "1:44:45", "throughput": 19997.35, "total_tokens": 120107392} +{"current_steps": 38170, "total_steps": 78105, "loss": 0.2258, "lr": 3.031001320989705e-06, "epoch": 2.443505537417579, "percentage": 48.87, "elapsed_time": "1:40:06", "remaining_time": "1:44:44", "throughput": 19997.79, "total_tokens": 120124096} +{"current_steps": 38175, "total_steps": 78105, "loss": 0.2009, "lr": 3.0304554023303757e-06, "epoch": 2.443825619358556, "percentage": 48.88, "elapsed_time": "1:40:07", "remaining_time": "1:44:43", "throughput": 19998.19, "total_tokens": 120140224} +{"current_steps": 38180, "total_steps": 78105, "loss": 0.2672, "lr": 3.0299094571828667e-06, "epoch": 2.444145701299533, "percentage": 48.88, "elapsed_time": "1:40:08", "remaining_time": "1:44:42", "throughput": 19998.66, "total_tokens": 120157504} +{"current_steps": 38185, "total_steps": 78105, "loss": 0.2614, "lr": 3.0293634855744397e-06, "epoch": 2.4444657832405094, "percentage": 48.89, "elapsed_time": "1:40:08", "remaining_time": "1:44:41", "throughput": 19999.03, "total_tokens": 120173184} +{"current_steps": 38190, "total_steps": 78105, "loss": 0.2464, "lr": 3.028817487532358e-06, "epoch": 2.4447858651814864, "percentage": 48.9, "elapsed_time": "1:40:09", "remaining_time": "1:44:41", "throughput": 19999.33, "total_tokens": 120187776} +{"current_steps": 38195, "total_steps": 78105, "loss": 0.3033, "lr": 3.028271463083885e-06, "epoch": 2.4451059471224634, "percentage": 48.9, "elapsed_time": "1:40:10", "remaining_time": "1:44:40", "throughput": 19999.64, "total_tokens": 120202368} +{"current_steps": 38200, "total_steps": 78105, "loss": 0.223, "lr": 3.0277254122562873e-06, "epoch": 2.4454260290634404, "percentage": 48.91, "elapsed_time": "1:40:10", "remaining_time": "1:44:39", "throughput": 20000.02, "total_tokens": 120218624} +{"current_steps": 38205, "total_steps": 78105, "loss": 0.2247, "lr": 3.0271793350768307e-06, "epoch": 2.445746111004417, "percentage": 48.91, "elapsed_time": "1:40:11", "remaining_time": "1:44:38", "throughput": 20000.41, "total_tokens": 120234432} +{"current_steps": 38210, "total_steps": 78105, "loss": 0.2486, "lr": 3.0266332315727843e-06, "epoch": 2.446066192945394, "percentage": 48.92, "elapsed_time": "1:40:12", "remaining_time": "1:44:37", "throughput": 20000.75, "total_tokens": 120249984} +{"current_steps": 38215, "total_steps": 78105, "loss": 0.1844, "lr": 3.0260871017714177e-06, "epoch": 2.446386274886371, "percentage": 48.93, "elapsed_time": "1:40:12", "remaining_time": "1:44:36", "throughput": 20001.04, "total_tokens": 120264448} +{"current_steps": 38220, "total_steps": 78105, "loss": 0.3329, "lr": 3.0255409457000014e-06, "epoch": 2.446706356827348, "percentage": 48.93, "elapsed_time": "1:40:13", "remaining_time": "1:44:35", "throughput": 20001.38, "total_tokens": 120279872} +{"current_steps": 38225, "total_steps": 78105, "loss": 0.1865, "lr": 3.0249947633858074e-06, "epoch": 2.447026438768325, "percentage": 48.94, "elapsed_time": "1:40:14", "remaining_time": "1:44:34", "throughput": 20001.71, "total_tokens": 120294976} +{"current_steps": 38230, "total_steps": 78105, "loss": 0.1945, "lr": 3.0244485548561097e-06, "epoch": 2.4473465207093015, "percentage": 48.95, "elapsed_time": "1:40:14", "remaining_time": "1:44:33", "throughput": 20002.09, "total_tokens": 120311040} +{"current_steps": 38235, "total_steps": 78105, "loss": 0.3044, "lr": 3.0239023201381836e-06, "epoch": 2.4476666026502785, "percentage": 48.95, "elapsed_time": "1:40:15", "remaining_time": "1:44:32", "throughput": 20002.44, "total_tokens": 120327040} +{"current_steps": 38240, "total_steps": 78105, "loss": 0.1835, "lr": 3.0233560592593036e-06, "epoch": 2.4479866845912555, "percentage": 48.96, "elapsed_time": "1:40:16", "remaining_time": "1:44:31", "throughput": 20002.77, "total_tokens": 120342208} +{"current_steps": 38245, "total_steps": 78105, "loss": 0.2948, "lr": 3.0228097722467483e-06, "epoch": 2.448306766532232, "percentage": 48.97, "elapsed_time": "1:40:16", "remaining_time": "1:44:31", "throughput": 20003.13, "total_tokens": 120357568} +{"current_steps": 38250, "total_steps": 78105, "loss": 0.1664, "lr": 3.022263459127796e-06, "epoch": 2.448626848473209, "percentage": 48.97, "elapsed_time": "1:40:17", "remaining_time": "1:44:30", "throughput": 20003.42, "total_tokens": 120372032} +{"current_steps": 38255, "total_steps": 78105, "loss": 0.1201, "lr": 3.0217171199297273e-06, "epoch": 2.448946930414186, "percentage": 48.98, "elapsed_time": "1:40:18", "remaining_time": "1:44:29", "throughput": 20003.75, "total_tokens": 120387520} +{"current_steps": 38260, "total_steps": 78105, "loss": 0.2732, "lr": 3.0211707546798235e-06, "epoch": 2.449267012355163, "percentage": 48.99, "elapsed_time": "1:40:18", "remaining_time": "1:44:28", "throughput": 20004.05, "total_tokens": 120402176} +{"current_steps": 38265, "total_steps": 78105, "loss": 0.1979, "lr": 3.0206243634053666e-06, "epoch": 2.44958709429614, "percentage": 48.99, "elapsed_time": "1:40:19", "remaining_time": "1:44:27", "throughput": 20004.42, "total_tokens": 120418176} +{"current_steps": 38270, "total_steps": 78105, "loss": 0.2882, "lr": 3.0200779461336397e-06, "epoch": 2.4499071762371165, "percentage": 49.0, "elapsed_time": "1:40:20", "remaining_time": "1:44:26", "throughput": 20004.9, "total_tokens": 120435328} +{"current_steps": 38275, "total_steps": 78105, "loss": 0.3605, "lr": 3.019531502891931e-06, "epoch": 2.4502272581780935, "percentage": 49.0, "elapsed_time": "1:40:20", "remaining_time": "1:44:25", "throughput": 20005.25, "total_tokens": 120450752} +{"current_steps": 38280, "total_steps": 78105, "loss": 0.1623, "lr": 3.0189850337075245e-06, "epoch": 2.4505473401190705, "percentage": 49.01, "elapsed_time": "1:40:21", "remaining_time": "1:44:24", "throughput": 20005.66, "total_tokens": 120467072} +{"current_steps": 38285, "total_steps": 78105, "loss": 0.2415, "lr": 3.0184385386077076e-06, "epoch": 2.4508674220600475, "percentage": 49.02, "elapsed_time": "1:40:22", "remaining_time": "1:44:23", "throughput": 20006.01, "total_tokens": 120482496} +{"current_steps": 38290, "total_steps": 78105, "loss": 0.2557, "lr": 3.017892017619772e-06, "epoch": 2.451187504001024, "percentage": 49.02, "elapsed_time": "1:40:22", "remaining_time": "1:44:22", "throughput": 20006.39, "total_tokens": 120498368} +{"current_steps": 38295, "total_steps": 78105, "loss": 0.2319, "lr": 3.017345470771006e-06, "epoch": 2.451507585942001, "percentage": 49.03, "elapsed_time": "1:40:23", "remaining_time": "1:44:21", "throughput": 20006.77, "total_tokens": 120514432} +{"current_steps": 38300, "total_steps": 78105, "loss": 0.225, "lr": 3.016798898088702e-06, "epoch": 2.451827667882978, "percentage": 49.04, "elapsed_time": "1:40:24", "remaining_time": "1:44:21", "throughput": 20007.06, "total_tokens": 120528960} +{"current_steps": 38305, "total_steps": 78105, "loss": 0.2271, "lr": 3.016252299600153e-06, "epoch": 2.452147749823955, "percentage": 49.04, "elapsed_time": "1:40:24", "remaining_time": "1:44:20", "throughput": 20007.45, "total_tokens": 120544896} +{"current_steps": 38310, "total_steps": 78105, "loss": 0.3316, "lr": 3.015705675332653e-06, "epoch": 2.452467831764932, "percentage": 49.05, "elapsed_time": "1:40:25", "remaining_time": "1:44:19", "throughput": 20007.78, "total_tokens": 120559936} +{"current_steps": 38315, "total_steps": 78105, "loss": 0.2252, "lr": 3.015159025313498e-06, "epoch": 2.4527879137059085, "percentage": 49.06, "elapsed_time": "1:40:26", "remaining_time": "1:44:18", "throughput": 20008.08, "total_tokens": 120574528} +{"current_steps": 38320, "total_steps": 78105, "loss": 0.2107, "lr": 3.014612349569985e-06, "epoch": 2.4531079956468855, "percentage": 49.06, "elapsed_time": "1:40:26", "remaining_time": "1:44:17", "throughput": 20008.37, "total_tokens": 120589056} +{"current_steps": 38325, "total_steps": 78105, "loss": 0.157, "lr": 3.0140656481294114e-06, "epoch": 2.4534280775878625, "percentage": 49.07, "elapsed_time": "1:40:27", "remaining_time": "1:44:16", "throughput": 20008.79, "total_tokens": 120605760} +{"current_steps": 38330, "total_steps": 78105, "loss": 0.2334, "lr": 3.013518921019077e-06, "epoch": 2.4537481595288395, "percentage": 49.07, "elapsed_time": "1:40:28", "remaining_time": "1:44:15", "throughput": 20009.13, "total_tokens": 120621184} +{"current_steps": 38335, "total_steps": 78105, "loss": 0.1959, "lr": 3.0129721682662826e-06, "epoch": 2.454068241469816, "percentage": 49.08, "elapsed_time": "1:40:28", "remaining_time": "1:44:14", "throughput": 20009.5, "total_tokens": 120636992} +{"current_steps": 38340, "total_steps": 78105, "loss": 0.1609, "lr": 3.0124253898983295e-06, "epoch": 2.454388323410793, "percentage": 49.09, "elapsed_time": "1:40:29", "remaining_time": "1:44:13", "throughput": 20009.83, "total_tokens": 120652224} +{"current_steps": 38345, "total_steps": 78105, "loss": 0.2677, "lr": 3.011878585942522e-06, "epoch": 2.45470840535177, "percentage": 49.09, "elapsed_time": "1:40:30", "remaining_time": "1:44:12", "throughput": 20010.15, "total_tokens": 120667328} +{"current_steps": 38350, "total_steps": 78105, "loss": 0.1615, "lr": 3.0113317564261646e-06, "epoch": 2.455028487292747, "percentage": 49.1, "elapsed_time": "1:40:30", "remaining_time": "1:44:11", "throughput": 20010.45, "total_tokens": 120682112} +{"current_steps": 38355, "total_steps": 78105, "loss": 0.2636, "lr": 3.0107849013765623e-06, "epoch": 2.455348569233724, "percentage": 49.11, "elapsed_time": "1:40:31", "remaining_time": "1:44:10", "throughput": 20010.81, "total_tokens": 120697728} +{"current_steps": 38360, "total_steps": 78105, "loss": 0.245, "lr": 3.0102380208210225e-06, "epoch": 2.4556686511747006, "percentage": 49.11, "elapsed_time": "1:40:32", "remaining_time": "1:44:10", "throughput": 20011.12, "total_tokens": 120712576} +{"current_steps": 38365, "total_steps": 78105, "loss": 0.1978, "lr": 3.009691114786854e-06, "epoch": 2.4559887331156776, "percentage": 49.12, "elapsed_time": "1:40:32", "remaining_time": "1:44:09", "throughput": 20011.48, "total_tokens": 120728384} +{"current_steps": 38370, "total_steps": 78105, "loss": 0.1854, "lr": 3.0091441833013663e-06, "epoch": 2.4563088150566545, "percentage": 49.13, "elapsed_time": "1:40:33", "remaining_time": "1:44:08", "throughput": 20012.02, "total_tokens": 120746880} +{"current_steps": 38375, "total_steps": 78105, "loss": 0.2427, "lr": 3.0085972263918694e-06, "epoch": 2.4566288969976315, "percentage": 49.13, "elapsed_time": "1:40:34", "remaining_time": "1:44:07", "throughput": 20012.4, "total_tokens": 120763072} +{"current_steps": 38380, "total_steps": 78105, "loss": 0.256, "lr": 3.008050244085677e-06, "epoch": 2.456948978938608, "percentage": 49.14, "elapsed_time": "1:40:35", "remaining_time": "1:44:06", "throughput": 20012.72, "total_tokens": 120778368} +{"current_steps": 38385, "total_steps": 78105, "loss": 0.2559, "lr": 3.007503236410101e-06, "epoch": 2.457269060879585, "percentage": 49.15, "elapsed_time": "1:40:35", "remaining_time": "1:44:05", "throughput": 20013.08, "total_tokens": 120794048} +{"current_steps": 38390, "total_steps": 78105, "loss": 0.2848, "lr": 3.0069562033924573e-06, "epoch": 2.457589142820562, "percentage": 49.15, "elapsed_time": "1:40:36", "remaining_time": "1:44:04", "throughput": 20013.4, "total_tokens": 120809088} +{"current_steps": 38395, "total_steps": 78105, "loss": 0.2871, "lr": 3.006409145060061e-06, "epoch": 2.457909224761539, "percentage": 49.16, "elapsed_time": "1:40:37", "remaining_time": "1:44:03", "throughput": 20013.82, "total_tokens": 120826048} +{"current_steps": 38400, "total_steps": 78105, "loss": 0.1771, "lr": 3.00586206144023e-06, "epoch": 2.458229306702516, "percentage": 49.16, "elapsed_time": "1:40:37", "remaining_time": "1:44:02", "throughput": 20014.17, "total_tokens": 120841344} +{"current_steps": 38405, "total_steps": 78105, "loss": 0.2119, "lr": 3.0053149525602834e-06, "epoch": 2.4585493886434926, "percentage": 49.17, "elapsed_time": "1:40:38", "remaining_time": "1:44:02", "throughput": 20013.41, "total_tokens": 120857856} +{"current_steps": 38410, "total_steps": 78105, "loss": 0.2947, "lr": 3.004767818447539e-06, "epoch": 2.4588694705844696, "percentage": 49.18, "elapsed_time": "1:40:39", "remaining_time": "1:44:01", "throughput": 20013.75, "total_tokens": 120873408} +{"current_steps": 38415, "total_steps": 78105, "loss": 0.1922, "lr": 3.0042206591293195e-06, "epoch": 2.4591895525254466, "percentage": 49.18, "elapsed_time": "1:40:40", "remaining_time": "1:44:00", "throughput": 20014.06, "total_tokens": 120888512} +{"current_steps": 38420, "total_steps": 78105, "loss": 0.2306, "lr": 3.0036734746329465e-06, "epoch": 2.4595096344664236, "percentage": 49.19, "elapsed_time": "1:40:40", "remaining_time": "1:43:59", "throughput": 20014.4, "total_tokens": 120904064} +{"current_steps": 38425, "total_steps": 78105, "loss": 0.1751, "lr": 3.003126264985744e-06, "epoch": 2.4598297164074, "percentage": 49.2, "elapsed_time": "1:40:41", "remaining_time": "1:43:58", "throughput": 20014.72, "total_tokens": 120919296} +{"current_steps": 38430, "total_steps": 78105, "loss": 0.2463, "lr": 3.0025790302150365e-06, "epoch": 2.460149798348377, "percentage": 49.2, "elapsed_time": "1:40:42", "remaining_time": "1:43:57", "throughput": 20015.11, "total_tokens": 120935296} +{"current_steps": 38435, "total_steps": 78105, "loss": 0.338, "lr": 3.00203177034815e-06, "epoch": 2.460469880289354, "percentage": 49.21, "elapsed_time": "1:40:42", "remaining_time": "1:43:57", "throughput": 20015.54, "total_tokens": 120952000} +{"current_steps": 38440, "total_steps": 78105, "loss": 0.231, "lr": 3.0014844854124114e-06, "epoch": 2.460789962230331, "percentage": 49.22, "elapsed_time": "1:40:43", "remaining_time": "1:43:56", "throughput": 20015.85, "total_tokens": 120967104} +{"current_steps": 38445, "total_steps": 78105, "loss": 0.3679, "lr": 3.0009371754351503e-06, "epoch": 2.461110044171308, "percentage": 49.22, "elapsed_time": "1:40:44", "remaining_time": "1:43:55", "throughput": 20016.2, "total_tokens": 120982848} +{"current_steps": 38450, "total_steps": 78105, "loss": 0.1984, "lr": 3.000389840443696e-06, "epoch": 2.4614301261122846, "percentage": 49.23, "elapsed_time": "1:40:44", "remaining_time": "1:43:54", "throughput": 20016.63, "total_tokens": 120999424} +{"current_steps": 38455, "total_steps": 78105, "loss": 0.239, "lr": 2.9998424804653794e-06, "epoch": 2.4617502080532616, "percentage": 49.24, "elapsed_time": "1:40:45", "remaining_time": "1:43:53", "throughput": 20016.98, "total_tokens": 121015360} +{"current_steps": 38460, "total_steps": 78105, "loss": 0.2238, "lr": 2.9992950955275325e-06, "epoch": 2.4620702899942386, "percentage": 49.24, "elapsed_time": "1:40:46", "remaining_time": "1:43:52", "throughput": 20017.41, "total_tokens": 121032320} +{"current_steps": 38465, "total_steps": 78105, "loss": 0.1826, "lr": 2.9987476856574897e-06, "epoch": 2.4623903719352156, "percentage": 49.25, "elapsed_time": "1:40:47", "remaining_time": "1:43:51", "throughput": 20017.9, "total_tokens": 121049856} +{"current_steps": 38470, "total_steps": 78105, "loss": 0.1803, "lr": 2.9982002508825853e-06, "epoch": 2.462710453876192, "percentage": 49.25, "elapsed_time": "1:40:47", "remaining_time": "1:43:50", "throughput": 20018.25, "total_tokens": 121065856} +{"current_steps": 38475, "total_steps": 78105, "loss": 0.1846, "lr": 2.997652791230155e-06, "epoch": 2.463030535817169, "percentage": 49.26, "elapsed_time": "1:40:48", "remaining_time": "1:43:50", "throughput": 20018.62, "total_tokens": 121081664} +{"current_steps": 38480, "total_steps": 78105, "loss": 0.3221, "lr": 2.997105306727537e-06, "epoch": 2.463350617758146, "percentage": 49.27, "elapsed_time": "1:40:49", "remaining_time": "1:43:49", "throughput": 20019.0, "total_tokens": 121097856} +{"current_steps": 38485, "total_steps": 78105, "loss": 0.1719, "lr": 2.996557797402069e-06, "epoch": 2.463670699699123, "percentage": 49.27, "elapsed_time": "1:40:49", "remaining_time": "1:43:48", "throughput": 20019.38, "total_tokens": 121113536} +{"current_steps": 38490, "total_steps": 78105, "loss": 0.2739, "lr": 2.9960102632810915e-06, "epoch": 2.4639907816401, "percentage": 49.28, "elapsed_time": "1:40:50", "remaining_time": "1:43:47", "throughput": 20019.74, "total_tokens": 121129152} +{"current_steps": 38495, "total_steps": 78105, "loss": 0.182, "lr": 2.9954627043919448e-06, "epoch": 2.4643108635810766, "percentage": 49.29, "elapsed_time": "1:40:51", "remaining_time": "1:43:46", "throughput": 20020.09, "total_tokens": 121144704} +{"current_steps": 38500, "total_steps": 78105, "loss": 0.2309, "lr": 2.994915120761971e-06, "epoch": 2.4646309455220536, "percentage": 49.29, "elapsed_time": "1:40:51", "remaining_time": "1:43:45", "throughput": 20020.45, "total_tokens": 121160704} +{"current_steps": 38505, "total_steps": 78105, "loss": 0.2265, "lr": 2.9943675124185147e-06, "epoch": 2.4649510274630306, "percentage": 49.3, "elapsed_time": "1:40:52", "remaining_time": "1:43:44", "throughput": 20020.78, "total_tokens": 121175808} +{"current_steps": 38510, "total_steps": 78105, "loss": 0.2576, "lr": 2.9938198793889197e-06, "epoch": 2.465271109404007, "percentage": 49.31, "elapsed_time": "1:40:53", "remaining_time": "1:43:43", "throughput": 20021.14, "total_tokens": 121191616} +{"current_steps": 38515, "total_steps": 78105, "loss": 0.2165, "lr": 2.9932722217005315e-06, "epoch": 2.465591191344984, "percentage": 49.31, "elapsed_time": "1:40:53", "remaining_time": "1:43:42", "throughput": 20021.49, "total_tokens": 121207168} +{"current_steps": 38520, "total_steps": 78105, "loss": 0.1199, "lr": 2.9927245393806985e-06, "epoch": 2.465911273285961, "percentage": 49.32, "elapsed_time": "1:40:54", "remaining_time": "1:43:41", "throughput": 20021.82, "total_tokens": 121222656} +{"current_steps": 38525, "total_steps": 78105, "loss": 0.2052, "lr": 2.9921768324567686e-06, "epoch": 2.466231355226938, "percentage": 49.32, "elapsed_time": "1:40:55", "remaining_time": "1:43:41", "throughput": 20022.34, "total_tokens": 121240832} +{"current_steps": 38530, "total_steps": 78105, "loss": 0.3025, "lr": 2.991629100956091e-06, "epoch": 2.466551437167915, "percentage": 49.33, "elapsed_time": "1:40:55", "remaining_time": "1:43:40", "throughput": 20022.66, "total_tokens": 121256320} +{"current_steps": 38535, "total_steps": 78105, "loss": 0.1865, "lr": 2.991081344906017e-06, "epoch": 2.4668715191088917, "percentage": 49.34, "elapsed_time": "1:40:56", "remaining_time": "1:43:39", "throughput": 20022.98, "total_tokens": 121271616} +{"current_steps": 38540, "total_steps": 78105, "loss": 0.2554, "lr": 2.990533564333899e-06, "epoch": 2.4671916010498687, "percentage": 49.34, "elapsed_time": "1:40:57", "remaining_time": "1:43:38", "throughput": 20023.27, "total_tokens": 121286336} +{"current_steps": 38545, "total_steps": 78105, "loss": 0.1651, "lr": 2.9899857592670893e-06, "epoch": 2.4675116829908457, "percentage": 49.35, "elapsed_time": "1:40:57", "remaining_time": "1:43:37", "throughput": 20023.61, "total_tokens": 121301952} +{"current_steps": 38550, "total_steps": 78105, "loss": 0.3387, "lr": 2.9894379297329435e-06, "epoch": 2.4678317649318227, "percentage": 49.36, "elapsed_time": "1:40:58", "remaining_time": "1:43:36", "throughput": 20024.0, "total_tokens": 121318080} +{"current_steps": 38555, "total_steps": 78105, "loss": 0.1882, "lr": 2.988890075758816e-06, "epoch": 2.468151846872799, "percentage": 49.36, "elapsed_time": "1:40:59", "remaining_time": "1:43:35", "throughput": 20024.38, "total_tokens": 121334080} +{"current_steps": 38560, "total_steps": 78105, "loss": 0.242, "lr": 2.988342197372066e-06, "epoch": 2.468471928813776, "percentage": 49.37, "elapsed_time": "1:40:59", "remaining_time": "1:43:34", "throughput": 20024.71, "total_tokens": 121349248} +{"current_steps": 38565, "total_steps": 78105, "loss": 0.2495, "lr": 2.9877942946000494e-06, "epoch": 2.468792010754753, "percentage": 49.38, "elapsed_time": "1:41:00", "remaining_time": "1:43:33", "throughput": 20025.0, "total_tokens": 121363840} +{"current_steps": 38570, "total_steps": 78105, "loss": 0.2575, "lr": 2.9872463674701267e-06, "epoch": 2.46911209269573, "percentage": 49.38, "elapsed_time": "1:41:01", "remaining_time": "1:43:32", "throughput": 20025.3, "total_tokens": 121379136} +{"current_steps": 38575, "total_steps": 78105, "loss": 0.3895, "lr": 2.9866984160096586e-06, "epoch": 2.469432174636707, "percentage": 49.39, "elapsed_time": "1:41:02", "remaining_time": "1:43:32", "throughput": 20025.81, "total_tokens": 121397376} +{"current_steps": 38580, "total_steps": 78105, "loss": 0.1979, "lr": 2.986150440246007e-06, "epoch": 2.4697522565776837, "percentage": 49.4, "elapsed_time": "1:41:02", "remaining_time": "1:43:31", "throughput": 20026.21, "total_tokens": 121413632} +{"current_steps": 38585, "total_steps": 78105, "loss": 0.1206, "lr": 2.9856024402065344e-06, "epoch": 2.4700723385186607, "percentage": 49.4, "elapsed_time": "1:41:03", "remaining_time": "1:43:30", "throughput": 20026.52, "total_tokens": 121428544} +{"current_steps": 38590, "total_steps": 78105, "loss": 0.2129, "lr": 2.9850544159186046e-06, "epoch": 2.4703924204596377, "percentage": 49.41, "elapsed_time": "1:41:04", "remaining_time": "1:43:29", "throughput": 20026.78, "total_tokens": 121442752} +{"current_steps": 38595, "total_steps": 78105, "loss": 0.2264, "lr": 2.9845063674095844e-06, "epoch": 2.4707125024006147, "percentage": 49.41, "elapsed_time": "1:41:04", "remaining_time": "1:43:28", "throughput": 20027.14, "total_tokens": 121458368} +{"current_steps": 38600, "total_steps": 78105, "loss": 0.1578, "lr": 2.9839582947068404e-06, "epoch": 2.4710325843415912, "percentage": 49.42, "elapsed_time": "1:41:05", "remaining_time": "1:43:27", "throughput": 20027.53, "total_tokens": 121474432} +{"current_steps": 38605, "total_steps": 78105, "loss": 0.3306, "lr": 2.9834101978377394e-06, "epoch": 2.4713526662825682, "percentage": 49.43, "elapsed_time": "1:41:06", "remaining_time": "1:43:26", "throughput": 20027.88, "total_tokens": 121490240} +{"current_steps": 38610, "total_steps": 78105, "loss": 0.1765, "lr": 2.9828620768296518e-06, "epoch": 2.471672748223545, "percentage": 49.43, "elapsed_time": "1:41:06", "remaining_time": "1:43:25", "throughput": 20028.25, "total_tokens": 121506496} +{"current_steps": 38615, "total_steps": 78105, "loss": 0.1379, "lr": 2.982313931709946e-06, "epoch": 2.471992830164522, "percentage": 49.44, "elapsed_time": "1:41:07", "remaining_time": "1:43:24", "throughput": 20028.7, "total_tokens": 121523392} +{"current_steps": 38620, "total_steps": 78105, "loss": 0.2563, "lr": 2.981765762505996e-06, "epoch": 2.472312912105499, "percentage": 49.45, "elapsed_time": "1:41:08", "remaining_time": "1:43:24", "throughput": 20029.04, "total_tokens": 121538624} +{"current_steps": 38625, "total_steps": 78105, "loss": 0.2458, "lr": 2.981217569245173e-06, "epoch": 2.4726329940464757, "percentage": 49.45, "elapsed_time": "1:41:08", "remaining_time": "1:43:23", "throughput": 20029.37, "total_tokens": 121553728} +{"current_steps": 38630, "total_steps": 78105, "loss": 0.1847, "lr": 2.980669351954851e-06, "epoch": 2.4729530759874527, "percentage": 49.46, "elapsed_time": "1:41:09", "remaining_time": "1:43:22", "throughput": 20029.73, "total_tokens": 121569472} +{"current_steps": 38635, "total_steps": 78105, "loss": 0.1923, "lr": 2.980121110662404e-06, "epoch": 2.4732731579284297, "percentage": 49.47, "elapsed_time": "1:41:10", "remaining_time": "1:43:21", "throughput": 20030.16, "total_tokens": 121586048} +{"current_steps": 38640, "total_steps": 78105, "loss": 0.3104, "lr": 2.979572845395211e-06, "epoch": 2.4735932398694067, "percentage": 49.47, "elapsed_time": "1:41:10", "remaining_time": "1:43:20", "throughput": 20030.43, "total_tokens": 121600576} +{"current_steps": 38645, "total_steps": 78105, "loss": 0.1758, "lr": 2.9790245561806475e-06, "epoch": 2.4739133218103833, "percentage": 49.48, "elapsed_time": "1:41:11", "remaining_time": "1:43:19", "throughput": 20030.77, "total_tokens": 121616000} +{"current_steps": 38650, "total_steps": 78105, "loss": 0.3138, "lr": 2.9784762430460933e-06, "epoch": 2.4742334037513602, "percentage": 49.48, "elapsed_time": "1:41:12", "remaining_time": "1:43:18", "throughput": 20031.13, "total_tokens": 121631936} +{"current_steps": 38655, "total_steps": 78105, "loss": 0.2557, "lr": 2.977927906018928e-06, "epoch": 2.4745534856923372, "percentage": 49.49, "elapsed_time": "1:41:12", "remaining_time": "1:43:17", "throughput": 20031.49, "total_tokens": 121647808} +{"current_steps": 38660, "total_steps": 78105, "loss": 0.1177, "lr": 2.9773795451265318e-06, "epoch": 2.4748735676333142, "percentage": 49.5, "elapsed_time": "1:41:13", "remaining_time": "1:43:16", "throughput": 20031.82, "total_tokens": 121663104} +{"current_steps": 38665, "total_steps": 78105, "loss": 0.2509, "lr": 2.9768311603962886e-06, "epoch": 2.475193649574291, "percentage": 49.5, "elapsed_time": "1:41:14", "remaining_time": "1:43:15", "throughput": 20032.2, "total_tokens": 121679232} +{"current_steps": 38670, "total_steps": 78105, "loss": 0.2307, "lr": 2.97628275185558e-06, "epoch": 2.4755137315152678, "percentage": 49.51, "elapsed_time": "1:41:14", "remaining_time": "1:43:15", "throughput": 20032.51, "total_tokens": 121694208} +{"current_steps": 38675, "total_steps": 78105, "loss": 0.3046, "lr": 2.975734319531792e-06, "epoch": 2.4758338134562448, "percentage": 49.52, "elapsed_time": "1:41:15", "remaining_time": "1:43:14", "throughput": 20032.88, "total_tokens": 121710272} +{"current_steps": 38680, "total_steps": 78105, "loss": 0.1934, "lr": 2.9751858634523105e-06, "epoch": 2.4761538953972217, "percentage": 49.52, "elapsed_time": "1:41:16", "remaining_time": "1:43:13", "throughput": 20033.23, "total_tokens": 121726144} +{"current_steps": 38685, "total_steps": 78105, "loss": 0.2431, "lr": 2.974637383644522e-06, "epoch": 2.4764739773381987, "percentage": 49.53, "elapsed_time": "1:41:16", "remaining_time": "1:43:12", "throughput": 20033.6, "total_tokens": 121741888} +{"current_steps": 38690, "total_steps": 78105, "loss": 0.221, "lr": 2.974088880135814e-06, "epoch": 2.4767940592791753, "percentage": 49.54, "elapsed_time": "1:41:17", "remaining_time": "1:43:11", "throughput": 20033.95, "total_tokens": 121757312} +{"current_steps": 38695, "total_steps": 78105, "loss": 0.1129, "lr": 2.9735403529535777e-06, "epoch": 2.4771141412201523, "percentage": 49.54, "elapsed_time": "1:41:18", "remaining_time": "1:43:10", "throughput": 20034.25, "total_tokens": 121772224} +{"current_steps": 38700, "total_steps": 78105, "loss": 0.2844, "lr": 2.9729918021252023e-06, "epoch": 2.4774342231611293, "percentage": 49.55, "elapsed_time": "1:41:18", "remaining_time": "1:43:09", "throughput": 20034.65, "total_tokens": 121788672} +{"current_steps": 38705, "total_steps": 78105, "loss": 0.239, "lr": 2.9724432276780803e-06, "epoch": 2.4777543051021063, "percentage": 49.56, "elapsed_time": "1:41:19", "remaining_time": "1:43:08", "throughput": 20035.04, "total_tokens": 121804928} +{"current_steps": 38710, "total_steps": 78105, "loss": 0.3126, "lr": 2.9718946296396045e-06, "epoch": 2.4780743870430832, "percentage": 49.56, "elapsed_time": "1:41:20", "remaining_time": "1:43:07", "throughput": 20035.35, "total_tokens": 121819840} +{"current_steps": 38715, "total_steps": 78105, "loss": 0.3743, "lr": 2.9713460080371685e-06, "epoch": 2.47839446898406, "percentage": 49.57, "elapsed_time": "1:41:20", "remaining_time": "1:43:06", "throughput": 20035.64, "total_tokens": 121834624} +{"current_steps": 38720, "total_steps": 78105, "loss": 0.2391, "lr": 2.9707973628981683e-06, "epoch": 2.478714550925037, "percentage": 49.57, "elapsed_time": "1:41:21", "remaining_time": "1:43:06", "throughput": 20035.99, "total_tokens": 121850176} +{"current_steps": 38725, "total_steps": 78105, "loss": 0.2137, "lr": 2.9702486942500003e-06, "epoch": 2.4790346328660138, "percentage": 49.58, "elapsed_time": "1:41:22", "remaining_time": "1:43:05", "throughput": 20036.38, "total_tokens": 121865920} +{"current_steps": 38730, "total_steps": 78105, "loss": 0.218, "lr": 2.9697000021200613e-06, "epoch": 2.4793547148069908, "percentage": 49.59, "elapsed_time": "1:41:22", "remaining_time": "1:43:04", "throughput": 20036.77, "total_tokens": 121882112} +{"current_steps": 38735, "total_steps": 78105, "loss": 0.227, "lr": 2.9691512865357513e-06, "epoch": 2.4796747967479673, "percentage": 49.59, "elapsed_time": "1:41:23", "remaining_time": "1:43:03", "throughput": 20037.14, "total_tokens": 121897920} +{"current_steps": 38740, "total_steps": 78105, "loss": 0.2508, "lr": 2.9686025475244695e-06, "epoch": 2.4799948786889443, "percentage": 49.6, "elapsed_time": "1:41:24", "remaining_time": "1:43:02", "throughput": 20037.48, "total_tokens": 121913472} +{"current_steps": 38745, "total_steps": 78105, "loss": 0.2228, "lr": 2.9680537851136176e-06, "epoch": 2.4803149606299213, "percentage": 49.61, "elapsed_time": "1:41:24", "remaining_time": "1:43:01", "throughput": 20037.87, "total_tokens": 121929920} +{"current_steps": 38750, "total_steps": 78105, "loss": 0.1358, "lr": 2.967504999330597e-06, "epoch": 2.4806350425708983, "percentage": 49.61, "elapsed_time": "1:41:25", "remaining_time": "1:43:00", "throughput": 20038.21, "total_tokens": 121945472} +{"current_steps": 38755, "total_steps": 78105, "loss": 0.2594, "lr": 2.9669561902028125e-06, "epoch": 2.4809551245118753, "percentage": 49.62, "elapsed_time": "1:41:26", "remaining_time": "1:42:59", "throughput": 20038.54, "total_tokens": 121960512} +{"current_steps": 38760, "total_steps": 78105, "loss": 0.1662, "lr": 2.966407357757668e-06, "epoch": 2.481275206452852, "percentage": 49.63, "elapsed_time": "1:41:27", "remaining_time": "1:42:58", "throughput": 20039.02, "total_tokens": 121978304} +{"current_steps": 38765, "total_steps": 78105, "loss": 0.302, "lr": 2.9658585020225695e-06, "epoch": 2.481595288393829, "percentage": 49.63, "elapsed_time": "1:41:27", "remaining_time": "1:42:58", "throughput": 20039.43, "total_tokens": 121994880} +{"current_steps": 38770, "total_steps": 78105, "loss": 0.1458, "lr": 2.965309623024924e-06, "epoch": 2.481915370334806, "percentage": 49.64, "elapsed_time": "1:41:28", "remaining_time": "1:42:57", "throughput": 20039.7, "total_tokens": 122009600} +{"current_steps": 38775, "total_steps": 78105, "loss": 0.2354, "lr": 2.96476072079214e-06, "epoch": 2.482235452275783, "percentage": 49.64, "elapsed_time": "1:41:29", "remaining_time": "1:42:56", "throughput": 20040.09, "total_tokens": 122026304} +{"current_steps": 38780, "total_steps": 78105, "loss": 0.2299, "lr": 2.964211795351626e-06, "epoch": 2.4825555342167593, "percentage": 49.65, "elapsed_time": "1:41:29", "remaining_time": "1:42:55", "throughput": 20040.44, "total_tokens": 122041664} +{"current_steps": 38785, "total_steps": 78105, "loss": 0.2043, "lr": 2.9636628467307927e-06, "epoch": 2.4828756161577363, "percentage": 49.66, "elapsed_time": "1:41:30", "remaining_time": "1:42:54", "throughput": 20040.76, "total_tokens": 122056896} +{"current_steps": 38790, "total_steps": 78105, "loss": 0.2083, "lr": 2.963113874957052e-06, "epoch": 2.4831956980987133, "percentage": 49.66, "elapsed_time": "1:41:31", "remaining_time": "1:42:53", "throughput": 20041.06, "total_tokens": 122071488} +{"current_steps": 38795, "total_steps": 78105, "loss": 0.2005, "lr": 2.962564880057817e-06, "epoch": 2.4835157800396903, "percentage": 49.67, "elapsed_time": "1:41:31", "remaining_time": "1:42:52", "throughput": 20041.37, "total_tokens": 122086400} +{"current_steps": 38800, "total_steps": 78105, "loss": 0.1575, "lr": 2.9620158620605018e-06, "epoch": 2.483835861980667, "percentage": 49.68, "elapsed_time": "1:41:32", "remaining_time": "1:42:51", "throughput": 20041.79, "total_tokens": 122102784} +{"current_steps": 38805, "total_steps": 78105, "loss": 0.2453, "lr": 2.96146682099252e-06, "epoch": 2.484155943921644, "percentage": 49.68, "elapsed_time": "1:41:33", "remaining_time": "1:42:50", "throughput": 20042.18, "total_tokens": 122118848} +{"current_steps": 38810, "total_steps": 78105, "loss": 0.1629, "lr": 2.960917756881289e-06, "epoch": 2.484476025862621, "percentage": 49.69, "elapsed_time": "1:41:33", "remaining_time": "1:42:49", "throughput": 20042.51, "total_tokens": 122134080} +{"current_steps": 38815, "total_steps": 78105, "loss": 0.4773, "lr": 2.960368669754226e-06, "epoch": 2.484796107803598, "percentage": 49.7, "elapsed_time": "1:41:34", "remaining_time": "1:42:48", "throughput": 20042.83, "total_tokens": 122148992} +{"current_steps": 38820, "total_steps": 78105, "loss": 0.1538, "lr": 2.9598195596387504e-06, "epoch": 2.4851161897445744, "percentage": 49.7, "elapsed_time": "1:41:35", "remaining_time": "1:42:48", "throughput": 20043.2, "total_tokens": 122164608} +{"current_steps": 38825, "total_steps": 78105, "loss": 0.232, "lr": 2.9592704265622807e-06, "epoch": 2.4854362716855514, "percentage": 49.71, "elapsed_time": "1:41:35", "remaining_time": "1:42:47", "throughput": 20043.59, "total_tokens": 122180608} +{"current_steps": 38830, "total_steps": 78105, "loss": 0.2174, "lr": 2.9587212705522384e-06, "epoch": 2.4857563536265284, "percentage": 49.72, "elapsed_time": "1:41:36", "remaining_time": "1:42:46", "throughput": 20043.89, "total_tokens": 122195712} +{"current_steps": 38835, "total_steps": 78105, "loss": 0.3016, "lr": 2.9581720916360447e-06, "epoch": 2.4860764355675053, "percentage": 49.72, "elapsed_time": "1:41:37", "remaining_time": "1:42:45", "throughput": 20044.27, "total_tokens": 122211904} +{"current_steps": 38840, "total_steps": 78105, "loss": 0.2784, "lr": 2.9576228898411234e-06, "epoch": 2.4863965175084823, "percentage": 49.73, "elapsed_time": "1:41:37", "remaining_time": "1:42:44", "throughput": 20044.58, "total_tokens": 122226688} +{"current_steps": 38845, "total_steps": 78105, "loss": 0.1753, "lr": 2.957073665194898e-06, "epoch": 2.486716599449459, "percentage": 49.73, "elapsed_time": "1:41:38", "remaining_time": "1:42:43", "throughput": 20044.87, "total_tokens": 122241408} +{"current_steps": 38850, "total_steps": 78105, "loss": 0.194, "lr": 2.956524417724796e-06, "epoch": 2.487036681390436, "percentage": 49.74, "elapsed_time": "1:41:39", "remaining_time": "1:42:42", "throughput": 20045.24, "total_tokens": 122257216} +{"current_steps": 38855, "total_steps": 78105, "loss": 0.1745, "lr": 2.955975147458242e-06, "epoch": 2.487356763331413, "percentage": 49.75, "elapsed_time": "1:41:39", "remaining_time": "1:42:41", "throughput": 20045.56, "total_tokens": 122272512} +{"current_steps": 38860, "total_steps": 78105, "loss": 0.3614, "lr": 2.9554258544226644e-06, "epoch": 2.48767684527239, "percentage": 49.75, "elapsed_time": "1:41:40", "remaining_time": "1:42:40", "throughput": 20045.87, "total_tokens": 122287360} +{"current_steps": 38865, "total_steps": 78105, "loss": 0.2425, "lr": 2.954876538645491e-06, "epoch": 2.4879969272133664, "percentage": 49.76, "elapsed_time": "1:41:41", "remaining_time": "1:42:39", "throughput": 20046.24, "total_tokens": 122303488} +{"current_steps": 38870, "total_steps": 78105, "loss": 0.2122, "lr": 2.9543272001541535e-06, "epoch": 2.4883170091543434, "percentage": 49.77, "elapsed_time": "1:41:41", "remaining_time": "1:42:39", "throughput": 20046.59, "total_tokens": 122318976} +{"current_steps": 38875, "total_steps": 78105, "loss": 0.2268, "lr": 2.9537778389760812e-06, "epoch": 2.4886370910953204, "percentage": 49.77, "elapsed_time": "1:41:42", "remaining_time": "1:42:38", "throughput": 20046.91, "total_tokens": 122334272} +{"current_steps": 38880, "total_steps": 78105, "loss": 0.1778, "lr": 2.953228455138708e-06, "epoch": 2.4889571730362974, "percentage": 49.78, "elapsed_time": "1:41:43", "remaining_time": "1:42:37", "throughput": 20047.27, "total_tokens": 122350016} +{"current_steps": 38885, "total_steps": 78105, "loss": 0.1932, "lr": 2.952679048669467e-06, "epoch": 2.4892772549772744, "percentage": 49.79, "elapsed_time": "1:41:43", "remaining_time": "1:42:36", "throughput": 20047.66, "total_tokens": 122366336} +{"current_steps": 38890, "total_steps": 78105, "loss": 0.2486, "lr": 2.952129619595791e-06, "epoch": 2.489597336918251, "percentage": 49.79, "elapsed_time": "1:41:44", "remaining_time": "1:42:35", "throughput": 20048.01, "total_tokens": 122382016} +{"current_steps": 38895, "total_steps": 78105, "loss": 0.2206, "lr": 2.9515801679451177e-06, "epoch": 2.489917418859228, "percentage": 49.8, "elapsed_time": "1:41:45", "remaining_time": "1:42:34", "throughput": 20048.32, "total_tokens": 122397568} +{"current_steps": 38900, "total_steps": 78105, "loss": 0.2299, "lr": 2.9510306937448825e-06, "epoch": 2.490237500800205, "percentage": 49.8, "elapsed_time": "1:41:45", "remaining_time": "1:42:33", "throughput": 20048.69, "total_tokens": 122413568} +{"current_steps": 38905, "total_steps": 78105, "loss": 0.1943, "lr": 2.9504811970225246e-06, "epoch": 2.490557582741182, "percentage": 49.81, "elapsed_time": "1:41:46", "remaining_time": "1:42:32", "throughput": 20049.05, "total_tokens": 122429184} +{"current_steps": 38910, "total_steps": 78105, "loss": 0.2332, "lr": 2.9499316778054817e-06, "epoch": 2.4908776646821584, "percentage": 49.82, "elapsed_time": "1:41:47", "remaining_time": "1:42:31", "throughput": 20049.4, "total_tokens": 122444672} +{"current_steps": 38915, "total_steps": 78105, "loss": 0.2755, "lr": 2.9493821361211944e-06, "epoch": 2.4911977466231354, "percentage": 49.82, "elapsed_time": "1:41:47", "remaining_time": "1:42:30", "throughput": 20049.7, "total_tokens": 122459648} +{"current_steps": 38920, "total_steps": 78105, "loss": 0.1965, "lr": 2.948832571997104e-06, "epoch": 2.4915178285641124, "percentage": 49.83, "elapsed_time": "1:41:48", "remaining_time": "1:42:30", "throughput": 20050.0, "total_tokens": 122474688} +{"current_steps": 38925, "total_steps": 78105, "loss": 0.2573, "lr": 2.948282985460652e-06, "epoch": 2.4918379105050894, "percentage": 49.84, "elapsed_time": "1:41:49", "remaining_time": "1:42:29", "throughput": 20050.39, "total_tokens": 122490752} +{"current_steps": 38930, "total_steps": 78105, "loss": 0.207, "lr": 2.9477333765392835e-06, "epoch": 2.4921579924460664, "percentage": 49.84, "elapsed_time": "1:41:49", "remaining_time": "1:42:28", "throughput": 20050.74, "total_tokens": 122505984} +{"current_steps": 38935, "total_steps": 78105, "loss": 0.2417, "lr": 2.9471837452604424e-06, "epoch": 2.492478074387043, "percentage": 49.85, "elapsed_time": "1:41:50", "remaining_time": "1:42:27", "throughput": 20051.07, "total_tokens": 122521024} +{"current_steps": 38940, "total_steps": 78105, "loss": 0.2529, "lr": 2.9466340916515746e-06, "epoch": 2.49279815632802, "percentage": 49.86, "elapsed_time": "1:41:51", "remaining_time": "1:42:26", "throughput": 20051.38, "total_tokens": 122536448} +{"current_steps": 38945, "total_steps": 78105, "loss": 0.2426, "lr": 2.9460844157401263e-06, "epoch": 2.493118238268997, "percentage": 49.86, "elapsed_time": "1:41:51", "remaining_time": "1:42:25", "throughput": 20051.7, "total_tokens": 122551872} +{"current_steps": 38950, "total_steps": 78105, "loss": 0.1973, "lr": 2.945534717553546e-06, "epoch": 2.493438320209974, "percentage": 49.87, "elapsed_time": "1:41:52", "remaining_time": "1:42:24", "throughput": 20052.06, "total_tokens": 122568192} +{"current_steps": 38955, "total_steps": 78105, "loss": 0.2229, "lr": 2.944984997119283e-06, "epoch": 2.4937584021509505, "percentage": 49.88, "elapsed_time": "1:41:53", "remaining_time": "1:42:23", "throughput": 20052.5, "total_tokens": 122585344} +{"current_steps": 38960, "total_steps": 78105, "loss": 0.2131, "lr": 2.944435254464786e-06, "epoch": 2.4940784840919275, "percentage": 49.88, "elapsed_time": "1:41:53", "remaining_time": "1:42:22", "throughput": 20052.8, "total_tokens": 122600640} +{"current_steps": 38965, "total_steps": 78105, "loss": 0.1986, "lr": 2.943885489617508e-06, "epoch": 2.4943985660329044, "percentage": 49.89, "elapsed_time": "1:41:54", "remaining_time": "1:42:22", "throughput": 20053.14, "total_tokens": 122616064} +{"current_steps": 38970, "total_steps": 78105, "loss": 0.306, "lr": 2.943335702604901e-06, "epoch": 2.4947186479738814, "percentage": 49.89, "elapsed_time": "1:41:55", "remaining_time": "1:42:21", "throughput": 20053.46, "total_tokens": 122631232} +{"current_steps": 38975, "total_steps": 78105, "loss": 0.3408, "lr": 2.9427858934544183e-06, "epoch": 2.4950387299148584, "percentage": 49.9, "elapsed_time": "1:41:55", "remaining_time": "1:42:20", "throughput": 20053.74, "total_tokens": 122645824} +{"current_steps": 38980, "total_steps": 78105, "loss": 0.2662, "lr": 2.942236062193514e-06, "epoch": 2.495358811855835, "percentage": 49.91, "elapsed_time": "1:41:56", "remaining_time": "1:42:19", "throughput": 20054.07, "total_tokens": 122661184} +{"current_steps": 38985, "total_steps": 78105, "loss": 0.1876, "lr": 2.9416862088496444e-06, "epoch": 2.495678893796812, "percentage": 49.91, "elapsed_time": "1:41:57", "remaining_time": "1:42:18", "throughput": 20054.35, "total_tokens": 122675840} +{"current_steps": 38990, "total_steps": 78105, "loss": 0.2183, "lr": 2.941136333450266e-06, "epoch": 2.495998975737789, "percentage": 49.92, "elapsed_time": "1:41:57", "remaining_time": "1:42:17", "throughput": 20054.74, "total_tokens": 122692096} +{"current_steps": 38995, "total_steps": 78105, "loss": 0.2081, "lr": 2.940586436022837e-06, "epoch": 2.496319057678766, "percentage": 49.93, "elapsed_time": "1:41:58", "remaining_time": "1:42:16", "throughput": 20055.01, "total_tokens": 122706560} +{"current_steps": 39000, "total_steps": 78105, "loss": 0.2781, "lr": 2.9400365165948163e-06, "epoch": 2.4966391396197425, "percentage": 49.93, "elapsed_time": "1:41:59", "remaining_time": "1:42:15", "throughput": 20055.35, "total_tokens": 122722048} +{"current_steps": 39005, "total_steps": 78105, "loss": 0.2418, "lr": 2.939486575193665e-06, "epoch": 2.4969592215607195, "percentage": 49.94, "elapsed_time": "1:41:59", "remaining_time": "1:42:14", "throughput": 20055.66, "total_tokens": 122737152} +{"current_steps": 39010, "total_steps": 78105, "loss": 0.42, "lr": 2.9389366118468422e-06, "epoch": 2.4972793035016965, "percentage": 49.95, "elapsed_time": "1:42:00", "remaining_time": "1:42:13", "throughput": 20055.97, "total_tokens": 122752192} +{"current_steps": 39015, "total_steps": 78105, "loss": 0.1836, "lr": 2.9383866265818118e-06, "epoch": 2.4975993854426735, "percentage": 49.95, "elapsed_time": "1:42:01", "remaining_time": "1:42:12", "throughput": 20056.35, "total_tokens": 122768320} +{"current_steps": 39020, "total_steps": 78105, "loss": 0.2326, "lr": 2.9378366194260364e-06, "epoch": 2.4979194673836504, "percentage": 49.96, "elapsed_time": "1:42:01", "remaining_time": "1:42:12", "throughput": 20056.74, "total_tokens": 122784640} +{"current_steps": 39025, "total_steps": 78105, "loss": 0.2853, "lr": 2.937286590406981e-06, "epoch": 2.498239549324627, "percentage": 49.96, "elapsed_time": "1:42:02", "remaining_time": "1:42:11", "throughput": 20057.16, "total_tokens": 122801408} +{"current_steps": 39030, "total_steps": 78105, "loss": 0.2938, "lr": 2.936736539552111e-06, "epoch": 2.498559631265604, "percentage": 49.97, "elapsed_time": "1:42:03", "remaining_time": "1:42:10", "throughput": 20057.55, "total_tokens": 122817600} +{"current_steps": 39035, "total_steps": 78105, "loss": 0.1987, "lr": 2.936186466888894e-06, "epoch": 2.498879713206581, "percentage": 49.98, "elapsed_time": "1:42:03", "remaining_time": "1:42:09", "throughput": 20057.83, "total_tokens": 122832896} +{"current_steps": 39040, "total_steps": 78105, "loss": 0.1753, "lr": 2.9356363724447958e-06, "epoch": 2.499199795147558, "percentage": 49.98, "elapsed_time": "1:42:04", "remaining_time": "1:42:08", "throughput": 20058.09, "total_tokens": 122846976} +{"current_steps": 39045, "total_steps": 78105, "loss": 0.1784, "lr": 2.935086256247286e-06, "epoch": 2.4995198770885345, "percentage": 49.99, "elapsed_time": "1:42:05", "remaining_time": "1:42:07", "throughput": 20058.45, "total_tokens": 122862912} +{"current_steps": 39050, "total_steps": 78105, "loss": 0.199, "lr": 2.934536118323835e-06, "epoch": 2.4998399590295115, "percentage": 50.0, "elapsed_time": "1:42:05", "remaining_time": "1:42:06", "throughput": 20058.8, "total_tokens": 122878720} +{"current_steps": 39055, "total_steps": 78105, "loss": 0.2244, "lr": 2.9339859587019145e-06, "epoch": 2.5001600409704885, "percentage": 50.0, "elapsed_time": "1:42:06", "remaining_time": "1:42:05", "throughput": 20059.2, "total_tokens": 122895104} +{"current_steps": 39060, "total_steps": 78105, "loss": 0.1792, "lr": 2.9334357774089956e-06, "epoch": 2.5004801229114655, "percentage": 50.01, "elapsed_time": "1:42:07", "remaining_time": "1:42:04", "throughput": 20059.54, "total_tokens": 122910592} +{"current_steps": 39060, "total_steps": 78105, "eval_loss": 0.5070953965187073, "epoch": 2.5004801229114655, "percentage": 50.01, "elapsed_time": "1:42:58", "remaining_time": "1:42:56", "throughput": 19893.63, "total_tokens": 122910592} +{"current_steps": 39065, "total_steps": 78105, "loss": 0.2447, "lr": 2.9328855744725515e-06, "epoch": 2.5008002048524425, "percentage": 50.02, "elapsed_time": "1:43:33", "remaining_time": "1:43:29", "throughput": 19784.14, "total_tokens": 122925952} +{"current_steps": 39070, "total_steps": 78105, "loss": 0.2185, "lr": 2.932335349920056e-06, "epoch": 2.501120286793419, "percentage": 50.02, "elapsed_time": "1:43:34", "remaining_time": "1:43:28", "throughput": 19784.43, "total_tokens": 122940736} +{"current_steps": 39075, "total_steps": 78105, "loss": 0.2521, "lr": 2.9317851037789856e-06, "epoch": 2.501440368734396, "percentage": 50.03, "elapsed_time": "1:43:34", "remaining_time": "1:43:27", "throughput": 19784.76, "total_tokens": 122956288} +{"current_steps": 39080, "total_steps": 78105, "loss": 0.2858, "lr": 2.9312348360768163e-06, "epoch": 2.501760450675373, "percentage": 50.04, "elapsed_time": "1:43:35", "remaining_time": "1:43:26", "throughput": 19785.2, "total_tokens": 122973056} +{"current_steps": 39085, "total_steps": 78105, "loss": 0.2661, "lr": 2.9306845468410256e-06, "epoch": 2.5020805326163496, "percentage": 50.04, "elapsed_time": "1:43:36", "remaining_time": "1:43:25", "throughput": 19785.55, "total_tokens": 122988672} +{"current_steps": 39090, "total_steps": 78105, "loss": 0.2316, "lr": 2.930134236099092e-06, "epoch": 2.5024006145573265, "percentage": 50.05, "elapsed_time": "1:43:36", "remaining_time": "1:43:24", "throughput": 19785.93, "total_tokens": 123004480} +{"current_steps": 39095, "total_steps": 78105, "loss": 0.2113, "lr": 2.929583903878495e-06, "epoch": 2.5027206964983035, "percentage": 50.05, "elapsed_time": "1:43:37", "remaining_time": "1:43:23", "throughput": 19786.27, "total_tokens": 123020224} +{"current_steps": 39100, "total_steps": 78105, "loss": 0.2862, "lr": 2.929033550206715e-06, "epoch": 2.5030407784392805, "percentage": 50.06, "elapsed_time": "1:43:38", "remaining_time": "1:43:23", "throughput": 19786.59, "total_tokens": 123035200} +{"current_steps": 39105, "total_steps": 78105, "loss": 0.2525, "lr": 2.928483175111234e-06, "epoch": 2.5033608603802575, "percentage": 50.07, "elapsed_time": "1:43:38", "remaining_time": "1:43:22", "throughput": 19787.02, "total_tokens": 123051968} +{"current_steps": 39110, "total_steps": 78105, "loss": 0.2098, "lr": 2.927932778619536e-06, "epoch": 2.5036809423212345, "percentage": 50.07, "elapsed_time": "1:43:39", "remaining_time": "1:43:21", "throughput": 19787.4, "total_tokens": 123068096} +{"current_steps": 39115, "total_steps": 78105, "loss": 0.1985, "lr": 2.927382360759104e-06, "epoch": 2.504001024262211, "percentage": 50.08, "elapsed_time": "1:43:40", "remaining_time": "1:43:20", "throughput": 19787.76, "total_tokens": 123083584} +{"current_steps": 39120, "total_steps": 78105, "loss": 0.222, "lr": 2.9268319215574232e-06, "epoch": 2.504321106203188, "percentage": 50.09, "elapsed_time": "1:43:40", "remaining_time": "1:43:19", "throughput": 19788.2, "total_tokens": 123099968} +{"current_steps": 39125, "total_steps": 78105, "loss": 0.1681, "lr": 2.926281461041979e-06, "epoch": 2.504641188144165, "percentage": 50.09, "elapsed_time": "1:43:41", "remaining_time": "1:43:18", "throughput": 19788.56, "total_tokens": 123115776} +{"current_steps": 39130, "total_steps": 78105, "loss": 0.2601, "lr": 2.925730979240259e-06, "epoch": 2.5049612700851416, "percentage": 50.1, "elapsed_time": "1:43:42", "remaining_time": "1:43:17", "throughput": 19788.94, "total_tokens": 123131904} +{"current_steps": 39135, "total_steps": 78105, "loss": 0.197, "lr": 2.925180476179752e-06, "epoch": 2.5052813520261186, "percentage": 50.11, "elapsed_time": "1:43:42", "remaining_time": "1:43:16", "throughput": 19789.34, "total_tokens": 123147968} +{"current_steps": 39140, "total_steps": 78105, "loss": 0.4018, "lr": 2.924629951887946e-06, "epoch": 2.5056014339670956, "percentage": 50.11, "elapsed_time": "1:43:43", "remaining_time": "1:43:15", "throughput": 19789.72, "total_tokens": 123163840} +{"current_steps": 39145, "total_steps": 78105, "loss": 0.2582, "lr": 2.9240794063923327e-06, "epoch": 2.5059215159080725, "percentage": 50.12, "elapsed_time": "1:43:44", "remaining_time": "1:43:14", "throughput": 19790.13, "total_tokens": 123180352} +{"current_steps": 39150, "total_steps": 78105, "loss": 0.1756, "lr": 2.923528839720403e-06, "epoch": 2.5062415978490495, "percentage": 50.12, "elapsed_time": "1:43:45", "remaining_time": "1:43:14", "throughput": 19790.55, "total_tokens": 123196608} +{"current_steps": 39155, "total_steps": 78105, "loss": 0.2078, "lr": 2.922978251899648e-06, "epoch": 2.506561679790026, "percentage": 50.13, "elapsed_time": "1:43:45", "remaining_time": "1:43:13", "throughput": 19790.93, "total_tokens": 123212480} +{"current_steps": 39160, "total_steps": 78105, "loss": 0.1229, "lr": 2.9224276429575626e-06, "epoch": 2.506881761731003, "percentage": 50.14, "elapsed_time": "1:43:46", "remaining_time": "1:43:12", "throughput": 19791.44, "total_tokens": 123230016} +{"current_steps": 39165, "total_steps": 78105, "loss": 0.1453, "lr": 2.9218770129216417e-06, "epoch": 2.50720184367198, "percentage": 50.14, "elapsed_time": "1:43:47", "remaining_time": "1:43:11", "throughput": 19791.79, "total_tokens": 123245632} +{"current_steps": 39170, "total_steps": 78105, "loss": 0.3071, "lr": 2.92132636181938e-06, "epoch": 2.507521925612957, "percentage": 50.15, "elapsed_time": "1:43:47", "remaining_time": "1:43:10", "throughput": 19792.23, "total_tokens": 123262272} +{"current_steps": 39175, "total_steps": 78105, "loss": 0.2318, "lr": 2.9207756896782746e-06, "epoch": 2.5078420075539336, "percentage": 50.16, "elapsed_time": "1:43:48", "remaining_time": "1:43:09", "throughput": 19792.59, "total_tokens": 123277888} +{"current_steps": 39180, "total_steps": 78105, "loss": 0.1895, "lr": 2.920224996525823e-06, "epoch": 2.5081620894949106, "percentage": 50.16, "elapsed_time": "1:43:49", "remaining_time": "1:43:08", "throughput": 19792.97, "total_tokens": 123293824} +{"current_steps": 39185, "total_steps": 78105, "loss": 0.2957, "lr": 2.919674282389524e-06, "epoch": 2.5084821714358876, "percentage": 50.17, "elapsed_time": "1:43:49", "remaining_time": "1:43:07", "throughput": 19793.32, "total_tokens": 123309248} +{"current_steps": 39190, "total_steps": 78105, "loss": 0.2751, "lr": 2.9191235472968767e-06, "epoch": 2.5088022533768646, "percentage": 50.18, "elapsed_time": "1:43:50", "remaining_time": "1:43:06", "throughput": 19793.68, "total_tokens": 123325120} +{"current_steps": 39195, "total_steps": 78105, "loss": 0.2485, "lr": 2.9185727912753827e-06, "epoch": 2.5091223353178416, "percentage": 50.18, "elapsed_time": "1:43:51", "remaining_time": "1:43:05", "throughput": 19793.99, "total_tokens": 123339776} +{"current_steps": 39200, "total_steps": 78105, "loss": 0.2622, "lr": 2.918022014352544e-06, "epoch": 2.509442417258818, "percentage": 50.19, "elapsed_time": "1:43:51", "remaining_time": "1:43:04", "throughput": 19794.34, "total_tokens": 123354944} +{"current_steps": 39205, "total_steps": 78105, "loss": 0.2675, "lr": 2.917471216555864e-06, "epoch": 2.509762499199795, "percentage": 50.2, "elapsed_time": "1:43:52", "remaining_time": "1:43:04", "throughput": 19794.76, "total_tokens": 123371520} +{"current_steps": 39210, "total_steps": 78105, "loss": 0.2917, "lr": 2.916920397912845e-06, "epoch": 2.510082581140772, "percentage": 50.2, "elapsed_time": "1:43:53", "remaining_time": "1:43:03", "throughput": 19795.13, "total_tokens": 123387072} +{"current_steps": 39215, "total_steps": 78105, "loss": 0.2082, "lr": 2.9163695584509934e-06, "epoch": 2.510402663081749, "percentage": 50.21, "elapsed_time": "1:43:53", "remaining_time": "1:43:02", "throughput": 19795.5, "total_tokens": 123402624} +{"current_steps": 39220, "total_steps": 78105, "loss": 0.3232, "lr": 2.915818698197814e-06, "epoch": 2.5107227450227256, "percentage": 50.21, "elapsed_time": "1:43:54", "remaining_time": "1:43:01", "throughput": 19795.92, "total_tokens": 123418880} +{"current_steps": 39225, "total_steps": 78105, "loss": 0.1872, "lr": 2.9152678171808156e-06, "epoch": 2.5110428269637026, "percentage": 50.22, "elapsed_time": "1:43:55", "remaining_time": "1:43:00", "throughput": 19796.35, "total_tokens": 123435584} +{"current_steps": 39230, "total_steps": 78105, "loss": 0.1625, "lr": 2.914716915427505e-06, "epoch": 2.5113629089046796, "percentage": 50.23, "elapsed_time": "1:43:55", "remaining_time": "1:42:59", "throughput": 19796.7, "total_tokens": 123450880} +{"current_steps": 39235, "total_steps": 78105, "loss": 0.3469, "lr": 2.9141659929653925e-06, "epoch": 2.5116829908456566, "percentage": 50.23, "elapsed_time": "1:43:56", "remaining_time": "1:42:58", "throughput": 19797.0, "total_tokens": 123465600} +{"current_steps": 39240, "total_steps": 78105, "loss": 0.2009, "lr": 2.9136150498219872e-06, "epoch": 2.5120030727866336, "percentage": 50.24, "elapsed_time": "1:43:57", "remaining_time": "1:42:57", "throughput": 19797.35, "total_tokens": 123481472} +{"current_steps": 39245, "total_steps": 78105, "loss": 0.2325, "lr": 2.9130640860248003e-06, "epoch": 2.51232315472761, "percentage": 50.25, "elapsed_time": "1:43:57", "remaining_time": "1:42:56", "throughput": 19797.68, "total_tokens": 123496512} +{"current_steps": 39250, "total_steps": 78105, "loss": 0.2488, "lr": 2.912513101601345e-06, "epoch": 2.512643236668587, "percentage": 50.25, "elapsed_time": "1:43:58", "remaining_time": "1:42:55", "throughput": 19798.08, "total_tokens": 123512640} +{"current_steps": 39255, "total_steps": 78105, "loss": 0.1831, "lr": 2.9119620965791346e-06, "epoch": 2.512963318609564, "percentage": 50.26, "elapsed_time": "1:43:59", "remaining_time": "1:42:54", "throughput": 19798.43, "total_tokens": 123528192} +{"current_steps": 39260, "total_steps": 78105, "loss": 0.2414, "lr": 2.911411070985683e-06, "epoch": 2.5132834005505407, "percentage": 50.27, "elapsed_time": "1:43:59", "remaining_time": "1:42:54", "throughput": 19798.78, "total_tokens": 123543680} +{"current_steps": 39265, "total_steps": 78105, "loss": 0.1765, "lr": 2.910860024848505e-06, "epoch": 2.5136034824915177, "percentage": 50.27, "elapsed_time": "1:44:00", "remaining_time": "1:42:53", "throughput": 19799.25, "total_tokens": 123560896} +{"current_steps": 39270, "total_steps": 78105, "loss": 0.1817, "lr": 2.9103089581951166e-06, "epoch": 2.5139235644324947, "percentage": 50.28, "elapsed_time": "1:44:01", "remaining_time": "1:42:52", "throughput": 19799.56, "total_tokens": 123575808} +{"current_steps": 39275, "total_steps": 78105, "loss": 0.2243, "lr": 2.909757871053037e-06, "epoch": 2.5142436463734716, "percentage": 50.28, "elapsed_time": "1:44:02", "remaining_time": "1:42:51", "throughput": 19799.93, "total_tokens": 123591552} +{"current_steps": 39280, "total_steps": 78105, "loss": 0.2163, "lr": 2.9092067634497835e-06, "epoch": 2.5145637283144486, "percentage": 50.29, "elapsed_time": "1:44:02", "remaining_time": "1:42:50", "throughput": 19800.32, "total_tokens": 123607488} +{"current_steps": 39285, "total_steps": 78105, "loss": 0.1746, "lr": 2.908655635412876e-06, "epoch": 2.5148838102554256, "percentage": 50.3, "elapsed_time": "1:44:03", "remaining_time": "1:42:49", "throughput": 19800.62, "total_tokens": 123622144} +{"current_steps": 39290, "total_steps": 78105, "loss": 0.2954, "lr": 2.9081044869698342e-06, "epoch": 2.515203892196402, "percentage": 50.3, "elapsed_time": "1:44:04", "remaining_time": "1:42:48", "throughput": 19801.0, "total_tokens": 123638016} +{"current_steps": 39295, "total_steps": 78105, "loss": 0.2129, "lr": 2.90755331814818e-06, "epoch": 2.515523974137379, "percentage": 50.31, "elapsed_time": "1:44:04", "remaining_time": "1:42:47", "throughput": 19801.43, "total_tokens": 123654976} +{"current_steps": 39300, "total_steps": 78105, "loss": 0.3048, "lr": 2.9070021289754368e-06, "epoch": 2.515844056078356, "percentage": 50.32, "elapsed_time": "1:44:05", "remaining_time": "1:42:46", "throughput": 19801.78, "total_tokens": 123670528} +{"current_steps": 39305, "total_steps": 78105, "loss": 0.3119, "lr": 2.9064509194791263e-06, "epoch": 2.5161641380193327, "percentage": 50.32, "elapsed_time": "1:44:06", "remaining_time": "1:42:45", "throughput": 19802.17, "total_tokens": 123686720} +{"current_steps": 39310, "total_steps": 78105, "loss": 0.3004, "lr": 2.9058996896867743e-06, "epoch": 2.5164842199603097, "percentage": 50.33, "elapsed_time": "1:44:06", "remaining_time": "1:42:44", "throughput": 19802.51, "total_tokens": 123702016} +{"current_steps": 39315, "total_steps": 78105, "loss": 0.1985, "lr": 2.905348439625906e-06, "epoch": 2.5168043019012867, "percentage": 50.34, "elapsed_time": "1:44:07", "remaining_time": "1:42:43", "throughput": 19802.74, "total_tokens": 123715968} +{"current_steps": 39320, "total_steps": 78105, "loss": 0.2372, "lr": 2.9047971693240485e-06, "epoch": 2.5171243838422637, "percentage": 50.34, "elapsed_time": "1:44:08", "remaining_time": "1:42:43", "throughput": 19803.03, "total_tokens": 123730880} +{"current_steps": 39325, "total_steps": 78105, "loss": 0.2031, "lr": 2.904245878808729e-06, "epoch": 2.5174444657832407, "percentage": 50.35, "elapsed_time": "1:44:08", "remaining_time": "1:42:42", "throughput": 19803.36, "total_tokens": 123745920} +{"current_steps": 39330, "total_steps": 78105, "loss": 0.2428, "lr": 2.9036945681074745e-06, "epoch": 2.5177645477242176, "percentage": 50.36, "elapsed_time": "1:44:09", "remaining_time": "1:42:41", "throughput": 19803.71, "total_tokens": 123761536} +{"current_steps": 39335, "total_steps": 78105, "loss": 0.2125, "lr": 2.9031432372478167e-06, "epoch": 2.518084629665194, "percentage": 50.36, "elapsed_time": "1:44:10", "remaining_time": "1:42:40", "throughput": 19804.03, "total_tokens": 123776832} +{"current_steps": 39340, "total_steps": 78105, "loss": 0.3436, "lr": 2.9025918862572866e-06, "epoch": 2.518404711606171, "percentage": 50.37, "elapsed_time": "1:44:10", "remaining_time": "1:42:39", "throughput": 19804.49, "total_tokens": 123794688} +{"current_steps": 39345, "total_steps": 78105, "loss": 0.2761, "lr": 2.902040515163414e-06, "epoch": 2.518724793547148, "percentage": 50.37, "elapsed_time": "1:44:11", "remaining_time": "1:42:38", "throughput": 19804.81, "total_tokens": 123809792} +{"current_steps": 39350, "total_steps": 78105, "loss": 0.2474, "lr": 2.901489123993732e-06, "epoch": 2.5190448754881247, "percentage": 50.38, "elapsed_time": "1:44:12", "remaining_time": "1:42:37", "throughput": 19805.16, "total_tokens": 123825664} +{"current_steps": 39355, "total_steps": 78105, "loss": 0.2624, "lr": 2.9009377127757747e-06, "epoch": 2.5193649574291017, "percentage": 50.39, "elapsed_time": "1:44:12", "remaining_time": "1:42:36", "throughput": 19805.57, "total_tokens": 123842304} +{"current_steps": 39360, "total_steps": 78105, "loss": 0.2393, "lr": 2.9003862815370764e-06, "epoch": 2.5196850393700787, "percentage": 50.39, "elapsed_time": "1:44:13", "remaining_time": "1:42:35", "throughput": 19805.85, "total_tokens": 123856960} +{"current_steps": 39365, "total_steps": 78105, "loss": 0.2521, "lr": 2.8998348303051725e-06, "epoch": 2.5200051213110557, "percentage": 50.4, "elapsed_time": "1:44:14", "remaining_time": "1:42:34", "throughput": 19806.24, "total_tokens": 123873344} +{"current_steps": 39370, "total_steps": 78105, "loss": 0.1666, "lr": 2.8992833591076e-06, "epoch": 2.5203252032520327, "percentage": 50.41, "elapsed_time": "1:44:14", "remaining_time": "1:42:34", "throughput": 19806.65, "total_tokens": 123889984} +{"current_steps": 39375, "total_steps": 78105, "loss": 0.2193, "lr": 2.8987318679718966e-06, "epoch": 2.5206452851930097, "percentage": 50.41, "elapsed_time": "1:44:15", "remaining_time": "1:42:33", "throughput": 19807.07, "total_tokens": 123906240} +{"current_steps": 39380, "total_steps": 78105, "loss": 0.3724, "lr": 2.8981803569256007e-06, "epoch": 2.5209653671339862, "percentage": 50.42, "elapsed_time": "1:44:16", "remaining_time": "1:42:32", "throughput": 19807.42, "total_tokens": 123921920} +{"current_steps": 39385, "total_steps": 78105, "loss": 0.2095, "lr": 2.8976288259962508e-06, "epoch": 2.521285449074963, "percentage": 50.43, "elapsed_time": "1:44:16", "remaining_time": "1:42:31", "throughput": 19807.72, "total_tokens": 123936832} +{"current_steps": 39390, "total_steps": 78105, "loss": 0.2137, "lr": 2.89707727521139e-06, "epoch": 2.52160553101594, "percentage": 50.43, "elapsed_time": "1:44:17", "remaining_time": "1:42:30", "throughput": 19808.03, "total_tokens": 123951616} +{"current_steps": 39395, "total_steps": 78105, "loss": 0.1472, "lr": 2.896525704598557e-06, "epoch": 2.5219256129569168, "percentage": 50.44, "elapsed_time": "1:44:18", "remaining_time": "1:42:29", "throughput": 19808.28, "total_tokens": 123968384} +{"current_steps": 39400, "total_steps": 78105, "loss": 0.1806, "lr": 2.8959741141852963e-06, "epoch": 2.5222456948978937, "percentage": 50.44, "elapsed_time": "1:44:19", "remaining_time": "1:42:28", "throughput": 19808.69, "total_tokens": 123984640} +{"current_steps": 39405, "total_steps": 78105, "loss": 0.1203, "lr": 2.8954225039991514e-06, "epoch": 2.5225657768388707, "percentage": 50.45, "elapsed_time": "1:44:19", "remaining_time": "1:42:27", "throughput": 19809.12, "total_tokens": 124001280} +{"current_steps": 39410, "total_steps": 78105, "loss": 0.2126, "lr": 2.8948708740676657e-06, "epoch": 2.5228858587798477, "percentage": 50.46, "elapsed_time": "1:44:20", "remaining_time": "1:42:26", "throughput": 19809.48, "total_tokens": 124016640} +{"current_steps": 39415, "total_steps": 78105, "loss": 0.2419, "lr": 2.894319224418386e-06, "epoch": 2.5232059407208247, "percentage": 50.46, "elapsed_time": "1:44:21", "remaining_time": "1:42:25", "throughput": 19809.86, "total_tokens": 124032384} +{"current_steps": 39420, "total_steps": 78105, "loss": 0.1775, "lr": 2.893767555078857e-06, "epoch": 2.5235260226618013, "percentage": 50.47, "elapsed_time": "1:44:21", "remaining_time": "1:42:25", "throughput": 19810.17, "total_tokens": 124047232} +{"current_steps": 39425, "total_steps": 78105, "loss": 0.2091, "lr": 2.893215866076628e-06, "epoch": 2.5238461046027783, "percentage": 50.48, "elapsed_time": "1:44:22", "remaining_time": "1:42:24", "throughput": 19810.61, "total_tokens": 124063744} +{"current_steps": 39430, "total_steps": 78105, "loss": 0.1578, "lr": 2.8926641574392473e-06, "epoch": 2.5241661865437552, "percentage": 50.48, "elapsed_time": "1:44:23", "remaining_time": "1:42:23", "throughput": 19810.92, "total_tokens": 124078336} +{"current_steps": 39435, "total_steps": 78105, "loss": 0.126, "lr": 2.8921124291942635e-06, "epoch": 2.5244862684847322, "percentage": 50.49, "elapsed_time": "1:44:23", "remaining_time": "1:42:22", "throughput": 19811.22, "total_tokens": 124093056} +{"current_steps": 39440, "total_steps": 78105, "loss": 0.2111, "lr": 2.8915606813692276e-06, "epoch": 2.524806350425709, "percentage": 50.5, "elapsed_time": "1:44:24", "remaining_time": "1:42:21", "throughput": 19811.51, "total_tokens": 124107392} +{"current_steps": 39445, "total_steps": 78105, "loss": 0.1535, "lr": 2.89100891399169e-06, "epoch": 2.5251264323666858, "percentage": 50.5, "elapsed_time": "1:44:25", "remaining_time": "1:42:20", "throughput": 19811.85, "total_tokens": 124122496} +{"current_steps": 39450, "total_steps": 78105, "loss": 0.1801, "lr": 2.8904571270892046e-06, "epoch": 2.5254465143076628, "percentage": 50.51, "elapsed_time": "1:44:25", "remaining_time": "1:42:19", "throughput": 19812.23, "total_tokens": 124138176} +{"current_steps": 39455, "total_steps": 78105, "loss": 0.2439, "lr": 2.889905320689324e-06, "epoch": 2.5257665962486398, "percentage": 50.52, "elapsed_time": "1:44:26", "remaining_time": "1:42:18", "throughput": 19812.53, "total_tokens": 124152640} +{"current_steps": 39460, "total_steps": 78105, "loss": 0.2297, "lr": 2.889353494819603e-06, "epoch": 2.5260866781896167, "percentage": 50.52, "elapsed_time": "1:44:27", "remaining_time": "1:42:17", "throughput": 19813.04, "total_tokens": 124170432} +{"current_steps": 39465, "total_steps": 78105, "loss": 0.2656, "lr": 2.888801649507596e-06, "epoch": 2.5264067601305933, "percentage": 50.53, "elapsed_time": "1:44:27", "remaining_time": "1:42:16", "throughput": 19813.36, "total_tokens": 124186048} +{"current_steps": 39470, "total_steps": 78105, "loss": 0.3109, "lr": 2.8882497847808605e-06, "epoch": 2.5267268420715703, "percentage": 50.53, "elapsed_time": "1:44:28", "remaining_time": "1:42:15", "throughput": 19813.72, "total_tokens": 124202048} +{"current_steps": 39475, "total_steps": 78105, "loss": 0.2268, "lr": 2.887697900666952e-06, "epoch": 2.5270469240125473, "percentage": 50.54, "elapsed_time": "1:44:29", "remaining_time": "1:42:14", "throughput": 19814.14, "total_tokens": 124218688} +{"current_steps": 39480, "total_steps": 78105, "loss": 0.2231, "lr": 2.8871459971934307e-06, "epoch": 2.5273670059535243, "percentage": 50.55, "elapsed_time": "1:44:29", "remaining_time": "1:42:14", "throughput": 19814.47, "total_tokens": 124234048} +{"current_steps": 39485, "total_steps": 78105, "loss": 0.1872, "lr": 2.8865940743878546e-06, "epoch": 2.527687087894501, "percentage": 50.55, "elapsed_time": "1:44:30", "remaining_time": "1:42:13", "throughput": 19814.81, "total_tokens": 124249280} +{"current_steps": 39490, "total_steps": 78105, "loss": 0.1659, "lr": 2.8860421322777844e-06, "epoch": 2.528007169835478, "percentage": 50.56, "elapsed_time": "1:44:31", "remaining_time": "1:42:12", "throughput": 19815.17, "total_tokens": 124264832} +{"current_steps": 39495, "total_steps": 78105, "loss": 0.2652, "lr": 2.885490170890781e-06, "epoch": 2.528327251776455, "percentage": 50.57, "elapsed_time": "1:44:31", "remaining_time": "1:42:11", "throughput": 19815.52, "total_tokens": 124280448} +{"current_steps": 39500, "total_steps": 78105, "loss": 0.2381, "lr": 2.884938190254405e-06, "epoch": 2.5286473337174318, "percentage": 50.57, "elapsed_time": "1:44:32", "remaining_time": "1:42:10", "throughput": 19815.95, "total_tokens": 124296896} +{"current_steps": 39505, "total_steps": 78105, "loss": 0.1968, "lr": 2.884386190396222e-06, "epoch": 2.5289674156584088, "percentage": 50.58, "elapsed_time": "1:44:33", "remaining_time": "1:42:09", "throughput": 19816.23, "total_tokens": 124311296} +{"current_steps": 39510, "total_steps": 78105, "loss": 0.2005, "lr": 2.8838341713437944e-06, "epoch": 2.5292874975993853, "percentage": 50.59, "elapsed_time": "1:44:33", "remaining_time": "1:42:08", "throughput": 19816.56, "total_tokens": 124326272} +{"current_steps": 39515, "total_steps": 78105, "loss": 0.1836, "lr": 2.8832821331246878e-06, "epoch": 2.5296075795403623, "percentage": 50.59, "elapsed_time": "1:44:34", "remaining_time": "1:42:07", "throughput": 19816.89, "total_tokens": 124341248} +{"current_steps": 39520, "total_steps": 78105, "loss": 0.1753, "lr": 2.8827300757664686e-06, "epoch": 2.5299276614813393, "percentage": 50.6, "elapsed_time": "1:44:35", "remaining_time": "1:42:06", "throughput": 19817.38, "total_tokens": 124359040} +{"current_steps": 39525, "total_steps": 78105, "loss": 0.3078, "lr": 2.8821779992967026e-06, "epoch": 2.530247743422316, "percentage": 50.6, "elapsed_time": "1:44:35", "remaining_time": "1:42:05", "throughput": 19817.75, "total_tokens": 124374720} +{"current_steps": 39530, "total_steps": 78105, "loss": 0.208, "lr": 2.8816259037429573e-06, "epoch": 2.530567825363293, "percentage": 50.61, "elapsed_time": "1:44:36", "remaining_time": "1:42:04", "throughput": 19818.13, "total_tokens": 124391040} +{"current_steps": 39535, "total_steps": 78105, "loss": 0.2192, "lr": 2.881073789132803e-06, "epoch": 2.53088790730427, "percentage": 50.62, "elapsed_time": "1:44:37", "remaining_time": "1:42:04", "throughput": 19818.57, "total_tokens": 124407680} +{"current_steps": 39540, "total_steps": 78105, "loss": 0.2628, "lr": 2.880521655493808e-06, "epoch": 2.531207989245247, "percentage": 50.62, "elapsed_time": "1:44:38", "remaining_time": "1:42:03", "throughput": 19818.94, "total_tokens": 124423744} +{"current_steps": 39545, "total_steps": 78105, "loss": 0.2605, "lr": 2.879969502853544e-06, "epoch": 2.531528071186224, "percentage": 50.63, "elapsed_time": "1:44:38", "remaining_time": "1:42:02", "throughput": 19819.26, "total_tokens": 124438720} +{"current_steps": 39550, "total_steps": 78105, "loss": 0.177, "lr": 2.879417331239582e-06, "epoch": 2.531848153127201, "percentage": 50.64, "elapsed_time": "1:44:39", "remaining_time": "1:42:01", "throughput": 19819.67, "total_tokens": 124455104} +{"current_steps": 39555, "total_steps": 78105, "loss": 0.2341, "lr": 2.8788651406794956e-06, "epoch": 2.5321682350681773, "percentage": 50.64, "elapsed_time": "1:44:40", "remaining_time": "1:42:00", "throughput": 19820.08, "total_tokens": 124471488} +{"current_steps": 39560, "total_steps": 78105, "loss": 0.1969, "lr": 2.878312931200856e-06, "epoch": 2.5324883170091543, "percentage": 50.65, "elapsed_time": "1:44:40", "remaining_time": "1:41:59", "throughput": 19820.43, "total_tokens": 124486976} +{"current_steps": 39565, "total_steps": 78105, "loss": 0.2184, "lr": 2.8777607028312406e-06, "epoch": 2.5328083989501313, "percentage": 50.66, "elapsed_time": "1:44:41", "remaining_time": "1:41:58", "throughput": 19820.86, "total_tokens": 124503616} +{"current_steps": 39570, "total_steps": 78105, "loss": 0.1877, "lr": 2.877208455598222e-06, "epoch": 2.533128480891108, "percentage": 50.66, "elapsed_time": "1:44:42", "remaining_time": "1:41:57", "throughput": 19821.26, "total_tokens": 124520000} +{"current_steps": 39575, "total_steps": 78105, "loss": 0.5476, "lr": 2.876656189529379e-06, "epoch": 2.533448562832085, "percentage": 50.67, "elapsed_time": "1:44:42", "remaining_time": "1:41:56", "throughput": 19821.57, "total_tokens": 124535296} +{"current_steps": 39580, "total_steps": 78105, "loss": 0.2581, "lr": 2.8761039046522874e-06, "epoch": 2.533768644773062, "percentage": 50.68, "elapsed_time": "1:44:43", "remaining_time": "1:41:56", "throughput": 19822.03, "total_tokens": 124552064} +{"current_steps": 39585, "total_steps": 78105, "loss": 0.1697, "lr": 2.8755516009945266e-06, "epoch": 2.534088726714039, "percentage": 50.68, "elapsed_time": "1:44:44", "remaining_time": "1:41:55", "throughput": 19822.37, "total_tokens": 124567552} +{"current_steps": 39590, "total_steps": 78105, "loss": 0.235, "lr": 2.874999278583674e-06, "epoch": 2.534408808655016, "percentage": 50.69, "elapsed_time": "1:44:44", "remaining_time": "1:41:54", "throughput": 19822.75, "total_tokens": 124583360} +{"current_steps": 39595, "total_steps": 78105, "loss": 0.2169, "lr": 2.874446937447311e-06, "epoch": 2.534728890595993, "percentage": 50.69, "elapsed_time": "1:44:45", "remaining_time": "1:41:53", "throughput": 19823.06, "total_tokens": 124598080} +{"current_steps": 39600, "total_steps": 78105, "loss": 0.2462, "lr": 2.8738945776130188e-06, "epoch": 2.5350489725369694, "percentage": 50.7, "elapsed_time": "1:44:46", "remaining_time": "1:41:52", "throughput": 19823.39, "total_tokens": 124612992} +{"current_steps": 39605, "total_steps": 78105, "loss": 0.2994, "lr": 2.8733421991083787e-06, "epoch": 2.5353690544779464, "percentage": 50.71, "elapsed_time": "1:44:46", "remaining_time": "1:41:51", "throughput": 19823.72, "total_tokens": 124628544} +{"current_steps": 39610, "total_steps": 78105, "loss": 0.186, "lr": 2.872789801960974e-06, "epoch": 2.5356891364189234, "percentage": 50.71, "elapsed_time": "1:44:47", "remaining_time": "1:41:50", "throughput": 19824.15, "total_tokens": 124645120} +{"current_steps": 39615, "total_steps": 78105, "loss": 0.2054, "lr": 2.8722373861983875e-06, "epoch": 2.5360092183599, "percentage": 50.72, "elapsed_time": "1:44:48", "remaining_time": "1:41:49", "throughput": 19824.53, "total_tokens": 124661056} +{"current_steps": 39620, "total_steps": 78105, "loss": 0.1903, "lr": 2.8716849518482058e-06, "epoch": 2.536329300300877, "percentage": 50.73, "elapsed_time": "1:44:48", "remaining_time": "1:41:48", "throughput": 19824.93, "total_tokens": 124677120} +{"current_steps": 39625, "total_steps": 78105, "loss": 0.1526, "lr": 2.8711324989380135e-06, "epoch": 2.536649382241854, "percentage": 50.73, "elapsed_time": "1:44:49", "remaining_time": "1:41:47", "throughput": 19825.26, "total_tokens": 124692224} +{"current_steps": 39630, "total_steps": 78105, "loss": 0.1917, "lr": 2.870580027495397e-06, "epoch": 2.536969464182831, "percentage": 50.74, "elapsed_time": "1:44:50", "remaining_time": "1:41:46", "throughput": 19825.59, "total_tokens": 124707904} +{"current_steps": 39635, "total_steps": 78105, "loss": 0.2173, "lr": 2.8700275375479447e-06, "epoch": 2.537289546123808, "percentage": 50.75, "elapsed_time": "1:44:50", "remaining_time": "1:41:45", "throughput": 19825.91, "total_tokens": 124722944} +{"current_steps": 39640, "total_steps": 78105, "loss": 0.1564, "lr": 2.8694750291232443e-06, "epoch": 2.537609628064785, "percentage": 50.75, "elapsed_time": "1:44:51", "remaining_time": "1:41:45", "throughput": 19826.24, "total_tokens": 124738368} +{"current_steps": 39645, "total_steps": 78105, "loss": 0.2893, "lr": 2.8689225022488852e-06, "epoch": 2.5379297100057614, "percentage": 50.76, "elapsed_time": "1:44:52", "remaining_time": "1:41:44", "throughput": 19826.57, "total_tokens": 124753792} +{"current_steps": 39650, "total_steps": 78105, "loss": 0.2568, "lr": 2.8683699569524587e-06, "epoch": 2.5382497919467384, "percentage": 50.76, "elapsed_time": "1:44:52", "remaining_time": "1:41:43", "throughput": 19827.01, "total_tokens": 124770304} +{"current_steps": 39655, "total_steps": 78105, "loss": 0.271, "lr": 2.867817393261555e-06, "epoch": 2.5385698738877154, "percentage": 50.77, "elapsed_time": "1:44:53", "remaining_time": "1:41:42", "throughput": 19827.52, "total_tokens": 124788480} +{"current_steps": 39660, "total_steps": 78105, "loss": 0.1103, "lr": 2.8672648112037665e-06, "epoch": 2.538889955828692, "percentage": 50.78, "elapsed_time": "1:44:54", "remaining_time": "1:41:41", "throughput": 19827.86, "total_tokens": 124803904} +{"current_steps": 39665, "total_steps": 78105, "loss": 0.2727, "lr": 2.8667122108066863e-06, "epoch": 2.539210037769669, "percentage": 50.78, "elapsed_time": "1:44:55", "remaining_time": "1:41:40", "throughput": 19828.21, "total_tokens": 124819392} +{"current_steps": 39670, "total_steps": 78105, "loss": 0.2652, "lr": 2.8661595920979087e-06, "epoch": 2.539530119710646, "percentage": 50.79, "elapsed_time": "1:44:55", "remaining_time": "1:41:39", "throughput": 19828.67, "total_tokens": 124836544} +{"current_steps": 39675, "total_steps": 78105, "loss": 0.3355, "lr": 2.8656069551050275e-06, "epoch": 2.539850201651623, "percentage": 50.8, "elapsed_time": "1:44:56", "remaining_time": "1:41:38", "throughput": 19828.99, "total_tokens": 124851648} +{"current_steps": 39680, "total_steps": 78105, "loss": 0.2043, "lr": 2.8650542998556403e-06, "epoch": 2.5401702835926, "percentage": 50.8, "elapsed_time": "1:44:57", "remaining_time": "1:41:37", "throughput": 19829.38, "total_tokens": 124867840} +{"current_steps": 39685, "total_steps": 78105, "loss": 0.2482, "lr": 2.8645016263773418e-06, "epoch": 2.5404903655335764, "percentage": 50.81, "elapsed_time": "1:44:57", "remaining_time": "1:41:37", "throughput": 19829.86, "total_tokens": 124884928} +{"current_steps": 39690, "total_steps": 78105, "loss": 0.3038, "lr": 2.8639489346977316e-06, "epoch": 2.5408104474745534, "percentage": 50.82, "elapsed_time": "1:44:58", "remaining_time": "1:41:36", "throughput": 19830.24, "total_tokens": 124900992} +{"current_steps": 39695, "total_steps": 78105, "loss": 0.2139, "lr": 2.8633962248444074e-06, "epoch": 2.5411305294155304, "percentage": 50.82, "elapsed_time": "1:44:59", "remaining_time": "1:41:35", "throughput": 19830.65, "total_tokens": 124917248} +{"current_steps": 39700, "total_steps": 78105, "loss": 0.2057, "lr": 2.862843496844968e-06, "epoch": 2.5414506113565074, "percentage": 50.83, "elapsed_time": "1:44:59", "remaining_time": "1:41:34", "throughput": 19830.94, "total_tokens": 124931648} +{"current_steps": 39705, "total_steps": 78105, "loss": 0.2156, "lr": 2.862290750727015e-06, "epoch": 2.541770693297484, "percentage": 50.84, "elapsed_time": "1:45:00", "remaining_time": "1:41:33", "throughput": 19831.23, "total_tokens": 124946048} +{"current_steps": 39710, "total_steps": 78105, "loss": 0.3019, "lr": 2.8617379865181483e-06, "epoch": 2.542090775238461, "percentage": 50.84, "elapsed_time": "1:45:01", "remaining_time": "1:41:32", "throughput": 19831.59, "total_tokens": 124961600} +{"current_steps": 39715, "total_steps": 78105, "loss": 0.2215, "lr": 2.8611852042459715e-06, "epoch": 2.542410857179438, "percentage": 50.85, "elapsed_time": "1:45:01", "remaining_time": "1:41:31", "throughput": 19832.1, "total_tokens": 124979648} +{"current_steps": 39720, "total_steps": 78105, "loss": 0.3225, "lr": 2.8606324039380865e-06, "epoch": 2.542730939120415, "percentage": 50.85, "elapsed_time": "1:45:02", "remaining_time": "1:41:30", "throughput": 19832.46, "total_tokens": 124995008} +{"current_steps": 39725, "total_steps": 78105, "loss": 0.3222, "lr": 2.860079585622098e-06, "epoch": 2.543051021061392, "percentage": 50.86, "elapsed_time": "1:45:03", "remaining_time": "1:41:29", "throughput": 19832.82, "total_tokens": 125010560} +{"current_steps": 39730, "total_steps": 78105, "loss": 0.3629, "lr": 2.8595267493256103e-06, "epoch": 2.5433711030023685, "percentage": 50.87, "elapsed_time": "1:45:03", "remaining_time": "1:41:28", "throughput": 19833.18, "total_tokens": 125026112} +{"current_steps": 39735, "total_steps": 78105, "loss": 0.1813, "lr": 2.858973895076229e-06, "epoch": 2.5436911849433455, "percentage": 50.87, "elapsed_time": "1:45:04", "remaining_time": "1:41:28", "throughput": 19833.56, "total_tokens": 125042304} +{"current_steps": 39740, "total_steps": 78105, "loss": 0.2903, "lr": 2.8584210229015622e-06, "epoch": 2.5440112668843224, "percentage": 50.88, "elapsed_time": "1:45:05", "remaining_time": "1:41:27", "throughput": 19833.94, "total_tokens": 125058368} +{"current_steps": 39745, "total_steps": 78105, "loss": 0.3299, "lr": 2.857868132829216e-06, "epoch": 2.5443313488252994, "percentage": 50.89, "elapsed_time": "1:45:05", "remaining_time": "1:41:26", "throughput": 19834.32, "total_tokens": 125074496} +{"current_steps": 39750, "total_steps": 78105, "loss": 0.1938, "lr": 2.857315224886799e-06, "epoch": 2.544651430766276, "percentage": 50.89, "elapsed_time": "1:45:06", "remaining_time": "1:41:25", "throughput": 19834.66, "total_tokens": 125089792} +{"current_steps": 39755, "total_steps": 78105, "loss": 0.2714, "lr": 2.856762299101921e-06, "epoch": 2.544971512707253, "percentage": 50.9, "elapsed_time": "1:45:07", "remaining_time": "1:41:24", "throughput": 19834.97, "total_tokens": 125104704} +{"current_steps": 39760, "total_steps": 78105, "loss": 0.2626, "lr": 2.856209355502192e-06, "epoch": 2.54529159464823, "percentage": 50.91, "elapsed_time": "1:45:07", "remaining_time": "1:41:23", "throughput": 19835.38, "total_tokens": 125120896} +{"current_steps": 39765, "total_steps": 78105, "loss": 0.2318, "lr": 2.8556563941152236e-06, "epoch": 2.545611676589207, "percentage": 50.91, "elapsed_time": "1:45:08", "remaining_time": "1:41:22", "throughput": 19835.7, "total_tokens": 125136192} +{"current_steps": 39770, "total_steps": 78105, "loss": 0.2387, "lr": 2.8551034149686274e-06, "epoch": 2.545931758530184, "percentage": 50.92, "elapsed_time": "1:45:09", "remaining_time": "1:41:21", "throughput": 19835.99, "total_tokens": 125150592} +{"current_steps": 39775, "total_steps": 78105, "loss": 0.191, "lr": 2.8545504180900164e-06, "epoch": 2.5462518404711605, "percentage": 50.93, "elapsed_time": "1:45:09", "remaining_time": "1:41:20", "throughput": 19836.4, "total_tokens": 125167360} +{"current_steps": 39780, "total_steps": 78105, "loss": 0.2008, "lr": 2.8539974035070044e-06, "epoch": 2.5465719224121375, "percentage": 50.93, "elapsed_time": "1:45:10", "remaining_time": "1:41:19", "throughput": 19836.82, "total_tokens": 125184128} +{"current_steps": 39785, "total_steps": 78105, "loss": 0.2601, "lr": 2.853444371247206e-06, "epoch": 2.5468920043531145, "percentage": 50.94, "elapsed_time": "1:45:11", "remaining_time": "1:41:19", "throughput": 19837.35, "total_tokens": 125202560} +{"current_steps": 39790, "total_steps": 78105, "loss": 0.243, "lr": 2.852891321338236e-06, "epoch": 2.547212086294091, "percentage": 50.94, "elapsed_time": "1:45:12", "remaining_time": "1:41:18", "throughput": 19837.7, "total_tokens": 125218176} +{"current_steps": 39795, "total_steps": 78105, "loss": 0.2192, "lr": 2.852338253807712e-06, "epoch": 2.547532168235068, "percentage": 50.95, "elapsed_time": "1:45:12", "remaining_time": "1:41:17", "throughput": 19838.1, "total_tokens": 125234496} +{"current_steps": 39800, "total_steps": 78105, "loss": 0.2298, "lr": 2.8517851686832506e-06, "epoch": 2.547852250176045, "percentage": 50.96, "elapsed_time": "1:45:13", "remaining_time": "1:41:16", "throughput": 19838.43, "total_tokens": 125249664} +{"current_steps": 39805, "total_steps": 78105, "loss": 0.327, "lr": 2.8512320659924703e-06, "epoch": 2.548172332117022, "percentage": 50.96, "elapsed_time": "1:45:14", "remaining_time": "1:41:15", "throughput": 19838.92, "total_tokens": 125267008} +{"current_steps": 39810, "total_steps": 78105, "loss": 0.235, "lr": 2.8506789457629906e-06, "epoch": 2.548492414057999, "percentage": 50.97, "elapsed_time": "1:45:14", "remaining_time": "1:41:14", "throughput": 19839.26, "total_tokens": 125282496} +{"current_steps": 39815, "total_steps": 78105, "loss": 0.1675, "lr": 2.8501258080224308e-06, "epoch": 2.548812495998976, "percentage": 50.98, "elapsed_time": "1:45:15", "remaining_time": "1:41:13", "throughput": 19839.56, "total_tokens": 125297408} +{"current_steps": 39820, "total_steps": 78105, "loss": 0.1942, "lr": 2.8495726527984112e-06, "epoch": 2.5491325779399525, "percentage": 50.98, "elapsed_time": "1:45:16", "remaining_time": "1:41:12", "throughput": 19839.92, "total_tokens": 125313088} +{"current_steps": 39825, "total_steps": 78105, "loss": 0.2023, "lr": 2.849019480118555e-06, "epoch": 2.5494526598809295, "percentage": 50.99, "elapsed_time": "1:45:16", "remaining_time": "1:41:11", "throughput": 19840.27, "total_tokens": 125328448} +{"current_steps": 39830, "total_steps": 78105, "loss": 0.3413, "lr": 2.8484662900104835e-06, "epoch": 2.5497727418219065, "percentage": 51.0, "elapsed_time": "1:45:17", "remaining_time": "1:41:10", "throughput": 19840.63, "total_tokens": 125343744} +{"current_steps": 39835, "total_steps": 78105, "loss": 0.28, "lr": 2.8479130825018214e-06, "epoch": 2.550092823762883, "percentage": 51.0, "elapsed_time": "1:45:18", "remaining_time": "1:41:09", "throughput": 19841.02, "total_tokens": 125359936} +{"current_steps": 39840, "total_steps": 78105, "loss": 0.2827, "lr": 2.8473598576201916e-06, "epoch": 2.55041290570386, "percentage": 51.01, "elapsed_time": "1:45:18", "remaining_time": "1:41:09", "throughput": 19841.42, "total_tokens": 125376128} +{"current_steps": 39845, "total_steps": 78105, "loss": 0.287, "lr": 2.8468066153932193e-06, "epoch": 2.550732987644837, "percentage": 51.01, "elapsed_time": "1:45:19", "remaining_time": "1:41:08", "throughput": 19841.77, "total_tokens": 125392000} +{"current_steps": 39850, "total_steps": 78105, "loss": 0.1688, "lr": 2.846253355848532e-06, "epoch": 2.551053069585814, "percentage": 51.02, "elapsed_time": "1:45:20", "remaining_time": "1:41:07", "throughput": 19842.17, "total_tokens": 125408512} +{"current_steps": 39855, "total_steps": 78105, "loss": 0.2146, "lr": 2.845700079013755e-06, "epoch": 2.551373151526791, "percentage": 51.03, "elapsed_time": "1:45:21", "remaining_time": "1:41:06", "throughput": 19842.61, "total_tokens": 125425408} +{"current_steps": 39860, "total_steps": 78105, "loss": 0.1933, "lr": 2.8451467849165167e-06, "epoch": 2.551693233467768, "percentage": 51.03, "elapsed_time": "1:45:21", "remaining_time": "1:41:05", "throughput": 19842.92, "total_tokens": 125440704} +{"current_steps": 39865, "total_steps": 78105, "loss": 0.1679, "lr": 2.844593473584446e-06, "epoch": 2.5520133154087445, "percentage": 51.04, "elapsed_time": "1:45:22", "remaining_time": "1:41:04", "throughput": 19843.28, "total_tokens": 125456448} +{"current_steps": 39870, "total_steps": 78105, "loss": 0.2809, "lr": 2.8440401450451727e-06, "epoch": 2.5523333973497215, "percentage": 51.05, "elapsed_time": "1:45:23", "remaining_time": "1:41:03", "throughput": 19843.6, "total_tokens": 125471552} +{"current_steps": 39875, "total_steps": 78105, "loss": 0.2265, "lr": 2.8434867993263254e-06, "epoch": 2.5526534792906985, "percentage": 51.05, "elapsed_time": "1:45:23", "remaining_time": "1:41:02", "throughput": 19843.9, "total_tokens": 125486400} +{"current_steps": 39880, "total_steps": 78105, "loss": 0.2882, "lr": 2.8429334364555374e-06, "epoch": 2.552973561231675, "percentage": 51.06, "elapsed_time": "1:45:24", "remaining_time": "1:41:01", "throughput": 19844.25, "total_tokens": 125501760} +{"current_steps": 39885, "total_steps": 78105, "loss": 0.1316, "lr": 2.8423800564604397e-06, "epoch": 2.553293643172652, "percentage": 51.07, "elapsed_time": "1:45:25", "remaining_time": "1:41:00", "throughput": 19844.68, "total_tokens": 125518336} +{"current_steps": 39890, "total_steps": 78105, "loss": 0.2625, "lr": 2.8418266593686655e-06, "epoch": 2.553613725113629, "percentage": 51.07, "elapsed_time": "1:45:25", "remaining_time": "1:41:00", "throughput": 19844.98, "total_tokens": 125533312} +{"current_steps": 39895, "total_steps": 78105, "loss": 0.2524, "lr": 2.8412732452078484e-06, "epoch": 2.553933807054606, "percentage": 51.08, "elapsed_time": "1:45:26", "remaining_time": "1:40:59", "throughput": 19845.39, "total_tokens": 125549632} +{"current_steps": 39900, "total_steps": 78105, "loss": 0.171, "lr": 2.840719814005623e-06, "epoch": 2.554253888995583, "percentage": 51.09, "elapsed_time": "1:45:27", "remaining_time": "1:40:58", "throughput": 19845.67, "total_tokens": 125564224} +{"current_steps": 39905, "total_steps": 78105, "loss": 0.2254, "lr": 2.8401663657896247e-06, "epoch": 2.55457397093656, "percentage": 51.09, "elapsed_time": "1:45:27", "remaining_time": "1:40:57", "throughput": 19846.11, "total_tokens": 125580928} +{"current_steps": 39910, "total_steps": 78105, "loss": 0.3978, "lr": 2.83961290058749e-06, "epoch": 2.5548940528775366, "percentage": 51.1, "elapsed_time": "1:45:28", "remaining_time": "1:40:56", "throughput": 19846.68, "total_tokens": 125600128} +{"current_steps": 39915, "total_steps": 78105, "loss": 0.3144, "lr": 2.839059418426856e-06, "epoch": 2.5552141348185136, "percentage": 51.1, "elapsed_time": "1:45:29", "remaining_time": "1:40:55", "throughput": 19847.02, "total_tokens": 125615744} +{"current_steps": 39920, "total_steps": 78105, "loss": 0.2625, "lr": 2.8385059193353602e-06, "epoch": 2.5555342167594906, "percentage": 51.11, "elapsed_time": "1:45:29", "remaining_time": "1:40:54", "throughput": 19847.35, "total_tokens": 125631040} +{"current_steps": 39925, "total_steps": 78105, "loss": 0.2009, "lr": 2.837952403340643e-06, "epoch": 2.555854298700467, "percentage": 51.12, "elapsed_time": "1:45:30", "remaining_time": "1:40:53", "throughput": 19847.82, "total_tokens": 125648448} +{"current_steps": 39930, "total_steps": 78105, "loss": 0.1901, "lr": 2.8373988704703426e-06, "epoch": 2.556174380641444, "percentage": 51.12, "elapsed_time": "1:45:31", "remaining_time": "1:40:53", "throughput": 19848.16, "total_tokens": 125664128} +{"current_steps": 39935, "total_steps": 78105, "loss": 0.2775, "lr": 2.8368453207520998e-06, "epoch": 2.556494462582421, "percentage": 51.13, "elapsed_time": "1:45:31", "remaining_time": "1:40:52", "throughput": 19848.5, "total_tokens": 125679360} +{"current_steps": 39940, "total_steps": 78105, "loss": 0.2407, "lr": 2.836291754213557e-06, "epoch": 2.556814544523398, "percentage": 51.14, "elapsed_time": "1:45:32", "remaining_time": "1:40:51", "throughput": 19848.8, "total_tokens": 125694144} +{"current_steps": 39945, "total_steps": 78105, "loss": 0.1919, "lr": 2.835738170882355e-06, "epoch": 2.557134626464375, "percentage": 51.14, "elapsed_time": "1:45:33", "remaining_time": "1:40:50", "throughput": 19849.18, "total_tokens": 125710080} +{"current_steps": 39950, "total_steps": 78105, "loss": 0.1835, "lr": 2.835184570786138e-06, "epoch": 2.5574547084053516, "percentage": 51.15, "elapsed_time": "1:45:33", "remaining_time": "1:40:49", "throughput": 19849.48, "total_tokens": 125724544} +{"current_steps": 39955, "total_steps": 78105, "loss": 0.1916, "lr": 2.8346309539525494e-06, "epoch": 2.5577747903463286, "percentage": 51.16, "elapsed_time": "1:45:34", "remaining_time": "1:40:48", "throughput": 19849.84, "total_tokens": 125740224} +{"current_steps": 39960, "total_steps": 78105, "loss": 0.2222, "lr": 2.834077320409234e-06, "epoch": 2.5580948722873056, "percentage": 51.16, "elapsed_time": "1:45:35", "remaining_time": "1:40:47", "throughput": 19849.46, "total_tokens": 125756672} +{"current_steps": 39965, "total_steps": 78105, "loss": 0.1859, "lr": 2.833523670183837e-06, "epoch": 2.5584149542282826, "percentage": 51.17, "elapsed_time": "1:45:36", "remaining_time": "1:40:46", "throughput": 19849.83, "total_tokens": 125772480} +{"current_steps": 39970, "total_steps": 78105, "loss": 0.239, "lr": 2.832970003304005e-06, "epoch": 2.558735036169259, "percentage": 51.17, "elapsed_time": "1:45:36", "remaining_time": "1:40:45", "throughput": 19850.18, "total_tokens": 125788096} +{"current_steps": 39975, "total_steps": 78105, "loss": 0.2278, "lr": 2.832416319797386e-06, "epoch": 2.559055118110236, "percentage": 51.18, "elapsed_time": "1:45:37", "remaining_time": "1:40:45", "throughput": 19850.49, "total_tokens": 125802944} +{"current_steps": 39980, "total_steps": 78105, "loss": 0.1894, "lr": 2.831862619691627e-06, "epoch": 2.559375200051213, "percentage": 51.19, "elapsed_time": "1:45:38", "remaining_time": "1:40:44", "throughput": 19850.84, "total_tokens": 125818432} +{"current_steps": 39985, "total_steps": 78105, "loss": 0.2023, "lr": 2.831308903014378e-06, "epoch": 2.55969528199219, "percentage": 51.19, "elapsed_time": "1:45:38", "remaining_time": "1:40:43", "throughput": 19851.26, "total_tokens": 125835264} +{"current_steps": 39990, "total_steps": 78105, "loss": 0.3285, "lr": 2.8307551697932874e-06, "epoch": 2.560015363933167, "percentage": 51.2, "elapsed_time": "1:45:39", "remaining_time": "1:40:42", "throughput": 19851.67, "total_tokens": 125851712} +{"current_steps": 39995, "total_steps": 78105, "loss": 0.2648, "lr": 2.8302014200560068e-06, "epoch": 2.5603354458741436, "percentage": 51.21, "elapsed_time": "1:45:40", "remaining_time": "1:40:41", "throughput": 19852.26, "total_tokens": 125871104} +{"current_steps": 40000, "total_steps": 78105, "loss": 0.2592, "lr": 2.8296476538301875e-06, "epoch": 2.5606555278151206, "percentage": 51.21, "elapsed_time": "1:45:41", "remaining_time": "1:40:40", "throughput": 19852.63, "total_tokens": 125886976} +{"current_steps": 40005, "total_steps": 78105, "loss": 0.1919, "lr": 2.829093871143481e-06, "epoch": 2.5609756097560976, "percentage": 51.22, "elapsed_time": "1:45:41", "remaining_time": "1:40:39", "throughput": 19852.92, "total_tokens": 125901248} +{"current_steps": 40010, "total_steps": 78105, "loss": 0.2342, "lr": 2.8285400720235414e-06, "epoch": 2.5612956916970746, "percentage": 51.23, "elapsed_time": "1:45:42", "remaining_time": "1:40:38", "throughput": 19853.2, "total_tokens": 125915584} +{"current_steps": 40015, "total_steps": 78105, "loss": 0.2611, "lr": 2.8279862564980215e-06, "epoch": 2.561615773638051, "percentage": 51.23, "elapsed_time": "1:45:43", "remaining_time": "1:40:37", "throughput": 19853.55, "total_tokens": 125931328} +{"current_steps": 40020, "total_steps": 78105, "loss": 0.1823, "lr": 2.827432424594576e-06, "epoch": 2.561935855579028, "percentage": 51.24, "elapsed_time": "1:45:43", "remaining_time": "1:40:36", "throughput": 19853.93, "total_tokens": 125947392} +{"current_steps": 40025, "total_steps": 78105, "loss": 0.1831, "lr": 2.826878576340861e-06, "epoch": 2.562255937520005, "percentage": 51.25, "elapsed_time": "1:45:44", "remaining_time": "1:40:36", "throughput": 19854.3, "total_tokens": 125963264} +{"current_steps": 40030, "total_steps": 78105, "loss": 0.2589, "lr": 2.826324711764533e-06, "epoch": 2.562576019460982, "percentage": 51.25, "elapsed_time": "1:45:45", "remaining_time": "1:40:35", "throughput": 19854.65, "total_tokens": 125978944} +{"current_steps": 40035, "total_steps": 78105, "loss": 0.2792, "lr": 2.825770830893248e-06, "epoch": 2.562896101401959, "percentage": 51.26, "elapsed_time": "1:45:45", "remaining_time": "1:40:34", "throughput": 19855.01, "total_tokens": 125994880} +{"current_steps": 40040, "total_steps": 78105, "loss": 0.1957, "lr": 2.825216933754665e-06, "epoch": 2.5632161833429357, "percentage": 51.26, "elapsed_time": "1:45:46", "remaining_time": "1:40:33", "throughput": 19855.34, "total_tokens": 126010368} +{"current_steps": 40045, "total_steps": 78105, "loss": 0.1953, "lr": 2.8246630203764426e-06, "epoch": 2.5635362652839127, "percentage": 51.27, "elapsed_time": "1:45:47", "remaining_time": "1:40:32", "throughput": 19855.91, "total_tokens": 126029504} +{"current_steps": 40050, "total_steps": 78105, "loss": 0.1655, "lr": 2.8241090907862388e-06, "epoch": 2.5638563472248896, "percentage": 51.28, "elapsed_time": "1:45:47", "remaining_time": "1:40:31", "throughput": 19856.29, "total_tokens": 126045696} +{"current_steps": 40055, "total_steps": 78105, "loss": 0.24, "lr": 2.823555145011716e-06, "epoch": 2.564176429165866, "percentage": 51.28, "elapsed_time": "1:45:48", "remaining_time": "1:40:30", "throughput": 19856.69, "total_tokens": 126061888} +{"current_steps": 40060, "total_steps": 78105, "loss": 0.194, "lr": 2.8230011830805347e-06, "epoch": 2.564496511106843, "percentage": 51.29, "elapsed_time": "1:45:49", "remaining_time": "1:40:29", "throughput": 19857.01, "total_tokens": 126077184} +{"current_steps": 40065, "total_steps": 78105, "loss": 0.2946, "lr": 2.822447205020357e-06, "epoch": 2.56481659304782, "percentage": 51.3, "elapsed_time": "1:45:49", "remaining_time": "1:40:28", "throughput": 19857.33, "total_tokens": 126092352} +{"current_steps": 40070, "total_steps": 78105, "loss": 0.2528, "lr": 2.821893210858846e-06, "epoch": 2.565136674988797, "percentage": 51.3, "elapsed_time": "1:45:50", "remaining_time": "1:40:28", "throughput": 19857.66, "total_tokens": 126107456} +{"current_steps": 40075, "total_steps": 78105, "loss": 0.2257, "lr": 2.8213392006236635e-06, "epoch": 2.565456756929774, "percentage": 51.31, "elapsed_time": "1:45:51", "remaining_time": "1:40:27", "throughput": 19857.99, "total_tokens": 126122688} +{"current_steps": 40080, "total_steps": 78105, "loss": 0.2635, "lr": 2.8207851743424753e-06, "epoch": 2.565776838870751, "percentage": 51.32, "elapsed_time": "1:45:51", "remaining_time": "1:40:26", "throughput": 19858.29, "total_tokens": 126137472} +{"current_steps": 40085, "total_steps": 78105, "loss": 0.2849, "lr": 2.820231132042947e-06, "epoch": 2.5660969208117277, "percentage": 51.32, "elapsed_time": "1:45:52", "remaining_time": "1:40:25", "throughput": 19858.67, "total_tokens": 126153280} +{"current_steps": 40090, "total_steps": 78105, "loss": 0.1777, "lr": 2.8196770737527434e-06, "epoch": 2.5664170027527047, "percentage": 51.33, "elapsed_time": "1:45:53", "remaining_time": "1:40:24", "throughput": 19859.04, "total_tokens": 126169152} +{"current_steps": 40095, "total_steps": 78105, "loss": 0.2273, "lr": 2.8191229994995317e-06, "epoch": 2.5667370846936817, "percentage": 51.33, "elapsed_time": "1:45:53", "remaining_time": "1:40:23", "throughput": 19859.45, "total_tokens": 126185600} +{"current_steps": 40100, "total_steps": 78105, "loss": 0.2582, "lr": 2.8185689093109803e-06, "epoch": 2.567057166634658, "percentage": 51.34, "elapsed_time": "1:45:54", "remaining_time": "1:40:22", "throughput": 19859.88, "total_tokens": 126202368} +{"current_steps": 40105, "total_steps": 78105, "loss": 0.2239, "lr": 2.8180148032147563e-06, "epoch": 2.567377248575635, "percentage": 51.35, "elapsed_time": "1:45:55", "remaining_time": "1:40:21", "throughput": 19860.23, "total_tokens": 126217856} +{"current_steps": 40110, "total_steps": 78105, "loss": 0.2796, "lr": 2.81746068123853e-06, "epoch": 2.567697330516612, "percentage": 51.35, "elapsed_time": "1:45:55", "remaining_time": "1:40:20", "throughput": 19860.55, "total_tokens": 126232768} +{"current_steps": 40115, "total_steps": 78105, "loss": 0.2338, "lr": 2.8169065434099714e-06, "epoch": 2.568017412457589, "percentage": 51.36, "elapsed_time": "1:45:56", "remaining_time": "1:40:19", "throughput": 19860.9, "total_tokens": 126248320} +{"current_steps": 40120, "total_steps": 78105, "loss": 0.3427, "lr": 2.81635238975675e-06, "epoch": 2.568337494398566, "percentage": 51.37, "elapsed_time": "1:45:57", "remaining_time": "1:40:18", "throughput": 19861.25, "total_tokens": 126263872} +{"current_steps": 40125, "total_steps": 78105, "loss": 0.1515, "lr": 2.815798220306538e-06, "epoch": 2.568657576339543, "percentage": 51.37, "elapsed_time": "1:45:57", "remaining_time": "1:40:18", "throughput": 19861.57, "total_tokens": 126278720} +{"current_steps": 40130, "total_steps": 78105, "loss": 0.2018, "lr": 2.815244035087008e-06, "epoch": 2.5689776582805197, "percentage": 51.38, "elapsed_time": "1:45:58", "remaining_time": "1:40:17", "throughput": 19861.83, "total_tokens": 126293248} +{"current_steps": 40135, "total_steps": 78105, "loss": 0.2389, "lr": 2.8146898341258328e-06, "epoch": 2.5692977402214967, "percentage": 51.39, "elapsed_time": "1:45:59", "remaining_time": "1:40:16", "throughput": 19862.2, "total_tokens": 126308992} +{"current_steps": 40140, "total_steps": 78105, "loss": 0.2551, "lr": 2.8141356174506865e-06, "epoch": 2.5696178221624737, "percentage": 51.39, "elapsed_time": "1:45:59", "remaining_time": "1:40:15", "throughput": 19862.56, "total_tokens": 126324480} +{"current_steps": 40145, "total_steps": 78105, "loss": 0.2702, "lr": 2.813581385089244e-06, "epoch": 2.5699379041034502, "percentage": 51.4, "elapsed_time": "1:46:00", "remaining_time": "1:40:14", "throughput": 19862.88, "total_tokens": 126339392} +{"current_steps": 40150, "total_steps": 78105, "loss": 0.1983, "lr": 2.8130271370691802e-06, "epoch": 2.5702579860444272, "percentage": 51.41, "elapsed_time": "1:46:01", "remaining_time": "1:40:13", "throughput": 19863.2, "total_tokens": 126354304} +{"current_steps": 40155, "total_steps": 78105, "loss": 0.2871, "lr": 2.8124728734181716e-06, "epoch": 2.5705780679854042, "percentage": 51.41, "elapsed_time": "1:46:01", "remaining_time": "1:40:12", "throughput": 19863.56, "total_tokens": 126370240} +{"current_steps": 40160, "total_steps": 78105, "loss": 0.3206, "lr": 2.811918594163896e-06, "epoch": 2.570898149926381, "percentage": 51.42, "elapsed_time": "1:46:02", "remaining_time": "1:40:11", "throughput": 19863.95, "total_tokens": 126386560} +{"current_steps": 40165, "total_steps": 78105, "loss": 0.1632, "lr": 2.8113642993340296e-06, "epoch": 2.571218231867358, "percentage": 51.42, "elapsed_time": "1:46:03", "remaining_time": "1:40:10", "throughput": 19864.37, "total_tokens": 126403136} +{"current_steps": 40170, "total_steps": 78105, "loss": 0.2099, "lr": 2.8108099889562524e-06, "epoch": 2.571538313808335, "percentage": 51.43, "elapsed_time": "1:46:03", "remaining_time": "1:40:09", "throughput": 19864.68, "total_tokens": 126417856} +{"current_steps": 40175, "total_steps": 78105, "loss": 0.2497, "lr": 2.8102556630582435e-06, "epoch": 2.5718583957493117, "percentage": 51.44, "elapsed_time": "1:46:04", "remaining_time": "1:40:08", "throughput": 19865.03, "total_tokens": 126433408} +{"current_steps": 40180, "total_steps": 78105, "loss": 0.2417, "lr": 2.8097013216676826e-06, "epoch": 2.5721784776902887, "percentage": 51.44, "elapsed_time": "1:46:05", "remaining_time": "1:40:08", "throughput": 19865.32, "total_tokens": 126448064} +{"current_steps": 40185, "total_steps": 78105, "loss": 0.1375, "lr": 2.809146964812251e-06, "epoch": 2.5724985596312657, "percentage": 51.45, "elapsed_time": "1:46:05", "remaining_time": "1:40:07", "throughput": 19865.61, "total_tokens": 126462784} +{"current_steps": 40190, "total_steps": 78105, "loss": 0.2106, "lr": 2.80859259251963e-06, "epoch": 2.5728186415722423, "percentage": 51.46, "elapsed_time": "1:46:06", "remaining_time": "1:40:06", "throughput": 19865.96, "total_tokens": 126478336} +{"current_steps": 40195, "total_steps": 78105, "loss": 0.1834, "lr": 2.8080382048175026e-06, "epoch": 2.5731387235132193, "percentage": 51.46, "elapsed_time": "1:46:07", "remaining_time": "1:40:05", "throughput": 19866.27, "total_tokens": 126493184} +{"current_steps": 40200, "total_steps": 78105, "loss": 0.2348, "lr": 2.8074838017335515e-06, "epoch": 2.5734588054541963, "percentage": 51.47, "elapsed_time": "1:46:07", "remaining_time": "1:40:04", "throughput": 19866.7, "total_tokens": 126509824} +{"current_steps": 40205, "total_steps": 78105, "loss": 0.2725, "lr": 2.8069293832954613e-06, "epoch": 2.5737788873951732, "percentage": 51.48, "elapsed_time": "1:46:08", "remaining_time": "1:40:03", "throughput": 19867.1, "total_tokens": 126526080} +{"current_steps": 40210, "total_steps": 78105, "loss": 0.2202, "lr": 2.806374949530916e-06, "epoch": 2.5740989693361502, "percentage": 51.48, "elapsed_time": "1:46:09", "remaining_time": "1:40:02", "throughput": 19867.45, "total_tokens": 126541568} +{"current_steps": 40215, "total_steps": 78105, "loss": 0.2271, "lr": 2.805820500467602e-06, "epoch": 2.574419051277127, "percentage": 51.49, "elapsed_time": "1:46:09", "remaining_time": "1:40:01", "throughput": 19867.78, "total_tokens": 126556864} +{"current_steps": 40220, "total_steps": 78105, "loss": 0.2008, "lr": 2.8052660361332045e-06, "epoch": 2.5747391332181038, "percentage": 51.49, "elapsed_time": "1:46:10", "remaining_time": "1:40:00", "throughput": 19868.19, "total_tokens": 126573376} +{"current_steps": 40225, "total_steps": 78105, "loss": 0.1942, "lr": 2.804711556555412e-06, "epoch": 2.5750592151590808, "percentage": 51.5, "elapsed_time": "1:46:11", "remaining_time": "1:39:59", "throughput": 19868.69, "total_tokens": 126591168} +{"current_steps": 40230, "total_steps": 78105, "loss": 0.2544, "lr": 2.8041570617619114e-06, "epoch": 2.5753792971000578, "percentage": 51.51, "elapsed_time": "1:46:12", "remaining_time": "1:39:59", "throughput": 19869.04, "total_tokens": 126606784} +{"current_steps": 40235, "total_steps": 78105, "loss": 0.1715, "lr": 2.8036025517803915e-06, "epoch": 2.5756993790410343, "percentage": 51.51, "elapsed_time": "1:46:12", "remaining_time": "1:39:58", "throughput": 19869.46, "total_tokens": 126622976} +{"current_steps": 40240, "total_steps": 78105, "loss": 0.2383, "lr": 2.8030480266385413e-06, "epoch": 2.5760194609820113, "percentage": 51.52, "elapsed_time": "1:46:13", "remaining_time": "1:39:57", "throughput": 19869.82, "total_tokens": 126638720} +{"current_steps": 40245, "total_steps": 78105, "loss": 0.1301, "lr": 2.8024934863640507e-06, "epoch": 2.5763395429229883, "percentage": 51.53, "elapsed_time": "1:46:14", "remaining_time": "1:39:56", "throughput": 19870.17, "total_tokens": 126654144} +{"current_steps": 40250, "total_steps": 78105, "loss": 0.2135, "lr": 2.8019389309846116e-06, "epoch": 2.5766596248639653, "percentage": 51.53, "elapsed_time": "1:46:14", "remaining_time": "1:39:55", "throughput": 19870.49, "total_tokens": 126669056} +{"current_steps": 40255, "total_steps": 78105, "loss": 0.2559, "lr": 2.801384360527916e-06, "epoch": 2.5769797068049423, "percentage": 51.54, "elapsed_time": "1:46:15", "remaining_time": "1:39:54", "throughput": 19870.78, "total_tokens": 126683904} +{"current_steps": 40260, "total_steps": 78105, "loss": 0.1696, "lr": 2.800829775021654e-06, "epoch": 2.577299788745919, "percentage": 51.55, "elapsed_time": "1:46:16", "remaining_time": "1:39:53", "throughput": 19871.14, "total_tokens": 126699648} +{"current_steps": 40265, "total_steps": 78105, "loss": 0.2481, "lr": 2.8002751744935204e-06, "epoch": 2.577619870686896, "percentage": 51.55, "elapsed_time": "1:46:16", "remaining_time": "1:39:52", "throughput": 19871.47, "total_tokens": 126715072} +{"current_steps": 40270, "total_steps": 78105, "loss": 0.1637, "lr": 2.799720558971208e-06, "epoch": 2.577939952627873, "percentage": 51.56, "elapsed_time": "1:46:17", "remaining_time": "1:39:51", "throughput": 19871.85, "total_tokens": 126731072} +{"current_steps": 40275, "total_steps": 78105, "loss": 0.332, "lr": 2.799165928482413e-06, "epoch": 2.57826003456885, "percentage": 51.57, "elapsed_time": "1:46:18", "remaining_time": "1:39:50", "throughput": 19872.23, "total_tokens": 126747072} +{"current_steps": 40280, "total_steps": 78105, "loss": 0.1858, "lr": 2.798611283054829e-06, "epoch": 2.5785801165098263, "percentage": 51.57, "elapsed_time": "1:46:18", "remaining_time": "1:39:49", "throughput": 19872.58, "total_tokens": 126762624} +{"current_steps": 40285, "total_steps": 78105, "loss": 0.219, "lr": 2.798056622716153e-06, "epoch": 2.5789001984508033, "percentage": 51.58, "elapsed_time": "1:46:19", "remaining_time": "1:39:49", "throughput": 19872.92, "total_tokens": 126778112} +{"current_steps": 40290, "total_steps": 78105, "loss": 0.264, "lr": 2.797501947494083e-06, "epoch": 2.5792202803917803, "percentage": 51.58, "elapsed_time": "1:46:20", "remaining_time": "1:39:48", "throughput": 19873.27, "total_tokens": 126794112} +{"current_steps": 40295, "total_steps": 78105, "loss": 0.2105, "lr": 2.7969472574163142e-06, "epoch": 2.5795403623327573, "percentage": 51.59, "elapsed_time": "1:46:20", "remaining_time": "1:39:47", "throughput": 19873.58, "total_tokens": 126808832} +{"current_steps": 40300, "total_steps": 78105, "loss": 0.205, "lr": 2.7963925525105467e-06, "epoch": 2.5798604442737343, "percentage": 51.6, "elapsed_time": "1:46:21", "remaining_time": "1:39:46", "throughput": 19873.98, "total_tokens": 126824960} +{"current_steps": 40305, "total_steps": 78105, "loss": 0.1652, "lr": 2.7958378328044784e-06, "epoch": 2.580180526214711, "percentage": 51.6, "elapsed_time": "1:46:22", "remaining_time": "1:39:45", "throughput": 19874.26, "total_tokens": 126839552} +{"current_steps": 40310, "total_steps": 78105, "loss": 0.2387, "lr": 2.79528309832581e-06, "epoch": 2.580500608155688, "percentage": 51.61, "elapsed_time": "1:46:22", "remaining_time": "1:39:44", "throughput": 19874.62, "total_tokens": 126855488} +{"current_steps": 40315, "total_steps": 78105, "loss": 0.1717, "lr": 2.794728349102242e-06, "epoch": 2.580820690096665, "percentage": 51.62, "elapsed_time": "1:46:23", "remaining_time": "1:39:43", "throughput": 19875.07, "total_tokens": 126872320} +{"current_steps": 40320, "total_steps": 78105, "loss": 0.357, "lr": 2.7941735851614755e-06, "epoch": 2.5811407720376414, "percentage": 51.62, "elapsed_time": "1:46:24", "remaining_time": "1:39:42", "throughput": 19875.41, "total_tokens": 126887680} +{"current_steps": 40325, "total_steps": 78105, "loss": 0.1238, "lr": 2.793618806531212e-06, "epoch": 2.5814608539786184, "percentage": 51.63, "elapsed_time": "1:46:24", "remaining_time": "1:39:41", "throughput": 19875.81, "total_tokens": 126904064} +{"current_steps": 40330, "total_steps": 78105, "loss": 0.2239, "lr": 2.7930640132391545e-06, "epoch": 2.5817809359195953, "percentage": 51.64, "elapsed_time": "1:46:25", "remaining_time": "1:39:40", "throughput": 19876.13, "total_tokens": 126919232} +{"current_steps": 40335, "total_steps": 78105, "loss": 0.2197, "lr": 2.7925092053130064e-06, "epoch": 2.5821010178605723, "percentage": 51.64, "elapsed_time": "1:46:26", "remaining_time": "1:39:40", "throughput": 19876.47, "total_tokens": 126934784} +{"current_steps": 40340, "total_steps": 78105, "loss": 0.3262, "lr": 2.7919543827804735e-06, "epoch": 2.5824210998015493, "percentage": 51.65, "elapsed_time": "1:46:26", "remaining_time": "1:39:39", "throughput": 19876.84, "total_tokens": 126950464} +{"current_steps": 40345, "total_steps": 78105, "loss": 0.2917, "lr": 2.791399545669259e-06, "epoch": 2.5827411817425263, "percentage": 51.65, "elapsed_time": "1:46:28", "remaining_time": "1:39:38", "throughput": 19877.72, "total_tokens": 126979456} +{"current_steps": 40350, "total_steps": 78105, "loss": 0.2101, "lr": 2.7908446940070687e-06, "epoch": 2.583061263683503, "percentage": 51.66, "elapsed_time": "1:46:28", "remaining_time": "1:39:37", "throughput": 19878.07, "total_tokens": 126995136} +{"current_steps": 40355, "total_steps": 78105, "loss": 0.2366, "lr": 2.7902898278216095e-06, "epoch": 2.58338134562448, "percentage": 51.67, "elapsed_time": "1:46:29", "remaining_time": "1:39:36", "throughput": 19878.41, "total_tokens": 127010816} +{"current_steps": 40360, "total_steps": 78105, "loss": 0.2774, "lr": 2.7897349471405884e-06, "epoch": 2.583701427565457, "percentage": 51.67, "elapsed_time": "1:46:30", "remaining_time": "1:39:36", "throughput": 19878.76, "total_tokens": 127026432} +{"current_steps": 40365, "total_steps": 78105, "loss": 0.178, "lr": 2.789180051991713e-06, "epoch": 2.5840215095064334, "percentage": 51.68, "elapsed_time": "1:46:30", "remaining_time": "1:39:35", "throughput": 19879.14, "total_tokens": 127042496} +{"current_steps": 40370, "total_steps": 78105, "loss": 0.2812, "lr": 2.7886251424026924e-06, "epoch": 2.5843415914474104, "percentage": 51.69, "elapsed_time": "1:46:31", "remaining_time": "1:39:34", "throughput": 19879.47, "total_tokens": 127058112} +{"current_steps": 40375, "total_steps": 78105, "loss": 0.1885, "lr": 2.7880702184012355e-06, "epoch": 2.5846616733883874, "percentage": 51.69, "elapsed_time": "1:46:32", "remaining_time": "1:39:33", "throughput": 19879.81, "total_tokens": 127073856} +{"current_steps": 40380, "total_steps": 78105, "loss": 0.207, "lr": 2.787515280015052e-06, "epoch": 2.5849817553293644, "percentage": 51.7, "elapsed_time": "1:46:32", "remaining_time": "1:39:32", "throughput": 19880.17, "total_tokens": 127089856} +{"current_steps": 40385, "total_steps": 78105, "loss": 0.176, "lr": 2.786960327271853e-06, "epoch": 2.5853018372703414, "percentage": 51.71, "elapsed_time": "1:46:33", "remaining_time": "1:39:31", "throughput": 19880.48, "total_tokens": 127104448} +{"current_steps": 40390, "total_steps": 78105, "loss": 0.2844, "lr": 2.7864053601993506e-06, "epoch": 2.5856219192113183, "percentage": 51.71, "elapsed_time": "1:46:34", "remaining_time": "1:39:30", "throughput": 19880.83, "total_tokens": 127120000} +{"current_steps": 40395, "total_steps": 78105, "loss": 0.3363, "lr": 2.7858503788252562e-06, "epoch": 2.585942001152295, "percentage": 51.72, "elapsed_time": "1:46:34", "remaining_time": "1:39:29", "throughput": 19881.27, "total_tokens": 127137024} +{"current_steps": 40400, "total_steps": 78105, "loss": 0.2154, "lr": 2.785295383177283e-06, "epoch": 2.586262083093272, "percentage": 51.73, "elapsed_time": "1:46:35", "remaining_time": "1:39:28", "throughput": 19881.62, "total_tokens": 127152384} +{"current_steps": 40405, "total_steps": 78105, "loss": 0.1823, "lr": 2.784740373283144e-06, "epoch": 2.586582165034249, "percentage": 51.73, "elapsed_time": "1:46:36", "remaining_time": "1:39:27", "throughput": 19881.94, "total_tokens": 127168192} +{"current_steps": 40410, "total_steps": 78105, "loss": 0.2541, "lr": 2.784185349170554e-06, "epoch": 2.5869022469752254, "percentage": 51.74, "elapsed_time": "1:46:36", "remaining_time": "1:39:27", "throughput": 19882.31, "total_tokens": 127184192} +{"current_steps": 40415, "total_steps": 78105, "loss": 0.2417, "lr": 2.783630310867228e-06, "epoch": 2.5872223289162024, "percentage": 51.74, "elapsed_time": "1:46:37", "remaining_time": "1:39:26", "throughput": 19882.64, "total_tokens": 127199360} +{"current_steps": 40420, "total_steps": 78105, "loss": 0.2424, "lr": 2.783075258400881e-06, "epoch": 2.5875424108571794, "percentage": 51.75, "elapsed_time": "1:46:38", "remaining_time": "1:39:25", "throughput": 19882.92, "total_tokens": 127214016} +{"current_steps": 40425, "total_steps": 78105, "loss": 0.2632, "lr": 2.782520191799231e-06, "epoch": 2.5878624927981564, "percentage": 51.76, "elapsed_time": "1:46:38", "remaining_time": "1:39:24", "throughput": 19883.43, "total_tokens": 127232000} +{"current_steps": 40430, "total_steps": 78105, "loss": 0.2933, "lr": 2.7819651110899943e-06, "epoch": 2.5881825747391334, "percentage": 51.76, "elapsed_time": "1:46:39", "remaining_time": "1:39:23", "throughput": 19883.78, "total_tokens": 127247872} +{"current_steps": 40435, "total_steps": 78105, "loss": 0.3145, "lr": 2.781410016300888e-06, "epoch": 2.5885026566801104, "percentage": 51.77, "elapsed_time": "1:46:40", "remaining_time": "1:39:22", "throughput": 19884.17, "total_tokens": 127264128} +{"current_steps": 40440, "total_steps": 78105, "loss": 0.1872, "lr": 2.7808549074596318e-06, "epoch": 2.588822738621087, "percentage": 51.78, "elapsed_time": "1:46:40", "remaining_time": "1:39:21", "throughput": 19884.48, "total_tokens": 127279296} +{"current_steps": 40445, "total_steps": 78105, "loss": 0.1627, "lr": 2.780299784593944e-06, "epoch": 2.589142820562064, "percentage": 51.78, "elapsed_time": "1:46:41", "remaining_time": "1:39:20", "throughput": 19884.86, "total_tokens": 127295232} +{"current_steps": 40450, "total_steps": 78105, "loss": 0.2176, "lr": 2.7797446477315448e-06, "epoch": 2.589462902503041, "percentage": 51.79, "elapsed_time": "1:46:42", "remaining_time": "1:39:19", "throughput": 19885.23, "total_tokens": 127311488} +{"current_steps": 40455, "total_steps": 78105, "loss": 0.2127, "lr": 2.7791894969001555e-06, "epoch": 2.5897829844440174, "percentage": 51.8, "elapsed_time": "1:46:42", "remaining_time": "1:39:19", "throughput": 19885.57, "total_tokens": 127326912} +{"current_steps": 40460, "total_steps": 78105, "loss": 0.2832, "lr": 2.7786343321274967e-06, "epoch": 2.5901030663849944, "percentage": 51.8, "elapsed_time": "1:46:43", "remaining_time": "1:39:18", "throughput": 19885.88, "total_tokens": 127341888} +{"current_steps": 40465, "total_steps": 78105, "loss": 0.1455, "lr": 2.778079153441291e-06, "epoch": 2.5904231483259714, "percentage": 51.81, "elapsed_time": "1:46:44", "remaining_time": "1:39:17", "throughput": 19886.23, "total_tokens": 127357120} +{"current_steps": 40470, "total_steps": 78105, "loss": 0.3235, "lr": 2.777523960869261e-06, "epoch": 2.5907432302669484, "percentage": 51.81, "elapsed_time": "1:46:44", "remaining_time": "1:39:16", "throughput": 19886.64, "total_tokens": 127373696} +{"current_steps": 40475, "total_steps": 78105, "loss": 0.2108, "lr": 2.776968754439129e-06, "epoch": 2.5910633122079254, "percentage": 51.82, "elapsed_time": "1:46:45", "remaining_time": "1:39:15", "throughput": 19886.99, "total_tokens": 127389440} +{"current_steps": 40480, "total_steps": 78105, "loss": 0.2055, "lr": 2.7764135341786214e-06, "epoch": 2.591383394148902, "percentage": 51.83, "elapsed_time": "1:46:46", "remaining_time": "1:39:14", "throughput": 19887.31, "total_tokens": 127404800} +{"current_steps": 40485, "total_steps": 78105, "loss": 0.2051, "lr": 2.775858300115461e-06, "epoch": 2.591703476089879, "percentage": 51.83, "elapsed_time": "1:46:46", "remaining_time": "1:39:13", "throughput": 19887.62, "total_tokens": 127419904} +{"current_steps": 40490, "total_steps": 78105, "loss": 0.3448, "lr": 2.7753030522773745e-06, "epoch": 2.592023558030856, "percentage": 51.84, "elapsed_time": "1:46:47", "remaining_time": "1:39:12", "throughput": 19887.92, "total_tokens": 127435200} +{"current_steps": 40495, "total_steps": 78105, "loss": 0.2214, "lr": 2.774747790692087e-06, "epoch": 2.592343639971833, "percentage": 51.85, "elapsed_time": "1:46:48", "remaining_time": "1:39:11", "throughput": 19888.23, "total_tokens": 127450496} +{"current_steps": 40500, "total_steps": 78105, "loss": 0.1552, "lr": 2.7741925153873263e-06, "epoch": 2.5926637219128095, "percentage": 51.85, "elapsed_time": "1:46:49", "remaining_time": "1:39:10", "throughput": 19888.64, "total_tokens": 127467328} +{"current_steps": 40505, "total_steps": 78105, "loss": 0.2552, "lr": 2.773637226390819e-06, "epoch": 2.5929838038537865, "percentage": 51.86, "elapsed_time": "1:46:49", "remaining_time": "1:39:10", "throughput": 19888.99, "total_tokens": 127483072} +{"current_steps": 40510, "total_steps": 78105, "loss": 0.2069, "lr": 2.7730819237302935e-06, "epoch": 2.5933038857947635, "percentage": 51.87, "elapsed_time": "1:46:50", "remaining_time": "1:39:09", "throughput": 19889.42, "total_tokens": 127499840} +{"current_steps": 40515, "total_steps": 78105, "loss": 0.2342, "lr": 2.7725266074334807e-06, "epoch": 2.5936239677357404, "percentage": 51.87, "elapsed_time": "1:46:51", "remaining_time": "1:39:08", "throughput": 19889.71, "total_tokens": 127514816} +{"current_steps": 40520, "total_steps": 78105, "loss": 0.2927, "lr": 2.771971277528108e-06, "epoch": 2.5939440496767174, "percentage": 51.88, "elapsed_time": "1:46:51", "remaining_time": "1:39:07", "throughput": 19890.09, "total_tokens": 127531072} +{"current_steps": 40525, "total_steps": 78105, "loss": 0.2973, "lr": 2.771415934041906e-06, "epoch": 2.594264131617694, "percentage": 51.89, "elapsed_time": "1:46:52", "remaining_time": "1:39:06", "throughput": 19890.61, "total_tokens": 127549952} +{"current_steps": 40530, "total_steps": 78105, "loss": 0.1886, "lr": 2.7708605770026064e-06, "epoch": 2.594584213558671, "percentage": 51.89, "elapsed_time": "1:46:53", "remaining_time": "1:39:05", "throughput": 19891.1, "total_tokens": 127567872} +{"current_steps": 40535, "total_steps": 78105, "loss": 0.15, "lr": 2.7703052064379398e-06, "epoch": 2.594904295499648, "percentage": 51.9, "elapsed_time": "1:46:53", "remaining_time": "1:39:04", "throughput": 19891.38, "total_tokens": 127582528} +{"current_steps": 40540, "total_steps": 78105, "loss": 0.1754, "lr": 2.76974982237564e-06, "epoch": 2.595224377440625, "percentage": 51.9, "elapsed_time": "1:46:54", "remaining_time": "1:39:03", "throughput": 19891.73, "total_tokens": 127598144} +{"current_steps": 40545, "total_steps": 78105, "loss": 0.2159, "lr": 2.7691944248434388e-06, "epoch": 2.5955444593816015, "percentage": 51.91, "elapsed_time": "1:46:55", "remaining_time": "1:39:03", "throughput": 19892.16, "total_tokens": 127615360} +{"current_steps": 40550, "total_steps": 78105, "loss": 0.2137, "lr": 2.7686390138690706e-06, "epoch": 2.5958645413225785, "percentage": 51.92, "elapsed_time": "1:46:56", "remaining_time": "1:39:02", "throughput": 19892.44, "total_tokens": 127629888} +{"current_steps": 40555, "total_steps": 78105, "loss": 0.2748, "lr": 2.7680835894802693e-06, "epoch": 2.5961846232635555, "percentage": 51.92, "elapsed_time": "1:46:56", "remaining_time": "1:39:01", "throughput": 19892.69, "total_tokens": 127644224} +{"current_steps": 40560, "total_steps": 78105, "loss": 0.2182, "lr": 2.767528151704769e-06, "epoch": 2.5965047052045325, "percentage": 51.93, "elapsed_time": "1:46:57", "remaining_time": "1:39:00", "throughput": 19893.07, "total_tokens": 127659904} +{"current_steps": 40565, "total_steps": 78105, "loss": 0.2189, "lr": 2.766972700570306e-06, "epoch": 2.5968247871455095, "percentage": 51.94, "elapsed_time": "1:46:57", "remaining_time": "1:38:59", "throughput": 19893.43, "total_tokens": 127675904} +{"current_steps": 40570, "total_steps": 78105, "loss": 0.2486, "lr": 2.766417236104618e-06, "epoch": 2.597144869086486, "percentage": 51.94, "elapsed_time": "1:46:58", "remaining_time": "1:38:58", "throughput": 19893.78, "total_tokens": 127691904} +{"current_steps": 40575, "total_steps": 78105, "loss": 0.3322, "lr": 2.765861758335441e-06, "epoch": 2.597464951027463, "percentage": 51.95, "elapsed_time": "1:46:59", "remaining_time": "1:38:57", "throughput": 19894.11, "total_tokens": 127707264} +{"current_steps": 40580, "total_steps": 78105, "loss": 0.2168, "lr": 2.765306267290513e-06, "epoch": 2.59778503296844, "percentage": 51.96, "elapsed_time": "1:47:00", "remaining_time": "1:38:56", "throughput": 19894.56, "total_tokens": 127724480} +{"current_steps": 40585, "total_steps": 78105, "loss": 0.163, "lr": 2.764750762997571e-06, "epoch": 2.5981051149094165, "percentage": 51.96, "elapsed_time": "1:47:00", "remaining_time": "1:38:55", "throughput": 19894.97, "total_tokens": 127740736} +{"current_steps": 40590, "total_steps": 78105, "loss": 0.1926, "lr": 2.764195245484355e-06, "epoch": 2.5984251968503935, "percentage": 51.97, "elapsed_time": "1:47:01", "remaining_time": "1:38:54", "throughput": 19895.35, "total_tokens": 127756480} +{"current_steps": 40595, "total_steps": 78105, "loss": 0.174, "lr": 2.7636397147786053e-06, "epoch": 2.5987452787913705, "percentage": 51.97, "elapsed_time": "1:47:02", "remaining_time": "1:38:54", "throughput": 19895.71, "total_tokens": 127772352} +{"current_steps": 40600, "total_steps": 78105, "loss": 0.2995, "lr": 2.7630841709080615e-06, "epoch": 2.5990653607323475, "percentage": 51.98, "elapsed_time": "1:47:02", "remaining_time": "1:38:53", "throughput": 19896.0, "total_tokens": 127787136} +{"current_steps": 40605, "total_steps": 78105, "loss": 0.249, "lr": 2.7625286139004635e-06, "epoch": 2.5993854426733245, "percentage": 51.99, "elapsed_time": "1:47:03", "remaining_time": "1:38:52", "throughput": 19896.44, "total_tokens": 127803840} +{"current_steps": 40610, "total_steps": 78105, "loss": 0.1906, "lr": 2.761973043783555e-06, "epoch": 2.5997055246143015, "percentage": 51.99, "elapsed_time": "1:47:04", "remaining_time": "1:38:51", "throughput": 19896.8, "total_tokens": 127819712} +{"current_steps": 40615, "total_steps": 78105, "loss": 0.3053, "lr": 2.7614174605850776e-06, "epoch": 2.600025606555278, "percentage": 52.0, "elapsed_time": "1:47:04", "remaining_time": "1:38:50", "throughput": 19897.16, "total_tokens": 127835328} +{"current_steps": 40620, "total_steps": 78105, "loss": 0.3161, "lr": 2.7608618643327724e-06, "epoch": 2.600345688496255, "percentage": 52.01, "elapsed_time": "1:47:05", "remaining_time": "1:38:49", "throughput": 19897.57, "total_tokens": 127851904} +{"current_steps": 40625, "total_steps": 78105, "loss": 0.1776, "lr": 2.7603062550543846e-06, "epoch": 2.600665770437232, "percentage": 52.01, "elapsed_time": "1:47:06", "remaining_time": "1:38:48", "throughput": 19897.88, "total_tokens": 127866624} +{"current_steps": 40630, "total_steps": 78105, "loss": 0.3068, "lr": 2.759750632777659e-06, "epoch": 2.6009858523782086, "percentage": 52.02, "elapsed_time": "1:47:06", "remaining_time": "1:38:47", "throughput": 19898.19, "total_tokens": 127881664} +{"current_steps": 40635, "total_steps": 78105, "loss": 0.1777, "lr": 2.75919499753034e-06, "epoch": 2.6013059343191856, "percentage": 52.03, "elapsed_time": "1:47:07", "remaining_time": "1:38:46", "throughput": 19898.5, "total_tokens": 127896704} +{"current_steps": 40640, "total_steps": 78105, "loss": 0.2716, "lr": 2.758639349340173e-06, "epoch": 2.6016260162601625, "percentage": 52.03, "elapsed_time": "1:47:08", "remaining_time": "1:38:46", "throughput": 19899.03, "total_tokens": 127915264} +{"current_steps": 40645, "total_steps": 78105, "loss": 0.2362, "lr": 2.758083688234904e-06, "epoch": 2.6019460982011395, "percentage": 52.04, "elapsed_time": "1:47:08", "remaining_time": "1:38:45", "throughput": 19899.4, "total_tokens": 127931264} +{"current_steps": 40650, "total_steps": 78105, "loss": 0.1817, "lr": 2.757528014242279e-06, "epoch": 2.6022661801421165, "percentage": 52.05, "elapsed_time": "1:47:09", "remaining_time": "1:38:44", "throughput": 19899.81, "total_tokens": 127948032} +{"current_steps": 40655, "total_steps": 78105, "loss": 0.2391, "lr": 2.7569723273900474e-06, "epoch": 2.6025862620830935, "percentage": 52.05, "elapsed_time": "1:47:10", "remaining_time": "1:38:43", "throughput": 19900.28, "total_tokens": 127965312} +{"current_steps": 40660, "total_steps": 78105, "loss": 0.1652, "lr": 2.756416627705956e-06, "epoch": 2.60290634402407, "percentage": 52.06, "elapsed_time": "1:47:10", "remaining_time": "1:38:42", "throughput": 19900.6, "total_tokens": 127980544} +{"current_steps": 40665, "total_steps": 78105, "loss": 0.1585, "lr": 2.755860915217754e-06, "epoch": 2.603226425965047, "percentage": 52.06, "elapsed_time": "1:47:11", "remaining_time": "1:38:41", "throughput": 19900.96, "total_tokens": 127996224} +{"current_steps": 40670, "total_steps": 78105, "loss": 0.163, "lr": 2.75530518995319e-06, "epoch": 2.603546507906024, "percentage": 52.07, "elapsed_time": "1:47:12", "remaining_time": "1:38:40", "throughput": 19901.35, "total_tokens": 128012608} +{"current_steps": 40675, "total_steps": 78105, "loss": 0.3127, "lr": 2.754749451940015e-06, "epoch": 2.6038665898470006, "percentage": 52.08, "elapsed_time": "1:47:13", "remaining_time": "1:38:39", "throughput": 19901.66, "total_tokens": 128027584} +{"current_steps": 40680, "total_steps": 78105, "loss": 0.2108, "lr": 2.7541937012059787e-06, "epoch": 2.6041866717879776, "percentage": 52.08, "elapsed_time": "1:47:13", "remaining_time": "1:38:38", "throughput": 19902.0, "total_tokens": 128043648} +{"current_steps": 40685, "total_steps": 78105, "loss": 0.2278, "lr": 2.753637937778834e-06, "epoch": 2.6045067537289546, "percentage": 52.09, "elapsed_time": "1:47:14", "remaining_time": "1:38:38", "throughput": 19902.34, "total_tokens": 128058880} +{"current_steps": 40690, "total_steps": 78105, "loss": 0.2063, "lr": 2.7530821616863317e-06, "epoch": 2.6048268356699316, "percentage": 52.1, "elapsed_time": "1:47:15", "remaining_time": "1:38:37", "throughput": 19902.73, "total_tokens": 128075264} +{"current_steps": 40695, "total_steps": 78105, "loss": 0.2888, "lr": 2.7525263729562247e-06, "epoch": 2.6051469176109086, "percentage": 52.1, "elapsed_time": "1:47:15", "remaining_time": "1:38:36", "throughput": 19903.13, "total_tokens": 128091904} +{"current_steps": 40700, "total_steps": 78105, "loss": 0.2072, "lr": 2.7519705716162653e-06, "epoch": 2.6054669995518855, "percentage": 52.11, "elapsed_time": "1:47:16", "remaining_time": "1:38:35", "throughput": 19903.41, "total_tokens": 128106560} +{"current_steps": 40705, "total_steps": 78105, "loss": 0.1759, "lr": 2.751414757694208e-06, "epoch": 2.605787081492862, "percentage": 52.12, "elapsed_time": "1:47:17", "remaining_time": "1:38:34", "throughput": 19903.76, "total_tokens": 128122496} +{"current_steps": 40710, "total_steps": 78105, "loss": 0.2457, "lr": 2.750858931217808e-06, "epoch": 2.606107163433839, "percentage": 52.12, "elapsed_time": "1:47:17", "remaining_time": "1:38:33", "throughput": 19904.06, "total_tokens": 128136960} +{"current_steps": 40715, "total_steps": 78105, "loss": 0.1815, "lr": 2.7503030922148194e-06, "epoch": 2.606427245374816, "percentage": 52.13, "elapsed_time": "1:47:18", "remaining_time": "1:38:32", "throughput": 19904.45, "total_tokens": 128153472} +{"current_steps": 40720, "total_steps": 78105, "loss": 0.2602, "lr": 2.7497472407129984e-06, "epoch": 2.6067473273157926, "percentage": 52.13, "elapsed_time": "1:47:19", "remaining_time": "1:38:31", "throughput": 19904.83, "total_tokens": 128169664} +{"current_steps": 40725, "total_steps": 78105, "loss": 0.1712, "lr": 2.749191376740101e-06, "epoch": 2.6070674092567696, "percentage": 52.14, "elapsed_time": "1:47:19", "remaining_time": "1:38:30", "throughput": 19905.23, "total_tokens": 128186240} +{"current_steps": 40730, "total_steps": 78105, "loss": 0.2503, "lr": 2.748635500323883e-06, "epoch": 2.6073874911977466, "percentage": 52.15, "elapsed_time": "1:47:20", "remaining_time": "1:38:29", "throughput": 19905.53, "total_tokens": 128201216} +{"current_steps": 40735, "total_steps": 78105, "loss": 0.2093, "lr": 2.7480796114921044e-06, "epoch": 2.6077075731387236, "percentage": 52.15, "elapsed_time": "1:47:21", "remaining_time": "1:38:29", "throughput": 19905.87, "total_tokens": 128217152} +{"current_steps": 40740, "total_steps": 78105, "loss": 0.2199, "lr": 2.7475237102725216e-06, "epoch": 2.6080276550797006, "percentage": 52.16, "elapsed_time": "1:47:21", "remaining_time": "1:38:28", "throughput": 19906.2, "total_tokens": 128232576} +{"current_steps": 40745, "total_steps": 78105, "loss": 0.2027, "lr": 2.7469677966928943e-06, "epoch": 2.6083477370206776, "percentage": 52.17, "elapsed_time": "1:47:22", "remaining_time": "1:38:27", "throughput": 19906.49, "total_tokens": 128247360} +{"current_steps": 40750, "total_steps": 78105, "loss": 0.3396, "lr": 2.746411870780982e-06, "epoch": 2.608667818961654, "percentage": 52.17, "elapsed_time": "1:47:23", "remaining_time": "1:38:26", "throughput": 19906.82, "total_tokens": 128262976} +{"current_steps": 40755, "total_steps": 78105, "loss": 0.2575, "lr": 2.745855932564544e-06, "epoch": 2.608987900902631, "percentage": 52.18, "elapsed_time": "1:47:23", "remaining_time": "1:38:25", "throughput": 19907.16, "total_tokens": 128278528} +{"current_steps": 40760, "total_steps": 78105, "loss": 0.3572, "lr": 2.7452999820713417e-06, "epoch": 2.609307982843608, "percentage": 52.19, "elapsed_time": "1:47:24", "remaining_time": "1:38:24", "throughput": 19907.49, "total_tokens": 128293952} +{"current_steps": 40765, "total_steps": 78105, "loss": 0.2125, "lr": 2.7447440193291353e-06, "epoch": 2.6096280647845846, "percentage": 52.19, "elapsed_time": "1:47:25", "remaining_time": "1:38:23", "throughput": 19907.87, "total_tokens": 128310208} +{"current_steps": 40770, "total_steps": 78105, "loss": 0.219, "lr": 2.744188044365688e-06, "epoch": 2.6099481467255616, "percentage": 52.2, "elapsed_time": "1:47:25", "remaining_time": "1:38:22", "throughput": 19908.2, "total_tokens": 128325376} +{"current_steps": 40775, "total_steps": 78105, "loss": 0.1755, "lr": 2.7436320572087614e-06, "epoch": 2.6102682286665386, "percentage": 52.21, "elapsed_time": "1:47:26", "remaining_time": "1:38:21", "throughput": 19908.57, "total_tokens": 128341952} +{"current_steps": 40780, "total_steps": 78105, "loss": 0.2195, "lr": 2.743076057886119e-06, "epoch": 2.6105883106075156, "percentage": 52.21, "elapsed_time": "1:47:27", "remaining_time": "1:38:21", "throughput": 19908.98, "total_tokens": 128358528} +{"current_steps": 40785, "total_steps": 78105, "loss": 0.2159, "lr": 2.7425200464255253e-06, "epoch": 2.6109083925484926, "percentage": 52.22, "elapsed_time": "1:47:27", "remaining_time": "1:38:20", "throughput": 19909.27, "total_tokens": 128373568} +{"current_steps": 40790, "total_steps": 78105, "loss": 0.2201, "lr": 2.741964022854742e-06, "epoch": 2.611228474489469, "percentage": 52.22, "elapsed_time": "1:47:28", "remaining_time": "1:38:19", "throughput": 19909.58, "total_tokens": 128389056} +{"current_steps": 40795, "total_steps": 78105, "loss": 0.2496, "lr": 2.7414079872015367e-06, "epoch": 2.611548556430446, "percentage": 52.23, "elapsed_time": "1:47:29", "remaining_time": "1:38:18", "throughput": 19909.86, "total_tokens": 128404160} +{"current_steps": 40800, "total_steps": 78105, "loss": 0.2125, "lr": 2.740851939493674e-06, "epoch": 2.611868638371423, "percentage": 52.24, "elapsed_time": "1:47:29", "remaining_time": "1:38:17", "throughput": 19910.21, "total_tokens": 128419904} +{"current_steps": 40805, "total_steps": 78105, "loss": 0.2583, "lr": 2.7402958797589197e-06, "epoch": 2.6121887203124, "percentage": 52.24, "elapsed_time": "1:47:30", "remaining_time": "1:38:16", "throughput": 19910.56, "total_tokens": 128435392} +{"current_steps": 40810, "total_steps": 78105, "loss": 0.2339, "lr": 2.739739808025041e-06, "epoch": 2.6125088022533767, "percentage": 52.25, "elapsed_time": "1:47:31", "remaining_time": "1:38:15", "throughput": 19910.89, "total_tokens": 128450880} +{"current_steps": 40815, "total_steps": 78105, "loss": 0.3018, "lr": 2.7391837243198056e-06, "epoch": 2.6128288841943537, "percentage": 52.26, "elapsed_time": "1:47:32", "remaining_time": "1:38:14", "throughput": 19911.34, "total_tokens": 128468096} +{"current_steps": 40820, "total_steps": 78105, "loss": 0.2648, "lr": 2.7386276286709795e-06, "epoch": 2.6131489661353307, "percentage": 52.26, "elapsed_time": "1:47:32", "remaining_time": "1:38:13", "throughput": 19911.71, "total_tokens": 128483904} +{"current_steps": 40825, "total_steps": 78105, "loss": 0.304, "lr": 2.7380715211063336e-06, "epoch": 2.6134690480763076, "percentage": 52.27, "elapsed_time": "1:47:33", "remaining_time": "1:38:12", "throughput": 19912.05, "total_tokens": 128499648} +{"current_steps": 40830, "total_steps": 78105, "loss": 0.196, "lr": 2.7375154016536357e-06, "epoch": 2.6137891300172846, "percentage": 52.28, "elapsed_time": "1:47:34", "remaining_time": "1:38:12", "throughput": 19912.37, "total_tokens": 128515136} +{"current_steps": 40835, "total_steps": 78105, "loss": 0.24, "lr": 2.736959270340656e-06, "epoch": 2.614109211958261, "percentage": 52.28, "elapsed_time": "1:47:34", "remaining_time": "1:38:11", "throughput": 19912.64, "total_tokens": 128529728} +{"current_steps": 40840, "total_steps": 78105, "loss": 0.2053, "lr": 2.736403127195165e-06, "epoch": 2.614429293899238, "percentage": 52.29, "elapsed_time": "1:47:35", "remaining_time": "1:38:10", "throughput": 19912.97, "total_tokens": 128545472} +{"current_steps": 40845, "total_steps": 78105, "loss": 0.1457, "lr": 2.735846972244932e-06, "epoch": 2.614749375840215, "percentage": 52.29, "elapsed_time": "1:47:36", "remaining_time": "1:38:09", "throughput": 19913.27, "total_tokens": 128560384} +{"current_steps": 40850, "total_steps": 78105, "loss": 0.2336, "lr": 2.7352908055177306e-06, "epoch": 2.6150694577811917, "percentage": 52.3, "elapsed_time": "1:47:36", "remaining_time": "1:38:08", "throughput": 19913.64, "total_tokens": 128576448} +{"current_steps": 40855, "total_steps": 78105, "loss": 0.278, "lr": 2.7347346270413316e-06, "epoch": 2.6153895397221687, "percentage": 52.31, "elapsed_time": "1:47:37", "remaining_time": "1:38:07", "throughput": 19913.97, "total_tokens": 128591616} +{"current_steps": 40860, "total_steps": 78105, "loss": 0.1925, "lr": 2.7341784368435086e-06, "epoch": 2.6157096216631457, "percentage": 52.31, "elapsed_time": "1:47:38", "remaining_time": "1:38:06", "throughput": 19914.28, "total_tokens": 128606720} +{"current_steps": 40865, "total_steps": 78105, "loss": 0.2862, "lr": 2.7336222349520336e-06, "epoch": 2.6160297036041227, "percentage": 52.32, "elapsed_time": "1:47:38", "remaining_time": "1:38:05", "throughput": 19914.62, "total_tokens": 128622336} +{"current_steps": 40870, "total_steps": 78105, "loss": 0.3135, "lr": 2.733066021394682e-06, "epoch": 2.6163497855450997, "percentage": 52.33, "elapsed_time": "1:47:39", "remaining_time": "1:38:04", "throughput": 19914.94, "total_tokens": 128637568} +{"current_steps": 40875, "total_steps": 78105, "loss": 0.1468, "lr": 2.7325097961992264e-06, "epoch": 2.6166698674860767, "percentage": 52.33, "elapsed_time": "1:47:40", "remaining_time": "1:38:03", "throughput": 19915.21, "total_tokens": 128652672} +{"current_steps": 40880, "total_steps": 78105, "loss": 0.196, "lr": 2.7319535593934426e-06, "epoch": 2.616989949427053, "percentage": 52.34, "elapsed_time": "1:47:40", "remaining_time": "1:38:03", "throughput": 19915.53, "total_tokens": 128667968} +{"current_steps": 40885, "total_steps": 78105, "loss": 0.268, "lr": 2.7313973110051067e-06, "epoch": 2.61731003136803, "percentage": 52.35, "elapsed_time": "1:47:41", "remaining_time": "1:38:02", "throughput": 19915.89, "total_tokens": 128684288} +{"current_steps": 40890, "total_steps": 78105, "loss": 0.2837, "lr": 2.7308410510619945e-06, "epoch": 2.617630113309007, "percentage": 52.35, "elapsed_time": "1:47:42", "remaining_time": "1:38:01", "throughput": 19916.17, "total_tokens": 128698816} +{"current_steps": 40895, "total_steps": 78105, "loss": 0.2696, "lr": 2.7302847795918824e-06, "epoch": 2.6179501952499837, "percentage": 52.36, "elapsed_time": "1:47:42", "remaining_time": "1:38:00", "throughput": 19916.54, "total_tokens": 128714816} +{"current_steps": 40900, "total_steps": 78105, "loss": 0.2631, "lr": 2.729728496622548e-06, "epoch": 2.6182702771909607, "percentage": 52.37, "elapsed_time": "1:47:43", "remaining_time": "1:37:59", "throughput": 19916.85, "total_tokens": 128729536} +{"current_steps": 40905, "total_steps": 78105, "loss": 0.2565, "lr": 2.7291722021817684e-06, "epoch": 2.6185903591319377, "percentage": 52.37, "elapsed_time": "1:47:44", "remaining_time": "1:37:58", "throughput": 19917.13, "total_tokens": 128744576} +{"current_steps": 40910, "total_steps": 78105, "loss": 0.209, "lr": 2.728615896297323e-06, "epoch": 2.6189104410729147, "percentage": 52.38, "elapsed_time": "1:47:44", "remaining_time": "1:37:57", "throughput": 19917.49, "total_tokens": 128760192} +{"current_steps": 40915, "total_steps": 78105, "loss": 0.1708, "lr": 2.728059578996991e-06, "epoch": 2.6192305230138917, "percentage": 52.38, "elapsed_time": "1:47:45", "remaining_time": "1:37:56", "throughput": 19917.77, "total_tokens": 128774656} +{"current_steps": 40920, "total_steps": 78105, "loss": 0.1691, "lr": 2.7275032503085515e-06, "epoch": 2.6195506049548687, "percentage": 52.39, "elapsed_time": "1:47:46", "remaining_time": "1:37:55", "throughput": 19918.18, "total_tokens": 128791360} +{"current_steps": 40925, "total_steps": 78105, "loss": 0.2619, "lr": 2.7269469102597844e-06, "epoch": 2.6198706868958452, "percentage": 52.4, "elapsed_time": "1:47:46", "remaining_time": "1:37:54", "throughput": 19918.51, "total_tokens": 128806912} +{"current_steps": 40930, "total_steps": 78105, "loss": 0.1432, "lr": 2.726390558878471e-06, "epoch": 2.6201907688368222, "percentage": 52.4, "elapsed_time": "1:47:47", "remaining_time": "1:37:54", "throughput": 19918.95, "total_tokens": 128824128} +{"current_steps": 40935, "total_steps": 78105, "loss": 0.2237, "lr": 2.7258341961923923e-06, "epoch": 2.620510850777799, "percentage": 52.41, "elapsed_time": "1:47:48", "remaining_time": "1:37:53", "throughput": 19919.34, "total_tokens": 128840384} +{"current_steps": 40940, "total_steps": 78105, "loss": 0.1387, "lr": 2.72527782222933e-06, "epoch": 2.6208309327187758, "percentage": 52.42, "elapsed_time": "1:47:48", "remaining_time": "1:37:52", "throughput": 19919.78, "total_tokens": 128857792} +{"current_steps": 40945, "total_steps": 78105, "loss": 0.2324, "lr": 2.724721437017067e-06, "epoch": 2.6211510146597528, "percentage": 52.42, "elapsed_time": "1:47:49", "remaining_time": "1:37:51", "throughput": 19920.17, "total_tokens": 128874176} +{"current_steps": 40950, "total_steps": 78105, "loss": 0.203, "lr": 2.724165040583386e-06, "epoch": 2.6214710966007297, "percentage": 52.43, "elapsed_time": "1:47:50", "remaining_time": "1:37:50", "throughput": 19920.54, "total_tokens": 128889920} +{"current_steps": 40955, "total_steps": 78105, "loss": 0.2569, "lr": 2.7236086329560703e-06, "epoch": 2.6217911785417067, "percentage": 52.44, "elapsed_time": "1:47:50", "remaining_time": "1:37:49", "throughput": 19920.85, "total_tokens": 128905088} +{"current_steps": 40960, "total_steps": 78105, "loss": 0.2041, "lr": 2.723052214162904e-06, "epoch": 2.6221112604826837, "percentage": 52.44, "elapsed_time": "1:47:51", "remaining_time": "1:37:48", "throughput": 19921.23, "total_tokens": 128920960} +{"current_steps": 40965, "total_steps": 78105, "loss": 0.1669, "lr": 2.7224957842316717e-06, "epoch": 2.6224313424236607, "percentage": 52.45, "elapsed_time": "1:47:52", "remaining_time": "1:37:47", "throughput": 19921.53, "total_tokens": 128935616} +{"current_steps": 40970, "total_steps": 78105, "loss": 0.2564, "lr": 2.721939343190159e-06, "epoch": 2.6227514243646373, "percentage": 52.46, "elapsed_time": "1:47:52", "remaining_time": "1:37:46", "throughput": 19921.94, "total_tokens": 128952448} +{"current_steps": 40975, "total_steps": 78105, "loss": 0.1982, "lr": 2.721382891066152e-06, "epoch": 2.6230715063056143, "percentage": 52.46, "elapsed_time": "1:47:53", "remaining_time": "1:37:46", "throughput": 19922.25, "total_tokens": 128967744} +{"current_steps": 40980, "total_steps": 78105, "loss": 0.2127, "lr": 2.720826427887437e-06, "epoch": 2.6233915882465912, "percentage": 52.47, "elapsed_time": "1:47:54", "remaining_time": "1:37:45", "throughput": 19922.6, "total_tokens": 128983168} +{"current_steps": 40985, "total_steps": 78105, "loss": 0.2716, "lr": 2.7202699536818e-06, "epoch": 2.623711670187568, "percentage": 52.47, "elapsed_time": "1:47:54", "remaining_time": "1:37:44", "throughput": 19922.9, "total_tokens": 128998272} +{"current_steps": 40990, "total_steps": 78105, "loss": 0.2002, "lr": 2.7197134684770288e-06, "epoch": 2.624031752128545, "percentage": 52.48, "elapsed_time": "1:47:55", "remaining_time": "1:37:43", "throughput": 19923.22, "total_tokens": 129013888} +{"current_steps": 40995, "total_steps": 78105, "loss": 0.2078, "lr": 2.719156972300911e-06, "epoch": 2.6243518340695218, "percentage": 52.49, "elapsed_time": "1:47:56", "remaining_time": "1:37:42", "throughput": 19923.57, "total_tokens": 129029376} +{"current_steps": 41000, "total_steps": 78105, "loss": 0.2925, "lr": 2.718600465181236e-06, "epoch": 2.6246719160104988, "percentage": 52.49, "elapsed_time": "1:47:56", "remaining_time": "1:37:41", "throughput": 19923.98, "total_tokens": 129045888} +{"current_steps": 41005, "total_steps": 78105, "loss": 0.1804, "lr": 2.718043947145792e-06, "epoch": 2.6249919979514758, "percentage": 52.5, "elapsed_time": "1:47:57", "remaining_time": "1:37:40", "throughput": 19924.27, "total_tokens": 129060928} +{"current_steps": 41010, "total_steps": 78105, "loss": 0.3181, "lr": 2.71748741822237e-06, "epoch": 2.6253120798924527, "percentage": 52.51, "elapsed_time": "1:47:58", "remaining_time": "1:37:39", "throughput": 19924.54, "total_tokens": 129075776} +{"current_steps": 41015, "total_steps": 78105, "loss": 0.1856, "lr": 2.716930878438758e-06, "epoch": 2.6256321618334293, "percentage": 52.51, "elapsed_time": "1:47:58", "remaining_time": "1:37:38", "throughput": 19924.97, "total_tokens": 129092928} +{"current_steps": 41020, "total_steps": 78105, "loss": 0.1736, "lr": 2.716374327822748e-06, "epoch": 2.6259522437744063, "percentage": 52.52, "elapsed_time": "1:47:59", "remaining_time": "1:37:38", "throughput": 19925.29, "total_tokens": 129107776} +{"current_steps": 41025, "total_steps": 78105, "loss": 0.224, "lr": 2.7158177664021316e-06, "epoch": 2.6262723257153833, "percentage": 52.53, "elapsed_time": "1:48:00", "remaining_time": "1:37:37", "throughput": 19925.63, "total_tokens": 129123392} +{"current_steps": 41030, "total_steps": 78105, "loss": 0.2394, "lr": 2.7152611942047e-06, "epoch": 2.62659240765636, "percentage": 52.53, "elapsed_time": "1:48:00", "remaining_time": "1:37:36", "throughput": 19925.98, "total_tokens": 129139072} +{"current_steps": 41035, "total_steps": 78105, "loss": 0.1845, "lr": 2.714704611258246e-06, "epoch": 2.626912489597337, "percentage": 52.54, "elapsed_time": "1:48:01", "remaining_time": "1:37:35", "throughput": 19926.26, "total_tokens": 129153856} +{"current_steps": 41040, "total_steps": 78105, "loss": 0.2173, "lr": 2.7141480175905615e-06, "epoch": 2.627232571538314, "percentage": 52.54, "elapsed_time": "1:48:02", "remaining_time": "1:37:34", "throughput": 19926.65, "total_tokens": 129169920} +{"current_steps": 41045, "total_steps": 78105, "loss": 0.1824, "lr": 2.713591413229441e-06, "epoch": 2.627552653479291, "percentage": 52.55, "elapsed_time": "1:48:02", "remaining_time": "1:37:33", "throughput": 19926.97, "total_tokens": 129185344} +{"current_steps": 41050, "total_steps": 78105, "loss": 0.1922, "lr": 2.713034798202677e-06, "epoch": 2.627872735420268, "percentage": 52.56, "elapsed_time": "1:48:03", "remaining_time": "1:37:32", "throughput": 19927.3, "total_tokens": 129201152} +{"current_steps": 41055, "total_steps": 78105, "loss": 0.2191, "lr": 2.7124781725380657e-06, "epoch": 2.6281928173612443, "percentage": 52.56, "elapsed_time": "1:48:04", "remaining_time": "1:37:31", "throughput": 19927.61, "total_tokens": 129216384} +{"current_steps": 41060, "total_steps": 78105, "loss": 0.1356, "lr": 2.711921536263401e-06, "epoch": 2.6285128993022213, "percentage": 52.57, "elapsed_time": "1:48:04", "remaining_time": "1:37:30", "throughput": 19927.96, "total_tokens": 129232128} +{"current_steps": 41065, "total_steps": 78105, "loss": 0.2034, "lr": 2.711364889406479e-06, "epoch": 2.6288329812431983, "percentage": 52.58, "elapsed_time": "1:48:05", "remaining_time": "1:37:29", "throughput": 19928.34, "total_tokens": 129248512} +{"current_steps": 41070, "total_steps": 78105, "loss": 0.1895, "lr": 2.7108082319950946e-06, "epoch": 2.6291530631841753, "percentage": 52.58, "elapsed_time": "1:48:06", "remaining_time": "1:37:29", "throughput": 19928.69, "total_tokens": 129264448} +{"current_steps": 41075, "total_steps": 78105, "loss": 0.2898, "lr": 2.710251564057045e-06, "epoch": 2.629473145125152, "percentage": 52.59, "elapsed_time": "1:48:06", "remaining_time": "1:37:28", "throughput": 19928.98, "total_tokens": 129279296} +{"current_steps": 41080, "total_steps": 78105, "loss": 0.1662, "lr": 2.709694885620128e-06, "epoch": 2.629793227066129, "percentage": 52.6, "elapsed_time": "1:48:07", "remaining_time": "1:37:27", "throughput": 19929.36, "total_tokens": 129295552} +{"current_steps": 41085, "total_steps": 78105, "loss": 0.2313, "lr": 2.7091381967121405e-06, "epoch": 2.630113309007106, "percentage": 52.6, "elapsed_time": "1:48:08", "remaining_time": "1:37:26", "throughput": 19929.65, "total_tokens": 129310720} +{"current_steps": 41090, "total_steps": 78105, "loss": 0.3258, "lr": 2.708581497360881e-06, "epoch": 2.630433390948083, "percentage": 52.61, "elapsed_time": "1:48:09", "remaining_time": "1:37:25", "throughput": 19930.0, "total_tokens": 129326400} +{"current_steps": 41095, "total_steps": 78105, "loss": 0.1606, "lr": 2.708024787594148e-06, "epoch": 2.63075347288906, "percentage": 52.62, "elapsed_time": "1:48:09", "remaining_time": "1:37:24", "throughput": 19930.44, "total_tokens": 129343232} +{"current_steps": 41100, "total_steps": 78105, "loss": 0.2334, "lr": 2.7074680674397408e-06, "epoch": 2.6310735548300364, "percentage": 52.62, "elapsed_time": "1:48:10", "remaining_time": "1:37:23", "throughput": 19930.73, "total_tokens": 129358080} +{"current_steps": 41105, "total_steps": 78105, "loss": 0.2308, "lr": 2.7069113369254584e-06, "epoch": 2.6313936367710133, "percentage": 52.63, "elapsed_time": "1:48:11", "remaining_time": "1:37:22", "throughput": 19931.01, "total_tokens": 129373120} +{"current_steps": 41110, "total_steps": 78105, "loss": 0.2637, "lr": 2.7063545960791015e-06, "epoch": 2.6317137187119903, "percentage": 52.63, "elapsed_time": "1:48:11", "remaining_time": "1:37:21", "throughput": 19931.36, "total_tokens": 129388800} +{"current_steps": 41115, "total_steps": 78105, "loss": 0.254, "lr": 2.7057978449284716e-06, "epoch": 2.6320338006529673, "percentage": 52.64, "elapsed_time": "1:48:12", "remaining_time": "1:37:21", "throughput": 19931.77, "total_tokens": 129405248} +{"current_steps": 41120, "total_steps": 78105, "loss": 0.4129, "lr": 2.7052410835013694e-06, "epoch": 2.632353882593944, "percentage": 52.65, "elapsed_time": "1:48:13", "remaining_time": "1:37:20", "throughput": 19932.24, "total_tokens": 129422784} +{"current_steps": 41125, "total_steps": 78105, "loss": 0.2204, "lr": 2.7046843118255963e-06, "epoch": 2.632673964534921, "percentage": 52.65, "elapsed_time": "1:48:13", "remaining_time": "1:37:19", "throughput": 19932.57, "total_tokens": 129438528} +{"current_steps": 41130, "total_steps": 78105, "loss": 0.2269, "lr": 2.704127529928955e-06, "epoch": 2.632994046475898, "percentage": 52.66, "elapsed_time": "1:48:14", "remaining_time": "1:37:18", "throughput": 19932.89, "total_tokens": 129453632} +{"current_steps": 41135, "total_steps": 78105, "loss": 0.16, "lr": 2.7035707378392473e-06, "epoch": 2.633314128416875, "percentage": 52.67, "elapsed_time": "1:48:15", "remaining_time": "1:37:17", "throughput": 19933.25, "total_tokens": 129469376} +{"current_steps": 41140, "total_steps": 78105, "loss": 0.2356, "lr": 2.7030139355842783e-06, "epoch": 2.633634210357852, "percentage": 52.67, "elapsed_time": "1:48:15", "remaining_time": "1:37:16", "throughput": 19933.59, "total_tokens": 129484928} +{"current_steps": 41145, "total_steps": 78105, "loss": 0.2529, "lr": 2.7024571231918495e-06, "epoch": 2.6339542922988284, "percentage": 52.68, "elapsed_time": "1:48:16", "remaining_time": "1:37:15", "throughput": 19933.92, "total_tokens": 129500544} +{"current_steps": 41150, "total_steps": 78105, "loss": 0.2412, "lr": 2.7019003006897675e-06, "epoch": 2.6342743742398054, "percentage": 52.69, "elapsed_time": "1:48:17", "remaining_time": "1:37:14", "throughput": 19934.24, "total_tokens": 129515904} +{"current_steps": 41155, "total_steps": 78105, "loss": 0.1943, "lr": 2.7013434681058363e-06, "epoch": 2.6345944561807824, "percentage": 52.69, "elapsed_time": "1:48:17", "remaining_time": "1:37:13", "throughput": 19934.57, "total_tokens": 129531328} +{"current_steps": 41160, "total_steps": 78105, "loss": 0.2205, "lr": 2.700786625467861e-06, "epoch": 2.634914538121759, "percentage": 52.7, "elapsed_time": "1:48:18", "remaining_time": "1:37:13", "throughput": 19934.92, "total_tokens": 129546752} +{"current_steps": 41165, "total_steps": 78105, "loss": 0.2483, "lr": 2.700229772803647e-06, "epoch": 2.635234620062736, "percentage": 52.7, "elapsed_time": "1:48:19", "remaining_time": "1:37:12", "throughput": 19935.36, "total_tokens": 129564096} +{"current_steps": 41170, "total_steps": 78105, "loss": 0.2401, "lr": 2.6996729101410013e-06, "epoch": 2.635554702003713, "percentage": 52.71, "elapsed_time": "1:48:19", "remaining_time": "1:37:11", "throughput": 19935.72, "total_tokens": 129580032} +{"current_steps": 41175, "total_steps": 78105, "loss": 0.2484, "lr": 2.6991160375077308e-06, "epoch": 2.63587478394469, "percentage": 52.72, "elapsed_time": "1:48:20", "remaining_time": "1:37:10", "throughput": 19936.03, "total_tokens": 129594944} +{"current_steps": 41180, "total_steps": 78105, "loss": 0.229, "lr": 2.6985591549316424e-06, "epoch": 2.636194865885667, "percentage": 52.72, "elapsed_time": "1:48:21", "remaining_time": "1:37:09", "throughput": 19936.39, "total_tokens": 129610752} +{"current_steps": 41185, "total_steps": 78105, "loss": 0.1819, "lr": 2.6980022624405446e-06, "epoch": 2.636514947826644, "percentage": 52.73, "elapsed_time": "1:48:21", "remaining_time": "1:37:08", "throughput": 19936.72, "total_tokens": 129626112} +{"current_steps": 41190, "total_steps": 78105, "loss": 0.1814, "lr": 2.6974453600622442e-06, "epoch": 2.6368350297676204, "percentage": 52.74, "elapsed_time": "1:48:22", "remaining_time": "1:37:07", "throughput": 19937.05, "total_tokens": 129641408} +{"current_steps": 41195, "total_steps": 78105, "loss": 0.2203, "lr": 2.6968884478245515e-06, "epoch": 2.6371551117085974, "percentage": 52.74, "elapsed_time": "1:48:23", "remaining_time": "1:37:06", "throughput": 19937.37, "total_tokens": 129656448} +{"current_steps": 41200, "total_steps": 78105, "loss": 0.1885, "lr": 2.6963315257552754e-06, "epoch": 2.6374751936495744, "percentage": 52.75, "elapsed_time": "1:48:23", "remaining_time": "1:37:05", "throughput": 19937.66, "total_tokens": 129671488} +{"current_steps": 41205, "total_steps": 78105, "loss": 0.3412, "lr": 2.6957745938822248e-06, "epoch": 2.637795275590551, "percentage": 52.76, "elapsed_time": "1:48:24", "remaining_time": "1:37:04", "throughput": 19938.07, "total_tokens": 129688192} +{"current_steps": 41210, "total_steps": 78105, "loss": 0.2104, "lr": 2.695217652233211e-06, "epoch": 2.638115357531528, "percentage": 52.76, "elapsed_time": "1:48:25", "remaining_time": "1:37:04", "throughput": 19938.49, "total_tokens": 129705024} +{"current_steps": 41215, "total_steps": 78105, "loss": 0.2396, "lr": 2.694660700836045e-06, "epoch": 2.638435439472505, "percentage": 52.77, "elapsed_time": "1:48:25", "remaining_time": "1:37:03", "throughput": 19938.77, "total_tokens": 129719616} +{"current_steps": 41220, "total_steps": 78105, "loss": 0.2503, "lr": 2.6941037397185375e-06, "epoch": 2.638755521413482, "percentage": 52.78, "elapsed_time": "1:48:26", "remaining_time": "1:37:02", "throughput": 19939.07, "total_tokens": 129734848} +{"current_steps": 41225, "total_steps": 78105, "loss": 0.3266, "lr": 2.6935467689085e-06, "epoch": 2.639075603354459, "percentage": 52.78, "elapsed_time": "1:48:27", "remaining_time": "1:37:01", "throughput": 19939.43, "total_tokens": 129750784} +{"current_steps": 41230, "total_steps": 78105, "loss": 0.2465, "lr": 2.6929897884337454e-06, "epoch": 2.639395685295436, "percentage": 52.79, "elapsed_time": "1:48:27", "remaining_time": "1:37:00", "throughput": 19939.77, "total_tokens": 129766720} +{"current_steps": 41235, "total_steps": 78105, "loss": 0.2098, "lr": 2.6924327983220856e-06, "epoch": 2.6397157672364124, "percentage": 52.79, "elapsed_time": "1:48:28", "remaining_time": "1:36:59", "throughput": 19940.22, "total_tokens": 129784192} +{"current_steps": 41240, "total_steps": 78105, "loss": 0.2653, "lr": 2.6918757986013346e-06, "epoch": 2.6400358491773894, "percentage": 52.8, "elapsed_time": "1:48:29", "remaining_time": "1:36:58", "throughput": 19940.52, "total_tokens": 129799424} +{"current_steps": 41245, "total_steps": 78105, "loss": 0.2272, "lr": 2.6913187892993053e-06, "epoch": 2.6403559311183664, "percentage": 52.81, "elapsed_time": "1:48:29", "remaining_time": "1:36:57", "throughput": 19940.83, "total_tokens": 129814720} +{"current_steps": 41250, "total_steps": 78105, "loss": 0.1359, "lr": 2.6907617704438115e-06, "epoch": 2.640676013059343, "percentage": 52.81, "elapsed_time": "1:48:30", "remaining_time": "1:36:57", "throughput": 19941.22, "total_tokens": 129831040} +{"current_steps": 41255, "total_steps": 78105, "loss": 0.3712, "lr": 2.69020474206267e-06, "epoch": 2.64099609500032, "percentage": 52.82, "elapsed_time": "1:48:31", "remaining_time": "1:36:56", "throughput": 19941.49, "total_tokens": 129845440} +{"current_steps": 41260, "total_steps": 78105, "loss": 0.2336, "lr": 2.6896477041836934e-06, "epoch": 2.641316176941297, "percentage": 52.83, "elapsed_time": "1:48:32", "remaining_time": "1:36:55", "throughput": 19941.88, "total_tokens": 129861568} +{"current_steps": 41265, "total_steps": 78105, "loss": 0.2963, "lr": 2.6890906568346987e-06, "epoch": 2.641636258882274, "percentage": 52.83, "elapsed_time": "1:48:32", "remaining_time": "1:36:54", "throughput": 19942.2, "total_tokens": 129877056} +{"current_steps": 41270, "total_steps": 78105, "loss": 0.2921, "lr": 2.6885336000435016e-06, "epoch": 2.641956340823251, "percentage": 52.84, "elapsed_time": "1:48:33", "remaining_time": "1:36:53", "throughput": 19942.48, "total_tokens": 129891776} +{"current_steps": 41275, "total_steps": 78105, "loss": 0.1619, "lr": 2.6879765338379183e-06, "epoch": 2.642276422764228, "percentage": 52.85, "elapsed_time": "1:48:33", "remaining_time": "1:36:52", "throughput": 19942.81, "total_tokens": 129906816} +{"current_steps": 41280, "total_steps": 78105, "loss": 0.1129, "lr": 2.6874194582457657e-06, "epoch": 2.6425965047052045, "percentage": 52.85, "elapsed_time": "1:48:34", "remaining_time": "1:36:51", "throughput": 19943.12, "total_tokens": 129922368} +{"current_steps": 41285, "total_steps": 78105, "loss": 0.2401, "lr": 2.6868623732948617e-06, "epoch": 2.6429165866461815, "percentage": 52.86, "elapsed_time": "1:48:35", "remaining_time": "1:36:50", "throughput": 19943.48, "total_tokens": 129938624} +{"current_steps": 41290, "total_steps": 78105, "loss": 0.1611, "lr": 2.686305279013025e-06, "epoch": 2.6432366685871584, "percentage": 52.86, "elapsed_time": "1:48:36", "remaining_time": "1:36:49", "throughput": 19943.8, "total_tokens": 129953984} +{"current_steps": 41295, "total_steps": 78105, "loss": 0.2679, "lr": 2.685748175428072e-06, "epoch": 2.643556750528135, "percentage": 52.87, "elapsed_time": "1:48:36", "remaining_time": "1:36:48", "throughput": 19944.12, "total_tokens": 129969408} +{"current_steps": 41300, "total_steps": 78105, "loss": 0.1871, "lr": 2.685191062567824e-06, "epoch": 2.643876832469112, "percentage": 52.88, "elapsed_time": "1:48:37", "remaining_time": "1:36:48", "throughput": 19944.44, "total_tokens": 129984704} +{"current_steps": 41305, "total_steps": 78105, "loss": 0.2274, "lr": 2.6846339404600973e-06, "epoch": 2.644196914410089, "percentage": 52.88, "elapsed_time": "1:48:38", "remaining_time": "1:36:47", "throughput": 19944.81, "total_tokens": 130000704} +{"current_steps": 41310, "total_steps": 78105, "loss": 0.2339, "lr": 2.6840768091327147e-06, "epoch": 2.644516996351066, "percentage": 52.89, "elapsed_time": "1:48:38", "remaining_time": "1:36:46", "throughput": 19945.08, "total_tokens": 130015232} +{"current_steps": 41315, "total_steps": 78105, "loss": 0.2078, "lr": 2.6835196686134945e-06, "epoch": 2.644837078292043, "percentage": 52.9, "elapsed_time": "1:48:39", "remaining_time": "1:36:45", "throughput": 19945.41, "total_tokens": 130030720} +{"current_steps": 41320, "total_steps": 78105, "loss": 0.1687, "lr": 2.682962518930258e-06, "epoch": 2.6451571602330195, "percentage": 52.9, "elapsed_time": "1:48:40", "remaining_time": "1:36:44", "throughput": 19945.74, "total_tokens": 130046272} +{"current_steps": 41325, "total_steps": 78105, "loss": 0.2655, "lr": 2.682405360110826e-06, "epoch": 2.6454772421739965, "percentage": 52.91, "elapsed_time": "1:48:40", "remaining_time": "1:36:43", "throughput": 19946.05, "total_tokens": 130061760} +{"current_steps": 41330, "total_steps": 78105, "loss": 0.1826, "lr": 2.6818481921830207e-06, "epoch": 2.6457973241149735, "percentage": 52.92, "elapsed_time": "1:48:41", "remaining_time": "1:36:42", "throughput": 19946.37, "total_tokens": 130077312} +{"current_steps": 41335, "total_steps": 78105, "loss": 0.1732, "lr": 2.681291015174664e-06, "epoch": 2.6461174060559505, "percentage": 52.92, "elapsed_time": "1:48:42", "remaining_time": "1:36:41", "throughput": 19946.88, "total_tokens": 130095808} +{"current_steps": 41340, "total_steps": 78105, "loss": 0.2907, "lr": 2.680733829113578e-06, "epoch": 2.646437487996927, "percentage": 52.93, "elapsed_time": "1:48:42", "remaining_time": "1:36:40", "throughput": 19947.16, "total_tokens": 130110784} +{"current_steps": 41345, "total_steps": 78105, "loss": 0.2794, "lr": 2.6801766340275865e-06, "epoch": 2.646757569937904, "percentage": 52.94, "elapsed_time": "1:48:43", "remaining_time": "1:36:40", "throughput": 19947.51, "total_tokens": 130126400} +{"current_steps": 41350, "total_steps": 78105, "loss": 0.2194, "lr": 2.6796194299445123e-06, "epoch": 2.647077651878881, "percentage": 52.94, "elapsed_time": "1:48:44", "remaining_time": "1:36:39", "throughput": 19947.84, "total_tokens": 130142208} +{"current_steps": 41355, "total_steps": 78105, "loss": 0.2653, "lr": 2.6790622168921794e-06, "epoch": 2.647397733819858, "percentage": 52.95, "elapsed_time": "1:48:44", "remaining_time": "1:36:38", "throughput": 19948.17, "total_tokens": 130157504} +{"current_steps": 41360, "total_steps": 78105, "loss": 0.158, "lr": 2.6785049948984122e-06, "epoch": 2.647717815760835, "percentage": 52.95, "elapsed_time": "1:48:45", "remaining_time": "1:36:37", "throughput": 19948.53, "total_tokens": 130173376} +{"current_steps": 41365, "total_steps": 78105, "loss": 0.1953, "lr": 2.6779477639910344e-06, "epoch": 2.6480378977018115, "percentage": 52.96, "elapsed_time": "1:48:46", "remaining_time": "1:36:36", "throughput": 19948.85, "total_tokens": 130188800} +{"current_steps": 41370, "total_steps": 78105, "loss": 0.1425, "lr": 2.677390524197873e-06, "epoch": 2.6483579796427885, "percentage": 52.97, "elapsed_time": "1:48:46", "remaining_time": "1:36:35", "throughput": 19949.32, "total_tokens": 130206592} +{"current_steps": 41375, "total_steps": 78105, "loss": 0.1956, "lr": 2.676833275546753e-06, "epoch": 2.6486780615837655, "percentage": 52.97, "elapsed_time": "1:48:47", "remaining_time": "1:36:34", "throughput": 19949.78, "total_tokens": 130224576} +{"current_steps": 41380, "total_steps": 78105, "loss": 0.1839, "lr": 2.6762760180654995e-06, "epoch": 2.6489981435247425, "percentage": 52.98, "elapsed_time": "1:48:48", "remaining_time": "1:36:33", "throughput": 19950.21, "total_tokens": 130241920} +{"current_steps": 41385, "total_steps": 78105, "loss": 0.2494, "lr": 2.6757187517819406e-06, "epoch": 2.649318225465719, "percentage": 52.99, "elapsed_time": "1:48:49", "remaining_time": "1:36:33", "throughput": 19950.62, "total_tokens": 130258624} +{"current_steps": 41390, "total_steps": 78105, "loss": 0.211, "lr": 2.675161476723902e-06, "epoch": 2.649638307406696, "percentage": 52.99, "elapsed_time": "1:48:49", "remaining_time": "1:36:32", "throughput": 19950.93, "total_tokens": 130273536} +{"current_steps": 41395, "total_steps": 78105, "loss": 0.158, "lr": 2.6746041929192113e-06, "epoch": 2.649958389347673, "percentage": 53.0, "elapsed_time": "1:48:50", "remaining_time": "1:36:31", "throughput": 19951.22, "total_tokens": 130288384} +{"current_steps": 41400, "total_steps": 78105, "loss": 0.3643, "lr": 2.6740469003956974e-06, "epoch": 2.65027847128865, "percentage": 53.01, "elapsed_time": "1:48:51", "remaining_time": "1:36:30", "throughput": 19951.58, "total_tokens": 130304192} +{"current_steps": 41405, "total_steps": 78105, "loss": 0.1569, "lr": 2.6734895991811884e-06, "epoch": 2.650598553229627, "percentage": 53.01, "elapsed_time": "1:48:51", "remaining_time": "1:36:29", "throughput": 19951.92, "total_tokens": 130319680} +{"current_steps": 41410, "total_steps": 78105, "loss": 0.1823, "lr": 2.6729322893035114e-06, "epoch": 2.6509186351706036, "percentage": 53.02, "elapsed_time": "1:48:52", "remaining_time": "1:36:28", "throughput": 19952.21, "total_tokens": 130334592} +{"current_steps": 41415, "total_steps": 78105, "loss": 0.1812, "lr": 2.6723749707904974e-06, "epoch": 2.6512387171115805, "percentage": 53.02, "elapsed_time": "1:48:53", "remaining_time": "1:36:27", "throughput": 19952.55, "total_tokens": 130350272} +{"current_steps": 41420, "total_steps": 78105, "loss": 0.2267, "lr": 2.6718176436699744e-06, "epoch": 2.6515587990525575, "percentage": 53.03, "elapsed_time": "1:48:53", "remaining_time": "1:36:26", "throughput": 19952.83, "total_tokens": 130364992} +{"current_steps": 41425, "total_steps": 78105, "loss": 0.301, "lr": 2.6712603079697742e-06, "epoch": 2.651878880993534, "percentage": 53.04, "elapsed_time": "1:48:54", "remaining_time": "1:36:25", "throughput": 19953.24, "total_tokens": 130381376} +{"current_steps": 41430, "total_steps": 78105, "loss": 0.2851, "lr": 2.6707029637177257e-06, "epoch": 2.652198962934511, "percentage": 53.04, "elapsed_time": "1:48:55", "remaining_time": "1:36:25", "throughput": 19953.65, "total_tokens": 130398016} +{"current_steps": 41435, "total_steps": 78105, "loss": 0.1436, "lr": 2.6701456109416602e-06, "epoch": 2.652519044875488, "percentage": 53.05, "elapsed_time": "1:48:55", "remaining_time": "1:36:24", "throughput": 19953.97, "total_tokens": 130413248} +{"current_steps": 41440, "total_steps": 78105, "loss": 0.2448, "lr": 2.669588249669409e-06, "epoch": 2.652839126816465, "percentage": 53.06, "elapsed_time": "1:48:56", "remaining_time": "1:36:23", "throughput": 19954.24, "total_tokens": 130427776} +{"current_steps": 41445, "total_steps": 78105, "loss": 0.2274, "lr": 2.6690308799288043e-06, "epoch": 2.653159208757442, "percentage": 53.06, "elapsed_time": "1:48:57", "remaining_time": "1:36:22", "throughput": 19954.63, "total_tokens": 130444288} +{"current_steps": 41450, "total_steps": 78105, "loss": 0.2413, "lr": 2.668473501747679e-06, "epoch": 2.653479290698419, "percentage": 53.07, "elapsed_time": "1:48:57", "remaining_time": "1:36:21", "throughput": 19954.92, "total_tokens": 130459456} +{"current_steps": 41455, "total_steps": 78105, "loss": 0.2521, "lr": 2.6679161151538623e-06, "epoch": 2.6537993726393956, "percentage": 53.08, "elapsed_time": "1:48:58", "remaining_time": "1:36:20", "throughput": 19955.24, "total_tokens": 130474880} +{"current_steps": 41460, "total_steps": 78105, "loss": 0.2512, "lr": 2.6673587201751912e-06, "epoch": 2.6541194545803726, "percentage": 53.08, "elapsed_time": "1:48:59", "remaining_time": "1:36:19", "throughput": 19955.63, "total_tokens": 130491136} +{"current_steps": 41465, "total_steps": 78105, "loss": 0.1614, "lr": 2.666801316839497e-06, "epoch": 2.6544395365213496, "percentage": 53.09, "elapsed_time": "1:48:59", "remaining_time": "1:36:18", "throughput": 19955.98, "total_tokens": 130507072} +{"current_steps": 41470, "total_steps": 78105, "loss": 0.237, "lr": 2.6662439051746136e-06, "epoch": 2.654759618462326, "percentage": 53.1, "elapsed_time": "1:49:00", "remaining_time": "1:36:17", "throughput": 19956.3, "total_tokens": 130522624} +{"current_steps": 41475, "total_steps": 78105, "loss": 0.1732, "lr": 2.6656864852083758e-06, "epoch": 2.655079700403303, "percentage": 53.1, "elapsed_time": "1:49:01", "remaining_time": "1:36:16", "throughput": 19956.59, "total_tokens": 130537536} +{"current_steps": 41480, "total_steps": 78105, "loss": 0.3565, "lr": 2.6651290569686174e-06, "epoch": 2.65539978234428, "percentage": 53.11, "elapsed_time": "1:49:01", "remaining_time": "1:36:16", "throughput": 19956.91, "total_tokens": 130552960} +{"current_steps": 41485, "total_steps": 78105, "loss": 0.152, "lr": 2.6645716204831745e-06, "epoch": 2.655719864285257, "percentage": 53.11, "elapsed_time": "1:49:02", "remaining_time": "1:36:15", "throughput": 19957.32, "total_tokens": 130569856} +{"current_steps": 41490, "total_steps": 78105, "loss": 0.2822, "lr": 2.664014175779882e-06, "epoch": 2.656039946226234, "percentage": 53.12, "elapsed_time": "1:49:03", "remaining_time": "1:36:14", "throughput": 19957.61, "total_tokens": 130585024} +{"current_steps": 41495, "total_steps": 78105, "loss": 0.2037, "lr": 2.663456722886576e-06, "epoch": 2.656360028167211, "percentage": 53.13, "elapsed_time": "1:49:03", "remaining_time": "1:36:13", "throughput": 19957.92, "total_tokens": 130600384} +{"current_steps": 41500, "total_steps": 78105, "loss": 0.3533, "lr": 2.6628992618310923e-06, "epoch": 2.6566801101081876, "percentage": 53.13, "elapsed_time": "1:49:04", "remaining_time": "1:36:12", "throughput": 19958.3, "total_tokens": 130616768} +{"current_steps": 41505, "total_steps": 78105, "loss": 0.204, "lr": 2.6623417926412686e-06, "epoch": 2.6570001920491646, "percentage": 53.14, "elapsed_time": "1:49:05", "remaining_time": "1:36:11", "throughput": 19958.7, "total_tokens": 130633536} +{"current_steps": 41510, "total_steps": 78105, "loss": 0.2205, "lr": 2.6617843153449407e-06, "epoch": 2.6573202739901416, "percentage": 53.15, "elapsed_time": "1:49:05", "remaining_time": "1:36:10", "throughput": 19959.03, "total_tokens": 130649152} +{"current_steps": 41515, "total_steps": 78105, "loss": 0.199, "lr": 2.6612268299699474e-06, "epoch": 2.657640355931118, "percentage": 53.15, "elapsed_time": "1:49:06", "remaining_time": "1:36:09", "throughput": 19959.41, "total_tokens": 130665792} +{"current_steps": 41520, "total_steps": 78105, "loss": 0.2726, "lr": 2.6606693365441254e-06, "epoch": 2.657960437872095, "percentage": 53.16, "elapsed_time": "1:49:07", "remaining_time": "1:36:09", "throughput": 19959.72, "total_tokens": 130681472} +{"current_steps": 41525, "total_steps": 78105, "loss": 0.232, "lr": 2.660111835095314e-06, "epoch": 2.658280519813072, "percentage": 53.17, "elapsed_time": "1:49:07", "remaining_time": "1:36:08", "throughput": 19960.06, "total_tokens": 130697408} +{"current_steps": 41530, "total_steps": 78105, "loss": 0.2411, "lr": 2.6595543256513515e-06, "epoch": 2.658600601754049, "percentage": 53.17, "elapsed_time": "1:49:08", "remaining_time": "1:36:07", "throughput": 19960.34, "total_tokens": 130712512} +{"current_steps": 41535, "total_steps": 78105, "loss": 0.2569, "lr": 2.658996808240077e-06, "epoch": 2.658920683695026, "percentage": 53.18, "elapsed_time": "1:49:09", "remaining_time": "1:36:06", "throughput": 19960.7, "total_tokens": 130728704} +{"current_steps": 41540, "total_steps": 78105, "loss": 0.2186, "lr": 2.65843928288933e-06, "epoch": 2.659240765636003, "percentage": 53.18, "elapsed_time": "1:49:09", "remaining_time": "1:36:05", "throughput": 19961.02, "total_tokens": 130744064} +{"current_steps": 41545, "total_steps": 78105, "loss": 0.1895, "lr": 2.657881749626951e-06, "epoch": 2.6595608475769796, "percentage": 53.19, "elapsed_time": "1:49:10", "remaining_time": "1:36:04", "throughput": 19961.39, "total_tokens": 130760384} +{"current_steps": 41550, "total_steps": 78105, "loss": 0.2509, "lr": 2.6573242084807794e-06, "epoch": 2.6598809295179566, "percentage": 53.2, "elapsed_time": "1:49:11", "remaining_time": "1:36:03", "throughput": 19961.74, "total_tokens": 130776256} +{"current_steps": 41555, "total_steps": 78105, "loss": 0.2283, "lr": 2.656766659478656e-06, "epoch": 2.6602010114589336, "percentage": 53.2, "elapsed_time": "1:49:12", "remaining_time": "1:36:02", "throughput": 19962.15, "total_tokens": 130793024} +{"current_steps": 41560, "total_steps": 78105, "loss": 0.1837, "lr": 2.6562091026484226e-06, "epoch": 2.66052109339991, "percentage": 53.21, "elapsed_time": "1:49:12", "remaining_time": "1:36:02", "throughput": 19962.45, "total_tokens": 130808384} +{"current_steps": 41565, "total_steps": 78105, "loss": 0.248, "lr": 2.6556515380179203e-06, "epoch": 2.660841175340887, "percentage": 53.22, "elapsed_time": "1:49:13", "remaining_time": "1:36:01", "throughput": 19962.71, "total_tokens": 130822848} +{"current_steps": 41570, "total_steps": 78105, "loss": 0.1602, "lr": 2.6550939656149906e-06, "epoch": 2.661161257281864, "percentage": 53.22, "elapsed_time": "1:49:14", "remaining_time": "1:36:00", "throughput": 19963.05, "total_tokens": 130838784} +{"current_steps": 41575, "total_steps": 78105, "loss": 0.2316, "lr": 2.6545363854674765e-06, "epoch": 2.661481339222841, "percentage": 53.23, "elapsed_time": "1:49:14", "remaining_time": "1:35:59", "throughput": 19963.36, "total_tokens": 130854208} +{"current_steps": 41580, "total_steps": 78105, "loss": 0.2155, "lr": 2.6539787976032204e-06, "epoch": 2.661801421163818, "percentage": 53.24, "elapsed_time": "1:49:15", "remaining_time": "1:35:58", "throughput": 19963.68, "total_tokens": 130869248} +{"current_steps": 41585, "total_steps": 78105, "loss": 0.2503, "lr": 2.653421202050065e-06, "epoch": 2.6621215031047947, "percentage": 53.24, "elapsed_time": "1:49:16", "remaining_time": "1:35:57", "throughput": 19964.0, "total_tokens": 130884480} +{"current_steps": 41590, "total_steps": 78105, "loss": 0.248, "lr": 2.6528635988358537e-06, "epoch": 2.6624415850457717, "percentage": 53.25, "elapsed_time": "1:49:16", "remaining_time": "1:35:56", "throughput": 19964.29, "total_tokens": 130899200} +{"current_steps": 41595, "total_steps": 78105, "loss": 0.1598, "lr": 2.6523059879884305e-06, "epoch": 2.6627616669867487, "percentage": 53.26, "elapsed_time": "1:49:17", "remaining_time": "1:35:55", "throughput": 19964.6, "total_tokens": 130914752} +{"current_steps": 41600, "total_steps": 78105, "loss": 0.2021, "lr": 2.65174836953564e-06, "epoch": 2.6630817489277256, "percentage": 53.26, "elapsed_time": "1:49:18", "remaining_time": "1:35:54", "throughput": 19964.92, "total_tokens": 130930176} +{"current_steps": 41605, "total_steps": 78105, "loss": 0.1484, "lr": 2.651190743505326e-06, "epoch": 2.663401830868702, "percentage": 53.27, "elapsed_time": "1:49:18", "remaining_time": "1:35:53", "throughput": 19965.25, "total_tokens": 130945472} +{"current_steps": 41610, "total_steps": 78105, "loss": 0.2114, "lr": 2.6506331099253333e-06, "epoch": 2.663721912809679, "percentage": 53.27, "elapsed_time": "1:49:19", "remaining_time": "1:35:52", "throughput": 19965.53, "total_tokens": 130960128} +{"current_steps": 41615, "total_steps": 78105, "loss": 0.2003, "lr": 2.6500754688235085e-06, "epoch": 2.664041994750656, "percentage": 53.28, "elapsed_time": "1:49:19", "remaining_time": "1:35:52", "throughput": 19965.85, "total_tokens": 130975424} +{"current_steps": 41620, "total_steps": 78105, "loss": 0.2115, "lr": 2.6495178202276965e-06, "epoch": 2.664362076691633, "percentage": 53.29, "elapsed_time": "1:49:20", "remaining_time": "1:35:51", "throughput": 19966.14, "total_tokens": 130990016} +{"current_steps": 41625, "total_steps": 78105, "loss": 0.255, "lr": 2.6489601641657426e-06, "epoch": 2.66468215863261, "percentage": 53.29, "elapsed_time": "1:49:21", "remaining_time": "1:35:50", "throughput": 19966.5, "total_tokens": 131006272} +{"current_steps": 41630, "total_steps": 78105, "loss": 0.1549, "lr": 2.6484025006654952e-06, "epoch": 2.6650022405735867, "percentage": 53.3, "elapsed_time": "1:49:22", "remaining_time": "1:35:49", "throughput": 19966.87, "total_tokens": 131022720} +{"current_steps": 41635, "total_steps": 78105, "loss": 0.1778, "lr": 2.6478448297547993e-06, "epoch": 2.6653223225145637, "percentage": 53.31, "elapsed_time": "1:49:22", "remaining_time": "1:35:48", "throughput": 19967.15, "total_tokens": 131037312} +{"current_steps": 41640, "total_steps": 78105, "loss": 0.1708, "lr": 2.6472871514615026e-06, "epoch": 2.6656424044555407, "percentage": 53.31, "elapsed_time": "1:49:23", "remaining_time": "1:35:47", "throughput": 19967.49, "total_tokens": 131052928} +{"current_steps": 41645, "total_steps": 78105, "loss": 0.2943, "lr": 2.646729465813453e-06, "epoch": 2.6659624863965177, "percentage": 53.32, "elapsed_time": "1:49:23", "remaining_time": "1:35:46", "throughput": 19967.8, "total_tokens": 131068544} +{"current_steps": 41650, "total_steps": 78105, "loss": 0.2628, "lr": 2.6461717728384972e-06, "epoch": 2.666282568337494, "percentage": 53.33, "elapsed_time": "1:49:24", "remaining_time": "1:35:45", "throughput": 19968.12, "total_tokens": 131083968} +{"current_steps": 41655, "total_steps": 78105, "loss": 0.3024, "lr": 2.6456140725644856e-06, "epoch": 2.666602650278471, "percentage": 53.33, "elapsed_time": "1:49:25", "remaining_time": "1:35:44", "throughput": 19968.46, "total_tokens": 131099648} +{"current_steps": 41660, "total_steps": 78105, "loss": 0.2815, "lr": 2.6450563650192657e-06, "epoch": 2.666922732219448, "percentage": 53.34, "elapsed_time": "1:49:25", "remaining_time": "1:35:44", "throughput": 19968.76, "total_tokens": 131114496} +{"current_steps": 41665, "total_steps": 78105, "loss": 0.2378, "lr": 2.6444986502306863e-06, "epoch": 2.667242814160425, "percentage": 53.34, "elapsed_time": "1:49:26", "remaining_time": "1:35:43", "throughput": 19969.06, "total_tokens": 131129472} +{"current_steps": 41670, "total_steps": 78105, "loss": 0.2157, "lr": 2.643940928226596e-06, "epoch": 2.667562896101402, "percentage": 53.35, "elapsed_time": "1:49:27", "remaining_time": "1:35:42", "throughput": 19969.33, "total_tokens": 131144128} +{"current_steps": 41675, "total_steps": 78105, "loss": 0.2313, "lr": 2.643383199034847e-06, "epoch": 2.6678829780423787, "percentage": 53.36, "elapsed_time": "1:49:27", "remaining_time": "1:35:41", "throughput": 19969.68, "total_tokens": 131160192} +{"current_steps": 41680, "total_steps": 78105, "loss": 0.2001, "lr": 2.6428254626832872e-06, "epoch": 2.6682030599833557, "percentage": 53.36, "elapsed_time": "1:49:28", "remaining_time": "1:35:40", "throughput": 19970.03, "total_tokens": 131176128} +{"current_steps": 41685, "total_steps": 78105, "loss": 0.3001, "lr": 2.6422677191997668e-06, "epoch": 2.6685231419243327, "percentage": 53.37, "elapsed_time": "1:49:29", "remaining_time": "1:35:39", "throughput": 19970.46, "total_tokens": 131193536} +{"current_steps": 41690, "total_steps": 78105, "loss": 0.1506, "lr": 2.641709968612139e-06, "epoch": 2.6688432238653093, "percentage": 53.38, "elapsed_time": "1:49:30", "remaining_time": "1:35:38", "throughput": 19970.78, "total_tokens": 131209152} +{"current_steps": 41695, "total_steps": 78105, "loss": 0.4037, "lr": 2.641152210948253e-06, "epoch": 2.6691633058062862, "percentage": 53.38, "elapsed_time": "1:49:30", "remaining_time": "1:35:37", "throughput": 19971.11, "total_tokens": 131224704} +{"current_steps": 41700, "total_steps": 78105, "loss": 0.1781, "lr": 2.640594446235961e-06, "epoch": 2.6694833877472632, "percentage": 53.39, "elapsed_time": "1:49:31", "remaining_time": "1:35:36", "throughput": 19971.48, "total_tokens": 131240896} +{"current_steps": 41705, "total_steps": 78105, "loss": 0.2807, "lr": 2.6400366745031147e-06, "epoch": 2.6698034696882402, "percentage": 53.4, "elapsed_time": "1:49:32", "remaining_time": "1:35:36", "throughput": 19971.83, "total_tokens": 131256768} +{"current_steps": 41710, "total_steps": 78105, "loss": 0.236, "lr": 2.6394788957775657e-06, "epoch": 2.670123551629217, "percentage": 53.4, "elapsed_time": "1:49:32", "remaining_time": "1:35:35", "throughput": 19972.13, "total_tokens": 131271552} +{"current_steps": 41715, "total_steps": 78105, "loss": 0.2688, "lr": 2.6389211100871677e-06, "epoch": 2.670443633570194, "percentage": 53.41, "elapsed_time": "1:49:33", "remaining_time": "1:35:34", "throughput": 19972.42, "total_tokens": 131286784} +{"current_steps": 41720, "total_steps": 78105, "loss": 0.1437, "lr": 2.6383633174597735e-06, "epoch": 2.6707637155111708, "percentage": 53.42, "elapsed_time": "1:49:34", "remaining_time": "1:35:33", "throughput": 19972.75, "total_tokens": 131302272} +{"current_steps": 41725, "total_steps": 78105, "loss": 0.2011, "lr": 2.6378055179232354e-06, "epoch": 2.6710837974521477, "percentage": 53.42, "elapsed_time": "1:49:34", "remaining_time": "1:35:32", "throughput": 19973.2, "total_tokens": 131319360} +{"current_steps": 41730, "total_steps": 78105, "loss": 0.2875, "lr": 2.6372477115054067e-06, "epoch": 2.6714038793931247, "percentage": 53.43, "elapsed_time": "1:49:35", "remaining_time": "1:35:31", "throughput": 19973.54, "total_tokens": 131335232} +{"current_steps": 41735, "total_steps": 78105, "loss": 0.3015, "lr": 2.6366898982341433e-06, "epoch": 2.6717239613341013, "percentage": 53.43, "elapsed_time": "1:49:36", "remaining_time": "1:35:30", "throughput": 19973.82, "total_tokens": 131350272} +{"current_steps": 41740, "total_steps": 78105, "loss": 0.205, "lr": 2.636132078137298e-06, "epoch": 2.6720440432750783, "percentage": 53.44, "elapsed_time": "1:49:36", "remaining_time": "1:35:29", "throughput": 19974.24, "total_tokens": 131367296} +{"current_steps": 41745, "total_steps": 78105, "loss": 0.1604, "lr": 2.635574251242726e-06, "epoch": 2.6723641252160553, "percentage": 53.45, "elapsed_time": "1:49:37", "remaining_time": "1:35:29", "throughput": 19974.52, "total_tokens": 131382080} +{"current_steps": 41750, "total_steps": 78105, "loss": 0.1906, "lr": 2.6350164175782816e-06, "epoch": 2.6726842071570323, "percentage": 53.45, "elapsed_time": "1:49:38", "remaining_time": "1:35:28", "throughput": 19974.84, "total_tokens": 131397632} +{"current_steps": 41755, "total_steps": 78105, "loss": 0.26, "lr": 2.6344585771718214e-06, "epoch": 2.6730042890980092, "percentage": 53.46, "elapsed_time": "1:49:38", "remaining_time": "1:35:27", "throughput": 19975.28, "total_tokens": 131414720} +{"current_steps": 41760, "total_steps": 78105, "loss": 0.1476, "lr": 2.6339007300512e-06, "epoch": 2.6733243710389862, "percentage": 53.47, "elapsed_time": "1:49:39", "remaining_time": "1:35:26", "throughput": 19975.68, "total_tokens": 131431104} +{"current_steps": 41765, "total_steps": 78105, "loss": 0.2659, "lr": 2.6333428762442724e-06, "epoch": 2.673644452979963, "percentage": 53.47, "elapsed_time": "1:49:40", "remaining_time": "1:35:25", "throughput": 19975.99, "total_tokens": 131446208} +{"current_steps": 41770, "total_steps": 78105, "loss": 0.2564, "lr": 2.6327850157788976e-06, "epoch": 2.6739645349209398, "percentage": 53.48, "elapsed_time": "1:49:40", "remaining_time": "1:35:24", "throughput": 19976.34, "total_tokens": 131462080} +{"current_steps": 41775, "total_steps": 78105, "loss": 0.2235, "lr": 2.6322271486829303e-06, "epoch": 2.6742846168619168, "percentage": 53.49, "elapsed_time": "1:49:41", "remaining_time": "1:35:23", "throughput": 19976.67, "total_tokens": 131477696} +{"current_steps": 41780, "total_steps": 78105, "loss": 0.2871, "lr": 2.631669274984228e-06, "epoch": 2.6746046988028933, "percentage": 53.49, "elapsed_time": "1:49:42", "remaining_time": "1:35:22", "throughput": 19976.98, "total_tokens": 131492672} +{"current_steps": 41785, "total_steps": 78105, "loss": 0.1942, "lr": 2.631111394710648e-06, "epoch": 2.6749247807438703, "percentage": 53.5, "elapsed_time": "1:49:42", "remaining_time": "1:35:21", "throughput": 19977.37, "total_tokens": 131509184} +{"current_steps": 41790, "total_steps": 78105, "loss": 0.2044, "lr": 2.6305535078900473e-06, "epoch": 2.6752448626848473, "percentage": 53.5, "elapsed_time": "1:49:43", "remaining_time": "1:35:21", "throughput": 19977.69, "total_tokens": 131524672} +{"current_steps": 41795, "total_steps": 78105, "loss": 0.2911, "lr": 2.629995614550285e-06, "epoch": 2.6755649446258243, "percentage": 53.51, "elapsed_time": "1:49:44", "remaining_time": "1:35:20", "throughput": 19978.03, "total_tokens": 131540800} +{"current_steps": 41800, "total_steps": 78105, "loss": 0.1487, "lr": 2.6294377147192186e-06, "epoch": 2.6758850265668013, "percentage": 53.52, "elapsed_time": "1:49:44", "remaining_time": "1:35:19", "throughput": 19978.32, "total_tokens": 131555520} +{"current_steps": 41805, "total_steps": 78105, "loss": 0.2589, "lr": 2.628879808424707e-06, "epoch": 2.6762051085077783, "percentage": 53.52, "elapsed_time": "1:49:45", "remaining_time": "1:35:18", "throughput": 19978.66, "total_tokens": 131571520} +{"current_steps": 41810, "total_steps": 78105, "loss": 0.2274, "lr": 2.6283218956946096e-06, "epoch": 2.676525190448755, "percentage": 53.53, "elapsed_time": "1:49:46", "remaining_time": "1:35:17", "throughput": 19978.94, "total_tokens": 131586304} +{"current_steps": 41815, "total_steps": 78105, "loss": 0.2239, "lr": 2.6277639765567842e-06, "epoch": 2.676845272389732, "percentage": 53.54, "elapsed_time": "1:49:46", "remaining_time": "1:35:16", "throughput": 19979.23, "total_tokens": 131601472} +{"current_steps": 41820, "total_steps": 78105, "loss": 0.2437, "lr": 2.627206051039092e-06, "epoch": 2.677165354330709, "percentage": 53.54, "elapsed_time": "1:49:47", "remaining_time": "1:35:15", "throughput": 19979.61, "total_tokens": 131618176} +{"current_steps": 41825, "total_steps": 78105, "loss": 0.1405, "lr": 2.6266481191693915e-06, "epoch": 2.6774854362716853, "percentage": 53.55, "elapsed_time": "1:49:48", "remaining_time": "1:35:14", "throughput": 19979.97, "total_tokens": 131634368} +{"current_steps": 41830, "total_steps": 78105, "loss": 0.3287, "lr": 2.626090180975544e-06, "epoch": 2.6778055182126623, "percentage": 53.56, "elapsed_time": "1:49:48", "remaining_time": "1:35:13", "throughput": 19980.27, "total_tokens": 131649472} +{"current_steps": 41835, "total_steps": 78105, "loss": 0.1763, "lr": 2.62553223648541e-06, "epoch": 2.6781256001536393, "percentage": 53.56, "elapsed_time": "1:49:49", "remaining_time": "1:35:13", "throughput": 19980.52, "total_tokens": 131663552} +{"current_steps": 41840, "total_steps": 78105, "loss": 0.2578, "lr": 2.62497428572685e-06, "epoch": 2.6784456820946163, "percentage": 53.57, "elapsed_time": "1:49:50", "remaining_time": "1:35:12", "throughput": 19980.76, "total_tokens": 131677760} +{"current_steps": 41845, "total_steps": 78105, "loss": 0.3469, "lr": 2.6244163287277247e-06, "epoch": 2.6787657640355933, "percentage": 53.58, "elapsed_time": "1:49:50", "remaining_time": "1:35:11", "throughput": 19981.08, "total_tokens": 131693504} +{"current_steps": 41850, "total_steps": 78105, "loss": 0.163, "lr": 2.623858365515896e-06, "epoch": 2.67908584597657, "percentage": 53.58, "elapsed_time": "1:49:51", "remaining_time": "1:35:10", "throughput": 19981.4, "total_tokens": 131708992} +{"current_steps": 41855, "total_steps": 78105, "loss": 0.2484, "lr": 2.6233003961192255e-06, "epoch": 2.679405927917547, "percentage": 53.59, "elapsed_time": "1:49:52", "remaining_time": "1:35:09", "throughput": 19981.72, "total_tokens": 131724416} +{"current_steps": 41860, "total_steps": 78105, "loss": 0.1891, "lr": 2.6227424205655762e-06, "epoch": 2.679726009858524, "percentage": 53.59, "elapsed_time": "1:49:52", "remaining_time": "1:35:08", "throughput": 19982.02, "total_tokens": 131739264} +{"current_steps": 41865, "total_steps": 78105, "loss": 0.2357, "lr": 2.6221844388828092e-06, "epoch": 2.680046091799501, "percentage": 53.6, "elapsed_time": "1:49:53", "remaining_time": "1:35:07", "throughput": 19982.38, "total_tokens": 131755520} +{"current_steps": 41870, "total_steps": 78105, "loss": 0.1739, "lr": 2.621626451098788e-06, "epoch": 2.6803661737404774, "percentage": 53.61, "elapsed_time": "1:49:54", "remaining_time": "1:35:06", "throughput": 19982.74, "total_tokens": 131771840} +{"current_steps": 41875, "total_steps": 78105, "loss": 0.3005, "lr": 2.621068457241375e-06, "epoch": 2.6806862556814544, "percentage": 53.61, "elapsed_time": "1:49:54", "remaining_time": "1:35:05", "throughput": 19983.11, "total_tokens": 131788032} +{"current_steps": 41880, "total_steps": 78105, "loss": 0.2631, "lr": 2.6205104573384344e-06, "epoch": 2.6810063376224313, "percentage": 53.62, "elapsed_time": "1:49:55", "remaining_time": "1:35:05", "throughput": 19983.49, "total_tokens": 131804416} +{"current_steps": 41885, "total_steps": 78105, "loss": 0.1928, "lr": 2.6199524514178285e-06, "epoch": 2.6813264195634083, "percentage": 53.63, "elapsed_time": "1:49:56", "remaining_time": "1:35:04", "throughput": 19983.84, "total_tokens": 131820096} +{"current_steps": 41890, "total_steps": 78105, "loss": 0.1743, "lr": 2.619394439507423e-06, "epoch": 2.6816465015043853, "percentage": 53.63, "elapsed_time": "1:49:57", "remaining_time": "1:35:03", "throughput": 19984.18, "total_tokens": 131836032} +{"current_steps": 41895, "total_steps": 78105, "loss": 0.2193, "lr": 2.618836421635081e-06, "epoch": 2.681966583445362, "percentage": 53.64, "elapsed_time": "1:49:57", "remaining_time": "1:35:02", "throughput": 19984.48, "total_tokens": 131850816} +{"current_steps": 41900, "total_steps": 78105, "loss": 0.2378, "lr": 2.618278397828666e-06, "epoch": 2.682286665386339, "percentage": 53.65, "elapsed_time": "1:49:58", "remaining_time": "1:35:01", "throughput": 19984.8, "total_tokens": 131866112} +{"current_steps": 41905, "total_steps": 78105, "loss": 0.2459, "lr": 2.6177203681160445e-06, "epoch": 2.682606747327316, "percentage": 53.65, "elapsed_time": "1:49:58", "remaining_time": "1:35:00", "throughput": 19985.1, "total_tokens": 131881344} +{"current_steps": 41910, "total_steps": 78105, "loss": 0.2885, "lr": 2.617162332525081e-06, "epoch": 2.682926829268293, "percentage": 53.66, "elapsed_time": "1:49:59", "remaining_time": "1:34:59", "throughput": 19985.38, "total_tokens": 131896064} +{"current_steps": 41915, "total_steps": 78105, "loss": 0.1519, "lr": 2.6166042910836408e-06, "epoch": 2.6832469112092694, "percentage": 53.66, "elapsed_time": "1:50:00", "remaining_time": "1:34:58", "throughput": 19985.93, "total_tokens": 131915648} +{"current_steps": 41920, "total_steps": 78105, "loss": 0.2115, "lr": 2.6160462438195905e-06, "epoch": 2.6835669931502464, "percentage": 53.67, "elapsed_time": "1:50:01", "remaining_time": "1:34:58", "throughput": 19986.21, "total_tokens": 131930688} +{"current_steps": 41925, "total_steps": 78105, "loss": 0.2692, "lr": 2.6154881907607943e-06, "epoch": 2.6838870750912234, "percentage": 53.68, "elapsed_time": "1:50:01", "remaining_time": "1:34:57", "throughput": 19986.54, "total_tokens": 131946112} +{"current_steps": 41930, "total_steps": 78105, "loss": 0.2683, "lr": 2.61493013193512e-06, "epoch": 2.6842071570322004, "percentage": 53.68, "elapsed_time": "1:50:02", "remaining_time": "1:34:56", "throughput": 19986.91, "total_tokens": 131962304} +{"current_steps": 41935, "total_steps": 78105, "loss": 0.2109, "lr": 2.6143720673704337e-06, "epoch": 2.6845272389731774, "percentage": 53.69, "elapsed_time": "1:50:03", "remaining_time": "1:34:55", "throughput": 19987.22, "total_tokens": 131977664} +{"current_steps": 41940, "total_steps": 78105, "loss": 0.2304, "lr": 2.613813997094601e-06, "epoch": 2.684847320914154, "percentage": 53.7, "elapsed_time": "1:50:03", "remaining_time": "1:34:54", "throughput": 19987.54, "total_tokens": 131993024} +{"current_steps": 41945, "total_steps": 78105, "loss": 0.2856, "lr": 2.6132559211354907e-06, "epoch": 2.685167402855131, "percentage": 53.7, "elapsed_time": "1:50:04", "remaining_time": "1:34:53", "throughput": 19987.83, "total_tokens": 132008192} +{"current_steps": 41950, "total_steps": 78105, "loss": 0.2237, "lr": 2.61269783952097e-06, "epoch": 2.685487484796108, "percentage": 53.71, "elapsed_time": "1:50:05", "remaining_time": "1:34:52", "throughput": 19988.11, "total_tokens": 132023232} +{"current_steps": 41955, "total_steps": 78105, "loss": 0.3079, "lr": 2.612139752278906e-06, "epoch": 2.6858075667370844, "percentage": 53.72, "elapsed_time": "1:50:05", "remaining_time": "1:34:51", "throughput": 19988.46, "total_tokens": 132039552} +{"current_steps": 41960, "total_steps": 78105, "loss": 0.1774, "lr": 2.6115816594371674e-06, "epoch": 2.6861276486780614, "percentage": 53.72, "elapsed_time": "1:50:06", "remaining_time": "1:34:50", "throughput": 19988.79, "total_tokens": 132055168} +{"current_steps": 41965, "total_steps": 78105, "loss": 0.1585, "lr": 2.6110235610236207e-06, "epoch": 2.6864477306190384, "percentage": 53.73, "elapsed_time": "1:50:07", "remaining_time": "1:34:50", "throughput": 19989.09, "total_tokens": 132070464} +{"current_steps": 41970, "total_steps": 78105, "loss": 0.275, "lr": 2.6104654570661357e-06, "epoch": 2.6867678125600154, "percentage": 53.74, "elapsed_time": "1:50:07", "remaining_time": "1:34:49", "throughput": 19989.45, "total_tokens": 132086848} +{"current_steps": 41975, "total_steps": 78105, "loss": 0.3131, "lr": 2.6099073475925824e-06, "epoch": 2.6870878945009924, "percentage": 53.74, "elapsed_time": "1:50:08", "remaining_time": "1:34:48", "throughput": 19989.67, "total_tokens": 132100864} +{"current_steps": 41980, "total_steps": 78105, "loss": 0.2236, "lr": 2.609349232630828e-06, "epoch": 2.6874079764419694, "percentage": 53.75, "elapsed_time": "1:50:09", "remaining_time": "1:34:47", "throughput": 19990.0, "total_tokens": 132116672} +{"current_steps": 41985, "total_steps": 78105, "loss": 0.287, "lr": 2.6087911122087427e-06, "epoch": 2.687728058382946, "percentage": 53.75, "elapsed_time": "1:50:09", "remaining_time": "1:34:46", "throughput": 19990.41, "total_tokens": 132133632} +{"current_steps": 41990, "total_steps": 78105, "loss": 0.2718, "lr": 2.6082329863541956e-06, "epoch": 2.688048140323923, "percentage": 53.76, "elapsed_time": "1:50:10", "remaining_time": "1:34:45", "throughput": 19990.73, "total_tokens": 132149376} +{"current_steps": 41995, "total_steps": 78105, "loss": 0.1877, "lr": 2.607674855095057e-06, "epoch": 2.6883682222649, "percentage": 53.77, "elapsed_time": "1:50:11", "remaining_time": "1:34:44", "throughput": 19991.03, "total_tokens": 132164736} +{"current_steps": 42000, "total_steps": 78105, "loss": 0.2317, "lr": 2.607116718459197e-06, "epoch": 2.6886883042058765, "percentage": 53.77, "elapsed_time": "1:50:11", "remaining_time": "1:34:43", "throughput": 19991.32, "total_tokens": 132179840} +{"current_steps": 42005, "total_steps": 78105, "loss": 0.3007, "lr": 2.606558576474486e-06, "epoch": 2.6890083861468534, "percentage": 53.78, "elapsed_time": "1:50:12", "remaining_time": "1:34:42", "throughput": 19991.59, "total_tokens": 132194624} +{"current_steps": 42010, "total_steps": 78105, "loss": 0.1507, "lr": 2.6060004291687953e-06, "epoch": 2.6893284680878304, "percentage": 53.79, "elapsed_time": "1:50:13", "remaining_time": "1:34:42", "throughput": 19991.88, "total_tokens": 132209728} +{"current_steps": 42015, "total_steps": 78105, "loss": 0.1296, "lr": 2.605442276569995e-06, "epoch": 2.6896485500288074, "percentage": 53.79, "elapsed_time": "1:50:13", "remaining_time": "1:34:41", "throughput": 19992.22, "total_tokens": 132225536} +{"current_steps": 42020, "total_steps": 78105, "loss": 0.186, "lr": 2.604884118705956e-06, "epoch": 2.6899686319697844, "percentage": 53.8, "elapsed_time": "1:50:14", "remaining_time": "1:34:40", "throughput": 19992.53, "total_tokens": 132240832} +{"current_steps": 42025, "total_steps": 78105, "loss": 0.1699, "lr": 2.6043259556045507e-06, "epoch": 2.6902887139107614, "percentage": 53.81, "elapsed_time": "1:50:15", "remaining_time": "1:34:39", "throughput": 19992.88, "total_tokens": 132256512} +{"current_steps": 42030, "total_steps": 78105, "loss": 0.2736, "lr": 2.6037677872936508e-06, "epoch": 2.690608795851738, "percentage": 53.81, "elapsed_time": "1:50:15", "remaining_time": "1:34:38", "throughput": 19993.2, "total_tokens": 132272000} +{"current_steps": 42035, "total_steps": 78105, "loss": 0.2222, "lr": 2.603209613801128e-06, "epoch": 2.690928877792715, "percentage": 53.82, "elapsed_time": "1:50:16", "remaining_time": "1:34:37", "throughput": 19993.55, "total_tokens": 132288000} +{"current_steps": 42040, "total_steps": 78105, "loss": 0.2271, "lr": 2.6026514351548556e-06, "epoch": 2.691248959733692, "percentage": 53.82, "elapsed_time": "1:50:17", "remaining_time": "1:34:36", "throughput": 19993.98, "total_tokens": 132305152} +{"current_steps": 42045, "total_steps": 78105, "loss": 0.2093, "lr": 2.602093251382704e-06, "epoch": 2.6915690416746685, "percentage": 53.83, "elapsed_time": "1:50:17", "remaining_time": "1:34:35", "throughput": 19994.31, "total_tokens": 132320768} +{"current_steps": 42050, "total_steps": 78105, "loss": 0.1654, "lr": 2.601535062512548e-06, "epoch": 2.6918891236156455, "percentage": 53.84, "elapsed_time": "1:50:18", "remaining_time": "1:34:34", "throughput": 19994.62, "total_tokens": 132336256} +{"current_steps": 42055, "total_steps": 78105, "loss": 0.1773, "lr": 2.600976868572259e-06, "epoch": 2.6922092055566225, "percentage": 53.84, "elapsed_time": "1:50:19", "remaining_time": "1:34:34", "throughput": 19994.9, "total_tokens": 132351040} +{"current_steps": 42060, "total_steps": 78105, "loss": 0.2258, "lr": 2.600418669589712e-06, "epoch": 2.6925292874975995, "percentage": 53.85, "elapsed_time": "1:50:19", "remaining_time": "1:34:33", "throughput": 19995.21, "total_tokens": 132366336} +{"current_steps": 42065, "total_steps": 78105, "loss": 0.1983, "lr": 2.5998604655927794e-06, "epoch": 2.6928493694385764, "percentage": 53.86, "elapsed_time": "1:50:20", "remaining_time": "1:34:32", "throughput": 19995.48, "total_tokens": 132380864} +{"current_steps": 42070, "total_steps": 78105, "loss": 0.2047, "lr": 2.599302256609335e-06, "epoch": 2.6931694513795534, "percentage": 53.86, "elapsed_time": "1:50:21", "remaining_time": "1:34:31", "throughput": 19995.84, "total_tokens": 132397248} +{"current_steps": 42075, "total_steps": 78105, "loss": 0.1606, "lr": 2.5987440426672537e-06, "epoch": 2.69348953332053, "percentage": 53.87, "elapsed_time": "1:50:21", "remaining_time": "1:34:30", "throughput": 19996.18, "total_tokens": 132412928} +{"current_steps": 42080, "total_steps": 78105, "loss": 0.293, "lr": 2.5981858237944084e-06, "epoch": 2.693809615261507, "percentage": 53.88, "elapsed_time": "1:50:22", "remaining_time": "1:34:29", "throughput": 19996.5, "total_tokens": 132428416} +{"current_steps": 42085, "total_steps": 78105, "loss": 0.2504, "lr": 2.597627600018675e-06, "epoch": 2.694129697202484, "percentage": 53.88, "elapsed_time": "1:50:23", "remaining_time": "1:34:28", "throughput": 19996.85, "total_tokens": 132444352} +{"current_steps": 42090, "total_steps": 78105, "loss": 0.2006, "lr": 2.597069371367928e-06, "epoch": 2.6944497791434605, "percentage": 53.89, "elapsed_time": "1:50:23", "remaining_time": "1:34:27", "throughput": 19997.17, "total_tokens": 132459712} +{"current_steps": 42095, "total_steps": 78105, "loss": 0.18, "lr": 2.596511137870042e-06, "epoch": 2.6947698610844375, "percentage": 53.9, "elapsed_time": "1:50:24", "remaining_time": "1:34:27", "throughput": 19997.58, "total_tokens": 132476672} +{"current_steps": 42100, "total_steps": 78105, "loss": 0.2087, "lr": 2.595952899552893e-06, "epoch": 2.6950899430254145, "percentage": 53.9, "elapsed_time": "1:50:25", "remaining_time": "1:34:26", "throughput": 19997.91, "total_tokens": 132492224} +{"current_steps": 42105, "total_steps": 78105, "loss": 0.2058, "lr": 2.5953946564443554e-06, "epoch": 2.6954100249663915, "percentage": 53.91, "elapsed_time": "1:50:25", "remaining_time": "1:34:25", "throughput": 19998.21, "total_tokens": 132507456} +{"current_steps": 42110, "total_steps": 78105, "loss": 0.2725, "lr": 2.5948364085723055e-06, "epoch": 2.6957301069073685, "percentage": 53.91, "elapsed_time": "1:50:26", "remaining_time": "1:34:24", "throughput": 19998.54, "total_tokens": 132523392} +{"current_steps": 42115, "total_steps": 78105, "loss": 0.2302, "lr": 2.5942781559646197e-06, "epoch": 2.696050188848345, "percentage": 53.92, "elapsed_time": "1:50:27", "remaining_time": "1:34:23", "throughput": 19998.87, "total_tokens": 132538816} +{"current_steps": 42120, "total_steps": 78105, "loss": 0.1905, "lr": 2.5937198986491735e-06, "epoch": 2.696370270789322, "percentage": 53.93, "elapsed_time": "1:50:27", "remaining_time": "1:34:22", "throughput": 19999.19, "total_tokens": 132554432} +{"current_steps": 42125, "total_steps": 78105, "loss": 0.1991, "lr": 2.5931616366538444e-06, "epoch": 2.696690352730299, "percentage": 53.93, "elapsed_time": "1:50:28", "remaining_time": "1:34:21", "throughput": 19999.54, "total_tokens": 132570368} +{"current_steps": 42130, "total_steps": 78105, "loss": 0.2057, "lr": 2.592603370006508e-06, "epoch": 2.697010434671276, "percentage": 53.94, "elapsed_time": "1:50:29", "remaining_time": "1:34:20", "throughput": 19999.83, "total_tokens": 132586048} +{"current_steps": 42135, "total_steps": 78105, "loss": 0.292, "lr": 2.592045098735041e-06, "epoch": 2.6973305166122525, "percentage": 53.95, "elapsed_time": "1:50:30", "remaining_time": "1:34:19", "throughput": 20000.12, "total_tokens": 132601088} +{"current_steps": 42140, "total_steps": 78105, "loss": 0.1806, "lr": 2.5914868228673214e-06, "epoch": 2.6976505985532295, "percentage": 53.95, "elapsed_time": "1:50:30", "remaining_time": "1:34:19", "throughput": 20000.43, "total_tokens": 132616448} +{"current_steps": 42145, "total_steps": 78105, "loss": 0.2344, "lr": 2.590928542431227e-06, "epoch": 2.6979706804942065, "percentage": 53.96, "elapsed_time": "1:50:31", "remaining_time": "1:34:18", "throughput": 20000.72, "total_tokens": 132631296} +{"current_steps": 42150, "total_steps": 78105, "loss": 0.1493, "lr": 2.5903702574546346e-06, "epoch": 2.6982907624351835, "percentage": 53.97, "elapsed_time": "1:50:32", "remaining_time": "1:34:17", "throughput": 20001.13, "total_tokens": 132648256} +{"current_steps": 42155, "total_steps": 78105, "loss": 0.2154, "lr": 2.589811967965423e-06, "epoch": 2.6986108443761605, "percentage": 53.97, "elapsed_time": "1:50:32", "remaining_time": "1:34:16", "throughput": 20001.47, "total_tokens": 132664448} +{"current_steps": 42160, "total_steps": 78105, "loss": 0.2631, "lr": 2.5892536739914685e-06, "epoch": 2.698930926317137, "percentage": 53.98, "elapsed_time": "1:50:33", "remaining_time": "1:34:15", "throughput": 20001.78, "total_tokens": 132679872} +{"current_steps": 42165, "total_steps": 78105, "loss": 0.2164, "lr": 2.588695375560651e-06, "epoch": 2.699251008258114, "percentage": 53.99, "elapsed_time": "1:50:34", "remaining_time": "1:34:14", "throughput": 20002.05, "total_tokens": 132694464} +{"current_steps": 42170, "total_steps": 78105, "loss": 0.2184, "lr": 2.5881370727008474e-06, "epoch": 2.699571090199091, "percentage": 53.99, "elapsed_time": "1:50:34", "remaining_time": "1:34:13", "throughput": 20002.42, "total_tokens": 132710784} +{"current_steps": 42175, "total_steps": 78105, "loss": 0.1727, "lr": 2.5875787654399387e-06, "epoch": 2.699891172140068, "percentage": 54.0, "elapsed_time": "1:50:35", "remaining_time": "1:34:12", "throughput": 20002.78, "total_tokens": 132726976} +{"current_steps": 42180, "total_steps": 78105, "loss": 0.3153, "lr": 2.5870204538058023e-06, "epoch": 2.7002112540810446, "percentage": 54.0, "elapsed_time": "1:50:36", "remaining_time": "1:34:11", "throughput": 20003.09, "total_tokens": 132742144} +{"current_steps": 42185, "total_steps": 78105, "loss": 0.1987, "lr": 2.586462137826318e-06, "epoch": 2.7005313360220216, "percentage": 54.01, "elapsed_time": "1:50:36", "remaining_time": "1:34:11", "throughput": 20003.37, "total_tokens": 132757056} +{"current_steps": 42190, "total_steps": 78105, "loss": 0.2785, "lr": 2.5859038175293645e-06, "epoch": 2.7008514179629985, "percentage": 54.02, "elapsed_time": "1:50:37", "remaining_time": "1:34:10", "throughput": 20003.66, "total_tokens": 132772160} +{"current_steps": 42195, "total_steps": 78105, "loss": 0.2969, "lr": 2.5853454929428213e-06, "epoch": 2.7011714999039755, "percentage": 54.02, "elapsed_time": "1:50:38", "remaining_time": "1:34:09", "throughput": 20003.93, "total_tokens": 132786880} +{"current_steps": 42200, "total_steps": 78105, "loss": 0.1321, "lr": 2.584787164094569e-06, "epoch": 2.7014915818449525, "percentage": 54.03, "elapsed_time": "1:50:38", "remaining_time": "1:34:08", "throughput": 20004.44, "total_tokens": 132805760} +{"current_steps": 42205, "total_steps": 78105, "loss": 0.2687, "lr": 2.5842288310124876e-06, "epoch": 2.701811663785929, "percentage": 54.04, "elapsed_time": "1:50:39", "remaining_time": "1:34:07", "throughput": 20004.81, "total_tokens": 132822272} +{"current_steps": 42210, "total_steps": 78105, "loss": 0.1725, "lr": 2.583670493724457e-06, "epoch": 2.702131745726906, "percentage": 54.04, "elapsed_time": "1:50:40", "remaining_time": "1:34:06", "throughput": 20005.16, "total_tokens": 132837888} +{"current_steps": 42215, "total_steps": 78105, "loss": 0.187, "lr": 2.583112152258358e-06, "epoch": 2.702451827667883, "percentage": 54.05, "elapsed_time": "1:50:40", "remaining_time": "1:34:05", "throughput": 20005.41, "total_tokens": 132852544} +{"current_steps": 42220, "total_steps": 78105, "loss": 0.3097, "lr": 2.5825538066420713e-06, "epoch": 2.7027719096088596, "percentage": 54.06, "elapsed_time": "1:50:41", "remaining_time": "1:34:04", "throughput": 20005.78, "total_tokens": 132868800} +{"current_steps": 42225, "total_steps": 78105, "loss": 0.2885, "lr": 2.5819954569034767e-06, "epoch": 2.7030919915498366, "percentage": 54.06, "elapsed_time": "1:50:42", "remaining_time": "1:34:04", "throughput": 20006.07, "total_tokens": 132884160} +{"current_steps": 42230, "total_steps": 78105, "loss": 0.2297, "lr": 2.5814371030704566e-06, "epoch": 2.7034120734908136, "percentage": 54.07, "elapsed_time": "1:50:42", "remaining_time": "1:34:03", "throughput": 20006.44, "total_tokens": 132900096} +{"current_steps": 42235, "total_steps": 78105, "loss": 0.1881, "lr": 2.5808787451708915e-06, "epoch": 2.7037321554317906, "percentage": 54.07, "elapsed_time": "1:50:43", "remaining_time": "1:34:02", "throughput": 20006.77, "total_tokens": 132915392} +{"current_steps": 42240, "total_steps": 78105, "loss": 0.1998, "lr": 2.580320383232664e-06, "epoch": 2.7040522373727676, "percentage": 54.08, "elapsed_time": "1:50:44", "remaining_time": "1:34:01", "throughput": 20007.17, "total_tokens": 132932096} +{"current_steps": 42245, "total_steps": 78105, "loss": 0.1977, "lr": 2.579762017283654e-06, "epoch": 2.7043723193137446, "percentage": 54.09, "elapsed_time": "1:50:44", "remaining_time": "1:34:00", "throughput": 20007.52, "total_tokens": 132948224} +{"current_steps": 42250, "total_steps": 78105, "loss": 0.2544, "lr": 2.5792036473517444e-06, "epoch": 2.704692401254721, "percentage": 54.09, "elapsed_time": "1:50:45", "remaining_time": "1:33:59", "throughput": 20007.86, "total_tokens": 132964096} +{"current_steps": 42255, "total_steps": 78105, "loss": 0.2317, "lr": 2.578645273464817e-06, "epoch": 2.705012483195698, "percentage": 54.1, "elapsed_time": "1:50:46", "remaining_time": "1:33:58", "throughput": 20008.16, "total_tokens": 132979392} +{"current_steps": 42260, "total_steps": 78105, "loss": 0.3085, "lr": 2.578086895650755e-06, "epoch": 2.705332565136675, "percentage": 54.11, "elapsed_time": "1:50:46", "remaining_time": "1:33:57", "throughput": 20008.53, "total_tokens": 132995840} +{"current_steps": 42265, "total_steps": 78105, "loss": 0.148, "lr": 2.5775285139374396e-06, "epoch": 2.7056526470776516, "percentage": 54.11, "elapsed_time": "1:50:47", "remaining_time": "1:33:57", "throughput": 20008.85, "total_tokens": 133011840} +{"current_steps": 42270, "total_steps": 78105, "loss": 0.2201, "lr": 2.576970128352755e-06, "epoch": 2.7059727290186286, "percentage": 54.12, "elapsed_time": "1:50:48", "remaining_time": "1:33:56", "throughput": 20009.13, "total_tokens": 133027200} +{"current_steps": 42275, "total_steps": 78105, "loss": 0.1732, "lr": 2.5764117389245822e-06, "epoch": 2.7062928109596056, "percentage": 54.13, "elapsed_time": "1:50:48", "remaining_time": "1:33:55", "throughput": 20009.36, "total_tokens": 133041280} +{"current_steps": 42280, "total_steps": 78105, "loss": 0.2119, "lr": 2.5758533456808055e-06, "epoch": 2.7066128929005826, "percentage": 54.13, "elapsed_time": "1:50:49", "remaining_time": "1:33:54", "throughput": 20009.63, "total_tokens": 133056128} +{"current_steps": 42285, "total_steps": 78105, "loss": 0.2754, "lr": 2.575294948649307e-06, "epoch": 2.7069329748415596, "percentage": 54.14, "elapsed_time": "1:50:50", "remaining_time": "1:33:53", "throughput": 20009.99, "total_tokens": 133072128} +{"current_steps": 42290, "total_steps": 78105, "loss": 0.1819, "lr": 2.574736547857972e-06, "epoch": 2.7072530567825366, "percentage": 54.15, "elapsed_time": "1:50:50", "remaining_time": "1:33:52", "throughput": 20010.26, "total_tokens": 133086784} +{"current_steps": 42295, "total_steps": 78105, "loss": 0.1896, "lr": 2.574178143334683e-06, "epoch": 2.707573138723513, "percentage": 54.15, "elapsed_time": "1:50:51", "remaining_time": "1:33:51", "throughput": 20010.64, "total_tokens": 133103488} +{"current_steps": 42300, "total_steps": 78105, "loss": 0.2065, "lr": 2.573619735107324e-06, "epoch": 2.70789322066449, "percentage": 54.16, "elapsed_time": "1:50:52", "remaining_time": "1:33:50", "throughput": 20010.92, "total_tokens": 133118528} +{"current_steps": 42305, "total_steps": 78105, "loss": 0.1918, "lr": 2.5730613232037787e-06, "epoch": 2.708213302605467, "percentage": 54.16, "elapsed_time": "1:50:52", "remaining_time": "1:33:50", "throughput": 20011.33, "total_tokens": 133135360} +{"current_steps": 42310, "total_steps": 78105, "loss": 0.3044, "lr": 2.5725029076519308e-06, "epoch": 2.7085333845464437, "percentage": 54.17, "elapsed_time": "1:50:53", "remaining_time": "1:33:49", "throughput": 20011.67, "total_tokens": 133151168} +{"current_steps": 42315, "total_steps": 78105, "loss": 0.3606, "lr": 2.5719444884796653e-06, "epoch": 2.7088534664874206, "percentage": 54.18, "elapsed_time": "1:50:54", "remaining_time": "1:33:48", "throughput": 20012.01, "total_tokens": 133167232} +{"current_steps": 42320, "total_steps": 78105, "loss": 0.177, "lr": 2.5713860657148677e-06, "epoch": 2.7091735484283976, "percentage": 54.18, "elapsed_time": "1:50:55", "remaining_time": "1:33:47", "throughput": 20012.36, "total_tokens": 133183616} +{"current_steps": 42325, "total_steps": 78105, "loss": 0.2269, "lr": 2.5708276393854216e-06, "epoch": 2.7094936303693746, "percentage": 54.19, "elapsed_time": "1:50:55", "remaining_time": "1:33:46", "throughput": 20012.64, "total_tokens": 133198528} +{"current_steps": 42330, "total_steps": 78105, "loss": 0.2223, "lr": 2.5702692095192123e-06, "epoch": 2.7098137123103516, "percentage": 54.2, "elapsed_time": "1:50:56", "remaining_time": "1:33:45", "throughput": 20012.93, "total_tokens": 133213696} +{"current_steps": 42335, "total_steps": 78105, "loss": 0.2247, "lr": 2.569710776144124e-06, "epoch": 2.7101337942513286, "percentage": 54.2, "elapsed_time": "1:50:57", "remaining_time": "1:33:44", "throughput": 20013.26, "total_tokens": 133229248} +{"current_steps": 42340, "total_steps": 78105, "loss": 0.3229, "lr": 2.5691523392880428e-06, "epoch": 2.710453876192305, "percentage": 54.21, "elapsed_time": "1:50:57", "remaining_time": "1:33:43", "throughput": 20013.62, "total_tokens": 133245568} +{"current_steps": 42345, "total_steps": 78105, "loss": 0.2048, "lr": 2.5685938989788545e-06, "epoch": 2.710773958133282, "percentage": 54.22, "elapsed_time": "1:50:58", "remaining_time": "1:33:42", "throughput": 20013.94, "total_tokens": 133260992} +{"current_steps": 42350, "total_steps": 78105, "loss": 0.2256, "lr": 2.568035455244444e-06, "epoch": 2.711094040074259, "percentage": 54.22, "elapsed_time": "1:50:59", "remaining_time": "1:33:42", "throughput": 20014.27, "total_tokens": 133276544} +{"current_steps": 42355, "total_steps": 78105, "loss": 0.3146, "lr": 2.5674770081126977e-06, "epoch": 2.7114141220152357, "percentage": 54.23, "elapsed_time": "1:50:59", "remaining_time": "1:33:41", "throughput": 20014.65, "total_tokens": 133293184} +{"current_steps": 42360, "total_steps": 78105, "loss": 0.1427, "lr": 2.5669185576115004e-06, "epoch": 2.7117342039562127, "percentage": 54.23, "elapsed_time": "1:51:00", "remaining_time": "1:33:40", "throughput": 20014.96, "total_tokens": 133308352} +{"current_steps": 42365, "total_steps": 78105, "loss": 0.2569, "lr": 2.5663601037687386e-06, "epoch": 2.7120542858971897, "percentage": 54.24, "elapsed_time": "1:51:01", "remaining_time": "1:33:39", "throughput": 20015.28, "total_tokens": 133323968} +{"current_steps": 42370, "total_steps": 78105, "loss": 0.2647, "lr": 2.565801646612299e-06, "epoch": 2.7123743678381667, "percentage": 54.25, "elapsed_time": "1:51:01", "remaining_time": "1:33:38", "throughput": 20015.58, "total_tokens": 133339584} +{"current_steps": 42375, "total_steps": 78105, "loss": 0.334, "lr": 2.5652431861700683e-06, "epoch": 2.7126944497791436, "percentage": 54.25, "elapsed_time": "1:51:02", "remaining_time": "1:33:37", "throughput": 20015.93, "total_tokens": 133355584} +{"current_steps": 42380, "total_steps": 78105, "loss": 0.3551, "lr": 2.5646847224699327e-06, "epoch": 2.71301453172012, "percentage": 54.26, "elapsed_time": "1:51:03", "remaining_time": "1:33:36", "throughput": 20016.23, "total_tokens": 133370944} +{"current_steps": 42385, "total_steps": 78105, "loss": 0.2646, "lr": 2.5641262555397787e-06, "epoch": 2.713334613661097, "percentage": 54.27, "elapsed_time": "1:51:03", "remaining_time": "1:33:35", "throughput": 20016.61, "total_tokens": 133387456} +{"current_steps": 42390, "total_steps": 78105, "loss": 0.1974, "lr": 2.5635677854074942e-06, "epoch": 2.713654695602074, "percentage": 54.27, "elapsed_time": "1:51:04", "remaining_time": "1:33:35", "throughput": 20016.98, "total_tokens": 133403904} +{"current_steps": 42395, "total_steps": 78105, "loss": 0.1941, "lr": 2.563009312100965e-06, "epoch": 2.713974777543051, "percentage": 54.28, "elapsed_time": "1:51:05", "remaining_time": "1:33:34", "throughput": 20017.39, "total_tokens": 133420736} +{"current_steps": 42400, "total_steps": 78105, "loss": 0.2239, "lr": 2.5624508356480794e-06, "epoch": 2.7142948594840277, "percentage": 54.29, "elapsed_time": "1:51:05", "remaining_time": "1:33:33", "throughput": 20017.73, "total_tokens": 133436608} +{"current_steps": 42405, "total_steps": 78105, "loss": 0.2457, "lr": 2.561892356076724e-06, "epoch": 2.7146149414250047, "percentage": 54.29, "elapsed_time": "1:51:06", "remaining_time": "1:33:32", "throughput": 20018.04, "total_tokens": 133451840} +{"current_steps": 42410, "total_steps": 78105, "loss": 0.1412, "lr": 2.561333873414787e-06, "epoch": 2.7149350233659817, "percentage": 54.3, "elapsed_time": "1:51:07", "remaining_time": "1:33:31", "throughput": 20018.33, "total_tokens": 133467328} +{"current_steps": 42415, "total_steps": 78105, "loss": 0.2157, "lr": 2.560775387690156e-06, "epoch": 2.7152551053069587, "percentage": 54.31, "elapsed_time": "1:51:07", "remaining_time": "1:33:30", "throughput": 20018.65, "total_tokens": 133483264} +{"current_steps": 42420, "total_steps": 78105, "loss": 0.1953, "lr": 2.5602168989307187e-06, "epoch": 2.7155751872479357, "percentage": 54.31, "elapsed_time": "1:51:08", "remaining_time": "1:33:29", "throughput": 20019.01, "total_tokens": 133499584} +{"current_steps": 42425, "total_steps": 78105, "loss": 0.3926, "lr": 2.559658407164362e-06, "epoch": 2.715895269188912, "percentage": 54.32, "elapsed_time": "1:51:09", "remaining_time": "1:33:29", "throughput": 20019.41, "total_tokens": 133516480} +{"current_steps": 42430, "total_steps": 78105, "loss": 0.2246, "lr": 2.5590999124189754e-06, "epoch": 2.716215351129889, "percentage": 54.32, "elapsed_time": "1:51:10", "remaining_time": "1:33:28", "throughput": 20019.73, "total_tokens": 133532480} +{"current_steps": 42435, "total_steps": 78105, "loss": 0.2274, "lr": 2.558541414722448e-06, "epoch": 2.716535433070866, "percentage": 54.33, "elapsed_time": "1:51:10", "remaining_time": "1:33:27", "throughput": 20020.0, "total_tokens": 133547136} +{"current_steps": 42440, "total_steps": 78105, "loss": 0.2198, "lr": 2.5579829141026667e-06, "epoch": 2.716855515011843, "percentage": 54.34, "elapsed_time": "1:51:11", "remaining_time": "1:33:26", "throughput": 20020.31, "total_tokens": 133562880} +{"current_steps": 42445, "total_steps": 78105, "loss": 0.233, "lr": 2.5574244105875212e-06, "epoch": 2.7171755969528197, "percentage": 54.34, "elapsed_time": "1:51:12", "remaining_time": "1:33:25", "throughput": 20020.61, "total_tokens": 133578048} +{"current_steps": 42450, "total_steps": 78105, "loss": 0.2013, "lr": 2.5568659042048994e-06, "epoch": 2.7174956788937967, "percentage": 54.35, "elapsed_time": "1:51:12", "remaining_time": "1:33:24", "throughput": 20020.93, "total_tokens": 133593920} +{"current_steps": 42455, "total_steps": 78105, "loss": 0.223, "lr": 2.5563073949826907e-06, "epoch": 2.7178157608347737, "percentage": 54.36, "elapsed_time": "1:51:13", "remaining_time": "1:33:23", "throughput": 20021.49, "total_tokens": 133613376} +{"current_steps": 42460, "total_steps": 78105, "loss": 0.1625, "lr": 2.5557488829487836e-06, "epoch": 2.7181358427757507, "percentage": 54.36, "elapsed_time": "1:51:14", "remaining_time": "1:33:22", "throughput": 20021.79, "total_tokens": 133628480} +{"current_steps": 42465, "total_steps": 78105, "loss": 0.1886, "lr": 2.555190368131068e-06, "epoch": 2.7184559247167277, "percentage": 54.37, "elapsed_time": "1:51:14", "remaining_time": "1:33:22", "throughput": 20022.09, "total_tokens": 133643904} +{"current_steps": 42470, "total_steps": 78105, "loss": 0.269, "lr": 2.554631850557433e-06, "epoch": 2.7187760066577042, "percentage": 54.38, "elapsed_time": "1:51:15", "remaining_time": "1:33:21", "throughput": 20022.4, "total_tokens": 133659584} +{"current_steps": 42475, "total_steps": 78105, "loss": 0.241, "lr": 2.554073330255768e-06, "epoch": 2.7190960885986812, "percentage": 54.38, "elapsed_time": "1:51:16", "remaining_time": "1:33:20", "throughput": 20022.68, "total_tokens": 133674880} +{"current_steps": 42480, "total_steps": 78105, "loss": 0.1952, "lr": 2.553514807253962e-06, "epoch": 2.7194161705396582, "percentage": 54.39, "elapsed_time": "1:51:16", "remaining_time": "1:33:19", "throughput": 20022.99, "total_tokens": 133690368} +{"current_steps": 42485, "total_steps": 78105, "loss": 0.1502, "lr": 2.552956281579906e-06, "epoch": 2.7197362524806348, "percentage": 54.39, "elapsed_time": "1:51:17", "remaining_time": "1:33:18", "throughput": 20023.27, "total_tokens": 133705664} +{"current_steps": 42490, "total_steps": 78105, "loss": 0.27, "lr": 2.552397753261488e-06, "epoch": 2.7200563344216118, "percentage": 54.4, "elapsed_time": "1:51:18", "remaining_time": "1:33:17", "throughput": 20023.54, "total_tokens": 133720768} +{"current_steps": 42495, "total_steps": 78105, "loss": 0.1744, "lr": 2.5518392223266e-06, "epoch": 2.7203764163625888, "percentage": 54.41, "elapsed_time": "1:51:18", "remaining_time": "1:33:16", "throughput": 20023.79, "total_tokens": 133735232} +{"current_steps": 42500, "total_steps": 78105, "loss": 0.3103, "lr": 2.551280688803131e-06, "epoch": 2.7206964983035657, "percentage": 54.41, "elapsed_time": "1:51:19", "remaining_time": "1:33:15", "throughput": 20024.06, "total_tokens": 133750080} +{"current_steps": 42505, "total_steps": 78105, "loss": 0.2276, "lr": 2.550722152718972e-06, "epoch": 2.7210165802445427, "percentage": 54.42, "elapsed_time": "1:51:20", "remaining_time": "1:33:14", "throughput": 20024.44, "total_tokens": 133766464} +{"current_steps": 42510, "total_steps": 78105, "loss": 0.2621, "lr": 2.550163614102012e-06, "epoch": 2.7213366621855197, "percentage": 54.43, "elapsed_time": "1:51:20", "remaining_time": "1:33:14", "throughput": 20024.8, "total_tokens": 133782848} +{"current_steps": 42515, "total_steps": 78105, "loss": 0.1367, "lr": 2.549605072980143e-06, "epoch": 2.7216567441264963, "percentage": 54.43, "elapsed_time": "1:51:21", "remaining_time": "1:33:13", "throughput": 20025.07, "total_tokens": 133797696} +{"current_steps": 42520, "total_steps": 78105, "loss": 0.1574, "lr": 2.549046529381255e-06, "epoch": 2.7219768260674733, "percentage": 54.44, "elapsed_time": "1:51:22", "remaining_time": "1:33:12", "throughput": 20025.34, "total_tokens": 133813120} +{"current_steps": 42525, "total_steps": 78105, "loss": 0.1621, "lr": 2.5484879833332392e-06, "epoch": 2.7222969080084503, "percentage": 54.45, "elapsed_time": "1:51:22", "remaining_time": "1:33:11", "throughput": 20025.63, "total_tokens": 133828608} +{"current_steps": 42530, "total_steps": 78105, "loss": 0.2898, "lr": 2.5479294348639855e-06, "epoch": 2.722616989949427, "percentage": 54.45, "elapsed_time": "1:51:23", "remaining_time": "1:33:10", "throughput": 20025.93, "total_tokens": 133844224} +{"current_steps": 42535, "total_steps": 78105, "loss": 0.274, "lr": 2.547370884001386e-06, "epoch": 2.722937071890404, "percentage": 54.46, "elapsed_time": "1:51:24", "remaining_time": "1:33:09", "throughput": 20026.3, "total_tokens": 133860416} +{"current_steps": 42540, "total_steps": 78105, "loss": 0.2179, "lr": 2.5468123307733304e-06, "epoch": 2.723257153831381, "percentage": 54.47, "elapsed_time": "1:51:24", "remaining_time": "1:33:08", "throughput": 20026.71, "total_tokens": 133877568} +{"current_steps": 42545, "total_steps": 78105, "loss": 0.2668, "lr": 2.546253775207711e-06, "epoch": 2.7235772357723578, "percentage": 54.47, "elapsed_time": "1:51:25", "remaining_time": "1:33:08", "throughput": 20027.01, "total_tokens": 133893568} +{"current_steps": 42550, "total_steps": 78105, "loss": 0.185, "lr": 2.5456952173324197e-06, "epoch": 2.7238973177133348, "percentage": 54.48, "elapsed_time": "1:51:26", "remaining_time": "1:33:07", "throughput": 20027.32, "total_tokens": 133909120} +{"current_steps": 42555, "total_steps": 78105, "loss": 0.3923, "lr": 2.5451366571753467e-06, "epoch": 2.7242173996543118, "percentage": 54.48, "elapsed_time": "1:51:26", "remaining_time": "1:33:06", "throughput": 20027.66, "total_tokens": 133924864} +{"current_steps": 42560, "total_steps": 78105, "loss": 0.2699, "lr": 2.5445780947643842e-06, "epoch": 2.7245374815952883, "percentage": 54.49, "elapsed_time": "1:51:27", "remaining_time": "1:33:05", "throughput": 20028.01, "total_tokens": 133940672} +{"current_steps": 42565, "total_steps": 78105, "loss": 0.292, "lr": 2.5440195301274247e-06, "epoch": 2.7248575635362653, "percentage": 54.5, "elapsed_time": "1:51:28", "remaining_time": "1:33:04", "throughput": 20028.34, "total_tokens": 133956672} +{"current_steps": 42570, "total_steps": 78105, "loss": 0.2636, "lr": 2.543460963292358e-06, "epoch": 2.7251776454772423, "percentage": 54.5, "elapsed_time": "1:51:29", "remaining_time": "1:33:03", "throughput": 20028.66, "total_tokens": 133972416} +{"current_steps": 42575, "total_steps": 78105, "loss": 0.2519, "lr": 2.542902394287078e-06, "epoch": 2.725497727418219, "percentage": 54.51, "elapsed_time": "1:51:29", "remaining_time": "1:33:02", "throughput": 20029.12, "total_tokens": 133990144} +{"current_steps": 42580, "total_steps": 78105, "loss": 0.2507, "lr": 2.5423438231394754e-06, "epoch": 2.725817809359196, "percentage": 54.52, "elapsed_time": "1:51:30", "remaining_time": "1:33:02", "throughput": 20027.82, "total_tokens": 134005632} +{"current_steps": 42585, "total_steps": 78105, "loss": 0.1917, "lr": 2.5417852498774433e-06, "epoch": 2.726137891300173, "percentage": 54.52, "elapsed_time": "1:51:31", "remaining_time": "1:33:01", "throughput": 20028.13, "total_tokens": 134020928} +{"current_steps": 42590, "total_steps": 78105, "loss": 0.2271, "lr": 2.541226674528873e-06, "epoch": 2.72645797324115, "percentage": 54.53, "elapsed_time": "1:51:32", "remaining_time": "1:33:00", "throughput": 20028.45, "total_tokens": 134036928} +{"current_steps": 42595, "total_steps": 78105, "loss": 0.2658, "lr": 2.5406680971216567e-06, "epoch": 2.726778055182127, "percentage": 54.54, "elapsed_time": "1:51:33", "remaining_time": "1:32:59", "throughput": 20028.8, "total_tokens": 134053056} +{"current_steps": 42600, "total_steps": 78105, "loss": 0.2035, "lr": 2.5401095176836887e-06, "epoch": 2.727098137123104, "percentage": 54.54, "elapsed_time": "1:51:33", "remaining_time": "1:32:58", "throughput": 20029.12, "total_tokens": 134068800} +{"current_steps": 42605, "total_steps": 78105, "loss": 0.224, "lr": 2.5395509362428593e-06, "epoch": 2.7274182190640803, "percentage": 54.55, "elapsed_time": "1:51:34", "remaining_time": "1:32:57", "throughput": 20029.48, "total_tokens": 134084736} +{"current_steps": 42610, "total_steps": 78105, "loss": 0.2952, "lr": 2.538992352827063e-06, "epoch": 2.7277383010050573, "percentage": 54.55, "elapsed_time": "1:51:35", "remaining_time": "1:32:57", "throughput": 20029.83, "total_tokens": 134101056} +{"current_steps": 42615, "total_steps": 78105, "loss": 0.2335, "lr": 2.5384337674641913e-06, "epoch": 2.7280583829460343, "percentage": 54.56, "elapsed_time": "1:51:35", "remaining_time": "1:32:56", "throughput": 20030.1, "total_tokens": 134115648} +{"current_steps": 42620, "total_steps": 78105, "loss": 0.1952, "lr": 2.5378751801821375e-06, "epoch": 2.728378464887011, "percentage": 54.57, "elapsed_time": "1:51:36", "remaining_time": "1:32:55", "throughput": 20029.95, "total_tokens": 134130880} +{"current_steps": 42625, "total_steps": 78105, "loss": 0.2794, "lr": 2.5373165910087943e-06, "epoch": 2.728698546827988, "percentage": 54.57, "elapsed_time": "1:51:37", "remaining_time": "1:32:54", "throughput": 20030.29, "total_tokens": 134147072} +{"current_steps": 42630, "total_steps": 78105, "loss": 0.1391, "lr": 2.5367579999720552e-06, "epoch": 2.729018628768965, "percentage": 54.58, "elapsed_time": "1:51:37", "remaining_time": "1:32:53", "throughput": 20030.65, "total_tokens": 134163520} +{"current_steps": 42635, "total_steps": 78105, "loss": 0.1248, "lr": 2.5361994070998135e-06, "epoch": 2.729338710709942, "percentage": 54.59, "elapsed_time": "1:51:38", "remaining_time": "1:32:52", "throughput": 20031.03, "total_tokens": 134179968} +{"current_steps": 42640, "total_steps": 78105, "loss": 0.2446, "lr": 2.5356408124199616e-06, "epoch": 2.729658792650919, "percentage": 54.59, "elapsed_time": "1:51:39", "remaining_time": "1:32:51", "throughput": 20031.33, "total_tokens": 134195072} +{"current_steps": 42645, "total_steps": 78105, "loss": 0.2583, "lr": 2.5350822159603927e-06, "epoch": 2.7299788745918954, "percentage": 54.6, "elapsed_time": "1:51:39", "remaining_time": "1:32:51", "throughput": 20031.66, "total_tokens": 134210816} +{"current_steps": 42650, "total_steps": 78105, "loss": 0.1995, "lr": 2.5345236177490012e-06, "epoch": 2.7302989565328724, "percentage": 54.61, "elapsed_time": "1:51:40", "remaining_time": "1:32:50", "throughput": 20031.98, "total_tokens": 134226432} +{"current_steps": 42655, "total_steps": 78105, "loss": 0.2268, "lr": 2.53396501781368e-06, "epoch": 2.7306190384738493, "percentage": 54.61, "elapsed_time": "1:51:41", "remaining_time": "1:32:49", "throughput": 20032.28, "total_tokens": 134241536} +{"current_steps": 42660, "total_steps": 78105, "loss": 0.2936, "lr": 2.5334064161823223e-06, "epoch": 2.7309391204148263, "percentage": 54.62, "elapsed_time": "1:51:41", "remaining_time": "1:32:48", "throughput": 20032.57, "total_tokens": 134256896} +{"current_steps": 42665, "total_steps": 78105, "loss": 0.1958, "lr": 2.532847812882822e-06, "epoch": 2.731259202355803, "percentage": 54.63, "elapsed_time": "1:51:42", "remaining_time": "1:32:47", "throughput": 20032.88, "total_tokens": 134272704} +{"current_steps": 42670, "total_steps": 78105, "loss": 0.2381, "lr": 2.532289207943074e-06, "epoch": 2.73157928429678, "percentage": 54.63, "elapsed_time": "1:51:43", "remaining_time": "1:32:46", "throughput": 20033.15, "total_tokens": 134287616} +{"current_steps": 42675, "total_steps": 78105, "loss": 0.2003, "lr": 2.5317306013909703e-06, "epoch": 2.731899366237757, "percentage": 54.64, "elapsed_time": "1:51:43", "remaining_time": "1:32:45", "throughput": 20033.49, "total_tokens": 134303552} +{"current_steps": 42680, "total_steps": 78105, "loss": 0.2278, "lr": 2.5311719932544067e-06, "epoch": 2.732219448178734, "percentage": 54.64, "elapsed_time": "1:51:44", "remaining_time": "1:32:44", "throughput": 20033.85, "total_tokens": 134319936} +{"current_steps": 42685, "total_steps": 78105, "loss": 0.2232, "lr": 2.5306133835612744e-06, "epoch": 2.732539530119711, "percentage": 54.65, "elapsed_time": "1:51:45", "remaining_time": "1:32:44", "throughput": 20034.16, "total_tokens": 134335616} +{"current_steps": 42690, "total_steps": 78105, "loss": 0.2385, "lr": 2.530054772339471e-06, "epoch": 2.7328596120606874, "percentage": 54.66, "elapsed_time": "1:51:46", "remaining_time": "1:32:43", "throughput": 20034.52, "total_tokens": 134352320} +{"current_steps": 42695, "total_steps": 78105, "loss": 0.1843, "lr": 2.5294961596168876e-06, "epoch": 2.7331796940016644, "percentage": 54.66, "elapsed_time": "1:51:46", "remaining_time": "1:32:42", "throughput": 20034.8, "total_tokens": 134367104} +{"current_steps": 42700, "total_steps": 78105, "loss": 0.1649, "lr": 2.5289375454214203e-06, "epoch": 2.7334997759426414, "percentage": 54.67, "elapsed_time": "1:51:47", "remaining_time": "1:32:41", "throughput": 20035.14, "total_tokens": 134382976} +{"current_steps": 42705, "total_steps": 78105, "loss": 0.2481, "lr": 2.5283789297809626e-06, "epoch": 2.7338198578836184, "percentage": 54.68, "elapsed_time": "1:51:48", "remaining_time": "1:32:40", "throughput": 20035.45, "total_tokens": 134398528} +{"current_steps": 42710, "total_steps": 78105, "loss": 0.2089, "lr": 2.5278203127234087e-06, "epoch": 2.734139939824595, "percentage": 54.68, "elapsed_time": "1:51:48", "remaining_time": "1:32:39", "throughput": 20035.76, "total_tokens": 134414080} +{"current_steps": 42715, "total_steps": 78105, "loss": 0.2383, "lr": 2.5272616942766533e-06, "epoch": 2.734460021765572, "percentage": 54.69, "elapsed_time": "1:51:49", "remaining_time": "1:32:38", "throughput": 20036.1, "total_tokens": 134429824} +{"current_steps": 42720, "total_steps": 78105, "loss": 0.2569, "lr": 2.5267030744685907e-06, "epoch": 2.734780103706549, "percentage": 54.7, "elapsed_time": "1:51:50", "remaining_time": "1:32:37", "throughput": 20036.45, "total_tokens": 134445824} +{"current_steps": 42725, "total_steps": 78105, "loss": 0.2395, "lr": 2.5261444533271157e-06, "epoch": 2.735100185647526, "percentage": 54.7, "elapsed_time": "1:51:50", "remaining_time": "1:32:37", "throughput": 20036.78, "total_tokens": 134461376} +{"current_steps": 42730, "total_steps": 78105, "loss": 0.215, "lr": 2.5255858308801236e-06, "epoch": 2.735420267588503, "percentage": 54.71, "elapsed_time": "1:51:51", "remaining_time": "1:32:36", "throughput": 20037.08, "total_tokens": 134476992} +{"current_steps": 42735, "total_steps": 78105, "loss": 0.2526, "lr": 2.525027207155508e-06, "epoch": 2.7357403495294794, "percentage": 54.71, "elapsed_time": "1:51:52", "remaining_time": "1:32:35", "throughput": 20037.48, "total_tokens": 134493632} +{"current_steps": 42740, "total_steps": 78105, "loss": 0.2965, "lr": 2.5244685821811633e-06, "epoch": 2.7360604314704564, "percentage": 54.72, "elapsed_time": "1:51:52", "remaining_time": "1:32:34", "throughput": 20037.77, "total_tokens": 134508992} +{"current_steps": 42745, "total_steps": 78105, "loss": 0.1246, "lr": 2.523909955984986e-06, "epoch": 2.7363805134114334, "percentage": 54.73, "elapsed_time": "1:51:53", "remaining_time": "1:32:33", "throughput": 20038.09, "total_tokens": 134524608} +{"current_steps": 42750, "total_steps": 78105, "loss": 0.2433, "lr": 2.5233513285948694e-06, "epoch": 2.73670059535241, "percentage": 54.73, "elapsed_time": "1:51:54", "remaining_time": "1:32:32", "throughput": 20038.32, "total_tokens": 134538880} +{"current_steps": 42755, "total_steps": 78105, "loss": 0.2578, "lr": 2.52279270003871e-06, "epoch": 2.737020677293387, "percentage": 54.74, "elapsed_time": "1:51:54", "remaining_time": "1:32:31", "throughput": 20038.66, "total_tokens": 134554752} +{"current_steps": 42760, "total_steps": 78105, "loss": 0.3722, "lr": 2.5222340703444014e-06, "epoch": 2.737340759234364, "percentage": 54.75, "elapsed_time": "1:51:55", "remaining_time": "1:32:30", "throughput": 20038.94, "total_tokens": 134569728} +{"current_steps": 42765, "total_steps": 78105, "loss": 0.2052, "lr": 2.521675439539839e-06, "epoch": 2.737660841175341, "percentage": 54.75, "elapsed_time": "1:51:56", "remaining_time": "1:32:30", "throughput": 20039.24, "total_tokens": 134585088} +{"current_steps": 42770, "total_steps": 78105, "loss": 0.3977, "lr": 2.521116807652918e-06, "epoch": 2.737980923116318, "percentage": 54.76, "elapsed_time": "1:51:56", "remaining_time": "1:32:29", "throughput": 20039.47, "total_tokens": 134599296} +{"current_steps": 42775, "total_steps": 78105, "loss": 0.1524, "lr": 2.5205581747115337e-06, "epoch": 2.738301005057295, "percentage": 54.77, "elapsed_time": "1:51:57", "remaining_time": "1:32:28", "throughput": 20039.76, "total_tokens": 134614528} +{"current_steps": 42780, "total_steps": 78105, "loss": 0.2973, "lr": 2.519999540743581e-06, "epoch": 2.7386210869982714, "percentage": 54.77, "elapsed_time": "1:51:58", "remaining_time": "1:32:27", "throughput": 20040.01, "total_tokens": 134629312} +{"current_steps": 42785, "total_steps": 78105, "loss": 0.1537, "lr": 2.519440905776956e-06, "epoch": 2.7389411689392484, "percentage": 54.78, "elapsed_time": "1:51:58", "remaining_time": "1:32:26", "throughput": 20040.33, "total_tokens": 134644736} +{"current_steps": 42790, "total_steps": 78105, "loss": 0.2517, "lr": 2.5188822698395533e-06, "epoch": 2.7392612508802254, "percentage": 54.79, "elapsed_time": "1:51:59", "remaining_time": "1:32:25", "throughput": 20040.61, "total_tokens": 134660224} +{"current_steps": 42795, "total_steps": 78105, "loss": 0.3474, "lr": 2.518323632959268e-06, "epoch": 2.739581332821202, "percentage": 54.79, "elapsed_time": "1:52:00", "remaining_time": "1:32:24", "throughput": 20040.9, "total_tokens": 134675584} +{"current_steps": 42800, "total_steps": 78105, "loss": 0.2745, "lr": 2.5177649951639958e-06, "epoch": 2.739901414762179, "percentage": 54.8, "elapsed_time": "1:52:00", "remaining_time": "1:32:23", "throughput": 20041.3, "total_tokens": 134692416} +{"current_steps": 42805, "total_steps": 78105, "loss": 0.2483, "lr": 2.517206356481633e-06, "epoch": 2.740221496703156, "percentage": 54.8, "elapsed_time": "1:52:01", "remaining_time": "1:32:22", "throughput": 20041.59, "total_tokens": 134707392} +{"current_steps": 42810, "total_steps": 78105, "loss": 0.2671, "lr": 2.5166477169400736e-06, "epoch": 2.740541578644133, "percentage": 54.81, "elapsed_time": "1:52:02", "remaining_time": "1:32:22", "throughput": 20041.88, "total_tokens": 134722368} +{"current_steps": 42815, "total_steps": 78105, "loss": 0.2706, "lr": 2.516089076567215e-06, "epoch": 2.74086166058511, "percentage": 54.82, "elapsed_time": "1:52:02", "remaining_time": "1:32:21", "throughput": 20042.15, "total_tokens": 134737472} +{"current_steps": 42820, "total_steps": 78105, "loss": 0.2192, "lr": 2.515530435390951e-06, "epoch": 2.741181742526087, "percentage": 54.82, "elapsed_time": "1:52:03", "remaining_time": "1:32:20", "throughput": 20042.43, "total_tokens": 134752448} +{"current_steps": 42825, "total_steps": 78105, "loss": 0.2388, "lr": 2.514971793439178e-06, "epoch": 2.7415018244670635, "percentage": 54.83, "elapsed_time": "1:52:04", "remaining_time": "1:32:19", "throughput": 20042.95, "total_tokens": 134771520} +{"current_steps": 42830, "total_steps": 78105, "loss": 0.2213, "lr": 2.514413150739791e-06, "epoch": 2.7418219064080405, "percentage": 54.84, "elapsed_time": "1:52:04", "remaining_time": "1:32:18", "throughput": 20043.24, "total_tokens": 134787264} +{"current_steps": 42835, "total_steps": 78105, "loss": 0.2271, "lr": 2.513854507320687e-06, "epoch": 2.7421419883490175, "percentage": 54.84, "elapsed_time": "1:52:05", "remaining_time": "1:32:17", "throughput": 20043.59, "total_tokens": 134803584} +{"current_steps": 42840, "total_steps": 78105, "loss": 0.2145, "lr": 2.513295863209761e-06, "epoch": 2.742462070289994, "percentage": 54.85, "elapsed_time": "1:52:06", "remaining_time": "1:32:16", "throughput": 20043.96, "total_tokens": 134819904} +{"current_steps": 42845, "total_steps": 78105, "loss": 0.1872, "lr": 2.5127372184349084e-06, "epoch": 2.742782152230971, "percentage": 54.86, "elapsed_time": "1:52:06", "remaining_time": "1:32:16", "throughput": 20044.34, "total_tokens": 134836736} +{"current_steps": 42850, "total_steps": 78105, "loss": 0.1589, "lr": 2.512178573024026e-06, "epoch": 2.743102234171948, "percentage": 54.86, "elapsed_time": "1:52:07", "remaining_time": "1:32:15", "throughput": 20044.61, "total_tokens": 134851584} +{"current_steps": 42855, "total_steps": 78105, "loss": 0.148, "lr": 2.511619927005009e-06, "epoch": 2.743422316112925, "percentage": 54.87, "elapsed_time": "1:52:08", "remaining_time": "1:32:14", "throughput": 20044.92, "total_tokens": 134867136} +{"current_steps": 42860, "total_steps": 78105, "loss": 0.1865, "lr": 2.511061280405753e-06, "epoch": 2.743742398053902, "percentage": 54.87, "elapsed_time": "1:52:08", "remaining_time": "1:32:13", "throughput": 20045.31, "total_tokens": 134884288} +{"current_steps": 42865, "total_steps": 78105, "loss": 0.3706, "lr": 2.5105026332541553e-06, "epoch": 2.744062479994879, "percentage": 54.88, "elapsed_time": "1:52:09", "remaining_time": "1:32:12", "throughput": 20045.69, "total_tokens": 134901120} +{"current_steps": 42870, "total_steps": 78105, "loss": 0.2, "lr": 2.5099439855781107e-06, "epoch": 2.7443825619358555, "percentage": 54.89, "elapsed_time": "1:52:10", "remaining_time": "1:32:11", "throughput": 20046.0, "total_tokens": 134916736} +{"current_steps": 42875, "total_steps": 78105, "loss": 0.3615, "lr": 2.509385337405514e-06, "epoch": 2.7447026438768325, "percentage": 54.89, "elapsed_time": "1:52:11", "remaining_time": "1:32:10", "throughput": 20046.4, "total_tokens": 134933312} +{"current_steps": 42880, "total_steps": 78105, "loss": 0.2678, "lr": 2.5088266887642637e-06, "epoch": 2.7450227258178095, "percentage": 54.9, "elapsed_time": "1:52:11", "remaining_time": "1:32:09", "throughput": 20046.65, "total_tokens": 134948224} +{"current_steps": 42885, "total_steps": 78105, "loss": 0.2857, "lr": 2.5082680396822533e-06, "epoch": 2.745342807758786, "percentage": 54.91, "elapsed_time": "1:52:12", "remaining_time": "1:32:09", "throughput": 20047.02, "total_tokens": 134964672} +{"current_steps": 42890, "total_steps": 78105, "loss": 0.2464, "lr": 2.507709390187381e-06, "epoch": 2.745662889699763, "percentage": 54.91, "elapsed_time": "1:52:13", "remaining_time": "1:32:08", "throughput": 20047.3, "total_tokens": 134979840} +{"current_steps": 42895, "total_steps": 78105, "loss": 0.2792, "lr": 2.507150740307541e-06, "epoch": 2.74598297164074, "percentage": 54.92, "elapsed_time": "1:52:13", "remaining_time": "1:32:07", "throughput": 20047.61, "total_tokens": 134995648} +{"current_steps": 42900, "total_steps": 78105, "loss": 0.2634, "lr": 2.5065920900706313e-06, "epoch": 2.746303053581717, "percentage": 54.93, "elapsed_time": "1:52:14", "remaining_time": "1:32:06", "throughput": 20047.95, "total_tokens": 135011904} +{"current_steps": 42905, "total_steps": 78105, "loss": 0.2425, "lr": 2.5060334395045467e-06, "epoch": 2.746623135522694, "percentage": 54.93, "elapsed_time": "1:52:15", "remaining_time": "1:32:05", "throughput": 20048.36, "total_tokens": 135029440} +{"current_steps": 42910, "total_steps": 78105, "loss": 0.3343, "lr": 2.505474788637184e-06, "epoch": 2.7469432174636705, "percentage": 54.94, "elapsed_time": "1:52:15", "remaining_time": "1:32:04", "throughput": 20048.65, "total_tokens": 135044992} +{"current_steps": 42915, "total_steps": 78105, "loss": 0.1481, "lr": 2.5049161374964375e-06, "epoch": 2.7472632994046475, "percentage": 54.95, "elapsed_time": "1:52:16", "remaining_time": "1:32:03", "throughput": 20049.02, "total_tokens": 135061824} +{"current_steps": 42920, "total_steps": 78105, "loss": 0.2286, "lr": 2.5043574861102056e-06, "epoch": 2.7475833813456245, "percentage": 54.95, "elapsed_time": "1:52:17", "remaining_time": "1:32:03", "throughput": 20049.3, "total_tokens": 135076736} +{"current_steps": 42925, "total_steps": 78105, "loss": 0.4465, "lr": 2.5037988345063837e-06, "epoch": 2.7479034632866015, "percentage": 54.96, "elapsed_time": "1:52:17", "remaining_time": "1:32:02", "throughput": 20049.73, "total_tokens": 135094528} +{"current_steps": 42930, "total_steps": 78105, "loss": 0.2589, "lr": 2.503240182712868e-06, "epoch": 2.748223545227578, "percentage": 54.96, "elapsed_time": "1:52:18", "remaining_time": "1:32:01", "throughput": 20050.09, "total_tokens": 135111104} +{"current_steps": 42935, "total_steps": 78105, "loss": 0.1621, "lr": 2.5026815307575536e-06, "epoch": 2.748543627168555, "percentage": 54.97, "elapsed_time": "1:52:19", "remaining_time": "1:32:00", "throughput": 20050.39, "total_tokens": 135126336} +{"current_steps": 42940, "total_steps": 78105, "loss": 0.2015, "lr": 2.5021228786683377e-06, "epoch": 2.748863709109532, "percentage": 54.98, "elapsed_time": "1:52:19", "remaining_time": "1:31:59", "throughput": 20050.64, "total_tokens": 135140928} +{"current_steps": 42945, "total_steps": 78105, "loss": 0.1642, "lr": 2.5015642264731165e-06, "epoch": 2.749183791050509, "percentage": 54.98, "elapsed_time": "1:52:20", "remaining_time": "1:31:58", "throughput": 20050.96, "total_tokens": 135156736} +{"current_steps": 42950, "total_steps": 78105, "loss": 0.2276, "lr": 2.5010055741997862e-06, "epoch": 2.749503872991486, "percentage": 54.99, "elapsed_time": "1:52:21", "remaining_time": "1:31:57", "throughput": 20051.22, "total_tokens": 135171264} +{"current_steps": 42955, "total_steps": 78105, "loss": 0.2371, "lr": 2.500446921876242e-06, "epoch": 2.7498239549324626, "percentage": 55.0, "elapsed_time": "1:52:22", "remaining_time": "1:31:56", "throughput": 20051.62, "total_tokens": 135188096} +{"current_steps": 42960, "total_steps": 78105, "loss": 0.252, "lr": 2.4998882695303825e-06, "epoch": 2.7501440368734396, "percentage": 55.0, "elapsed_time": "1:52:22", "remaining_time": "1:31:56", "throughput": 20051.95, "total_tokens": 135203840} +{"current_steps": 42965, "total_steps": 78105, "loss": 0.2395, "lr": 2.4993296171901006e-06, "epoch": 2.7504641188144165, "percentage": 55.01, "elapsed_time": "1:52:23", "remaining_time": "1:31:55", "throughput": 20052.27, "total_tokens": 135219840} +{"current_steps": 42966, "total_steps": 78105, "eval_loss": 0.5056279301643372, "epoch": 2.750528135202612, "percentage": 55.01, "elapsed_time": "1:53:14", "remaining_time": "1:32:36", "throughput": 19901.93, "total_tokens": 135222656} +{"current_steps": 42970, "total_steps": 78105, "loss": 0.208, "lr": 2.4987709648832947e-06, "epoch": 2.7507842007553935, "percentage": 55.02, "elapsed_time": "1:53:47", "remaining_time": "1:33:02", "throughput": 19806.45, "total_tokens": 135235328} +{"current_steps": 42975, "total_steps": 78105, "loss": 0.3785, "lr": 2.49821231263786e-06, "epoch": 2.75110428269637, "percentage": 55.02, "elapsed_time": "1:53:48", "remaining_time": "1:33:01", "throughput": 19806.74, "total_tokens": 135249984} +{"current_steps": 42980, "total_steps": 78105, "loss": 0.3224, "lr": 2.4976536604816944e-06, "epoch": 2.751424364637347, "percentage": 55.03, "elapsed_time": "1:53:49", "remaining_time": "1:33:01", "throughput": 19807.04, "total_tokens": 135265024} +{"current_steps": 42985, "total_steps": 78105, "loss": 0.1517, "lr": 2.497095008442691e-06, "epoch": 2.751744446578324, "percentage": 55.03, "elapsed_time": "1:53:49", "remaining_time": "1:33:00", "throughput": 19807.48, "total_tokens": 135282368} +{"current_steps": 42990, "total_steps": 78105, "loss": 0.1871, "lr": 2.4965363565487485e-06, "epoch": 2.752064528519301, "percentage": 55.04, "elapsed_time": "1:53:50", "remaining_time": "1:32:59", "throughput": 19807.85, "total_tokens": 135298560} +{"current_steps": 42995, "total_steps": 78105, "loss": 0.2182, "lr": 2.4959777048277624e-06, "epoch": 2.752384610460278, "percentage": 55.05, "elapsed_time": "1:53:51", "remaining_time": "1:32:58", "throughput": 19808.17, "total_tokens": 135314112} +{"current_steps": 43000, "total_steps": 78105, "loss": 0.2873, "lr": 2.495419053307629e-06, "epoch": 2.7527046924012546, "percentage": 55.05, "elapsed_time": "1:53:51", "remaining_time": "1:32:57", "throughput": 19808.46, "total_tokens": 135328832} +{"current_steps": 43005, "total_steps": 78105, "loss": 0.2026, "lr": 2.494860402016244e-06, "epoch": 2.7530247743422316, "percentage": 55.06, "elapsed_time": "1:53:52", "remaining_time": "1:32:56", "throughput": 19808.74, "total_tokens": 135343360} +{"current_steps": 43010, "total_steps": 78105, "loss": 0.2787, "lr": 2.4943017509815034e-06, "epoch": 2.7533448562832086, "percentage": 55.07, "elapsed_time": "1:53:53", "remaining_time": "1:32:55", "throughput": 19809.06, "total_tokens": 135358720} +{"current_steps": 43015, "total_steps": 78105, "loss": 0.261, "lr": 2.493743100231305e-06, "epoch": 2.753664938224185, "percentage": 55.07, "elapsed_time": "1:53:53", "remaining_time": "1:32:54", "throughput": 19809.32, "total_tokens": 135373248} +{"current_steps": 43020, "total_steps": 78105, "loss": 0.2126, "lr": 2.493184449793542e-06, "epoch": 2.753985020165162, "percentage": 55.08, "elapsed_time": "1:53:54", "remaining_time": "1:32:53", "throughput": 19809.66, "total_tokens": 135388800} +{"current_steps": 43025, "total_steps": 78105, "loss": 0.3461, "lr": 2.492625799696113e-06, "epoch": 2.754305102106139, "percentage": 55.09, "elapsed_time": "1:53:55", "remaining_time": "1:32:52", "throughput": 19810.02, "total_tokens": 135404736} +{"current_steps": 43030, "total_steps": 78105, "loss": 0.155, "lr": 2.4920671499669122e-06, "epoch": 2.754625184047116, "percentage": 55.09, "elapsed_time": "1:53:55", "remaining_time": "1:32:52", "throughput": 19810.32, "total_tokens": 135419840} +{"current_steps": 43035, "total_steps": 78105, "loss": 0.2702, "lr": 2.491508500633838e-06, "epoch": 2.754945265988093, "percentage": 55.1, "elapsed_time": "1:53:56", "remaining_time": "1:32:51", "throughput": 19810.68, "total_tokens": 135435840} +{"current_steps": 43040, "total_steps": 78105, "loss": 0.2132, "lr": 2.490949851724784e-06, "epoch": 2.75526534792907, "percentage": 55.11, "elapsed_time": "1:53:57", "remaining_time": "1:32:50", "throughput": 19811.05, "total_tokens": 135451840} +{"current_steps": 43045, "total_steps": 78105, "loss": 0.2534, "lr": 2.490391203267647e-06, "epoch": 2.7555854298700466, "percentage": 55.11, "elapsed_time": "1:53:57", "remaining_time": "1:32:49", "throughput": 19811.38, "total_tokens": 135467392} +{"current_steps": 43050, "total_steps": 78105, "loss": 0.2481, "lr": 2.4898325552903247e-06, "epoch": 2.7559055118110236, "percentage": 55.12, "elapsed_time": "1:53:58", "remaining_time": "1:32:48", "throughput": 19811.65, "total_tokens": 135482048} +{"current_steps": 43055, "total_steps": 78105, "loss": 0.4394, "lr": 2.48927390782071e-06, "epoch": 2.7562255937520006, "percentage": 55.12, "elapsed_time": "1:53:59", "remaining_time": "1:32:47", "throughput": 19811.98, "total_tokens": 135497792} +{"current_steps": 43060, "total_steps": 78105, "loss": 0.2531, "lr": 2.4887152608867015e-06, "epoch": 2.756545675692977, "percentage": 55.13, "elapsed_time": "1:53:59", "remaining_time": "1:32:46", "throughput": 19812.35, "total_tokens": 135514176} +{"current_steps": 43065, "total_steps": 78105, "loss": 0.2848, "lr": 2.4881566145161934e-06, "epoch": 2.756865757633954, "percentage": 55.14, "elapsed_time": "1:54:00", "remaining_time": "1:32:45", "throughput": 19812.68, "total_tokens": 135529792} +{"current_steps": 43070, "total_steps": 78105, "loss": 0.3337, "lr": 2.4875979687370834e-06, "epoch": 2.757185839574931, "percentage": 55.14, "elapsed_time": "1:54:01", "remaining_time": "1:32:44", "throughput": 19813.03, "total_tokens": 135545664} +{"current_steps": 43075, "total_steps": 78105, "loss": 0.2029, "lr": 2.4870393235772653e-06, "epoch": 2.757505921515908, "percentage": 55.15, "elapsed_time": "1:54:01", "remaining_time": "1:32:44", "throughput": 19813.37, "total_tokens": 135561472} +{"current_steps": 43080, "total_steps": 78105, "loss": 0.2714, "lr": 2.4864806790646364e-06, "epoch": 2.757826003456885, "percentage": 55.16, "elapsed_time": "1:54:02", "remaining_time": "1:32:43", "throughput": 19813.7, "total_tokens": 135577024} +{"current_steps": 43085, "total_steps": 78105, "loss": 0.2499, "lr": 2.485922035227091e-06, "epoch": 2.758146085397862, "percentage": 55.16, "elapsed_time": "1:54:03", "remaining_time": "1:32:42", "throughput": 19814.02, "total_tokens": 135592320} +{"current_steps": 43090, "total_steps": 78105, "loss": 0.171, "lr": 2.485363392092527e-06, "epoch": 2.7584661673388386, "percentage": 55.17, "elapsed_time": "1:54:03", "remaining_time": "1:32:41", "throughput": 19814.35, "total_tokens": 135608000} +{"current_steps": 43095, "total_steps": 78105, "loss": 0.2985, "lr": 2.484804749688838e-06, "epoch": 2.7587862492798156, "percentage": 55.18, "elapsed_time": "1:54:04", "remaining_time": "1:32:40", "throughput": 19814.65, "total_tokens": 135623232} +{"current_steps": 43100, "total_steps": 78105, "loss": 0.1919, "lr": 2.484246108043921e-06, "epoch": 2.7591063312207926, "percentage": 55.18, "elapsed_time": "1:54:05", "remaining_time": "1:32:39", "throughput": 19815.02, "total_tokens": 135639744} +{"current_steps": 43105, "total_steps": 78105, "loss": 0.2325, "lr": 2.483687467185672e-06, "epoch": 2.759426413161769, "percentage": 55.19, "elapsed_time": "1:54:05", "remaining_time": "1:32:38", "throughput": 19815.33, "total_tokens": 135654976} +{"current_steps": 43110, "total_steps": 78105, "loss": 0.2432, "lr": 2.483128827141985e-06, "epoch": 2.759746495102746, "percentage": 55.19, "elapsed_time": "1:54:06", "remaining_time": "1:32:37", "throughput": 19815.77, "total_tokens": 135672192} +{"current_steps": 43115, "total_steps": 78105, "loss": 0.2071, "lr": 2.482570187940757e-06, "epoch": 2.760066577043723, "percentage": 55.2, "elapsed_time": "1:54:07", "remaining_time": "1:32:36", "throughput": 19816.08, "total_tokens": 135687808} +{"current_steps": 43120, "total_steps": 78105, "loss": 0.2047, "lr": 2.4820115496098823e-06, "epoch": 2.7603866589847, "percentage": 55.21, "elapsed_time": "1:54:08", "remaining_time": "1:32:36", "throughput": 19816.34, "total_tokens": 135702336} +{"current_steps": 43125, "total_steps": 78105, "loss": 0.1747, "lr": 2.4814529121772583e-06, "epoch": 2.760706740925677, "percentage": 55.21, "elapsed_time": "1:54:08", "remaining_time": "1:32:35", "throughput": 19816.65, "total_tokens": 135718016} +{"current_steps": 43130, "total_steps": 78105, "loss": 0.311, "lr": 2.4808942756707784e-06, "epoch": 2.761026822866654, "percentage": 55.22, "elapsed_time": "1:54:09", "remaining_time": "1:32:34", "throughput": 19816.92, "total_tokens": 135732672} +{"current_steps": 43135, "total_steps": 78105, "loss": 0.1994, "lr": 2.4803356401183397e-06, "epoch": 2.7613469048076307, "percentage": 55.23, "elapsed_time": "1:54:09", "remaining_time": "1:32:33", "throughput": 19817.2, "total_tokens": 135747648} +{"current_steps": 43140, "total_steps": 78105, "loss": 0.27, "lr": 2.479777005547836e-06, "epoch": 2.7616669867486077, "percentage": 55.23, "elapsed_time": "1:54:10", "remaining_time": "1:32:32", "throughput": 19817.49, "total_tokens": 135762944} +{"current_steps": 43145, "total_steps": 78105, "loss": 0.2364, "lr": 2.4792183719871633e-06, "epoch": 2.7619870686895847, "percentage": 55.24, "elapsed_time": "1:54:11", "remaining_time": "1:32:31", "throughput": 19817.83, "total_tokens": 135779008} +{"current_steps": 43150, "total_steps": 78105, "loss": 0.1714, "lr": 2.4786597394642162e-06, "epoch": 2.762307150630561, "percentage": 55.25, "elapsed_time": "1:54:12", "remaining_time": "1:32:30", "throughput": 19818.14, "total_tokens": 135794560} +{"current_steps": 43155, "total_steps": 78105, "loss": 0.1911, "lr": 2.478101108006892e-06, "epoch": 2.762627232571538, "percentage": 55.25, "elapsed_time": "1:54:12", "remaining_time": "1:32:29", "throughput": 19818.43, "total_tokens": 135809280} +{"current_steps": 43160, "total_steps": 78105, "loss": 0.2429, "lr": 2.4775424776430833e-06, "epoch": 2.762947314512515, "percentage": 55.26, "elapsed_time": "1:54:13", "remaining_time": "1:32:28", "throughput": 19818.71, "total_tokens": 135824192} +{"current_steps": 43165, "total_steps": 78105, "loss": 0.2434, "lr": 2.4769838484006864e-06, "epoch": 2.763267396453492, "percentage": 55.27, "elapsed_time": "1:54:14", "remaining_time": "1:32:28", "throughput": 19819.06, "total_tokens": 135840576} +{"current_steps": 43170, "total_steps": 78105, "loss": 0.2945, "lr": 2.4764252203075968e-06, "epoch": 2.763587478394469, "percentage": 55.27, "elapsed_time": "1:54:14", "remaining_time": "1:32:27", "throughput": 19819.42, "total_tokens": 135856640} +{"current_steps": 43175, "total_steps": 78105, "loss": 0.1681, "lr": 2.475866593391708e-06, "epoch": 2.7639075603354457, "percentage": 55.28, "elapsed_time": "1:54:15", "remaining_time": "1:32:26", "throughput": 19819.74, "total_tokens": 135872576} +{"current_steps": 43180, "total_steps": 78105, "loss": 0.3037, "lr": 2.475307967680917e-06, "epoch": 2.7642276422764227, "percentage": 55.28, "elapsed_time": "1:54:16", "remaining_time": "1:32:25", "throughput": 19820.03, "total_tokens": 135887680} +{"current_steps": 43185, "total_steps": 78105, "loss": 0.3184, "lr": 2.474749343203117e-06, "epoch": 2.7645477242173997, "percentage": 55.29, "elapsed_time": "1:54:16", "remaining_time": "1:32:24", "throughput": 19820.35, "total_tokens": 135903488} +{"current_steps": 43190, "total_steps": 78105, "loss": 0.1819, "lr": 2.4741907199862037e-06, "epoch": 2.7648678061583767, "percentage": 55.3, "elapsed_time": "1:54:17", "remaining_time": "1:32:23", "throughput": 19820.75, "total_tokens": 135920192} +{"current_steps": 43195, "total_steps": 78105, "loss": 0.2257, "lr": 2.4736320980580706e-06, "epoch": 2.7651878880993532, "percentage": 55.3, "elapsed_time": "1:54:18", "remaining_time": "1:32:22", "throughput": 19821.07, "total_tokens": 135935552} +{"current_steps": 43200, "total_steps": 78105, "loss": 0.1637, "lr": 2.4730734774466137e-06, "epoch": 2.76550797004033, "percentage": 55.31, "elapsed_time": "1:54:18", "remaining_time": "1:32:21", "throughput": 19821.43, "total_tokens": 135951936} +{"current_steps": 43205, "total_steps": 78105, "loss": 0.1806, "lr": 2.4725148581797264e-06, "epoch": 2.765828051981307, "percentage": 55.32, "elapsed_time": "1:54:19", "remaining_time": "1:32:20", "throughput": 19821.67, "total_tokens": 135966144} +{"current_steps": 43210, "total_steps": 78105, "loss": 0.2165, "lr": 2.4719562402853047e-06, "epoch": 2.766148133922284, "percentage": 55.32, "elapsed_time": "1:54:20", "remaining_time": "1:32:20", "throughput": 19821.97, "total_tokens": 135981568} +{"current_steps": 43215, "total_steps": 78105, "loss": 0.264, "lr": 2.471397623791242e-06, "epoch": 2.766468215863261, "percentage": 55.33, "elapsed_time": "1:54:20", "remaining_time": "1:32:19", "throughput": 19822.28, "total_tokens": 135997248} +{"current_steps": 43220, "total_steps": 78105, "loss": 0.1761, "lr": 2.4708390087254322e-06, "epoch": 2.7667882978042377, "percentage": 55.34, "elapsed_time": "1:54:21", "remaining_time": "1:32:18", "throughput": 19822.58, "total_tokens": 136012416} +{"current_steps": 43225, "total_steps": 78105, "loss": 0.2544, "lr": 2.470280395115771e-06, "epoch": 2.7671083797452147, "percentage": 55.34, "elapsed_time": "1:54:22", "remaining_time": "1:32:17", "throughput": 19822.84, "total_tokens": 136027264} +{"current_steps": 43230, "total_steps": 78105, "loss": 0.2152, "lr": 2.4697217829901526e-06, "epoch": 2.7674284616861917, "percentage": 55.35, "elapsed_time": "1:54:22", "remaining_time": "1:32:16", "throughput": 19823.1, "total_tokens": 136041600} +{"current_steps": 43235, "total_steps": 78105, "loss": 0.1551, "lr": 2.4691631723764704e-06, "epoch": 2.7677485436271687, "percentage": 55.35, "elapsed_time": "1:54:23", "remaining_time": "1:32:15", "throughput": 19823.43, "total_tokens": 136057600} +{"current_steps": 43240, "total_steps": 78105, "loss": 0.2603, "lr": 2.4686045633026183e-06, "epoch": 2.7680686255681453, "percentage": 55.36, "elapsed_time": "1:54:24", "remaining_time": "1:32:14", "throughput": 19823.79, "total_tokens": 136073856} +{"current_steps": 43245, "total_steps": 78105, "loss": 0.1746, "lr": 2.4680459557964916e-06, "epoch": 2.7683887075091222, "percentage": 55.37, "elapsed_time": "1:54:24", "remaining_time": "1:32:13", "throughput": 19824.1, "total_tokens": 136089600} +{"current_steps": 43250, "total_steps": 78105, "loss": 0.2231, "lr": 2.4674873498859827e-06, "epoch": 2.7687087894500992, "percentage": 55.37, "elapsed_time": "1:54:25", "remaining_time": "1:32:12", "throughput": 19824.6, "total_tokens": 136108224} +{"current_steps": 43255, "total_steps": 78105, "loss": 0.2962, "lr": 2.4669287455989866e-06, "epoch": 2.7690288713910762, "percentage": 55.38, "elapsed_time": "1:54:26", "remaining_time": "1:32:12", "throughput": 19824.87, "total_tokens": 136123648} +{"current_steps": 43260, "total_steps": 78105, "loss": 0.1811, "lr": 2.466370142963396e-06, "epoch": 2.769348953332053, "percentage": 55.39, "elapsed_time": "1:54:27", "remaining_time": "1:32:11", "throughput": 19825.26, "total_tokens": 136140224} +{"current_steps": 43265, "total_steps": 78105, "loss": 0.1884, "lr": 2.4658115420071065e-06, "epoch": 2.7696690352730298, "percentage": 55.39, "elapsed_time": "1:54:27", "remaining_time": "1:32:10", "throughput": 19825.63, "total_tokens": 136157056} +{"current_steps": 43270, "total_steps": 78105, "loss": 0.2015, "lr": 2.4652529427580095e-06, "epoch": 2.7699891172140068, "percentage": 55.4, "elapsed_time": "1:54:28", "remaining_time": "1:32:09", "throughput": 19825.97, "total_tokens": 136173056} +{"current_steps": 43275, "total_steps": 78105, "loss": 0.2896, "lr": 2.4646943452439993e-06, "epoch": 2.7703091991549837, "percentage": 55.41, "elapsed_time": "1:54:29", "remaining_time": "1:32:08", "throughput": 19826.24, "total_tokens": 136187648} +{"current_steps": 43280, "total_steps": 78105, "loss": 0.2189, "lr": 2.464135749492971e-06, "epoch": 2.7706292810959603, "percentage": 55.41, "elapsed_time": "1:54:29", "remaining_time": "1:32:07", "throughput": 19826.53, "total_tokens": 136202880} +{"current_steps": 43285, "total_steps": 78105, "loss": 0.1735, "lr": 2.463577155532815e-06, "epoch": 2.7709493630369373, "percentage": 55.42, "elapsed_time": "1:54:30", "remaining_time": "1:32:06", "throughput": 19826.81, "total_tokens": 136218048} +{"current_steps": 43290, "total_steps": 78105, "loss": 0.2068, "lr": 2.463018563391427e-06, "epoch": 2.7712694449779143, "percentage": 55.43, "elapsed_time": "1:54:31", "remaining_time": "1:32:05", "throughput": 19827.09, "total_tokens": 136232960} +{"current_steps": 43295, "total_steps": 78105, "loss": 0.151, "lr": 2.462459973096699e-06, "epoch": 2.7715895269188913, "percentage": 55.43, "elapsed_time": "1:54:31", "remaining_time": "1:32:04", "throughput": 19827.46, "total_tokens": 136248896} +{"current_steps": 43300, "total_steps": 78105, "loss": 0.2065, "lr": 2.461901384676525e-06, "epoch": 2.7719096088598683, "percentage": 55.44, "elapsed_time": "1:54:32", "remaining_time": "1:32:04", "throughput": 19827.78, "total_tokens": 136264128} +{"current_steps": 43305, "total_steps": 78105, "loss": 0.228, "lr": 2.4613427981587967e-06, "epoch": 2.7722296908008452, "percentage": 55.44, "elapsed_time": "1:54:33", "remaining_time": "1:32:03", "throughput": 19828.09, "total_tokens": 136279232} +{"current_steps": 43310, "total_steps": 78105, "loss": 0.1915, "lr": 2.460784213571408e-06, "epoch": 2.772549772741822, "percentage": 55.45, "elapsed_time": "1:54:33", "remaining_time": "1:32:02", "throughput": 19828.48, "total_tokens": 136295872} +{"current_steps": 43315, "total_steps": 78105, "loss": 0.2784, "lr": 2.460225630942251e-06, "epoch": 2.772869854682799, "percentage": 55.46, "elapsed_time": "1:54:34", "remaining_time": "1:32:01", "throughput": 19828.8, "total_tokens": 136311360} +{"current_steps": 43320, "total_steps": 78105, "loss": 0.2536, "lr": 2.45966705029922e-06, "epoch": 2.7731899366237758, "percentage": 55.46, "elapsed_time": "1:54:35", "remaining_time": "1:32:00", "throughput": 19829.13, "total_tokens": 136326848} +{"current_steps": 43325, "total_steps": 78105, "loss": 0.2467, "lr": 2.459108471670205e-06, "epoch": 2.7735100185647523, "percentage": 55.47, "elapsed_time": "1:54:35", "remaining_time": "1:31:59", "throughput": 19829.46, "total_tokens": 136342400} +{"current_steps": 43330, "total_steps": 78105, "loss": 0.2912, "lr": 2.4585498950831012e-06, "epoch": 2.7738301005057293, "percentage": 55.48, "elapsed_time": "1:54:36", "remaining_time": "1:31:58", "throughput": 19829.77, "total_tokens": 136358016} +{"current_steps": 43335, "total_steps": 78105, "loss": 0.2382, "lr": 2.457991320565799e-06, "epoch": 2.7741501824467063, "percentage": 55.48, "elapsed_time": "1:54:37", "remaining_time": "1:31:57", "throughput": 19830.12, "total_tokens": 136373824} +{"current_steps": 43340, "total_steps": 78105, "loss": 0.2367, "lr": 2.457432748146191e-06, "epoch": 2.7744702643876833, "percentage": 55.49, "elapsed_time": "1:54:37", "remaining_time": "1:31:57", "throughput": 19830.56, "total_tokens": 136390976} +{"current_steps": 43345, "total_steps": 78105, "loss": 0.3068, "lr": 2.45687417785217e-06, "epoch": 2.7747903463286603, "percentage": 55.5, "elapsed_time": "1:54:38", "remaining_time": "1:31:56", "throughput": 19830.87, "total_tokens": 136406336} +{"current_steps": 43350, "total_steps": 78105, "loss": 0.3203, "lr": 2.4563156097116282e-06, "epoch": 2.7751104282696373, "percentage": 55.5, "elapsed_time": "1:54:39", "remaining_time": "1:31:55", "throughput": 19831.19, "total_tokens": 136421888} +{"current_steps": 43355, "total_steps": 78105, "loss": 0.1842, "lr": 2.455757043752458e-06, "epoch": 2.775430510210614, "percentage": 55.51, "elapsed_time": "1:54:39", "remaining_time": "1:31:54", "throughput": 19831.5, "total_tokens": 136437120} +{"current_steps": 43360, "total_steps": 78105, "loss": 0.2768, "lr": 2.4551984800025494e-06, "epoch": 2.775750592151591, "percentage": 55.52, "elapsed_time": "1:54:40", "remaining_time": "1:31:53", "throughput": 19831.82, "total_tokens": 136452608} +{"current_steps": 43365, "total_steps": 78105, "loss": 0.1828, "lr": 2.4546399184897963e-06, "epoch": 2.776070674092568, "percentage": 55.52, "elapsed_time": "1:54:41", "remaining_time": "1:31:52", "throughput": 19832.1, "total_tokens": 136467584} +{"current_steps": 43370, "total_steps": 78105, "loss": 0.2901, "lr": 2.4540813592420885e-06, "epoch": 2.7763907560335443, "percentage": 55.53, "elapsed_time": "1:54:41", "remaining_time": "1:31:51", "throughput": 19832.44, "total_tokens": 136483584} +{"current_steps": 43375, "total_steps": 78105, "loss": 0.2584, "lr": 2.4535228022873193e-06, "epoch": 2.7767108379745213, "percentage": 55.53, "elapsed_time": "1:54:42", "remaining_time": "1:31:50", "throughput": 19832.76, "total_tokens": 136498816} +{"current_steps": 43380, "total_steps": 78105, "loss": 0.3089, "lr": 2.4529642476533784e-06, "epoch": 2.7770309199154983, "percentage": 55.54, "elapsed_time": "1:54:43", "remaining_time": "1:31:49", "throughput": 19833.09, "total_tokens": 136514368} +{"current_steps": 43385, "total_steps": 78105, "loss": 0.2567, "lr": 2.4524056953681595e-06, "epoch": 2.7773510018564753, "percentage": 55.55, "elapsed_time": "1:54:43", "remaining_time": "1:31:48", "throughput": 19833.41, "total_tokens": 136530240} +{"current_steps": 43390, "total_steps": 78105, "loss": 0.2025, "lr": 2.4518471454595503e-06, "epoch": 2.7776710837974523, "percentage": 55.55, "elapsed_time": "1:54:44", "remaining_time": "1:31:48", "throughput": 19833.7, "total_tokens": 136545216} +{"current_steps": 43395, "total_steps": 78105, "loss": 0.2924, "lr": 2.4512885979554445e-06, "epoch": 2.7779911657384293, "percentage": 55.56, "elapsed_time": "1:54:45", "remaining_time": "1:31:47", "throughput": 19834.01, "total_tokens": 136560576} +{"current_steps": 43400, "total_steps": 78105, "loss": 0.3014, "lr": 2.450730052883733e-06, "epoch": 2.778311247679406, "percentage": 55.57, "elapsed_time": "1:54:45", "remaining_time": "1:31:46", "throughput": 19834.27, "total_tokens": 136574912} +{"current_steps": 43405, "total_steps": 78105, "loss": 0.1743, "lr": 2.450171510272305e-06, "epoch": 2.778631329620383, "percentage": 55.57, "elapsed_time": "1:54:46", "remaining_time": "1:31:45", "throughput": 19834.58, "total_tokens": 136590336} +{"current_steps": 43410, "total_steps": 78105, "loss": 0.2105, "lr": 2.4496129701490537e-06, "epoch": 2.77895141156136, "percentage": 55.58, "elapsed_time": "1:54:47", "remaining_time": "1:31:44", "throughput": 19835.02, "total_tokens": 136607616} +{"current_steps": 43415, "total_steps": 78105, "loss": 0.1322, "lr": 2.4490544325418667e-06, "epoch": 2.7792714935023364, "percentage": 55.59, "elapsed_time": "1:54:47", "remaining_time": "1:31:43", "throughput": 19835.45, "total_tokens": 136624768} +{"current_steps": 43420, "total_steps": 78105, "loss": 0.1722, "lr": 2.4484958974786378e-06, "epoch": 2.7795915754433134, "percentage": 55.59, "elapsed_time": "1:54:48", "remaining_time": "1:31:42", "throughput": 19835.81, "total_tokens": 136641280} +{"current_steps": 43425, "total_steps": 78105, "loss": 0.2395, "lr": 2.447937364987254e-06, "epoch": 2.7799116573842904, "percentage": 55.6, "elapsed_time": "1:54:49", "remaining_time": "1:31:41", "throughput": 19836.09, "total_tokens": 136656000} +{"current_steps": 43430, "total_steps": 78105, "loss": 0.2012, "lr": 2.447378835095607e-06, "epoch": 2.7802317393252673, "percentage": 55.6, "elapsed_time": "1:54:49", "remaining_time": "1:31:41", "throughput": 19836.43, "total_tokens": 136671680} +{"current_steps": 43435, "total_steps": 78105, "loss": 0.2567, "lr": 2.4468203078315874e-06, "epoch": 2.7805518212662443, "percentage": 55.61, "elapsed_time": "1:54:50", "remaining_time": "1:31:40", "throughput": 19836.7, "total_tokens": 136686400} +{"current_steps": 43440, "total_steps": 78105, "loss": 0.2767, "lr": 2.446261783223085e-06, "epoch": 2.780871903207221, "percentage": 55.62, "elapsed_time": "1:54:51", "remaining_time": "1:31:39", "throughput": 19837.04, "total_tokens": 136702272} +{"current_steps": 43445, "total_steps": 78105, "loss": 0.3365, "lr": 2.4457032612979883e-06, "epoch": 2.781191985148198, "percentage": 55.62, "elapsed_time": "1:54:51", "remaining_time": "1:31:38", "throughput": 19837.38, "total_tokens": 136718144} +{"current_steps": 43450, "total_steps": 78105, "loss": 0.201, "lr": 2.4451447420841883e-06, "epoch": 2.781512067089175, "percentage": 55.63, "elapsed_time": "1:54:52", "remaining_time": "1:31:37", "throughput": 19837.73, "total_tokens": 136734016} +{"current_steps": 43455, "total_steps": 78105, "loss": 0.2646, "lr": 2.444586225609574e-06, "epoch": 2.781832149030152, "percentage": 55.64, "elapsed_time": "1:54:53", "remaining_time": "1:31:36", "throughput": 19838.11, "total_tokens": 136750656} +{"current_steps": 43460, "total_steps": 78105, "loss": 0.1631, "lr": 2.4440277119020347e-06, "epoch": 2.7821522309711284, "percentage": 55.64, "elapsed_time": "1:54:53", "remaining_time": "1:31:35", "throughput": 19838.38, "total_tokens": 136765504} +{"current_steps": 43465, "total_steps": 78105, "loss": 0.2374, "lr": 2.443469200989461e-06, "epoch": 2.7824723129121054, "percentage": 55.65, "elapsed_time": "1:54:54", "remaining_time": "1:31:34", "throughput": 19838.66, "total_tokens": 136780416} +{"current_steps": 43470, "total_steps": 78105, "loss": 0.3309, "lr": 2.44291069289974e-06, "epoch": 2.7827923948530824, "percentage": 55.66, "elapsed_time": "1:54:55", "remaining_time": "1:31:33", "throughput": 19839.01, "total_tokens": 136796544} +{"current_steps": 43475, "total_steps": 78105, "loss": 0.2476, "lr": 2.4423521876607628e-06, "epoch": 2.7831124767940594, "percentage": 55.66, "elapsed_time": "1:54:56", "remaining_time": "1:31:33", "throughput": 19839.36, "total_tokens": 136812672} +{"current_steps": 43480, "total_steps": 78105, "loss": 0.2369, "lr": 2.441793685300416e-06, "epoch": 2.7834325587350364, "percentage": 55.67, "elapsed_time": "1:54:56", "remaining_time": "1:31:32", "throughput": 19839.68, "total_tokens": 136828224} +{"current_steps": 43485, "total_steps": 78105, "loss": 0.2058, "lr": 2.441235185846589e-06, "epoch": 2.783752640676013, "percentage": 55.68, "elapsed_time": "1:54:57", "remaining_time": "1:31:31", "throughput": 19839.93, "total_tokens": 136842624} +{"current_steps": 43490, "total_steps": 78105, "loss": 0.1918, "lr": 2.4406766893271714e-06, "epoch": 2.78407272261699, "percentage": 55.68, "elapsed_time": "1:54:58", "remaining_time": "1:31:30", "throughput": 19840.25, "total_tokens": 136858368} +{"current_steps": 43495, "total_steps": 78105, "loss": 0.2121, "lr": 2.4401181957700514e-06, "epoch": 2.784392804557967, "percentage": 55.69, "elapsed_time": "1:54:58", "remaining_time": "1:31:29", "throughput": 19840.51, "total_tokens": 136872704} +{"current_steps": 43500, "total_steps": 78105, "loss": 0.2706, "lr": 2.4395597052031157e-06, "epoch": 2.784712886498944, "percentage": 55.69, "elapsed_time": "1:54:59", "remaining_time": "1:31:28", "throughput": 19840.88, "total_tokens": 136889088} +{"current_steps": 43505, "total_steps": 78105, "loss": 0.2903, "lr": 2.439001217654255e-06, "epoch": 2.7850329684399204, "percentage": 55.7, "elapsed_time": "1:55:00", "remaining_time": "1:31:27", "throughput": 19841.24, "total_tokens": 136904960} +{"current_steps": 43510, "total_steps": 78105, "loss": 0.1893, "lr": 2.438442733151354e-06, "epoch": 2.7853530503808974, "percentage": 55.71, "elapsed_time": "1:55:00", "remaining_time": "1:31:26", "throughput": 19841.5, "total_tokens": 136919360} +{"current_steps": 43515, "total_steps": 78105, "loss": 0.1027, "lr": 2.437884251722303e-06, "epoch": 2.7856731323218744, "percentage": 55.71, "elapsed_time": "1:55:01", "remaining_time": "1:31:25", "throughput": 19841.77, "total_tokens": 136934208} +{"current_steps": 43520, "total_steps": 78105, "loss": 0.1721, "lr": 2.437325773394989e-06, "epoch": 2.7859932142628514, "percentage": 55.72, "elapsed_time": "1:55:01", "remaining_time": "1:31:24", "throughput": 19842.12, "total_tokens": 136950208} +{"current_steps": 43525, "total_steps": 78105, "loss": 0.2443, "lr": 2.436767298197299e-06, "epoch": 2.7863132962038284, "percentage": 55.73, "elapsed_time": "1:55:02", "remaining_time": "1:31:24", "throughput": 19842.47, "total_tokens": 136966208} +{"current_steps": 43530, "total_steps": 78105, "loss": 0.2573, "lr": 2.4362088261571218e-06, "epoch": 2.786633378144805, "percentage": 55.73, "elapsed_time": "1:55:03", "remaining_time": "1:31:23", "throughput": 19842.84, "total_tokens": 136982528} +{"current_steps": 43535, "total_steps": 78105, "loss": 0.2591, "lr": 2.4356503573023425e-06, "epoch": 2.786953460085782, "percentage": 55.74, "elapsed_time": "1:55:04", "remaining_time": "1:31:22", "throughput": 19843.07, "total_tokens": 136996928} +{"current_steps": 43540, "total_steps": 78105, "loss": 0.2069, "lr": 2.4350918916608497e-06, "epoch": 2.787273542026759, "percentage": 55.75, "elapsed_time": "1:55:04", "remaining_time": "1:31:21", "throughput": 19843.36, "total_tokens": 137011840} +{"current_steps": 43545, "total_steps": 78105, "loss": 0.2047, "lr": 2.4345334292605293e-06, "epoch": 2.7875936239677355, "percentage": 55.75, "elapsed_time": "1:55:05", "remaining_time": "1:31:20", "throughput": 19843.67, "total_tokens": 137027072} +{"current_steps": 43550, "total_steps": 78105, "loss": 0.2738, "lr": 2.433974970129269e-06, "epoch": 2.7879137059087125, "percentage": 55.76, "elapsed_time": "1:55:05", "remaining_time": "1:31:19", "throughput": 19843.96, "total_tokens": 137041856} +{"current_steps": 43555, "total_steps": 78105, "loss": 0.197, "lr": 2.4334165142949545e-06, "epoch": 2.7882337878496894, "percentage": 55.76, "elapsed_time": "1:55:06", "remaining_time": "1:31:18", "throughput": 19844.31, "total_tokens": 137058176} +{"current_steps": 43560, "total_steps": 78105, "loss": 0.2104, "lr": 2.432858061785473e-06, "epoch": 2.7885538697906664, "percentage": 55.77, "elapsed_time": "1:55:07", "remaining_time": "1:31:17", "throughput": 19844.64, "total_tokens": 137073920} +{"current_steps": 43565, "total_steps": 78105, "loss": 0.3731, "lr": 2.432299612628709e-06, "epoch": 2.7888739517316434, "percentage": 55.78, "elapsed_time": "1:55:08", "remaining_time": "1:31:16", "throughput": 19845.05, "total_tokens": 137091008} +{"current_steps": 43570, "total_steps": 78105, "loss": 0.2411, "lr": 2.4317411668525505e-06, "epoch": 2.7891940336726204, "percentage": 55.78, "elapsed_time": "1:55:08", "remaining_time": "1:31:16", "throughput": 19845.35, "total_tokens": 137106112} +{"current_steps": 43575, "total_steps": 78105, "loss": 0.3352, "lr": 2.4311827244848824e-06, "epoch": 2.789514115613597, "percentage": 55.79, "elapsed_time": "1:55:09", "remaining_time": "1:31:15", "throughput": 19845.65, "total_tokens": 137120960} +{"current_steps": 43580, "total_steps": 78105, "loss": 0.1853, "lr": 2.43062428555359e-06, "epoch": 2.789834197554574, "percentage": 55.8, "elapsed_time": "1:55:10", "remaining_time": "1:31:14", "throughput": 19845.99, "total_tokens": 137136896} +{"current_steps": 43585, "total_steps": 78105, "loss": 0.1409, "lr": 2.430065850086561e-06, "epoch": 2.790154279495551, "percentage": 55.8, "elapsed_time": "1:55:10", "remaining_time": "1:31:13", "throughput": 19846.29, "total_tokens": 137152448} +{"current_steps": 43590, "total_steps": 78105, "loss": 0.1979, "lr": 2.4295074181116777e-06, "epoch": 2.7904743614365275, "percentage": 55.81, "elapsed_time": "1:55:11", "remaining_time": "1:31:12", "throughput": 19846.69, "total_tokens": 137169472} +{"current_steps": 43595, "total_steps": 78105, "loss": 0.2905, "lr": 2.4289489896568273e-06, "epoch": 2.7907944433775045, "percentage": 55.82, "elapsed_time": "1:55:12", "remaining_time": "1:31:11", "throughput": 19847.03, "total_tokens": 137185344} +{"current_steps": 43600, "total_steps": 78105, "loss": 0.2394, "lr": 2.4283905647498943e-06, "epoch": 2.7911145253184815, "percentage": 55.82, "elapsed_time": "1:55:12", "remaining_time": "1:31:10", "throughput": 19847.47, "total_tokens": 137202816} +{"current_steps": 43605, "total_steps": 78105, "loss": 0.172, "lr": 2.427832143418764e-06, "epoch": 2.7914346072594585, "percentage": 55.83, "elapsed_time": "1:55:13", "remaining_time": "1:31:09", "throughput": 19847.79, "total_tokens": 137218432} +{"current_steps": 43610, "total_steps": 78105, "loss": 0.3095, "lr": 2.4272737256913194e-06, "epoch": 2.7917546892004355, "percentage": 55.84, "elapsed_time": "1:55:14", "remaining_time": "1:31:09", "throughput": 19848.23, "total_tokens": 137235776} +{"current_steps": 43615, "total_steps": 78105, "loss": 0.2425, "lr": 2.4267153115954474e-06, "epoch": 2.7920747711414124, "percentage": 55.84, "elapsed_time": "1:55:14", "remaining_time": "1:31:08", "throughput": 19848.55, "total_tokens": 137251328} +{"current_steps": 43620, "total_steps": 78105, "loss": 0.1534, "lr": 2.4261569011590296e-06, "epoch": 2.792394853082389, "percentage": 55.85, "elapsed_time": "1:55:15", "remaining_time": "1:31:07", "throughput": 19848.82, "total_tokens": 137266176} +{"current_steps": 43625, "total_steps": 78105, "loss": 0.2048, "lr": 2.425598494409952e-06, "epoch": 2.792714935023366, "percentage": 55.85, "elapsed_time": "1:55:16", "remaining_time": "1:31:06", "throughput": 19849.13, "total_tokens": 137281600} +{"current_steps": 43630, "total_steps": 78105, "loss": 0.2732, "lr": 2.4250400913760982e-06, "epoch": 2.793035016964343, "percentage": 55.86, "elapsed_time": "1:55:16", "remaining_time": "1:31:05", "throughput": 19849.42, "total_tokens": 137296448} +{"current_steps": 43635, "total_steps": 78105, "loss": 0.2189, "lr": 2.4244816920853516e-06, "epoch": 2.7933550989053195, "percentage": 55.87, "elapsed_time": "1:55:17", "remaining_time": "1:31:04", "throughput": 19849.83, "total_tokens": 137313152} +{"current_steps": 43640, "total_steps": 78105, "loss": 0.2186, "lr": 2.423923296565597e-06, "epoch": 2.7936751808462965, "percentage": 55.87, "elapsed_time": "1:55:18", "remaining_time": "1:31:03", "throughput": 19850.09, "total_tokens": 137328128} +{"current_steps": 43645, "total_steps": 78105, "loss": 0.1657, "lr": 2.423364904844715e-06, "epoch": 2.7939952627872735, "percentage": 55.88, "elapsed_time": "1:55:18", "remaining_time": "1:31:02", "throughput": 19850.36, "total_tokens": 137342592} +{"current_steps": 43650, "total_steps": 78105, "loss": 0.1955, "lr": 2.4228065169505923e-06, "epoch": 2.7943153447282505, "percentage": 55.89, "elapsed_time": "1:55:19", "remaining_time": "1:31:01", "throughput": 19850.68, "total_tokens": 137357760} +{"current_steps": 43655, "total_steps": 78105, "loss": 0.1772, "lr": 2.422248132911109e-06, "epoch": 2.7946354266692275, "percentage": 55.89, "elapsed_time": "1:55:20", "remaining_time": "1:31:01", "throughput": 19851.02, "total_tokens": 137373504} +{"current_steps": 43660, "total_steps": 78105, "loss": 0.2534, "lr": 2.421689752754149e-06, "epoch": 2.7949555086102045, "percentage": 55.9, "elapsed_time": "1:55:20", "remaining_time": "1:31:00", "throughput": 19851.33, "total_tokens": 137388992} +{"current_steps": 43665, "total_steps": 78105, "loss": 0.1881, "lr": 2.4211313765075945e-06, "epoch": 2.795275590551181, "percentage": 55.91, "elapsed_time": "1:55:21", "remaining_time": "1:30:59", "throughput": 19851.64, "total_tokens": 137404416} +{"current_steps": 43670, "total_steps": 78105, "loss": 0.1914, "lr": 2.4205730041993294e-06, "epoch": 2.795595672492158, "percentage": 55.91, "elapsed_time": "1:55:22", "remaining_time": "1:30:58", "throughput": 19851.87, "total_tokens": 137418304} +{"current_steps": 43675, "total_steps": 78105, "loss": 0.209, "lr": 2.4200146358572333e-06, "epoch": 2.795915754433135, "percentage": 55.92, "elapsed_time": "1:55:22", "remaining_time": "1:30:57", "throughput": 19852.27, "total_tokens": 137435008} +{"current_steps": 43680, "total_steps": 78105, "loss": 0.2195, "lr": 2.4194562715091906e-06, "epoch": 2.7962358363741115, "percentage": 55.92, "elapsed_time": "1:55:23", "remaining_time": "1:30:56", "throughput": 19852.55, "total_tokens": 137449856} +{"current_steps": 43685, "total_steps": 78105, "loss": 0.2168, "lr": 2.418897911183082e-06, "epoch": 2.7965559183150885, "percentage": 55.93, "elapsed_time": "1:55:24", "remaining_time": "1:30:55", "throughput": 19852.86, "total_tokens": 137465088} +{"current_steps": 43690, "total_steps": 78105, "loss": 0.1584, "lr": 2.4183395549067885e-06, "epoch": 2.7968760002560655, "percentage": 55.94, "elapsed_time": "1:55:24", "remaining_time": "1:30:54", "throughput": 19853.17, "total_tokens": 137480512} +{"current_steps": 43695, "total_steps": 78105, "loss": 0.2449, "lr": 2.417781202708194e-06, "epoch": 2.7971960821970425, "percentage": 55.94, "elapsed_time": "1:55:25", "remaining_time": "1:30:53", "throughput": 19853.42, "total_tokens": 137494976} +{"current_steps": 43700, "total_steps": 78105, "loss": 0.2626, "lr": 2.4172228546151763e-06, "epoch": 2.7975161641380195, "percentage": 55.95, "elapsed_time": "1:55:26", "remaining_time": "1:30:52", "throughput": 19853.81, "total_tokens": 137511552} +{"current_steps": 43705, "total_steps": 78105, "loss": 0.1722, "lr": 2.4166645106556195e-06, "epoch": 2.797836246078996, "percentage": 55.96, "elapsed_time": "1:55:26", "remaining_time": "1:30:52", "throughput": 19854.12, "total_tokens": 137526784} +{"current_steps": 43710, "total_steps": 78105, "loss": 0.2264, "lr": 2.4161061708574017e-06, "epoch": 2.798156328019973, "percentage": 55.96, "elapsed_time": "1:55:27", "remaining_time": "1:30:51", "throughput": 19854.41, "total_tokens": 137541952} +{"current_steps": 43715, "total_steps": 78105, "loss": 0.1979, "lr": 2.415547835248405e-06, "epoch": 2.79847640996095, "percentage": 55.97, "elapsed_time": "1:55:28", "remaining_time": "1:30:50", "throughput": 19854.78, "total_tokens": 137558144} +{"current_steps": 43720, "total_steps": 78105, "loss": 0.3175, "lr": 2.4149895038565094e-06, "epoch": 2.798796491901927, "percentage": 55.98, "elapsed_time": "1:55:28", "remaining_time": "1:30:49", "throughput": 19855.14, "total_tokens": 137574016} +{"current_steps": 43725, "total_steps": 78105, "loss": 0.2088, "lr": 2.4144311767095965e-06, "epoch": 2.7991165738429036, "percentage": 55.98, "elapsed_time": "1:55:29", "remaining_time": "1:30:48", "throughput": 19855.43, "total_tokens": 137589120} +{"current_steps": 43730, "total_steps": 78105, "loss": 0.3295, "lr": 2.4138728538355433e-06, "epoch": 2.7994366557838806, "percentage": 55.99, "elapsed_time": "1:55:30", "remaining_time": "1:30:47", "throughput": 19855.78, "total_tokens": 137605120} +{"current_steps": 43735, "total_steps": 78105, "loss": 0.2233, "lr": 2.4133145352622327e-06, "epoch": 2.7997567377248576, "percentage": 56.0, "elapsed_time": "1:55:30", "remaining_time": "1:30:46", "throughput": 19856.21, "total_tokens": 137622528} +{"current_steps": 43740, "total_steps": 78105, "loss": 0.1735, "lr": 2.412756221017541e-06, "epoch": 2.8000768196658345, "percentage": 56.0, "elapsed_time": "1:55:31", "remaining_time": "1:30:45", "throughput": 19856.52, "total_tokens": 137638016} +{"current_steps": 43745, "total_steps": 78105, "loss": 0.1693, "lr": 2.4121979111293497e-06, "epoch": 2.8003969016068115, "percentage": 56.01, "elapsed_time": "1:55:32", "remaining_time": "1:30:45", "throughput": 19856.82, "total_tokens": 137653376} +{"current_steps": 43750, "total_steps": 78105, "loss": 0.2275, "lr": 2.411639605625538e-06, "epoch": 2.800716983547788, "percentage": 56.01, "elapsed_time": "1:55:32", "remaining_time": "1:30:44", "throughput": 19857.18, "total_tokens": 137669632} +{"current_steps": 43755, "total_steps": 78105, "loss": 0.2669, "lr": 2.411081304533983e-06, "epoch": 2.801037065488765, "percentage": 56.02, "elapsed_time": "1:55:33", "remaining_time": "1:30:43", "throughput": 19857.51, "total_tokens": 137685568} +{"current_steps": 43760, "total_steps": 78105, "loss": 0.2473, "lr": 2.410523007882566e-06, "epoch": 2.801357147429742, "percentage": 56.03, "elapsed_time": "1:55:34", "remaining_time": "1:30:42", "throughput": 19857.81, "total_tokens": 137700864} +{"current_steps": 43765, "total_steps": 78105, "loss": 0.215, "lr": 2.409964715699163e-06, "epoch": 2.801677229370719, "percentage": 56.03, "elapsed_time": "1:55:35", "remaining_time": "1:30:41", "throughput": 19858.15, "total_tokens": 137717056} +{"current_steps": 43770, "total_steps": 78105, "loss": 0.267, "lr": 2.4094064280116533e-06, "epoch": 2.8019973113116956, "percentage": 56.04, "elapsed_time": "1:55:35", "remaining_time": "1:30:40", "throughput": 19858.47, "total_tokens": 137732096} +{"current_steps": 43775, "total_steps": 78105, "loss": 0.3291, "lr": 2.4088481448479146e-06, "epoch": 2.8023173932526726, "percentage": 56.05, "elapsed_time": "1:55:36", "remaining_time": "1:30:39", "throughput": 19858.82, "total_tokens": 137748352} +{"current_steps": 43780, "total_steps": 78105, "loss": 0.2483, "lr": 2.408289866235825e-06, "epoch": 2.8026374751936496, "percentage": 56.05, "elapsed_time": "1:55:37", "remaining_time": "1:30:38", "throughput": 19859.16, "total_tokens": 137764096} +{"current_steps": 43785, "total_steps": 78105, "loss": 0.2571, "lr": 2.4077315922032617e-06, "epoch": 2.8029575571346266, "percentage": 56.06, "elapsed_time": "1:55:37", "remaining_time": "1:30:37", "throughput": 19859.49, "total_tokens": 137779712} +{"current_steps": 43790, "total_steps": 78105, "loss": 0.1611, "lr": 2.4071733227781027e-06, "epoch": 2.8032776390756036, "percentage": 56.07, "elapsed_time": "1:55:38", "remaining_time": "1:30:37", "throughput": 19859.76, "total_tokens": 137794496} +{"current_steps": 43795, "total_steps": 78105, "loss": 0.3214, "lr": 2.4066150579882234e-06, "epoch": 2.80359772101658, "percentage": 56.07, "elapsed_time": "1:55:39", "remaining_time": "1:30:36", "throughput": 19860.08, "total_tokens": 137809792} +{"current_steps": 43800, "total_steps": 78105, "loss": 0.2097, "lr": 2.406056797861502e-06, "epoch": 2.803917802957557, "percentage": 56.08, "elapsed_time": "1:55:39", "remaining_time": "1:30:35", "throughput": 19860.42, "total_tokens": 137826048} +{"current_steps": 43805, "total_steps": 78105, "loss": 0.2012, "lr": 2.4054985424258146e-06, "epoch": 2.804237884898534, "percentage": 56.08, "elapsed_time": "1:55:40", "remaining_time": "1:30:34", "throughput": 19860.74, "total_tokens": 137841536} +{"current_steps": 43810, "total_steps": 78105, "loss": 0.1748, "lr": 2.4049402917090375e-06, "epoch": 2.8045579668395106, "percentage": 56.09, "elapsed_time": "1:55:41", "remaining_time": "1:30:33", "throughput": 19861.03, "total_tokens": 137856640} +{"current_steps": 43815, "total_steps": 78105, "loss": 0.2603, "lr": 2.4043820457390483e-06, "epoch": 2.8048780487804876, "percentage": 56.1, "elapsed_time": "1:55:41", "remaining_time": "1:30:32", "throughput": 19861.37, "total_tokens": 137872576} +{"current_steps": 43820, "total_steps": 78105, "loss": 0.1837, "lr": 2.4038238045437203e-06, "epoch": 2.8051981307214646, "percentage": 56.1, "elapsed_time": "1:55:42", "remaining_time": "1:30:31", "throughput": 19861.81, "total_tokens": 137890432} +{"current_steps": 43825, "total_steps": 78105, "loss": 0.25, "lr": 2.4032655681509315e-06, "epoch": 2.8055182126624416, "percentage": 56.11, "elapsed_time": "1:55:43", "remaining_time": "1:30:30", "throughput": 19862.15, "total_tokens": 137906112} +{"current_steps": 43830, "total_steps": 78105, "loss": 0.2715, "lr": 2.402707336588556e-06, "epoch": 2.8058382946034186, "percentage": 56.12, "elapsed_time": "1:55:43", "remaining_time": "1:30:30", "throughput": 19862.45, "total_tokens": 137921472} +{"current_steps": 43835, "total_steps": 78105, "loss": 0.3797, "lr": 2.4021491098844697e-06, "epoch": 2.8061583765443956, "percentage": 56.12, "elapsed_time": "1:55:44", "remaining_time": "1:30:29", "throughput": 19862.83, "total_tokens": 137938368} +{"current_steps": 43840, "total_steps": 78105, "loss": 0.2277, "lr": 2.4015908880665463e-06, "epoch": 2.806478458485372, "percentage": 56.13, "elapsed_time": "1:55:45", "remaining_time": "1:30:28", "throughput": 19863.22, "total_tokens": 137955008} +{"current_steps": 43845, "total_steps": 78105, "loss": 0.2341, "lr": 2.4010326711626625e-06, "epoch": 2.806798540426349, "percentage": 56.14, "elapsed_time": "1:55:45", "remaining_time": "1:30:27", "throughput": 19863.54, "total_tokens": 137970432} +{"current_steps": 43850, "total_steps": 78105, "loss": 0.2173, "lr": 2.400474459200691e-06, "epoch": 2.807118622367326, "percentage": 56.14, "elapsed_time": "1:55:46", "remaining_time": "1:30:26", "throughput": 19863.87, "total_tokens": 137986112} +{"current_steps": 43855, "total_steps": 78105, "loss": 0.2485, "lr": 2.3999162522085062e-06, "epoch": 2.8074387043083027, "percentage": 56.15, "elapsed_time": "1:55:47", "remaining_time": "1:30:25", "throughput": 19864.16, "total_tokens": 138001024} +{"current_steps": 43860, "total_steps": 78105, "loss": 0.2806, "lr": 2.3993580502139825e-06, "epoch": 2.8077587862492797, "percentage": 56.16, "elapsed_time": "1:55:47", "remaining_time": "1:30:24", "throughput": 19864.45, "total_tokens": 138016384} +{"current_steps": 43865, "total_steps": 78105, "loss": 0.2032, "lr": 2.3987998532449937e-06, "epoch": 2.8080788681902566, "percentage": 56.16, "elapsed_time": "1:55:48", "remaining_time": "1:30:23", "throughput": 19864.93, "total_tokens": 138034560} +{"current_steps": 43870, "total_steps": 78105, "loss": 0.2277, "lr": 2.3982416613294137e-06, "epoch": 2.8083989501312336, "percentage": 56.17, "elapsed_time": "1:55:49", "remaining_time": "1:30:23", "throughput": 19865.22, "total_tokens": 138049536} +{"current_steps": 43875, "total_steps": 78105, "loss": 0.1843, "lr": 2.3976834744951144e-06, "epoch": 2.8087190320722106, "percentage": 56.17, "elapsed_time": "1:55:49", "remaining_time": "1:30:22", "throughput": 19865.54, "total_tokens": 138065088} +{"current_steps": 43880, "total_steps": 78105, "loss": 0.1947, "lr": 2.3971252927699705e-06, "epoch": 2.8090391140131876, "percentage": 56.18, "elapsed_time": "1:55:50", "remaining_time": "1:30:21", "throughput": 19865.9, "total_tokens": 138081152} +{"current_steps": 43885, "total_steps": 78105, "loss": 0.2614, "lr": 2.3965671161818522e-06, "epoch": 2.809359195954164, "percentage": 56.19, "elapsed_time": "1:55:51", "remaining_time": "1:30:20", "throughput": 19866.24, "total_tokens": 138096896} +{"current_steps": 43890, "total_steps": 78105, "loss": 0.2865, "lr": 2.396008944758634e-06, "epoch": 2.809679277895141, "percentage": 56.19, "elapsed_time": "1:55:51", "remaining_time": "1:30:19", "throughput": 19866.52, "total_tokens": 138112000} +{"current_steps": 43895, "total_steps": 78105, "loss": 0.1542, "lr": 2.395450778528187e-06, "epoch": 2.809999359836118, "percentage": 56.2, "elapsed_time": "1:55:52", "remaining_time": "1:30:18", "throughput": 19866.85, "total_tokens": 138127808} +{"current_steps": 43900, "total_steps": 78105, "loss": 0.2239, "lr": 2.394892617518385e-06, "epoch": 2.8103194417770947, "percentage": 56.21, "elapsed_time": "1:55:53", "remaining_time": "1:30:17", "throughput": 19867.13, "total_tokens": 138142784} +{"current_steps": 43905, "total_steps": 78105, "loss": 0.2381, "lr": 2.3943344617570967e-06, "epoch": 2.8106395237180717, "percentage": 56.21, "elapsed_time": "1:55:53", "remaining_time": "1:30:16", "throughput": 19867.41, "total_tokens": 138157696} +{"current_steps": 43910, "total_steps": 78105, "loss": 0.2195, "lr": 2.3937763112721953e-06, "epoch": 2.8109596056590487, "percentage": 56.22, "elapsed_time": "1:55:54", "remaining_time": "1:30:15", "throughput": 19867.74, "total_tokens": 138173120} +{"current_steps": 43915, "total_steps": 78105, "loss": 0.2862, "lr": 2.3932181660915514e-06, "epoch": 2.8112796876000257, "percentage": 56.23, "elapsed_time": "1:55:55", "remaining_time": "1:30:15", "throughput": 19868.12, "total_tokens": 138189568} +{"current_steps": 43920, "total_steps": 78105, "loss": 0.2011, "lr": 2.3926600262430362e-06, "epoch": 2.8115997695410027, "percentage": 56.23, "elapsed_time": "1:55:55", "remaining_time": "1:30:14", "throughput": 19868.43, "total_tokens": 138204736} +{"current_steps": 43925, "total_steps": 78105, "loss": 0.1736, "lr": 2.392101891754521e-06, "epoch": 2.8119198514819796, "percentage": 56.24, "elapsed_time": "1:55:56", "remaining_time": "1:30:13", "throughput": 19868.78, "total_tokens": 138220544} +{"current_steps": 43930, "total_steps": 78105, "loss": 0.1633, "lr": 2.3915437626538743e-06, "epoch": 2.812239933422956, "percentage": 56.24, "elapsed_time": "1:55:57", "remaining_time": "1:30:12", "throughput": 19869.02, "total_tokens": 138234816} +{"current_steps": 43935, "total_steps": 78105, "loss": 0.2308, "lr": 2.3909856389689683e-06, "epoch": 2.812560015363933, "percentage": 56.25, "elapsed_time": "1:55:57", "remaining_time": "1:30:11", "throughput": 19869.32, "total_tokens": 138250048} +{"current_steps": 43940, "total_steps": 78105, "loss": 0.2256, "lr": 2.3904275207276707e-06, "epoch": 2.81288009730491, "percentage": 56.26, "elapsed_time": "1:55:58", "remaining_time": "1:30:10", "throughput": 19869.6, "total_tokens": 138265088} +{"current_steps": 43945, "total_steps": 78105, "loss": 0.212, "lr": 2.3898694079578522e-06, "epoch": 2.8132001792458867, "percentage": 56.26, "elapsed_time": "1:55:59", "remaining_time": "1:30:09", "throughput": 19869.86, "total_tokens": 138279744} +{"current_steps": 43950, "total_steps": 78105, "loss": 0.2526, "lr": 2.3893113006873813e-06, "epoch": 2.8135202611868637, "percentage": 56.27, "elapsed_time": "1:55:59", "remaining_time": "1:30:08", "throughput": 19870.23, "total_tokens": 138296128} +{"current_steps": 43955, "total_steps": 78105, "loss": 0.2185, "lr": 2.388753198944129e-06, "epoch": 2.8138403431278407, "percentage": 56.28, "elapsed_time": "1:56:00", "remaining_time": "1:30:07", "throughput": 19870.57, "total_tokens": 138312064} +{"current_steps": 43960, "total_steps": 78105, "loss": 0.2491, "lr": 2.388195102755961e-06, "epoch": 2.8141604250688177, "percentage": 56.28, "elapsed_time": "1:56:01", "remaining_time": "1:30:07", "throughput": 19870.85, "total_tokens": 138327104} +{"current_steps": 43965, "total_steps": 78105, "loss": 0.2342, "lr": 2.3876370121507484e-06, "epoch": 2.8144805070097947, "percentage": 56.29, "elapsed_time": "1:56:01", "remaining_time": "1:30:06", "throughput": 19871.19, "total_tokens": 138342976} +{"current_steps": 43970, "total_steps": 78105, "loss": 0.242, "lr": 2.3870789271563567e-06, "epoch": 2.8148005889507712, "percentage": 56.3, "elapsed_time": "1:56:02", "remaining_time": "1:30:05", "throughput": 19871.59, "total_tokens": 138359872} +{"current_steps": 43975, "total_steps": 78105, "loss": 0.3118, "lr": 2.3865208478006565e-06, "epoch": 2.8151206708917482, "percentage": 56.3, "elapsed_time": "1:56:03", "remaining_time": "1:30:04", "throughput": 19871.92, "total_tokens": 138375808} +{"current_steps": 43980, "total_steps": 78105, "loss": 0.206, "lr": 2.3859627741115127e-06, "epoch": 2.815440752832725, "percentage": 56.31, "elapsed_time": "1:56:04", "remaining_time": "1:30:03", "throughput": 19872.22, "total_tokens": 138390976} +{"current_steps": 43985, "total_steps": 78105, "loss": 0.1974, "lr": 2.3854047061167944e-06, "epoch": 2.815760834773702, "percentage": 56.32, "elapsed_time": "1:56:04", "remaining_time": "1:30:02", "throughput": 19872.52, "total_tokens": 138406464} +{"current_steps": 43990, "total_steps": 78105, "loss": 0.2446, "lr": 2.3848466438443694e-06, "epoch": 2.8160809167146788, "percentage": 56.32, "elapsed_time": "1:56:05", "remaining_time": "1:30:01", "throughput": 19872.84, "total_tokens": 138422272} +{"current_steps": 43995, "total_steps": 78105, "loss": 0.1551, "lr": 2.3842885873221016e-06, "epoch": 2.8164009986556557, "percentage": 56.33, "elapsed_time": "1:56:06", "remaining_time": "1:30:00", "throughput": 19873.16, "total_tokens": 138437888} +{"current_steps": 44000, "total_steps": 78105, "loss": 0.1492, "lr": 2.3837305365778602e-06, "epoch": 2.8167210805966327, "percentage": 56.33, "elapsed_time": "1:56:06", "remaining_time": "1:30:00", "throughput": 19873.51, "total_tokens": 138453952} +{"current_steps": 44005, "total_steps": 78105, "loss": 0.2079, "lr": 2.383172491639509e-06, "epoch": 2.8170411625376097, "percentage": 56.34, "elapsed_time": "1:56:07", "remaining_time": "1:29:59", "throughput": 19873.81, "total_tokens": 138469120} +{"current_steps": 44010, "total_steps": 78105, "loss": 0.2003, "lr": 2.3826144525349163e-06, "epoch": 2.8173612444785867, "percentage": 56.35, "elapsed_time": "1:56:08", "remaining_time": "1:29:58", "throughput": 19874.11, "total_tokens": 138484608} +{"current_steps": 44015, "total_steps": 78105, "loss": 0.2145, "lr": 2.3820564192919456e-06, "epoch": 2.8176813264195633, "percentage": 56.35, "elapsed_time": "1:56:08", "remaining_time": "1:29:57", "throughput": 19874.41, "total_tokens": 138500160} +{"current_steps": 44020, "total_steps": 78105, "loss": 0.233, "lr": 2.381498391938464e-06, "epoch": 2.8180014083605402, "percentage": 56.36, "elapsed_time": "1:56:09", "remaining_time": "1:29:56", "throughput": 19874.72, "total_tokens": 138515584} +{"current_steps": 44025, "total_steps": 78105, "loss": 0.1965, "lr": 2.3809403705023342e-06, "epoch": 2.8183214903015172, "percentage": 56.37, "elapsed_time": "1:56:10", "remaining_time": "1:29:55", "throughput": 19875.05, "total_tokens": 138531456} +{"current_steps": 44030, "total_steps": 78105, "loss": 0.2553, "lr": 2.380382355011423e-06, "epoch": 2.8186415722424942, "percentage": 56.37, "elapsed_time": "1:56:10", "remaining_time": "1:29:54", "throughput": 19875.36, "total_tokens": 138546816} +{"current_steps": 44035, "total_steps": 78105, "loss": 0.2613, "lr": 2.3798243454935934e-06, "epoch": 2.8189616541834708, "percentage": 56.38, "elapsed_time": "1:56:11", "remaining_time": "1:29:53", "throughput": 19875.69, "total_tokens": 138562752} +{"current_steps": 44040, "total_steps": 78105, "loss": 0.1717, "lr": 2.3792663419767095e-06, "epoch": 2.8192817361244478, "percentage": 56.39, "elapsed_time": "1:56:12", "remaining_time": "1:29:52", "throughput": 19876.0, "total_tokens": 138577856} +{"current_steps": 44045, "total_steps": 78105, "loss": 0.1816, "lr": 2.3787083444886368e-06, "epoch": 2.8196018180654248, "percentage": 56.39, "elapsed_time": "1:56:12", "remaining_time": "1:29:52", "throughput": 19876.33, "total_tokens": 138593728} +{"current_steps": 44050, "total_steps": 78105, "loss": 0.2282, "lr": 2.3781503530572367e-06, "epoch": 2.8199219000064017, "percentage": 56.4, "elapsed_time": "1:56:13", "remaining_time": "1:29:51", "throughput": 19876.64, "total_tokens": 138609024} +{"current_steps": 44055, "total_steps": 78105, "loss": 0.1734, "lr": 2.3775923677103738e-06, "epoch": 2.8202419819473787, "percentage": 56.4, "elapsed_time": "1:56:14", "remaining_time": "1:29:50", "throughput": 19877.03, "total_tokens": 138625408} +{"current_steps": 44060, "total_steps": 78105, "loss": 0.2477, "lr": 2.37703438847591e-06, "epoch": 2.8205620638883553, "percentage": 56.41, "elapsed_time": "1:56:14", "remaining_time": "1:29:49", "throughput": 19877.45, "total_tokens": 138642688} +{"current_steps": 44065, "total_steps": 78105, "loss": 0.4014, "lr": 2.376476415381709e-06, "epoch": 2.8208821458293323, "percentage": 56.42, "elapsed_time": "1:56:15", "remaining_time": "1:29:48", "throughput": 19877.75, "total_tokens": 138657856} +{"current_steps": 44070, "total_steps": 78105, "loss": 0.176, "lr": 2.3759184484556317e-06, "epoch": 2.8212022277703093, "percentage": 56.42, "elapsed_time": "1:56:16", "remaining_time": "1:29:47", "throughput": 19878.03, "total_tokens": 138672640} +{"current_steps": 44075, "total_steps": 78105, "loss": 0.2552, "lr": 2.3753604877255422e-06, "epoch": 2.821522309711286, "percentage": 56.43, "elapsed_time": "1:56:16", "remaining_time": "1:29:46", "throughput": 19878.48, "total_tokens": 138690560} +{"current_steps": 44080, "total_steps": 78105, "loss": 0.2514, "lr": 2.374802533219299e-06, "epoch": 2.821842391652263, "percentage": 56.44, "elapsed_time": "1:56:17", "remaining_time": "1:29:45", "throughput": 19878.86, "total_tokens": 138707008} +{"current_steps": 44085, "total_steps": 78105, "loss": 0.1726, "lr": 2.374244584964766e-06, "epoch": 2.82216247359324, "percentage": 56.44, "elapsed_time": "1:56:18", "remaining_time": "1:29:45", "throughput": 19879.18, "total_tokens": 138722368} +{"current_steps": 44090, "total_steps": 78105, "loss": 0.1757, "lr": 2.3736866429898027e-06, "epoch": 2.822482555534217, "percentage": 56.45, "elapsed_time": "1:56:18", "remaining_time": "1:29:44", "throughput": 19879.5, "total_tokens": 138738112} +{"current_steps": 44095, "total_steps": 78105, "loss": 0.2233, "lr": 2.3731287073222705e-06, "epoch": 2.8228026374751938, "percentage": 56.46, "elapsed_time": "1:56:19", "remaining_time": "1:29:43", "throughput": 19879.78, "total_tokens": 138753280} +{"current_steps": 44100, "total_steps": 78105, "loss": 0.3008, "lr": 2.3725707779900313e-06, "epoch": 2.8231227194161708, "percentage": 56.46, "elapsed_time": "1:56:20", "remaining_time": "1:29:42", "throughput": 19880.33, "total_tokens": 138773120} +{"current_steps": 44105, "total_steps": 78105, "loss": 0.2513, "lr": 2.372012855020942e-06, "epoch": 2.8234428013571473, "percentage": 56.47, "elapsed_time": "1:56:21", "remaining_time": "1:29:41", "throughput": 19880.68, "total_tokens": 138788800} +{"current_steps": 44110, "total_steps": 78105, "loss": 0.1958, "lr": 2.371454938442865e-06, "epoch": 2.8237628832981243, "percentage": 56.48, "elapsed_time": "1:56:21", "remaining_time": "1:29:40", "throughput": 19881.01, "total_tokens": 138804672} +{"current_steps": 44115, "total_steps": 78105, "loss": 0.1894, "lr": 2.3708970282836585e-06, "epoch": 2.8240829652391013, "percentage": 56.48, "elapsed_time": "1:56:22", "remaining_time": "1:29:39", "throughput": 19881.29, "total_tokens": 138819520} +{"current_steps": 44120, "total_steps": 78105, "loss": 0.2009, "lr": 2.3703391245711823e-06, "epoch": 2.824403047180078, "percentage": 56.49, "elapsed_time": "1:56:23", "remaining_time": "1:29:38", "throughput": 19881.59, "total_tokens": 138834752} +{"current_steps": 44125, "total_steps": 78105, "loss": 0.2332, "lr": 2.369781227333294e-06, "epoch": 2.824723129121055, "percentage": 56.49, "elapsed_time": "1:56:23", "remaining_time": "1:29:38", "throughput": 19881.89, "total_tokens": 138850048} +{"current_steps": 44130, "total_steps": 78105, "loss": 0.1679, "lr": 2.369223336597854e-06, "epoch": 2.825043211062032, "percentage": 56.5, "elapsed_time": "1:56:24", "remaining_time": "1:29:37", "throughput": 19882.17, "total_tokens": 138865152} +{"current_steps": 44135, "total_steps": 78105, "loss": 0.2943, "lr": 2.3686654523927187e-06, "epoch": 2.825363293003009, "percentage": 56.51, "elapsed_time": "1:56:25", "remaining_time": "1:29:36", "throughput": 19882.65, "total_tokens": 138882880} +{"current_steps": 44140, "total_steps": 78105, "loss": 0.1469, "lr": 2.368107574745747e-06, "epoch": 2.825683374943986, "percentage": 56.51, "elapsed_time": "1:56:25", "remaining_time": "1:29:35", "throughput": 19882.94, "total_tokens": 138898112} +{"current_steps": 44145, "total_steps": 78105, "loss": 0.1727, "lr": 2.367549703684795e-06, "epoch": 2.826003456884963, "percentage": 56.52, "elapsed_time": "1:56:26", "remaining_time": "1:29:34", "throughput": 19883.32, "total_tokens": 138914752} +{"current_steps": 44150, "total_steps": 78105, "loss": 0.135, "lr": 2.3669918392377224e-06, "epoch": 2.8263235388259393, "percentage": 56.53, "elapsed_time": "1:56:27", "remaining_time": "1:29:33", "throughput": 19883.63, "total_tokens": 138930688} +{"current_steps": 44155, "total_steps": 78105, "loss": 0.1783, "lr": 2.3664339814323835e-06, "epoch": 2.8266436207669163, "percentage": 56.53, "elapsed_time": "1:56:27", "remaining_time": "1:29:32", "throughput": 19883.93, "total_tokens": 138945920} +{"current_steps": 44160, "total_steps": 78105, "loss": 0.1945, "lr": 2.3658761302966357e-06, "epoch": 2.8269637027078933, "percentage": 56.54, "elapsed_time": "1:56:28", "remaining_time": "1:29:31", "throughput": 19884.25, "total_tokens": 138961408} +{"current_steps": 44165, "total_steps": 78105, "loss": 0.1959, "lr": 2.3653182858583364e-06, "epoch": 2.82728378464887, "percentage": 56.55, "elapsed_time": "1:56:29", "remaining_time": "1:29:31", "throughput": 19884.68, "total_tokens": 138979008} +{"current_steps": 44170, "total_steps": 78105, "loss": 0.2449, "lr": 2.3647604481453393e-06, "epoch": 2.827603866589847, "percentage": 56.55, "elapsed_time": "1:56:29", "remaining_time": "1:29:30", "throughput": 19884.96, "total_tokens": 138994112} +{"current_steps": 44175, "total_steps": 78105, "loss": 0.3041, "lr": 2.3642026171855013e-06, "epoch": 2.827923948530824, "percentage": 56.56, "elapsed_time": "1:56:30", "remaining_time": "1:29:29", "throughput": 19885.3, "total_tokens": 139010176} +{"current_steps": 44180, "total_steps": 78105, "loss": 0.3095, "lr": 2.363644793006677e-06, "epoch": 2.828244030471801, "percentage": 56.56, "elapsed_time": "1:56:31", "remaining_time": "1:29:28", "throughput": 19885.63, "total_tokens": 139026048} +{"current_steps": 44185, "total_steps": 78105, "loss": 0.2471, "lr": 2.363086975636723e-06, "epoch": 2.828564112412778, "percentage": 56.57, "elapsed_time": "1:56:31", "remaining_time": "1:29:27", "throughput": 19885.95, "total_tokens": 139041856} +{"current_steps": 44190, "total_steps": 78105, "loss": 0.2208, "lr": 2.3625291651034905e-06, "epoch": 2.828884194353755, "percentage": 56.58, "elapsed_time": "1:56:32", "remaining_time": "1:29:26", "throughput": 19886.27, "total_tokens": 139057408} +{"current_steps": 44195, "total_steps": 78105, "loss": 0.1601, "lr": 2.361971361434837e-06, "epoch": 2.8292042762947314, "percentage": 56.58, "elapsed_time": "1:56:33", "remaining_time": "1:29:25", "throughput": 19886.59, "total_tokens": 139073344} +{"current_steps": 44200, "total_steps": 78105, "loss": 0.1999, "lr": 2.361413564658613e-06, "epoch": 2.8295243582357084, "percentage": 56.59, "elapsed_time": "1:56:34", "remaining_time": "1:29:25", "throughput": 19887.08, "total_tokens": 139092096} +{"current_steps": 44205, "total_steps": 78105, "loss": 0.2826, "lr": 2.3608557748026747e-06, "epoch": 2.8298444401766853, "percentage": 56.6, "elapsed_time": "1:56:34", "remaining_time": "1:29:24", "throughput": 19887.44, "total_tokens": 139108544} +{"current_steps": 44210, "total_steps": 78105, "loss": 0.2114, "lr": 2.360297991894874e-06, "epoch": 2.830164522117662, "percentage": 56.6, "elapsed_time": "1:56:35", "remaining_time": "1:29:23", "throughput": 19887.75, "total_tokens": 139124352} +{"current_steps": 44215, "total_steps": 78105, "loss": 0.1929, "lr": 2.3597402159630632e-06, "epoch": 2.830484604058639, "percentage": 56.61, "elapsed_time": "1:56:36", "remaining_time": "1:29:22", "throughput": 19888.03, "total_tokens": 139139072} +{"current_steps": 44220, "total_steps": 78105, "loss": 0.2083, "lr": 2.359182447035097e-06, "epoch": 2.830804685999616, "percentage": 56.62, "elapsed_time": "1:56:36", "remaining_time": "1:29:21", "throughput": 19888.38, "total_tokens": 139155392} +{"current_steps": 44225, "total_steps": 78105, "loss": 0.1707, "lr": 2.3586246851388243e-06, "epoch": 2.831124767940593, "percentage": 56.62, "elapsed_time": "1:56:37", "remaining_time": "1:29:20", "throughput": 19888.75, "total_tokens": 139171968} +{"current_steps": 44230, "total_steps": 78105, "loss": 0.1931, "lr": 2.3580669303021e-06, "epoch": 2.83144484988157, "percentage": 56.63, "elapsed_time": "1:56:38", "remaining_time": "1:29:19", "throughput": 19889.07, "total_tokens": 139187648} +{"current_steps": 44235, "total_steps": 78105, "loss": 0.3123, "lr": 2.3575091825527724e-06, "epoch": 2.831764931822547, "percentage": 56.64, "elapsed_time": "1:56:38", "remaining_time": "1:29:18", "throughput": 19889.41, "total_tokens": 139203584} +{"current_steps": 44240, "total_steps": 78105, "loss": 0.2331, "lr": 2.3569514419186953e-06, "epoch": 2.8320850137635234, "percentage": 56.64, "elapsed_time": "1:56:39", "remaining_time": "1:29:18", "throughput": 19889.69, "total_tokens": 139218688} +{"current_steps": 44245, "total_steps": 78105, "loss": 0.2012, "lr": 2.3563937084277174e-06, "epoch": 2.8324050957045004, "percentage": 56.65, "elapsed_time": "1:56:40", "remaining_time": "1:29:17", "throughput": 19889.99, "total_tokens": 139233920} +{"current_steps": 44250, "total_steps": 78105, "loss": 0.3717, "lr": 2.3558359821076903e-06, "epoch": 2.8327251776454774, "percentage": 56.65, "elapsed_time": "1:56:40", "remaining_time": "1:29:16", "throughput": 19890.41, "total_tokens": 139251392} +{"current_steps": 44255, "total_steps": 78105, "loss": 0.2139, "lr": 2.3552782629864625e-06, "epoch": 2.833045259586454, "percentage": 56.66, "elapsed_time": "1:56:41", "remaining_time": "1:29:15", "throughput": 19890.68, "total_tokens": 139266304} +{"current_steps": 44260, "total_steps": 78105, "loss": 0.1798, "lr": 2.3547205510918846e-06, "epoch": 2.833365341527431, "percentage": 56.67, "elapsed_time": "1:56:42", "remaining_time": "1:29:14", "throughput": 19891.02, "total_tokens": 139282304} +{"current_steps": 44265, "total_steps": 78105, "loss": 0.251, "lr": 2.3541628464518056e-06, "epoch": 2.833685423468408, "percentage": 56.67, "elapsed_time": "1:56:42", "remaining_time": "1:29:13", "throughput": 19891.37, "total_tokens": 139298816} +{"current_steps": 44270, "total_steps": 78105, "loss": 0.1523, "lr": 2.353605149094074e-06, "epoch": 2.834005505409385, "percentage": 56.68, "elapsed_time": "1:56:43", "remaining_time": "1:29:12", "throughput": 19891.67, "total_tokens": 139313920} +{"current_steps": 44275, "total_steps": 78105, "loss": 0.2123, "lr": 2.3530474590465403e-06, "epoch": 2.834325587350362, "percentage": 56.69, "elapsed_time": "1:56:44", "remaining_time": "1:29:11", "throughput": 19891.99, "total_tokens": 139329472} +{"current_steps": 44280, "total_steps": 78105, "loss": 0.2351, "lr": 2.3524897763370493e-06, "epoch": 2.8346456692913384, "percentage": 56.69, "elapsed_time": "1:56:44", "remaining_time": "1:29:11", "throughput": 19892.32, "total_tokens": 139344960} +{"current_steps": 44285, "total_steps": 78105, "loss": 0.3499, "lr": 2.3519321009934513e-06, "epoch": 2.8349657512323154, "percentage": 56.7, "elapsed_time": "1:56:45", "remaining_time": "1:29:10", "throughput": 19892.56, "total_tokens": 139359296} +{"current_steps": 44290, "total_steps": 78105, "loss": 0.1809, "lr": 2.351374433043593e-06, "epoch": 2.8352858331732924, "percentage": 56.71, "elapsed_time": "1:56:46", "remaining_time": "1:29:09", "throughput": 19892.85, "total_tokens": 139374528} +{"current_steps": 44295, "total_steps": 78105, "loss": 0.2806, "lr": 2.3508167725153216e-06, "epoch": 2.8356059151142694, "percentage": 56.71, "elapsed_time": "1:56:46", "remaining_time": "1:29:08", "throughput": 19893.25, "total_tokens": 139391488} +{"current_steps": 44300, "total_steps": 78105, "loss": 0.2491, "lr": 2.350259119436483e-06, "epoch": 2.835925997055246, "percentage": 56.72, "elapsed_time": "1:56:47", "remaining_time": "1:29:07", "throughput": 19893.5, "total_tokens": 139406272} +{"current_steps": 44305, "total_steps": 78105, "loss": 0.1931, "lr": 2.349701473834925e-06, "epoch": 2.836246078996223, "percentage": 56.72, "elapsed_time": "1:56:48", "remaining_time": "1:29:06", "throughput": 19893.78, "total_tokens": 139421312} +{"current_steps": 44310, "total_steps": 78105, "loss": 0.1616, "lr": 2.349143835738491e-06, "epoch": 2.8365661609372, "percentage": 56.73, "elapsed_time": "1:56:48", "remaining_time": "1:29:05", "throughput": 19894.05, "total_tokens": 139435840} +{"current_steps": 44315, "total_steps": 78105, "loss": 0.2365, "lr": 2.348586205175029e-06, "epoch": 2.836886242878177, "percentage": 56.74, "elapsed_time": "1:56:49", "remaining_time": "1:29:04", "throughput": 19894.4, "total_tokens": 139452032} +{"current_steps": 44320, "total_steps": 78105, "loss": 0.2702, "lr": 2.3480285821723826e-06, "epoch": 2.837206324819154, "percentage": 56.74, "elapsed_time": "1:56:50", "remaining_time": "1:29:03", "throughput": 19894.67, "total_tokens": 139466816} +{"current_steps": 44325, "total_steps": 78105, "loss": 0.2039, "lr": 2.347470966758398e-06, "epoch": 2.8375264067601305, "percentage": 56.75, "elapsed_time": "1:56:50", "remaining_time": "1:29:03", "throughput": 19895.0, "total_tokens": 139482240} +{"current_steps": 44330, "total_steps": 78105, "loss": 0.2812, "lr": 2.346913358960918e-06, "epoch": 2.8378464887011075, "percentage": 56.76, "elapsed_time": "1:56:51", "remaining_time": "1:29:02", "throughput": 19895.29, "total_tokens": 139497408} +{"current_steps": 44335, "total_steps": 78105, "loss": 0.2855, "lr": 2.3463557588077877e-06, "epoch": 2.8381665706420844, "percentage": 56.76, "elapsed_time": "1:56:52", "remaining_time": "1:29:01", "throughput": 19895.58, "total_tokens": 139512960} +{"current_steps": 44340, "total_steps": 78105, "loss": 0.218, "lr": 2.3457981663268513e-06, "epoch": 2.838486652583061, "percentage": 56.77, "elapsed_time": "1:56:52", "remaining_time": "1:29:00", "throughput": 19895.93, "total_tokens": 139529152} +{"current_steps": 44345, "total_steps": 78105, "loss": 0.2036, "lr": 2.34524058154595e-06, "epoch": 2.838806734524038, "percentage": 56.78, "elapsed_time": "1:56:53", "remaining_time": "1:28:59", "throughput": 19896.31, "total_tokens": 139545792} +{"current_steps": 44350, "total_steps": 78105, "loss": 0.2265, "lr": 2.344683004492929e-06, "epoch": 2.839126816465015, "percentage": 56.78, "elapsed_time": "1:56:54", "remaining_time": "1:28:58", "throughput": 19896.64, "total_tokens": 139561728} +{"current_steps": 44355, "total_steps": 78105, "loss": 0.2324, "lr": 2.344125435195629e-06, "epoch": 2.839446898405992, "percentage": 56.79, "elapsed_time": "1:56:54", "remaining_time": "1:28:57", "throughput": 19896.9, "total_tokens": 139576256} +{"current_steps": 44360, "total_steps": 78105, "loss": 0.1874, "lr": 2.343567873681894e-06, "epoch": 2.839766980346969, "percentage": 56.8, "elapsed_time": "1:56:55", "remaining_time": "1:28:56", "throughput": 19897.14, "total_tokens": 139590464} +{"current_steps": 44365, "total_steps": 78105, "loss": 0.2494, "lr": 2.343010319979563e-06, "epoch": 2.840087062287946, "percentage": 56.8, "elapsed_time": "1:56:56", "remaining_time": "1:28:55", "throughput": 19897.41, "total_tokens": 139605248} +{"current_steps": 44370, "total_steps": 78105, "loss": 0.2666, "lr": 2.3424527741164798e-06, "epoch": 2.8404071442289225, "percentage": 56.81, "elapsed_time": "1:56:56", "remaining_time": "1:28:55", "throughput": 19897.79, "total_tokens": 139621504} +{"current_steps": 44375, "total_steps": 78105, "loss": 0.3019, "lr": 2.3418952361204837e-06, "epoch": 2.8407272261698995, "percentage": 56.81, "elapsed_time": "1:56:57", "remaining_time": "1:28:54", "throughput": 19898.05, "total_tokens": 139636416} +{"current_steps": 44380, "total_steps": 78105, "loss": 0.2298, "lr": 2.341337706019417e-06, "epoch": 2.8410473081108765, "percentage": 56.82, "elapsed_time": "1:56:58", "remaining_time": "1:28:53", "throughput": 19898.39, "total_tokens": 139652480} +{"current_steps": 44385, "total_steps": 78105, "loss": 0.1877, "lr": 2.340780183841118e-06, "epoch": 2.841367390051853, "percentage": 56.83, "elapsed_time": "1:56:58", "remaining_time": "1:28:52", "throughput": 19898.67, "total_tokens": 139667584} +{"current_steps": 44390, "total_steps": 78105, "loss": 0.1671, "lr": 2.340222669613427e-06, "epoch": 2.84168747199283, "percentage": 56.83, "elapsed_time": "1:56:59", "remaining_time": "1:28:51", "throughput": 19898.97, "total_tokens": 139682816} +{"current_steps": 44395, "total_steps": 78105, "loss": 0.2568, "lr": 2.3396651633641854e-06, "epoch": 2.842007553933807, "percentage": 56.84, "elapsed_time": "1:57:00", "remaining_time": "1:28:50", "throughput": 19899.25, "total_tokens": 139697792} +{"current_steps": 44400, "total_steps": 78105, "loss": 0.2184, "lr": 2.3391076651212286e-06, "epoch": 2.842327635874784, "percentage": 56.85, "elapsed_time": "1:57:00", "remaining_time": "1:28:49", "throughput": 19899.64, "total_tokens": 139714496} +{"current_steps": 44405, "total_steps": 78105, "loss": 0.1701, "lr": 2.338550174912398e-06, "epoch": 2.842647717815761, "percentage": 56.85, "elapsed_time": "1:57:01", "remaining_time": "1:28:48", "throughput": 19899.97, "total_tokens": 139730432} +{"current_steps": 44410, "total_steps": 78105, "loss": 0.2167, "lr": 2.33799269276553e-06, "epoch": 2.842967799756738, "percentage": 56.86, "elapsed_time": "1:57:02", "remaining_time": "1:28:47", "throughput": 19900.27, "total_tokens": 139745600} +{"current_steps": 44415, "total_steps": 78105, "loss": 0.2717, "lr": 2.3374352187084638e-06, "epoch": 2.8432878816977145, "percentage": 56.87, "elapsed_time": "1:57:02", "remaining_time": "1:28:47", "throughput": 19900.59, "total_tokens": 139761152} +{"current_steps": 44420, "total_steps": 78105, "loss": 0.215, "lr": 2.336877752769035e-06, "epoch": 2.8436079636386915, "percentage": 56.87, "elapsed_time": "1:57:03", "remaining_time": "1:28:46", "throughput": 19900.89, "total_tokens": 139776512} +{"current_steps": 44425, "total_steps": 78105, "loss": 0.2383, "lr": 2.336320294975082e-06, "epoch": 2.8439280455796685, "percentage": 56.88, "elapsed_time": "1:57:04", "remaining_time": "1:28:45", "throughput": 19901.19, "total_tokens": 139791936} +{"current_steps": 44430, "total_steps": 78105, "loss": 0.1699, "lr": 2.3357628453544413e-06, "epoch": 2.844248127520645, "percentage": 56.88, "elapsed_time": "1:57:04", "remaining_time": "1:28:44", "throughput": 19901.48, "total_tokens": 139807360} +{"current_steps": 44435, "total_steps": 78105, "loss": 0.3171, "lr": 2.3352054039349485e-06, "epoch": 2.844568209461622, "percentage": 56.89, "elapsed_time": "1:57:05", "remaining_time": "1:28:43", "throughput": 19901.78, "total_tokens": 139822400} +{"current_steps": 44440, "total_steps": 78105, "loss": 0.1592, "lr": 2.334647970744439e-06, "epoch": 2.844888291402599, "percentage": 56.9, "elapsed_time": "1:57:06", "remaining_time": "1:28:42", "throughput": 19902.14, "total_tokens": 139838784} +{"current_steps": 44445, "total_steps": 78105, "loss": 0.183, "lr": 2.3340905458107483e-06, "epoch": 2.845208373343576, "percentage": 56.9, "elapsed_time": "1:57:07", "remaining_time": "1:28:41", "throughput": 19902.54, "total_tokens": 139856000} +{"current_steps": 44450, "total_steps": 78105, "loss": 0.1716, "lr": 2.333533129161713e-06, "epoch": 2.845528455284553, "percentage": 56.91, "elapsed_time": "1:57:07", "remaining_time": "1:28:40", "throughput": 19902.83, "total_tokens": 139871296} +{"current_steps": 44455, "total_steps": 78105, "loss": 0.5357, "lr": 2.332975720825164e-06, "epoch": 2.84584853722553, "percentage": 56.92, "elapsed_time": "1:57:08", "remaining_time": "1:28:40", "throughput": 19903.13, "total_tokens": 139886528} +{"current_steps": 44460, "total_steps": 78105, "loss": 0.2261, "lr": 2.3324183208289393e-06, "epoch": 2.8461686191665065, "percentage": 56.92, "elapsed_time": "1:57:09", "remaining_time": "1:28:39", "throughput": 19903.52, "total_tokens": 139903168} +{"current_steps": 44465, "total_steps": 78105, "loss": 0.244, "lr": 2.3318609292008697e-06, "epoch": 2.8464887011074835, "percentage": 56.93, "elapsed_time": "1:57:09", "remaining_time": "1:28:38", "throughput": 19903.84, "total_tokens": 139918848} +{"current_steps": 44470, "total_steps": 78105, "loss": 0.3307, "lr": 2.3313035459687906e-06, "epoch": 2.8468087830484605, "percentage": 56.94, "elapsed_time": "1:57:10", "remaining_time": "1:28:37", "throughput": 19904.17, "total_tokens": 139934976} +{"current_steps": 44475, "total_steps": 78105, "loss": 0.1675, "lr": 2.330746171160533e-06, "epoch": 2.847128864989437, "percentage": 56.94, "elapsed_time": "1:57:11", "remaining_time": "1:28:36", "throughput": 19904.56, "total_tokens": 139951616} +{"current_steps": 44480, "total_steps": 78105, "loss": 0.1356, "lr": 2.3301888048039306e-06, "epoch": 2.847448946930414, "percentage": 56.95, "elapsed_time": "1:57:11", "remaining_time": "1:28:35", "throughput": 19904.93, "total_tokens": 139968256} +{"current_steps": 44485, "total_steps": 78105, "loss": 0.1824, "lr": 2.3296314469268138e-06, "epoch": 2.847769028871391, "percentage": 56.96, "elapsed_time": "1:57:12", "remaining_time": "1:28:34", "throughput": 19905.24, "total_tokens": 139983616} +{"current_steps": 44490, "total_steps": 78105, "loss": 0.1731, "lr": 2.3290740975570155e-06, "epoch": 2.848089110812368, "percentage": 56.96, "elapsed_time": "1:57:13", "remaining_time": "1:28:34", "throughput": 19905.57, "total_tokens": 139999232} +{"current_steps": 44495, "total_steps": 78105, "loss": 0.2596, "lr": 2.3285167567223662e-06, "epoch": 2.848409192753345, "percentage": 56.97, "elapsed_time": "1:57:13", "remaining_time": "1:28:33", "throughput": 19905.87, "total_tokens": 140014400} +{"current_steps": 44500, "total_steps": 78105, "loss": 0.2312, "lr": 2.327959424450698e-06, "epoch": 2.848729274694322, "percentage": 56.97, "elapsed_time": "1:57:14", "remaining_time": "1:28:32", "throughput": 19906.22, "total_tokens": 140030528} +{"current_steps": 44505, "total_steps": 78105, "loss": 0.2017, "lr": 2.3274021007698385e-06, "epoch": 2.8490493566352986, "percentage": 56.98, "elapsed_time": "1:57:15", "remaining_time": "1:28:31", "throughput": 19906.53, "total_tokens": 140046208} +{"current_steps": 44510, "total_steps": 78105, "loss": 0.2373, "lr": 2.3268447857076194e-06, "epoch": 2.8493694385762756, "percentage": 56.99, "elapsed_time": "1:57:15", "remaining_time": "1:28:30", "throughput": 19906.92, "total_tokens": 140063296} +{"current_steps": 44515, "total_steps": 78105, "loss": 0.3689, "lr": 2.3262874792918703e-06, "epoch": 2.8496895205172526, "percentage": 56.99, "elapsed_time": "1:57:16", "remaining_time": "1:28:29", "throughput": 19907.18, "total_tokens": 140077888} +{"current_steps": 44520, "total_steps": 78105, "loss": 0.1692, "lr": 2.325730181550419e-06, "epoch": 2.850009602458229, "percentage": 57.0, "elapsed_time": "1:57:17", "remaining_time": "1:28:28", "throughput": 19907.54, "total_tokens": 140094208} +{"current_steps": 44525, "total_steps": 78105, "loss": 0.2089, "lr": 2.325172892511096e-06, "epoch": 2.850329684399206, "percentage": 57.01, "elapsed_time": "1:57:17", "remaining_time": "1:28:27", "throughput": 19908.0, "total_tokens": 140112000} +{"current_steps": 44530, "total_steps": 78105, "loss": 0.258, "lr": 2.3246156122017264e-06, "epoch": 2.850649766340183, "percentage": 57.01, "elapsed_time": "1:57:18", "remaining_time": "1:28:27", "throughput": 19908.32, "total_tokens": 140127552} +{"current_steps": 44535, "total_steps": 78105, "loss": 0.2803, "lr": 2.3240583406501414e-06, "epoch": 2.85096984828116, "percentage": 57.02, "elapsed_time": "1:57:19", "remaining_time": "1:28:26", "throughput": 19908.67, "total_tokens": 140143552} +{"current_steps": 44540, "total_steps": 78105, "loss": 0.321, "lr": 2.323501077884165e-06, "epoch": 2.851289930222137, "percentage": 57.03, "elapsed_time": "1:57:19", "remaining_time": "1:28:25", "throughput": 19908.99, "total_tokens": 140158720} +{"current_steps": 44545, "total_steps": 78105, "loss": 0.1971, "lr": 2.3229438239316258e-06, "epoch": 2.8516100121631136, "percentage": 57.03, "elapsed_time": "1:57:20", "remaining_time": "1:28:24", "throughput": 19909.32, "total_tokens": 140174784} +{"current_steps": 44550, "total_steps": 78105, "loss": 0.1966, "lr": 2.3223865788203495e-06, "epoch": 2.8519300941040906, "percentage": 57.04, "elapsed_time": "1:57:21", "remaining_time": "1:28:23", "throughput": 19909.66, "total_tokens": 140190720} +{"current_steps": 44555, "total_steps": 78105, "loss": 0.2565, "lr": 2.3218293425781634e-06, "epoch": 2.8522501760450676, "percentage": 57.05, "elapsed_time": "1:57:22", "remaining_time": "1:28:22", "throughput": 19909.97, "total_tokens": 140206400} +{"current_steps": 44560, "total_steps": 78105, "loss": 0.2011, "lr": 2.3212721152328907e-06, "epoch": 2.8525702579860446, "percentage": 57.05, "elapsed_time": "1:57:22", "remaining_time": "1:28:21", "throughput": 19910.25, "total_tokens": 140221120} +{"current_steps": 44565, "total_steps": 78105, "loss": 0.2197, "lr": 2.320714896812358e-06, "epoch": 2.852890339927021, "percentage": 57.06, "elapsed_time": "1:57:23", "remaining_time": "1:28:20", "throughput": 19910.57, "total_tokens": 140236800} +{"current_steps": 44570, "total_steps": 78105, "loss": 0.2038, "lr": 2.32015768734439e-06, "epoch": 2.853210421867998, "percentage": 57.06, "elapsed_time": "1:57:23", "remaining_time": "1:28:19", "throughput": 19910.86, "total_tokens": 140251968} +{"current_steps": 44575, "total_steps": 78105, "loss": 0.3535, "lr": 2.319600486856809e-06, "epoch": 2.853530503808975, "percentage": 57.07, "elapsed_time": "1:57:24", "remaining_time": "1:28:19", "throughput": 19911.14, "total_tokens": 140267136} +{"current_steps": 44580, "total_steps": 78105, "loss": 0.2445, "lr": 2.319043295377441e-06, "epoch": 2.853850585749952, "percentage": 57.08, "elapsed_time": "1:57:25", "remaining_time": "1:28:18", "throughput": 19911.4, "total_tokens": 140281792} +{"current_steps": 44585, "total_steps": 78105, "loss": 0.1777, "lr": 2.318486112934108e-06, "epoch": 2.854170667690929, "percentage": 57.08, "elapsed_time": "1:57:25", "remaining_time": "1:28:17", "throughput": 19911.68, "total_tokens": 140296832} +{"current_steps": 44590, "total_steps": 78105, "loss": 0.1406, "lr": 2.317928939554634e-06, "epoch": 2.8544907496319056, "percentage": 57.09, "elapsed_time": "1:57:26", "remaining_time": "1:28:16", "throughput": 19911.98, "total_tokens": 140312064} +{"current_steps": 44595, "total_steps": 78105, "loss": 0.2001, "lr": 2.317371775266839e-06, "epoch": 2.8548108315728826, "percentage": 57.1, "elapsed_time": "1:57:27", "remaining_time": "1:28:15", "throughput": 19912.26, "total_tokens": 140327360} +{"current_steps": 44600, "total_steps": 78105, "loss": 0.1539, "lr": 2.316814620098547e-06, "epoch": 2.8551309135138596, "percentage": 57.1, "elapsed_time": "1:57:27", "remaining_time": "1:28:14", "throughput": 19912.59, "total_tokens": 140343296} +{"current_steps": 44605, "total_steps": 78105, "loss": 0.2062, "lr": 2.316257474077578e-06, "epoch": 2.8554509954548366, "percentage": 57.11, "elapsed_time": "1:57:28", "remaining_time": "1:28:13", "throughput": 19912.92, "total_tokens": 140359232} +{"current_steps": 44610, "total_steps": 78105, "loss": 0.2805, "lr": 2.315700337231755e-06, "epoch": 2.855771077395813, "percentage": 57.12, "elapsed_time": "1:57:29", "remaining_time": "1:28:12", "throughput": 19913.28, "total_tokens": 140375552} +{"current_steps": 44615, "total_steps": 78105, "loss": 0.1845, "lr": 2.315143209588896e-06, "epoch": 2.85609115933679, "percentage": 57.12, "elapsed_time": "1:57:30", "remaining_time": "1:28:12", "throughput": 19913.67, "total_tokens": 140392576} +{"current_steps": 44620, "total_steps": 78105, "loss": 0.1562, "lr": 2.314586091176822e-06, "epoch": 2.856411241277767, "percentage": 57.13, "elapsed_time": "1:57:30", "remaining_time": "1:28:11", "throughput": 19913.97, "total_tokens": 140408256} +{"current_steps": 44625, "total_steps": 78105, "loss": 0.233, "lr": 2.314028982023354e-06, "epoch": 2.856731323218744, "percentage": 57.13, "elapsed_time": "1:57:31", "remaining_time": "1:28:10", "throughput": 19914.29, "total_tokens": 140424128} +{"current_steps": 44630, "total_steps": 78105, "loss": 0.2935, "lr": 2.313471882156309e-06, "epoch": 2.857051405159721, "percentage": 57.14, "elapsed_time": "1:57:32", "remaining_time": "1:28:09", "throughput": 19914.58, "total_tokens": 140439168} +{"current_steps": 44635, "total_steps": 78105, "loss": 0.1529, "lr": 2.312914791603507e-06, "epoch": 2.8573714871006977, "percentage": 57.15, "elapsed_time": "1:57:32", "remaining_time": "1:28:08", "throughput": 19914.91, "total_tokens": 140455360} +{"current_steps": 44640, "total_steps": 78105, "loss": 0.2748, "lr": 2.3123577103927656e-06, "epoch": 2.8576915690416747, "percentage": 57.15, "elapsed_time": "1:57:33", "remaining_time": "1:28:07", "throughput": 19915.27, "total_tokens": 140471872} +{"current_steps": 44645, "total_steps": 78105, "loss": 0.2308, "lr": 2.311800638551904e-06, "epoch": 2.8580116509826516, "percentage": 57.16, "elapsed_time": "1:57:34", "remaining_time": "1:28:06", "throughput": 19915.53, "total_tokens": 140486592} +{"current_steps": 44650, "total_steps": 78105, "loss": 0.219, "lr": 2.3112435761087367e-06, "epoch": 2.858331732923628, "percentage": 57.17, "elapsed_time": "1:57:34", "remaining_time": "1:28:05", "throughput": 19915.77, "total_tokens": 140500864} +{"current_steps": 44655, "total_steps": 78105, "loss": 0.2197, "lr": 2.3106865230910824e-06, "epoch": 2.858651814864605, "percentage": 57.17, "elapsed_time": "1:57:35", "remaining_time": "1:28:05", "throughput": 19916.13, "total_tokens": 140517056} +{"current_steps": 44660, "total_steps": 78105, "loss": 0.1609, "lr": 2.3101294795267567e-06, "epoch": 2.858971896805582, "percentage": 57.18, "elapsed_time": "1:57:36", "remaining_time": "1:28:04", "throughput": 19916.35, "total_tokens": 140531264} +{"current_steps": 44665, "total_steps": 78105, "loss": 0.1922, "lr": 2.309572445443577e-06, "epoch": 2.859291978746559, "percentage": 57.19, "elapsed_time": "1:57:36", "remaining_time": "1:28:03", "throughput": 19916.67, "total_tokens": 140547136} +{"current_steps": 44670, "total_steps": 78105, "loss": 0.2598, "lr": 2.309015420869356e-06, "epoch": 2.859612060687536, "percentage": 57.19, "elapsed_time": "1:57:37", "remaining_time": "1:28:02", "throughput": 19916.97, "total_tokens": 140562304} +{"current_steps": 44675, "total_steps": 78105, "loss": 0.265, "lr": 2.3084584058319115e-06, "epoch": 2.859932142628513, "percentage": 57.2, "elapsed_time": "1:57:38", "remaining_time": "1:28:01", "throughput": 19917.28, "total_tokens": 140578176} +{"current_steps": 44680, "total_steps": 78105, "loss": 0.1118, "lr": 2.3079014003590555e-06, "epoch": 2.8602522245694897, "percentage": 57.21, "elapsed_time": "1:57:38", "remaining_time": "1:28:00", "throughput": 19917.58, "total_tokens": 140593600} +{"current_steps": 44685, "total_steps": 78105, "loss": 0.2334, "lr": 2.3073444044786025e-06, "epoch": 2.8605723065104667, "percentage": 57.21, "elapsed_time": "1:57:39", "remaining_time": "1:27:59", "throughput": 19917.91, "total_tokens": 140609472} +{"current_steps": 44690, "total_steps": 78105, "loss": 0.1526, "lr": 2.3067874182183676e-06, "epoch": 2.8608923884514437, "percentage": 57.22, "elapsed_time": "1:57:40", "remaining_time": "1:27:58", "throughput": 19918.24, "total_tokens": 140624960} +{"current_steps": 44695, "total_steps": 78105, "loss": 0.263, "lr": 2.3062304416061616e-06, "epoch": 2.86121247039242, "percentage": 57.22, "elapsed_time": "1:57:40", "remaining_time": "1:27:58", "throughput": 19918.54, "total_tokens": 140640512} +{"current_steps": 44700, "total_steps": 78105, "loss": 0.2402, "lr": 2.3056734746697997e-06, "epoch": 2.861532552333397, "percentage": 57.23, "elapsed_time": "1:57:41", "remaining_time": "1:27:57", "throughput": 19918.93, "total_tokens": 140657408} +{"current_steps": 44705, "total_steps": 78105, "loss": 0.2162, "lr": 2.3051165174370904e-06, "epoch": 2.861852634274374, "percentage": 57.24, "elapsed_time": "1:57:42", "remaining_time": "1:27:56", "throughput": 19919.33, "total_tokens": 140674624} +{"current_steps": 44710, "total_steps": 78105, "loss": 0.2667, "lr": 2.3045595699358487e-06, "epoch": 2.862172716215351, "percentage": 57.24, "elapsed_time": "1:57:42", "remaining_time": "1:27:55", "throughput": 19919.65, "total_tokens": 140690368} +{"current_steps": 44715, "total_steps": 78105, "loss": 0.2578, "lr": 2.3040026321938826e-06, "epoch": 2.862492798156328, "percentage": 57.25, "elapsed_time": "1:57:43", "remaining_time": "1:27:54", "throughput": 19919.99, "total_tokens": 140706304} +{"current_steps": 44720, "total_steps": 78105, "loss": 0.2047, "lr": 2.303445704239005e-06, "epoch": 2.862812880097305, "percentage": 57.26, "elapsed_time": "1:57:44", "remaining_time": "1:27:53", "throughput": 19920.29, "total_tokens": 140721920} +{"current_steps": 44725, "total_steps": 78105, "loss": 0.2405, "lr": 2.3028887860990247e-06, "epoch": 2.8631329620382817, "percentage": 57.26, "elapsed_time": "1:57:44", "remaining_time": "1:27:52", "throughput": 19920.66, "total_tokens": 140738240} +{"current_steps": 44730, "total_steps": 78105, "loss": 0.2576, "lr": 2.3023318778017526e-06, "epoch": 2.8634530439792587, "percentage": 57.27, "elapsed_time": "1:57:45", "remaining_time": "1:27:51", "throughput": 19921.06, "total_tokens": 140755200} +{"current_steps": 44735, "total_steps": 78105, "loss": 0.2057, "lr": 2.3017749793749956e-06, "epoch": 2.8637731259202357, "percentage": 57.28, "elapsed_time": "1:57:46", "remaining_time": "1:27:51", "throughput": 19921.36, "total_tokens": 140770624} +{"current_steps": 44740, "total_steps": 78105, "loss": 0.2664, "lr": 2.301218090846564e-06, "epoch": 2.8640932078612122, "percentage": 57.28, "elapsed_time": "1:57:47", "remaining_time": "1:27:50", "throughput": 19921.74, "total_tokens": 140787392} +{"current_steps": 44745, "total_steps": 78105, "loss": 0.2434, "lr": 2.300661212244266e-06, "epoch": 2.8644132898021892, "percentage": 57.29, "elapsed_time": "1:57:47", "remaining_time": "1:27:49", "throughput": 19922.09, "total_tokens": 140803584} +{"current_steps": 44750, "total_steps": 78105, "loss": 0.1918, "lr": 2.300104343595908e-06, "epoch": 2.8647333717431662, "percentage": 57.29, "elapsed_time": "1:57:48", "remaining_time": "1:27:48", "throughput": 19922.43, "total_tokens": 140819776} +{"current_steps": 44755, "total_steps": 78105, "loss": 0.2781, "lr": 2.299547484929299e-06, "epoch": 2.865053453684143, "percentage": 57.3, "elapsed_time": "1:57:49", "remaining_time": "1:27:47", "throughput": 19922.78, "total_tokens": 140836224} +{"current_steps": 44760, "total_steps": 78105, "loss": 0.2081, "lr": 2.2989906362722434e-06, "epoch": 2.86537353562512, "percentage": 57.31, "elapsed_time": "1:57:49", "remaining_time": "1:27:46", "throughput": 19923.26, "total_tokens": 140854976} +{"current_steps": 44765, "total_steps": 78105, "loss": 0.2745, "lr": 2.29843379765255e-06, "epoch": 2.865693617566097, "percentage": 57.31, "elapsed_time": "1:57:50", "remaining_time": "1:27:45", "throughput": 19923.58, "total_tokens": 140870464} +{"current_steps": 44770, "total_steps": 78105, "loss": 0.2269, "lr": 2.297876969098021e-06, "epoch": 2.8660136995070737, "percentage": 57.32, "elapsed_time": "1:57:51", "remaining_time": "1:27:45", "throughput": 19923.89, "total_tokens": 140886016} +{"current_steps": 44775, "total_steps": 78105, "loss": 0.2441, "lr": 2.297320150636464e-06, "epoch": 2.8663337814480507, "percentage": 57.33, "elapsed_time": "1:57:51", "remaining_time": "1:27:44", "throughput": 19924.12, "total_tokens": 140900416} +{"current_steps": 44780, "total_steps": 78105, "loss": 0.3098, "lr": 2.2967633422956827e-06, "epoch": 2.8666538633890277, "percentage": 57.33, "elapsed_time": "1:57:52", "remaining_time": "1:27:43", "throughput": 19924.46, "total_tokens": 140916928} +{"current_steps": 44785, "total_steps": 78105, "loss": 0.2314, "lr": 2.2962065441034826e-06, "epoch": 2.8669739453300043, "percentage": 57.34, "elapsed_time": "1:57:53", "remaining_time": "1:27:42", "throughput": 19924.75, "total_tokens": 140932352} +{"current_steps": 44790, "total_steps": 78105, "loss": 0.2473, "lr": 2.2956497560876647e-06, "epoch": 2.8672940272709813, "percentage": 57.35, "elapsed_time": "1:57:53", "remaining_time": "1:27:41", "throughput": 19925.01, "total_tokens": 140947200} +{"current_steps": 44795, "total_steps": 78105, "loss": 0.1707, "lr": 2.295092978276034e-06, "epoch": 2.8676141092119583, "percentage": 57.35, "elapsed_time": "1:57:54", "remaining_time": "1:27:40", "throughput": 19925.35, "total_tokens": 140963584} +{"current_steps": 44800, "total_steps": 78105, "loss": 0.193, "lr": 2.2945362106963933e-06, "epoch": 2.8679341911529352, "percentage": 57.36, "elapsed_time": "1:57:55", "remaining_time": "1:27:39", "throughput": 19925.61, "total_tokens": 140978368} +{"current_steps": 44805, "total_steps": 78105, "loss": 0.1898, "lr": 2.293979453376543e-06, "epoch": 2.8682542730939122, "percentage": 57.37, "elapsed_time": "1:57:55", "remaining_time": "1:27:38", "throughput": 19925.92, "total_tokens": 140994048} +{"current_steps": 44810, "total_steps": 78105, "loss": 0.2355, "lr": 2.2934227063442866e-06, "epoch": 2.868574355034889, "percentage": 57.37, "elapsed_time": "1:57:56", "remaining_time": "1:27:38", "throughput": 19926.17, "total_tokens": 141008704} +{"current_steps": 44815, "total_steps": 78105, "loss": 0.2554, "lr": 2.2928659696274234e-06, "epoch": 2.8688944369758658, "percentage": 57.38, "elapsed_time": "1:57:57", "remaining_time": "1:27:37", "throughput": 19926.5, "total_tokens": 141024384} +{"current_steps": 44820, "total_steps": 78105, "loss": 0.183, "lr": 2.2923092432537556e-06, "epoch": 2.8692145189168428, "percentage": 57.38, "elapsed_time": "1:57:57", "remaining_time": "1:27:36", "throughput": 19926.89, "total_tokens": 141041344} +{"current_steps": 44825, "total_steps": 78105, "loss": 0.1648, "lr": 2.291752527251082e-06, "epoch": 2.8695346008578198, "percentage": 57.39, "elapsed_time": "1:57:58", "remaining_time": "1:27:35", "throughput": 19927.23, "total_tokens": 141057664} +{"current_steps": 44830, "total_steps": 78105, "loss": 0.2397, "lr": 2.2911958216472024e-06, "epoch": 2.8698546827987963, "percentage": 57.4, "elapsed_time": "1:57:59", "remaining_time": "1:27:34", "throughput": 19927.51, "total_tokens": 141072640} +{"current_steps": 44835, "total_steps": 78105, "loss": 0.1741, "lr": 2.2906391264699152e-06, "epoch": 2.8701747647397733, "percentage": 57.4, "elapsed_time": "1:57:59", "remaining_time": "1:27:33", "throughput": 19927.78, "total_tokens": 141088000} +{"current_steps": 44840, "total_steps": 78105, "loss": 0.1857, "lr": 2.290082441747021e-06, "epoch": 2.8704948466807503, "percentage": 57.41, "elapsed_time": "1:58:00", "remaining_time": "1:27:32", "throughput": 19928.09, "total_tokens": 141103936} +{"current_steps": 44845, "total_steps": 78105, "loss": 0.1053, "lr": 2.289525767506315e-06, "epoch": 2.8708149286217273, "percentage": 57.42, "elapsed_time": "1:58:01", "remaining_time": "1:27:32", "throughput": 19928.85, "total_tokens": 141132352} +{"current_steps": 44850, "total_steps": 78105, "loss": 0.2696, "lr": 2.288969103775597e-06, "epoch": 2.8711350105627043, "percentage": 57.42, "elapsed_time": "1:58:02", "remaining_time": "1:27:31", "throughput": 19929.15, "total_tokens": 141147520} +{"current_steps": 44855, "total_steps": 78105, "loss": 0.1419, "lr": 2.2884124505826617e-06, "epoch": 2.871455092503681, "percentage": 57.43, "elapsed_time": "1:58:03", "remaining_time": "1:27:30", "throughput": 19929.43, "total_tokens": 141163072} +{"current_steps": 44860, "total_steps": 78105, "loss": 0.2222, "lr": 2.2878558079553062e-06, "epoch": 2.871775174444658, "percentage": 57.44, "elapsed_time": "1:58:03", "remaining_time": "1:27:29", "throughput": 19929.72, "total_tokens": 141178304} +{"current_steps": 44865, "total_steps": 78105, "loss": 0.3013, "lr": 2.2872991759213277e-06, "epoch": 2.872095256385635, "percentage": 57.44, "elapsed_time": "1:58:04", "remaining_time": "1:27:28", "throughput": 19930.17, "total_tokens": 141196096} +{"current_steps": 44870, "total_steps": 78105, "loss": 0.2045, "lr": 2.2867425545085192e-06, "epoch": 2.8724153383266118, "percentage": 57.45, "elapsed_time": "1:58:05", "remaining_time": "1:27:27", "throughput": 19930.48, "total_tokens": 141211776} +{"current_steps": 44875, "total_steps": 78105, "loss": 0.2384, "lr": 2.286185943744678e-06, "epoch": 2.8727354202675883, "percentage": 57.45, "elapsed_time": "1:58:05", "remaining_time": "1:27:27", "throughput": 19930.79, "total_tokens": 141227328} +{"current_steps": 44880, "total_steps": 78105, "loss": 0.2071, "lr": 2.285629343657596e-06, "epoch": 2.8730555022085653, "percentage": 57.46, "elapsed_time": "1:58:06", "remaining_time": "1:27:26", "throughput": 19931.09, "total_tokens": 141242688} +{"current_steps": 44885, "total_steps": 78105, "loss": 0.2278, "lr": 2.285072754275069e-06, "epoch": 2.8733755841495423, "percentage": 57.47, "elapsed_time": "1:58:07", "remaining_time": "1:27:25", "throughput": 19931.37, "total_tokens": 141258048} +{"current_steps": 44890, "total_steps": 78105, "loss": 0.1792, "lr": 2.284516175624888e-06, "epoch": 2.8736956660905193, "percentage": 57.47, "elapsed_time": "1:58:07", "remaining_time": "1:27:24", "throughput": 19931.62, "total_tokens": 141272512} +{"current_steps": 44895, "total_steps": 78105, "loss": 0.327, "lr": 2.2839596077348483e-06, "epoch": 2.8740157480314963, "percentage": 57.48, "elapsed_time": "1:58:08", "remaining_time": "1:27:23", "throughput": 19931.95, "total_tokens": 141288640} +{"current_steps": 44900, "total_steps": 78105, "loss": 0.2094, "lr": 2.2834030506327394e-06, "epoch": 2.874335829972473, "percentage": 57.49, "elapsed_time": "1:58:09", "remaining_time": "1:27:22", "throughput": 19932.29, "total_tokens": 141304640} +{"current_steps": 44905, "total_steps": 78105, "loss": 0.2244, "lr": 2.2828465043463544e-06, "epoch": 2.87465591191345, "percentage": 57.49, "elapsed_time": "1:58:09", "remaining_time": "1:27:21", "throughput": 19932.74, "total_tokens": 141322560} +{"current_steps": 44910, "total_steps": 78105, "loss": 0.2849, "lr": 2.2822899689034832e-06, "epoch": 2.874975993854427, "percentage": 57.5, "elapsed_time": "1:58:10", "remaining_time": "1:27:21", "throughput": 19932.99, "total_tokens": 141337472} +{"current_steps": 44915, "total_steps": 78105, "loss": 0.2835, "lr": 2.2817334443319174e-06, "epoch": 2.8752960757954034, "percentage": 57.51, "elapsed_time": "1:58:11", "remaining_time": "1:27:20", "throughput": 19933.29, "total_tokens": 141352896} +{"current_steps": 44920, "total_steps": 78105, "loss": 0.2221, "lr": 2.2811769306594463e-06, "epoch": 2.8756161577363804, "percentage": 57.51, "elapsed_time": "1:58:11", "remaining_time": "1:27:19", "throughput": 19933.6, "total_tokens": 141368320} +{"current_steps": 44925, "total_steps": 78105, "loss": 0.1893, "lr": 2.280620427913859e-06, "epoch": 2.8759362396773573, "percentage": 57.52, "elapsed_time": "1:58:12", "remaining_time": "1:27:18", "throughput": 19933.91, "total_tokens": 141384000} +{"current_steps": 44930, "total_steps": 78105, "loss": 0.1827, "lr": 2.2800639361229464e-06, "epoch": 2.8762563216183343, "percentage": 57.53, "elapsed_time": "1:58:13", "remaining_time": "1:27:17", "throughput": 19934.21, "total_tokens": 141399040} +{"current_steps": 44935, "total_steps": 78105, "loss": 0.1448, "lr": 2.2795074553144936e-06, "epoch": 2.8765764035593113, "percentage": 57.53, "elapsed_time": "1:58:13", "remaining_time": "1:27:16", "throughput": 19934.57, "total_tokens": 141415680} +{"current_steps": 44940, "total_steps": 78105, "loss": 0.2351, "lr": 2.2789509855162912e-06, "epoch": 2.8768964855002883, "percentage": 57.54, "elapsed_time": "1:58:14", "remaining_time": "1:27:15", "throughput": 19934.93, "total_tokens": 141432064} +{"current_steps": 44945, "total_steps": 78105, "loss": 0.3497, "lr": 2.2783945267561246e-06, "epoch": 2.877216567441265, "percentage": 57.54, "elapsed_time": "1:58:15", "remaining_time": "1:27:14", "throughput": 19935.25, "total_tokens": 141447936} +{"current_steps": 44950, "total_steps": 78105, "loss": 0.1935, "lr": 2.277838079061781e-06, "epoch": 2.877536649382242, "percentage": 57.55, "elapsed_time": "1:58:16", "remaining_time": "1:27:14", "throughput": 19935.59, "total_tokens": 141463808} +{"current_steps": 44955, "total_steps": 78105, "loss": 0.2442, "lr": 2.2772816424610463e-06, "epoch": 2.877856731323219, "percentage": 57.56, "elapsed_time": "1:58:16", "remaining_time": "1:27:13", "throughput": 19935.93, "total_tokens": 141480256} +{"current_steps": 44960, "total_steps": 78105, "loss": 0.3129, "lr": 2.276725216981707e-06, "epoch": 2.8781768132641954, "percentage": 57.56, "elapsed_time": "1:58:17", "remaining_time": "1:27:12", "throughput": 19936.22, "total_tokens": 141495424} +{"current_steps": 44965, "total_steps": 78105, "loss": 0.3059, "lr": 2.2761688026515467e-06, "epoch": 2.8784968952051724, "percentage": 57.57, "elapsed_time": "1:58:18", "remaining_time": "1:27:11", "throughput": 19936.48, "total_tokens": 141510272} +{"current_steps": 44970, "total_steps": 78105, "loss": 0.1473, "lr": 2.27561239949835e-06, "epoch": 2.8788169771461494, "percentage": 57.58, "elapsed_time": "1:58:18", "remaining_time": "1:27:10", "throughput": 19936.76, "total_tokens": 141525312} +{"current_steps": 44975, "total_steps": 78105, "loss": 0.2226, "lr": 2.2750560075499025e-06, "epoch": 2.8791370590871264, "percentage": 57.58, "elapsed_time": "1:58:19", "remaining_time": "1:27:09", "throughput": 19937.09, "total_tokens": 141541312} +{"current_steps": 44980, "total_steps": 78105, "loss": 0.1887, "lr": 2.274499626833985e-06, "epoch": 2.8794571410281034, "percentage": 57.59, "elapsed_time": "1:58:20", "remaining_time": "1:27:08", "throughput": 19937.36, "total_tokens": 141556160} +{"current_steps": 44985, "total_steps": 78105, "loss": 0.191, "lr": 2.2739432573783832e-06, "epoch": 2.8797772229690803, "percentage": 57.6, "elapsed_time": "1:58:20", "remaining_time": "1:27:07", "throughput": 19937.65, "total_tokens": 141571648} +{"current_steps": 44990, "total_steps": 78105, "loss": 0.232, "lr": 2.273386899210876e-06, "epoch": 2.880097304910057, "percentage": 57.6, "elapsed_time": "1:58:21", "remaining_time": "1:27:06", "throughput": 19937.87, "total_tokens": 141585856} +{"current_steps": 44995, "total_steps": 78105, "loss": 0.1903, "lr": 2.2728305523592482e-06, "epoch": 2.880417386851034, "percentage": 57.61, "elapsed_time": "1:58:22", "remaining_time": "1:27:06", "throughput": 19938.13, "total_tokens": 141601152} +{"current_steps": 45000, "total_steps": 78105, "loss": 0.1379, "lr": 2.272274216851278e-06, "epoch": 2.880737468792011, "percentage": 57.61, "elapsed_time": "1:58:22", "remaining_time": "1:27:05", "throughput": 19938.44, "total_tokens": 141616896} +{"current_steps": 45005, "total_steps": 78105, "loss": 0.3167, "lr": 2.2717178927147483e-06, "epoch": 2.8810575507329874, "percentage": 57.62, "elapsed_time": "1:58:23", "remaining_time": "1:27:04", "throughput": 19938.75, "total_tokens": 141632320} +{"current_steps": 45010, "total_steps": 78105, "loss": 0.1945, "lr": 2.271161579977437e-06, "epoch": 2.8813776326739644, "percentage": 57.63, "elapsed_time": "1:58:24", "remaining_time": "1:27:03", "throughput": 19939.08, "total_tokens": 141648384} +{"current_steps": 45015, "total_steps": 78105, "loss": 0.2173, "lr": 2.2706052786671255e-06, "epoch": 2.8816977146149414, "percentage": 57.63, "elapsed_time": "1:58:24", "remaining_time": "1:27:02", "throughput": 19939.4, "total_tokens": 141664448} +{"current_steps": 45020, "total_steps": 78105, "loss": 0.2076, "lr": 2.2700489888115907e-06, "epoch": 2.8820177965559184, "percentage": 57.64, "elapsed_time": "1:58:25", "remaining_time": "1:27:01", "throughput": 19939.71, "total_tokens": 141680064} +{"current_steps": 45025, "total_steps": 78105, "loss": 0.293, "lr": 2.2694927104386115e-06, "epoch": 2.8823378784968954, "percentage": 57.65, "elapsed_time": "1:58:26", "remaining_time": "1:27:00", "throughput": 19940.06, "total_tokens": 141696448} +{"current_steps": 45030, "total_steps": 78105, "loss": 0.2419, "lr": 2.2689364435759665e-06, "epoch": 2.8826579604378724, "percentage": 57.65, "elapsed_time": "1:58:26", "remaining_time": "1:27:00", "throughput": 19940.35, "total_tokens": 141711744} +{"current_steps": 45035, "total_steps": 78105, "loss": 0.1816, "lr": 2.268380188251431e-06, "epoch": 2.882978042378849, "percentage": 57.66, "elapsed_time": "1:58:27", "remaining_time": "1:26:59", "throughput": 19940.7, "total_tokens": 141727616} +{"current_steps": 45040, "total_steps": 78105, "loss": 0.3126, "lr": 2.267823944492783e-06, "epoch": 2.883298124319826, "percentage": 57.67, "elapsed_time": "1:58:28", "remaining_time": "1:26:58", "throughput": 19941.03, "total_tokens": 141744000} +{"current_steps": 45045, "total_steps": 78105, "loss": 0.204, "lr": 2.267267712327797e-06, "epoch": 2.883618206260803, "percentage": 57.67, "elapsed_time": "1:58:28", "remaining_time": "1:26:57", "throughput": 19941.32, "total_tokens": 141759168} +{"current_steps": 45050, "total_steps": 78105, "loss": 0.248, "lr": 2.266711491784251e-06, "epoch": 2.8839382882017794, "percentage": 57.68, "elapsed_time": "1:58:29", "remaining_time": "1:26:56", "throughput": 19941.63, "total_tokens": 141774656} +{"current_steps": 45055, "total_steps": 78105, "loss": 0.2116, "lr": 2.266155282889916e-06, "epoch": 2.8842583701427564, "percentage": 57.69, "elapsed_time": "1:58:30", "remaining_time": "1:26:55", "throughput": 19941.99, "total_tokens": 141791296} +{"current_steps": 45060, "total_steps": 78105, "loss": 0.2342, "lr": 2.2655990856725695e-06, "epoch": 2.8845784520837334, "percentage": 57.69, "elapsed_time": "1:58:30", "remaining_time": "1:26:54", "throughput": 19942.21, "total_tokens": 141805504} +{"current_steps": 45065, "total_steps": 78105, "loss": 0.2994, "lr": 2.2650429001599826e-06, "epoch": 2.8848985340247104, "percentage": 57.7, "elapsed_time": "1:58:31", "remaining_time": "1:26:53", "throughput": 19942.55, "total_tokens": 141821632} +{"current_steps": 45070, "total_steps": 78105, "loss": 0.1775, "lr": 2.2644867263799307e-06, "epoch": 2.8852186159656874, "percentage": 57.7, "elapsed_time": "1:58:32", "remaining_time": "1:26:53", "throughput": 19942.84, "total_tokens": 141836736} +{"current_steps": 45075, "total_steps": 78105, "loss": 0.1947, "lr": 2.2639305643601838e-06, "epoch": 2.885538697906664, "percentage": 57.71, "elapsed_time": "1:58:32", "remaining_time": "1:26:52", "throughput": 19943.12, "total_tokens": 141852032} +{"current_steps": 45080, "total_steps": 78105, "loss": 0.2518, "lr": 2.263374414128516e-06, "epoch": 2.885858779847641, "percentage": 57.72, "elapsed_time": "1:58:33", "remaining_time": "1:26:51", "throughput": 19943.49, "total_tokens": 141869056} +{"current_steps": 45085, "total_steps": 78105, "loss": 0.1722, "lr": 2.262818275712696e-06, "epoch": 2.886178861788618, "percentage": 57.72, "elapsed_time": "1:58:34", "remaining_time": "1:26:50", "throughput": 19943.74, "total_tokens": 141883648} +{"current_steps": 45090, "total_steps": 78105, "loss": 0.3109, "lr": 2.2622621491404962e-06, "epoch": 2.886498943729595, "percentage": 57.73, "elapsed_time": "1:58:34", "remaining_time": "1:26:49", "throughput": 19944.1, "total_tokens": 141900352} +{"current_steps": 45095, "total_steps": 78105, "loss": 0.2064, "lr": 2.261706034439687e-06, "epoch": 2.8868190256705715, "percentage": 57.74, "elapsed_time": "1:58:35", "remaining_time": "1:26:48", "throughput": 19944.44, "total_tokens": 141916992} +{"current_steps": 45100, "total_steps": 78105, "loss": 0.1981, "lr": 2.2611499316380367e-06, "epoch": 2.8871391076115485, "percentage": 57.74, "elapsed_time": "1:58:36", "remaining_time": "1:26:47", "throughput": 19944.79, "total_tokens": 141933376} +{"current_steps": 45105, "total_steps": 78105, "loss": 0.1708, "lr": 2.2605938407633157e-06, "epoch": 2.8874591895525255, "percentage": 57.75, "elapsed_time": "1:58:36", "remaining_time": "1:26:46", "throughput": 19945.07, "total_tokens": 141948480} +{"current_steps": 45110, "total_steps": 78105, "loss": 0.2531, "lr": 2.26003776184329e-06, "epoch": 2.8877792714935024, "percentage": 57.76, "elapsed_time": "1:58:37", "remaining_time": "1:26:46", "throughput": 19945.38, "total_tokens": 141964224} +{"current_steps": 45115, "total_steps": 78105, "loss": 0.2189, "lr": 2.259481694905729e-06, "epoch": 2.8880993534344794, "percentage": 57.76, "elapsed_time": "1:58:38", "remaining_time": "1:26:45", "throughput": 19945.69, "total_tokens": 141980096} +{"current_steps": 45120, "total_steps": 78105, "loss": 0.2182, "lr": 2.2589256399783993e-06, "epoch": 2.888419435375456, "percentage": 57.77, "elapsed_time": "1:58:39", "remaining_time": "1:26:44", "throughput": 19946.04, "total_tokens": 141996800} +{"current_steps": 45125, "total_steps": 78105, "loss": 0.2882, "lr": 2.258369597089068e-06, "epoch": 2.888739517316433, "percentage": 57.77, "elapsed_time": "1:58:39", "remaining_time": "1:26:43", "throughput": 19946.31, "total_tokens": 142011584} +{"current_steps": 45130, "total_steps": 78105, "loss": 0.2331, "lr": 2.2578135662655e-06, "epoch": 2.88905959925741, "percentage": 57.78, "elapsed_time": "1:58:40", "remaining_time": "1:26:42", "throughput": 19946.68, "total_tokens": 142028672} +{"current_steps": 45135, "total_steps": 78105, "loss": 0.1424, "lr": 2.2572575475354614e-06, "epoch": 2.889379681198387, "percentage": 57.79, "elapsed_time": "1:58:41", "remaining_time": "1:26:41", "throughput": 19946.94, "total_tokens": 142043648} +{"current_steps": 45140, "total_steps": 78105, "loss": 0.2004, "lr": 2.256701540926716e-06, "epoch": 2.8896997631393635, "percentage": 57.79, "elapsed_time": "1:58:41", "remaining_time": "1:26:40", "throughput": 19947.19, "total_tokens": 142058240} +{"current_steps": 45145, "total_steps": 78105, "loss": 0.2502, "lr": 2.2561455464670274e-06, "epoch": 2.8900198450803405, "percentage": 57.8, "elapsed_time": "1:58:42", "remaining_time": "1:26:39", "throughput": 19947.46, "total_tokens": 142073216} +{"current_steps": 45150, "total_steps": 78105, "loss": 0.3251, "lr": 2.2555895641841614e-06, "epoch": 2.8903399270213175, "percentage": 57.81, "elapsed_time": "1:58:43", "remaining_time": "1:26:39", "throughput": 19947.69, "total_tokens": 142087488} +{"current_steps": 45155, "total_steps": 78105, "loss": 0.1674, "lr": 2.255033594105879e-06, "epoch": 2.8906600089622945, "percentage": 57.81, "elapsed_time": "1:58:43", "remaining_time": "1:26:38", "throughput": 19948.01, "total_tokens": 142103232} +{"current_steps": 45160, "total_steps": 78105, "loss": 0.3264, "lr": 2.2544776362599433e-06, "epoch": 2.8909800909032715, "percentage": 57.82, "elapsed_time": "1:58:44", "remaining_time": "1:26:37", "throughput": 19948.27, "total_tokens": 142118144} +{"current_steps": 45165, "total_steps": 78105, "loss": 0.1983, "lr": 2.253921690674115e-06, "epoch": 2.891300172844248, "percentage": 57.83, "elapsed_time": "1:58:45", "remaining_time": "1:26:36", "throughput": 19948.56, "total_tokens": 142133824} +{"current_steps": 45170, "total_steps": 78105, "loss": 0.3949, "lr": 2.2533657573761564e-06, "epoch": 2.891620254785225, "percentage": 57.83, "elapsed_time": "1:58:45", "remaining_time": "1:26:35", "throughput": 19948.87, "total_tokens": 142149568} +{"current_steps": 45175, "total_steps": 78105, "loss": 0.3101, "lr": 2.2528098363938262e-06, "epoch": 2.891940336726202, "percentage": 57.84, "elapsed_time": "1:58:46", "remaining_time": "1:26:34", "throughput": 19949.16, "total_tokens": 142164928} +{"current_steps": 45180, "total_steps": 78105, "loss": 0.1955, "lr": 2.252253927754886e-06, "epoch": 2.8922604186671785, "percentage": 57.85, "elapsed_time": "1:58:47", "remaining_time": "1:26:33", "throughput": 19949.5, "total_tokens": 142181120} +{"current_steps": 45185, "total_steps": 78105, "loss": 0.2241, "lr": 2.2516980314870936e-06, "epoch": 2.8925805006081555, "percentage": 57.85, "elapsed_time": "1:58:47", "remaining_time": "1:26:32", "throughput": 19949.8, "total_tokens": 142196544} +{"current_steps": 45190, "total_steps": 78105, "loss": 0.2419, "lr": 2.2511421476182096e-06, "epoch": 2.8929005825491325, "percentage": 57.86, "elapsed_time": "1:58:48", "remaining_time": "1:26:32", "throughput": 19950.08, "total_tokens": 142211648} +{"current_steps": 45195, "total_steps": 78105, "loss": 0.2536, "lr": 2.2505862761759888e-06, "epoch": 2.8932206644901095, "percentage": 57.86, "elapsed_time": "1:58:49", "remaining_time": "1:26:31", "throughput": 19950.41, "total_tokens": 142227712} +{"current_steps": 45200, "total_steps": 78105, "loss": 0.2748, "lr": 2.250030417188191e-06, "epoch": 2.8935407464310865, "percentage": 57.87, "elapsed_time": "1:58:49", "remaining_time": "1:26:30", "throughput": 19950.72, "total_tokens": 142243776} +{"current_steps": 45205, "total_steps": 78105, "loss": 0.2847, "lr": 2.2494745706825714e-06, "epoch": 2.8938608283720635, "percentage": 57.88, "elapsed_time": "1:58:50", "remaining_time": "1:26:29", "throughput": 19951.13, "total_tokens": 142261056} +{"current_steps": 45210, "total_steps": 78105, "loss": 0.2542, "lr": 2.248918736686887e-06, "epoch": 2.89418091031304, "percentage": 57.88, "elapsed_time": "1:58:51", "remaining_time": "1:26:28", "throughput": 19951.42, "total_tokens": 142276608} +{"current_steps": 45215, "total_steps": 78105, "loss": 0.2531, "lr": 2.248362915228894e-06, "epoch": 2.894500992254017, "percentage": 57.89, "elapsed_time": "1:58:51", "remaining_time": "1:26:27", "throughput": 19951.73, "total_tokens": 142292224} +{"current_steps": 45220, "total_steps": 78105, "loss": 0.1431, "lr": 2.2478071063363454e-06, "epoch": 2.894821074194994, "percentage": 57.9, "elapsed_time": "1:58:52", "remaining_time": "1:26:26", "throughput": 19952.03, "total_tokens": 142308032} +{"current_steps": 45225, "total_steps": 78105, "loss": 0.1981, "lr": 2.2472513100369974e-06, "epoch": 2.8951411561359706, "percentage": 57.9, "elapsed_time": "1:58:53", "remaining_time": "1:26:26", "throughput": 19952.37, "total_tokens": 142324096} +{"current_steps": 45230, "total_steps": 78105, "loss": 0.2682, "lr": 2.246695526358601e-06, "epoch": 2.8954612380769476, "percentage": 57.91, "elapsed_time": "1:58:53", "remaining_time": "1:26:25", "throughput": 19952.61, "total_tokens": 142338624} +{"current_steps": 45235, "total_steps": 78105, "loss": 0.1736, "lr": 2.2461397553289112e-06, "epoch": 2.8957813200179245, "percentage": 57.92, "elapsed_time": "1:58:54", "remaining_time": "1:26:24", "throughput": 19952.9, "total_tokens": 142354048} +{"current_steps": 45240, "total_steps": 78105, "loss": 0.1856, "lr": 2.2455839969756796e-06, "epoch": 2.8961014019589015, "percentage": 57.92, "elapsed_time": "1:58:55", "remaining_time": "1:26:23", "throughput": 19953.17, "total_tokens": 142369280} +{"current_steps": 45245, "total_steps": 78105, "loss": 0.1625, "lr": 2.2450282513266585e-06, "epoch": 2.8964214838998785, "percentage": 57.93, "elapsed_time": "1:58:55", "remaining_time": "1:26:22", "throughput": 19953.47, "total_tokens": 142385152} +{"current_steps": 45250, "total_steps": 78105, "loss": 0.3317, "lr": 2.244472518409598e-06, "epoch": 2.8967415658408555, "percentage": 57.93, "elapsed_time": "1:58:56", "remaining_time": "1:26:21", "throughput": 19953.77, "total_tokens": 142400576} +{"current_steps": 45255, "total_steps": 78105, "loss": 0.2302, "lr": 2.243916798252249e-06, "epoch": 2.897061647781832, "percentage": 57.94, "elapsed_time": "1:58:57", "remaining_time": "1:26:20", "throughput": 19954.21, "total_tokens": 142418368} +{"current_steps": 45260, "total_steps": 78105, "loss": 0.2291, "lr": 2.2433610908823607e-06, "epoch": 2.897381729722809, "percentage": 57.95, "elapsed_time": "1:58:57", "remaining_time": "1:26:19", "throughput": 19954.47, "total_tokens": 142433280} +{"current_steps": 45265, "total_steps": 78105, "loss": 0.1481, "lr": 2.2428053963276836e-06, "epoch": 2.897701811663786, "percentage": 57.95, "elapsed_time": "1:58:58", "remaining_time": "1:26:19", "throughput": 19954.92, "total_tokens": 142451392} +{"current_steps": 45270, "total_steps": 78105, "loss": 0.1916, "lr": 2.242249714615965e-06, "epoch": 2.8980218936047626, "percentage": 57.96, "elapsed_time": "1:58:59", "remaining_time": "1:26:18", "throughput": 19955.23, "total_tokens": 142466880} +{"current_steps": 45275, "total_steps": 78105, "loss": 0.2127, "lr": 2.2416940457749525e-06, "epoch": 2.8983419755457396, "percentage": 57.97, "elapsed_time": "1:59:00", "remaining_time": "1:26:17", "throughput": 19955.61, "total_tokens": 142483904} +{"current_steps": 45280, "total_steps": 78105, "loss": 0.226, "lr": 2.241138389832395e-06, "epoch": 2.8986620574867166, "percentage": 57.97, "elapsed_time": "1:59:00", "remaining_time": "1:26:16", "throughput": 19955.93, "total_tokens": 142499584} +{"current_steps": 45285, "total_steps": 78105, "loss": 0.2064, "lr": 2.240582746816037e-06, "epoch": 2.8989821394276936, "percentage": 57.98, "elapsed_time": "1:59:01", "remaining_time": "1:26:15", "throughput": 19956.28, "total_tokens": 142516032} +{"current_steps": 45290, "total_steps": 78105, "loss": 0.2315, "lr": 2.2400271167536262e-06, "epoch": 2.8993022213686706, "percentage": 57.99, "elapsed_time": "1:59:02", "remaining_time": "1:26:14", "throughput": 19956.67, "total_tokens": 142533312} +{"current_steps": 45295, "total_steps": 78105, "loss": 0.231, "lr": 2.2394714996729062e-06, "epoch": 2.8996223033096475, "percentage": 57.99, "elapsed_time": "1:59:02", "remaining_time": "1:26:13", "throughput": 19957.02, "total_tokens": 142549312} +{"current_steps": 45300, "total_steps": 78105, "loss": 0.2421, "lr": 2.2389158956016234e-06, "epoch": 2.899942385250624, "percentage": 58.0, "elapsed_time": "1:59:03", "remaining_time": "1:26:13", "throughput": 19957.33, "total_tokens": 142565184} +{"current_steps": 45305, "total_steps": 78105, "loss": 0.2441, "lr": 2.2383603045675197e-06, "epoch": 2.900262467191601, "percentage": 58.01, "elapsed_time": "1:59:04", "remaining_time": "1:26:12", "throughput": 19957.61, "total_tokens": 142580224} +{"current_steps": 45310, "total_steps": 78105, "loss": 0.151, "lr": 2.237804726598341e-06, "epoch": 2.900582549132578, "percentage": 58.01, "elapsed_time": "1:59:04", "remaining_time": "1:26:11", "throughput": 19957.93, "total_tokens": 142596288} +{"current_steps": 45315, "total_steps": 78105, "loss": 0.2503, "lr": 2.2372491617218274e-06, "epoch": 2.9009026310735546, "percentage": 58.02, "elapsed_time": "1:59:05", "remaining_time": "1:26:10", "throughput": 19958.24, "total_tokens": 142612032} +{"current_steps": 45320, "total_steps": 78105, "loss": 0.3537, "lr": 2.236693609965722e-06, "epoch": 2.9012227130145316, "percentage": 58.02, "elapsed_time": "1:59:06", "remaining_time": "1:26:09", "throughput": 19958.54, "total_tokens": 142627328} +{"current_steps": 45325, "total_steps": 78105, "loss": 0.1754, "lr": 2.236138071357766e-06, "epoch": 2.9015427949555086, "percentage": 58.03, "elapsed_time": "1:59:06", "remaining_time": "1:26:08", "throughput": 19958.86, "total_tokens": 142643264} +{"current_steps": 45330, "total_steps": 78105, "loss": 0.1899, "lr": 2.2355825459257006e-06, "epoch": 2.9018628768964856, "percentage": 58.04, "elapsed_time": "1:59:07", "remaining_time": "1:26:07", "throughput": 19959.17, "total_tokens": 142658688} +{"current_steps": 45335, "total_steps": 78105, "loss": 0.2643, "lr": 2.235027033697267e-06, "epoch": 2.9021829588374626, "percentage": 58.04, "elapsed_time": "1:59:08", "remaining_time": "1:26:07", "throughput": 19959.53, "total_tokens": 142675200} +{"current_steps": 45340, "total_steps": 78105, "loss": 0.3192, "lr": 2.2344715347002013e-06, "epoch": 2.902503040778439, "percentage": 58.05, "elapsed_time": "1:59:08", "remaining_time": "1:26:06", "throughput": 19959.95, "total_tokens": 142693120} +{"current_steps": 45345, "total_steps": 78105, "loss": 0.1802, "lr": 2.2339160489622448e-06, "epoch": 2.902823122719416, "percentage": 58.06, "elapsed_time": "1:59:09", "remaining_time": "1:26:05", "throughput": 19960.24, "total_tokens": 142708352} +{"current_steps": 45350, "total_steps": 78105, "loss": 0.1539, "lr": 2.233360576511134e-06, "epoch": 2.903143204660393, "percentage": 58.06, "elapsed_time": "1:59:10", "remaining_time": "1:26:04", "throughput": 19960.65, "total_tokens": 142725824} +{"current_steps": 45355, "total_steps": 78105, "loss": 0.2356, "lr": 2.232805117374609e-06, "epoch": 2.90346328660137, "percentage": 58.07, "elapsed_time": "1:59:11", "remaining_time": "1:26:03", "throughput": 19960.91, "total_tokens": 142741120} +{"current_steps": 45360, "total_steps": 78105, "loss": 0.2417, "lr": 2.232249671580403e-06, "epoch": 2.9037833685423466, "percentage": 58.08, "elapsed_time": "1:59:11", "remaining_time": "1:26:02", "throughput": 19961.21, "total_tokens": 142756928} +{"current_steps": 45365, "total_steps": 78105, "loss": 0.2518, "lr": 2.231694239156255e-06, "epoch": 2.9041034504833236, "percentage": 58.08, "elapsed_time": "1:59:12", "remaining_time": "1:26:01", "throughput": 19961.57, "total_tokens": 142773632} +{"current_steps": 45370, "total_steps": 78105, "loss": 0.2215, "lr": 2.231138820129898e-06, "epoch": 2.9044235324243006, "percentage": 58.09, "elapsed_time": "1:59:13", "remaining_time": "1:26:01", "throughput": 19961.88, "total_tokens": 142789056} +{"current_steps": 45375, "total_steps": 78105, "loss": 0.2013, "lr": 2.2305834145290683e-06, "epoch": 2.9047436143652776, "percentage": 58.09, "elapsed_time": "1:59:13", "remaining_time": "1:26:00", "throughput": 19962.19, "total_tokens": 142804672} +{"current_steps": 45380, "total_steps": 78105, "loss": 0.2605, "lr": 2.2300280223814994e-06, "epoch": 2.9050636963062546, "percentage": 58.1, "elapsed_time": "1:59:14", "remaining_time": "1:25:59", "throughput": 19962.59, "total_tokens": 142822016} +{"current_steps": 45385, "total_steps": 78105, "loss": 0.1667, "lr": 2.2294726437149246e-06, "epoch": 2.905383778247231, "percentage": 58.11, "elapsed_time": "1:59:15", "remaining_time": "1:25:58", "throughput": 19962.94, "total_tokens": 142838464} +{"current_steps": 45390, "total_steps": 78105, "loss": 0.2685, "lr": 2.2289172785570776e-06, "epoch": 2.905703860188208, "percentage": 58.11, "elapsed_time": "1:59:15", "remaining_time": "1:25:57", "throughput": 19963.28, "total_tokens": 142854848} +{"current_steps": 45395, "total_steps": 78105, "loss": 0.208, "lr": 2.2283619269356892e-06, "epoch": 2.906023942129185, "percentage": 58.12, "elapsed_time": "1:59:16", "remaining_time": "1:25:56", "throughput": 19963.6, "total_tokens": 142870912} +{"current_steps": 45400, "total_steps": 78105, "loss": 0.2718, "lr": 2.227806588878492e-06, "epoch": 2.906344024070162, "percentage": 58.13, "elapsed_time": "1:59:17", "remaining_time": "1:25:55", "throughput": 19963.86, "total_tokens": 142885760} +{"current_steps": 45405, "total_steps": 78105, "loss": 0.1724, "lr": 2.227251264413215e-06, "epoch": 2.9066641060111387, "percentage": 58.13, "elapsed_time": "1:59:17", "remaining_time": "1:25:55", "throughput": 19964.14, "total_tokens": 142901056} +{"current_steps": 45410, "total_steps": 78105, "loss": 0.1677, "lr": 2.2266959535675894e-06, "epoch": 2.9069841879521157, "percentage": 58.14, "elapsed_time": "1:59:18", "remaining_time": "1:25:54", "throughput": 19964.48, "total_tokens": 142917376} +{"current_steps": 45415, "total_steps": 78105, "loss": 0.3009, "lr": 2.226140656369344e-06, "epoch": 2.9073042698930927, "percentage": 58.15, "elapsed_time": "1:59:19", "remaining_time": "1:25:53", "throughput": 19964.8, "total_tokens": 142933312} +{"current_steps": 45420, "total_steps": 78105, "loss": 0.1602, "lr": 2.2255853728462086e-06, "epoch": 2.9076243518340696, "percentage": 58.15, "elapsed_time": "1:59:19", "remaining_time": "1:25:52", "throughput": 19965.14, "total_tokens": 142949440} +{"current_steps": 45425, "total_steps": 78105, "loss": 0.2475, "lr": 2.22503010302591e-06, "epoch": 2.9079444337750466, "percentage": 58.16, "elapsed_time": "1:59:20", "remaining_time": "1:25:51", "throughput": 19965.41, "total_tokens": 142964416} +{"current_steps": 45430, "total_steps": 78105, "loss": 0.1824, "lr": 2.2244748469361756e-06, "epoch": 2.908264515716023, "percentage": 58.17, "elapsed_time": "1:59:21", "remaining_time": "1:25:50", "throughput": 19965.72, "total_tokens": 142980480} +{"current_steps": 45435, "total_steps": 78105, "loss": 0.1742, "lr": 2.2239196046047315e-06, "epoch": 2.908584597657, "percentage": 58.17, "elapsed_time": "1:59:21", "remaining_time": "1:25:49", "throughput": 19966.03, "total_tokens": 142996352} +{"current_steps": 45440, "total_steps": 78105, "loss": 0.2532, "lr": 2.2233643760593047e-06, "epoch": 2.908904679597977, "percentage": 58.18, "elapsed_time": "1:59:22", "remaining_time": "1:25:48", "throughput": 19966.33, "total_tokens": 143011840} +{"current_steps": 45445, "total_steps": 78105, "loss": 0.2405, "lr": 2.222809161327621e-06, "epoch": 2.9092247615389537, "percentage": 58.18, "elapsed_time": "1:59:23", "remaining_time": "1:25:48", "throughput": 19966.64, "total_tokens": 143027712} +{"current_steps": 45450, "total_steps": 78105, "loss": 0.3159, "lr": 2.222253960437403e-06, "epoch": 2.9095448434799307, "percentage": 58.19, "elapsed_time": "1:59:24", "remaining_time": "1:25:47", "throughput": 19966.96, "total_tokens": 143043584} +{"current_steps": 45455, "total_steps": 78105, "loss": 0.3149, "lr": 2.221698773416376e-06, "epoch": 2.9098649254209077, "percentage": 58.2, "elapsed_time": "1:59:24", "remaining_time": "1:25:46", "throughput": 19967.33, "total_tokens": 143060288} +{"current_steps": 45460, "total_steps": 78105, "loss": 0.2105, "lr": 2.221143600292262e-06, "epoch": 2.9101850073618847, "percentage": 58.2, "elapsed_time": "1:59:25", "remaining_time": "1:25:45", "throughput": 19967.59, "total_tokens": 143074816} +{"current_steps": 45465, "total_steps": 78105, "loss": 0.1477, "lr": 2.2205884410927847e-06, "epoch": 2.9105050893028617, "percentage": 58.21, "elapsed_time": "1:59:26", "remaining_time": "1:25:44", "throughput": 19967.91, "total_tokens": 143090816} +{"current_steps": 45470, "total_steps": 78105, "loss": 0.1625, "lr": 2.2200332958456647e-06, "epoch": 2.9108251712438387, "percentage": 58.22, "elapsed_time": "1:59:26", "remaining_time": "1:25:43", "throughput": 19968.23, "total_tokens": 143106752} +{"current_steps": 45475, "total_steps": 78105, "loss": 0.2599, "lr": 2.2194781645786244e-06, "epoch": 2.911145253184815, "percentage": 58.22, "elapsed_time": "1:59:27", "remaining_time": "1:25:42", "throughput": 19968.66, "total_tokens": 143124672} +{"current_steps": 45480, "total_steps": 78105, "loss": 0.1547, "lr": 2.2189230473193826e-06, "epoch": 2.911465335125792, "percentage": 58.23, "elapsed_time": "1:59:28", "remaining_time": "1:25:42", "throughput": 19969.04, "total_tokens": 143141440} +{"current_steps": 45485, "total_steps": 78105, "loss": 0.2389, "lr": 2.21836794409566e-06, "epoch": 2.911785417066769, "percentage": 58.24, "elapsed_time": "1:59:28", "remaining_time": "1:25:41", "throughput": 19969.34, "total_tokens": 143157248} +{"current_steps": 45490, "total_steps": 78105, "loss": 0.2098, "lr": 2.217812854935175e-06, "epoch": 2.9121054990077457, "percentage": 58.24, "elapsed_time": "1:59:29", "remaining_time": "1:25:40", "throughput": 19969.67, "total_tokens": 143173440} +{"current_steps": 45495, "total_steps": 78105, "loss": 0.2594, "lr": 2.2172577798656467e-06, "epoch": 2.9124255809487227, "percentage": 58.25, "elapsed_time": "1:59:30", "remaining_time": "1:25:39", "throughput": 19969.98, "total_tokens": 143188992} +{"current_steps": 45500, "total_steps": 78105, "loss": 0.1787, "lr": 2.216702718914792e-06, "epoch": 2.9127456628896997, "percentage": 58.25, "elapsed_time": "1:59:30", "remaining_time": "1:25:38", "throughput": 19970.33, "total_tokens": 143205440} +{"current_steps": 45505, "total_steps": 78105, "loss": 0.1835, "lr": 2.2161476721103277e-06, "epoch": 2.9130657448306767, "percentage": 58.26, "elapsed_time": "1:59:31", "remaining_time": "1:25:37", "throughput": 19970.53, "total_tokens": 143219328} +{"current_steps": 45510, "total_steps": 78105, "loss": 0.2448, "lr": 2.2155926394799705e-06, "epoch": 2.9133858267716537, "percentage": 58.27, "elapsed_time": "1:59:32", "remaining_time": "1:25:36", "throughput": 19970.82, "total_tokens": 143234816} +{"current_steps": 45515, "total_steps": 78105, "loss": 0.3062, "lr": 2.215037621051435e-06, "epoch": 2.9137059087126307, "percentage": 58.27, "elapsed_time": "1:59:32", "remaining_time": "1:25:35", "throughput": 19971.11, "total_tokens": 143250624} +{"current_steps": 45520, "total_steps": 78105, "loss": 0.2098, "lr": 2.2144826168524368e-06, "epoch": 2.9140259906536072, "percentage": 58.28, "elapsed_time": "1:59:33", "remaining_time": "1:25:35", "throughput": 19971.4, "total_tokens": 143266112} +{"current_steps": 45525, "total_steps": 78105, "loss": 0.2695, "lr": 2.213927626910689e-06, "epoch": 2.9143460725945842, "percentage": 58.29, "elapsed_time": "1:59:34", "remaining_time": "1:25:34", "throughput": 19971.8, "total_tokens": 143283328} +{"current_steps": 45530, "total_steps": 78105, "loss": 0.194, "lr": 2.213372651253906e-06, "epoch": 2.914666154535561, "percentage": 58.29, "elapsed_time": "1:59:34", "remaining_time": "1:25:33", "throughput": 19972.07, "total_tokens": 143298176} +{"current_steps": 45535, "total_steps": 78105, "loss": 0.1273, "lr": 2.2128176899097993e-06, "epoch": 2.9149862364765378, "percentage": 58.3, "elapsed_time": "1:59:35", "remaining_time": "1:25:32", "throughput": 19972.4, "total_tokens": 143314368} +{"current_steps": 45540, "total_steps": 78105, "loss": 0.1717, "lr": 2.2122627429060823e-06, "epoch": 2.9153063184175148, "percentage": 58.31, "elapsed_time": "1:59:36", "remaining_time": "1:25:31", "throughput": 19972.64, "total_tokens": 143328960} +{"current_steps": 45545, "total_steps": 78105, "loss": 0.219, "lr": 2.211707810270464e-06, "epoch": 2.9156264003584917, "percentage": 58.31, "elapsed_time": "1:59:36", "remaining_time": "1:25:30", "throughput": 19972.9, "total_tokens": 143344256} +{"current_steps": 45550, "total_steps": 78105, "loss": 0.306, "lr": 2.211152892030656e-06, "epoch": 2.9159464822994687, "percentage": 58.32, "elapsed_time": "1:59:37", "remaining_time": "1:25:29", "throughput": 19973.24, "total_tokens": 143360896} +{"current_steps": 45555, "total_steps": 78105, "loss": 0.178, "lr": 2.2105979882143683e-06, "epoch": 2.9162665642404457, "percentage": 58.33, "elapsed_time": "1:59:38", "remaining_time": "1:25:29", "throughput": 19973.55, "total_tokens": 143376704} +{"current_steps": 45560, "total_steps": 78105, "loss": 0.2196, "lr": 2.2100430988493092e-06, "epoch": 2.9165866461814227, "percentage": 58.33, "elapsed_time": "1:59:38", "remaining_time": "1:25:28", "throughput": 19973.79, "total_tokens": 143391296} +{"current_steps": 45565, "total_steps": 78105, "loss": 0.21, "lr": 2.2094882239631886e-06, "epoch": 2.9169067281223993, "percentage": 58.34, "elapsed_time": "1:59:39", "remaining_time": "1:25:27", "throughput": 19974.12, "total_tokens": 143407488} +{"current_steps": 45570, "total_steps": 78105, "loss": 0.2053, "lr": 2.2089333635837117e-06, "epoch": 2.9172268100633763, "percentage": 58.34, "elapsed_time": "1:59:40", "remaining_time": "1:25:26", "throughput": 19974.42, "total_tokens": 143423232} +{"current_steps": 45575, "total_steps": 78105, "loss": 0.3258, "lr": 2.2083785177385875e-06, "epoch": 2.9175468920043532, "percentage": 58.35, "elapsed_time": "1:59:41", "remaining_time": "1:25:25", "throughput": 19974.84, "total_tokens": 143440704} +{"current_steps": 45580, "total_steps": 78105, "loss": 0.2386, "lr": 2.20782368645552e-06, "epoch": 2.91786697394533, "percentage": 58.36, "elapsed_time": "1:59:41", "remaining_time": "1:25:24", "throughput": 19975.08, "total_tokens": 143455680} +{"current_steps": 45585, "total_steps": 78105, "loss": 0.2883, "lr": 2.207268869762217e-06, "epoch": 2.918187055886307, "percentage": 58.36, "elapsed_time": "1:59:42", "remaining_time": "1:25:23", "throughput": 19975.33, "total_tokens": 143470528} +{"current_steps": 45590, "total_steps": 78105, "loss": 0.2332, "lr": 2.2067140676863807e-06, "epoch": 2.9185071378272838, "percentage": 58.37, "elapsed_time": "1:59:43", "remaining_time": "1:25:22", "throughput": 19975.65, "total_tokens": 143486272} +{"current_steps": 45595, "total_steps": 78105, "loss": 0.2276, "lr": 2.2061592802557174e-06, "epoch": 2.9188272197682608, "percentage": 58.38, "elapsed_time": "1:59:43", "remaining_time": "1:25:22", "throughput": 19975.93, "total_tokens": 143501888} +{"current_steps": 45600, "total_steps": 78105, "loss": 0.2908, "lr": 2.2056045074979284e-06, "epoch": 2.9191473017092378, "percentage": 58.38, "elapsed_time": "1:59:44", "remaining_time": "1:25:21", "throughput": 19976.19, "total_tokens": 143517056} +{"current_steps": 45605, "total_steps": 78105, "loss": 0.2319, "lr": 2.2050497494407168e-06, "epoch": 2.9194673836502143, "percentage": 58.39, "elapsed_time": "1:59:45", "remaining_time": "1:25:20", "throughput": 19976.43, "total_tokens": 143531904} +{"current_steps": 45610, "total_steps": 78105, "loss": 0.1832, "lr": 2.2044950061117835e-06, "epoch": 2.9197874655911913, "percentage": 58.4, "elapsed_time": "1:59:45", "remaining_time": "1:25:19", "throughput": 19976.75, "total_tokens": 143547840} +{"current_steps": 45615, "total_steps": 78105, "loss": 0.1995, "lr": 2.203940277538831e-06, "epoch": 2.9201075475321683, "percentage": 58.4, "elapsed_time": "1:59:46", "remaining_time": "1:25:18", "throughput": 19977.01, "total_tokens": 143563264} +{"current_steps": 45620, "total_steps": 78105, "loss": 0.1516, "lr": 2.20338556374956e-06, "epoch": 2.9204276294731453, "percentage": 58.41, "elapsed_time": "1:59:47", "remaining_time": "1:25:17", "throughput": 19977.29, "total_tokens": 143578304} +{"current_steps": 45625, "total_steps": 78105, "loss": 0.2949, "lr": 2.202830864771668e-06, "epoch": 2.920747711414122, "percentage": 58.41, "elapsed_time": "1:59:47", "remaining_time": "1:25:16", "throughput": 19977.59, "total_tokens": 143593984} +{"current_steps": 45630, "total_steps": 78105, "loss": 0.1497, "lr": 2.2022761806328552e-06, "epoch": 2.921067793355099, "percentage": 58.42, "elapsed_time": "1:59:48", "remaining_time": "1:25:16", "throughput": 19977.92, "total_tokens": 143610112} +{"current_steps": 45635, "total_steps": 78105, "loss": 0.3329, "lr": 2.2017215113608185e-06, "epoch": 2.921387875296076, "percentage": 58.43, "elapsed_time": "1:59:49", "remaining_time": "1:25:15", "throughput": 19978.27, "total_tokens": 143627200} +{"current_steps": 45640, "total_steps": 78105, "loss": 0.2323, "lr": 2.201166856983256e-06, "epoch": 2.921707957237053, "percentage": 58.43, "elapsed_time": "1:59:49", "remaining_time": "1:25:14", "throughput": 19978.58, "total_tokens": 143642816} +{"current_steps": 45645, "total_steps": 78105, "loss": 0.1793, "lr": 2.200612217527864e-06, "epoch": 2.92202803917803, "percentage": 58.44, "elapsed_time": "1:59:50", "remaining_time": "1:25:13", "throughput": 19978.85, "total_tokens": 143658240} +{"current_steps": 45650, "total_steps": 78105, "loss": 0.1993, "lr": 2.2000575930223394e-06, "epoch": 2.9223481211190063, "percentage": 58.45, "elapsed_time": "1:59:51", "remaining_time": "1:25:12", "throughput": 19979.24, "total_tokens": 143675520} +{"current_steps": 45655, "total_steps": 78105, "loss": 0.2178, "lr": 2.199502983494375e-06, "epoch": 2.9226682030599833, "percentage": 58.45, "elapsed_time": "1:59:51", "remaining_time": "1:25:11", "throughput": 19979.5, "total_tokens": 143690112} +{"current_steps": 45660, "total_steps": 78105, "loss": 0.1915, "lr": 2.198948388971667e-06, "epoch": 2.9229882850009603, "percentage": 58.46, "elapsed_time": "1:59:52", "remaining_time": "1:25:10", "throughput": 19979.74, "total_tokens": 143704832} +{"current_steps": 45665, "total_steps": 78105, "loss": 0.2821, "lr": 2.198393809481908e-06, "epoch": 2.9233083669419373, "percentage": 58.47, "elapsed_time": "1:59:53", "remaining_time": "1:25:09", "throughput": 19980.02, "total_tokens": 143719808} +{"current_steps": 45670, "total_steps": 78105, "loss": 0.2764, "lr": 2.197839245052791e-06, "epoch": 2.923628448882914, "percentage": 58.47, "elapsed_time": "1:59:53", "remaining_time": "1:25:09", "throughput": 19980.34, "total_tokens": 143735616} +{"current_steps": 45675, "total_steps": 78105, "loss": 0.2563, "lr": 2.197284695712009e-06, "epoch": 2.923948530823891, "percentage": 58.48, "elapsed_time": "1:59:54", "remaining_time": "1:25:08", "throughput": 19980.71, "total_tokens": 143752448} +{"current_steps": 45680, "total_steps": 78105, "loss": 0.2517, "lr": 2.1967301614872516e-06, "epoch": 2.924268612764868, "percentage": 58.49, "elapsed_time": "1:59:55", "remaining_time": "1:25:07", "throughput": 19981.03, "total_tokens": 143768384} +{"current_steps": 45685, "total_steps": 78105, "loss": 0.2312, "lr": 2.1961756424062114e-06, "epoch": 2.924588694705845, "percentage": 58.49, "elapsed_time": "1:59:55", "remaining_time": "1:25:06", "throughput": 19981.3, "total_tokens": 143783296} +{"current_steps": 45690, "total_steps": 78105, "loss": 0.2144, "lr": 2.195621138496576e-06, "epoch": 2.924908776646822, "percentage": 58.5, "elapsed_time": "1:59:56", "remaining_time": "1:25:05", "throughput": 19981.61, "total_tokens": 143799360} +{"current_steps": 45695, "total_steps": 78105, "loss": 0.2718, "lr": 2.195066649786036e-06, "epoch": 2.9252288585877984, "percentage": 58.5, "elapsed_time": "1:59:57", "remaining_time": "1:25:04", "throughput": 19981.86, "total_tokens": 143814144} +{"current_steps": 45700, "total_steps": 78105, "loss": 0.285, "lr": 2.1945121763022786e-06, "epoch": 2.9255489405287753, "percentage": 58.51, "elapsed_time": "1:59:57", "remaining_time": "1:25:03", "throughput": 19982.14, "total_tokens": 143828992} +{"current_steps": 45705, "total_steps": 78105, "loss": 0.1564, "lr": 2.193957718072993e-06, "epoch": 2.9258690224697523, "percentage": 58.52, "elapsed_time": "1:59:58", "remaining_time": "1:25:03", "throughput": 19982.43, "total_tokens": 143844672} +{"current_steps": 45710, "total_steps": 78105, "loss": 0.2277, "lr": 2.1934032751258638e-06, "epoch": 2.926189104410729, "percentage": 58.52, "elapsed_time": "1:59:59", "remaining_time": "1:25:02", "throughput": 19982.73, "total_tokens": 143860416} +{"current_steps": 45715, "total_steps": 78105, "loss": 0.2406, "lr": 2.1928488474885786e-06, "epoch": 2.926509186351706, "percentage": 58.53, "elapsed_time": "1:59:59", "remaining_time": "1:25:01", "throughput": 19983.0, "total_tokens": 143875520} +{"current_steps": 45720, "total_steps": 78105, "loss": 0.2056, "lr": 2.1922944351888214e-06, "epoch": 2.926829268292683, "percentage": 58.54, "elapsed_time": "2:00:00", "remaining_time": "1:25:00", "throughput": 19983.27, "total_tokens": 143891264} +{"current_steps": 45725, "total_steps": 78105, "loss": 0.2964, "lr": 2.1917400382542783e-06, "epoch": 2.92714935023366, "percentage": 58.54, "elapsed_time": "2:00:01", "remaining_time": "1:24:59", "throughput": 19983.54, "total_tokens": 143906240} +{"current_steps": 45730, "total_steps": 78105, "loss": 0.2918, "lr": 2.191185656712631e-06, "epoch": 2.927469432174637, "percentage": 58.55, "elapsed_time": "2:00:01", "remaining_time": "1:24:58", "throughput": 19983.86, "total_tokens": 143922240} +{"current_steps": 45735, "total_steps": 78105, "loss": 0.2193, "lr": 2.190631290591564e-06, "epoch": 2.927789514115614, "percentage": 58.56, "elapsed_time": "2:00:02", "remaining_time": "1:24:57", "throughput": 19984.16, "total_tokens": 143938112} +{"current_steps": 45740, "total_steps": 78105, "loss": 0.2415, "lr": 2.190076939918759e-06, "epoch": 2.9281095960565904, "percentage": 58.56, "elapsed_time": "2:00:03", "remaining_time": "1:24:56", "throughput": 19984.48, "total_tokens": 143954176} +{"current_steps": 45745, "total_steps": 78105, "loss": 0.3458, "lr": 2.189522604721897e-06, "epoch": 2.9284296779975674, "percentage": 58.57, "elapsed_time": "2:00:03", "remaining_time": "1:24:56", "throughput": 19984.78, "total_tokens": 143969856} +{"current_steps": 45750, "total_steps": 78105, "loss": 0.2386, "lr": 2.188968285028659e-06, "epoch": 2.9287497599385444, "percentage": 58.57, "elapsed_time": "2:00:04", "remaining_time": "1:24:55", "throughput": 19985.04, "total_tokens": 143984768} +{"current_steps": 45755, "total_steps": 78105, "loss": 0.3346, "lr": 2.1884139808667247e-06, "epoch": 2.929069841879521, "percentage": 58.58, "elapsed_time": "2:00:05", "remaining_time": "1:24:54", "throughput": 19985.33, "total_tokens": 144000128} +{"current_steps": 45760, "total_steps": 78105, "loss": 0.1727, "lr": 2.187859692263774e-06, "epoch": 2.929389923820498, "percentage": 58.59, "elapsed_time": "2:00:05", "remaining_time": "1:24:53", "throughput": 19985.6, "total_tokens": 144015360} +{"current_steps": 45765, "total_steps": 78105, "loss": 0.2571, "lr": 2.187305419247483e-06, "epoch": 2.929710005761475, "percentage": 58.59, "elapsed_time": "2:00:06", "remaining_time": "1:24:52", "throughput": 19985.88, "total_tokens": 144030656} +{"current_steps": 45770, "total_steps": 78105, "loss": 0.2338, "lr": 2.186751161845532e-06, "epoch": 2.930030087702452, "percentage": 58.6, "elapsed_time": "2:00:07", "remaining_time": "1:24:51", "throughput": 19986.2, "total_tokens": 144046272} +{"current_steps": 45775, "total_steps": 78105, "loss": 0.2016, "lr": 2.1861969200855954e-06, "epoch": 2.930350169643429, "percentage": 58.61, "elapsed_time": "2:00:07", "remaining_time": "1:24:50", "throughput": 19986.48, "total_tokens": 144061632} +{"current_steps": 45780, "total_steps": 78105, "loss": 0.1527, "lr": 2.1856426939953506e-06, "epoch": 2.930670251584406, "percentage": 58.61, "elapsed_time": "2:00:08", "remaining_time": "1:24:49", "throughput": 19986.77, "total_tokens": 144077248} +{"current_steps": 45785, "total_steps": 78105, "loss": 0.2234, "lr": 2.185088483602471e-06, "epoch": 2.9309903335253824, "percentage": 58.62, "elapsed_time": "2:00:09", "remaining_time": "1:24:49", "throughput": 19987.14, "total_tokens": 144094080} +{"current_steps": 45790, "total_steps": 78105, "loss": 0.1364, "lr": 2.184534288934633e-06, "epoch": 2.9313104154663594, "percentage": 58.63, "elapsed_time": "2:00:10", "remaining_time": "1:24:48", "throughput": 19987.43, "total_tokens": 144109440} +{"current_steps": 45795, "total_steps": 78105, "loss": 0.1791, "lr": 2.18398011001951e-06, "epoch": 2.9316304974073364, "percentage": 58.63, "elapsed_time": "2:00:10", "remaining_time": "1:24:47", "throughput": 19987.69, "total_tokens": 144124416} +{"current_steps": 45800, "total_steps": 78105, "loss": 0.2824, "lr": 2.1834259468847734e-06, "epoch": 2.931950579348313, "percentage": 58.64, "elapsed_time": "2:00:11", "remaining_time": "1:24:46", "throughput": 19987.94, "total_tokens": 144139136} +{"current_steps": 45805, "total_steps": 78105, "loss": 0.2023, "lr": 2.1828717995580966e-06, "epoch": 2.93227066128929, "percentage": 58.65, "elapsed_time": "2:00:12", "remaining_time": "1:24:45", "throughput": 19988.29, "total_tokens": 144155712} +{"current_steps": 45810, "total_steps": 78105, "loss": 0.2327, "lr": 2.1823176680671498e-06, "epoch": 2.932590743230267, "percentage": 58.65, "elapsed_time": "2:00:12", "remaining_time": "1:24:44", "throughput": 19988.6, "total_tokens": 144171264} +{"current_steps": 45815, "total_steps": 78105, "loss": 0.1862, "lr": 2.1817635524396045e-06, "epoch": 2.932910825171244, "percentage": 58.66, "elapsed_time": "2:00:13", "remaining_time": "1:24:43", "throughput": 19988.98, "total_tokens": 144188160} +{"current_steps": 45820, "total_steps": 78105, "loss": 0.16, "lr": 2.181209452703129e-06, "epoch": 2.933230907112221, "percentage": 58.66, "elapsed_time": "2:00:14", "remaining_time": "1:24:43", "throughput": 19989.24, "total_tokens": 144202688} +{"current_steps": 45825, "total_steps": 78105, "loss": 0.2288, "lr": 2.1806553688853942e-06, "epoch": 2.933550989053198, "percentage": 58.67, "elapsed_time": "2:00:14", "remaining_time": "1:24:42", "throughput": 19989.52, "total_tokens": 144218048} +{"current_steps": 45830, "total_steps": 78105, "loss": 0.2404, "lr": 2.180101301014066e-06, "epoch": 2.9338710709941744, "percentage": 58.68, "elapsed_time": "2:00:15", "remaining_time": "1:24:41", "throughput": 19989.83, "total_tokens": 144234304} +{"current_steps": 45835, "total_steps": 78105, "loss": 0.2155, "lr": 2.1795472491168124e-06, "epoch": 2.9341911529351514, "percentage": 58.68, "elapsed_time": "2:00:16", "remaining_time": "1:24:40", "throughput": 19990.14, "total_tokens": 144249856} +{"current_steps": 45840, "total_steps": 78105, "loss": 0.265, "lr": 2.1789932132212997e-06, "epoch": 2.9345112348761284, "percentage": 58.69, "elapsed_time": "2:00:16", "remaining_time": "1:24:39", "throughput": 19990.46, "total_tokens": 144265920} +{"current_steps": 45845, "total_steps": 78105, "loss": 0.1973, "lr": 2.178439193355194e-06, "epoch": 2.934831316817105, "percentage": 58.7, "elapsed_time": "2:00:17", "remaining_time": "1:24:38", "throughput": 19990.77, "total_tokens": 144282048} +{"current_steps": 45850, "total_steps": 78105, "loss": 0.2021, "lr": 2.1778851895461612e-06, "epoch": 2.935151398758082, "percentage": 58.7, "elapsed_time": "2:00:18", "remaining_time": "1:24:37", "throughput": 19991.03, "total_tokens": 144296960} +{"current_steps": 45855, "total_steps": 78105, "loss": 0.1903, "lr": 2.177331201821863e-06, "epoch": 2.935471480699059, "percentage": 58.71, "elapsed_time": "2:00:18", "remaining_time": "1:24:36", "throughput": 19991.34, "total_tokens": 144312896} +{"current_steps": 45860, "total_steps": 78105, "loss": 0.1487, "lr": 2.1767772302099645e-06, "epoch": 2.935791562640036, "percentage": 58.72, "elapsed_time": "2:00:19", "remaining_time": "1:24:36", "throughput": 19991.63, "total_tokens": 144328448} +{"current_steps": 45865, "total_steps": 78105, "loss": 0.1638, "lr": 2.176223274738127e-06, "epoch": 2.936111644581013, "percentage": 58.72, "elapsed_time": "2:00:20", "remaining_time": "1:24:35", "throughput": 19992.01, "total_tokens": 144345344} +{"current_steps": 45870, "total_steps": 78105, "loss": 0.2904, "lr": 2.175669335434013e-06, "epoch": 2.9364317265219895, "percentage": 58.73, "elapsed_time": "2:00:20", "remaining_time": "1:24:34", "throughput": 19992.29, "total_tokens": 144360320} +{"current_steps": 45875, "total_steps": 78105, "loss": 0.1791, "lr": 2.175115412325283e-06, "epoch": 2.9367518084629665, "percentage": 58.74, "elapsed_time": "2:00:21", "remaining_time": "1:24:33", "throughput": 19992.65, "total_tokens": 144376640} +{"current_steps": 45880, "total_steps": 78105, "loss": 0.2334, "lr": 2.1745615054395973e-06, "epoch": 2.9370718904039435, "percentage": 58.74, "elapsed_time": "2:00:22", "remaining_time": "1:24:32", "throughput": 19992.93, "total_tokens": 144392064} +{"current_steps": 45885, "total_steps": 78105, "loss": 0.1733, "lr": 2.174007614804614e-06, "epoch": 2.9373919723449204, "percentage": 58.75, "elapsed_time": "2:00:22", "remaining_time": "1:24:31", "throughput": 19993.31, "total_tokens": 144409216} +{"current_steps": 45890, "total_steps": 78105, "loss": 0.2628, "lr": 2.173453740447993e-06, "epoch": 2.937712054285897, "percentage": 58.75, "elapsed_time": "2:00:23", "remaining_time": "1:24:30", "throughput": 19993.6, "total_tokens": 144424832} +{"current_steps": 45895, "total_steps": 78105, "loss": 0.1869, "lr": 2.1728998823973908e-06, "epoch": 2.938032136226874, "percentage": 58.76, "elapsed_time": "2:00:24", "remaining_time": "1:24:30", "throughput": 19993.94, "total_tokens": 144441408} +{"current_steps": 45900, "total_steps": 78105, "loss": 0.2113, "lr": 2.1723460406804657e-06, "epoch": 2.938352218167851, "percentage": 58.77, "elapsed_time": "2:00:24", "remaining_time": "1:24:29", "throughput": 19994.19, "total_tokens": 144456000} +{"current_steps": 45905, "total_steps": 78105, "loss": 0.2059, "lr": 2.171792215324872e-06, "epoch": 2.938672300108828, "percentage": 58.77, "elapsed_time": "2:00:25", "remaining_time": "1:24:28", "throughput": 19994.54, "total_tokens": 144472576} +{"current_steps": 45910, "total_steps": 78105, "loss": 0.1148, "lr": 2.1712384063582647e-06, "epoch": 2.938992382049805, "percentage": 58.78, "elapsed_time": "2:00:26", "remaining_time": "1:24:27", "throughput": 19994.85, "total_tokens": 144488704} +{"current_steps": 45915, "total_steps": 78105, "loss": 0.2741, "lr": 2.170684613808301e-06, "epoch": 2.9393124639907815, "percentage": 58.79, "elapsed_time": "2:00:26", "remaining_time": "1:24:26", "throughput": 19995.12, "total_tokens": 144504000} +{"current_steps": 45920, "total_steps": 78105, "loss": 0.1434, "lr": 2.1701308377026305e-06, "epoch": 2.9396325459317585, "percentage": 58.79, "elapsed_time": "2:00:27", "remaining_time": "1:24:25", "throughput": 19995.38, "total_tokens": 144518848} +{"current_steps": 45925, "total_steps": 78105, "loss": 0.3417, "lr": 2.1695770780689083e-06, "epoch": 2.9399526278727355, "percentage": 58.8, "elapsed_time": "2:00:28", "remaining_time": "1:24:24", "throughput": 19995.71, "total_tokens": 144535424} +{"current_steps": 45930, "total_steps": 78105, "loss": 0.1506, "lr": 2.169023334934785e-06, "epoch": 2.9402727098137125, "percentage": 58.81, "elapsed_time": "2:00:28", "remaining_time": "1:24:24", "throughput": 19995.99, "total_tokens": 144550656} +{"current_steps": 45935, "total_steps": 78105, "loss": 0.1679, "lr": 2.1684696083279143e-06, "epoch": 2.940592791754689, "percentage": 58.81, "elapsed_time": "2:00:29", "remaining_time": "1:24:23", "throughput": 19996.22, "total_tokens": 144565312} +{"current_steps": 45940, "total_steps": 78105, "loss": 0.2042, "lr": 2.1679158982759425e-06, "epoch": 2.940912873695666, "percentage": 58.82, "elapsed_time": "2:00:30", "remaining_time": "1:24:22", "throughput": 19996.55, "total_tokens": 144581504} +{"current_steps": 45945, "total_steps": 78105, "loss": 0.2542, "lr": 2.1673622048065222e-06, "epoch": 2.941232955636643, "percentage": 58.82, "elapsed_time": "2:00:31", "remaining_time": "1:24:21", "throughput": 19996.87, "total_tokens": 144597568} +{"current_steps": 45950, "total_steps": 78105, "loss": 0.2439, "lr": 2.1668085279473e-06, "epoch": 2.94155303757762, "percentage": 58.83, "elapsed_time": "2:00:31", "remaining_time": "1:24:20", "throughput": 19997.13, "total_tokens": 144612544} +{"current_steps": 45955, "total_steps": 78105, "loss": 0.2301, "lr": 2.1662548677259252e-06, "epoch": 2.941873119518597, "percentage": 58.84, "elapsed_time": "2:00:32", "remaining_time": "1:24:19", "throughput": 19997.38, "total_tokens": 144627776} +{"current_steps": 45960, "total_steps": 78105, "loss": 0.1665, "lr": 2.1657012241700428e-06, "epoch": 2.9421932014595735, "percentage": 58.84, "elapsed_time": "2:00:33", "remaining_time": "1:24:18", "throughput": 19997.7, "total_tokens": 144643712} +{"current_steps": 45965, "total_steps": 78105, "loss": 0.3135, "lr": 2.1651475973073e-06, "epoch": 2.9425132834005505, "percentage": 58.85, "elapsed_time": "2:00:33", "remaining_time": "1:24:17", "throughput": 19997.95, "total_tokens": 144658688} +{"current_steps": 45970, "total_steps": 78105, "loss": 0.2947, "lr": 2.1645939871653433e-06, "epoch": 2.9428333653415275, "percentage": 58.86, "elapsed_time": "2:00:34", "remaining_time": "1:24:17", "throughput": 19998.27, "total_tokens": 144674752} +{"current_steps": 45975, "total_steps": 78105, "loss": 0.1797, "lr": 2.1640403937718146e-06, "epoch": 2.943153447282504, "percentage": 58.86, "elapsed_time": "2:00:35", "remaining_time": "1:24:16", "throughput": 19998.61, "total_tokens": 144690688} +{"current_steps": 45980, "total_steps": 78105, "loss": 0.3034, "lr": 2.1634868171543595e-06, "epoch": 2.943473529223481, "percentage": 58.87, "elapsed_time": "2:00:35", "remaining_time": "1:24:15", "throughput": 19998.92, "total_tokens": 144706816} +{"current_steps": 45985, "total_steps": 78105, "loss": 0.1714, "lr": 2.1629332573406194e-06, "epoch": 2.943793611164458, "percentage": 58.88, "elapsed_time": "2:00:36", "remaining_time": "1:24:14", "throughput": 19999.23, "total_tokens": 144722752} +{"current_steps": 45990, "total_steps": 78105, "loss": 0.2672, "lr": 2.1623797143582375e-06, "epoch": 2.944113693105435, "percentage": 58.88, "elapsed_time": "2:00:37", "remaining_time": "1:24:13", "throughput": 19999.57, "total_tokens": 144739200} +{"current_steps": 45995, "total_steps": 78105, "loss": 0.2714, "lr": 2.161826188234853e-06, "epoch": 2.944433775046412, "percentage": 58.89, "elapsed_time": "2:00:37", "remaining_time": "1:24:12", "throughput": 19999.84, "total_tokens": 144754432} +{"current_steps": 46000, "total_steps": 78105, "loss": 0.2298, "lr": 2.161272678998108e-06, "epoch": 2.944753856987389, "percentage": 58.9, "elapsed_time": "2:00:38", "remaining_time": "1:24:11", "throughput": 20000.12, "total_tokens": 144769600} +{"current_steps": 46005, "total_steps": 78105, "loss": 0.2261, "lr": 2.1607191866756403e-06, "epoch": 2.9450739389283656, "percentage": 58.9, "elapsed_time": "2:00:39", "remaining_time": "1:24:11", "throughput": 20000.42, "total_tokens": 144785152} +{"current_steps": 46010, "total_steps": 78105, "loss": 0.2334, "lr": 2.1601657112950897e-06, "epoch": 2.9453940208693425, "percentage": 58.91, "elapsed_time": "2:00:39", "remaining_time": "1:24:10", "throughput": 20000.74, "total_tokens": 144801088} +{"current_steps": 46015, "total_steps": 78105, "loss": 0.2539, "lr": 2.1596122528840928e-06, "epoch": 2.9457141028103195, "percentage": 58.91, "elapsed_time": "2:00:40", "remaining_time": "1:24:09", "throughput": 20001.13, "total_tokens": 144818560} +{"current_steps": 46020, "total_steps": 78105, "loss": 0.278, "lr": 2.159058811470287e-06, "epoch": 2.946034184751296, "percentage": 58.92, "elapsed_time": "2:00:41", "remaining_time": "1:24:08", "throughput": 20001.36, "total_tokens": 144832896} +{"current_steps": 46025, "total_steps": 78105, "loss": 0.1882, "lr": 2.158505387081309e-06, "epoch": 2.946354266692273, "percentage": 58.93, "elapsed_time": "2:00:41", "remaining_time": "1:24:07", "throughput": 20001.69, "total_tokens": 144848576} +{"current_steps": 46030, "total_steps": 78105, "loss": 0.3166, "lr": 2.1579519797447926e-06, "epoch": 2.94667434863325, "percentage": 58.93, "elapsed_time": "2:00:42", "remaining_time": "1:24:06", "throughput": 20001.98, "total_tokens": 144864000} +{"current_steps": 46035, "total_steps": 78105, "loss": 0.1769, "lr": 2.157398589488373e-06, "epoch": 2.946994430574227, "percentage": 58.94, "elapsed_time": "2:00:43", "remaining_time": "1:24:05", "throughput": 20002.24, "total_tokens": 144879232} +{"current_steps": 46040, "total_steps": 78105, "loss": 0.2262, "lr": 2.1568452163396826e-06, "epoch": 2.947314512515204, "percentage": 58.95, "elapsed_time": "2:00:43", "remaining_time": "1:24:05", "throughput": 20002.55, "total_tokens": 144895104} +{"current_steps": 46045, "total_steps": 78105, "loss": 0.2183, "lr": 2.156291860326356e-06, "epoch": 2.947634594456181, "percentage": 58.95, "elapsed_time": "2:00:44", "remaining_time": "1:24:04", "throughput": 20002.81, "total_tokens": 144909632} +{"current_steps": 46050, "total_steps": 78105, "loss": 0.2074, "lr": 2.155738521476022e-06, "epoch": 2.9479546763971576, "percentage": 58.96, "elapsed_time": "2:00:45", "remaining_time": "1:24:03", "throughput": 20003.1, "total_tokens": 144924992} +{"current_steps": 46055, "total_steps": 78105, "loss": 0.1439, "lr": 2.155185199816315e-06, "epoch": 2.9482747583381346, "percentage": 58.97, "elapsed_time": "2:00:45", "remaining_time": "1:24:02", "throughput": 20003.37, "total_tokens": 144940032} +{"current_steps": 46060, "total_steps": 78105, "loss": 0.2487, "lr": 2.1546318953748615e-06, "epoch": 2.9485948402791116, "percentage": 58.97, "elapsed_time": "2:00:46", "remaining_time": "1:24:01", "throughput": 20003.67, "total_tokens": 144955392} +{"current_steps": 46065, "total_steps": 78105, "loss": 0.1977, "lr": 2.154078608179293e-06, "epoch": 2.948914922220088, "percentage": 58.98, "elapsed_time": "2:00:47", "remaining_time": "1:24:00", "throughput": 20003.96, "total_tokens": 144970880} +{"current_steps": 46070, "total_steps": 78105, "loss": 0.2199, "lr": 2.153525338257236e-06, "epoch": 2.949235004161065, "percentage": 58.98, "elapsed_time": "2:00:47", "remaining_time": "1:23:59", "throughput": 20004.36, "total_tokens": 144988224} +{"current_steps": 46075, "total_steps": 78105, "loss": 0.2367, "lr": 2.15297208563632e-06, "epoch": 2.949555086102042, "percentage": 58.99, "elapsed_time": "2:00:48", "remaining_time": "1:23:58", "throughput": 20004.63, "total_tokens": 145003648} +{"current_steps": 46080, "total_steps": 78105, "loss": 0.2339, "lr": 2.1524188503441697e-06, "epoch": 2.949875168043019, "percentage": 59.0, "elapsed_time": "2:00:49", "remaining_time": "1:23:58", "throughput": 20005.07, "total_tokens": 145021952} +{"current_steps": 46085, "total_steps": 78105, "loss": 0.2185, "lr": 2.1518656324084113e-06, "epoch": 2.950195249983996, "percentage": 59.0, "elapsed_time": "2:00:49", "remaining_time": "1:23:57", "throughput": 20005.34, "total_tokens": 145036992} +{"current_steps": 46090, "total_steps": 78105, "loss": 0.2829, "lr": 2.1513124318566704e-06, "epoch": 2.950515331924973, "percentage": 59.01, "elapsed_time": "2:00:50", "remaining_time": "1:23:56", "throughput": 20005.6, "total_tokens": 145052224} +{"current_steps": 46095, "total_steps": 78105, "loss": 0.2023, "lr": 2.15075924871657e-06, "epoch": 2.9508354138659496, "percentage": 59.02, "elapsed_time": "2:00:51", "remaining_time": "1:23:55", "throughput": 20005.87, "total_tokens": 145067072} +{"current_steps": 46100, "total_steps": 78105, "loss": 0.2653, "lr": 2.1502060830157344e-06, "epoch": 2.9511554958069266, "percentage": 59.02, "elapsed_time": "2:00:51", "remaining_time": "1:23:54", "throughput": 20006.14, "total_tokens": 145082880} +{"current_steps": 46105, "total_steps": 78105, "loss": 0.279, "lr": 2.149652934781784e-06, "epoch": 2.9514755777479036, "percentage": 59.03, "elapsed_time": "2:00:52", "remaining_time": "1:23:53", "throughput": 20006.43, "total_tokens": 145098368} +{"current_steps": 46110, "total_steps": 78105, "loss": 0.1865, "lr": 2.1490998040423424e-06, "epoch": 2.95179565968888, "percentage": 59.04, "elapsed_time": "2:00:53", "remaining_time": "1:23:52", "throughput": 20006.76, "total_tokens": 145114368} +{"current_steps": 46115, "total_steps": 78105, "loss": 0.246, "lr": 2.1485466908250275e-06, "epoch": 2.952115741629857, "percentage": 59.04, "elapsed_time": "2:00:53", "remaining_time": "1:23:52", "throughput": 20007.03, "total_tokens": 145129664} +{"current_steps": 46120, "total_steps": 78105, "loss": 0.1757, "lr": 2.1479935951574606e-06, "epoch": 2.952435823570834, "percentage": 59.05, "elapsed_time": "2:00:54", "remaining_time": "1:23:51", "throughput": 20007.34, "total_tokens": 145145536} +{"current_steps": 46125, "total_steps": 78105, "loss": 0.2171, "lr": 2.14744051706726e-06, "epoch": 2.952755905511811, "percentage": 59.06, "elapsed_time": "2:00:55", "remaining_time": "1:23:50", "throughput": 20007.65, "total_tokens": 145161664} +{"current_steps": 46130, "total_steps": 78105, "loss": 0.2985, "lr": 2.1468874565820446e-06, "epoch": 2.953075987452788, "percentage": 59.06, "elapsed_time": "2:00:55", "remaining_time": "1:23:49", "throughput": 20007.94, "total_tokens": 145177024} +{"current_steps": 46135, "total_steps": 78105, "loss": 0.3048, "lr": 2.1463344137294287e-06, "epoch": 2.9533960693937646, "percentage": 59.07, "elapsed_time": "2:00:56", "remaining_time": "1:23:48", "throughput": 20008.22, "total_tokens": 145192384} +{"current_steps": 46140, "total_steps": 78105, "loss": 0.2254, "lr": 2.1457813885370304e-06, "epoch": 2.9537161513347416, "percentage": 59.07, "elapsed_time": "2:00:57", "remaining_time": "1:23:47", "throughput": 20008.54, "total_tokens": 145208320} +{"current_steps": 46145, "total_steps": 78105, "loss": 0.2513, "lr": 2.1452283810324657e-06, "epoch": 2.9540362332757186, "percentage": 59.08, "elapsed_time": "2:00:57", "remaining_time": "1:23:46", "throughput": 20008.84, "total_tokens": 145223808} +{"current_steps": 46150, "total_steps": 78105, "loss": 0.2454, "lr": 2.1446753912433464e-06, "epoch": 2.9543563152166956, "percentage": 59.09, "elapsed_time": "2:00:58", "remaining_time": "1:23:46", "throughput": 20009.12, "total_tokens": 145239104} +{"current_steps": 46155, "total_steps": 78105, "loss": 0.3205, "lr": 2.1441224191972876e-06, "epoch": 2.954676397157672, "percentage": 59.09, "elapsed_time": "2:00:59", "remaining_time": "1:23:45", "throughput": 20009.37, "total_tokens": 145254080} +{"current_steps": 46160, "total_steps": 78105, "loss": 0.2263, "lr": 2.1435694649219012e-06, "epoch": 2.954996479098649, "percentage": 59.1, "elapsed_time": "2:00:59", "remaining_time": "1:23:44", "throughput": 20009.72, "total_tokens": 145270464} +{"current_steps": 46165, "total_steps": 78105, "loss": 0.2373, "lr": 2.1430165284448006e-06, "epoch": 2.955316561039626, "percentage": 59.11, "elapsed_time": "2:01:00", "remaining_time": "1:23:43", "throughput": 20010.02, "total_tokens": 145285888} +{"current_steps": 46170, "total_steps": 78105, "loss": 0.2568, "lr": 2.1424636097935935e-06, "epoch": 2.955636642980603, "percentage": 59.11, "elapsed_time": "2:01:01", "remaining_time": "1:23:42", "throughput": 20010.3, "total_tokens": 145301056} +{"current_steps": 46175, "total_steps": 78105, "loss": 0.1594, "lr": 2.141910708995892e-06, "epoch": 2.95595672492158, "percentage": 59.12, "elapsed_time": "2:01:02", "remaining_time": "1:23:41", "throughput": 20010.65, "total_tokens": 145317504} +{"current_steps": 46180, "total_steps": 78105, "loss": 0.2354, "lr": 2.141357826079304e-06, "epoch": 2.9562768068625567, "percentage": 59.13, "elapsed_time": "2:01:02", "remaining_time": "1:23:40", "throughput": 20010.98, "total_tokens": 145333696} +{"current_steps": 46185, "total_steps": 78105, "loss": 0.1758, "lr": 2.140804961071439e-06, "epoch": 2.9565968888035337, "percentage": 59.13, "elapsed_time": "2:01:03", "remaining_time": "1:23:39", "throughput": 20011.27, "total_tokens": 145349312} +{"current_steps": 46190, "total_steps": 78105, "loss": 0.158, "lr": 2.1402521139999024e-06, "epoch": 2.9569169707445107, "percentage": 59.14, "elapsed_time": "2:01:04", "remaining_time": "1:23:39", "throughput": 20011.53, "total_tokens": 145364416} +{"current_steps": 46195, "total_steps": 78105, "loss": 0.2748, "lr": 2.1396992848923013e-06, "epoch": 2.9572370526854876, "percentage": 59.14, "elapsed_time": "2:01:04", "remaining_time": "1:23:38", "throughput": 20011.89, "total_tokens": 145381376} +{"current_steps": 46200, "total_steps": 78105, "loss": 0.2563, "lr": 2.1391464737762425e-06, "epoch": 2.957557134626464, "percentage": 59.15, "elapsed_time": "2:01:05", "remaining_time": "1:23:37", "throughput": 20012.21, "total_tokens": 145397568} +{"current_steps": 46205, "total_steps": 78105, "loss": 0.3571, "lr": 2.1385936806793277e-06, "epoch": 2.957877216567441, "percentage": 59.16, "elapsed_time": "2:01:06", "remaining_time": "1:23:36", "throughput": 20012.5, "total_tokens": 145412800} +{"current_steps": 46210, "total_steps": 78105, "loss": 0.169, "lr": 2.1380409056291627e-06, "epoch": 2.958197298508418, "percentage": 59.16, "elapsed_time": "2:01:06", "remaining_time": "1:23:35", "throughput": 20012.89, "total_tokens": 145429952} +{"current_steps": 46215, "total_steps": 78105, "loss": 0.2084, "lr": 2.1374881486533488e-06, "epoch": 2.958517380449395, "percentage": 59.17, "elapsed_time": "2:01:07", "remaining_time": "1:23:34", "throughput": 20013.3, "total_tokens": 145447296} +{"current_steps": 46220, "total_steps": 78105, "loss": 0.2288, "lr": 2.13693540977949e-06, "epoch": 2.958837462390372, "percentage": 59.18, "elapsed_time": "2:01:08", "remaining_time": "1:23:34", "throughput": 20013.73, "total_tokens": 145465216} +{"current_steps": 46225, "total_steps": 78105, "loss": 0.2945, "lr": 2.1363826890351842e-06, "epoch": 2.9591575443313487, "percentage": 59.18, "elapsed_time": "2:01:08", "remaining_time": "1:23:33", "throughput": 20014.01, "total_tokens": 145480640} +{"current_steps": 46230, "total_steps": 78105, "loss": 0.3031, "lr": 2.1358299864480344e-06, "epoch": 2.9594776262723257, "percentage": 59.19, "elapsed_time": "2:01:09", "remaining_time": "1:23:32", "throughput": 20014.3, "total_tokens": 145496000} +{"current_steps": 46235, "total_steps": 78105, "loss": 0.2104, "lr": 2.135277302045636e-06, "epoch": 2.9597977082133027, "percentage": 59.2, "elapsed_time": "2:01:10", "remaining_time": "1:23:31", "throughput": 20014.65, "total_tokens": 145512512} +{"current_steps": 46240, "total_steps": 78105, "loss": 0.168, "lr": 2.134724635855591e-06, "epoch": 2.9601177901542792, "percentage": 59.2, "elapsed_time": "2:01:10", "remaining_time": "1:23:30", "throughput": 20014.94, "total_tokens": 145527872} +{"current_steps": 46245, "total_steps": 78105, "loss": 0.2986, "lr": 2.1341719879054947e-06, "epoch": 2.960437872095256, "percentage": 59.21, "elapsed_time": "2:01:11", "remaining_time": "1:23:29", "throughput": 20015.23, "total_tokens": 145543808} +{"current_steps": 46250, "total_steps": 78105, "loss": 0.187, "lr": 2.133619358222944e-06, "epoch": 2.960757954036233, "percentage": 59.22, "elapsed_time": "2:01:12", "remaining_time": "1:23:28", "throughput": 20015.51, "total_tokens": 145559168} +{"current_steps": 46255, "total_steps": 78105, "loss": 0.2369, "lr": 2.133066746835534e-06, "epoch": 2.96107803597721, "percentage": 59.22, "elapsed_time": "2:01:12", "remaining_time": "1:23:27", "throughput": 20015.73, "total_tokens": 145573376} +{"current_steps": 46260, "total_steps": 78105, "loss": 0.2194, "lr": 2.1325141537708583e-06, "epoch": 2.961398117918187, "percentage": 59.23, "elapsed_time": "2:01:13", "remaining_time": "1:23:27", "throughput": 20016.06, "total_tokens": 145590016} +{"current_steps": 46265, "total_steps": 78105, "loss": 0.1865, "lr": 2.131961579056513e-06, "epoch": 2.961718199859164, "percentage": 59.23, "elapsed_time": "2:01:14", "remaining_time": "1:23:26", "throughput": 20016.53, "total_tokens": 145608832} +{"current_steps": 46270, "total_steps": 78105, "loss": 0.2651, "lr": 2.1314090227200885e-06, "epoch": 2.9620382818001407, "percentage": 59.24, "elapsed_time": "2:01:15", "remaining_time": "1:23:25", "throughput": 20016.82, "total_tokens": 145624640} +{"current_steps": 46275, "total_steps": 78105, "loss": 0.2519, "lr": 2.1308564847891784e-06, "epoch": 2.9623583637411177, "percentage": 59.25, "elapsed_time": "2:01:15", "remaining_time": "1:23:24", "throughput": 20017.13, "total_tokens": 145640320} +{"current_steps": 46280, "total_steps": 78105, "loss": 0.2431, "lr": 2.1303039652913718e-06, "epoch": 2.9626784456820947, "percentage": 59.25, "elapsed_time": "2:01:16", "remaining_time": "1:23:23", "throughput": 20017.39, "total_tokens": 145655296} +{"current_steps": 46285, "total_steps": 78105, "loss": 0.1792, "lr": 2.1297514642542607e-06, "epoch": 2.9629985276230713, "percentage": 59.26, "elapsed_time": "2:01:17", "remaining_time": "1:23:22", "throughput": 20017.68, "total_tokens": 145670528} +{"current_steps": 46290, "total_steps": 78105, "loss": 0.1373, "lr": 2.1291989817054314e-06, "epoch": 2.9633186095640482, "percentage": 59.27, "elapsed_time": "2:01:17", "remaining_time": "1:23:21", "throughput": 20018.02, "total_tokens": 145686720} +{"current_steps": 46295, "total_steps": 78105, "loss": 0.1459, "lr": 2.1286465176724746e-06, "epoch": 2.9636386915050252, "percentage": 59.27, "elapsed_time": "2:01:18", "remaining_time": "1:23:21", "throughput": 20018.31, "total_tokens": 145702144} +{"current_steps": 46300, "total_steps": 78105, "loss": 0.2048, "lr": 2.1280940721829757e-06, "epoch": 2.9639587734460022, "percentage": 59.28, "elapsed_time": "2:01:19", "remaining_time": "1:23:20", "throughput": 20018.55, "total_tokens": 145716992} +{"current_steps": 46305, "total_steps": 78105, "loss": 0.2093, "lr": 2.1275416452645226e-06, "epoch": 2.964278855386979, "percentage": 59.29, "elapsed_time": "2:01:19", "remaining_time": "1:23:19", "throughput": 20018.81, "total_tokens": 145731968} +{"current_steps": 46310, "total_steps": 78105, "loss": 0.3044, "lr": 2.1269892369446984e-06, "epoch": 2.964598937327956, "percentage": 59.29, "elapsed_time": "2:01:20", "remaining_time": "1:23:18", "throughput": 20019.1, "total_tokens": 145747392} +{"current_steps": 46315, "total_steps": 78105, "loss": 0.2091, "lr": 2.126436847251089e-06, "epoch": 2.9649190192689328, "percentage": 59.3, "elapsed_time": "2:01:21", "remaining_time": "1:23:17", "throughput": 20019.4, "total_tokens": 145763136} +{"current_steps": 46320, "total_steps": 78105, "loss": 0.2518, "lr": 2.1258844762112787e-06, "epoch": 2.9652391012099097, "percentage": 59.3, "elapsed_time": "2:01:21", "remaining_time": "1:23:16", "throughput": 20019.76, "total_tokens": 145779840} +{"current_steps": 46325, "total_steps": 78105, "loss": 0.3357, "lr": 2.1253321238528474e-06, "epoch": 2.9655591831508867, "percentage": 59.31, "elapsed_time": "2:01:22", "remaining_time": "1:23:15", "throughput": 20020.07, "total_tokens": 145795584} +{"current_steps": 46330, "total_steps": 78105, "loss": 0.2661, "lr": 2.12477979020338e-06, "epoch": 2.9658792650918633, "percentage": 59.32, "elapsed_time": "2:01:23", "remaining_time": "1:23:15", "throughput": 20020.34, "total_tokens": 145810688} +{"current_steps": 46335, "total_steps": 78105, "loss": 0.1763, "lr": 2.124227475290454e-06, "epoch": 2.9661993470328403, "percentage": 59.32, "elapsed_time": "2:01:23", "remaining_time": "1:23:14", "throughput": 20020.63, "total_tokens": 145826048} +{"current_steps": 46340, "total_steps": 78105, "loss": 0.3211, "lr": 2.123675179141652e-06, "epoch": 2.9665194289738173, "percentage": 59.33, "elapsed_time": "2:01:24", "remaining_time": "1:23:13", "throughput": 20020.9, "total_tokens": 145841408} +{"current_steps": 46345, "total_steps": 78105, "loss": 0.2432, "lr": 2.12312290178455e-06, "epoch": 2.9668395109147943, "percentage": 59.34, "elapsed_time": "2:01:25", "remaining_time": "1:23:12", "throughput": 20021.28, "total_tokens": 145858432} +{"current_steps": 46350, "total_steps": 78105, "loss": 0.2069, "lr": 2.1225706432467277e-06, "epoch": 2.9671595928557712, "percentage": 59.34, "elapsed_time": "2:01:25", "remaining_time": "1:23:11", "throughput": 20021.52, "total_tokens": 145873216} +{"current_steps": 46355, "total_steps": 78105, "loss": 0.1425, "lr": 2.1220184035557613e-06, "epoch": 2.9674796747967482, "percentage": 59.35, "elapsed_time": "2:01:26", "remaining_time": "1:23:10", "throughput": 20021.79, "total_tokens": 145888320} +{"current_steps": 46360, "total_steps": 78105, "loss": 0.2318, "lr": 2.1214661827392275e-06, "epoch": 2.967799756737725, "percentage": 59.36, "elapsed_time": "2:01:27", "remaining_time": "1:23:09", "throughput": 20022.06, "total_tokens": 145903424} +{"current_steps": 46365, "total_steps": 78105, "loss": 0.4214, "lr": 2.1209139808247003e-06, "epoch": 2.9681198386787018, "percentage": 59.36, "elapsed_time": "2:01:28", "remaining_time": "1:23:09", "throughput": 20022.83, "total_tokens": 145932608} +{"current_steps": 46370, "total_steps": 78105, "loss": 0.2261, "lr": 2.1203617978397543e-06, "epoch": 2.9684399206196788, "percentage": 59.37, "elapsed_time": "2:01:28", "remaining_time": "1:23:08", "throughput": 20023.08, "total_tokens": 145947456} +{"current_steps": 46375, "total_steps": 78105, "loss": 0.2228, "lr": 2.1198096338119634e-06, "epoch": 2.9687600025606553, "percentage": 59.38, "elapsed_time": "2:01:29", "remaining_time": "1:23:07", "throughput": 20023.34, "total_tokens": 145962560} +{"current_steps": 46380, "total_steps": 78105, "loss": 0.2131, "lr": 2.119257488768898e-06, "epoch": 2.9690800845016323, "percentage": 59.38, "elapsed_time": "2:01:30", "remaining_time": "1:23:06", "throughput": 20023.69, "total_tokens": 145978944} +{"current_steps": 46385, "total_steps": 78105, "loss": 0.1967, "lr": 2.118705362738131e-06, "epoch": 2.9694001664426093, "percentage": 59.39, "elapsed_time": "2:01:30", "remaining_time": "1:23:05", "throughput": 20023.96, "total_tokens": 145994304} +{"current_steps": 46390, "total_steps": 78105, "loss": 0.2386, "lr": 2.1181532557472316e-06, "epoch": 2.9697202483835863, "percentage": 59.39, "elapsed_time": "2:01:31", "remaining_time": "1:23:05", "throughput": 20024.27, "total_tokens": 146009856} +{"current_steps": 46395, "total_steps": 78105, "loss": 0.2441, "lr": 2.117601167823771e-06, "epoch": 2.9700403303245633, "percentage": 59.4, "elapsed_time": "2:01:32", "remaining_time": "1:23:04", "throughput": 20024.56, "total_tokens": 146025792} +{"current_steps": 46400, "total_steps": 78105, "loss": 0.2369, "lr": 2.1170490989953148e-06, "epoch": 2.97036041226554, "percentage": 59.41, "elapsed_time": "2:01:32", "remaining_time": "1:23:03", "throughput": 20024.81, "total_tokens": 146040832} +{"current_steps": 46405, "total_steps": 78105, "loss": 0.2341, "lr": 2.1164970492894326e-06, "epoch": 2.970680494206517, "percentage": 59.41, "elapsed_time": "2:01:33", "remaining_time": "1:23:02", "throughput": 20025.05, "total_tokens": 146055616} +{"current_steps": 46410, "total_steps": 78105, "loss": 0.2157, "lr": 2.1159450187336893e-06, "epoch": 2.971000576147494, "percentage": 59.42, "elapsed_time": "2:01:34", "remaining_time": "1:23:01", "throughput": 20025.33, "total_tokens": 146071168} +{"current_steps": 46415, "total_steps": 78105, "loss": 0.2758, "lr": 2.115393007355653e-06, "epoch": 2.971320658088471, "percentage": 59.43, "elapsed_time": "2:01:35", "remaining_time": "1:23:00", "throughput": 20025.63, "total_tokens": 146086976} +{"current_steps": 46420, "total_steps": 78105, "loss": 0.2013, "lr": 2.114841015182885e-06, "epoch": 2.9716407400294473, "percentage": 59.43, "elapsed_time": "2:01:35", "remaining_time": "1:22:59", "throughput": 20025.86, "total_tokens": 146101824} +{"current_steps": 46425, "total_steps": 78105, "loss": 0.2896, "lr": 2.1142890422429516e-06, "epoch": 2.9719608219704243, "percentage": 59.44, "elapsed_time": "2:01:36", "remaining_time": "1:22:58", "throughput": 20026.13, "total_tokens": 146117056} +{"current_steps": 46430, "total_steps": 78105, "loss": 0.1726, "lr": 2.1137370885634133e-06, "epoch": 2.9722809039114013, "percentage": 59.45, "elapsed_time": "2:01:37", "remaining_time": "1:22:58", "throughput": 20026.45, "total_tokens": 146133184} +{"current_steps": 46435, "total_steps": 78105, "loss": 0.2081, "lr": 2.113185154171833e-06, "epoch": 2.9726009858523783, "percentage": 59.45, "elapsed_time": "2:01:37", "remaining_time": "1:22:57", "throughput": 20026.75, "total_tokens": 146149440} +{"current_steps": 46440, "total_steps": 78105, "loss": 0.2804, "lr": 2.1126332390957714e-06, "epoch": 2.9729210677933553, "percentage": 59.46, "elapsed_time": "2:01:38", "remaining_time": "1:22:56", "throughput": 20026.96, "total_tokens": 146163776} +{"current_steps": 46445, "total_steps": 78105, "loss": 0.2193, "lr": 2.112081343362788e-06, "epoch": 2.973241149734332, "percentage": 59.46, "elapsed_time": "2:01:39", "remaining_time": "1:22:55", "throughput": 20027.3, "total_tokens": 146180224} +{"current_steps": 46450, "total_steps": 78105, "loss": 0.1553, "lr": 2.111529467000443e-06, "epoch": 2.973561231675309, "percentage": 59.47, "elapsed_time": "2:01:39", "remaining_time": "1:22:54", "throughput": 20027.54, "total_tokens": 146195072} +{"current_steps": 46455, "total_steps": 78105, "loss": 0.2079, "lr": 2.1109776100362912e-06, "epoch": 2.973881313616286, "percentage": 59.48, "elapsed_time": "2:01:40", "remaining_time": "1:22:53", "throughput": 20027.88, "total_tokens": 146211200} +{"current_steps": 46460, "total_steps": 78105, "loss": 0.173, "lr": 2.1104257724978924e-06, "epoch": 2.974201395557263, "percentage": 59.48, "elapsed_time": "2:01:41", "remaining_time": "1:22:52", "throughput": 20028.17, "total_tokens": 146227008} +{"current_steps": 46465, "total_steps": 78105, "loss": 0.3075, "lr": 2.1098739544128003e-06, "epoch": 2.9745214774982394, "percentage": 59.49, "elapsed_time": "2:01:41", "remaining_time": "1:22:52", "throughput": 20028.55, "total_tokens": 146243968} +{"current_steps": 46470, "total_steps": 78105, "loss": 0.2205, "lr": 2.1093221558085716e-06, "epoch": 2.9748415594392164, "percentage": 59.5, "elapsed_time": "2:01:42", "remaining_time": "1:22:51", "throughput": 20028.86, "total_tokens": 146259776} +{"current_steps": 46475, "total_steps": 78105, "loss": 0.1693, "lr": 2.108770376712759e-06, "epoch": 2.9751616413801933, "percentage": 59.5, "elapsed_time": "2:01:43", "remaining_time": "1:22:50", "throughput": 20029.22, "total_tokens": 146276736} +{"current_steps": 46480, "total_steps": 78105, "loss": 0.2099, "lr": 2.1082186171529166e-06, "epoch": 2.9754817233211703, "percentage": 59.51, "elapsed_time": "2:01:43", "remaining_time": "1:22:49", "throughput": 20029.49, "total_tokens": 146291904} +{"current_steps": 46485, "total_steps": 78105, "loss": 0.2414, "lr": 2.107666877156595e-06, "epoch": 2.9758018052621473, "percentage": 59.52, "elapsed_time": "2:01:44", "remaining_time": "1:22:48", "throughput": 20029.76, "total_tokens": 146306944} +{"current_steps": 46490, "total_steps": 78105, "loss": 0.3572, "lr": 2.107115156751346e-06, "epoch": 2.976121887203124, "percentage": 59.52, "elapsed_time": "2:01:45", "remaining_time": "1:22:47", "throughput": 20030.04, "total_tokens": 146322176} +{"current_steps": 46495, "total_steps": 78105, "loss": 0.1976, "lr": 2.1065634559647197e-06, "epoch": 2.976441969144101, "percentage": 59.53, "elapsed_time": "2:01:45", "remaining_time": "1:22:46", "throughput": 20030.34, "total_tokens": 146337728} +{"current_steps": 46500, "total_steps": 78105, "loss": 0.2953, "lr": 2.1060117748242647e-06, "epoch": 2.976762051085078, "percentage": 59.54, "elapsed_time": "2:01:46", "remaining_time": "1:22:46", "throughput": 20030.64, "total_tokens": 146353408} +{"current_steps": 46505, "total_steps": 78105, "loss": 0.1739, "lr": 2.105460113357531e-06, "epoch": 2.9770821330260544, "percentage": 59.54, "elapsed_time": "2:01:47", "remaining_time": "1:22:45", "throughput": 20030.99, "total_tokens": 146370048} +{"current_steps": 46510, "total_steps": 78105, "loss": 0.3382, "lr": 2.1049084715920628e-06, "epoch": 2.9774022149670314, "percentage": 59.55, "elapsed_time": "2:01:47", "remaining_time": "1:22:44", "throughput": 20031.21, "total_tokens": 146384384} +{"current_steps": 46515, "total_steps": 78105, "loss": 0.2107, "lr": 2.1043568495554086e-06, "epoch": 2.9777222969080084, "percentage": 59.55, "elapsed_time": "2:01:48", "remaining_time": "1:22:43", "throughput": 20031.53, "total_tokens": 146400320} +{"current_steps": 46520, "total_steps": 78105, "loss": 0.3154, "lr": 2.1038052472751114e-06, "epoch": 2.9780423788489854, "percentage": 59.56, "elapsed_time": "2:01:49", "remaining_time": "1:22:42", "throughput": 20031.77, "total_tokens": 146415168} +{"current_steps": 46525, "total_steps": 78105, "loss": 0.1901, "lr": 2.103253664778717e-06, "epoch": 2.9783624607899624, "percentage": 59.57, "elapsed_time": "2:01:49", "remaining_time": "1:22:41", "throughput": 20032.02, "total_tokens": 146430080} +{"current_steps": 46530, "total_steps": 78105, "loss": 0.2543, "lr": 2.1027021020937674e-06, "epoch": 2.9786825427309394, "percentage": 59.57, "elapsed_time": "2:01:50", "remaining_time": "1:22:40", "throughput": 20032.34, "total_tokens": 146446336} +{"current_steps": 46535, "total_steps": 78105, "loss": 0.1713, "lr": 2.1021505592478066e-06, "epoch": 2.979002624671916, "percentage": 59.58, "elapsed_time": "2:01:51", "remaining_time": "1:22:39", "throughput": 20032.65, "total_tokens": 146461952} +{"current_steps": 46540, "total_steps": 78105, "loss": 0.2301, "lr": 2.1015990362683733e-06, "epoch": 2.979322706612893, "percentage": 59.59, "elapsed_time": "2:01:51", "remaining_time": "1:22:39", "throughput": 20032.87, "total_tokens": 146476544} +{"current_steps": 46545, "total_steps": 78105, "loss": 0.2889, "lr": 2.101047533183009e-06, "epoch": 2.97964278855387, "percentage": 59.59, "elapsed_time": "2:01:52", "remaining_time": "1:22:38", "throughput": 20033.16, "total_tokens": 146491968} +{"current_steps": 46550, "total_steps": 78105, "loss": 0.3376, "lr": 2.1004960500192534e-06, "epoch": 2.9799628704948464, "percentage": 59.6, "elapsed_time": "2:01:53", "remaining_time": "1:22:37", "throughput": 20033.43, "total_tokens": 146507392} +{"current_steps": 46555, "total_steps": 78105, "loss": 0.1538, "lr": 2.0999445868046438e-06, "epoch": 2.9802829524358234, "percentage": 59.61, "elapsed_time": "2:01:53", "remaining_time": "1:22:36", "throughput": 20033.66, "total_tokens": 146521792} +{"current_steps": 46560, "total_steps": 78105, "loss": 0.1267, "lr": 2.0993931435667184e-06, "epoch": 2.9806030343768004, "percentage": 59.61, "elapsed_time": "2:01:54", "remaining_time": "1:22:35", "throughput": 20033.98, "total_tokens": 146538048} +{"current_steps": 46565, "total_steps": 78105, "loss": 0.253, "lr": 2.098841720333012e-06, "epoch": 2.9809231163177774, "percentage": 59.62, "elapsed_time": "2:01:55", "remaining_time": "1:22:34", "throughput": 20034.33, "total_tokens": 146554688} +{"current_steps": 46570, "total_steps": 78105, "loss": 0.1851, "lr": 2.0982903171310612e-06, "epoch": 2.9812431982587544, "percentage": 59.62, "elapsed_time": "2:01:55", "remaining_time": "1:22:33", "throughput": 20034.6, "total_tokens": 146569856} +{"current_steps": 46575, "total_steps": 78105, "loss": 0.1949, "lr": 2.0977389339883984e-06, "epoch": 2.9815632801997314, "percentage": 59.63, "elapsed_time": "2:01:56", "remaining_time": "1:22:33", "throughput": 20034.81, "total_tokens": 146584192} +{"current_steps": 46580, "total_steps": 78105, "loss": 0.2788, "lr": 2.097187570932559e-06, "epoch": 2.981883362140708, "percentage": 59.64, "elapsed_time": "2:01:57", "remaining_time": "1:22:32", "throughput": 20035.15, "total_tokens": 146600512} +{"current_steps": 46585, "total_steps": 78105, "loss": 0.2399, "lr": 2.0966362279910736e-06, "epoch": 2.982203444081685, "percentage": 59.64, "elapsed_time": "2:01:57", "remaining_time": "1:22:31", "throughput": 20035.41, "total_tokens": 146615744} +{"current_steps": 46590, "total_steps": 78105, "loss": 0.2708, "lr": 2.0960849051914743e-06, "epoch": 2.982523526022662, "percentage": 59.65, "elapsed_time": "2:01:58", "remaining_time": "1:22:30", "throughput": 20035.65, "total_tokens": 146630592} +{"current_steps": 46595, "total_steps": 78105, "loss": 0.268, "lr": 2.0955336025612904e-06, "epoch": 2.9828436079636385, "percentage": 59.66, "elapsed_time": "2:01:59", "remaining_time": "1:22:29", "throughput": 20035.9, "total_tokens": 146645440} +{"current_steps": 46600, "total_steps": 78105, "loss": 0.2394, "lr": 2.094982320128053e-06, "epoch": 2.9831636899046154, "percentage": 59.66, "elapsed_time": "2:01:59", "remaining_time": "1:22:28", "throughput": 20036.16, "total_tokens": 146660992} +{"current_steps": 46605, "total_steps": 78105, "loss": 0.2049, "lr": 2.094431057919287e-06, "epoch": 2.9834837718455924, "percentage": 59.67, "elapsed_time": "2:02:00", "remaining_time": "1:22:27", "throughput": 20036.48, "total_tokens": 146676992} +{"current_steps": 46610, "total_steps": 78105, "loss": 0.2071, "lr": 2.093879815962522e-06, "epoch": 2.9838038537865694, "percentage": 59.68, "elapsed_time": "2:02:01", "remaining_time": "1:22:27", "throughput": 20036.73, "total_tokens": 146692288} +{"current_steps": 46615, "total_steps": 78105, "loss": 0.2099, "lr": 2.0933285942852838e-06, "epoch": 2.9841239357275464, "percentage": 59.68, "elapsed_time": "2:02:01", "remaining_time": "1:22:26", "throughput": 20037.04, "total_tokens": 146708096} +{"current_steps": 46620, "total_steps": 78105, "loss": 0.1691, "lr": 2.092777392915097e-06, "epoch": 2.9844440176685234, "percentage": 59.69, "elapsed_time": "2:02:02", "remaining_time": "1:22:25", "throughput": 20037.31, "total_tokens": 146723328} +{"current_steps": 46625, "total_steps": 78105, "loss": 0.1584, "lr": 2.0922262118794874e-06, "epoch": 2.9847640996095, "percentage": 59.7, "elapsed_time": "2:02:03", "remaining_time": "1:22:24", "throughput": 20037.61, "total_tokens": 146739072} +{"current_steps": 46630, "total_steps": 78105, "loss": 0.2923, "lr": 2.091675051205975e-06, "epoch": 2.985084181550477, "percentage": 59.7, "elapsed_time": "2:02:03", "remaining_time": "1:22:23", "throughput": 20037.87, "total_tokens": 146754240} +{"current_steps": 46635, "total_steps": 78105, "loss": 0.1786, "lr": 2.0911239109220846e-06, "epoch": 2.985404263491454, "percentage": 59.71, "elapsed_time": "2:02:04", "remaining_time": "1:22:22", "throughput": 20038.11, "total_tokens": 146768960} +{"current_steps": 46640, "total_steps": 78105, "loss": 0.2447, "lr": 2.090572791055336e-06, "epoch": 2.9857243454324305, "percentage": 59.71, "elapsed_time": "2:02:05", "remaining_time": "1:22:21", "throughput": 20038.43, "total_tokens": 146784960} +{"current_steps": 46645, "total_steps": 78105, "loss": 0.2335, "lr": 2.09002169163325e-06, "epoch": 2.9860444273734075, "percentage": 59.72, "elapsed_time": "2:02:05", "remaining_time": "1:22:20", "throughput": 20038.76, "total_tokens": 146800896} +{"current_steps": 46650, "total_steps": 78105, "loss": 0.2207, "lr": 2.089470612683345e-06, "epoch": 2.9863645093143845, "percentage": 59.73, "elapsed_time": "2:02:06", "remaining_time": "1:22:20", "throughput": 20038.97, "total_tokens": 146815232} +{"current_steps": 46655, "total_steps": 78105, "loss": 0.3087, "lr": 2.0889195542331395e-06, "epoch": 2.9866845912553615, "percentage": 59.73, "elapsed_time": "2:02:07", "remaining_time": "1:22:19", "throughput": 20039.23, "total_tokens": 146830336} +{"current_steps": 46660, "total_steps": 78105, "loss": 0.1565, "lr": 2.0883685163101495e-06, "epoch": 2.9870046731963384, "percentage": 59.74, "elapsed_time": "2:02:07", "remaining_time": "1:22:18", "throughput": 20039.54, "total_tokens": 146846528} +{"current_steps": 46665, "total_steps": 78105, "loss": 0.1609, "lr": 2.0878174989418916e-06, "epoch": 2.987324755137315, "percentage": 59.75, "elapsed_time": "2:02:08", "remaining_time": "1:22:17", "throughput": 20039.8, "total_tokens": 146861824} +{"current_steps": 46670, "total_steps": 78105, "loss": 0.2016, "lr": 2.0872665021558817e-06, "epoch": 2.987644837078292, "percentage": 59.75, "elapsed_time": "2:02:09", "remaining_time": "1:22:16", "throughput": 20040.14, "total_tokens": 146877952} +{"current_steps": 46675, "total_steps": 78105, "loss": 0.2071, "lr": 2.086715525979632e-06, "epoch": 2.987964919019269, "percentage": 59.76, "elapsed_time": "2:02:09", "remaining_time": "1:22:15", "throughput": 20040.48, "total_tokens": 146894272} +{"current_steps": 46680, "total_steps": 78105, "loss": 0.1368, "lr": 2.086164570440657e-06, "epoch": 2.988285000960246, "percentage": 59.77, "elapsed_time": "2:02:10", "remaining_time": "1:22:14", "throughput": 20040.74, "total_tokens": 146909248} +{"current_steps": 46685, "total_steps": 78105, "loss": 0.3046, "lr": 2.0856136355664673e-06, "epoch": 2.9886050829012225, "percentage": 59.77, "elapsed_time": "2:02:11", "remaining_time": "1:22:14", "throughput": 20040.97, "total_tokens": 146923840} +{"current_steps": 46690, "total_steps": 78105, "loss": 0.2109, "lr": 2.085062721384574e-06, "epoch": 2.9889251648421995, "percentage": 59.78, "elapsed_time": "2:02:11", "remaining_time": "1:22:13", "throughput": 20041.23, "total_tokens": 146938816} +{"current_steps": 46695, "total_steps": 78105, "loss": 0.2048, "lr": 2.084511827922488e-06, "epoch": 2.9892452467831765, "percentage": 59.78, "elapsed_time": "2:02:12", "remaining_time": "1:22:12", "throughput": 20041.58, "total_tokens": 146955648} +{"current_steps": 46700, "total_steps": 78105, "loss": 0.2616, "lr": 2.0839609552077164e-06, "epoch": 2.9895653287241535, "percentage": 59.79, "elapsed_time": "2:02:13", "remaining_time": "1:22:11", "throughput": 20041.9, "total_tokens": 146971840} +{"current_steps": 46705, "total_steps": 78105, "loss": 0.1943, "lr": 2.083410103267767e-06, "epoch": 2.9898854106651305, "percentage": 59.8, "elapsed_time": "2:02:13", "remaining_time": "1:22:10", "throughput": 20042.21, "total_tokens": 146987840} +{"current_steps": 46710, "total_steps": 78105, "loss": 0.2437, "lr": 2.082859272130149e-06, "epoch": 2.990205492606107, "percentage": 59.8, "elapsed_time": "2:02:14", "remaining_time": "1:22:09", "throughput": 20042.52, "total_tokens": 147003840} +{"current_steps": 46715, "total_steps": 78105, "loss": 0.2044, "lr": 2.0823084618223644e-06, "epoch": 2.990525574547084, "percentage": 59.81, "elapsed_time": "2:02:15", "remaining_time": "1:22:08", "throughput": 20042.79, "total_tokens": 147019200} +{"current_steps": 46720, "total_steps": 78105, "loss": 0.1568, "lr": 2.08175767237192e-06, "epoch": 2.990845656488061, "percentage": 59.82, "elapsed_time": "2:02:15", "remaining_time": "1:22:08", "throughput": 20043.1, "total_tokens": 147035456} +{"current_steps": 46725, "total_steps": 78105, "loss": 0.1793, "lr": 2.081206903806319e-06, "epoch": 2.991165738429038, "percentage": 59.82, "elapsed_time": "2:02:16", "remaining_time": "1:22:07", "throughput": 20043.49, "total_tokens": 147053120} +{"current_steps": 46730, "total_steps": 78105, "loss": 0.2353, "lr": 2.080656156153063e-06, "epoch": 2.9914858203700145, "percentage": 59.83, "elapsed_time": "2:02:17", "remaining_time": "1:22:06", "throughput": 20043.8, "total_tokens": 147068992} +{"current_steps": 46735, "total_steps": 78105, "loss": 0.1396, "lr": 2.0801054294396557e-06, "epoch": 2.9918059023109915, "percentage": 59.84, "elapsed_time": "2:02:18", "remaining_time": "1:22:05", "throughput": 20044.16, "total_tokens": 147085632} +{"current_steps": 46740, "total_steps": 78105, "loss": 0.267, "lr": 2.079554723693595e-06, "epoch": 2.9921259842519685, "percentage": 59.84, "elapsed_time": "2:02:18", "remaining_time": "1:22:04", "throughput": 20044.49, "total_tokens": 147102144} +{"current_steps": 46745, "total_steps": 78105, "loss": 0.2866, "lr": 2.0790040389423826e-06, "epoch": 2.9924460661929455, "percentage": 59.85, "elapsed_time": "2:02:19", "remaining_time": "1:22:03", "throughput": 20044.93, "total_tokens": 147120384} +{"current_steps": 46750, "total_steps": 78105, "loss": 0.1751, "lr": 2.0784533752135146e-06, "epoch": 2.9927661481339225, "percentage": 59.86, "elapsed_time": "2:02:20", "remaining_time": "1:22:03", "throughput": 20045.17, "total_tokens": 147135232} +{"current_steps": 46755, "total_steps": 78105, "loss": 0.2485, "lr": 2.07790273253449e-06, "epoch": 2.993086230074899, "percentage": 59.86, "elapsed_time": "2:02:20", "remaining_time": "1:22:02", "throughput": 20045.48, "total_tokens": 147151168} +{"current_steps": 46760, "total_steps": 78105, "loss": 0.2188, "lr": 2.077352110932803e-06, "epoch": 2.993406312015876, "percentage": 59.87, "elapsed_time": "2:02:21", "remaining_time": "1:22:01", "throughput": 20045.75, "total_tokens": 147166208} +{"current_steps": 46765, "total_steps": 78105, "loss": 0.2365, "lr": 2.0768015104359517e-06, "epoch": 2.993726393956853, "percentage": 59.87, "elapsed_time": "2:02:22", "remaining_time": "1:22:00", "throughput": 20046.08, "total_tokens": 147182528} +{"current_steps": 46770, "total_steps": 78105, "loss": 0.2243, "lr": 2.0762509310714272e-06, "epoch": 2.9940464758978296, "percentage": 59.88, "elapsed_time": "2:02:22", "remaining_time": "1:21:59", "throughput": 20046.34, "total_tokens": 147197888} +{"current_steps": 46775, "total_steps": 78105, "loss": 0.3133, "lr": 2.075700372866725e-06, "epoch": 2.9943665578388066, "percentage": 59.89, "elapsed_time": "2:02:23", "remaining_time": "1:21:58", "throughput": 20046.68, "total_tokens": 147214592} +{"current_steps": 46780, "total_steps": 78105, "loss": 0.2857, "lr": 2.0751498358493355e-06, "epoch": 2.9946866397797836, "percentage": 59.89, "elapsed_time": "2:02:24", "remaining_time": "1:21:57", "throughput": 20047.04, "total_tokens": 147231680} +{"current_steps": 46785, "total_steps": 78105, "loss": 0.1859, "lr": 2.07459932004675e-06, "epoch": 2.9950067217207605, "percentage": 59.9, "elapsed_time": "2:02:24", "remaining_time": "1:21:57", "throughput": 20047.33, "total_tokens": 147247104} +{"current_steps": 46790, "total_steps": 78105, "loss": 0.1572, "lr": 2.0740488254864594e-06, "epoch": 2.9953268036617375, "percentage": 59.91, "elapsed_time": "2:02:25", "remaining_time": "1:21:56", "throughput": 20047.63, "total_tokens": 147262656} +{"current_steps": 46795, "total_steps": 78105, "loss": 0.2312, "lr": 2.073498352195951e-06, "epoch": 2.9956468856027145, "percentage": 59.91, "elapsed_time": "2:02:26", "remaining_time": "1:21:55", "throughput": 20047.86, "total_tokens": 147276992} +{"current_steps": 46800, "total_steps": 78105, "loss": 0.2586, "lr": 2.0729479002027147e-06, "epoch": 2.995966967543691, "percentage": 59.92, "elapsed_time": "2:02:27", "remaining_time": "1:21:54", "throughput": 20047.51, "total_tokens": 147293120} +{"current_steps": 46805, "total_steps": 78105, "loss": 0.1988, "lr": 2.0723974695342346e-06, "epoch": 2.996287049484668, "percentage": 59.93, "elapsed_time": "2:02:27", "remaining_time": "1:21:53", "throughput": 20047.77, "total_tokens": 147308160} +{"current_steps": 46810, "total_steps": 78105, "loss": 0.1825, "lr": 2.071847060217998e-06, "epoch": 2.996607131425645, "percentage": 59.93, "elapsed_time": "2:02:28", "remaining_time": "1:21:52", "throughput": 20048.03, "total_tokens": 147323200} +{"current_steps": 46815, "total_steps": 78105, "loss": 0.1861, "lr": 2.0712966722814883e-06, "epoch": 2.9969272133666216, "percentage": 59.94, "elapsed_time": "2:02:29", "remaining_time": "1:21:52", "throughput": 20048.32, "total_tokens": 147338688} +{"current_steps": 46820, "total_steps": 78105, "loss": 0.2376, "lr": 2.070746305752191e-06, "epoch": 2.9972472953075986, "percentage": 59.94, "elapsed_time": "2:02:29", "remaining_time": "1:21:51", "throughput": 20048.59, "total_tokens": 147353600} +{"current_steps": 46825, "total_steps": 78105, "loss": 0.2324, "lr": 2.0701959606575863e-06, "epoch": 2.9975673772485756, "percentage": 59.95, "elapsed_time": "2:02:30", "remaining_time": "1:21:50", "throughput": 20048.89, "total_tokens": 147369472} +{"current_steps": 46830, "total_steps": 78105, "loss": 0.1667, "lr": 2.0696456370251576e-06, "epoch": 2.9978874591895526, "percentage": 59.96, "elapsed_time": "2:02:31", "remaining_time": "1:21:49", "throughput": 20049.16, "total_tokens": 147384896} +{"current_steps": 46835, "total_steps": 78105, "loss": 0.1573, "lr": 2.069095334882383e-06, "epoch": 2.9982075411305296, "percentage": 59.96, "elapsed_time": "2:02:31", "remaining_time": "1:21:48", "throughput": 20049.45, "total_tokens": 147400832} +{"current_steps": 46840, "total_steps": 78105, "loss": 0.1533, "lr": 2.0685450542567432e-06, "epoch": 2.9985276230715066, "percentage": 59.97, "elapsed_time": "2:02:32", "remaining_time": "1:21:47", "throughput": 20049.78, "total_tokens": 147417280} +{"current_steps": 46845, "total_steps": 78105, "loss": 0.2064, "lr": 2.0679947951757164e-06, "epoch": 2.998847705012483, "percentage": 59.98, "elapsed_time": "2:02:33", "remaining_time": "1:21:46", "throughput": 20050.09, "total_tokens": 147433408} +{"current_steps": 46850, "total_steps": 78105, "loss": 0.216, "lr": 2.0674445576667785e-06, "epoch": 2.99916778695346, "percentage": 59.98, "elapsed_time": "2:02:33", "remaining_time": "1:21:46", "throughput": 20050.38, "total_tokens": 147448960} +{"current_steps": 46855, "total_steps": 78105, "loss": 0.2719, "lr": 2.0668943417574073e-06, "epoch": 2.999487868894437, "percentage": 59.99, "elapsed_time": "2:02:34", "remaining_time": "1:21:45", "throughput": 20050.61, "total_tokens": 147463744} +{"current_steps": 46860, "total_steps": 78105, "loss": 0.2388, "lr": 2.066344147475076e-06, "epoch": 2.9998079508354136, "percentage": 60.0, "elapsed_time": "2:02:35", "remaining_time": "1:21:44", "throughput": 20050.97, "total_tokens": 147480512} +{"current_steps": 46865, "total_steps": 78105, "loss": 0.1646, "lr": 2.0657939748472593e-06, "epoch": 3.0001280327763906, "percentage": 60.0, "elapsed_time": "2:02:36", "remaining_time": "1:21:43", "throughput": 20050.98, "total_tokens": 147496384} +{"current_steps": 46870, "total_steps": 78105, "loss": 0.1496, "lr": 2.06524382390143e-06, "epoch": 3.0004481147173676, "percentage": 60.01, "elapsed_time": "2:02:36", "remaining_time": "1:21:42", "throughput": 20051.21, "total_tokens": 147511104} +{"current_steps": 46872, "total_steps": 78105, "eval_loss": 0.5023031234741211, "epoch": 3.0005761474937582, "percentage": 60.01, "elapsed_time": "2:03:27", "remaining_time": "1:22:16", "throughput": 19913.36, "total_tokens": 147516736} +{"current_steps": 46875, "total_steps": 78105, "loss": 0.1556, "lr": 2.06469369466506e-06, "epoch": 3.0007681966583446, "percentage": 60.02, "elapsed_time": "2:04:02", "remaining_time": "1:22:38", "throughput": 19822.18, "total_tokens": 147525440} +{"current_steps": 46880, "total_steps": 78105, "loss": 0.1, "lr": 2.0641435871656183e-06, "epoch": 3.0010882785993216, "percentage": 60.02, "elapsed_time": "2:04:03", "remaining_time": "1:22:37", "throughput": 19822.46, "total_tokens": 147540800} +{"current_steps": 46885, "total_steps": 78105, "loss": 0.1753, "lr": 2.063593501430577e-06, "epoch": 3.001408360540298, "percentage": 60.03, "elapsed_time": "2:04:03", "remaining_time": "1:22:36", "throughput": 19822.78, "total_tokens": 147556992} +{"current_steps": 46890, "total_steps": 78105, "loss": 0.1897, "lr": 2.0630434374874017e-06, "epoch": 3.001728442481275, "percentage": 60.03, "elapsed_time": "2:04:04", "remaining_time": "1:22:35", "throughput": 19823.14, "total_tokens": 147574080} +{"current_steps": 46895, "total_steps": 78105, "loss": 0.1506, "lr": 2.062493395363562e-06, "epoch": 3.002048524422252, "percentage": 60.04, "elapsed_time": "2:04:05", "remaining_time": "1:22:34", "throughput": 19823.41, "total_tokens": 147588864} +{"current_steps": 46900, "total_steps": 78105, "loss": 0.1401, "lr": 2.0619433750865227e-06, "epoch": 3.002368606363229, "percentage": 60.05, "elapsed_time": "2:04:05", "remaining_time": "1:22:34", "throughput": 19823.67, "total_tokens": 147603776} +{"current_steps": 46905, "total_steps": 78105, "loss": 0.1737, "lr": 2.061393376683749e-06, "epoch": 3.0026886883042057, "percentage": 60.05, "elapsed_time": "2:04:06", "remaining_time": "1:22:33", "throughput": 19824.03, "total_tokens": 147620288} +{"current_steps": 46910, "total_steps": 78105, "loss": 0.1351, "lr": 2.060843400182707e-06, "epoch": 3.0030087702451826, "percentage": 60.06, "elapsed_time": "2:04:07", "remaining_time": "1:22:32", "throughput": 19824.28, "total_tokens": 147634816} +{"current_steps": 46915, "total_steps": 78105, "loss": 0.1831, "lr": 2.0602934456108567e-06, "epoch": 3.0033288521861596, "percentage": 60.07, "elapsed_time": "2:04:07", "remaining_time": "1:22:31", "throughput": 19824.69, "total_tokens": 147652032} +{"current_steps": 46920, "total_steps": 78105, "loss": 0.1352, "lr": 2.059743512995662e-06, "epoch": 3.0036489341271366, "percentage": 60.07, "elapsed_time": "2:04:08", "remaining_time": "1:22:30", "throughput": 19824.95, "total_tokens": 147667136} +{"current_steps": 46925, "total_steps": 78105, "loss": 0.1296, "lr": 2.059193602364583e-06, "epoch": 3.0039690160681136, "percentage": 60.08, "elapsed_time": "2:04:09", "remaining_time": "1:22:29", "throughput": 19825.3, "total_tokens": 147683520} +{"current_steps": 46930, "total_steps": 78105, "loss": 0.1696, "lr": 2.05864371374508e-06, "epoch": 3.00428909800909, "percentage": 60.09, "elapsed_time": "2:04:09", "remaining_time": "1:22:28", "throughput": 19825.58, "total_tokens": 147698496} +{"current_steps": 46935, "total_steps": 78105, "loss": 0.1052, "lr": 2.05809384716461e-06, "epoch": 3.004609179950067, "percentage": 60.09, "elapsed_time": "2:04:10", "remaining_time": "1:22:28", "throughput": 19825.91, "total_tokens": 147714496} +{"current_steps": 46940, "total_steps": 78105, "loss": 0.2007, "lr": 2.057544002650632e-06, "epoch": 3.004929261891044, "percentage": 60.1, "elapsed_time": "2:04:11", "remaining_time": "1:22:27", "throughput": 19826.22, "total_tokens": 147730048} +{"current_steps": 46945, "total_steps": 78105, "loss": 0.1726, "lr": 2.0569941802306018e-06, "epoch": 3.005249343832021, "percentage": 60.1, "elapsed_time": "2:04:11", "remaining_time": "1:22:26", "throughput": 19826.58, "total_tokens": 147746624} +{"current_steps": 46950, "total_steps": 78105, "loss": 0.1319, "lr": 2.0564443799319747e-06, "epoch": 3.0055694257729977, "percentage": 60.11, "elapsed_time": "2:04:12", "remaining_time": "1:22:25", "throughput": 19826.87, "total_tokens": 147761792} +{"current_steps": 46955, "total_steps": 78105, "loss": 0.1215, "lr": 2.0558946017822047e-06, "epoch": 3.0058895077139747, "percentage": 60.12, "elapsed_time": "2:04:13", "remaining_time": "1:22:24", "throughput": 19827.22, "total_tokens": 147777984} +{"current_steps": 46960, "total_steps": 78105, "loss": 0.1655, "lr": 2.0553448458087445e-06, "epoch": 3.0062095896549517, "percentage": 60.12, "elapsed_time": "2:04:13", "remaining_time": "1:22:23", "throughput": 19827.52, "total_tokens": 147793600} +{"current_steps": 46965, "total_steps": 78105, "loss": 0.1754, "lr": 2.0547951120390476e-06, "epoch": 3.0065296715959287, "percentage": 60.13, "elapsed_time": "2:04:14", "remaining_time": "1:22:22", "throughput": 19827.84, "total_tokens": 147809728} +{"current_steps": 46970, "total_steps": 78105, "loss": 0.0864, "lr": 2.054245400500563e-06, "epoch": 3.0068497535369056, "percentage": 60.14, "elapsed_time": "2:04:15", "remaining_time": "1:22:21", "throughput": 19828.15, "total_tokens": 147825600} +{"current_steps": 46975, "total_steps": 78105, "loss": 0.1677, "lr": 2.0536957112207424e-06, "epoch": 3.007169835477882, "percentage": 60.14, "elapsed_time": "2:04:16", "remaining_time": "1:22:21", "throughput": 19828.48, "total_tokens": 147841152} +{"current_steps": 46980, "total_steps": 78105, "loss": 0.1463, "lr": 2.0531460442270324e-06, "epoch": 3.007489917418859, "percentage": 60.15, "elapsed_time": "2:04:16", "remaining_time": "1:22:20", "throughput": 19828.78, "total_tokens": 147856512} +{"current_steps": 46985, "total_steps": 78105, "loss": 0.0892, "lr": 2.0525963995468815e-06, "epoch": 3.007809999359836, "percentage": 60.16, "elapsed_time": "2:04:17", "remaining_time": "1:22:19", "throughput": 19829.14, "total_tokens": 147873088} +{"current_steps": 46990, "total_steps": 78105, "loss": 0.2207, "lr": 2.052046777207736e-06, "epoch": 3.008130081300813, "percentage": 60.16, "elapsed_time": "2:04:18", "remaining_time": "1:22:18", "throughput": 19829.42, "total_tokens": 147888448} +{"current_steps": 46995, "total_steps": 78105, "loss": 0.1632, "lr": 2.0514971772370413e-06, "epoch": 3.0084501632417897, "percentage": 60.17, "elapsed_time": "2:04:18", "remaining_time": "1:22:17", "throughput": 19829.64, "total_tokens": 147902464} +{"current_steps": 47000, "total_steps": 78105, "loss": 0.1545, "lr": 2.050947599662241e-06, "epoch": 3.0087702451827667, "percentage": 60.18, "elapsed_time": "2:04:19", "remaining_time": "1:22:16", "throughput": 19829.93, "total_tokens": 147918272} +{"current_steps": 47005, "total_steps": 78105, "loss": 0.1162, "lr": 2.0503980445107786e-06, "epoch": 3.0090903271237437, "percentage": 60.18, "elapsed_time": "2:04:20", "remaining_time": "1:22:15", "throughput": 19830.23, "total_tokens": 147933696} +{"current_steps": 47010, "total_steps": 78105, "loss": 0.1201, "lr": 2.049848511810096e-06, "epoch": 3.0094104090647207, "percentage": 60.19, "elapsed_time": "2:04:20", "remaining_time": "1:22:14", "throughput": 19830.53, "total_tokens": 147949376} +{"current_steps": 47015, "total_steps": 78105, "loss": 0.193, "lr": 2.049299001587634e-06, "epoch": 3.0097304910056977, "percentage": 60.19, "elapsed_time": "2:04:21", "remaining_time": "1:22:14", "throughput": 19830.82, "total_tokens": 147964928} +{"current_steps": 47020, "total_steps": 78105, "loss": 0.1333, "lr": 2.0487495138708328e-06, "epoch": 3.010050572946674, "percentage": 60.2, "elapsed_time": "2:04:22", "remaining_time": "1:22:13", "throughput": 19831.14, "total_tokens": 147980672} +{"current_steps": 47025, "total_steps": 78105, "loss": 0.2778, "lr": 2.0482000486871295e-06, "epoch": 3.010370654887651, "percentage": 60.21, "elapsed_time": "2:04:22", "remaining_time": "1:22:12", "throughput": 19831.48, "total_tokens": 147996992} +{"current_steps": 47030, "total_steps": 78105, "loss": 0.1639, "lr": 2.0476506060639633e-06, "epoch": 3.010690736828628, "percentage": 60.21, "elapsed_time": "2:04:23", "remaining_time": "1:22:11", "throughput": 19831.78, "total_tokens": 148012736} +{"current_steps": 47035, "total_steps": 78105, "loss": 0.1334, "lr": 2.047101186028769e-06, "epoch": 3.011010818769605, "percentage": 60.22, "elapsed_time": "2:04:24", "remaining_time": "1:22:10", "throughput": 19832.07, "total_tokens": 148028544} +{"current_steps": 47040, "total_steps": 78105, "loss": 0.0719, "lr": 2.046551788608983e-06, "epoch": 3.0113309007105817, "percentage": 60.23, "elapsed_time": "2:04:24", "remaining_time": "1:22:09", "throughput": 19832.33, "total_tokens": 148043392} +{"current_steps": 47045, "total_steps": 78105, "loss": 0.1151, "lr": 2.0460024138320377e-06, "epoch": 3.0116509826515587, "percentage": 60.23, "elapsed_time": "2:04:25", "remaining_time": "1:22:08", "throughput": 19832.59, "total_tokens": 148058368} +{"current_steps": 47050, "total_steps": 78105, "loss": 0.1467, "lr": 2.0454530617253686e-06, "epoch": 3.0119710645925357, "percentage": 60.24, "elapsed_time": "2:04:26", "remaining_time": "1:22:07", "throughput": 19832.8, "total_tokens": 148072256} +{"current_steps": 47055, "total_steps": 78105, "loss": 0.1342, "lr": 2.0449037323164046e-06, "epoch": 3.0122911465335127, "percentage": 60.25, "elapsed_time": "2:04:26", "remaining_time": "1:22:07", "throughput": 19833.09, "total_tokens": 148087616} +{"current_steps": 47060, "total_steps": 78105, "loss": 0.1396, "lr": 2.0443544256325786e-06, "epoch": 3.0126112284744893, "percentage": 60.25, "elapsed_time": "2:04:27", "remaining_time": "1:22:06", "throughput": 19833.39, "total_tokens": 148103680} +{"current_steps": 47065, "total_steps": 78105, "loss": 0.1688, "lr": 2.0438051417013184e-06, "epoch": 3.0129313104154662, "percentage": 60.26, "elapsed_time": "2:04:28", "remaining_time": "1:22:05", "throughput": 19833.66, "total_tokens": 148118848} +{"current_steps": 47070, "total_steps": 78105, "loss": 0.2015, "lr": 2.043255880550054e-06, "epoch": 3.0132513923564432, "percentage": 60.27, "elapsed_time": "2:04:28", "remaining_time": "1:22:04", "throughput": 19834.0, "total_tokens": 148135296} +{"current_steps": 47075, "total_steps": 78105, "loss": 0.1305, "lr": 2.0427066422062107e-06, "epoch": 3.0135714742974202, "percentage": 60.27, "elapsed_time": "2:04:29", "remaining_time": "1:22:03", "throughput": 19834.27, "total_tokens": 148151040} +{"current_steps": 47080, "total_steps": 78105, "loss": 0.2057, "lr": 2.042157426697216e-06, "epoch": 3.013891556238397, "percentage": 60.28, "elapsed_time": "2:04:30", "remaining_time": "1:22:02", "throughput": 19834.57, "total_tokens": 148166784} +{"current_steps": 47085, "total_steps": 78105, "loss": 0.1627, "lr": 2.041608234050496e-06, "epoch": 3.0142116381793738, "percentage": 60.28, "elapsed_time": "2:04:30", "remaining_time": "1:22:01", "throughput": 19834.89, "total_tokens": 148182912} +{"current_steps": 47090, "total_steps": 78105, "loss": 0.1708, "lr": 2.0410590642934715e-06, "epoch": 3.0145317201203508, "percentage": 60.29, "elapsed_time": "2:04:31", "remaining_time": "1:22:00", "throughput": 19835.17, "total_tokens": 148198336} +{"current_steps": 47095, "total_steps": 78105, "loss": 0.1436, "lr": 2.040509917453568e-06, "epoch": 3.0148518020613277, "percentage": 60.3, "elapsed_time": "2:04:32", "remaining_time": "1:22:00", "throughput": 19835.5, "total_tokens": 148214464} +{"current_steps": 47100, "total_steps": 78105, "loss": 0.118, "lr": 2.039960793558205e-06, "epoch": 3.0151718840023047, "percentage": 60.3, "elapsed_time": "2:04:32", "remaining_time": "1:21:59", "throughput": 19835.75, "total_tokens": 148229760} +{"current_steps": 47105, "total_steps": 78105, "loss": 0.141, "lr": 2.039411692634804e-06, "epoch": 3.0154919659432813, "percentage": 60.31, "elapsed_time": "2:04:33", "remaining_time": "1:21:58", "throughput": 19836.06, "total_tokens": 148246208} +{"current_steps": 47110, "total_steps": 78105, "loss": 0.1982, "lr": 2.0388626147107837e-06, "epoch": 3.0158120478842583, "percentage": 60.32, "elapsed_time": "2:04:34", "remaining_time": "1:21:57", "throughput": 19836.33, "total_tokens": 148261312} +{"current_steps": 47115, "total_steps": 78105, "loss": 0.1154, "lr": 2.0383135598135635e-06, "epoch": 3.0161321298252353, "percentage": 60.32, "elapsed_time": "2:04:34", "remaining_time": "1:21:56", "throughput": 19836.66, "total_tokens": 148277824} +{"current_steps": 47120, "total_steps": 78105, "loss": 0.1589, "lr": 2.0377645279705583e-06, "epoch": 3.0164522117662123, "percentage": 60.33, "elapsed_time": "2:04:35", "remaining_time": "1:21:55", "throughput": 19836.88, "total_tokens": 148292480} +{"current_steps": 47125, "total_steps": 78105, "loss": 0.1566, "lr": 2.037215519209185e-06, "epoch": 3.0167722937071892, "percentage": 60.34, "elapsed_time": "2:04:36", "remaining_time": "1:21:54", "throughput": 19837.23, "total_tokens": 148308544} +{"current_steps": 47130, "total_steps": 78105, "loss": 0.182, "lr": 2.0366665335568576e-06, "epoch": 3.017092375648166, "percentage": 60.34, "elapsed_time": "2:04:36", "remaining_time": "1:21:54", "throughput": 19837.5, "total_tokens": 148323840} +{"current_steps": 47135, "total_steps": 78105, "loss": 0.1763, "lr": 2.0361175710409902e-06, "epoch": 3.017412457589143, "percentage": 60.35, "elapsed_time": "2:04:37", "remaining_time": "1:21:53", "throughput": 19837.77, "total_tokens": 148339264} +{"current_steps": 47140, "total_steps": 78105, "loss": 0.2021, "lr": 2.0355686316889964e-06, "epoch": 3.0177325395301198, "percentage": 60.35, "elapsed_time": "2:04:38", "remaining_time": "1:21:52", "throughput": 19838.04, "total_tokens": 148354944} +{"current_steps": 47145, "total_steps": 78105, "loss": 0.1247, "lr": 2.0350197155282848e-06, "epoch": 3.0180526214710968, "percentage": 60.36, "elapsed_time": "2:04:38", "remaining_time": "1:21:51", "throughput": 19838.34, "total_tokens": 148370880} +{"current_steps": 47150, "total_steps": 78105, "loss": 0.1669, "lr": 2.034470822586267e-06, "epoch": 3.0183727034120733, "percentage": 60.37, "elapsed_time": "2:04:39", "remaining_time": "1:21:50", "throughput": 19838.62, "total_tokens": 148386368} +{"current_steps": 47155, "total_steps": 78105, "loss": 0.205, "lr": 2.0339219528903508e-06, "epoch": 3.0186927853530503, "percentage": 60.37, "elapsed_time": "2:04:40", "remaining_time": "1:21:49", "throughput": 19838.88, "total_tokens": 148401600} +{"current_steps": 47160, "total_steps": 78105, "loss": 0.216, "lr": 2.033373106467946e-06, "epoch": 3.0190128672940273, "percentage": 60.38, "elapsed_time": "2:04:41", "remaining_time": "1:21:48", "throughput": 19839.12, "total_tokens": 148416576} +{"current_steps": 47165, "total_steps": 78105, "loss": 0.164, "lr": 2.0328242833464563e-06, "epoch": 3.0193329492350043, "percentage": 60.39, "elapsed_time": "2:04:41", "remaining_time": "1:21:47", "throughput": 19839.4, "total_tokens": 148431872} +{"current_steps": 47170, "total_steps": 78105, "loss": 0.1878, "lr": 2.0322754835532897e-06, "epoch": 3.019653031175981, "percentage": 60.39, "elapsed_time": "2:04:42", "remaining_time": "1:21:47", "throughput": 19839.66, "total_tokens": 148446784} +{"current_steps": 47175, "total_steps": 78105, "loss": 0.132, "lr": 2.0317267071158482e-06, "epoch": 3.019973113116958, "percentage": 60.4, "elapsed_time": "2:04:43", "remaining_time": "1:21:46", "throughput": 19839.95, "total_tokens": 148462592} +{"current_steps": 47180, "total_steps": 78105, "loss": 0.1745, "lr": 2.031177954061536e-06, "epoch": 3.020293195057935, "percentage": 60.41, "elapsed_time": "2:04:43", "remaining_time": "1:21:45", "throughput": 19840.21, "total_tokens": 148477632} +{"current_steps": 47185, "total_steps": 78105, "loss": 0.1788, "lr": 2.0306292244177543e-06, "epoch": 3.020613276998912, "percentage": 60.41, "elapsed_time": "2:04:44", "remaining_time": "1:21:44", "throughput": 19840.52, "total_tokens": 148493312} +{"current_steps": 47190, "total_steps": 78105, "loss": 0.1414, "lr": 2.0300805182119045e-06, "epoch": 3.020933358939889, "percentage": 60.42, "elapsed_time": "2:04:45", "remaining_time": "1:21:43", "throughput": 19840.89, "total_tokens": 148510144} +{"current_steps": 47195, "total_steps": 78105, "loss": 0.1231, "lr": 2.0295318354713867e-06, "epoch": 3.0212534408808653, "percentage": 60.43, "elapsed_time": "2:04:45", "remaining_time": "1:21:42", "throughput": 19841.12, "total_tokens": 148524480} +{"current_steps": 47200, "total_steps": 78105, "loss": 0.1774, "lr": 2.0289831762235976e-06, "epoch": 3.0215735228218423, "percentage": 60.43, "elapsed_time": "2:04:46", "remaining_time": "1:21:41", "throughput": 19841.44, "total_tokens": 148540416} +{"current_steps": 47205, "total_steps": 78105, "loss": 0.1439, "lr": 2.0284345404959364e-06, "epoch": 3.0218936047628193, "percentage": 60.44, "elapsed_time": "2:04:47", "remaining_time": "1:21:40", "throughput": 19841.75, "total_tokens": 148556288} +{"current_steps": 47210, "total_steps": 78105, "loss": 0.1966, "lr": 2.0278859283157966e-06, "epoch": 3.0222136867037963, "percentage": 60.44, "elapsed_time": "2:04:47", "remaining_time": "1:21:40", "throughput": 19842.06, "total_tokens": 148571712} +{"current_steps": 47215, "total_steps": 78105, "loss": 0.1737, "lr": 2.027337339710575e-06, "epoch": 3.022533768644773, "percentage": 60.45, "elapsed_time": "2:04:48", "remaining_time": "1:21:39", "throughput": 19842.35, "total_tokens": 148587264} +{"current_steps": 47220, "total_steps": 78105, "loss": 0.1391, "lr": 2.0267887747076642e-06, "epoch": 3.02285385058575, "percentage": 60.46, "elapsed_time": "2:04:49", "remaining_time": "1:21:38", "throughput": 19842.66, "total_tokens": 148602816} +{"current_steps": 47225, "total_steps": 78105, "loss": 0.1438, "lr": 2.0262402333344585e-06, "epoch": 3.023173932526727, "percentage": 60.46, "elapsed_time": "2:04:49", "remaining_time": "1:21:37", "throughput": 19842.96, "total_tokens": 148618368} +{"current_steps": 47230, "total_steps": 78105, "loss": 0.1759, "lr": 2.025691715618346e-06, "epoch": 3.023494014467704, "percentage": 60.47, "elapsed_time": "2:04:50", "remaining_time": "1:21:36", "throughput": 19843.19, "total_tokens": 148632960} +{"current_steps": 47235, "total_steps": 78105, "loss": 0.148, "lr": 2.0251432215867197e-06, "epoch": 3.023814096408681, "percentage": 60.48, "elapsed_time": "2:04:51", "remaining_time": "1:21:35", "throughput": 19843.45, "total_tokens": 148648000} +{"current_steps": 47240, "total_steps": 78105, "loss": 0.1652, "lr": 2.0245947512669668e-06, "epoch": 3.0241341783496574, "percentage": 60.48, "elapsed_time": "2:04:51", "remaining_time": "1:21:34", "throughput": 19843.8, "total_tokens": 148664832} +{"current_steps": 47245, "total_steps": 78105, "loss": 0.1096, "lr": 2.024046304686477e-06, "epoch": 3.0244542602906344, "percentage": 60.49, "elapsed_time": "2:04:52", "remaining_time": "1:21:33", "throughput": 19844.1, "total_tokens": 148680384} +{"current_steps": 47250, "total_steps": 78105, "loss": 0.1457, "lr": 2.023497881872634e-06, "epoch": 3.0247743422316113, "percentage": 60.5, "elapsed_time": "2:04:53", "remaining_time": "1:21:33", "throughput": 19844.36, "total_tokens": 148695424} +{"current_steps": 47255, "total_steps": 78105, "loss": 0.1612, "lr": 2.0229494828528252e-06, "epoch": 3.0250944241725883, "percentage": 60.5, "elapsed_time": "2:04:53", "remaining_time": "1:21:32", "throughput": 19844.61, "total_tokens": 148710016} +{"current_steps": 47260, "total_steps": 78105, "loss": 0.1602, "lr": 2.0224011076544352e-06, "epoch": 3.025414506113565, "percentage": 60.51, "elapsed_time": "2:04:54", "remaining_time": "1:21:31", "throughput": 19844.92, "total_tokens": 148725632} +{"current_steps": 47265, "total_steps": 78105, "loss": 0.1816, "lr": 2.0218527563048453e-06, "epoch": 3.025734588054542, "percentage": 60.51, "elapsed_time": "2:04:55", "remaining_time": "1:21:30", "throughput": 19845.24, "total_tokens": 148741632} +{"current_steps": 47270, "total_steps": 78105, "loss": 0.1029, "lr": 2.0213044288314386e-06, "epoch": 3.026054669995519, "percentage": 60.52, "elapsed_time": "2:04:55", "remaining_time": "1:21:29", "throughput": 19845.54, "total_tokens": 148757248} +{"current_steps": 47275, "total_steps": 78105, "loss": 0.1873, "lr": 2.0207561252615953e-06, "epoch": 3.026374751936496, "percentage": 60.53, "elapsed_time": "2:04:56", "remaining_time": "1:21:28", "throughput": 19845.83, "total_tokens": 148772864} +{"current_steps": 47280, "total_steps": 78105, "loss": 0.0767, "lr": 2.020207845622695e-06, "epoch": 3.026694833877473, "percentage": 60.53, "elapsed_time": "2:04:57", "remaining_time": "1:21:27", "throughput": 19846.19, "total_tokens": 148789440} +{"current_steps": 47285, "total_steps": 78105, "loss": 0.1261, "lr": 2.019659589942115e-06, "epoch": 3.0270149158184494, "percentage": 60.54, "elapsed_time": "2:04:57", "remaining_time": "1:21:27", "throughput": 19846.51, "total_tokens": 148805120} +{"current_steps": 47290, "total_steps": 78105, "loss": 0.1392, "lr": 2.019111358247234e-06, "epoch": 3.0273349977594264, "percentage": 60.55, "elapsed_time": "2:04:58", "remaining_time": "1:21:26", "throughput": 19846.79, "total_tokens": 148820544} +{"current_steps": 47295, "total_steps": 78105, "loss": 0.1823, "lr": 2.0185631505654262e-06, "epoch": 3.0276550797004034, "percentage": 60.55, "elapsed_time": "2:04:59", "remaining_time": "1:21:25", "throughput": 19847.05, "total_tokens": 148835264} +{"current_steps": 47300, "total_steps": 78105, "loss": 0.138, "lr": 2.0180149669240675e-06, "epoch": 3.0279751616413804, "percentage": 60.56, "elapsed_time": "2:04:59", "remaining_time": "1:21:24", "throughput": 19847.31, "total_tokens": 148850368} +{"current_steps": 47305, "total_steps": 78105, "loss": 0.1199, "lr": 2.0174668073505303e-06, "epoch": 3.028295243582357, "percentage": 60.57, "elapsed_time": "2:05:00", "remaining_time": "1:21:23", "throughput": 19847.65, "total_tokens": 148866304} +{"current_steps": 47310, "total_steps": 78105, "loss": 0.1164, "lr": 2.016918671872187e-06, "epoch": 3.028615325523334, "percentage": 60.57, "elapsed_time": "2:05:01", "remaining_time": "1:21:22", "throughput": 19847.91, "total_tokens": 148881408} +{"current_steps": 47315, "total_steps": 78105, "loss": 0.1429, "lr": 2.01637056051641e-06, "epoch": 3.028935407464311, "percentage": 60.58, "elapsed_time": "2:05:01", "remaining_time": "1:21:21", "throughput": 19848.17, "total_tokens": 148896192} +{"current_steps": 47320, "total_steps": 78105, "loss": 0.1199, "lr": 2.015822473310567e-06, "epoch": 3.029255489405288, "percentage": 60.59, "elapsed_time": "2:05:02", "remaining_time": "1:21:20", "throughput": 19848.45, "total_tokens": 148911552} +{"current_steps": 47325, "total_steps": 78105, "loss": 0.0901, "lr": 2.0152744102820286e-06, "epoch": 3.0295755713462644, "percentage": 60.59, "elapsed_time": "2:05:03", "remaining_time": "1:21:19", "throughput": 19848.77, "total_tokens": 148927040} +{"current_steps": 47330, "total_steps": 78105, "loss": 0.1823, "lr": 2.0147263714581606e-06, "epoch": 3.0298956532872414, "percentage": 60.6, "elapsed_time": "2:05:03", "remaining_time": "1:21:19", "throughput": 19849.13, "total_tokens": 148943808} +{"current_steps": 47335, "total_steps": 78105, "loss": 0.2101, "lr": 2.0141783568663304e-06, "epoch": 3.0302157352282184, "percentage": 60.6, "elapsed_time": "2:05:04", "remaining_time": "1:21:18", "throughput": 19849.38, "total_tokens": 148958464} +{"current_steps": 47340, "total_steps": 78105, "loss": 0.2033, "lr": 2.013630366533902e-06, "epoch": 3.0305358171691954, "percentage": 60.61, "elapsed_time": "2:05:05", "remaining_time": "1:21:17", "throughput": 19849.69, "total_tokens": 148974528} +{"current_steps": 47345, "total_steps": 78105, "loss": 0.1524, "lr": 2.013082400488241e-06, "epoch": 3.0308558991101724, "percentage": 60.62, "elapsed_time": "2:05:05", "remaining_time": "1:21:16", "throughput": 19849.99, "total_tokens": 148990016} +{"current_steps": 47350, "total_steps": 78105, "loss": 0.1503, "lr": 2.012534458756707e-06, "epoch": 3.031175981051149, "percentage": 60.62, "elapsed_time": "2:05:06", "remaining_time": "1:21:15", "throughput": 19850.31, "total_tokens": 149006016} +{"current_steps": 47355, "total_steps": 78105, "loss": 0.1339, "lr": 2.0119865413666638e-06, "epoch": 3.031496062992126, "percentage": 60.63, "elapsed_time": "2:05:07", "remaining_time": "1:21:14", "throughput": 19850.62, "total_tokens": 149021824} +{"current_steps": 47360, "total_steps": 78105, "loss": 0.1924, "lr": 2.0114386483454708e-06, "epoch": 3.031816144933103, "percentage": 60.64, "elapsed_time": "2:05:07", "remaining_time": "1:21:13", "throughput": 19850.93, "total_tokens": 149037376} +{"current_steps": 47365, "total_steps": 78105, "loss": 0.1623, "lr": 2.010890779720486e-06, "epoch": 3.03213622687408, "percentage": 60.64, "elapsed_time": "2:05:08", "remaining_time": "1:21:13", "throughput": 19851.28, "total_tokens": 149053888} +{"current_steps": 47370, "total_steps": 78105, "loss": 0.1361, "lr": 2.0103429355190695e-06, "epoch": 3.0324563088150565, "percentage": 60.65, "elapsed_time": "2:05:09", "remaining_time": "1:21:12", "throughput": 19851.55, "total_tokens": 149069248} +{"current_steps": 47375, "total_steps": 78105, "loss": 0.1533, "lr": 2.009795115768575e-06, "epoch": 3.0327763907560334, "percentage": 60.66, "elapsed_time": "2:05:09", "remaining_time": "1:21:11", "throughput": 19851.81, "total_tokens": 149084480} +{"current_steps": 47380, "total_steps": 78105, "loss": 0.2616, "lr": 2.00924732049636e-06, "epoch": 3.0330964726970104, "percentage": 60.66, "elapsed_time": "2:05:10", "remaining_time": "1:21:10", "throughput": 19852.09, "total_tokens": 149099712} +{"current_steps": 47385, "total_steps": 78105, "loss": 0.1906, "lr": 2.0086995497297764e-06, "epoch": 3.0334165546379874, "percentage": 60.67, "elapsed_time": "2:05:11", "remaining_time": "1:21:09", "throughput": 19852.35, "total_tokens": 149114880} +{"current_steps": 47390, "total_steps": 78105, "loss": 0.1628, "lr": 2.0081518034961795e-06, "epoch": 3.0337366365789644, "percentage": 60.67, "elapsed_time": "2:05:11", "remaining_time": "1:21:08", "throughput": 19852.68, "total_tokens": 149131072} +{"current_steps": 47395, "total_steps": 78105, "loss": 0.1103, "lr": 2.0076040818229187e-06, "epoch": 3.034056718519941, "percentage": 60.68, "elapsed_time": "2:05:12", "remaining_time": "1:21:07", "throughput": 19853.0, "total_tokens": 149147200} +{"current_steps": 47400, "total_steps": 78105, "loss": 0.155, "lr": 2.007056384737346e-06, "epoch": 3.034376800460918, "percentage": 60.69, "elapsed_time": "2:05:13", "remaining_time": "1:21:06", "throughput": 19853.34, "total_tokens": 149162944} +{"current_steps": 47405, "total_steps": 78105, "loss": 0.1416, "lr": 2.006508712266809e-06, "epoch": 3.034696882401895, "percentage": 60.69, "elapsed_time": "2:05:13", "remaining_time": "1:21:06", "throughput": 19853.63, "total_tokens": 149178432} +{"current_steps": 47410, "total_steps": 78105, "loss": 0.1509, "lr": 2.005961064438657e-06, "epoch": 3.035016964342872, "percentage": 60.7, "elapsed_time": "2:05:14", "remaining_time": "1:21:05", "throughput": 19853.93, "total_tokens": 149194240} +{"current_steps": 47415, "total_steps": 78105, "loss": 0.1214, "lr": 2.005413441280235e-06, "epoch": 3.0353370462838485, "percentage": 60.71, "elapsed_time": "2:05:15", "remaining_time": "1:21:04", "throughput": 19854.26, "total_tokens": 149210560} +{"current_steps": 47420, "total_steps": 78105, "loss": 0.2117, "lr": 2.004865842818891e-06, "epoch": 3.0356571282248255, "percentage": 60.71, "elapsed_time": "2:05:15", "remaining_time": "1:21:03", "throughput": 19854.55, "total_tokens": 149225728} +{"current_steps": 47425, "total_steps": 78105, "loss": 0.1594, "lr": 2.004318269081967e-06, "epoch": 3.0359772101658025, "percentage": 60.72, "elapsed_time": "2:05:16", "remaining_time": "1:21:02", "throughput": 19854.88, "total_tokens": 149241664} +{"current_steps": 47430, "total_steps": 78105, "loss": 0.1372, "lr": 2.0037707200968064e-06, "epoch": 3.0362972921067795, "percentage": 60.73, "elapsed_time": "2:05:17", "remaining_time": "1:21:01", "throughput": 19855.2, "total_tokens": 149257536} +{"current_steps": 47435, "total_steps": 78105, "loss": 0.1418, "lr": 2.0032231958907527e-06, "epoch": 3.036617374047756, "percentage": 60.73, "elapsed_time": "2:05:17", "remaining_time": "1:21:00", "throughput": 19855.46, "total_tokens": 149272704} +{"current_steps": 47440, "total_steps": 78105, "loss": 0.1928, "lr": 2.0026756964911434e-06, "epoch": 3.036937455988733, "percentage": 60.74, "elapsed_time": "2:05:18", "remaining_time": "1:21:00", "throughput": 19855.75, "total_tokens": 149288384} +{"current_steps": 47445, "total_steps": 78105, "loss": 0.1998, "lr": 2.00212822192532e-06, "epoch": 3.03725753792971, "percentage": 60.75, "elapsed_time": "2:05:19", "remaining_time": "1:20:59", "throughput": 19855.99, "total_tokens": 149303168} +{"current_steps": 47450, "total_steps": 78105, "loss": 0.1389, "lr": 2.0015807722206196e-06, "epoch": 3.037577619870687, "percentage": 60.75, "elapsed_time": "2:05:19", "remaining_time": "1:20:58", "throughput": 19856.25, "total_tokens": 149318336} +{"current_steps": 47455, "total_steps": 78105, "loss": 0.0876, "lr": 2.00103334740438e-06, "epoch": 3.037897701811664, "percentage": 60.76, "elapsed_time": "2:05:20", "remaining_time": "1:20:57", "throughput": 19856.54, "total_tokens": 149333504} +{"current_steps": 47460, "total_steps": 78105, "loss": 0.163, "lr": 2.000485947503935e-06, "epoch": 3.0382177837526405, "percentage": 60.76, "elapsed_time": "2:05:21", "remaining_time": "1:20:56", "throughput": 19856.92, "total_tokens": 149350656} +{"current_steps": 47465, "total_steps": 78105, "loss": 0.1282, "lr": 1.9999385725466207e-06, "epoch": 3.0385378656936175, "percentage": 60.77, "elapsed_time": "2:05:22", "remaining_time": "1:20:55", "throughput": 19857.18, "total_tokens": 149365760} +{"current_steps": 47470, "total_steps": 78105, "loss": 0.1662, "lr": 1.9993912225597685e-06, "epoch": 3.0388579476345945, "percentage": 60.78, "elapsed_time": "2:05:22", "remaining_time": "1:20:54", "throughput": 19857.45, "total_tokens": 149380736} +{"current_steps": 47475, "total_steps": 78105, "loss": 0.1963, "lr": 1.9988438975707127e-06, "epoch": 3.0391780295755715, "percentage": 60.78, "elapsed_time": "2:05:23", "remaining_time": "1:20:53", "throughput": 19857.76, "total_tokens": 149396608} +{"current_steps": 47480, "total_steps": 78105, "loss": 0.2719, "lr": 1.9982965976067808e-06, "epoch": 3.039498111516548, "percentage": 60.79, "elapsed_time": "2:05:23", "remaining_time": "1:20:53", "throughput": 19858.02, "total_tokens": 149411712} +{"current_steps": 47485, "total_steps": 78105, "loss": 0.1177, "lr": 1.997749322695303e-06, "epoch": 3.039818193457525, "percentage": 60.8, "elapsed_time": "2:05:24", "remaining_time": "1:20:52", "throughput": 19858.42, "total_tokens": 149428992} +{"current_steps": 47490, "total_steps": 78105, "loss": 0.1604, "lr": 1.99720207286361e-06, "epoch": 3.040138275398502, "percentage": 60.8, "elapsed_time": "2:05:25", "remaining_time": "1:20:51", "throughput": 19858.76, "total_tokens": 149445568} +{"current_steps": 47495, "total_steps": 78105, "loss": 0.1245, "lr": 1.9966548481390248e-06, "epoch": 3.040458357339479, "percentage": 60.81, "elapsed_time": "2:05:26", "remaining_time": "1:20:50", "throughput": 19859.09, "total_tokens": 149461568} +{"current_steps": 47500, "total_steps": 78105, "loss": 0.1583, "lr": 1.9961076485488753e-06, "epoch": 3.040778439280456, "percentage": 60.82, "elapsed_time": "2:05:26", "remaining_time": "1:20:49", "throughput": 19859.4, "total_tokens": 149477440} +{"current_steps": 47505, "total_steps": 78105, "loss": 0.1525, "lr": 1.995560474120484e-06, "epoch": 3.0410985212214325, "percentage": 60.82, "elapsed_time": "2:05:27", "remaining_time": "1:20:48", "throughput": 19859.77, "total_tokens": 149494464} +{"current_steps": 47510, "total_steps": 78105, "loss": 0.2602, "lr": 1.995013324881177e-06, "epoch": 3.0414186031624095, "percentage": 60.83, "elapsed_time": "2:05:28", "remaining_time": "1:20:47", "throughput": 19860.15, "total_tokens": 149511616} +{"current_steps": 47515, "total_steps": 78105, "loss": 0.1628, "lr": 1.9944662008582734e-06, "epoch": 3.0417386851033865, "percentage": 60.83, "elapsed_time": "2:05:28", "remaining_time": "1:20:47", "throughput": 19860.51, "total_tokens": 149528128} +{"current_steps": 47520, "total_steps": 78105, "loss": 0.1647, "lr": 1.9939191020790944e-06, "epoch": 3.0420587670443635, "percentage": 60.84, "elapsed_time": "2:05:29", "remaining_time": "1:20:46", "throughput": 19860.78, "total_tokens": 149543232} +{"current_steps": 47525, "total_steps": 78105, "loss": 0.0676, "lr": 1.9933720285709587e-06, "epoch": 3.04237884898534, "percentage": 60.85, "elapsed_time": "2:05:30", "remaining_time": "1:20:45", "throughput": 19861.07, "total_tokens": 149559040} +{"current_steps": 47530, "total_steps": 78105, "loss": 0.2869, "lr": 1.992824980361186e-06, "epoch": 3.042698930926317, "percentage": 60.85, "elapsed_time": "2:05:30", "remaining_time": "1:20:44", "throughput": 19861.42, "total_tokens": 149575360} +{"current_steps": 47535, "total_steps": 78105, "loss": 0.1776, "lr": 1.9922779574770913e-06, "epoch": 3.043019012867294, "percentage": 60.86, "elapsed_time": "2:05:31", "remaining_time": "1:20:43", "throughput": 19861.71, "total_tokens": 149591104} +{"current_steps": 47540, "total_steps": 78105, "loss": 0.1012, "lr": 1.9917309599459906e-06, "epoch": 3.043339094808271, "percentage": 60.87, "elapsed_time": "2:05:32", "remaining_time": "1:20:43", "throughput": 19862.46, "total_tokens": 149619904} +{"current_steps": 47545, "total_steps": 78105, "loss": 0.2155, "lr": 1.9911839877951997e-06, "epoch": 3.043659176749248, "percentage": 60.87, "elapsed_time": "2:05:33", "remaining_time": "1:20:42", "throughput": 19862.74, "total_tokens": 149635328} +{"current_steps": 47550, "total_steps": 78105, "loss": 0.1333, "lr": 1.9906370410520286e-06, "epoch": 3.0439792586902246, "percentage": 60.88, "elapsed_time": "2:05:34", "remaining_time": "1:20:41", "throughput": 19863.06, "total_tokens": 149651328} +{"current_steps": 47555, "total_steps": 78105, "loss": 0.1287, "lr": 1.9900901197437916e-06, "epoch": 3.0442993406312016, "percentage": 60.89, "elapsed_time": "2:05:34", "remaining_time": "1:20:40", "throughput": 19863.33, "total_tokens": 149666560} +{"current_steps": 47560, "total_steps": 78105, "loss": 0.1752, "lr": 1.989543223897797e-06, "epoch": 3.0446194225721785, "percentage": 60.89, "elapsed_time": "2:05:35", "remaining_time": "1:20:39", "throughput": 19863.61, "total_tokens": 149681792} +{"current_steps": 47565, "total_steps": 78105, "loss": 0.1434, "lr": 1.988996353541356e-06, "epoch": 3.0449395045131555, "percentage": 60.9, "elapsed_time": "2:05:36", "remaining_time": "1:20:38", "throughput": 19863.96, "total_tokens": 149698432} +{"current_steps": 47570, "total_steps": 78105, "loss": 0.1517, "lr": 1.9884495087017743e-06, "epoch": 3.045259586454132, "percentage": 60.91, "elapsed_time": "2:05:36", "remaining_time": "1:20:37", "throughput": 19864.33, "total_tokens": 149715264} +{"current_steps": 47575, "total_steps": 78105, "loss": 0.1653, "lr": 1.9879026894063606e-06, "epoch": 3.045579668395109, "percentage": 60.91, "elapsed_time": "2:05:37", "remaining_time": "1:20:37", "throughput": 19864.64, "total_tokens": 149731264} +{"current_steps": 47580, "total_steps": 78105, "loss": 0.1442, "lr": 1.9873558956824185e-06, "epoch": 3.045899750336086, "percentage": 60.92, "elapsed_time": "2:05:38", "remaining_time": "1:20:36", "throughput": 19864.98, "total_tokens": 149747584} +{"current_steps": 47585, "total_steps": 78105, "loss": 0.1226, "lr": 1.9868091275572528e-06, "epoch": 3.046219832277063, "percentage": 60.92, "elapsed_time": "2:05:38", "remaining_time": "1:20:35", "throughput": 19865.3, "total_tokens": 149763712} +{"current_steps": 47590, "total_steps": 78105, "loss": 0.1999, "lr": 1.986262385058166e-06, "epoch": 3.0465399142180396, "percentage": 60.93, "elapsed_time": "2:05:39", "remaining_time": "1:20:34", "throughput": 19865.61, "total_tokens": 149779712} +{"current_steps": 47595, "total_steps": 78105, "loss": 0.1887, "lr": 1.985715668212461e-06, "epoch": 3.0468599961590166, "percentage": 60.94, "elapsed_time": "2:05:40", "remaining_time": "1:20:33", "throughput": 19865.94, "total_tokens": 149796160} +{"current_steps": 47600, "total_steps": 78105, "loss": 0.1351, "lr": 1.9851689770474348e-06, "epoch": 3.0471800780999936, "percentage": 60.94, "elapsed_time": "2:05:41", "remaining_time": "1:20:32", "throughput": 19866.2, "total_tokens": 149811136} +{"current_steps": 47605, "total_steps": 78105, "loss": 0.1378, "lr": 1.984622311590389e-06, "epoch": 3.0475001600409706, "percentage": 60.95, "elapsed_time": "2:05:41", "remaining_time": "1:20:31", "throughput": 19866.52, "total_tokens": 149827072} +{"current_steps": 47610, "total_steps": 78105, "loss": 0.1605, "lr": 1.98407567186862e-06, "epoch": 3.0478202419819476, "percentage": 60.96, "elapsed_time": "2:05:42", "remaining_time": "1:20:31", "throughput": 19866.91, "total_tokens": 149844288} +{"current_steps": 47615, "total_steps": 78105, "loss": 0.2217, "lr": 1.983529057909425e-06, "epoch": 3.048140323922924, "percentage": 60.96, "elapsed_time": "2:05:43", "remaining_time": "1:20:30", "throughput": 19867.16, "total_tokens": 149859008} +{"current_steps": 47620, "total_steps": 78105, "loss": 0.1383, "lr": 1.9829824697400994e-06, "epoch": 3.048460405863901, "percentage": 60.97, "elapsed_time": "2:05:43", "remaining_time": "1:20:29", "throughput": 19867.48, "total_tokens": 149874880} +{"current_steps": 47625, "total_steps": 78105, "loss": 0.2013, "lr": 1.982435907387935e-06, "epoch": 3.048780487804878, "percentage": 60.98, "elapsed_time": "2:05:44", "remaining_time": "1:20:28", "throughput": 19867.77, "total_tokens": 149890496} +{"current_steps": 47630, "total_steps": 78105, "loss": 0.2065, "lr": 1.981889370880227e-06, "epoch": 3.049100569745855, "percentage": 60.98, "elapsed_time": "2:05:45", "remaining_time": "1:20:27", "throughput": 19868.03, "total_tokens": 149905344} +{"current_steps": 47635, "total_steps": 78105, "loss": 0.1493, "lr": 1.9813428602442636e-06, "epoch": 3.0494206516868316, "percentage": 60.99, "elapsed_time": "2:05:45", "remaining_time": "1:20:26", "throughput": 19868.3, "total_tokens": 149920768} +{"current_steps": 47640, "total_steps": 78105, "loss": 0.1375, "lr": 1.980796375507337e-06, "epoch": 3.0497407336278086, "percentage": 60.99, "elapsed_time": "2:05:46", "remaining_time": "1:20:25", "throughput": 19868.59, "total_tokens": 149936064} +{"current_steps": 47645, "total_steps": 78105, "loss": 0.1064, "lr": 1.980249916696734e-06, "epoch": 3.0500608155687856, "percentage": 61.0, "elapsed_time": "2:05:47", "remaining_time": "1:20:24", "throughput": 19868.82, "total_tokens": 149950592} +{"current_steps": 47650, "total_steps": 78105, "loss": 0.1358, "lr": 1.9797034838397446e-06, "epoch": 3.0503808975097626, "percentage": 61.01, "elapsed_time": "2:05:47", "remaining_time": "1:20:24", "throughput": 19869.06, "total_tokens": 149965056} +{"current_steps": 47655, "total_steps": 78105, "loss": 0.1829, "lr": 1.9791570769636514e-06, "epoch": 3.0507009794507396, "percentage": 61.01, "elapsed_time": "2:05:48", "remaining_time": "1:20:23", "throughput": 19869.38, "total_tokens": 149981184} +{"current_steps": 47660, "total_steps": 78105, "loss": 0.1375, "lr": 1.9786106960957412e-06, "epoch": 3.051021061391716, "percentage": 61.02, "elapsed_time": "2:05:49", "remaining_time": "1:20:22", "throughput": 19869.64, "total_tokens": 149996544} +{"current_steps": 47665, "total_steps": 78105, "loss": 0.1414, "lr": 1.978064341263298e-06, "epoch": 3.051341143332693, "percentage": 61.03, "elapsed_time": "2:05:49", "remaining_time": "1:20:21", "throughput": 19870.0, "total_tokens": 150013312} +{"current_steps": 47670, "total_steps": 78105, "loss": 0.1625, "lr": 1.977518012493602e-06, "epoch": 3.05166122527367, "percentage": 61.03, "elapsed_time": "2:05:50", "remaining_time": "1:20:20", "throughput": 19870.37, "total_tokens": 150030016} +{"current_steps": 47675, "total_steps": 78105, "loss": 0.1525, "lr": 1.976971709813935e-06, "epoch": 3.051981307214647, "percentage": 61.04, "elapsed_time": "2:05:51", "remaining_time": "1:20:19", "throughput": 19870.62, "total_tokens": 150045056} +{"current_steps": 47680, "total_steps": 78105, "loss": 0.114, "lr": 1.9764254332515765e-06, "epoch": 3.0523013891556237, "percentage": 61.05, "elapsed_time": "2:05:51", "remaining_time": "1:20:18", "throughput": 19870.91, "total_tokens": 150060480} +{"current_steps": 47685, "total_steps": 78105, "loss": 0.2572, "lr": 1.9758791828338053e-06, "epoch": 3.0526214710966006, "percentage": 61.05, "elapsed_time": "2:05:52", "remaining_time": "1:20:18", "throughput": 19871.25, "total_tokens": 150077120} +{"current_steps": 47690, "total_steps": 78105, "loss": 0.1731, "lr": 1.975332958587897e-06, "epoch": 3.0529415530375776, "percentage": 61.06, "elapsed_time": "2:05:53", "remaining_time": "1:20:17", "throughput": 19871.62, "total_tokens": 150093952} +{"current_steps": 47695, "total_steps": 78105, "loss": 0.1819, "lr": 1.9747867605411282e-06, "epoch": 3.0532616349785546, "percentage": 61.07, "elapsed_time": "2:05:53", "remaining_time": "1:20:16", "throughput": 19871.89, "total_tokens": 150109504} +{"current_steps": 47700, "total_steps": 78105, "loss": 0.1143, "lr": 1.974240588720772e-06, "epoch": 3.053581716919531, "percentage": 61.07, "elapsed_time": "2:05:54", "remaining_time": "1:20:15", "throughput": 19872.31, "total_tokens": 150127552} +{"current_steps": 47705, "total_steps": 78105, "loss": 0.1824, "lr": 1.9736944431541038e-06, "epoch": 3.053901798860508, "percentage": 61.08, "elapsed_time": "2:05:55", "remaining_time": "1:20:14", "throughput": 19872.58, "total_tokens": 150142720} +{"current_steps": 47710, "total_steps": 78105, "loss": 0.1491, "lr": 1.973148323868392e-06, "epoch": 3.054221880801485, "percentage": 61.08, "elapsed_time": "2:05:55", "remaining_time": "1:20:13", "throughput": 19872.82, "total_tokens": 150157504} +{"current_steps": 47715, "total_steps": 78105, "loss": 0.1599, "lr": 1.9726022308909095e-06, "epoch": 3.054541962742462, "percentage": 61.09, "elapsed_time": "2:05:56", "remaining_time": "1:20:12", "throughput": 19873.09, "total_tokens": 150172992} +{"current_steps": 47720, "total_steps": 78105, "loss": 0.1498, "lr": 1.9720561642489247e-06, "epoch": 3.054862044683439, "percentage": 61.1, "elapsed_time": "2:05:57", "remaining_time": "1:20:11", "throughput": 19873.4, "total_tokens": 150188992} +{"current_steps": 47725, "total_steps": 78105, "loss": 0.1453, "lr": 1.971510123969704e-06, "epoch": 3.0551821266244157, "percentage": 61.1, "elapsed_time": "2:05:58", "remaining_time": "1:20:11", "throughput": 19873.77, "total_tokens": 150205952} +{"current_steps": 47730, "total_steps": 78105, "loss": 0.1228, "lr": 1.9709641100805163e-06, "epoch": 3.0555022085653927, "percentage": 61.11, "elapsed_time": "2:05:58", "remaining_time": "1:20:10", "throughput": 19874.04, "total_tokens": 150221248} +{"current_steps": 47735, "total_steps": 78105, "loss": 0.1354, "lr": 1.970418122608624e-06, "epoch": 3.0558222905063697, "percentage": 61.12, "elapsed_time": "2:05:59", "remaining_time": "1:20:09", "throughput": 19874.32, "total_tokens": 150236672} +{"current_steps": 47740, "total_steps": 78105, "loss": 0.155, "lr": 1.969872161581294e-06, "epoch": 3.0561423724473467, "percentage": 61.12, "elapsed_time": "2:06:00", "remaining_time": "1:20:08", "throughput": 19874.65, "total_tokens": 150252672} +{"current_steps": 47745, "total_steps": 78105, "loss": 0.1819, "lr": 1.9693262270257853e-06, "epoch": 3.056462454388323, "percentage": 61.13, "elapsed_time": "2:06:00", "remaining_time": "1:20:07", "throughput": 19875.0, "total_tokens": 150269056} +{"current_steps": 47750, "total_steps": 78105, "loss": 0.2936, "lr": 1.968780318969361e-06, "epoch": 3.0567825363293, "percentage": 61.14, "elapsed_time": "2:06:01", "remaining_time": "1:20:06", "throughput": 19875.28, "total_tokens": 150284416} +{"current_steps": 47755, "total_steps": 78105, "loss": 0.1472, "lr": 1.9682344374392805e-06, "epoch": 3.057102618270277, "percentage": 61.14, "elapsed_time": "2:06:02", "remaining_time": "1:20:05", "throughput": 19875.56, "total_tokens": 150299776} +{"current_steps": 47760, "total_steps": 78105, "loss": 0.0795, "lr": 1.967688582462803e-06, "epoch": 3.057422700211254, "percentage": 61.15, "elapsed_time": "2:06:02", "remaining_time": "1:20:05", "throughput": 19875.84, "total_tokens": 150315264} +{"current_steps": 47765, "total_steps": 78105, "loss": 0.1907, "lr": 1.9671427540671838e-06, "epoch": 3.057742782152231, "percentage": 61.15, "elapsed_time": "2:06:03", "remaining_time": "1:20:04", "throughput": 19876.17, "total_tokens": 150331200} +{"current_steps": 47770, "total_steps": 78105, "loss": 0.1474, "lr": 1.9665969522796813e-06, "epoch": 3.0580628640932077, "percentage": 61.16, "elapsed_time": "2:06:04", "remaining_time": "1:20:03", "throughput": 19876.49, "total_tokens": 150347072} +{"current_steps": 47775, "total_steps": 78105, "loss": 0.1164, "lr": 1.9660511771275474e-06, "epoch": 3.0583829460341847, "percentage": 61.17, "elapsed_time": "2:06:04", "remaining_time": "1:20:02", "throughput": 19876.82, "total_tokens": 150363392} +{"current_steps": 47780, "total_steps": 78105, "loss": 0.2084, "lr": 1.9655054286380367e-06, "epoch": 3.0587030279751617, "percentage": 61.17, "elapsed_time": "2:06:05", "remaining_time": "1:20:01", "throughput": 19877.1, "total_tokens": 150379008} +{"current_steps": 47785, "total_steps": 78105, "loss": 0.2199, "lr": 1.9649597068384012e-06, "epoch": 3.0590231099161387, "percentage": 61.18, "elapsed_time": "2:06:06", "remaining_time": "1:20:00", "throughput": 19877.38, "total_tokens": 150394624} +{"current_steps": 47790, "total_steps": 78105, "loss": 0.1773, "lr": 1.9644140117558904e-06, "epoch": 3.0593431918571152, "percentage": 61.19, "elapsed_time": "2:06:06", "remaining_time": "1:19:59", "throughput": 19877.68, "total_tokens": 150410432} +{"current_steps": 47795, "total_steps": 78105, "loss": 0.1238, "lr": 1.9638683434177554e-06, "epoch": 3.059663273798092, "percentage": 61.19, "elapsed_time": "2:06:07", "remaining_time": "1:19:59", "throughput": 19877.92, "total_tokens": 150425216} +{"current_steps": 47800, "total_steps": 78105, "loss": 0.1551, "lr": 1.9633227018512414e-06, "epoch": 3.059983355739069, "percentage": 61.2, "elapsed_time": "2:06:08", "remaining_time": "1:19:58", "throughput": 19878.18, "total_tokens": 150440192} +{"current_steps": 47805, "total_steps": 78105, "loss": 0.1245, "lr": 1.9627770870835976e-06, "epoch": 3.060303437680046, "percentage": 61.21, "elapsed_time": "2:06:08", "remaining_time": "1:19:57", "throughput": 19878.47, "total_tokens": 150456128} +{"current_steps": 47810, "total_steps": 78105, "loss": 0.0953, "lr": 1.962231499142066e-06, "epoch": 3.060623519621023, "percentage": 61.21, "elapsed_time": "2:06:09", "remaining_time": "1:19:56", "throughput": 19878.85, "total_tokens": 150473088} +{"current_steps": 47815, "total_steps": 78105, "loss": 0.1509, "lr": 1.9616859380538936e-06, "epoch": 3.0609436015619997, "percentage": 61.22, "elapsed_time": "2:06:10", "remaining_time": "1:19:55", "throughput": 19879.11, "total_tokens": 150487744} +{"current_steps": 47820, "total_steps": 78105, "loss": 0.1472, "lr": 1.9611404038463205e-06, "epoch": 3.0612636835029767, "percentage": 61.23, "elapsed_time": "2:06:10", "remaining_time": "1:19:54", "throughput": 19879.44, "total_tokens": 150504000} +{"current_steps": 47825, "total_steps": 78105, "loss": 0.118, "lr": 1.96059489654659e-06, "epoch": 3.0615837654439537, "percentage": 61.23, "elapsed_time": "2:06:11", "remaining_time": "1:19:53", "throughput": 19879.71, "total_tokens": 150518912} +{"current_steps": 47830, "total_steps": 78105, "loss": 0.1458, "lr": 1.9600494161819393e-06, "epoch": 3.0619038473849307, "percentage": 61.24, "elapsed_time": "2:06:12", "remaining_time": "1:19:52", "throughput": 19880.03, "total_tokens": 150535232} +{"current_steps": 47835, "total_steps": 78105, "loss": 0.1911, "lr": 1.9595039627796085e-06, "epoch": 3.0622239293259073, "percentage": 61.24, "elapsed_time": "2:06:12", "remaining_time": "1:19:52", "throughput": 19880.32, "total_tokens": 150550912} +{"current_steps": 47840, "total_steps": 78105, "loss": 0.1329, "lr": 1.9589585363668353e-06, "epoch": 3.0625440112668842, "percentage": 61.25, "elapsed_time": "2:06:13", "remaining_time": "1:19:51", "throughput": 19880.61, "total_tokens": 150566528} +{"current_steps": 47845, "total_steps": 78105, "loss": 0.2175, "lr": 1.9584131369708535e-06, "epoch": 3.0628640932078612, "percentage": 61.26, "elapsed_time": "2:06:14", "remaining_time": "1:19:50", "throughput": 19880.84, "total_tokens": 150581056} +{"current_steps": 47850, "total_steps": 78105, "loss": 0.1842, "lr": 1.9578677646189e-06, "epoch": 3.0631841751488382, "percentage": 61.26, "elapsed_time": "2:06:14", "remaining_time": "1:19:49", "throughput": 19881.1, "total_tokens": 150596416} +{"current_steps": 47855, "total_steps": 78105, "loss": 0.1962, "lr": 1.957322419338206e-06, "epoch": 3.063504257089815, "percentage": 61.27, "elapsed_time": "2:06:15", "remaining_time": "1:19:48", "throughput": 19881.39, "total_tokens": 150612096} +{"current_steps": 47860, "total_steps": 78105, "loss": 0.138, "lr": 1.956777101156004e-06, "epoch": 3.0638243390307918, "percentage": 61.28, "elapsed_time": "2:06:16", "remaining_time": "1:19:47", "throughput": 19881.72, "total_tokens": 150628224} +{"current_steps": 47865, "total_steps": 78105, "loss": 0.1429, "lr": 1.956231810099523e-06, "epoch": 3.0641444209717688, "percentage": 61.28, "elapsed_time": "2:06:16", "remaining_time": "1:19:46", "throughput": 19882.03, "total_tokens": 150644160} +{"current_steps": 47870, "total_steps": 78105, "loss": 0.1739, "lr": 1.9556865461959936e-06, "epoch": 3.0644645029127457, "percentage": 61.29, "elapsed_time": "2:06:17", "remaining_time": "1:19:46", "throughput": 19882.37, "total_tokens": 150660736} +{"current_steps": 47875, "total_steps": 78105, "loss": 0.1547, "lr": 1.9551413094726422e-06, "epoch": 3.0647845848537227, "percentage": 61.3, "elapsed_time": "2:06:18", "remaining_time": "1:19:45", "throughput": 19882.68, "total_tokens": 150676800} +{"current_steps": 47880, "total_steps": 78105, "loss": 0.2209, "lr": 1.9545960999566973e-06, "epoch": 3.0651046667946993, "percentage": 61.3, "elapsed_time": "2:06:18", "remaining_time": "1:19:44", "throughput": 19882.99, "total_tokens": 150692800} +{"current_steps": 47885, "total_steps": 78105, "loss": 0.131, "lr": 1.9540509176753807e-06, "epoch": 3.0654247487356763, "percentage": 61.31, "elapsed_time": "2:06:19", "remaining_time": "1:19:43", "throughput": 19883.26, "total_tokens": 150707776} +{"current_steps": 47890, "total_steps": 78105, "loss": 0.1623, "lr": 1.9535057626559177e-06, "epoch": 3.0657448306766533, "percentage": 61.31, "elapsed_time": "2:06:20", "remaining_time": "1:19:42", "throughput": 19883.55, "total_tokens": 150723008} +{"current_steps": 47895, "total_steps": 78105, "loss": 0.1904, "lr": 1.9529606349255316e-06, "epoch": 3.0660649126176303, "percentage": 61.32, "elapsed_time": "2:06:20", "remaining_time": "1:19:41", "throughput": 19883.87, "total_tokens": 150738944} +{"current_steps": 47900, "total_steps": 78105, "loss": 0.1105, "lr": 1.9524155345114406e-06, "epoch": 3.066384994558607, "percentage": 61.33, "elapsed_time": "2:06:21", "remaining_time": "1:19:40", "throughput": 19884.21, "total_tokens": 150755200} +{"current_steps": 47905, "total_steps": 78105, "loss": 0.2196, "lr": 1.951870461440866e-06, "epoch": 3.066705076499584, "percentage": 61.33, "elapsed_time": "2:06:22", "remaining_time": "1:19:40", "throughput": 19884.51, "total_tokens": 150771136} +{"current_steps": 47910, "total_steps": 78105, "loss": 0.1726, "lr": 1.9513254157410253e-06, "epoch": 3.067025158440561, "percentage": 61.34, "elapsed_time": "2:06:22", "remaining_time": "1:19:39", "throughput": 19884.76, "total_tokens": 150785984} +{"current_steps": 47915, "total_steps": 78105, "loss": 0.1345, "lr": 1.950780397439136e-06, "epoch": 3.0673452403815378, "percentage": 61.35, "elapsed_time": "2:06:23", "remaining_time": "1:19:38", "throughput": 19885.17, "total_tokens": 150803712} +{"current_steps": 47920, "total_steps": 78105, "loss": 0.1882, "lr": 1.9502354065624124e-06, "epoch": 3.0676653223225148, "percentage": 61.35, "elapsed_time": "2:06:24", "remaining_time": "1:19:37", "throughput": 19885.49, "total_tokens": 150819712} +{"current_steps": 47925, "total_steps": 78105, "loss": 0.1113, "lr": 1.949690443138069e-06, "epoch": 3.0679854042634913, "percentage": 61.36, "elapsed_time": "2:06:25", "remaining_time": "1:19:36", "throughput": 19885.8, "total_tokens": 150835776} +{"current_steps": 47930, "total_steps": 78105, "loss": 0.1513, "lr": 1.949145507193318e-06, "epoch": 3.0683054862044683, "percentage": 61.37, "elapsed_time": "2:06:25", "remaining_time": "1:19:35", "throughput": 19886.06, "total_tokens": 150850816} +{"current_steps": 47935, "total_steps": 78105, "loss": 0.0757, "lr": 1.9486005987553722e-06, "epoch": 3.0686255681454453, "percentage": 61.37, "elapsed_time": "2:06:26", "remaining_time": "1:19:34", "throughput": 19886.39, "total_tokens": 150867264} +{"current_steps": 47940, "total_steps": 78105, "loss": 0.103, "lr": 1.948055717851439e-06, "epoch": 3.0689456500864223, "percentage": 61.38, "elapsed_time": "2:06:27", "remaining_time": "1:19:33", "throughput": 19886.64, "total_tokens": 150881728} +{"current_steps": 47945, "total_steps": 78105, "loss": 0.1954, "lr": 1.9475108645087297e-06, "epoch": 3.069265732027399, "percentage": 61.39, "elapsed_time": "2:06:27", "remaining_time": "1:19:33", "throughput": 19887.02, "total_tokens": 150898880} +{"current_steps": 47950, "total_steps": 78105, "loss": 0.1764, "lr": 1.9469660387544493e-06, "epoch": 3.069585813968376, "percentage": 61.39, "elapsed_time": "2:06:28", "remaining_time": "1:19:32", "throughput": 19887.31, "total_tokens": 150914624} +{"current_steps": 47955, "total_steps": 78105, "loss": 0.2132, "lr": 1.9464212406158036e-06, "epoch": 3.069905895909353, "percentage": 61.4, "elapsed_time": "2:06:29", "remaining_time": "1:19:31", "throughput": 19887.62, "total_tokens": 150930432} +{"current_steps": 47960, "total_steps": 78105, "loss": 0.2277, "lr": 1.9458764701199986e-06, "epoch": 3.07022597785033, "percentage": 61.4, "elapsed_time": "2:06:29", "remaining_time": "1:19:30", "throughput": 19887.89, "total_tokens": 150945664} +{"current_steps": 47965, "total_steps": 78105, "loss": 0.1633, "lr": 1.9453317272942358e-06, "epoch": 3.0705460597913063, "percentage": 61.41, "elapsed_time": "2:06:30", "remaining_time": "1:19:29", "throughput": 19888.19, "total_tokens": 150961344} +{"current_steps": 47970, "total_steps": 78105, "loss": 0.2098, "lr": 1.944787012165718e-06, "epoch": 3.0708661417322833, "percentage": 61.42, "elapsed_time": "2:06:31", "remaining_time": "1:19:28", "throughput": 19888.47, "total_tokens": 150976512} +{"current_steps": 47975, "total_steps": 78105, "loss": 0.1358, "lr": 1.944242324761644e-06, "epoch": 3.0711862236732603, "percentage": 61.42, "elapsed_time": "2:06:31", "remaining_time": "1:19:27", "throughput": 19888.92, "total_tokens": 150995136} +{"current_steps": 47980, "total_steps": 78105, "loss": 0.0953, "lr": 1.9436976651092143e-06, "epoch": 3.0715063056142373, "percentage": 61.43, "elapsed_time": "2:06:32", "remaining_time": "1:19:27", "throughput": 19889.3, "total_tokens": 151012416} +{"current_steps": 47985, "total_steps": 78105, "loss": 0.158, "lr": 1.9431530332356247e-06, "epoch": 3.0718263875552143, "percentage": 61.44, "elapsed_time": "2:06:33", "remaining_time": "1:19:26", "throughput": 19889.59, "total_tokens": 151028032} +{"current_steps": 47990, "total_steps": 78105, "loss": 0.1351, "lr": 1.942608429168073e-06, "epoch": 3.072146469496191, "percentage": 61.44, "elapsed_time": "2:06:33", "remaining_time": "1:19:25", "throughput": 19889.86, "total_tokens": 151043200} +{"current_steps": 47995, "total_steps": 78105, "loss": 0.1743, "lr": 1.9420638529337528e-06, "epoch": 3.072466551437168, "percentage": 61.45, "elapsed_time": "2:06:34", "remaining_time": "1:19:24", "throughput": 19890.2, "total_tokens": 151059712} +{"current_steps": 48000, "total_steps": 78105, "loss": 0.2285, "lr": 1.9415193045598575e-06, "epoch": 3.072786633378145, "percentage": 61.46, "elapsed_time": "2:06:35", "remaining_time": "1:19:23", "throughput": 19890.55, "total_tokens": 151076608} +{"current_steps": 48005, "total_steps": 78105, "loss": 0.1377, "lr": 1.940974784073579e-06, "epoch": 3.073106715319122, "percentage": 61.46, "elapsed_time": "2:06:36", "remaining_time": "1:19:22", "throughput": 19890.84, "total_tokens": 151092288} +{"current_steps": 48010, "total_steps": 78105, "loss": 0.1857, "lr": 1.940430291502107e-06, "epoch": 3.0734267972600984, "percentage": 61.47, "elapsed_time": "2:06:36", "remaining_time": "1:19:22", "throughput": 19891.19, "total_tokens": 151108992} +{"current_steps": 48015, "total_steps": 78105, "loss": 0.1747, "lr": 1.9398858268726327e-06, "epoch": 3.0737468792010754, "percentage": 61.47, "elapsed_time": "2:06:37", "remaining_time": "1:19:21", "throughput": 19891.45, "total_tokens": 151124224} +{"current_steps": 48020, "total_steps": 78105, "loss": 0.1461, "lr": 1.9393413902123425e-06, "epoch": 3.0740669611420524, "percentage": 61.48, "elapsed_time": "2:06:38", "remaining_time": "1:19:20", "throughput": 19891.8, "total_tokens": 151140928} +{"current_steps": 48025, "total_steps": 78105, "loss": 0.1835, "lr": 1.9387969815484233e-06, "epoch": 3.0743870430830293, "percentage": 61.49, "elapsed_time": "2:06:38", "remaining_time": "1:19:19", "throughput": 19892.08, "total_tokens": 151156416} +{"current_steps": 48030, "total_steps": 78105, "loss": 0.1423, "lr": 1.938252600908059e-06, "epoch": 3.0747071250240063, "percentage": 61.49, "elapsed_time": "2:06:39", "remaining_time": "1:19:18", "throughput": 19892.43, "total_tokens": 151172800} +{"current_steps": 48035, "total_steps": 78105, "loss": 0.1535, "lr": 1.9377082483184344e-06, "epoch": 3.075027206964983, "percentage": 61.5, "elapsed_time": "2:06:40", "remaining_time": "1:19:17", "throughput": 19892.73, "total_tokens": 151188352} +{"current_steps": 48040, "total_steps": 78105, "loss": 0.2626, "lr": 1.9371639238067302e-06, "epoch": 3.07534728890596, "percentage": 61.51, "elapsed_time": "2:06:40", "remaining_time": "1:19:16", "throughput": 19893.02, "total_tokens": 151204288} +{"current_steps": 48045, "total_steps": 78105, "loss": 0.2089, "lr": 1.9366196274001286e-06, "epoch": 3.075667370846937, "percentage": 61.51, "elapsed_time": "2:06:41", "remaining_time": "1:19:16", "throughput": 19893.3, "total_tokens": 151219712} +{"current_steps": 48050, "total_steps": 78105, "loss": 0.1127, "lr": 1.9360753591258073e-06, "epoch": 3.075987452787914, "percentage": 61.52, "elapsed_time": "2:06:42", "remaining_time": "1:19:15", "throughput": 19893.58, "total_tokens": 151235136} +{"current_steps": 48055, "total_steps": 78105, "loss": 0.131, "lr": 1.9355311190109465e-06, "epoch": 3.0763075347288904, "percentage": 61.53, "elapsed_time": "2:06:42", "remaining_time": "1:19:14", "throughput": 19893.86, "total_tokens": 151250624} +{"current_steps": 48060, "total_steps": 78105, "loss": 0.1795, "lr": 1.9349869070827205e-06, "epoch": 3.0766276166698674, "percentage": 61.53, "elapsed_time": "2:06:43", "remaining_time": "1:19:13", "throughput": 19894.14, "total_tokens": 151265792} +{"current_steps": 48065, "total_steps": 78105, "loss": 0.1688, "lr": 1.9344427233683053e-06, "epoch": 3.0769476986108444, "percentage": 61.54, "elapsed_time": "2:06:44", "remaining_time": "1:19:12", "throughput": 19894.44, "total_tokens": 151281664} +{"current_steps": 48070, "total_steps": 78105, "loss": 0.1275, "lr": 1.933898567894875e-06, "epoch": 3.0772677805518214, "percentage": 61.55, "elapsed_time": "2:06:44", "remaining_time": "1:19:11", "throughput": 19894.72, "total_tokens": 151297216} +{"current_steps": 48075, "total_steps": 78105, "loss": 0.2356, "lr": 1.933354440689601e-06, "epoch": 3.0775878624927984, "percentage": 61.55, "elapsed_time": "2:06:45", "remaining_time": "1:19:10", "throughput": 19895.04, "total_tokens": 151313152} +{"current_steps": 48080, "total_steps": 78105, "loss": 0.155, "lr": 1.932810341779656e-06, "epoch": 3.077907944433775, "percentage": 61.56, "elapsed_time": "2:06:46", "remaining_time": "1:19:09", "throughput": 19895.3, "total_tokens": 151328704} +{"current_steps": 48085, "total_steps": 78105, "loss": 0.1709, "lr": 1.9322662711922064e-06, "epoch": 3.078228026374752, "percentage": 61.56, "elapsed_time": "2:06:46", "remaining_time": "1:19:09", "throughput": 19895.58, "total_tokens": 151344128} +{"current_steps": 48090, "total_steps": 78105, "loss": 0.2147, "lr": 1.9317222289544234e-06, "epoch": 3.078548108315729, "percentage": 61.57, "elapsed_time": "2:06:47", "remaining_time": "1:19:08", "throughput": 19895.68, "total_tokens": 151361024} +{"current_steps": 48095, "total_steps": 78105, "loss": 0.1662, "lr": 1.9311782150934715e-06, "epoch": 3.078868190256706, "percentage": 61.58, "elapsed_time": "2:06:48", "remaining_time": "1:19:07", "throughput": 19896.02, "total_tokens": 151377600} +{"current_steps": 48100, "total_steps": 78105, "loss": 0.1914, "lr": 1.930634229636517e-06, "epoch": 3.0791882721976824, "percentage": 61.58, "elapsed_time": "2:06:49", "remaining_time": "1:19:06", "throughput": 19896.3, "total_tokens": 151393152} +{"current_steps": 48105, "total_steps": 78105, "loss": 0.1662, "lr": 1.930090272610723e-06, "epoch": 3.0795083541386594, "percentage": 61.59, "elapsed_time": "2:06:49", "remaining_time": "1:19:05", "throughput": 19896.59, "total_tokens": 151408576} +{"current_steps": 48110, "total_steps": 78105, "loss": 0.1979, "lr": 1.9295463440432528e-06, "epoch": 3.0798284360796364, "percentage": 61.6, "elapsed_time": "2:06:50", "remaining_time": "1:19:04", "throughput": 19896.85, "total_tokens": 151423616} +{"current_steps": 48115, "total_steps": 78105, "loss": 0.1284, "lr": 1.929002443961266e-06, "epoch": 3.0801485180206134, "percentage": 61.6, "elapsed_time": "2:06:51", "remaining_time": "1:19:03", "throughput": 19897.18, "total_tokens": 151439616} +{"current_steps": 48120, "total_steps": 78105, "loss": 0.144, "lr": 1.9284585723919237e-06, "epoch": 3.0804685999615904, "percentage": 61.61, "elapsed_time": "2:06:51", "remaining_time": "1:19:03", "throughput": 19897.11, "total_tokens": 151455424} +{"current_steps": 48125, "total_steps": 78105, "loss": 0.1426, "lr": 1.9279147293623825e-06, "epoch": 3.080788681902567, "percentage": 61.62, "elapsed_time": "2:06:52", "remaining_time": "1:19:02", "throughput": 19897.38, "total_tokens": 151470528} +{"current_steps": 48130, "total_steps": 78105, "loss": 0.1396, "lr": 1.9273709148998003e-06, "epoch": 3.081108763843544, "percentage": 61.62, "elapsed_time": "2:06:53", "remaining_time": "1:19:01", "throughput": 19897.55, "total_tokens": 151488768} +{"current_steps": 48135, "total_steps": 78105, "loss": 0.1163, "lr": 1.9268271290313317e-06, "epoch": 3.081428845784521, "percentage": 61.63, "elapsed_time": "2:06:54", "remaining_time": "1:19:00", "throughput": 19897.81, "total_tokens": 151504000} +{"current_steps": 48140, "total_steps": 78105, "loss": 0.177, "lr": 1.9262833717841306e-06, "epoch": 3.081748927725498, "percentage": 61.63, "elapsed_time": "2:06:54", "remaining_time": "1:18:59", "throughput": 19898.08, "total_tokens": 151519424} +{"current_steps": 48145, "total_steps": 78105, "loss": 0.1353, "lr": 1.9257396431853503e-06, "epoch": 3.0820690096664745, "percentage": 61.64, "elapsed_time": "2:06:55", "remaining_time": "1:18:58", "throughput": 19898.39, "total_tokens": 151535232} +{"current_steps": 48150, "total_steps": 78105, "loss": 0.1628, "lr": 1.92519594326214e-06, "epoch": 3.0823890916074514, "percentage": 61.65, "elapsed_time": "2:06:56", "remaining_time": "1:18:58", "throughput": 19898.64, "total_tokens": 151550272} +{"current_steps": 48155, "total_steps": 78105, "loss": 0.1283, "lr": 1.924652272041651e-06, "epoch": 3.0827091735484284, "percentage": 61.65, "elapsed_time": "2:06:56", "remaining_time": "1:18:57", "throughput": 19898.92, "total_tokens": 151565824} +{"current_steps": 48160, "total_steps": 78105, "loss": 0.1678, "lr": 1.92410862955103e-06, "epoch": 3.0830292554894054, "percentage": 61.66, "elapsed_time": "2:06:57", "remaining_time": "1:18:56", "throughput": 19899.29, "total_tokens": 151583040} +{"current_steps": 48165, "total_steps": 78105, "loss": 0.2132, "lr": 1.9235650158174253e-06, "epoch": 3.083349337430382, "percentage": 61.67, "elapsed_time": "2:06:58", "remaining_time": "1:18:55", "throughput": 19899.59, "total_tokens": 151599168} +{"current_steps": 48170, "total_steps": 78105, "loss": 0.1977, "lr": 1.9230214308679806e-06, "epoch": 3.083669419371359, "percentage": 61.67, "elapsed_time": "2:06:58", "remaining_time": "1:18:54", "throughput": 19899.87, "total_tokens": 151614464} +{"current_steps": 48175, "total_steps": 78105, "loss": 0.1759, "lr": 1.9224778747298407e-06, "epoch": 3.083989501312336, "percentage": 61.68, "elapsed_time": "2:06:59", "remaining_time": "1:18:53", "throughput": 19900.12, "total_tokens": 151629440} +{"current_steps": 48180, "total_steps": 78105, "loss": 0.2023, "lr": 1.9219343474301466e-06, "epoch": 3.084309583253313, "percentage": 61.69, "elapsed_time": "2:07:00", "remaining_time": "1:18:52", "throughput": 19900.44, "total_tokens": 151645312} +{"current_steps": 48185, "total_steps": 78105, "loss": 0.1556, "lr": 1.92139084899604e-06, "epoch": 3.08462966519429, "percentage": 61.69, "elapsed_time": "2:07:00", "remaining_time": "1:18:52", "throughput": 19900.72, "total_tokens": 151660736} +{"current_steps": 48190, "total_steps": 78105, "loss": 0.1916, "lr": 1.920847379454662e-06, "epoch": 3.0849497471352665, "percentage": 61.7, "elapsed_time": "2:07:01", "remaining_time": "1:18:51", "throughput": 19901.03, "total_tokens": 151676672} +{"current_steps": 48195, "total_steps": 78105, "loss": 0.1536, "lr": 1.9203039388331473e-06, "epoch": 3.0852698290762435, "percentage": 61.71, "elapsed_time": "2:07:02", "remaining_time": "1:18:50", "throughput": 19901.32, "total_tokens": 151692032} +{"current_steps": 48200, "total_steps": 78105, "loss": 0.2809, "lr": 1.9197605271586364e-06, "epoch": 3.0855899110172205, "percentage": 61.71, "elapsed_time": "2:07:02", "remaining_time": "1:18:49", "throughput": 19901.58, "total_tokens": 151707264} +{"current_steps": 48205, "total_steps": 78105, "loss": 0.1642, "lr": 1.9192171444582606e-06, "epoch": 3.0859099929581975, "percentage": 61.72, "elapsed_time": "2:07:03", "remaining_time": "1:18:48", "throughput": 19901.87, "total_tokens": 151723328} +{"current_steps": 48210, "total_steps": 78105, "loss": 0.195, "lr": 1.918673790759157e-06, "epoch": 3.086230074899174, "percentage": 61.72, "elapsed_time": "2:07:04", "remaining_time": "1:18:47", "throughput": 19902.16, "total_tokens": 151739008} +{"current_steps": 48215, "total_steps": 78105, "loss": 0.2135, "lr": 1.918130466088455e-06, "epoch": 3.086550156840151, "percentage": 61.73, "elapsed_time": "2:07:04", "remaining_time": "1:18:46", "throughput": 19902.45, "total_tokens": 151754752} +{"current_steps": 48220, "total_steps": 78105, "loss": 0.162, "lr": 1.917587170473288e-06, "epoch": 3.086870238781128, "percentage": 61.74, "elapsed_time": "2:07:05", "remaining_time": "1:18:46", "throughput": 19902.77, "total_tokens": 151770688} +{"current_steps": 48225, "total_steps": 78105, "loss": 0.2155, "lr": 1.9170439039407827e-06, "epoch": 3.087190320722105, "percentage": 61.74, "elapsed_time": "2:07:06", "remaining_time": "1:18:45", "throughput": 19903.05, "total_tokens": 151786112} +{"current_steps": 48230, "total_steps": 78105, "loss": 0.2069, "lr": 1.9165006665180694e-06, "epoch": 3.0875104026630815, "percentage": 61.75, "elapsed_time": "2:07:06", "remaining_time": "1:18:44", "throughput": 19903.29, "total_tokens": 151800896} +{"current_steps": 48235, "total_steps": 78105, "loss": 0.1197, "lr": 1.9159574582322725e-06, "epoch": 3.0878304846040585, "percentage": 61.76, "elapsed_time": "2:07:07", "remaining_time": "1:18:43", "throughput": 19903.57, "total_tokens": 151816192} +{"current_steps": 48240, "total_steps": 78105, "loss": 0.1611, "lr": 1.915414279110518e-06, "epoch": 3.0881505665450355, "percentage": 61.76, "elapsed_time": "2:07:08", "remaining_time": "1:18:42", "throughput": 19903.86, "total_tokens": 151831808} +{"current_steps": 48245, "total_steps": 78105, "loss": 0.1622, "lr": 1.91487112917993e-06, "epoch": 3.0884706484860125, "percentage": 61.77, "elapsed_time": "2:07:08", "remaining_time": "1:18:41", "throughput": 19904.13, "total_tokens": 151847168} +{"current_steps": 48250, "total_steps": 78105, "loss": 0.2085, "lr": 1.9143280084676295e-06, "epoch": 3.0887907304269895, "percentage": 61.78, "elapsed_time": "2:07:09", "remaining_time": "1:18:40", "throughput": 19904.5, "total_tokens": 151864256} +{"current_steps": 48255, "total_steps": 78105, "loss": 0.1397, "lr": 1.9137849170007382e-06, "epoch": 3.089110812367966, "percentage": 61.78, "elapsed_time": "2:07:10", "remaining_time": "1:18:40", "throughput": 19904.75, "total_tokens": 151879168} +{"current_steps": 48260, "total_steps": 78105, "loss": 0.1202, "lr": 1.9132418548063743e-06, "epoch": 3.089430894308943, "percentage": 61.79, "elapsed_time": "2:07:10", "remaining_time": "1:18:39", "throughput": 19905.03, "total_tokens": 151894592} +{"current_steps": 48265, "total_steps": 78105, "loss": 0.1594, "lr": 1.9126988219116567e-06, "epoch": 3.08975097624992, "percentage": 61.8, "elapsed_time": "2:07:11", "remaining_time": "1:18:38", "throughput": 19905.25, "total_tokens": 151908864} +{"current_steps": 48270, "total_steps": 78105, "loss": 0.2039, "lr": 1.9121558183436994e-06, "epoch": 3.090071058190897, "percentage": 61.8, "elapsed_time": "2:07:12", "remaining_time": "1:18:37", "throughput": 19905.55, "total_tokens": 151924736} +{"current_steps": 48275, "total_steps": 78105, "loss": 0.1521, "lr": 1.9116128441296194e-06, "epoch": 3.0903911401318735, "percentage": 61.81, "elapsed_time": "2:07:12", "remaining_time": "1:18:36", "throughput": 19905.87, "total_tokens": 151940864} +{"current_steps": 48280, "total_steps": 78105, "loss": 0.1123, "lr": 1.9110698992965284e-06, "epoch": 3.0907112220728505, "percentage": 61.81, "elapsed_time": "2:07:13", "remaining_time": "1:18:35", "throughput": 19906.23, "total_tokens": 151958144} +{"current_steps": 48285, "total_steps": 78105, "loss": 0.1449, "lr": 1.9105269838715405e-06, "epoch": 3.0910313040138275, "percentage": 61.82, "elapsed_time": "2:07:14", "remaining_time": "1:18:34", "throughput": 19906.56, "total_tokens": 151974144} +{"current_steps": 48290, "total_steps": 78105, "loss": 0.1393, "lr": 1.909984097881763e-06, "epoch": 3.0913513859548045, "percentage": 61.83, "elapsed_time": "2:07:15", "remaining_time": "1:18:34", "throughput": 19906.95, "total_tokens": 151991488} +{"current_steps": 48295, "total_steps": 78105, "loss": 0.2076, "lr": 1.9094412413543074e-06, "epoch": 3.0916714678957815, "percentage": 61.83, "elapsed_time": "2:07:15", "remaining_time": "1:18:33", "throughput": 19907.24, "total_tokens": 152006912} +{"current_steps": 48300, "total_steps": 78105, "loss": 0.2696, "lr": 1.908898414316279e-06, "epoch": 3.091991549836758, "percentage": 61.84, "elapsed_time": "2:07:16", "remaining_time": "1:18:32", "throughput": 19907.45, "total_tokens": 152021184} +{"current_steps": 48305, "total_steps": 78105, "loss": 0.1362, "lr": 1.908355616794785e-06, "epoch": 3.092311631777735, "percentage": 61.85, "elapsed_time": "2:07:17", "remaining_time": "1:18:31", "throughput": 19907.72, "total_tokens": 152036288} +{"current_steps": 48310, "total_steps": 78105, "loss": 0.1609, "lr": 1.9078128488169313e-06, "epoch": 3.092631713718712, "percentage": 61.85, "elapsed_time": "2:07:17", "remaining_time": "1:18:30", "throughput": 19907.97, "total_tokens": 152051200} +{"current_steps": 48315, "total_steps": 78105, "loss": 0.1461, "lr": 1.907270110409818e-06, "epoch": 3.092951795659689, "percentage": 61.86, "elapsed_time": "2:07:18", "remaining_time": "1:18:29", "throughput": 19908.25, "total_tokens": 152066240} +{"current_steps": 48320, "total_steps": 78105, "loss": 0.2316, "lr": 1.9067274016005488e-06, "epoch": 3.0932718776006656, "percentage": 61.87, "elapsed_time": "2:07:19", "remaining_time": "1:18:28", "throughput": 19908.56, "total_tokens": 152082304} +{"current_steps": 48325, "total_steps": 78105, "loss": 0.0797, "lr": 1.906184722416222e-06, "epoch": 3.0935919595416426, "percentage": 61.87, "elapsed_time": "2:07:19", "remaining_time": "1:18:27", "throughput": 19908.87, "total_tokens": 152098240} +{"current_steps": 48330, "total_steps": 78105, "loss": 0.1827, "lr": 1.9056420728839375e-06, "epoch": 3.0939120414826196, "percentage": 61.88, "elapsed_time": "2:07:20", "remaining_time": "1:18:27", "throughput": 19909.13, "total_tokens": 152113536} +{"current_steps": 48335, "total_steps": 78105, "loss": 0.1234, "lr": 1.9050994530307918e-06, "epoch": 3.0942321234235965, "percentage": 61.88, "elapsed_time": "2:07:21", "remaining_time": "1:18:26", "throughput": 19909.42, "total_tokens": 152129152} +{"current_steps": 48340, "total_steps": 78105, "loss": 0.2724, "lr": 1.9045568628838814e-06, "epoch": 3.0945522053645735, "percentage": 61.89, "elapsed_time": "2:07:21", "remaining_time": "1:18:25", "throughput": 19909.71, "total_tokens": 152144704} +{"current_steps": 48345, "total_steps": 78105, "loss": 0.1893, "lr": 1.9040143024702988e-06, "epoch": 3.09487228730555, "percentage": 61.9, "elapsed_time": "2:07:22", "remaining_time": "1:18:24", "throughput": 19910.0, "total_tokens": 152160640} +{"current_steps": 48350, "total_steps": 78105, "loss": 0.1086, "lr": 1.903471771817138e-06, "epoch": 3.095192369246527, "percentage": 61.9, "elapsed_time": "2:07:23", "remaining_time": "1:18:23", "throughput": 19910.29, "total_tokens": 152176320} +{"current_steps": 48355, "total_steps": 78105, "loss": 0.1531, "lr": 1.9029292709514894e-06, "epoch": 3.095512451187504, "percentage": 61.91, "elapsed_time": "2:07:23", "remaining_time": "1:18:22", "throughput": 19910.59, "total_tokens": 152191808} +{"current_steps": 48360, "total_steps": 78105, "loss": 0.1193, "lr": 1.9023867999004433e-06, "epoch": 3.095832533128481, "percentage": 61.92, "elapsed_time": "2:07:24", "remaining_time": "1:18:21", "throughput": 19910.99, "total_tokens": 152209280} +{"current_steps": 48365, "total_steps": 78105, "loss": 0.1564, "lr": 1.9018443586910875e-06, "epoch": 3.0961526150694576, "percentage": 61.92, "elapsed_time": "2:07:25", "remaining_time": "1:18:21", "throughput": 19911.23, "total_tokens": 152223936} +{"current_steps": 48370, "total_steps": 78105, "loss": 0.1128, "lr": 1.9013019473505085e-06, "epoch": 3.0964726970104346, "percentage": 61.93, "elapsed_time": "2:07:25", "remaining_time": "1:18:20", "throughput": 19911.44, "total_tokens": 152238400} +{"current_steps": 48375, "total_steps": 78105, "loss": 0.1442, "lr": 1.9007595659057925e-06, "epoch": 3.0967927789514116, "percentage": 61.94, "elapsed_time": "2:07:26", "remaining_time": "1:18:19", "throughput": 19911.74, "total_tokens": 152254336} +{"current_steps": 48380, "total_steps": 78105, "loss": 0.1276, "lr": 1.9002172143840217e-06, "epoch": 3.0971128608923886, "percentage": 61.94, "elapsed_time": "2:07:27", "remaining_time": "1:18:18", "throughput": 19912.02, "total_tokens": 152269760} +{"current_steps": 48385, "total_steps": 78105, "loss": 0.1542, "lr": 1.8996748928122794e-06, "epoch": 3.0974329428333656, "percentage": 61.95, "elapsed_time": "2:07:27", "remaining_time": "1:18:17", "throughput": 19912.27, "total_tokens": 152284736} +{"current_steps": 48390, "total_steps": 78105, "loss": 0.1119, "lr": 1.8991326012176455e-06, "epoch": 3.097753024774342, "percentage": 61.96, "elapsed_time": "2:07:28", "remaining_time": "1:18:16", "throughput": 19912.58, "total_tokens": 152300608} +{"current_steps": 48395, "total_steps": 78105, "loss": 0.1317, "lr": 1.898590339627201e-06, "epoch": 3.098073106715319, "percentage": 61.96, "elapsed_time": "2:07:29", "remaining_time": "1:18:15", "throughput": 19912.83, "total_tokens": 152315264} +{"current_steps": 48400, "total_steps": 78105, "loss": 0.1217, "lr": 1.8980481080680207e-06, "epoch": 3.098393188656296, "percentage": 61.97, "elapsed_time": "2:07:29", "remaining_time": "1:18:14", "throughput": 19913.2, "total_tokens": 152332096} +{"current_steps": 48405, "total_steps": 78105, "loss": 0.1199, "lr": 1.897505906567184e-06, "epoch": 3.098713270597273, "percentage": 61.97, "elapsed_time": "2:07:30", "remaining_time": "1:18:14", "throughput": 19913.49, "total_tokens": 152347712} +{"current_steps": 48410, "total_steps": 78105, "loss": 0.2118, "lr": 1.8969637351517625e-06, "epoch": 3.0990333525382496, "percentage": 61.98, "elapsed_time": "2:07:31", "remaining_time": "1:18:13", "throughput": 19913.76, "total_tokens": 152362816} +{"current_steps": 48415, "total_steps": 78105, "loss": 0.1492, "lr": 1.8964215938488312e-06, "epoch": 3.0993534344792266, "percentage": 61.99, "elapsed_time": "2:07:31", "remaining_time": "1:18:12", "throughput": 19914.08, "total_tokens": 152378880} +{"current_steps": 48420, "total_steps": 78105, "loss": 0.1298, "lr": 1.8958794826854623e-06, "epoch": 3.0996735164202036, "percentage": 61.99, "elapsed_time": "2:07:32", "remaining_time": "1:18:11", "throughput": 19914.34, "total_tokens": 152394368} +{"current_steps": 48425, "total_steps": 78105, "loss": 0.1463, "lr": 1.8953374016887247e-06, "epoch": 3.0999935983611806, "percentage": 62.0, "elapsed_time": "2:07:33", "remaining_time": "1:18:10", "throughput": 19914.62, "total_tokens": 152409792} +{"current_steps": 48430, "total_steps": 78105, "loss": 0.1744, "lr": 1.8947953508856887e-06, "epoch": 3.100313680302157, "percentage": 62.01, "elapsed_time": "2:07:33", "remaining_time": "1:18:09", "throughput": 19914.86, "total_tokens": 152424640} +{"current_steps": 48435, "total_steps": 78105, "loss": 0.1828, "lr": 1.8942533303034192e-06, "epoch": 3.100633762243134, "percentage": 62.01, "elapsed_time": "2:07:34", "remaining_time": "1:18:08", "throughput": 19915.23, "total_tokens": 152441856} +{"current_steps": 48440, "total_steps": 78105, "loss": 0.2279, "lr": 1.8937113399689838e-06, "epoch": 3.100953844184111, "percentage": 62.02, "elapsed_time": "2:07:35", "remaining_time": "1:18:08", "throughput": 19915.51, "total_tokens": 152457216} +{"current_steps": 48445, "total_steps": 78105, "loss": 0.1664, "lr": 1.8931693799094456e-06, "epoch": 3.101273926125088, "percentage": 62.03, "elapsed_time": "2:07:35", "remaining_time": "1:18:07", "throughput": 19915.79, "total_tokens": 152472704} +{"current_steps": 48450, "total_steps": 78105, "loss": 0.1746, "lr": 1.8926274501518687e-06, "epoch": 3.101594008066065, "percentage": 62.03, "elapsed_time": "2:07:36", "remaining_time": "1:18:06", "throughput": 19916.11, "total_tokens": 152488896} +{"current_steps": 48455, "total_steps": 78105, "loss": 0.1245, "lr": 1.892085550723312e-06, "epoch": 3.1019140900070417, "percentage": 62.04, "elapsed_time": "2:07:37", "remaining_time": "1:18:05", "throughput": 19916.39, "total_tokens": 152504384} +{"current_steps": 48460, "total_steps": 78105, "loss": 0.1303, "lr": 1.8915436816508379e-06, "epoch": 3.1022341719480186, "percentage": 62.04, "elapsed_time": "2:07:37", "remaining_time": "1:18:04", "throughput": 19916.64, "total_tokens": 152519104} +{"current_steps": 48465, "total_steps": 78105, "loss": 0.1384, "lr": 1.8910018429615017e-06, "epoch": 3.1025542538889956, "percentage": 62.05, "elapsed_time": "2:07:38", "remaining_time": "1:18:03", "throughput": 19916.92, "total_tokens": 152534208} +{"current_steps": 48470, "total_steps": 78105, "loss": 0.1584, "lr": 1.8904600346823617e-06, "epoch": 3.1028743358299726, "percentage": 62.06, "elapsed_time": "2:07:39", "remaining_time": "1:18:02", "throughput": 19917.18, "total_tokens": 152549184} +{"current_steps": 48475, "total_steps": 78105, "loss": 0.2436, "lr": 1.8899182568404723e-06, "epoch": 3.103194417770949, "percentage": 62.06, "elapsed_time": "2:07:39", "remaining_time": "1:18:02", "throughput": 19917.43, "total_tokens": 152564416} +{"current_steps": 48480, "total_steps": 78105, "loss": 0.1038, "lr": 1.8893765094628875e-06, "epoch": 3.103514499711926, "percentage": 62.07, "elapsed_time": "2:07:40", "remaining_time": "1:18:01", "throughput": 19917.72, "total_tokens": 152580032} +{"current_steps": 48485, "total_steps": 78105, "loss": 0.1456, "lr": 1.8888347925766598e-06, "epoch": 3.103834581652903, "percentage": 62.08, "elapsed_time": "2:07:41", "remaining_time": "1:18:00", "throughput": 19918.02, "total_tokens": 152596160} +{"current_steps": 48490, "total_steps": 78105, "loss": 0.1355, "lr": 1.8882931062088383e-06, "epoch": 3.10415466359388, "percentage": 62.08, "elapsed_time": "2:07:41", "remaining_time": "1:17:59", "throughput": 19918.35, "total_tokens": 152612864} +{"current_steps": 48495, "total_steps": 78105, "loss": 0.1907, "lr": 1.8877514503864738e-06, "epoch": 3.1044747455348567, "percentage": 62.09, "elapsed_time": "2:07:42", "remaining_time": "1:17:58", "throughput": 19918.66, "total_tokens": 152628672} +{"current_steps": 48500, "total_steps": 78105, "loss": 0.0881, "lr": 1.8872098251366117e-06, "epoch": 3.1047948274758337, "percentage": 62.1, "elapsed_time": "2:07:43", "remaining_time": "1:17:57", "throughput": 19918.93, "total_tokens": 152643968} +{"current_steps": 48505, "total_steps": 78105, "loss": 0.1305, "lr": 1.8866682304862999e-06, "epoch": 3.1051149094168107, "percentage": 62.1, "elapsed_time": "2:07:43", "remaining_time": "1:17:56", "throughput": 19919.27, "total_tokens": 152660288} +{"current_steps": 48510, "total_steps": 78105, "loss": 0.2122, "lr": 1.886126666462581e-06, "epoch": 3.1054349913577877, "percentage": 62.11, "elapsed_time": "2:07:44", "remaining_time": "1:17:56", "throughput": 19919.56, "total_tokens": 152675712} +{"current_steps": 48515, "total_steps": 78105, "loss": 0.1203, "lr": 1.8855851330925002e-06, "epoch": 3.1057550732987647, "percentage": 62.12, "elapsed_time": "2:07:45", "remaining_time": "1:17:55", "throughput": 19919.85, "total_tokens": 152691840} +{"current_steps": 48520, "total_steps": 78105, "loss": 0.1581, "lr": 1.885043630403096e-06, "epoch": 3.106075155239741, "percentage": 62.12, "elapsed_time": "2:07:46", "remaining_time": "1:17:54", "throughput": 19920.18, "total_tokens": 152708224} +{"current_steps": 48525, "total_steps": 78105, "loss": 0.1143, "lr": 1.8845021584214104e-06, "epoch": 3.106395237180718, "percentage": 62.13, "elapsed_time": "2:07:46", "remaining_time": "1:17:53", "throughput": 19920.51, "total_tokens": 152725376} +{"current_steps": 48530, "total_steps": 78105, "loss": 0.1379, "lr": 1.8839607171744805e-06, "epoch": 3.106715319121695, "percentage": 62.13, "elapsed_time": "2:07:47", "remaining_time": "1:17:52", "throughput": 19920.88, "total_tokens": 152742656} +{"current_steps": 48535, "total_steps": 78105, "loss": 0.1376, "lr": 1.8834193066893436e-06, "epoch": 3.107035401062672, "percentage": 62.14, "elapsed_time": "2:07:48", "remaining_time": "1:17:51", "throughput": 19921.1, "total_tokens": 152757120} +{"current_steps": 48540, "total_steps": 78105, "loss": 0.1454, "lr": 1.8828779269930362e-06, "epoch": 3.1073554830036487, "percentage": 62.15, "elapsed_time": "2:07:48", "remaining_time": "1:17:50", "throughput": 19921.34, "total_tokens": 152771840} +{"current_steps": 48545, "total_steps": 78105, "loss": 0.2025, "lr": 1.882336578112589e-06, "epoch": 3.1076755649446257, "percentage": 62.15, "elapsed_time": "2:07:49", "remaining_time": "1:17:50", "throughput": 19921.65, "total_tokens": 152787712} +{"current_steps": 48550, "total_steps": 78105, "loss": 0.1196, "lr": 1.8817952600750373e-06, "epoch": 3.1079956468856027, "percentage": 62.16, "elapsed_time": "2:07:50", "remaining_time": "1:17:49", "throughput": 19921.92, "total_tokens": 152803136} +{"current_steps": 48555, "total_steps": 78105, "loss": 0.1579, "lr": 1.8812539729074092e-06, "epoch": 3.1083157288265797, "percentage": 62.17, "elapsed_time": "2:07:50", "remaining_time": "1:17:48", "throughput": 19922.2, "total_tokens": 152818432} +{"current_steps": 48560, "total_steps": 78105, "loss": 0.1381, "lr": 1.880712716636735e-06, "epoch": 3.1086358107675567, "percentage": 62.17, "elapsed_time": "2:07:51", "remaining_time": "1:17:47", "throughput": 19922.49, "total_tokens": 152833920} +{"current_steps": 48565, "total_steps": 78105, "loss": 0.1764, "lr": 1.8801714912900415e-06, "epoch": 3.1089558927085332, "percentage": 62.18, "elapsed_time": "2:07:52", "remaining_time": "1:17:46", "throughput": 19922.8, "total_tokens": 152850112} +{"current_steps": 48570, "total_steps": 78105, "loss": 0.1551, "lr": 1.8796302968943564e-06, "epoch": 3.10927597464951, "percentage": 62.19, "elapsed_time": "2:07:52", "remaining_time": "1:17:45", "throughput": 19923.08, "total_tokens": 152865536} +{"current_steps": 48575, "total_steps": 78105, "loss": 0.2279, "lr": 1.8790891334767017e-06, "epoch": 3.109596056590487, "percentage": 62.19, "elapsed_time": "2:07:53", "remaining_time": "1:17:44", "throughput": 19923.38, "total_tokens": 152881664} +{"current_steps": 48580, "total_steps": 78105, "loss": 0.1577, "lr": 1.8785480010641018e-06, "epoch": 3.109916138531464, "percentage": 62.2, "elapsed_time": "2:07:54", "remaining_time": "1:17:44", "throughput": 19923.71, "total_tokens": 152898112} +{"current_steps": 48585, "total_steps": 78105, "loss": 0.1234, "lr": 1.8780068996835776e-06, "epoch": 3.1102362204724407, "percentage": 62.2, "elapsed_time": "2:07:54", "remaining_time": "1:17:43", "throughput": 19923.98, "total_tokens": 152913280} +{"current_steps": 48590, "total_steps": 78105, "loss": 0.1973, "lr": 1.8774658293621486e-06, "epoch": 3.1105563024134177, "percentage": 62.21, "elapsed_time": "2:07:55", "remaining_time": "1:17:42", "throughput": 19924.25, "total_tokens": 152928960} +{"current_steps": 48595, "total_steps": 78105, "loss": 0.1454, "lr": 1.8769247901268348e-06, "epoch": 3.1108763843543947, "percentage": 62.22, "elapsed_time": "2:07:56", "remaining_time": "1:17:41", "throughput": 19924.65, "total_tokens": 152946880} +{"current_steps": 48600, "total_steps": 78105, "loss": 0.1297, "lr": 1.8763837820046502e-06, "epoch": 3.1111964662953717, "percentage": 62.22, "elapsed_time": "2:07:56", "remaining_time": "1:17:40", "throughput": 19924.95, "total_tokens": 152962880} +{"current_steps": 48605, "total_steps": 78105, "loss": 0.1453, "lr": 1.8758428050226124e-06, "epoch": 3.1115165482363487, "percentage": 62.23, "elapsed_time": "2:07:57", "remaining_time": "1:17:39", "throughput": 19925.24, "total_tokens": 152978432} +{"current_steps": 48610, "total_steps": 78105, "loss": 0.1802, "lr": 1.8753018592077326e-06, "epoch": 3.1118366301773253, "percentage": 62.24, "elapsed_time": "2:07:58", "remaining_time": "1:17:38", "throughput": 19925.53, "total_tokens": 152993984} +{"current_steps": 48615, "total_steps": 78105, "loss": 0.1786, "lr": 1.8747609445870246e-06, "epoch": 3.1121567121183022, "percentage": 62.24, "elapsed_time": "2:07:58", "remaining_time": "1:17:38", "throughput": 19925.77, "total_tokens": 153008512} +{"current_steps": 48620, "total_steps": 78105, "loss": 0.1538, "lr": 1.8742200611874978e-06, "epoch": 3.1124767940592792, "percentage": 62.25, "elapsed_time": "2:07:59", "remaining_time": "1:17:37", "throughput": 19926.13, "total_tokens": 153025344} +{"current_steps": 48625, "total_steps": 78105, "loss": 0.1805, "lr": 1.8736792090361628e-06, "epoch": 3.1127968760002562, "percentage": 62.26, "elapsed_time": "2:08:00", "remaining_time": "1:17:36", "throughput": 19926.37, "total_tokens": 153040320} +{"current_steps": 48630, "total_steps": 78105, "loss": 0.1545, "lr": 1.8731383881600247e-06, "epoch": 3.1131169579412328, "percentage": 62.26, "elapsed_time": "2:08:00", "remaining_time": "1:17:35", "throughput": 19926.63, "total_tokens": 153055360} +{"current_steps": 48635, "total_steps": 78105, "loss": 0.1554, "lr": 1.8725975985860912e-06, "epoch": 3.1134370398822098, "percentage": 62.27, "elapsed_time": "2:08:01", "remaining_time": "1:17:34", "throughput": 19926.92, "total_tokens": 153070656} +{"current_steps": 48640, "total_steps": 78105, "loss": 0.1662, "lr": 1.8720568403413642e-06, "epoch": 3.1137571218231868, "percentage": 62.28, "elapsed_time": "2:08:02", "remaining_time": "1:17:33", "throughput": 19927.22, "total_tokens": 153086336} +{"current_steps": 48645, "total_steps": 78105, "loss": 0.2895, "lr": 1.8715161134528487e-06, "epoch": 3.1140772037641637, "percentage": 62.28, "elapsed_time": "2:08:02", "remaining_time": "1:17:32", "throughput": 19927.46, "total_tokens": 153101056} +{"current_steps": 48650, "total_steps": 78105, "loss": 0.1772, "lr": 1.8709754179475442e-06, "epoch": 3.1143972857051407, "percentage": 62.29, "elapsed_time": "2:08:03", "remaining_time": "1:17:32", "throughput": 19927.77, "total_tokens": 153117568} +{"current_steps": 48655, "total_steps": 78105, "loss": 0.1515, "lr": 1.8704347538524509e-06, "epoch": 3.1147173676461173, "percentage": 62.29, "elapsed_time": "2:08:04", "remaining_time": "1:17:31", "throughput": 19928.04, "total_tokens": 153132608} +{"current_steps": 48660, "total_steps": 78105, "loss": 0.1736, "lr": 1.8698941211945675e-06, "epoch": 3.1150374495870943, "percentage": 62.3, "elapsed_time": "2:08:04", "remaining_time": "1:17:30", "throughput": 19928.33, "total_tokens": 153148480} +{"current_steps": 48665, "total_steps": 78105, "loss": 0.1968, "lr": 1.869353520000889e-06, "epoch": 3.1153575315280713, "percentage": 62.31, "elapsed_time": "2:08:05", "remaining_time": "1:17:29", "throughput": 19928.57, "total_tokens": 153163328} +{"current_steps": 48670, "total_steps": 78105, "loss": 0.2051, "lr": 1.868812950298411e-06, "epoch": 3.1156776134690483, "percentage": 62.31, "elapsed_time": "2:08:06", "remaining_time": "1:17:28", "throughput": 19928.89, "total_tokens": 153179072} +{"current_steps": 48675, "total_steps": 78105, "loss": 0.1486, "lr": 1.868272412114126e-06, "epoch": 3.115997695410025, "percentage": 62.32, "elapsed_time": "2:08:06", "remaining_time": "1:17:27", "throughput": 19929.18, "total_tokens": 153194944} +{"current_steps": 48680, "total_steps": 78105, "loss": 0.129, "lr": 1.8677319054750272e-06, "epoch": 3.116317777351002, "percentage": 62.33, "elapsed_time": "2:08:07", "remaining_time": "1:17:26", "throughput": 19929.43, "total_tokens": 153209856} +{"current_steps": 48685, "total_steps": 78105, "loss": 0.1309, "lr": 1.867191430408103e-06, "epoch": 3.116637859291979, "percentage": 62.33, "elapsed_time": "2:08:08", "remaining_time": "1:17:25", "throughput": 19929.72, "total_tokens": 153225472} +{"current_steps": 48690, "total_steps": 78105, "loss": 0.1015, "lr": 1.8666509869403433e-06, "epoch": 3.1169579412329558, "percentage": 62.34, "elapsed_time": "2:08:09", "remaining_time": "1:17:25", "throughput": 19930.14, "total_tokens": 153243392} +{"current_steps": 48695, "total_steps": 78105, "loss": 0.1797, "lr": 1.8661105750987333e-06, "epoch": 3.1172780231739323, "percentage": 62.35, "elapsed_time": "2:08:09", "remaining_time": "1:17:24", "throughput": 19930.4, "total_tokens": 153258752} +{"current_steps": 48700, "total_steps": 78105, "loss": 0.3228, "lr": 1.86557019491026e-06, "epoch": 3.1175981051149093, "percentage": 62.35, "elapsed_time": "2:08:10", "remaining_time": "1:17:23", "throughput": 19930.65, "total_tokens": 153273856} +{"current_steps": 48705, "total_steps": 78105, "loss": 0.1741, "lr": 1.8650298464019057e-06, "epoch": 3.1179181870558863, "percentage": 62.36, "elapsed_time": "2:08:11", "remaining_time": "1:17:22", "throughput": 19930.98, "total_tokens": 153290496} +{"current_steps": 48710, "total_steps": 78105, "loss": 0.1642, "lr": 1.8644895296006539e-06, "epoch": 3.1182382689968633, "percentage": 62.36, "elapsed_time": "2:08:11", "remaining_time": "1:17:21", "throughput": 19931.29, "total_tokens": 153306752} +{"current_steps": 48715, "total_steps": 78105, "loss": 0.1198, "lr": 1.8639492445334857e-06, "epoch": 3.1185583509378403, "percentage": 62.37, "elapsed_time": "2:08:12", "remaining_time": "1:17:20", "throughput": 19931.52, "total_tokens": 153321600} +{"current_steps": 48720, "total_steps": 78105, "loss": 0.1153, "lr": 1.8634089912273779e-06, "epoch": 3.118878432878817, "percentage": 62.38, "elapsed_time": "2:08:13", "remaining_time": "1:17:20", "throughput": 19931.79, "total_tokens": 153336640} +{"current_steps": 48725, "total_steps": 78105, "loss": 0.1617, "lr": 1.8628687697093101e-06, "epoch": 3.119198514819794, "percentage": 62.38, "elapsed_time": "2:08:13", "remaining_time": "1:17:19", "throughput": 19932.11, "total_tokens": 153352832} +{"current_steps": 48730, "total_steps": 78105, "loss": 0.2183, "lr": 1.8623285800062565e-06, "epoch": 3.119518596760771, "percentage": 62.39, "elapsed_time": "2:08:14", "remaining_time": "1:17:18", "throughput": 19932.43, "total_tokens": 153368768} +{"current_steps": 48735, "total_steps": 78105, "loss": 0.1451, "lr": 1.8617884221451927e-06, "epoch": 3.119838678701748, "percentage": 62.4, "elapsed_time": "2:08:15", "remaining_time": "1:17:17", "throughput": 19932.8, "total_tokens": 153385920} +{"current_steps": 48740, "total_steps": 78105, "loss": 0.1351, "lr": 1.8612482961530897e-06, "epoch": 3.1201587606427243, "percentage": 62.4, "elapsed_time": "2:08:15", "remaining_time": "1:17:16", "throughput": 19933.09, "total_tokens": 153401216} +{"current_steps": 48745, "total_steps": 78105, "loss": 0.1363, "lr": 1.8607082020569214e-06, "epoch": 3.1204788425837013, "percentage": 62.41, "elapsed_time": "2:08:16", "remaining_time": "1:17:15", "throughput": 19933.38, "total_tokens": 153417024} +{"current_steps": 48750, "total_steps": 78105, "loss": 0.2222, "lr": 1.8601681398836541e-06, "epoch": 3.1207989245246783, "percentage": 62.42, "elapsed_time": "2:08:17", "remaining_time": "1:17:14", "throughput": 19933.72, "total_tokens": 153433664} +{"current_steps": 48755, "total_steps": 78105, "loss": 0.1448, "lr": 1.859628109660258e-06, "epoch": 3.1211190064656553, "percentage": 62.42, "elapsed_time": "2:08:17", "remaining_time": "1:17:14", "throughput": 19934.04, "total_tokens": 153450112} +{"current_steps": 48760, "total_steps": 78105, "loss": 0.1144, "lr": 1.8590881114136978e-06, "epoch": 3.1214390884066323, "percentage": 62.43, "elapsed_time": "2:08:18", "remaining_time": "1:17:13", "throughput": 19934.27, "total_tokens": 153464512} +{"current_steps": 48765, "total_steps": 78105, "loss": 0.1898, "lr": 1.8585481451709386e-06, "epoch": 3.121759170347609, "percentage": 62.44, "elapsed_time": "2:08:19", "remaining_time": "1:17:12", "throughput": 19934.55, "total_tokens": 153479808} +{"current_steps": 48770, "total_steps": 78105, "loss": 0.2137, "lr": 1.8580082109589454e-06, "epoch": 3.122079252288586, "percentage": 62.44, "elapsed_time": "2:08:19", "remaining_time": "1:17:11", "throughput": 19934.85, "total_tokens": 153495680} +{"current_steps": 48775, "total_steps": 78105, "loss": 0.1361, "lr": 1.8574683088046769e-06, "epoch": 3.122399334229563, "percentage": 62.45, "elapsed_time": "2:08:20", "remaining_time": "1:17:10", "throughput": 19935.2, "total_tokens": 153512640} +{"current_steps": 48780, "total_steps": 78105, "loss": 0.1215, "lr": 1.856928438735095e-06, "epoch": 3.12271941617054, "percentage": 62.45, "elapsed_time": "2:08:21", "remaining_time": "1:17:09", "throughput": 19935.45, "total_tokens": 153527680} +{"current_steps": 48785, "total_steps": 78105, "loss": 0.2302, "lr": 1.8563886007771564e-06, "epoch": 3.1230394981115164, "percentage": 62.46, "elapsed_time": "2:08:21", "remaining_time": "1:17:08", "throughput": 19935.69, "total_tokens": 153542336} +{"current_steps": 48790, "total_steps": 78105, "loss": 0.1459, "lr": 1.8558487949578192e-06, "epoch": 3.1233595800524934, "percentage": 62.47, "elapsed_time": "2:08:22", "remaining_time": "1:17:08", "throughput": 19935.95, "total_tokens": 153557568} +{"current_steps": 48795, "total_steps": 78105, "loss": 0.1635, "lr": 1.8553090213040372e-06, "epoch": 3.1236796619934704, "percentage": 62.47, "elapsed_time": "2:08:23", "remaining_time": "1:17:07", "throughput": 19936.29, "total_tokens": 153574272} +{"current_steps": 48800, "total_steps": 78105, "loss": 0.2197, "lr": 1.8547692798427659e-06, "epoch": 3.1239997439344473, "percentage": 62.48, "elapsed_time": "2:08:23", "remaining_time": "1:17:06", "throughput": 19936.56, "total_tokens": 153589184} +{"current_steps": 48805, "total_steps": 78105, "loss": 0.2185, "lr": 1.854229570600955e-06, "epoch": 3.124319825875424, "percentage": 62.49, "elapsed_time": "2:08:24", "remaining_time": "1:17:05", "throughput": 19936.81, "total_tokens": 153603840} +{"current_steps": 48810, "total_steps": 78105, "loss": 0.1806, "lr": 1.853689893605556e-06, "epoch": 3.124639907816401, "percentage": 62.49, "elapsed_time": "2:08:25", "remaining_time": "1:17:04", "throughput": 19937.11, "total_tokens": 153620288} +{"current_steps": 48815, "total_steps": 78105, "loss": 0.1389, "lr": 1.8531502488835162e-06, "epoch": 3.124959989757378, "percentage": 62.5, "elapsed_time": "2:08:25", "remaining_time": "1:17:03", "throughput": 19937.35, "total_tokens": 153635264} +{"current_steps": 48820, "total_steps": 78105, "loss": 0.2212, "lr": 1.852610636461785e-06, "epoch": 3.125280071698355, "percentage": 62.51, "elapsed_time": "2:08:26", "remaining_time": "1:17:02", "throughput": 19937.66, "total_tokens": 153651264} +{"current_steps": 48825, "total_steps": 78105, "loss": 0.1795, "lr": 1.8520710563673055e-06, "epoch": 3.125600153639332, "percentage": 62.51, "elapsed_time": "2:08:27", "remaining_time": "1:17:01", "throughput": 19937.96, "total_tokens": 153667072} +{"current_steps": 48830, "total_steps": 78105, "loss": 0.1396, "lr": 1.8515315086270228e-06, "epoch": 3.1259202355803084, "percentage": 62.52, "elapsed_time": "2:08:27", "remaining_time": "1:17:01", "throughput": 19938.25, "total_tokens": 153682880} +{"current_steps": 48835, "total_steps": 78105, "loss": 0.1204, "lr": 1.8509919932678797e-06, "epoch": 3.1262403175212854, "percentage": 62.52, "elapsed_time": "2:08:28", "remaining_time": "1:17:00", "throughput": 19938.54, "total_tokens": 153698880} +{"current_steps": 48840, "total_steps": 78105, "loss": 0.154, "lr": 1.8504525103168147e-06, "epoch": 3.1265603994622624, "percentage": 62.53, "elapsed_time": "2:08:29", "remaining_time": "1:16:59", "throughput": 19938.87, "total_tokens": 153715136} +{"current_steps": 48845, "total_steps": 78105, "loss": 0.1258, "lr": 1.8499130598007685e-06, "epoch": 3.1268804814032394, "percentage": 62.54, "elapsed_time": "2:08:29", "remaining_time": "1:16:58", "throughput": 19939.09, "total_tokens": 153729728} +{"current_steps": 48850, "total_steps": 78105, "loss": 0.1056, "lr": 1.8493736417466774e-06, "epoch": 3.127200563344216, "percentage": 62.54, "elapsed_time": "2:08:30", "remaining_time": "1:16:57", "throughput": 19939.47, "total_tokens": 153747264} +{"current_steps": 48855, "total_steps": 78105, "loss": 0.1394, "lr": 1.8488342561814787e-06, "epoch": 3.127520645285193, "percentage": 62.55, "elapsed_time": "2:08:31", "remaining_time": "1:16:56", "throughput": 19939.71, "total_tokens": 153761728} +{"current_steps": 48860, "total_steps": 78105, "loss": 0.1739, "lr": 1.8482949031321045e-06, "epoch": 3.12784072722617, "percentage": 62.56, "elapsed_time": "2:08:32", "remaining_time": "1:16:56", "throughput": 19940.02, "total_tokens": 153777856} +{"current_steps": 48865, "total_steps": 78105, "loss": 0.1185, "lr": 1.8477555826254893e-06, "epoch": 3.128160809167147, "percentage": 62.56, "elapsed_time": "2:08:32", "remaining_time": "1:16:55", "throughput": 19940.29, "total_tokens": 153793344} +{"current_steps": 48870, "total_steps": 78105, "loss": 0.1611, "lr": 1.8472162946885617e-06, "epoch": 3.128480891108124, "percentage": 62.57, "elapsed_time": "2:08:33", "remaining_time": "1:16:54", "throughput": 19940.56, "total_tokens": 153808704} +{"current_steps": 48875, "total_steps": 78105, "loss": 0.1739, "lr": 1.8466770393482526e-06, "epoch": 3.1288009730491004, "percentage": 62.58, "elapsed_time": "2:08:34", "remaining_time": "1:16:53", "throughput": 19940.85, "total_tokens": 153824576} +{"current_steps": 48880, "total_steps": 78105, "loss": 0.171, "lr": 1.8461378166314886e-06, "epoch": 3.1291210549900774, "percentage": 62.58, "elapsed_time": "2:08:34", "remaining_time": "1:16:52", "throughput": 19941.1, "total_tokens": 153839616} +{"current_steps": 48885, "total_steps": 78105, "loss": 0.1554, "lr": 1.8455986265651966e-06, "epoch": 3.1294411369310544, "percentage": 62.59, "elapsed_time": "2:08:35", "remaining_time": "1:16:51", "throughput": 19941.41, "total_tokens": 153855552} +{"current_steps": 48890, "total_steps": 78105, "loss": 0.1696, "lr": 1.845059469176301e-06, "epoch": 3.1297612188720314, "percentage": 62.6, "elapsed_time": "2:08:36", "remaining_time": "1:16:50", "throughput": 19941.72, "total_tokens": 153871616} +{"current_steps": 48895, "total_steps": 78105, "loss": 0.1619, "lr": 1.8445203444917231e-06, "epoch": 3.130081300813008, "percentage": 62.6, "elapsed_time": "2:08:36", "remaining_time": "1:16:49", "throughput": 19941.97, "total_tokens": 153886720} +{"current_steps": 48900, "total_steps": 78105, "loss": 0.1497, "lr": 1.8439812525383858e-06, "epoch": 3.130401382753985, "percentage": 62.61, "elapsed_time": "2:08:37", "remaining_time": "1:16:49", "throughput": 19942.21, "total_tokens": 153901568} +{"current_steps": 48905, "total_steps": 78105, "loss": 0.1483, "lr": 1.8434421933432068e-06, "epoch": 3.130721464694962, "percentage": 62.61, "elapsed_time": "2:08:38", "remaining_time": "1:16:48", "throughput": 19942.58, "total_tokens": 153918720} +{"current_steps": 48910, "total_steps": 78105, "loss": 0.1223, "lr": 1.842903166933106e-06, "epoch": 3.131041546635939, "percentage": 62.62, "elapsed_time": "2:08:38", "remaining_time": "1:16:47", "throughput": 19942.94, "total_tokens": 153935808} +{"current_steps": 48915, "total_steps": 78105, "loss": 0.1352, "lr": 1.8423641733349974e-06, "epoch": 3.131361628576916, "percentage": 62.63, "elapsed_time": "2:08:39", "remaining_time": "1:16:46", "throughput": 19943.25, "total_tokens": 153951616} +{"current_steps": 48920, "total_steps": 78105, "loss": 0.1278, "lr": 1.8418252125757974e-06, "epoch": 3.1316817105178925, "percentage": 62.63, "elapsed_time": "2:08:40", "remaining_time": "1:16:45", "throughput": 19943.51, "total_tokens": 153966976} +{"current_steps": 48925, "total_steps": 78105, "loss": 0.1283, "lr": 1.841286284682417e-06, "epoch": 3.1320017924588694, "percentage": 62.64, "elapsed_time": "2:08:40", "remaining_time": "1:16:44", "throughput": 19943.79, "total_tokens": 153982720} +{"current_steps": 48930, "total_steps": 78105, "loss": 0.2001, "lr": 1.8407473896817688e-06, "epoch": 3.1323218743998464, "percentage": 62.65, "elapsed_time": "2:08:41", "remaining_time": "1:16:44", "throughput": 19944.05, "total_tokens": 153998080} +{"current_steps": 48935, "total_steps": 78105, "loss": 0.1943, "lr": 1.8402085276007614e-06, "epoch": 3.1326419563408234, "percentage": 62.65, "elapsed_time": "2:08:42", "remaining_time": "1:16:43", "throughput": 19944.31, "total_tokens": 154013632} +{"current_steps": 48940, "total_steps": 78105, "loss": 0.1954, "lr": 1.8396696984663036e-06, "epoch": 3.1329620382818, "percentage": 62.66, "elapsed_time": "2:08:42", "remaining_time": "1:16:42", "throughput": 19944.6, "total_tokens": 154029248} +{"current_steps": 48945, "total_steps": 78105, "loss": 0.128, "lr": 1.8391309023053028e-06, "epoch": 3.133282120222777, "percentage": 62.67, "elapsed_time": "2:08:43", "remaining_time": "1:16:41", "throughput": 19944.86, "total_tokens": 154044096} +{"current_steps": 48950, "total_steps": 78105, "loss": 0.1484, "lr": 1.838592139144661e-06, "epoch": 3.133602202163754, "percentage": 62.67, "elapsed_time": "2:08:44", "remaining_time": "1:16:40", "throughput": 19945.14, "total_tokens": 154060032} +{"current_steps": 48955, "total_steps": 78105, "loss": 0.2031, "lr": 1.8380534090112834e-06, "epoch": 3.133922284104731, "percentage": 62.68, "elapsed_time": "2:08:44", "remaining_time": "1:16:39", "throughput": 19945.43, "total_tokens": 154075776} +{"current_steps": 48960, "total_steps": 78105, "loss": 0.2037, "lr": 1.8375147119320705e-06, "epoch": 3.1342423660457075, "percentage": 62.68, "elapsed_time": "2:08:45", "remaining_time": "1:16:38", "throughput": 19945.66, "total_tokens": 154090176} +{"current_steps": 48965, "total_steps": 78105, "loss": 0.152, "lr": 1.8369760479339222e-06, "epoch": 3.1345624479866845, "percentage": 62.69, "elapsed_time": "2:08:46", "remaining_time": "1:16:38", "throughput": 19946.0, "total_tokens": 154107072} +{"current_steps": 48970, "total_steps": 78105, "loss": 0.1923, "lr": 1.8364374170437357e-06, "epoch": 3.1348825299276615, "percentage": 62.7, "elapsed_time": "2:08:46", "remaining_time": "1:16:37", "throughput": 19946.25, "total_tokens": 154121792} +{"current_steps": 48975, "total_steps": 78105, "loss": 0.152, "lr": 1.8358988192884098e-06, "epoch": 3.1352026118686385, "percentage": 62.7, "elapsed_time": "2:08:47", "remaining_time": "1:16:36", "throughput": 19946.59, "total_tokens": 154138240} +{"current_steps": 48980, "total_steps": 78105, "loss": 0.1644, "lr": 1.835360254694836e-06, "epoch": 3.1355226938096155, "percentage": 62.71, "elapsed_time": "2:08:48", "remaining_time": "1:16:35", "throughput": 19946.85, "total_tokens": 154153408} +{"current_steps": 48985, "total_steps": 78105, "loss": 0.1467, "lr": 1.83482172328991e-06, "epoch": 3.135842775750592, "percentage": 62.72, "elapsed_time": "2:08:48", "remaining_time": "1:16:34", "throughput": 19947.14, "total_tokens": 154169216} +{"current_steps": 48990, "total_steps": 78105, "loss": 0.1552, "lr": 1.8342832251005216e-06, "epoch": 3.136162857691569, "percentage": 62.72, "elapsed_time": "2:08:49", "remaining_time": "1:16:33", "throughput": 19947.42, "total_tokens": 154184832} +{"current_steps": 48995, "total_steps": 78105, "loss": 0.2161, "lr": 1.8337447601535627e-06, "epoch": 3.136482939632546, "percentage": 62.73, "elapsed_time": "2:08:50", "remaining_time": "1:16:32", "throughput": 19947.76, "total_tokens": 154201728} +{"current_steps": 49000, "total_steps": 78105, "loss": 0.1876, "lr": 1.8332063284759188e-06, "epoch": 3.136803021573523, "percentage": 62.74, "elapsed_time": "2:08:50", "remaining_time": "1:16:32", "throughput": 19948.04, "total_tokens": 154217024} +{"current_steps": 49005, "total_steps": 78105, "loss": 0.1428, "lr": 1.8326679300944778e-06, "epoch": 3.1371231035144995, "percentage": 62.74, "elapsed_time": "2:08:51", "remaining_time": "1:16:31", "throughput": 19948.31, "total_tokens": 154232640} +{"current_steps": 49010, "total_steps": 78105, "loss": 0.2121, "lr": 1.8321295650361253e-06, "epoch": 3.1374431854554765, "percentage": 62.75, "elapsed_time": "2:08:52", "remaining_time": "1:16:30", "throughput": 19948.58, "total_tokens": 154248064} +{"current_steps": 49015, "total_steps": 78105, "loss": 0.2179, "lr": 1.8315912333277425e-06, "epoch": 3.1377632673964535, "percentage": 62.76, "elapsed_time": "2:08:52", "remaining_time": "1:16:29", "throughput": 19948.89, "total_tokens": 154264192} +{"current_steps": 49020, "total_steps": 78105, "loss": 0.1238, "lr": 1.8310529349962125e-06, "epoch": 3.1380833493374305, "percentage": 62.76, "elapsed_time": "2:08:53", "remaining_time": "1:16:28", "throughput": 19949.21, "total_tokens": 154280576} +{"current_steps": 49025, "total_steps": 78105, "loss": 0.1334, "lr": 1.830514670068414e-06, "epoch": 3.138403431278407, "percentage": 62.77, "elapsed_time": "2:08:54", "remaining_time": "1:16:27", "throughput": 19949.48, "total_tokens": 154295872} +{"current_steps": 49030, "total_steps": 78105, "loss": 0.1362, "lr": 1.8299764385712264e-06, "epoch": 3.138723513219384, "percentage": 62.77, "elapsed_time": "2:08:54", "remaining_time": "1:16:26", "throughput": 19949.73, "total_tokens": 154310976} +{"current_steps": 49035, "total_steps": 78105, "loss": 0.2198, "lr": 1.8294382405315248e-06, "epoch": 3.139043595160361, "percentage": 62.78, "elapsed_time": "2:08:55", "remaining_time": "1:16:26", "throughput": 19950.07, "total_tokens": 154327680} +{"current_steps": 49040, "total_steps": 78105, "loss": 0.1391, "lr": 1.8289000759761849e-06, "epoch": 3.139363677101338, "percentage": 62.79, "elapsed_time": "2:08:56", "remaining_time": "1:16:25", "throughput": 19950.36, "total_tokens": 154343296} +{"current_steps": 49045, "total_steps": 78105, "loss": 0.1522, "lr": 1.8283619449320788e-06, "epoch": 3.139683759042315, "percentage": 62.79, "elapsed_time": "2:08:57", "remaining_time": "1:16:24", "throughput": 19950.64, "total_tokens": 154358976} +{"current_steps": 49050, "total_steps": 78105, "loss": 0.1084, "lr": 1.8278238474260801e-06, "epoch": 3.1400038409832915, "percentage": 62.8, "elapsed_time": "2:08:57", "remaining_time": "1:16:23", "throughput": 19950.94, "total_tokens": 154374784} +{"current_steps": 49055, "total_steps": 78105, "loss": 0.2112, "lr": 1.8272857834850562e-06, "epoch": 3.1403239229242685, "percentage": 62.81, "elapsed_time": "2:08:58", "remaining_time": "1:16:22", "throughput": 19951.21, "total_tokens": 154390080} +{"current_steps": 49060, "total_steps": 78105, "loss": 0.1409, "lr": 1.8267477531358763e-06, "epoch": 3.1406440048652455, "percentage": 62.81, "elapsed_time": "2:08:59", "remaining_time": "1:16:21", "throughput": 19951.53, "total_tokens": 154406848} +{"current_steps": 49065, "total_steps": 78105, "loss": 0.1635, "lr": 1.8262097564054076e-06, "epoch": 3.1409640868062225, "percentage": 62.82, "elapsed_time": "2:08:59", "remaining_time": "1:16:20", "throughput": 19951.84, "total_tokens": 154423168} +{"current_steps": 49070, "total_steps": 78105, "loss": 0.154, "lr": 1.825671793320513e-06, "epoch": 3.141284168747199, "percentage": 62.83, "elapsed_time": "2:09:00", "remaining_time": "1:16:20", "throughput": 19952.12, "total_tokens": 154438720} +{"current_steps": 49075, "total_steps": 78105, "loss": 0.1403, "lr": 1.8251338639080575e-06, "epoch": 3.141604250688176, "percentage": 62.83, "elapsed_time": "2:09:01", "remaining_time": "1:16:19", "throughput": 19952.38, "total_tokens": 154454144} +{"current_steps": 49080, "total_steps": 78105, "loss": 0.1403, "lr": 1.8245959681949004e-06, "epoch": 3.141924332629153, "percentage": 62.84, "elapsed_time": "2:09:01", "remaining_time": "1:16:18", "throughput": 19952.66, "total_tokens": 154469632} +{"current_steps": 49085, "total_steps": 78105, "loss": 0.2037, "lr": 1.8240581062079043e-06, "epoch": 3.14224441457013, "percentage": 62.84, "elapsed_time": "2:09:02", "remaining_time": "1:16:17", "throughput": 19952.93, "total_tokens": 154485120} +{"current_steps": 49090, "total_steps": 78105, "loss": 0.1463, "lr": 1.8235202779739242e-06, "epoch": 3.142564496511107, "percentage": 62.85, "elapsed_time": "2:09:03", "remaining_time": "1:16:16", "throughput": 19953.22, "total_tokens": 154500864} +{"current_steps": 49095, "total_steps": 78105, "loss": 0.1618, "lr": 1.8229824835198187e-06, "epoch": 3.1428845784520836, "percentage": 62.86, "elapsed_time": "2:09:03", "remaining_time": "1:16:15", "throughput": 19953.5, "total_tokens": 154516608} +{"current_steps": 49100, "total_steps": 78105, "loss": 0.2045, "lr": 1.8224447228724407e-06, "epoch": 3.1432046603930606, "percentage": 62.86, "elapsed_time": "2:09:04", "remaining_time": "1:16:14", "throughput": 19953.76, "total_tokens": 154531904} +{"current_steps": 49105, "total_steps": 78105, "loss": 0.1027, "lr": 1.8219069960586444e-06, "epoch": 3.1435247423340376, "percentage": 62.87, "elapsed_time": "2:09:05", "remaining_time": "1:16:14", "throughput": 19954.01, "total_tokens": 154546944} +{"current_steps": 49110, "total_steps": 78105, "loss": 0.1544, "lr": 1.8213693031052798e-06, "epoch": 3.1438448242750145, "percentage": 62.88, "elapsed_time": "2:09:05", "remaining_time": "1:16:13", "throughput": 19954.33, "total_tokens": 154563072} +{"current_steps": 49115, "total_steps": 78105, "loss": 0.1837, "lr": 1.8208316440391977e-06, "epoch": 3.144164906215991, "percentage": 62.88, "elapsed_time": "2:09:06", "remaining_time": "1:16:12", "throughput": 19954.68, "total_tokens": 154579712} +{"current_steps": 49120, "total_steps": 78105, "loss": 0.1791, "lr": 1.8202940188872468e-06, "epoch": 3.144484988156968, "percentage": 62.89, "elapsed_time": "2:09:07", "remaining_time": "1:16:11", "throughput": 19954.93, "total_tokens": 154595008} +{"current_steps": 49125, "total_steps": 78105, "loss": 0.2902, "lr": 1.8197564276762708e-06, "epoch": 3.144805070097945, "percentage": 62.9, "elapsed_time": "2:09:07", "remaining_time": "1:16:10", "throughput": 19955.28, "total_tokens": 154611776} +{"current_steps": 49130, "total_steps": 78105, "loss": 0.1741, "lr": 1.8192188704331163e-06, "epoch": 3.145125152038922, "percentage": 62.9, "elapsed_time": "2:09:08", "remaining_time": "1:16:09", "throughput": 19955.51, "total_tokens": 154626432} +{"current_steps": 49135, "total_steps": 78105, "loss": 0.1921, "lr": 1.8186813471846246e-06, "epoch": 3.145445233979899, "percentage": 62.91, "elapsed_time": "2:09:09", "remaining_time": "1:16:08", "throughput": 19955.86, "total_tokens": 154643456} +{"current_steps": 49140, "total_steps": 78105, "loss": 0.1822, "lr": 1.818143857957639e-06, "epoch": 3.1457653159208756, "percentage": 62.92, "elapsed_time": "2:09:09", "remaining_time": "1:16:08", "throughput": 19956.17, "total_tokens": 154659136} +{"current_steps": 49145, "total_steps": 78105, "loss": 0.2113, "lr": 1.8176064027789958e-06, "epoch": 3.1460853978618526, "percentage": 62.92, "elapsed_time": "2:09:10", "remaining_time": "1:16:07", "throughput": 19956.38, "total_tokens": 154673408} +{"current_steps": 49150, "total_steps": 78105, "loss": 0.1947, "lr": 1.817068981675536e-06, "epoch": 3.1464054798028296, "percentage": 62.93, "elapsed_time": "2:09:11", "remaining_time": "1:16:06", "throughput": 19956.64, "total_tokens": 154688448} +{"current_steps": 49155, "total_steps": 78105, "loss": 0.1851, "lr": 1.8165315946740925e-06, "epoch": 3.1467255617438066, "percentage": 62.93, "elapsed_time": "2:09:11", "remaining_time": "1:16:05", "throughput": 19956.96, "total_tokens": 154704448} +{"current_steps": 49160, "total_steps": 78105, "loss": 0.1101, "lr": 1.815994241801502e-06, "epoch": 3.147045643684783, "percentage": 62.94, "elapsed_time": "2:09:12", "remaining_time": "1:16:04", "throughput": 19957.23, "total_tokens": 154719936} +{"current_steps": 49165, "total_steps": 78105, "loss": 0.1941, "lr": 1.8154569230845957e-06, "epoch": 3.14736572562576, "percentage": 62.95, "elapsed_time": "2:09:13", "remaining_time": "1:16:03", "throughput": 19957.52, "total_tokens": 154735808} +{"current_steps": 49170, "total_steps": 78105, "loss": 0.1623, "lr": 1.8149196385502058e-06, "epoch": 3.147685807566737, "percentage": 62.95, "elapsed_time": "2:09:13", "remaining_time": "1:16:02", "throughput": 19957.76, "total_tokens": 154750720} +{"current_steps": 49175, "total_steps": 78105, "loss": 0.2436, "lr": 1.8143823882251598e-06, "epoch": 3.148005889507714, "percentage": 62.96, "elapsed_time": "2:09:14", "remaining_time": "1:16:02", "throughput": 19958.03, "total_tokens": 154766144} +{"current_steps": 49180, "total_steps": 78105, "loss": 0.2159, "lr": 1.813845172136286e-06, "epoch": 3.148325971448691, "percentage": 62.97, "elapsed_time": "2:09:15", "remaining_time": "1:16:01", "throughput": 19958.32, "total_tokens": 154781952} +{"current_steps": 49185, "total_steps": 78105, "loss": 0.1272, "lr": 1.8133079903104105e-06, "epoch": 3.1486460533896676, "percentage": 62.97, "elapsed_time": "2:09:15", "remaining_time": "1:16:00", "throughput": 19958.6, "total_tokens": 154797504} +{"current_steps": 49190, "total_steps": 78105, "loss": 0.1553, "lr": 1.8127708427743572e-06, "epoch": 3.1489661353306446, "percentage": 62.98, "elapsed_time": "2:09:16", "remaining_time": "1:15:59", "throughput": 19958.87, "total_tokens": 154812672} +{"current_steps": 49195, "total_steps": 78105, "loss": 0.1029, "lr": 1.8122337295549491e-06, "epoch": 3.1492862172716216, "percentage": 62.99, "elapsed_time": "2:09:17", "remaining_time": "1:15:58", "throughput": 19959.13, "total_tokens": 154827968} +{"current_steps": 49200, "total_steps": 78105, "loss": 0.0969, "lr": 1.8116966506790052e-06, "epoch": 3.1496062992125986, "percentage": 62.99, "elapsed_time": "2:09:18", "remaining_time": "1:15:57", "throughput": 19959.57, "total_tokens": 154846528} +{"current_steps": 49205, "total_steps": 78105, "loss": 0.085, "lr": 1.811159606173346e-06, "epoch": 3.149926381153575, "percentage": 63.0, "elapsed_time": "2:09:18", "remaining_time": "1:15:56", "throughput": 19959.85, "total_tokens": 154862336} +{"current_steps": 49210, "total_steps": 78105, "loss": 0.1819, "lr": 1.810622596064787e-06, "epoch": 3.150246463094552, "percentage": 63.0, "elapsed_time": "2:09:19", "remaining_time": "1:15:56", "throughput": 19960.07, "total_tokens": 154876992} +{"current_steps": 49215, "total_steps": 78105, "loss": 0.1521, "lr": 1.8100856203801458e-06, "epoch": 3.150566545035529, "percentage": 63.01, "elapsed_time": "2:09:20", "remaining_time": "1:15:55", "throughput": 19960.36, "total_tokens": 154892864} +{"current_steps": 49220, "total_steps": 78105, "loss": 0.2068, "lr": 1.8095486791462342e-06, "epoch": 3.150886626976506, "percentage": 63.02, "elapsed_time": "2:09:20", "remaining_time": "1:15:54", "throughput": 19960.62, "total_tokens": 154908096} +{"current_steps": 49225, "total_steps": 78105, "loss": 0.0886, "lr": 1.8090117723898663e-06, "epoch": 3.1512067089174827, "percentage": 63.02, "elapsed_time": "2:09:21", "remaining_time": "1:15:53", "throughput": 19960.9, "total_tokens": 154923584} +{"current_steps": 49230, "total_steps": 78105, "loss": 0.2037, "lr": 1.8084749001378499e-06, "epoch": 3.1515267908584597, "percentage": 63.03, "elapsed_time": "2:09:21", "remaining_time": "1:15:52", "throughput": 19961.15, "total_tokens": 154938432} +{"current_steps": 49235, "total_steps": 78105, "loss": 0.1023, "lr": 1.8079380624169955e-06, "epoch": 3.1518468727994366, "percentage": 63.04, "elapsed_time": "2:09:22", "remaining_time": "1:15:51", "throughput": 19961.45, "total_tokens": 154954368} +{"current_steps": 49240, "total_steps": 78105, "loss": 0.3264, "lr": 1.8074012592541102e-06, "epoch": 3.1521669547404136, "percentage": 63.04, "elapsed_time": "2:09:23", "remaining_time": "1:15:50", "throughput": 19961.73, "total_tokens": 154970240} +{"current_steps": 49245, "total_steps": 78105, "loss": 0.1556, "lr": 1.8068644906759973e-06, "epoch": 3.1524870366813906, "percentage": 63.05, "elapsed_time": "2:09:24", "remaining_time": "1:15:50", "throughput": 19961.97, "total_tokens": 154984896} +{"current_steps": 49250, "total_steps": 78105, "loss": 0.1864, "lr": 1.8063277567094617e-06, "epoch": 3.152807118622367, "percentage": 63.06, "elapsed_time": "2:09:24", "remaining_time": "1:15:49", "throughput": 19962.26, "total_tokens": 155000768} +{"current_steps": 49255, "total_steps": 78105, "loss": 0.2295, "lr": 1.8057910573813042e-06, "epoch": 3.153127200563344, "percentage": 63.06, "elapsed_time": "2:09:25", "remaining_time": "1:15:48", "throughput": 19962.68, "total_tokens": 155019072} +{"current_steps": 49260, "total_steps": 78105, "loss": 0.2016, "lr": 1.805254392718327e-06, "epoch": 3.153447282504321, "percentage": 63.07, "elapsed_time": "2:09:26", "remaining_time": "1:15:47", "throughput": 19962.97, "total_tokens": 155035072} +{"current_steps": 49265, "total_steps": 78105, "loss": 0.1776, "lr": 1.804717762747325e-06, "epoch": 3.153767364445298, "percentage": 63.08, "elapsed_time": "2:09:26", "remaining_time": "1:15:46", "throughput": 19963.22, "total_tokens": 155049920} +{"current_steps": 49270, "total_steps": 78105, "loss": 0.0771, "lr": 1.804181167495097e-06, "epoch": 3.1540874463862747, "percentage": 63.08, "elapsed_time": "2:09:27", "remaining_time": "1:15:45", "throughput": 19963.55, "total_tokens": 155066432} +{"current_steps": 49275, "total_steps": 78105, "loss": 0.1546, "lr": 1.8036446069884362e-06, "epoch": 3.1544075283272517, "percentage": 63.09, "elapsed_time": "2:09:28", "remaining_time": "1:15:45", "throughput": 19963.84, "total_tokens": 155082496} +{"current_steps": 49280, "total_steps": 78105, "loss": 0.1381, "lr": 1.803108081254138e-06, "epoch": 3.1547276102682287, "percentage": 63.09, "elapsed_time": "2:09:28", "remaining_time": "1:15:44", "throughput": 19964.21, "total_tokens": 155099904} +{"current_steps": 49285, "total_steps": 78105, "loss": 0.1278, "lr": 1.802571590318991e-06, "epoch": 3.1550476922092057, "percentage": 63.1, "elapsed_time": "2:09:29", "remaining_time": "1:15:43", "throughput": 19964.47, "total_tokens": 155114880} +{"current_steps": 49290, "total_steps": 78105, "loss": 0.2006, "lr": 1.8020351342097864e-06, "epoch": 3.155367774150182, "percentage": 63.11, "elapsed_time": "2:09:30", "remaining_time": "1:15:42", "throughput": 19964.75, "total_tokens": 155130496} +{"current_steps": 49295, "total_steps": 78105, "loss": 0.1725, "lr": 1.8014987129533123e-06, "epoch": 3.155687856091159, "percentage": 63.11, "elapsed_time": "2:09:30", "remaining_time": "1:15:41", "throughput": 19965.02, "total_tokens": 155146240} +{"current_steps": 49300, "total_steps": 78105, "loss": 0.1611, "lr": 1.8009623265763531e-06, "epoch": 3.156007938032136, "percentage": 63.12, "elapsed_time": "2:09:31", "remaining_time": "1:15:40", "throughput": 19965.29, "total_tokens": 155161984} +{"current_steps": 49305, "total_steps": 78105, "loss": 0.1304, "lr": 1.800425975105695e-06, "epoch": 3.156328019973113, "percentage": 63.13, "elapsed_time": "2:09:32", "remaining_time": "1:15:39", "throughput": 19965.55, "total_tokens": 155177408} +{"current_steps": 49310, "total_steps": 78105, "loss": 0.1918, "lr": 1.7998896585681189e-06, "epoch": 3.15664810191409, "percentage": 63.13, "elapsed_time": "2:09:32", "remaining_time": "1:15:39", "throughput": 19965.9, "total_tokens": 155194496} +{"current_steps": 49315, "total_steps": 78105, "loss": 0.1523, "lr": 1.7993533769904076e-06, "epoch": 3.1569681838550667, "percentage": 63.14, "elapsed_time": "2:09:33", "remaining_time": "1:15:38", "throughput": 19966.21, "total_tokens": 155210560} +{"current_steps": 49320, "total_steps": 78105, "loss": 0.1641, "lr": 1.7988171303993383e-06, "epoch": 3.1572882657960437, "percentage": 63.15, "elapsed_time": "2:09:34", "remaining_time": "1:15:37", "throughput": 19966.49, "total_tokens": 155226368} +{"current_steps": 49325, "total_steps": 78105, "loss": 0.2784, "lr": 1.79828091882169e-06, "epoch": 3.1576083477370207, "percentage": 63.15, "elapsed_time": "2:09:35", "remaining_time": "1:15:36", "throughput": 19966.74, "total_tokens": 155241472} +{"current_steps": 49330, "total_steps": 78105, "loss": 0.1695, "lr": 1.7977447422842364e-06, "epoch": 3.1579284296779977, "percentage": 63.16, "elapsed_time": "2:09:35", "remaining_time": "1:15:35", "throughput": 19966.99, "total_tokens": 155256640} +{"current_steps": 49335, "total_steps": 78105, "loss": 0.1801, "lr": 1.797208600813753e-06, "epoch": 3.1582485116189742, "percentage": 63.16, "elapsed_time": "2:09:36", "remaining_time": "1:15:34", "throughput": 19967.32, "total_tokens": 155273024} +{"current_steps": 49340, "total_steps": 78105, "loss": 0.2074, "lr": 1.7966724944370108e-06, "epoch": 3.1585685935599512, "percentage": 63.17, "elapsed_time": "2:09:37", "remaining_time": "1:15:33", "throughput": 19967.61, "total_tokens": 155289088} +{"current_steps": 49345, "total_steps": 78105, "loss": 0.1932, "lr": 1.7961364231807822e-06, "epoch": 3.1588886755009282, "percentage": 63.18, "elapsed_time": "2:09:37", "remaining_time": "1:15:33", "throughput": 19967.94, "total_tokens": 155305728} +{"current_steps": 49350, "total_steps": 78105, "loss": 0.2204, "lr": 1.795600387071833e-06, "epoch": 3.159208757441905, "percentage": 63.18, "elapsed_time": "2:09:38", "remaining_time": "1:15:32", "throughput": 19968.23, "total_tokens": 155321792} +{"current_steps": 49355, "total_steps": 78105, "loss": 0.2136, "lr": 1.795064386136931e-06, "epoch": 3.159528839382882, "percentage": 63.19, "elapsed_time": "2:09:39", "remaining_time": "1:15:31", "throughput": 19968.63, "total_tokens": 155339264} +{"current_steps": 49360, "total_steps": 78105, "loss": 0.1362, "lr": 1.7945284204028428e-06, "epoch": 3.1598489213238588, "percentage": 63.2, "elapsed_time": "2:09:39", "remaining_time": "1:15:30", "throughput": 19968.86, "total_tokens": 155354304} +{"current_steps": 49365, "total_steps": 78105, "loss": 0.275, "lr": 1.79399248989633e-06, "epoch": 3.1601690032648357, "percentage": 63.2, "elapsed_time": "2:09:40", "remaining_time": "1:15:29", "throughput": 19969.11, "total_tokens": 155369152} +{"current_steps": 49370, "total_steps": 78105, "loss": 0.2137, "lr": 1.7934565946441556e-06, "epoch": 3.1604890852058127, "percentage": 63.21, "elapsed_time": "2:09:41", "remaining_time": "1:15:28", "throughput": 19969.42, "total_tokens": 155384896} +{"current_steps": 49375, "total_steps": 78105, "loss": 0.1578, "lr": 1.7929207346730776e-06, "epoch": 3.1608091671467897, "percentage": 63.22, "elapsed_time": "2:09:41", "remaining_time": "1:15:28", "throughput": 19969.69, "total_tokens": 155400512} +{"current_steps": 49380, "total_steps": 78105, "loss": 0.1719, "lr": 1.7923849100098562e-06, "epoch": 3.1611292490877663, "percentage": 63.22, "elapsed_time": "2:09:42", "remaining_time": "1:15:27", "throughput": 19970.0, "total_tokens": 155416512} +{"current_steps": 49385, "total_steps": 78105, "loss": 0.169, "lr": 1.7918491206812455e-06, "epoch": 3.1614493310287433, "percentage": 63.23, "elapsed_time": "2:09:43", "remaining_time": "1:15:26", "throughput": 19970.24, "total_tokens": 155431296} +{"current_steps": 49390, "total_steps": 78105, "loss": 0.1524, "lr": 1.7913133667140014e-06, "epoch": 3.1617694129697202, "percentage": 63.24, "elapsed_time": "2:09:43", "remaining_time": "1:15:25", "throughput": 19970.54, "total_tokens": 155447296} +{"current_steps": 49395, "total_steps": 78105, "loss": 0.1339, "lr": 1.7907776481348763e-06, "epoch": 3.1620894949106972, "percentage": 63.24, "elapsed_time": "2:09:44", "remaining_time": "1:15:24", "throughput": 19970.83, "total_tokens": 155463104} +{"current_steps": 49400, "total_steps": 78105, "loss": 0.1519, "lr": 1.790241964970622e-06, "epoch": 3.1624095768516742, "percentage": 63.25, "elapsed_time": "2:09:45", "remaining_time": "1:15:23", "throughput": 19971.12, "total_tokens": 155478784} +{"current_steps": 49405, "total_steps": 78105, "loss": 0.1732, "lr": 1.7897063172479862e-06, "epoch": 3.1627296587926508, "percentage": 63.25, "elapsed_time": "2:09:45", "remaining_time": "1:15:22", "throughput": 19971.47, "total_tokens": 155495616} +{"current_steps": 49410, "total_steps": 78105, "loss": 0.1998, "lr": 1.7891707049937168e-06, "epoch": 3.1630497407336278, "percentage": 63.26, "elapsed_time": "2:09:46", "remaining_time": "1:15:22", "throughput": 19971.85, "total_tokens": 155512640} +{"current_steps": 49415, "total_steps": 78105, "loss": 0.1176, "lr": 1.7886351282345604e-06, "epoch": 3.1633698226746048, "percentage": 63.27, "elapsed_time": "2:09:47", "remaining_time": "1:15:21", "throughput": 19972.15, "total_tokens": 155528576} +{"current_steps": 49420, "total_steps": 78105, "loss": 0.1531, "lr": 1.7880995869972598e-06, "epoch": 3.1636899046155817, "percentage": 63.27, "elapsed_time": "2:09:47", "remaining_time": "1:15:20", "throughput": 19972.45, "total_tokens": 155544704} +{"current_steps": 49425, "total_steps": 78105, "loss": 0.1638, "lr": 1.7875640813085587e-06, "epoch": 3.1640099865565583, "percentage": 63.28, "elapsed_time": "2:09:48", "remaining_time": "1:15:19", "throughput": 19972.75, "total_tokens": 155560704} +{"current_steps": 49430, "total_steps": 78105, "loss": 0.1806, "lr": 1.7870286111951957e-06, "epoch": 3.1643300684975353, "percentage": 63.29, "elapsed_time": "2:09:49", "remaining_time": "1:15:18", "throughput": 19973.0, "total_tokens": 155575680} +{"current_steps": 49435, "total_steps": 78105, "loss": 0.1671, "lr": 1.786493176683911e-06, "epoch": 3.1646501504385123, "percentage": 63.29, "elapsed_time": "2:09:50", "remaining_time": "1:15:17", "throughput": 19973.33, "total_tokens": 155592448} +{"current_steps": 49440, "total_steps": 78105, "loss": 0.1551, "lr": 1.7859577778014393e-06, "epoch": 3.1649702323794893, "percentage": 63.3, "elapsed_time": "2:09:50", "remaining_time": "1:15:16", "throughput": 19973.64, "total_tokens": 155608512} +{"current_steps": 49445, "total_steps": 78105, "loss": 0.1647, "lr": 1.7854224145745175e-06, "epoch": 3.1652903143204663, "percentage": 63.31, "elapsed_time": "2:09:51", "remaining_time": "1:15:16", "throughput": 19973.96, "total_tokens": 155625088} +{"current_steps": 49450, "total_steps": 78105, "loss": 0.1323, "lr": 1.7848870870298777e-06, "epoch": 3.165610396261443, "percentage": 63.31, "elapsed_time": "2:09:52", "remaining_time": "1:15:15", "throughput": 19974.27, "total_tokens": 155641344} +{"current_steps": 49455, "total_steps": 78105, "loss": 0.1538, "lr": 1.784351795194253e-06, "epoch": 3.16593047820242, "percentage": 63.32, "elapsed_time": "2:09:52", "remaining_time": "1:15:14", "throughput": 19974.55, "total_tokens": 155657088} +{"current_steps": 49460, "total_steps": 78105, "loss": 0.2068, "lr": 1.7838165390943706e-06, "epoch": 3.166250560143397, "percentage": 63.33, "elapsed_time": "2:09:53", "remaining_time": "1:15:13", "throughput": 19974.87, "total_tokens": 155673280} +{"current_steps": 49465, "total_steps": 78105, "loss": 0.1617, "lr": 1.783281318756961e-06, "epoch": 3.1665706420843738, "percentage": 63.33, "elapsed_time": "2:09:54", "remaining_time": "1:15:12", "throughput": 19975.14, "total_tokens": 155688832} +{"current_steps": 49470, "total_steps": 78105, "loss": 0.1426, "lr": 1.7827461342087483e-06, "epoch": 3.1668907240253503, "percentage": 63.34, "elapsed_time": "2:09:54", "remaining_time": "1:15:11", "throughput": 19975.38, "total_tokens": 155703552} +{"current_steps": 49475, "total_steps": 78105, "loss": 0.18, "lr": 1.7822109854764575e-06, "epoch": 3.1672108059663273, "percentage": 63.34, "elapsed_time": "2:09:55", "remaining_time": "1:15:11", "throughput": 19975.72, "total_tokens": 155720384} +{"current_steps": 49480, "total_steps": 78105, "loss": 0.1643, "lr": 1.7816758725868117e-06, "epoch": 3.1675308879073043, "percentage": 63.35, "elapsed_time": "2:09:56", "remaining_time": "1:15:10", "throughput": 19976.04, "total_tokens": 155736960} +{"current_steps": 49485, "total_steps": 78105, "loss": 0.1909, "lr": 1.7811407955665306e-06, "epoch": 3.1678509698482813, "percentage": 63.36, "elapsed_time": "2:09:56", "remaining_time": "1:15:09", "throughput": 19976.31, "total_tokens": 155752320} +{"current_steps": 49490, "total_steps": 78105, "loss": 0.1776, "lr": 1.780605754442335e-06, "epoch": 3.1681710517892583, "percentage": 63.36, "elapsed_time": "2:09:57", "remaining_time": "1:15:08", "throughput": 19976.57, "total_tokens": 155767616} +{"current_steps": 49495, "total_steps": 78105, "loss": 0.1464, "lr": 1.7800707492409398e-06, "epoch": 3.168491133730235, "percentage": 63.37, "elapsed_time": "2:09:58", "remaining_time": "1:15:07", "throughput": 19976.94, "total_tokens": 155785088} +{"current_steps": 49500, "total_steps": 78105, "loss": 0.1997, "lr": 1.7795357799890617e-06, "epoch": 3.168811215671212, "percentage": 63.38, "elapsed_time": "2:09:58", "remaining_time": "1:15:06", "throughput": 19977.25, "total_tokens": 155801600} +{"current_steps": 49505, "total_steps": 78105, "loss": 0.1413, "lr": 1.7790008467134136e-06, "epoch": 3.169131297612189, "percentage": 63.38, "elapsed_time": "2:09:59", "remaining_time": "1:15:06", "throughput": 19977.61, "total_tokens": 155818496} +{"current_steps": 49510, "total_steps": 78105, "loss": 0.1477, "lr": 1.7784659494407085e-06, "epoch": 3.169451379553166, "percentage": 63.39, "elapsed_time": "2:10:00", "remaining_time": "1:15:05", "throughput": 19977.86, "total_tokens": 155833664} +{"current_steps": 49515, "total_steps": 78105, "loss": 0.168, "lr": 1.777931088197655e-06, "epoch": 3.1697714614941424, "percentage": 63.4, "elapsed_time": "2:10:01", "remaining_time": "1:15:04", "throughput": 19978.2, "total_tokens": 155850496} +{"current_steps": 49520, "total_steps": 78105, "loss": 0.1681, "lr": 1.7773962630109625e-06, "epoch": 3.1700915434351193, "percentage": 63.4, "elapsed_time": "2:10:01", "remaining_time": "1:15:03", "throughput": 19978.46, "total_tokens": 155865664} +{"current_steps": 49525, "total_steps": 78105, "loss": 0.2676, "lr": 1.776861473907336e-06, "epoch": 3.1704116253760963, "percentage": 63.41, "elapsed_time": "2:10:02", "remaining_time": "1:15:02", "throughput": 19978.72, "total_tokens": 155881088} +{"current_steps": 49530, "total_steps": 78105, "loss": 0.1935, "lr": 1.7763267209134805e-06, "epoch": 3.1707317073170733, "percentage": 63.41, "elapsed_time": "2:10:03", "remaining_time": "1:15:01", "throughput": 19978.99, "total_tokens": 155896640} +{"current_steps": 49535, "total_steps": 78105, "loss": 0.1933, "lr": 1.7757920040560996e-06, "epoch": 3.17105178925805, "percentage": 63.42, "elapsed_time": "2:10:03", "remaining_time": "1:15:00", "throughput": 19979.23, "total_tokens": 155911616} +{"current_steps": 49540, "total_steps": 78105, "loss": 0.1791, "lr": 1.7752573233618936e-06, "epoch": 3.171371871199027, "percentage": 63.43, "elapsed_time": "2:10:04", "remaining_time": "1:15:00", "throughput": 19979.49, "total_tokens": 155926784} +{"current_steps": 49545, "total_steps": 78105, "loss": 0.1269, "lr": 1.7747226788575622e-06, "epoch": 3.171691953140004, "percentage": 63.43, "elapsed_time": "2:10:05", "remaining_time": "1:14:59", "throughput": 19979.84, "total_tokens": 155943552} +{"current_steps": 49550, "total_steps": 78105, "loss": 0.1519, "lr": 1.774188070569802e-06, "epoch": 3.172012035080981, "percentage": 63.44, "elapsed_time": "2:10:05", "remaining_time": "1:14:58", "throughput": 19980.1, "total_tokens": 155959040} +{"current_steps": 49555, "total_steps": 78105, "loss": 0.1188, "lr": 1.773653498525309e-06, "epoch": 3.1723321170219574, "percentage": 63.45, "elapsed_time": "2:10:06", "remaining_time": "1:14:57", "throughput": 19980.38, "total_tokens": 155974592} +{"current_steps": 49560, "total_steps": 78105, "loss": 0.1247, "lr": 1.7731189627507772e-06, "epoch": 3.1726521989629344, "percentage": 63.45, "elapsed_time": "2:10:07", "remaining_time": "1:14:56", "throughput": 19980.68, "total_tokens": 155990592} +{"current_steps": 49565, "total_steps": 78105, "loss": 0.105, "lr": 1.772584463272898e-06, "epoch": 3.1729722809039114, "percentage": 63.46, "elapsed_time": "2:10:07", "remaining_time": "1:14:55", "throughput": 19980.94, "total_tokens": 156006144} +{"current_steps": 49570, "total_steps": 78105, "loss": 0.0919, "lr": 1.7720500001183617e-06, "epoch": 3.1732923628448884, "percentage": 63.47, "elapsed_time": "2:10:08", "remaining_time": "1:14:54", "throughput": 19981.23, "total_tokens": 156022016} +{"current_steps": 49575, "total_steps": 78105, "loss": 0.1088, "lr": 1.771515573313857e-06, "epoch": 3.1736124447858653, "percentage": 63.47, "elapsed_time": "2:10:09", "remaining_time": "1:14:54", "throughput": 19981.53, "total_tokens": 156037824} +{"current_steps": 49580, "total_steps": 78105, "loss": 0.2396, "lr": 1.7709811828860693e-06, "epoch": 3.173932526726842, "percentage": 63.48, "elapsed_time": "2:10:09", "remaining_time": "1:14:53", "throughput": 19981.78, "total_tokens": 156053120} +{"current_steps": 49585, "total_steps": 78105, "loss": 0.2079, "lr": 1.770446828861684e-06, "epoch": 3.174252608667819, "percentage": 63.49, "elapsed_time": "2:10:10", "remaining_time": "1:14:52", "throughput": 19982.01, "total_tokens": 156068160} +{"current_steps": 49590, "total_steps": 78105, "loss": 0.0824, "lr": 1.7699125112673843e-06, "epoch": 3.174572690608796, "percentage": 63.49, "elapsed_time": "2:10:11", "remaining_time": "1:14:51", "throughput": 19982.32, "total_tokens": 156084480} +{"current_steps": 49595, "total_steps": 78105, "loss": 0.1434, "lr": 1.7693782301298508e-06, "epoch": 3.174892772549773, "percentage": 63.5, "elapsed_time": "2:10:11", "remaining_time": "1:14:50", "throughput": 19982.61, "total_tokens": 156100736} +{"current_steps": 49600, "total_steps": 78105, "loss": 0.2225, "lr": 1.7688439854757635e-06, "epoch": 3.1752128544907494, "percentage": 63.5, "elapsed_time": "2:10:12", "remaining_time": "1:14:49", "throughput": 19982.9, "total_tokens": 156117056} +{"current_steps": 49605, "total_steps": 78105, "loss": 0.1643, "lr": 1.7683097773317981e-06, "epoch": 3.1755329364317264, "percentage": 63.51, "elapsed_time": "2:10:13", "remaining_time": "1:14:48", "throughput": 19983.16, "total_tokens": 156132672} +{"current_steps": 49610, "total_steps": 78105, "loss": 0.1787, "lr": 1.7677756057246326e-06, "epoch": 3.1758530183727034, "percentage": 63.52, "elapsed_time": "2:10:13", "remaining_time": "1:14:48", "throughput": 19983.44, "total_tokens": 156148480} +{"current_steps": 49615, "total_steps": 78105, "loss": 0.1366, "lr": 1.7672414706809377e-06, "epoch": 3.1761731003136804, "percentage": 63.52, "elapsed_time": "2:10:14", "remaining_time": "1:14:47", "throughput": 19983.73, "total_tokens": 156164160} +{"current_steps": 49620, "total_steps": 78105, "loss": 0.1075, "lr": 1.7667073722273877e-06, "epoch": 3.1764931822546574, "percentage": 63.53, "elapsed_time": "2:10:15", "remaining_time": "1:14:46", "throughput": 19983.97, "total_tokens": 156179008} +{"current_steps": 49625, "total_steps": 78105, "loss": 0.2051, "lr": 1.7661733103906517e-06, "epoch": 3.176813264195634, "percentage": 63.54, "elapsed_time": "2:10:15", "remaining_time": "1:14:45", "throughput": 19984.28, "total_tokens": 156195072} +{"current_steps": 49630, "total_steps": 78105, "loss": 0.1316, "lr": 1.765639285197399e-06, "epoch": 3.177133346136611, "percentage": 63.54, "elapsed_time": "2:10:16", "remaining_time": "1:14:44", "throughput": 19984.54, "total_tokens": 156210560} +{"current_steps": 49635, "total_steps": 78105, "loss": 0.1347, "lr": 1.7651052966742943e-06, "epoch": 3.177453428077588, "percentage": 63.55, "elapsed_time": "2:10:17", "remaining_time": "1:14:43", "throughput": 19984.8, "total_tokens": 156226240} +{"current_steps": 49640, "total_steps": 78105, "loss": 0.122, "lr": 1.7645713448480037e-06, "epoch": 3.177773510018565, "percentage": 63.56, "elapsed_time": "2:10:17", "remaining_time": "1:14:43", "throughput": 19985.13, "total_tokens": 156242880} +{"current_steps": 49645, "total_steps": 78105, "loss": 0.1503, "lr": 1.764037429745189e-06, "epoch": 3.1780935919595414, "percentage": 63.56, "elapsed_time": "2:10:18", "remaining_time": "1:14:42", "throughput": 19985.41, "total_tokens": 156258432} +{"current_steps": 49650, "total_steps": 78105, "loss": 0.1754, "lr": 1.7635035513925115e-06, "epoch": 3.1784136739005184, "percentage": 63.57, "elapsed_time": "2:10:19", "remaining_time": "1:14:41", "throughput": 19985.67, "total_tokens": 156273664} +{"current_steps": 49655, "total_steps": 78105, "loss": 0.231, "lr": 1.762969709816631e-06, "epoch": 3.1787337558414954, "percentage": 63.57, "elapsed_time": "2:10:19", "remaining_time": "1:14:40", "throughput": 19985.96, "total_tokens": 156289792} +{"current_steps": 49660, "total_steps": 78105, "loss": 0.2028, "lr": 1.7624359050442037e-06, "epoch": 3.1790538377824724, "percentage": 63.58, "elapsed_time": "2:10:20", "remaining_time": "1:14:39", "throughput": 19986.25, "total_tokens": 156305408} +{"current_steps": 49665, "total_steps": 78105, "loss": 0.1313, "lr": 1.761902137101886e-06, "epoch": 3.1793739197234494, "percentage": 63.59, "elapsed_time": "2:10:21", "remaining_time": "1:14:38", "throughput": 19986.51, "total_tokens": 156320896} +{"current_steps": 49670, "total_steps": 78105, "loss": 0.1381, "lr": 1.7613684060163303e-06, "epoch": 3.179694001664426, "percentage": 63.59, "elapsed_time": "2:10:21", "remaining_time": "1:14:37", "throughput": 19986.79, "total_tokens": 156336576} +{"current_steps": 49675, "total_steps": 78105, "loss": 0.1875, "lr": 1.7608347118141894e-06, "epoch": 3.180014083605403, "percentage": 63.6, "elapsed_time": "2:10:22", "remaining_time": "1:14:37", "throughput": 19987.05, "total_tokens": 156351872} +{"current_steps": 49680, "total_steps": 78105, "loss": 0.1498, "lr": 1.7603010545221123e-06, "epoch": 3.18033416554638, "percentage": 63.61, "elapsed_time": "2:10:23", "remaining_time": "1:14:36", "throughput": 19987.3, "total_tokens": 156366976} +{"current_steps": 49685, "total_steps": 78105, "loss": 0.2003, "lr": 1.7597674341667487e-06, "epoch": 3.180654247487357, "percentage": 63.61, "elapsed_time": "2:10:24", "remaining_time": "1:14:35", "throughput": 19987.6, "total_tokens": 156383104} +{"current_steps": 49690, "total_steps": 78105, "loss": 0.1034, "lr": 1.7592338507747425e-06, "epoch": 3.1809743294283335, "percentage": 63.62, "elapsed_time": "2:10:24", "remaining_time": "1:14:34", "throughput": 19987.88, "total_tokens": 156398720} +{"current_steps": 49695, "total_steps": 78105, "loss": 0.2642, "lr": 1.7587003043727403e-06, "epoch": 3.1812944113693105, "percentage": 63.63, "elapsed_time": "2:10:25", "remaining_time": "1:14:33", "throughput": 19988.14, "total_tokens": 156414080} +{"current_steps": 49700, "total_steps": 78105, "loss": 0.1386, "lr": 1.7581667949873827e-06, "epoch": 3.1816144933102875, "percentage": 63.63, "elapsed_time": "2:10:25", "remaining_time": "1:14:32", "throughput": 19988.4, "total_tokens": 156429056} +{"current_steps": 49705, "total_steps": 78105, "loss": 0.1463, "lr": 1.7576333226453113e-06, "epoch": 3.1819345752512644, "percentage": 63.64, "elapsed_time": "2:10:26", "remaining_time": "1:14:31", "throughput": 19988.71, "total_tokens": 156445184} +{"current_steps": 49710, "total_steps": 78105, "loss": 0.1941, "lr": 1.7570998873731654e-06, "epoch": 3.1822546571922414, "percentage": 63.65, "elapsed_time": "2:10:27", "remaining_time": "1:14:31", "throughput": 19988.98, "total_tokens": 156460864} +{"current_steps": 49715, "total_steps": 78105, "loss": 0.2133, "lr": 1.756566489197581e-06, "epoch": 3.182574739133218, "percentage": 63.65, "elapsed_time": "2:10:28", "remaining_time": "1:14:30", "throughput": 19989.38, "total_tokens": 156478464} +{"current_steps": 49720, "total_steps": 78105, "loss": 0.1716, "lr": 1.7560331281451947e-06, "epoch": 3.182894821074195, "percentage": 63.66, "elapsed_time": "2:10:28", "remaining_time": "1:14:29", "throughput": 19989.7, "total_tokens": 156495296} +{"current_steps": 49725, "total_steps": 78105, "loss": 0.1428, "lr": 1.755499804242638e-06, "epoch": 3.183214903015172, "percentage": 63.66, "elapsed_time": "2:10:29", "remaining_time": "1:14:28", "throughput": 19989.97, "total_tokens": 156510464} +{"current_steps": 49730, "total_steps": 78105, "loss": 0.1476, "lr": 1.7549665175165435e-06, "epoch": 3.183534984956149, "percentage": 63.67, "elapsed_time": "2:10:30", "remaining_time": "1:14:27", "throughput": 19990.21, "total_tokens": 156525632} +{"current_steps": 49735, "total_steps": 78105, "loss": 0.1398, "lr": 1.7544332679935397e-06, "epoch": 3.1838550668971255, "percentage": 63.68, "elapsed_time": "2:10:30", "remaining_time": "1:14:26", "throughput": 19990.5, "total_tokens": 156541568} +{"current_steps": 49740, "total_steps": 78105, "loss": 0.1856, "lr": 1.753900055700256e-06, "epoch": 3.1841751488381025, "percentage": 63.68, "elapsed_time": "2:10:31", "remaining_time": "1:14:26", "throughput": 19990.73, "total_tokens": 156556288} +{"current_steps": 49745, "total_steps": 78105, "loss": 0.1636, "lr": 1.7533668806633163e-06, "epoch": 3.1844952307790795, "percentage": 63.69, "elapsed_time": "2:10:32", "remaining_time": "1:14:25", "throughput": 19990.96, "total_tokens": 156571328} +{"current_steps": 49750, "total_steps": 78105, "loss": 0.2179, "lr": 1.7528337429093468e-06, "epoch": 3.1848153127200565, "percentage": 63.7, "elapsed_time": "2:10:32", "remaining_time": "1:14:24", "throughput": 19991.21, "total_tokens": 156586432} +{"current_steps": 49755, "total_steps": 78105, "loss": 0.1316, "lr": 1.7523006424649668e-06, "epoch": 3.1851353946610335, "percentage": 63.7, "elapsed_time": "2:10:33", "remaining_time": "1:14:23", "throughput": 19991.52, "total_tokens": 156602560} +{"current_steps": 49760, "total_steps": 78105, "loss": 0.148, "lr": 1.7517675793567984e-06, "epoch": 3.18545547660201, "percentage": 63.71, "elapsed_time": "2:10:34", "remaining_time": "1:14:22", "throughput": 19991.78, "total_tokens": 156617984} +{"current_steps": 49765, "total_steps": 78105, "loss": 0.1879, "lr": 1.7512345536114605e-06, "epoch": 3.185775558542987, "percentage": 63.72, "elapsed_time": "2:10:34", "remaining_time": "1:14:21", "throughput": 19992.08, "total_tokens": 156634304} +{"current_steps": 49770, "total_steps": 78105, "loss": 0.1933, "lr": 1.750701565255568e-06, "epoch": 3.186095640483964, "percentage": 63.72, "elapsed_time": "2:10:35", "remaining_time": "1:14:20", "throughput": 19992.38, "total_tokens": 156650304} +{"current_steps": 49775, "total_steps": 78105, "loss": 0.1263, "lr": 1.750168614315737e-06, "epoch": 3.186415722424941, "percentage": 63.73, "elapsed_time": "2:10:36", "remaining_time": "1:14:20", "throughput": 19992.67, "total_tokens": 156666112} +{"current_steps": 49780, "total_steps": 78105, "loss": 0.1138, "lr": 1.7496357008185793e-06, "epoch": 3.1867358043659175, "percentage": 63.73, "elapsed_time": "2:10:36", "remaining_time": "1:14:19", "throughput": 19992.95, "total_tokens": 156681728} +{"current_steps": 49785, "total_steps": 78105, "loss": 0.1, "lr": 1.7491028247907063e-06, "epoch": 3.1870558863068945, "percentage": 63.74, "elapsed_time": "2:10:37", "remaining_time": "1:14:18", "throughput": 19993.17, "total_tokens": 156696320} +{"current_steps": 49790, "total_steps": 78105, "loss": 0.1686, "lr": 1.7485699862587267e-06, "epoch": 3.1873759682478715, "percentage": 63.75, "elapsed_time": "2:10:38", "remaining_time": "1:14:17", "throughput": 19993.51, "total_tokens": 156713088} +{"current_steps": 49795, "total_steps": 78105, "loss": 0.1719, "lr": 1.748037185249249e-06, "epoch": 3.1876960501888485, "percentage": 63.75, "elapsed_time": "2:10:38", "remaining_time": "1:14:16", "throughput": 19993.76, "total_tokens": 156728192} +{"current_steps": 49800, "total_steps": 78105, "loss": 0.1797, "lr": 1.747504421788876e-06, "epoch": 3.188016132129825, "percentage": 63.76, "elapsed_time": "2:10:39", "remaining_time": "1:14:15", "throughput": 19994.01, "total_tokens": 156743360} +{"current_steps": 49805, "total_steps": 78105, "loss": 0.2801, "lr": 1.7469716959042137e-06, "epoch": 3.188336214070802, "percentage": 63.77, "elapsed_time": "2:10:40", "remaining_time": "1:14:14", "throughput": 19994.39, "total_tokens": 156760960} +{"current_steps": 49810, "total_steps": 78105, "loss": 0.1715, "lr": 1.7464390076218618e-06, "epoch": 3.188656296011779, "percentage": 63.77, "elapsed_time": "2:10:40", "remaining_time": "1:14:14", "throughput": 19994.64, "total_tokens": 156775872} +{"current_steps": 49815, "total_steps": 78105, "loss": 0.1884, "lr": 1.7459063569684209e-06, "epoch": 3.188976377952756, "percentage": 63.78, "elapsed_time": "2:10:41", "remaining_time": "1:14:13", "throughput": 19994.88, "total_tokens": 156791040} +{"current_steps": 49820, "total_steps": 78105, "loss": 0.1933, "lr": 1.7453737439704882e-06, "epoch": 3.1892964598937326, "percentage": 63.79, "elapsed_time": "2:10:42", "remaining_time": "1:14:12", "throughput": 19995.2, "total_tokens": 156807360} +{"current_steps": 49825, "total_steps": 78105, "loss": 0.163, "lr": 1.7448411686546602e-06, "epoch": 3.1896165418347096, "percentage": 63.79, "elapsed_time": "2:10:42", "remaining_time": "1:14:11", "throughput": 19995.45, "total_tokens": 156822656} +{"current_steps": 49830, "total_steps": 78105, "loss": 0.2076, "lr": 1.7443086310475315e-06, "epoch": 3.1899366237756865, "percentage": 63.8, "elapsed_time": "2:10:43", "remaining_time": "1:14:10", "throughput": 19995.69, "total_tokens": 156837824} +{"current_steps": 49835, "total_steps": 78105, "loss": 0.1561, "lr": 1.7437761311756927e-06, "epoch": 3.1902567057166635, "percentage": 63.81, "elapsed_time": "2:10:44", "remaining_time": "1:14:09", "throughput": 19996.0, "total_tokens": 156853952} +{"current_steps": 49840, "total_steps": 78105, "loss": 0.2274, "lr": 1.7432436690657357e-06, "epoch": 3.1905767876576405, "percentage": 63.81, "elapsed_time": "2:10:44", "remaining_time": "1:14:08", "throughput": 19996.3, "total_tokens": 156870144} +{"current_steps": 49845, "total_steps": 78105, "loss": 0.1089, "lr": 1.7427112447442474e-06, "epoch": 3.190896869598617, "percentage": 63.82, "elapsed_time": "2:10:45", "remaining_time": "1:14:08", "throughput": 19996.6, "total_tokens": 156885952} +{"current_steps": 49850, "total_steps": 78105, "loss": 0.1579, "lr": 1.7421788582378153e-06, "epoch": 3.191216951539594, "percentage": 63.82, "elapsed_time": "2:10:46", "remaining_time": "1:14:07", "throughput": 19996.9, "total_tokens": 156901824} +{"current_steps": 49855, "total_steps": 78105, "loss": 0.2451, "lr": 1.741646509573023e-06, "epoch": 3.191537033480571, "percentage": 63.83, "elapsed_time": "2:10:46", "remaining_time": "1:14:06", "throughput": 19997.18, "total_tokens": 156917440} +{"current_steps": 49860, "total_steps": 78105, "loss": 0.152, "lr": 1.7411141987764552e-06, "epoch": 3.191857115421548, "percentage": 63.84, "elapsed_time": "2:10:47", "remaining_time": "1:14:05", "throughput": 19997.47, "total_tokens": 156933376} +{"current_steps": 49865, "total_steps": 78105, "loss": 0.2357, "lr": 1.7405819258746897e-06, "epoch": 3.1921771973625246, "percentage": 63.84, "elapsed_time": "2:10:48", "remaining_time": "1:14:04", "throughput": 19997.73, "total_tokens": 156948736} +{"current_steps": 49870, "total_steps": 78105, "loss": 0.141, "lr": 1.7400496908943085e-06, "epoch": 3.1924972793035016, "percentage": 63.85, "elapsed_time": "2:10:49", "remaining_time": "1:14:03", "throughput": 19998.04, "total_tokens": 156965120} +{"current_steps": 49875, "total_steps": 78105, "loss": 0.1475, "lr": 1.7395174938618864e-06, "epoch": 3.1928173612444786, "percentage": 63.86, "elapsed_time": "2:10:49", "remaining_time": "1:14:03", "throughput": 19998.29, "total_tokens": 156980608} +{"current_steps": 49880, "total_steps": 78105, "loss": 0.1343, "lr": 1.7389853348039996e-06, "epoch": 3.1931374431854556, "percentage": 63.86, "elapsed_time": "2:10:50", "remaining_time": "1:14:02", "throughput": 19998.61, "total_tokens": 156997056} +{"current_steps": 49885, "total_steps": 78105, "loss": 0.1241, "lr": 1.7384532137472221e-06, "epoch": 3.1934575251264326, "percentage": 63.87, "elapsed_time": "2:10:51", "remaining_time": "1:14:01", "throughput": 19998.89, "total_tokens": 157012800} +{"current_steps": 49890, "total_steps": 78105, "loss": 0.1486, "lr": 1.7379211307181235e-06, "epoch": 3.193777607067409, "percentage": 63.88, "elapsed_time": "2:10:51", "remaining_time": "1:14:00", "throughput": 19999.26, "total_tokens": 157030144} +{"current_steps": 49895, "total_steps": 78105, "loss": 0.2988, "lr": 1.7373890857432752e-06, "epoch": 3.194097689008386, "percentage": 63.88, "elapsed_time": "2:10:52", "remaining_time": "1:13:59", "throughput": 19999.51, "total_tokens": 157045312} +{"current_steps": 49900, "total_steps": 78105, "loss": 0.1294, "lr": 1.7368570788492422e-06, "epoch": 3.194417770949363, "percentage": 63.89, "elapsed_time": "2:10:53", "remaining_time": "1:13:58", "throughput": 19999.86, "total_tokens": 157062400} +{"current_steps": 49905, "total_steps": 78105, "loss": 0.1612, "lr": 1.7363251100625922e-06, "epoch": 3.19473785289034, "percentage": 63.89, "elapsed_time": "2:10:53", "remaining_time": "1:13:58", "throughput": 20000.21, "total_tokens": 157079488} +{"current_steps": 49910, "total_steps": 78105, "loss": 0.1299, "lr": 1.735793179409888e-06, "epoch": 3.1950579348313166, "percentage": 63.9, "elapsed_time": "2:10:54", "remaining_time": "1:13:57", "throughput": 20000.55, "total_tokens": 157096320} +{"current_steps": 49915, "total_steps": 78105, "loss": 0.1561, "lr": 1.7352612869176932e-06, "epoch": 3.1953780167722936, "percentage": 63.91, "elapsed_time": "2:10:55", "remaining_time": "1:13:56", "throughput": 20000.8, "total_tokens": 157111680} +{"current_steps": 49920, "total_steps": 78105, "loss": 0.1775, "lr": 1.7347294326125646e-06, "epoch": 3.1956980987132706, "percentage": 63.91, "elapsed_time": "2:10:55", "remaining_time": "1:13:55", "throughput": 20001.1, "total_tokens": 157127552} +{"current_steps": 49925, "total_steps": 78105, "loss": 0.1089, "lr": 1.7341976165210638e-06, "epoch": 3.1960181806542476, "percentage": 63.92, "elapsed_time": "2:10:56", "remaining_time": "1:13:54", "throughput": 20001.39, "total_tokens": 157143488} +{"current_steps": 49930, "total_steps": 78105, "loss": 0.197, "lr": 1.7336658386697436e-06, "epoch": 3.1963382625952246, "percentage": 63.93, "elapsed_time": "2:10:57", "remaining_time": "1:13:53", "throughput": 20001.64, "total_tokens": 157158720} +{"current_steps": 49935, "total_steps": 78105, "loss": 0.1638, "lr": 1.7331340990851597e-06, "epoch": 3.196658344536201, "percentage": 63.93, "elapsed_time": "2:10:58", "remaining_time": "1:13:52", "throughput": 20002.0, "total_tokens": 157176192} +{"current_steps": 49940, "total_steps": 78105, "loss": 0.147, "lr": 1.732602397793865e-06, "epoch": 3.196978426477178, "percentage": 63.94, "elapsed_time": "2:10:58", "remaining_time": "1:13:52", "throughput": 20002.29, "total_tokens": 157192192} +{"current_steps": 49945, "total_steps": 78105, "loss": 0.1966, "lr": 1.732070734822409e-06, "epoch": 3.197298508418155, "percentage": 63.95, "elapsed_time": "2:10:59", "remaining_time": "1:13:51", "throughput": 20002.59, "total_tokens": 157208384} +{"current_steps": 49950, "total_steps": 78105, "loss": 0.1784, "lr": 1.7315391101973416e-06, "epoch": 3.197618590359132, "percentage": 63.95, "elapsed_time": "2:11:00", "remaining_time": "1:13:50", "throughput": 20002.91, "total_tokens": 157224832} +{"current_steps": 49955, "total_steps": 78105, "loss": 0.1672, "lr": 1.7310075239452067e-06, "epoch": 3.1979386723001086, "percentage": 63.96, "elapsed_time": "2:11:00", "remaining_time": "1:13:49", "throughput": 20003.15, "total_tokens": 157240064} +{"current_steps": 49960, "total_steps": 78105, "loss": 0.2047, "lr": 1.7304759760925516e-06, "epoch": 3.1982587542410856, "percentage": 63.97, "elapsed_time": "2:11:01", "remaining_time": "1:13:48", "throughput": 20003.43, "total_tokens": 157256000} +{"current_steps": 49965, "total_steps": 78105, "loss": 0.1714, "lr": 1.729944466665917e-06, "epoch": 3.1985788361820626, "percentage": 63.97, "elapsed_time": "2:11:02", "remaining_time": "1:13:47", "throughput": 20003.78, "total_tokens": 157272704} +{"current_steps": 49970, "total_steps": 78105, "loss": 0.1405, "lr": 1.7294129956918459e-06, "epoch": 3.1988989181230396, "percentage": 63.98, "elapsed_time": "2:11:02", "remaining_time": "1:13:47", "throughput": 20004.11, "total_tokens": 157289664} +{"current_steps": 49975, "total_steps": 78105, "loss": 0.1312, "lr": 1.7288815631968752e-06, "epoch": 3.1992190000640166, "percentage": 63.98, "elapsed_time": "2:11:03", "remaining_time": "1:13:46", "throughput": 20004.46, "total_tokens": 157306688} +{"current_steps": 49980, "total_steps": 78105, "loss": 0.1973, "lr": 1.7283501692075432e-06, "epoch": 3.199539082004993, "percentage": 63.99, "elapsed_time": "2:11:04", "remaining_time": "1:13:45", "throughput": 20004.73, "total_tokens": 157322688} +{"current_steps": 49985, "total_steps": 78105, "loss": 0.1386, "lr": 1.7278188137503837e-06, "epoch": 3.19985916394597, "percentage": 64.0, "elapsed_time": "2:11:04", "remaining_time": "1:13:44", "throughput": 20004.97, "total_tokens": 157337664} +{"current_steps": 49990, "total_steps": 78105, "loss": 0.1684, "lr": 1.727287496851931e-06, "epoch": 3.200179245886947, "percentage": 64.0, "elapsed_time": "2:11:05", "remaining_time": "1:13:43", "throughput": 20005.24, "total_tokens": 157353472} +{"current_steps": 49995, "total_steps": 78105, "loss": 0.1185, "lr": 1.7267562185387148e-06, "epoch": 3.200499327827924, "percentage": 64.01, "elapsed_time": "2:11:06", "remaining_time": "1:13:42", "throughput": 20005.49, "total_tokens": 157368768} +{"current_steps": 50000, "total_steps": 78105, "loss": 0.2063, "lr": 1.7262249788372659e-06, "epoch": 3.2008194097689007, "percentage": 64.02, "elapsed_time": "2:11:06", "remaining_time": "1:13:42", "throughput": 20005.83, "total_tokens": 157385728} +{"current_steps": 50005, "total_steps": 78105, "loss": 0.2027, "lr": 1.7256937777741117e-06, "epoch": 3.2011394917098777, "percentage": 64.02, "elapsed_time": "2:11:07", "remaining_time": "1:13:41", "throughput": 20006.09, "total_tokens": 157401152} +{"current_steps": 50010, "total_steps": 78105, "loss": 0.1551, "lr": 1.7251626153757765e-06, "epoch": 3.2014595736508547, "percentage": 64.03, "elapsed_time": "2:11:08", "remaining_time": "1:13:40", "throughput": 20006.36, "total_tokens": 157416576} +{"current_steps": 50015, "total_steps": 78105, "loss": 0.2308, "lr": 1.7246314916687846e-06, "epoch": 3.2017796555918316, "percentage": 64.04, "elapsed_time": "2:11:09", "remaining_time": "1:13:39", "throughput": 20006.62, "total_tokens": 157432320} +{"current_steps": 50020, "total_steps": 78105, "loss": 0.1514, "lr": 1.7241004066796566e-06, "epoch": 3.2020997375328086, "percentage": 64.04, "elapsed_time": "2:11:09", "remaining_time": "1:13:38", "throughput": 20006.97, "total_tokens": 157449152} +{"current_steps": 50025, "total_steps": 78105, "loss": 0.1343, "lr": 1.723569360434914e-06, "epoch": 3.202419819473785, "percentage": 64.05, "elapsed_time": "2:11:10", "remaining_time": "1:13:37", "throughput": 20007.29, "total_tokens": 157465344} +{"current_steps": 50030, "total_steps": 78105, "loss": 0.1736, "lr": 1.723038352961072e-06, "epoch": 3.202739901414762, "percentage": 64.05, "elapsed_time": "2:11:11", "remaining_time": "1:13:36", "throughput": 20007.52, "total_tokens": 157480256} +{"current_steps": 50035, "total_steps": 78105, "loss": 0.244, "lr": 1.7225073842846487e-06, "epoch": 3.203059983355739, "percentage": 64.06, "elapsed_time": "2:11:11", "remaining_time": "1:13:36", "throughput": 20007.83, "total_tokens": 157496320} +{"current_steps": 50040, "total_steps": 78105, "loss": 0.1672, "lr": 1.721976454432156e-06, "epoch": 3.203380065296716, "percentage": 64.07, "elapsed_time": "2:11:12", "remaining_time": "1:13:35", "throughput": 20008.11, "total_tokens": 157512128} +{"current_steps": 50045, "total_steps": 78105, "loss": 0.1483, "lr": 1.721445563430107e-06, "epoch": 3.2037001472376927, "percentage": 64.07, "elapsed_time": "2:11:13", "remaining_time": "1:13:34", "throughput": 20008.33, "total_tokens": 157526656} +{"current_steps": 50050, "total_steps": 78105, "loss": 0.0944, "lr": 1.7209147113050106e-06, "epoch": 3.2040202291786697, "percentage": 64.08, "elapsed_time": "2:11:13", "remaining_time": "1:13:33", "throughput": 20008.6, "total_tokens": 157542144} +{"current_steps": 50055, "total_steps": 78105, "loss": 0.1734, "lr": 1.7203838980833753e-06, "epoch": 3.2043403111196467, "percentage": 64.09, "elapsed_time": "2:11:14", "remaining_time": "1:13:32", "throughput": 20008.86, "total_tokens": 157557888} +{"current_steps": 50060, "total_steps": 78105, "loss": 0.1445, "lr": 1.7198531237917087e-06, "epoch": 3.2046603930606237, "percentage": 64.09, "elapsed_time": "2:11:15", "remaining_time": "1:13:31", "throughput": 20009.15, "total_tokens": 157573760} +{"current_steps": 50065, "total_steps": 78105, "loss": 0.132, "lr": 1.7193223884565121e-06, "epoch": 3.2049804750016, "percentage": 64.1, "elapsed_time": "2:11:15", "remaining_time": "1:13:30", "throughput": 20009.39, "total_tokens": 157588928} +{"current_steps": 50070, "total_steps": 78105, "loss": 0.1412, "lr": 1.7187916921042902e-06, "epoch": 3.205300556942577, "percentage": 64.11, "elapsed_time": "2:11:16", "remaining_time": "1:13:30", "throughput": 20009.62, "total_tokens": 157603776} +{"current_steps": 50075, "total_steps": 78105, "loss": 0.1607, "lr": 1.7182610347615409e-06, "epoch": 3.205620638883554, "percentage": 64.11, "elapsed_time": "2:11:17", "remaining_time": "1:13:29", "throughput": 20009.92, "total_tokens": 157620160} +{"current_steps": 50080, "total_steps": 78105, "loss": 0.1085, "lr": 1.7177304164547643e-06, "epoch": 3.205940720824531, "percentage": 64.12, "elapsed_time": "2:11:17", "remaining_time": "1:13:28", "throughput": 20010.16, "total_tokens": 157635136} +{"current_steps": 50085, "total_steps": 78105, "loss": 0.1608, "lr": 1.7171998372104553e-06, "epoch": 3.206260802765508, "percentage": 64.13, "elapsed_time": "2:11:18", "remaining_time": "1:13:27", "throughput": 20010.37, "total_tokens": 157649728} +{"current_steps": 50090, "total_steps": 78105, "loss": 0.0994, "lr": 1.71666929705511e-06, "epoch": 3.2065808847064847, "percentage": 64.13, "elapsed_time": "2:11:19", "remaining_time": "1:13:26", "throughput": 20010.64, "total_tokens": 157665152} +{"current_steps": 50095, "total_steps": 78105, "loss": 0.1194, "lr": 1.7161387960152187e-06, "epoch": 3.2069009666474617, "percentage": 64.14, "elapsed_time": "2:11:19", "remaining_time": "1:13:25", "throughput": 20010.87, "total_tokens": 157680064} +{"current_steps": 50100, "total_steps": 78105, "loss": 0.1971, "lr": 1.7156083341172735e-06, "epoch": 3.2072210485884387, "percentage": 64.14, "elapsed_time": "2:11:20", "remaining_time": "1:13:24", "throughput": 20011.14, "total_tokens": 157695552} +{"current_steps": 50105, "total_steps": 78105, "loss": 0.1669, "lr": 1.7150779113877619e-06, "epoch": 3.2075411305294157, "percentage": 64.15, "elapsed_time": "2:11:21", "remaining_time": "1:13:24", "throughput": 20011.4, "total_tokens": 157710784} +{"current_steps": 50110, "total_steps": 78105, "loss": 0.1842, "lr": 1.714547527853171e-06, "epoch": 3.2078612124703922, "percentage": 64.16, "elapsed_time": "2:11:21", "remaining_time": "1:13:23", "throughput": 20011.64, "total_tokens": 157725312} +{"current_steps": 50115, "total_steps": 78105, "loss": 0.1329, "lr": 1.7140171835399862e-06, "epoch": 3.2081812944113692, "percentage": 64.16, "elapsed_time": "2:11:22", "remaining_time": "1:13:22", "throughput": 20011.91, "total_tokens": 157741184} +{"current_steps": 50120, "total_steps": 78105, "loss": 0.1845, "lr": 1.7134868784746883e-06, "epoch": 3.2085013763523462, "percentage": 64.17, "elapsed_time": "2:11:23", "remaining_time": "1:13:21", "throughput": 20012.25, "total_tokens": 157758144} +{"current_steps": 50125, "total_steps": 78105, "loss": 0.15, "lr": 1.7129566126837598e-06, "epoch": 3.208821458293323, "percentage": 64.18, "elapsed_time": "2:11:23", "remaining_time": "1:13:20", "throughput": 20012.5, "total_tokens": 157773120} +{"current_steps": 50130, "total_steps": 78105, "loss": 0.0986, "lr": 1.7124263861936774e-06, "epoch": 3.2091415402342998, "percentage": 64.18, "elapsed_time": "2:11:24", "remaining_time": "1:13:19", "throughput": 20012.77, "total_tokens": 157788736} +{"current_steps": 50135, "total_steps": 78105, "loss": 0.1814, "lr": 1.7118961990309196e-06, "epoch": 3.2094616221752768, "percentage": 64.19, "elapsed_time": "2:11:25", "remaining_time": "1:13:19", "throughput": 20013.05, "total_tokens": 157804480} +{"current_steps": 50140, "total_steps": 78105, "loss": 0.1131, "lr": 1.71136605122196e-06, "epoch": 3.2097817041162537, "percentage": 64.2, "elapsed_time": "2:11:25", "remaining_time": "1:13:18", "throughput": 20013.38, "total_tokens": 157820992} +{"current_steps": 50145, "total_steps": 78105, "loss": 0.0996, "lr": 1.710835942793273e-06, "epoch": 3.2101017860572307, "percentage": 64.2, "elapsed_time": "2:11:26", "remaining_time": "1:13:17", "throughput": 20013.6, "total_tokens": 157835520} +{"current_steps": 50150, "total_steps": 78105, "loss": 0.1827, "lr": 1.7103058737713275e-06, "epoch": 3.2104218679982077, "percentage": 64.21, "elapsed_time": "2:11:27", "remaining_time": "1:13:16", "throughput": 20013.83, "total_tokens": 157850560} +{"current_steps": 50155, "total_steps": 78105, "loss": 0.1209, "lr": 1.7097758441825934e-06, "epoch": 3.2107419499391843, "percentage": 64.21, "elapsed_time": "2:11:27", "remaining_time": "1:13:15", "throughput": 20014.08, "total_tokens": 157865728} +{"current_steps": 50160, "total_steps": 78105, "loss": 0.0946, "lr": 1.7092458540535378e-06, "epoch": 3.2110620318801613, "percentage": 64.22, "elapsed_time": "2:11:28", "remaining_time": "1:13:14", "throughput": 20014.36, "total_tokens": 157881920} +{"current_steps": 50165, "total_steps": 78105, "loss": 0.1415, "lr": 1.7087159034106255e-06, "epoch": 3.2113821138211383, "percentage": 64.23, "elapsed_time": "2:11:29", "remaining_time": "1:13:13", "throughput": 20014.64, "total_tokens": 157897792} +{"current_steps": 50170, "total_steps": 78105, "loss": 0.1709, "lr": 1.7081859922803184e-06, "epoch": 3.2117021957621152, "percentage": 64.23, "elapsed_time": "2:11:29", "remaining_time": "1:13:13", "throughput": 20014.91, "total_tokens": 157913024} +{"current_steps": 50175, "total_steps": 78105, "loss": 0.146, "lr": 1.7076561206890787e-06, "epoch": 3.212022277703092, "percentage": 64.24, "elapsed_time": "2:11:30", "remaining_time": "1:13:12", "throughput": 20015.23, "total_tokens": 157929728} +{"current_steps": 50180, "total_steps": 78105, "loss": 0.1187, "lr": 1.7071262886633663e-06, "epoch": 3.212342359644069, "percentage": 64.25, "elapsed_time": "2:11:31", "remaining_time": "1:13:11", "throughput": 20015.5, "total_tokens": 157945280} +{"current_steps": 50185, "total_steps": 78105, "loss": 0.1315, "lr": 1.706596496229636e-06, "epoch": 3.2126624415850458, "percentage": 64.25, "elapsed_time": "2:11:31", "remaining_time": "1:13:10", "throughput": 20015.83, "total_tokens": 157961920} +{"current_steps": 50190, "total_steps": 78105, "loss": 0.1425, "lr": 1.7060667434143446e-06, "epoch": 3.2129825235260228, "percentage": 64.26, "elapsed_time": "2:11:32", "remaining_time": "1:13:09", "throughput": 20016.09, "total_tokens": 157977792} +{"current_steps": 50195, "total_steps": 78105, "loss": 0.117, "lr": 1.7055370302439439e-06, "epoch": 3.2133026054669998, "percentage": 64.27, "elapsed_time": "2:11:33", "remaining_time": "1:13:08", "throughput": 20016.37, "total_tokens": 157993664} +{"current_steps": 50200, "total_steps": 78105, "loss": 0.1704, "lr": 1.7050073567448867e-06, "epoch": 3.2136226874079763, "percentage": 64.27, "elapsed_time": "2:11:33", "remaining_time": "1:13:08", "throughput": 20016.59, "total_tokens": 158008704} +{"current_steps": 50205, "total_steps": 78105, "loss": 0.1336, "lr": 1.7044777229436201e-06, "epoch": 3.2139427693489533, "percentage": 64.28, "elapsed_time": "2:11:34", "remaining_time": "1:13:07", "throughput": 20016.89, "total_tokens": 158024640} +{"current_steps": 50210, "total_steps": 78105, "loss": 0.18, "lr": 1.7039481288665935e-06, "epoch": 3.2142628512899303, "percentage": 64.29, "elapsed_time": "2:11:35", "remaining_time": "1:13:06", "throughput": 20017.14, "total_tokens": 158039680} +{"current_steps": 50215, "total_steps": 78105, "loss": 0.1753, "lr": 1.7034185745402498e-06, "epoch": 3.2145829332309073, "percentage": 64.29, "elapsed_time": "2:11:35", "remaining_time": "1:13:05", "throughput": 20017.41, "total_tokens": 158055296} +{"current_steps": 50220, "total_steps": 78105, "loss": 0.1483, "lr": 1.7028890599910337e-06, "epoch": 3.214903015171884, "percentage": 64.3, "elapsed_time": "2:11:37", "remaining_time": "1:13:04", "throughput": 20016.42, "total_tokens": 158070528} +{"current_steps": 50225, "total_steps": 78105, "loss": 0.1415, "lr": 1.7023595852453855e-06, "epoch": 3.215223097112861, "percentage": 64.3, "elapsed_time": "2:11:37", "remaining_time": "1:13:04", "throughput": 20016.7, "total_tokens": 158086016} +{"current_steps": 50230, "total_steps": 78105, "loss": 0.176, "lr": 1.7018301503297447e-06, "epoch": 3.215543179053838, "percentage": 64.31, "elapsed_time": "2:11:38", "remaining_time": "1:13:03", "throughput": 20016.97, "total_tokens": 158101568} +{"current_steps": 50235, "total_steps": 78105, "loss": 0.1828, "lr": 1.7013007552705495e-06, "epoch": 3.215863260994815, "percentage": 64.32, "elapsed_time": "2:11:39", "remaining_time": "1:13:02", "throughput": 20017.19, "total_tokens": 158116352} +{"current_steps": 50240, "total_steps": 78105, "loss": 0.1761, "lr": 1.7007714000942338e-06, "epoch": 3.216183342935792, "percentage": 64.32, "elapsed_time": "2:11:39", "remaining_time": "1:13:01", "throughput": 20017.43, "total_tokens": 158131392} +{"current_steps": 50245, "total_steps": 78105, "loss": 0.1337, "lr": 1.7002420848272317e-06, "epoch": 3.2165034248767683, "percentage": 64.33, "elapsed_time": "2:11:40", "remaining_time": "1:13:00", "throughput": 20017.7, "total_tokens": 158147008} +{"current_steps": 50250, "total_steps": 78105, "loss": 0.1049, "lr": 1.6997128094959736e-06, "epoch": 3.2168235068177453, "percentage": 64.34, "elapsed_time": "2:11:41", "remaining_time": "1:12:59", "throughput": 20017.9, "total_tokens": 158161536} +{"current_steps": 50255, "total_steps": 78105, "loss": 0.2973, "lr": 1.6991835741268903e-06, "epoch": 3.2171435887587223, "percentage": 64.34, "elapsed_time": "2:11:41", "remaining_time": "1:12:58", "throughput": 20018.21, "total_tokens": 158177600} +{"current_steps": 50260, "total_steps": 78105, "loss": 0.2662, "lr": 1.6986543787464065e-06, "epoch": 3.2174636706996993, "percentage": 64.35, "elapsed_time": "2:11:42", "remaining_time": "1:12:58", "throughput": 20018.43, "total_tokens": 158192384} +{"current_steps": 50265, "total_steps": 78105, "loss": 0.2164, "lr": 1.6981252233809505e-06, "epoch": 3.217783752640676, "percentage": 64.36, "elapsed_time": "2:11:42", "remaining_time": "1:12:57", "throughput": 20018.68, "total_tokens": 158207424} +{"current_steps": 50270, "total_steps": 78105, "loss": 0.3059, "lr": 1.697596108056943e-06, "epoch": 3.218103834581653, "percentage": 64.36, "elapsed_time": "2:11:43", "remaining_time": "1:12:56", "throughput": 20018.99, "total_tokens": 158223680} +{"current_steps": 50275, "total_steps": 78105, "loss": 0.1693, "lr": 1.6970670328008066e-06, "epoch": 3.21842391652263, "percentage": 64.37, "elapsed_time": "2:11:45", "remaining_time": "1:12:56", "throughput": 20015.73, "total_tokens": 158240448} +{"current_steps": 50280, "total_steps": 78105, "loss": 0.1316, "lr": 1.69653799763896e-06, "epoch": 3.218743998463607, "percentage": 64.37, "elapsed_time": "2:11:46", "remaining_time": "1:12:55", "throughput": 20015.98, "total_tokens": 158255552} +{"current_steps": 50285, "total_steps": 78105, "loss": 0.1596, "lr": 1.6960090025978204e-06, "epoch": 3.219064080404584, "percentage": 64.38, "elapsed_time": "2:11:47", "remaining_time": "1:12:54", "throughput": 20016.33, "total_tokens": 158272704} +{"current_steps": 50290, "total_steps": 78105, "loss": 0.1489, "lr": 1.6954800477038046e-06, "epoch": 3.2193841623455604, "percentage": 64.39, "elapsed_time": "2:11:47", "remaining_time": "1:12:53", "throughput": 20016.6, "total_tokens": 158288064} +{"current_steps": 50295, "total_steps": 78105, "loss": 0.1086, "lr": 1.6949511329833236e-06, "epoch": 3.2197042442865373, "percentage": 64.39, "elapsed_time": "2:11:48", "remaining_time": "1:12:52", "throughput": 20016.94, "total_tokens": 158304704} +{"current_steps": 50300, "total_steps": 78105, "loss": 0.1302, "lr": 1.6944222584627907e-06, "epoch": 3.2200243262275143, "percentage": 64.4, "elapsed_time": "2:11:49", "remaining_time": "1:12:52", "throughput": 20017.22, "total_tokens": 158320384} +{"current_steps": 50305, "total_steps": 78105, "loss": 0.15, "lr": 1.6938934241686128e-06, "epoch": 3.2203444081684913, "percentage": 64.41, "elapsed_time": "2:11:49", "remaining_time": "1:12:51", "throughput": 20017.44, "total_tokens": 158335168} +{"current_steps": 50310, "total_steps": 78105, "loss": 0.1386, "lr": 1.6933646301271993e-06, "epoch": 3.220664490109468, "percentage": 64.41, "elapsed_time": "2:11:50", "remaining_time": "1:12:50", "throughput": 20017.71, "total_tokens": 158350784} +{"current_steps": 50315, "total_steps": 78105, "loss": 0.1085, "lr": 1.692835876364954e-06, "epoch": 3.220984572050445, "percentage": 64.42, "elapsed_time": "2:11:51", "remaining_time": "1:12:49", "throughput": 20017.93, "total_tokens": 158365504} +{"current_steps": 50320, "total_steps": 78105, "loss": 0.1634, "lr": 1.6923071629082815e-06, "epoch": 3.221304653991422, "percentage": 64.43, "elapsed_time": "2:11:54", "remaining_time": "1:12:49", "throughput": 20012.42, "total_tokens": 158380416} +{"current_steps": 50325, "total_steps": 78105, "loss": 0.1808, "lr": 1.6917784897835815e-06, "epoch": 3.221624735932399, "percentage": 64.43, "elapsed_time": "2:11:54", "remaining_time": "1:12:49", "throughput": 20012.76, "total_tokens": 158397184} +{"current_steps": 50330, "total_steps": 78105, "loss": 0.2651, "lr": 1.6912498570172542e-06, "epoch": 3.2219448178733754, "percentage": 64.44, "elapsed_time": "2:11:55", "remaining_time": "1:12:48", "throughput": 20012.98, "total_tokens": 158411904} +{"current_steps": 50335, "total_steps": 78105, "loss": 0.2043, "lr": 1.6907212646356957e-06, "epoch": 3.2222648998143524, "percentage": 64.45, "elapsed_time": "2:11:56", "remaining_time": "1:12:47", "throughput": 20013.24, "total_tokens": 158427264} +{"current_steps": 50340, "total_steps": 78105, "loss": 0.1144, "lr": 1.6901927126653028e-06, "epoch": 3.2225849817553294, "percentage": 64.45, "elapsed_time": "2:11:57", "remaining_time": "1:12:46", "throughput": 20012.78, "total_tokens": 158442240} +{"current_steps": 50345, "total_steps": 78105, "loss": 0.2137, "lr": 1.689664201132467e-06, "epoch": 3.2229050636963064, "percentage": 64.46, "elapsed_time": "2:11:57", "remaining_time": "1:12:45", "throughput": 20013.05, "total_tokens": 158457920} +{"current_steps": 50350, "total_steps": 78105, "loss": 0.0984, "lr": 1.6891357300635803e-06, "epoch": 3.2232251456372834, "percentage": 64.46, "elapsed_time": "2:11:58", "remaining_time": "1:12:44", "throughput": 20013.33, "total_tokens": 158473664} +{"current_steps": 50355, "total_steps": 78105, "loss": 0.2136, "lr": 1.6886072994850322e-06, "epoch": 3.22354522757826, "percentage": 64.47, "elapsed_time": "2:11:59", "remaining_time": "1:12:44", "throughput": 20013.68, "total_tokens": 158491200} +{"current_steps": 50360, "total_steps": 78105, "loss": 0.1379, "lr": 1.688078909423208e-06, "epoch": 3.223865309519237, "percentage": 64.48, "elapsed_time": "2:11:59", "remaining_time": "1:12:43", "throughput": 20013.95, "total_tokens": 158506880} +{"current_steps": 50365, "total_steps": 78105, "loss": 0.1701, "lr": 1.687550559904495e-06, "epoch": 3.224185391460214, "percentage": 64.48, "elapsed_time": "2:12:00", "remaining_time": "1:12:42", "throughput": 20014.21, "total_tokens": 158522112} +{"current_steps": 50370, "total_steps": 78105, "loss": 0.2155, "lr": 1.6870222509552742e-06, "epoch": 3.224505473401191, "percentage": 64.49, "elapsed_time": "2:12:01", "remaining_time": "1:12:41", "throughput": 20014.5, "total_tokens": 158538176} +{"current_steps": 50375, "total_steps": 78105, "loss": 0.1279, "lr": 1.6864939826019288e-06, "epoch": 3.2248255553421674, "percentage": 64.5, "elapsed_time": "2:12:02", "remaining_time": "1:12:40", "throughput": 20014.35, "total_tokens": 158553728} +{"current_steps": 50380, "total_steps": 78105, "loss": 0.1478, "lr": 1.6859657548708353e-06, "epoch": 3.2251456372831444, "percentage": 64.5, "elapsed_time": "2:12:02", "remaining_time": "1:12:39", "throughput": 20014.6, "total_tokens": 158568768} +{"current_steps": 50385, "total_steps": 78105, "loss": 0.1325, "lr": 1.6854375677883727e-06, "epoch": 3.2254657192241214, "percentage": 64.51, "elapsed_time": "2:12:03", "remaining_time": "1:12:39", "throughput": 20014.91, "total_tokens": 158585024} +{"current_steps": 50390, "total_steps": 78105, "loss": 0.087, "lr": 1.6849094213809142e-06, "epoch": 3.2257858011650984, "percentage": 64.52, "elapsed_time": "2:12:04", "remaining_time": "1:12:38", "throughput": 20015.19, "total_tokens": 158601024} +{"current_steps": 50395, "total_steps": 78105, "loss": 0.2116, "lr": 1.6843813156748345e-06, "epoch": 3.226105883106075, "percentage": 64.52, "elapsed_time": "2:12:04", "remaining_time": "1:12:37", "throughput": 20015.45, "total_tokens": 158616512} +{"current_steps": 50400, "total_steps": 78105, "loss": 0.2442, "lr": 1.6838532506965028e-06, "epoch": 3.226425965047052, "percentage": 64.53, "elapsed_time": "2:12:05", "remaining_time": "1:12:36", "throughput": 20015.74, "total_tokens": 158632512} +{"current_steps": 50405, "total_steps": 78105, "loss": 0.1336, "lr": 1.6833252264722885e-06, "epoch": 3.226746046988029, "percentage": 64.53, "elapsed_time": "2:12:06", "remaining_time": "1:12:35", "throughput": 20016.02, "total_tokens": 158648192} +{"current_steps": 50410, "total_steps": 78105, "loss": 0.1862, "lr": 1.6827972430285595e-06, "epoch": 3.227066128929006, "percentage": 64.54, "elapsed_time": "2:12:06", "remaining_time": "1:12:34", "throughput": 20016.29, "total_tokens": 158663744} +{"current_steps": 50415, "total_steps": 78105, "loss": 0.1386, "lr": 1.6822693003916784e-06, "epoch": 3.227386210869983, "percentage": 64.55, "elapsed_time": "2:12:07", "remaining_time": "1:12:34", "throughput": 20016.6, "total_tokens": 158679808} +{"current_steps": 50420, "total_steps": 78105, "loss": 0.1487, "lr": 1.6817413985880098e-06, "epoch": 3.2277062928109594, "percentage": 64.55, "elapsed_time": "2:12:11", "remaining_time": "1:12:35", "throughput": 20008.63, "total_tokens": 158695552} +{"current_steps": 50425, "total_steps": 78105, "loss": 0.1717, "lr": 1.6812135376439125e-06, "epoch": 3.2280263747519364, "percentage": 64.56, "elapsed_time": "2:12:12", "remaining_time": "1:12:34", "throughput": 20008.89, "total_tokens": 158711040} +{"current_steps": 50430, "total_steps": 78105, "loss": 0.2013, "lr": 1.680685717585748e-06, "epoch": 3.2283464566929134, "percentage": 64.57, "elapsed_time": "2:12:12", "remaining_time": "1:12:33", "throughput": 20009.11, "total_tokens": 158725888} +{"current_steps": 50435, "total_steps": 78105, "loss": 0.1652, "lr": 1.6801579384398698e-06, "epoch": 3.2286665386338904, "percentage": 64.57, "elapsed_time": "2:12:13", "remaining_time": "1:12:32", "throughput": 20009.41, "total_tokens": 158741760} +{"current_steps": 50440, "total_steps": 78105, "loss": 0.1905, "lr": 1.6796302002326347e-06, "epoch": 3.228986620574867, "percentage": 64.58, "elapsed_time": "2:12:14", "remaining_time": "1:12:31", "throughput": 20009.62, "total_tokens": 158756352} +{"current_steps": 50445, "total_steps": 78105, "loss": 0.1151, "lr": 1.6791025029903932e-06, "epoch": 3.229306702515844, "percentage": 64.59, "elapsed_time": "2:12:14", "remaining_time": "1:12:30", "throughput": 20009.95, "total_tokens": 158773120} +{"current_steps": 50450, "total_steps": 78105, "loss": 0.1392, "lr": 1.6785748467394974e-06, "epoch": 3.229626784456821, "percentage": 64.59, "elapsed_time": "2:12:15", "remaining_time": "1:12:29", "throughput": 20010.24, "total_tokens": 158789312} +{"current_steps": 50455, "total_steps": 78105, "loss": 0.1836, "lr": 1.6780472315062947e-06, "epoch": 3.229946866397798, "percentage": 64.6, "elapsed_time": "2:12:16", "remaining_time": "1:12:29", "throughput": 20010.49, "total_tokens": 158804544} +{"current_steps": 50460, "total_steps": 78105, "loss": 0.2771, "lr": 1.6775196573171315e-06, "epoch": 3.230266948338775, "percentage": 64.61, "elapsed_time": "2:12:16", "remaining_time": "1:12:28", "throughput": 20010.74, "total_tokens": 158819904} +{"current_steps": 50465, "total_steps": 78105, "loss": 0.1154, "lr": 1.676992124198354e-06, "epoch": 3.2305870302797515, "percentage": 64.61, "elapsed_time": "2:12:17", "remaining_time": "1:12:27", "throughput": 20010.94, "total_tokens": 158834112} +{"current_steps": 50470, "total_steps": 78105, "loss": 0.1504, "lr": 1.6764646321763014e-06, "epoch": 3.2309071122207285, "percentage": 64.62, "elapsed_time": "2:12:18", "remaining_time": "1:12:26", "throughput": 20011.22, "total_tokens": 158850112} +{"current_steps": 50475, "total_steps": 78105, "loss": 0.1946, "lr": 1.6759371812773163e-06, "epoch": 3.2312271941617055, "percentage": 64.62, "elapsed_time": "2:12:18", "remaining_time": "1:12:25", "throughput": 20011.51, "total_tokens": 158866176} +{"current_steps": 50480, "total_steps": 78105, "loss": 0.1821, "lr": 1.675409771527735e-06, "epoch": 3.2315472761026824, "percentage": 64.63, "elapsed_time": "2:12:19", "remaining_time": "1:12:24", "throughput": 20011.72, "total_tokens": 158880768} +{"current_steps": 50485, "total_steps": 78105, "loss": 0.242, "lr": 1.6748824029538963e-06, "epoch": 3.231867358043659, "percentage": 64.64, "elapsed_time": "2:12:20", "remaining_time": "1:12:23", "throughput": 20011.98, "total_tokens": 158895872} +{"current_steps": 50490, "total_steps": 78105, "loss": 0.1342, "lr": 1.6743550755821308e-06, "epoch": 3.232187439984636, "percentage": 64.64, "elapsed_time": "2:12:20", "remaining_time": "1:12:23", "throughput": 20012.25, "total_tokens": 158911872} +{"current_steps": 50495, "total_steps": 78105, "loss": 0.1507, "lr": 1.673827789438774e-06, "epoch": 3.232507521925613, "percentage": 64.65, "elapsed_time": "2:12:21", "remaining_time": "1:12:22", "throughput": 20012.58, "total_tokens": 158928768} +{"current_steps": 50500, "total_steps": 78105, "loss": 0.1835, "lr": 1.6733005445501521e-06, "epoch": 3.23282760386659, "percentage": 64.66, "elapsed_time": "2:12:22", "remaining_time": "1:12:21", "throughput": 20012.98, "total_tokens": 158947392} +{"current_steps": 50505, "total_steps": 78105, "loss": 0.1034, "lr": 1.6727733409425962e-06, "epoch": 3.233147685807567, "percentage": 64.66, "elapsed_time": "2:12:22", "remaining_time": "1:12:20", "throughput": 20013.27, "total_tokens": 158963520} +{"current_steps": 50510, "total_steps": 78105, "loss": 0.172, "lr": 1.6722461786424299e-06, "epoch": 3.2334677677485435, "percentage": 64.67, "elapsed_time": "2:12:23", "remaining_time": "1:12:19", "throughput": 20013.56, "total_tokens": 158979392} +{"current_steps": 50515, "total_steps": 78105, "loss": 0.2307, "lr": 1.6717190576759787e-06, "epoch": 3.2337878496895205, "percentage": 64.68, "elapsed_time": "2:12:24", "remaining_time": "1:12:18", "throughput": 20013.75, "total_tokens": 158993728} +{"current_steps": 50520, "total_steps": 78105, "loss": 0.1392, "lr": 1.6711919780695623e-06, "epoch": 3.2341079316304975, "percentage": 64.68, "elapsed_time": "2:12:24", "remaining_time": "1:12:18", "throughput": 20013.99, "total_tokens": 159008768} +{"current_steps": 50525, "total_steps": 78105, "loss": 0.1858, "lr": 1.6706649398495018e-06, "epoch": 3.2344280135714745, "percentage": 64.69, "elapsed_time": "2:12:25", "remaining_time": "1:12:17", "throughput": 20014.31, "total_tokens": 159025216} +{"current_steps": 50530, "total_steps": 78105, "loss": 0.1169, "lr": 1.6701379430421153e-06, "epoch": 3.234748095512451, "percentage": 64.69, "elapsed_time": "2:12:26", "remaining_time": "1:12:16", "throughput": 20014.55, "total_tokens": 159040064} +{"current_steps": 50535, "total_steps": 78105, "loss": 0.1845, "lr": 1.6696109876737163e-06, "epoch": 3.235068177453428, "percentage": 64.7, "elapsed_time": "2:12:26", "remaining_time": "1:12:15", "throughput": 20014.79, "total_tokens": 159055616} +{"current_steps": 50540, "total_steps": 78105, "loss": 0.176, "lr": 1.6690840737706195e-06, "epoch": 3.235388259394405, "percentage": 64.71, "elapsed_time": "2:12:27", "remaining_time": "1:12:14", "throughput": 20015.07, "total_tokens": 159071296} +{"current_steps": 50545, "total_steps": 78105, "loss": 0.2208, "lr": 1.6685572013591355e-06, "epoch": 3.235708341335382, "percentage": 64.71, "elapsed_time": "2:12:28", "remaining_time": "1:12:13", "throughput": 20015.29, "total_tokens": 159086016} +{"current_steps": 50550, "total_steps": 78105, "loss": 0.144, "lr": 1.6680303704655753e-06, "epoch": 3.236028423276359, "percentage": 64.72, "elapsed_time": "2:12:28", "remaining_time": "1:12:12", "throughput": 20015.54, "total_tokens": 159100864} +{"current_steps": 50555, "total_steps": 78105, "loss": 0.1831, "lr": 1.6675035811162437e-06, "epoch": 3.2363485052173355, "percentage": 64.73, "elapsed_time": "2:12:29", "remaining_time": "1:12:12", "throughput": 20015.84, "total_tokens": 159117056} +{"current_steps": 50560, "total_steps": 78105, "loss": 0.1634, "lr": 1.6669768333374476e-06, "epoch": 3.2366685871583125, "percentage": 64.73, "elapsed_time": "2:12:30", "remaining_time": "1:12:11", "throughput": 20016.14, "total_tokens": 159133440} +{"current_steps": 50565, "total_steps": 78105, "loss": 0.1818, "lr": 1.6664501271554889e-06, "epoch": 3.2369886690992895, "percentage": 64.74, "elapsed_time": "2:12:30", "remaining_time": "1:12:10", "throughput": 20016.43, "total_tokens": 159149568} +{"current_steps": 50570, "total_steps": 78105, "loss": 0.1208, "lr": 1.6659234625966697e-06, "epoch": 3.2373087510402665, "percentage": 64.75, "elapsed_time": "2:12:31", "remaining_time": "1:12:09", "throughput": 20016.68, "total_tokens": 159164608} +{"current_steps": 50575, "total_steps": 78105, "loss": 0.0906, "lr": 1.6653968396872877e-06, "epoch": 3.237628832981243, "percentage": 64.75, "elapsed_time": "2:12:32", "remaining_time": "1:12:08", "throughput": 20016.89, "total_tokens": 159179136} +{"current_steps": 50580, "total_steps": 78105, "loss": 0.2349, "lr": 1.6648702584536398e-06, "epoch": 3.23794891492222, "percentage": 64.76, "elapsed_time": "2:12:32", "remaining_time": "1:12:07", "throughput": 20017.2, "total_tokens": 159195456} +{"current_steps": 50585, "total_steps": 78105, "loss": 0.1733, "lr": 1.6643437189220223e-06, "epoch": 3.238268996863197, "percentage": 64.77, "elapsed_time": "2:12:33", "remaining_time": "1:12:07", "throughput": 20017.51, "total_tokens": 159211840} +{"current_steps": 50590, "total_steps": 78105, "loss": 0.1659, "lr": 1.6638172211187258e-06, "epoch": 3.238589078804174, "percentage": 64.77, "elapsed_time": "2:12:34", "remaining_time": "1:12:06", "throughput": 20017.87, "total_tokens": 159229888} +{"current_steps": 50595, "total_steps": 78105, "loss": 0.1958, "lr": 1.6632907650700419e-06, "epoch": 3.2389091607451506, "percentage": 64.78, "elapsed_time": "2:12:35", "remaining_time": "1:12:05", "throughput": 20018.14, "total_tokens": 159245376} +{"current_steps": 50600, "total_steps": 78105, "loss": 0.1657, "lr": 1.662764350802259e-06, "epoch": 3.2392292426861276, "percentage": 64.78, "elapsed_time": "2:12:35", "remaining_time": "1:12:04", "throughput": 20018.4, "total_tokens": 159260608} +{"current_steps": 50605, "total_steps": 78105, "loss": 0.1726, "lr": 1.6622379783416641e-06, "epoch": 3.2395493246271045, "percentage": 64.79, "elapsed_time": "2:12:36", "remaining_time": "1:12:03", "throughput": 20018.67, "total_tokens": 159276096} +{"current_steps": 50610, "total_steps": 78105, "loss": 0.15, "lr": 1.6617116477145397e-06, "epoch": 3.2398694065680815, "percentage": 64.8, "elapsed_time": "2:12:37", "remaining_time": "1:12:02", "throughput": 20019.01, "total_tokens": 159292992} +{"current_steps": 50615, "total_steps": 78105, "loss": 0.1745, "lr": 1.66118535894717e-06, "epoch": 3.2401894885090585, "percentage": 64.8, "elapsed_time": "2:12:37", "remaining_time": "1:12:02", "throughput": 20019.23, "total_tokens": 159307776} +{"current_steps": 50620, "total_steps": 78105, "loss": 0.1346, "lr": 1.6606591120658333e-06, "epoch": 3.240509570450035, "percentage": 64.81, "elapsed_time": "2:12:38", "remaining_time": "1:12:01", "throughput": 20019.55, "total_tokens": 159324160} +{"current_steps": 50625, "total_steps": 78105, "loss": 0.1276, "lr": 1.66013290709681e-06, "epoch": 3.240829652391012, "percentage": 64.82, "elapsed_time": "2:12:39", "remaining_time": "1:12:00", "throughput": 20019.78, "total_tokens": 159339392} +{"current_steps": 50630, "total_steps": 78105, "loss": 0.1764, "lr": 1.6596067440663731e-06, "epoch": 3.241149734331989, "percentage": 64.82, "elapsed_time": "2:12:39", "remaining_time": "1:11:59", "throughput": 20020.1, "total_tokens": 159356096} +{"current_steps": 50635, "total_steps": 78105, "loss": 0.1711, "lr": 1.6590806230007986e-06, "epoch": 3.241469816272966, "percentage": 64.83, "elapsed_time": "2:12:40", "remaining_time": "1:11:58", "throughput": 20020.34, "total_tokens": 159371008} +{"current_steps": 50640, "total_steps": 78105, "loss": 0.1747, "lr": 1.6585545439263585e-06, "epoch": 3.2417898982139426, "percentage": 64.84, "elapsed_time": "2:12:41", "remaining_time": "1:11:57", "throughput": 20020.58, "total_tokens": 159385792} +{"current_steps": 50645, "total_steps": 78105, "loss": 0.1891, "lr": 1.6580285068693202e-06, "epoch": 3.2421099801549196, "percentage": 64.84, "elapsed_time": "2:12:41", "remaining_time": "1:11:56", "throughput": 20020.86, "total_tokens": 159401600} +{"current_steps": 50650, "total_steps": 78105, "loss": 0.181, "lr": 1.6575025118559532e-06, "epoch": 3.2424300620958966, "percentage": 64.85, "elapsed_time": "2:12:42", "remaining_time": "1:11:56", "throughput": 20021.14, "total_tokens": 159417344} +{"current_steps": 50655, "total_steps": 78105, "loss": 0.1942, "lr": 1.656976558912522e-06, "epoch": 3.2427501440368736, "percentage": 64.86, "elapsed_time": "2:12:43", "remaining_time": "1:11:55", "throughput": 20021.45, "total_tokens": 159433792} +{"current_steps": 50660, "total_steps": 78105, "loss": 0.1781, "lr": 1.6564506480652908e-06, "epoch": 3.24307022597785, "percentage": 64.86, "elapsed_time": "2:12:43", "remaining_time": "1:11:54", "throughput": 20021.73, "total_tokens": 159449728} +{"current_steps": 50665, "total_steps": 78105, "loss": 0.1456, "lr": 1.6559247793405198e-06, "epoch": 3.243390307918827, "percentage": 64.87, "elapsed_time": "2:12:44", "remaining_time": "1:11:53", "throughput": 20021.99, "total_tokens": 159464704} +{"current_steps": 50670, "total_steps": 78105, "loss": 0.113, "lr": 1.6553989527644698e-06, "epoch": 3.243710389859804, "percentage": 64.87, "elapsed_time": "2:12:45", "remaining_time": "1:11:52", "throughput": 20022.23, "total_tokens": 159479936} +{"current_steps": 50675, "total_steps": 78105, "loss": 0.1559, "lr": 1.6548731683633957e-06, "epoch": 3.244030471800781, "percentage": 64.88, "elapsed_time": "2:12:45", "remaining_time": "1:11:51", "throughput": 20022.47, "total_tokens": 159495360} +{"current_steps": 50680, "total_steps": 78105, "loss": 0.1396, "lr": 1.6543474261635535e-06, "epoch": 3.244350553741758, "percentage": 64.89, "elapsed_time": "2:12:46", "remaining_time": "1:11:50", "throughput": 20022.7, "total_tokens": 159509952} +{"current_steps": 50685, "total_steps": 78105, "loss": 0.1429, "lr": 1.6538217261911959e-06, "epoch": 3.2446706356827346, "percentage": 64.89, "elapsed_time": "2:12:47", "remaining_time": "1:11:50", "throughput": 20023.01, "total_tokens": 159526528} +{"current_steps": 50690, "total_steps": 78105, "loss": 0.1186, "lr": 1.6532960684725745e-06, "epoch": 3.2449907176237116, "percentage": 64.9, "elapsed_time": "2:12:47", "remaining_time": "1:11:49", "throughput": 20023.23, "total_tokens": 159541760} +{"current_steps": 50695, "total_steps": 78105, "loss": 0.1645, "lr": 1.6527704530339361e-06, "epoch": 3.2453107995646886, "percentage": 64.91, "elapsed_time": "2:12:48", "remaining_time": "1:11:48", "throughput": 20023.53, "total_tokens": 159558016} +{"current_steps": 50700, "total_steps": 78105, "loss": 0.1832, "lr": 1.6522448799015284e-06, "epoch": 3.2456308815056656, "percentage": 64.91, "elapsed_time": "2:12:49", "remaining_time": "1:11:47", "throughput": 20023.92, "total_tokens": 159576640} +{"current_steps": 50705, "total_steps": 78105, "loss": 0.1161, "lr": 1.6517193491015963e-06, "epoch": 3.245950963446642, "percentage": 64.92, "elapsed_time": "2:12:49", "remaining_time": "1:11:46", "throughput": 20024.18, "total_tokens": 159592256} +{"current_steps": 50710, "total_steps": 78105, "loss": 0.0625, "lr": 1.6511938606603805e-06, "epoch": 3.246271045387619, "percentage": 64.93, "elapsed_time": "2:12:50", "remaining_time": "1:11:45", "throughput": 20024.43, "total_tokens": 159607616} +{"current_steps": 50715, "total_steps": 78105, "loss": 0.1533, "lr": 1.6506684146041233e-06, "epoch": 3.246591127328596, "percentage": 64.93, "elapsed_time": "2:12:51", "remaining_time": "1:11:45", "throughput": 20024.73, "total_tokens": 159623744} +{"current_steps": 50720, "total_steps": 78105, "loss": 0.1751, "lr": 1.6501430109590603e-06, "epoch": 3.246911209269573, "percentage": 64.94, "elapsed_time": "2:12:51", "remaining_time": "1:11:44", "throughput": 20024.99, "total_tokens": 159639104} +{"current_steps": 50725, "total_steps": 78105, "loss": 0.1446, "lr": 1.6496176497514294e-06, "epoch": 3.24723129121055, "percentage": 64.94, "elapsed_time": "2:12:52", "remaining_time": "1:11:43", "throughput": 20025.28, "total_tokens": 159655168} +{"current_steps": 50730, "total_steps": 78105, "loss": 0.1741, "lr": 1.649092331007463e-06, "epoch": 3.2475513731515266, "percentage": 64.95, "elapsed_time": "2:12:53", "remaining_time": "1:11:42", "throughput": 20025.6, "total_tokens": 159671872} +{"current_steps": 50735, "total_steps": 78105, "loss": 0.1633, "lr": 1.6485670547533936e-06, "epoch": 3.2478714550925036, "percentage": 64.96, "elapsed_time": "2:12:54", "remaining_time": "1:11:41", "throughput": 20025.79, "total_tokens": 159686272} +{"current_steps": 50740, "total_steps": 78105, "loss": 0.2598, "lr": 1.6480418210154502e-06, "epoch": 3.2481915370334806, "percentage": 64.96, "elapsed_time": "2:12:54", "remaining_time": "1:11:40", "throughput": 20026.22, "total_tokens": 159705344} +{"current_steps": 50745, "total_steps": 78105, "loss": 0.189, "lr": 1.6475166298198614e-06, "epoch": 3.2485116189744576, "percentage": 64.97, "elapsed_time": "2:12:55", "remaining_time": "1:11:40", "throughput": 20026.46, "total_tokens": 159720576} +{"current_steps": 50750, "total_steps": 78105, "loss": 0.0661, "lr": 1.6469914811928505e-06, "epoch": 3.248831700915434, "percentage": 64.98, "elapsed_time": "2:12:56", "remaining_time": "1:11:39", "throughput": 20026.75, "total_tokens": 159736896} +{"current_steps": 50755, "total_steps": 78105, "loss": 0.2317, "lr": 1.6464663751606425e-06, "epoch": 3.249151782856411, "percentage": 64.98, "elapsed_time": "2:12:56", "remaining_time": "1:11:38", "throughput": 20027.01, "total_tokens": 159752320} +{"current_steps": 50760, "total_steps": 78105, "loss": 0.1442, "lr": 1.6459413117494582e-06, "epoch": 3.249471864797388, "percentage": 64.99, "elapsed_time": "2:12:57", "remaining_time": "1:11:37", "throughput": 20027.26, "total_tokens": 159767872} +{"current_steps": 50765, "total_steps": 78105, "loss": 0.1711, "lr": 1.6454162909855156e-06, "epoch": 3.249791946738365, "percentage": 65.0, "elapsed_time": "2:12:58", "remaining_time": "1:11:36", "throughput": 20027.5, "total_tokens": 159782784} +{"current_steps": 50770, "total_steps": 78105, "loss": 0.1899, "lr": 1.6448913128950321e-06, "epoch": 3.250112028679342, "percentage": 65.0, "elapsed_time": "2:12:58", "remaining_time": "1:11:35", "throughput": 20027.73, "total_tokens": 159798016} +{"current_steps": 50775, "total_steps": 78105, "loss": 0.1005, "lr": 1.6443663775042224e-06, "epoch": 3.2504321106203187, "percentage": 65.01, "elapsed_time": "2:12:59", "remaining_time": "1:11:35", "throughput": 20028.07, "total_tokens": 159814976} +{"current_steps": 50778, "total_steps": 78105, "eval_loss": 0.5568700432777405, "epoch": 3.250624159784905, "percentage": 65.01, "elapsed_time": "2:13:51", "remaining_time": "1:12:02", "throughput": 19900.63, "total_tokens": 159826368} +{"current_steps": 50780, "total_steps": 78105, "loss": 0.2279, "lr": 1.6438414848392997e-06, "epoch": 3.2507521925612957, "percentage": 65.02, "elapsed_time": "2:14:24", "remaining_time": "1:12:19", "throughput": 19818.15, "total_tokens": 159832960} +{"current_steps": 50785, "total_steps": 78105, "loss": 0.1268, "lr": 1.6433166349264728e-06, "epoch": 3.2510722745022727, "percentage": 65.02, "elapsed_time": "2:14:25", "remaining_time": "1:12:18", "throughput": 19818.42, "total_tokens": 159848704} +{"current_steps": 50790, "total_steps": 78105, "loss": 0.1757, "lr": 1.642791827791951e-06, "epoch": 3.2513923564432496, "percentage": 65.03, "elapsed_time": "2:14:26", "remaining_time": "1:12:18", "throughput": 19818.73, "total_tokens": 159864832} +{"current_steps": 50795, "total_steps": 78105, "loss": 0.1713, "lr": 1.6422670634619404e-06, "epoch": 3.251712438384226, "percentage": 65.03, "elapsed_time": "2:14:27", "remaining_time": "1:12:17", "throughput": 19819.0, "total_tokens": 159880256} +{"current_steps": 50800, "total_steps": 78105, "loss": 0.263, "lr": 1.6417423419626452e-06, "epoch": 3.252032520325203, "percentage": 65.04, "elapsed_time": "2:14:27", "remaining_time": "1:12:16", "throughput": 19819.32, "total_tokens": 159896640} +{"current_steps": 50805, "total_steps": 78105, "loss": 0.1778, "lr": 1.6412176633202664e-06, "epoch": 3.25235260226618, "percentage": 65.05, "elapsed_time": "2:14:28", "remaining_time": "1:12:15", "throughput": 19819.57, "total_tokens": 159911616} +{"current_steps": 50810, "total_steps": 78105, "loss": 0.1722, "lr": 1.640693027561004e-06, "epoch": 3.252672684207157, "percentage": 65.05, "elapsed_time": "2:14:29", "remaining_time": "1:12:14", "throughput": 19819.86, "total_tokens": 159927680} +{"current_steps": 50815, "total_steps": 78105, "loss": 0.1964, "lr": 1.6401684347110569e-06, "epoch": 3.252992766148134, "percentage": 65.06, "elapsed_time": "2:14:29", "remaining_time": "1:12:13", "throughput": 19820.14, "total_tokens": 159943424} +{"current_steps": 50820, "total_steps": 78105, "loss": 0.2241, "lr": 1.6396438847966185e-06, "epoch": 3.2533128480891107, "percentage": 65.07, "elapsed_time": "2:14:30", "remaining_time": "1:12:12", "throughput": 19820.54, "total_tokens": 159961152} +{"current_steps": 50825, "total_steps": 78105, "loss": 0.1458, "lr": 1.6391193778438835e-06, "epoch": 3.2536329300300877, "percentage": 65.07, "elapsed_time": "2:14:31", "remaining_time": "1:12:12", "throughput": 19820.81, "total_tokens": 159976768} +{"current_steps": 50830, "total_steps": 78105, "loss": 0.1221, "lr": 1.6385949138790425e-06, "epoch": 3.2539530119710647, "percentage": 65.08, "elapsed_time": "2:14:31", "remaining_time": "1:12:11", "throughput": 19821.12, "total_tokens": 159992896} +{"current_steps": 50835, "total_steps": 78105, "loss": 0.198, "lr": 1.6380704929282852e-06, "epoch": 3.2542730939120417, "percentage": 65.09, "elapsed_time": "2:14:32", "remaining_time": "1:12:10", "throughput": 19821.48, "total_tokens": 160009792} +{"current_steps": 50840, "total_steps": 78105, "loss": 0.165, "lr": 1.6375461150177969e-06, "epoch": 3.254593175853018, "percentage": 65.09, "elapsed_time": "2:14:33", "remaining_time": "1:12:09", "throughput": 19821.77, "total_tokens": 160025856} +{"current_steps": 50845, "total_steps": 78105, "loss": 0.2053, "lr": 1.6370217801737637e-06, "epoch": 3.254913257793995, "percentage": 65.1, "elapsed_time": "2:14:33", "remaining_time": "1:12:08", "throughput": 19822.0, "total_tokens": 160040768} +{"current_steps": 50850, "total_steps": 78105, "loss": 0.176, "lr": 1.6364974884223672e-06, "epoch": 3.255233339734972, "percentage": 65.1, "elapsed_time": "2:14:34", "remaining_time": "1:12:07", "throughput": 19822.26, "total_tokens": 160056000} +{"current_steps": 50855, "total_steps": 78105, "loss": 0.1947, "lr": 1.6359732397897895e-06, "epoch": 3.255553421675949, "percentage": 65.11, "elapsed_time": "2:14:35", "remaining_time": "1:12:07", "throughput": 19822.63, "total_tokens": 160073600} +{"current_steps": 50860, "total_steps": 78105, "loss": 0.1147, "lr": 1.6354490343022068e-06, "epoch": 3.255873503616926, "percentage": 65.12, "elapsed_time": "2:14:35", "remaining_time": "1:12:06", "throughput": 19822.87, "total_tokens": 160088320} +{"current_steps": 50865, "total_steps": 78105, "loss": 0.1613, "lr": 1.6349248719857966e-06, "epoch": 3.2561935855579027, "percentage": 65.12, "elapsed_time": "2:14:36", "remaining_time": "1:12:05", "throughput": 19823.12, "total_tokens": 160103296} +{"current_steps": 50870, "total_steps": 78105, "loss": 0.2662, "lr": 1.6344007528667317e-06, "epoch": 3.2565136674988797, "percentage": 65.13, "elapsed_time": "2:14:37", "remaining_time": "1:12:04", "throughput": 19823.36, "total_tokens": 160118336} +{"current_steps": 50875, "total_steps": 78105, "loss": 0.1258, "lr": 1.6338766769711839e-06, "epoch": 3.2568337494398567, "percentage": 65.14, "elapsed_time": "2:14:37", "remaining_time": "1:12:03", "throughput": 19823.64, "total_tokens": 160134080} +{"current_steps": 50880, "total_steps": 78105, "loss": 0.1309, "lr": 1.6333526443253245e-06, "epoch": 3.2571538313808333, "percentage": 65.14, "elapsed_time": "2:14:38", "remaining_time": "1:12:02", "throughput": 19823.89, "total_tokens": 160148992} +{"current_steps": 50885, "total_steps": 78105, "loss": 0.1755, "lr": 1.6328286549553192e-06, "epoch": 3.2574739133218102, "percentage": 65.15, "elapsed_time": "2:14:39", "remaining_time": "1:12:01", "throughput": 19824.16, "total_tokens": 160164544} +{"current_steps": 50890, "total_steps": 78105, "loss": 0.143, "lr": 1.6323047088873345e-06, "epoch": 3.2577939952627872, "percentage": 65.16, "elapsed_time": "2:14:39", "remaining_time": "1:12:00", "throughput": 19824.39, "total_tokens": 160179328} +{"current_steps": 50895, "total_steps": 78105, "loss": 0.1076, "lr": 1.6317808061475324e-06, "epoch": 3.2581140772037642, "percentage": 65.16, "elapsed_time": "2:14:40", "remaining_time": "1:12:00", "throughput": 19824.63, "total_tokens": 160194176} +{"current_steps": 50900, "total_steps": 78105, "loss": 0.3658, "lr": 1.6312569467620754e-06, "epoch": 3.258434159144741, "percentage": 65.17, "elapsed_time": "2:14:41", "remaining_time": "1:11:59", "throughput": 19824.88, "total_tokens": 160209472} +{"current_steps": 50905, "total_steps": 78105, "loss": 0.1431, "lr": 1.63073313075712e-06, "epoch": 3.2587542410857178, "percentage": 65.18, "elapsed_time": "2:14:41", "remaining_time": "1:11:58", "throughput": 19825.21, "total_tokens": 160225920} +{"current_steps": 50910, "total_steps": 78105, "loss": 0.1891, "lr": 1.6302093581588252e-06, "epoch": 3.2590743230266948, "percentage": 65.18, "elapsed_time": "2:14:42", "remaining_time": "1:11:57", "throughput": 19825.48, "total_tokens": 160241088} +{"current_steps": 50915, "total_steps": 78105, "loss": 0.1269, "lr": 1.629685628993344e-06, "epoch": 3.2593944049676717, "percentage": 65.19, "elapsed_time": "2:14:43", "remaining_time": "1:11:56", "throughput": 19825.81, "total_tokens": 160257984} +{"current_steps": 50920, "total_steps": 78105, "loss": 0.1527, "lr": 1.62916194328683e-06, "epoch": 3.2597144869086487, "percentage": 65.19, "elapsed_time": "2:14:43", "remaining_time": "1:11:55", "throughput": 19826.09, "total_tokens": 160273792} +{"current_steps": 50925, "total_steps": 78105, "loss": 0.1758, "lr": 1.6286383010654313e-06, "epoch": 3.2600345688496253, "percentage": 65.2, "elapsed_time": "2:14:44", "remaining_time": "1:11:55", "throughput": 19826.39, "total_tokens": 160290176} +{"current_steps": 50930, "total_steps": 78105, "loss": 0.2347, "lr": 1.6281147023552974e-06, "epoch": 3.2603546507906023, "percentage": 65.21, "elapsed_time": "2:14:45", "remaining_time": "1:11:54", "throughput": 19826.78, "total_tokens": 160308096} +{"current_steps": 50935, "total_steps": 78105, "loss": 0.2139, "lr": 1.627591147182574e-06, "epoch": 3.2606747327315793, "percentage": 65.21, "elapsed_time": "2:14:46", "remaining_time": "1:11:53", "throughput": 19827.14, "total_tokens": 160325504} +{"current_steps": 50940, "total_steps": 78105, "loss": 0.1226, "lr": 1.6270676355734043e-06, "epoch": 3.2609948146725563, "percentage": 65.22, "elapsed_time": "2:14:46", "remaining_time": "1:11:52", "throughput": 19827.46, "total_tokens": 160342208} +{"current_steps": 50945, "total_steps": 78105, "loss": 0.1524, "lr": 1.6265441675539306e-06, "epoch": 3.2613148966135332, "percentage": 65.23, "elapsed_time": "2:14:47", "remaining_time": "1:11:51", "throughput": 19827.71, "total_tokens": 160357184} +{"current_steps": 50950, "total_steps": 78105, "loss": 0.1378, "lr": 1.626020743150291e-06, "epoch": 3.26163497855451, "percentage": 65.23, "elapsed_time": "2:14:48", "remaining_time": "1:11:50", "throughput": 19827.95, "total_tokens": 160372352} +{"current_steps": 50955, "total_steps": 78105, "loss": 0.1331, "lr": 1.625497362388624e-06, "epoch": 3.261955060495487, "percentage": 65.24, "elapsed_time": "2:14:49", "remaining_time": "1:11:50", "throughput": 19828.45, "total_tokens": 160392448} +{"current_steps": 50960, "total_steps": 78105, "loss": 0.143, "lr": 1.6249740252950618e-06, "epoch": 3.2622751424364638, "percentage": 65.25, "elapsed_time": "2:14:49", "remaining_time": "1:11:49", "throughput": 19828.69, "total_tokens": 160407616} +{"current_steps": 50965, "total_steps": 78105, "loss": 0.1814, "lr": 1.6244507318957403e-06, "epoch": 3.2625952243774408, "percentage": 65.25, "elapsed_time": "2:14:50", "remaining_time": "1:11:48", "throughput": 19828.96, "total_tokens": 160423552} +{"current_steps": 50970, "total_steps": 78105, "loss": 0.1984, "lr": 1.6239274822167877e-06, "epoch": 3.2629153063184173, "percentage": 65.26, "elapsed_time": "2:14:51", "remaining_time": "1:11:47", "throughput": 19829.26, "total_tokens": 160440064} +{"current_steps": 50975, "total_steps": 78105, "loss": 0.1385, "lr": 1.623404276284335e-06, "epoch": 3.2632353882593943, "percentage": 65.26, "elapsed_time": "2:14:51", "remaining_time": "1:11:46", "throughput": 19829.53, "total_tokens": 160455360} +{"current_steps": 50980, "total_steps": 78105, "loss": 0.1309, "lr": 1.6228811141245052e-06, "epoch": 3.2635554702003713, "percentage": 65.27, "elapsed_time": "2:14:52", "remaining_time": "1:11:45", "throughput": 19829.82, "total_tokens": 160471232} +{"current_steps": 50985, "total_steps": 78105, "loss": 0.1685, "lr": 1.622357995763424e-06, "epoch": 3.2638755521413483, "percentage": 65.28, "elapsed_time": "2:14:53", "remaining_time": "1:11:44", "throughput": 19830.09, "total_tokens": 160486976} +{"current_steps": 50990, "total_steps": 78105, "loss": 0.2235, "lr": 1.6218349212272139e-06, "epoch": 3.2641956340823253, "percentage": 65.28, "elapsed_time": "2:14:53", "remaining_time": "1:11:44", "throughput": 19830.45, "total_tokens": 160504256} +{"current_steps": 50995, "total_steps": 78105, "loss": 0.1593, "lr": 1.6213118905419924e-06, "epoch": 3.264515716023302, "percentage": 65.29, "elapsed_time": "2:14:54", "remaining_time": "1:11:43", "throughput": 19830.82, "total_tokens": 160521664} +{"current_steps": 51000, "total_steps": 78105, "loss": 0.1769, "lr": 1.6207889037338792e-06, "epoch": 3.264835797964279, "percentage": 65.3, "elapsed_time": "2:14:55", "remaining_time": "1:11:42", "throughput": 19831.07, "total_tokens": 160537024} +{"current_steps": 51005, "total_steps": 78105, "loss": 0.1815, "lr": 1.6202659608289875e-06, "epoch": 3.265155879905256, "percentage": 65.3, "elapsed_time": "2:14:55", "remaining_time": "1:11:41", "throughput": 19831.3, "total_tokens": 160552192} +{"current_steps": 51010, "total_steps": 78105, "loss": 0.1378, "lr": 1.6197430618534327e-06, "epoch": 3.265475961846233, "percentage": 65.31, "elapsed_time": "2:14:56", "remaining_time": "1:11:40", "throughput": 19831.55, "total_tokens": 160567616} +{"current_steps": 51015, "total_steps": 78105, "loss": 0.1762, "lr": 1.619220206833323e-06, "epoch": 3.2657960437872093, "percentage": 65.32, "elapsed_time": "2:14:57", "remaining_time": "1:11:39", "throughput": 19831.86, "total_tokens": 160584128} +{"current_steps": 51020, "total_steps": 78105, "loss": 0.1345, "lr": 1.6186973957947694e-06, "epoch": 3.2661161257281863, "percentage": 65.32, "elapsed_time": "2:14:57", "remaining_time": "1:11:38", "throughput": 19832.16, "total_tokens": 160600256} +{"current_steps": 51025, "total_steps": 78105, "loss": 0.1763, "lr": 1.618174628763876e-06, "epoch": 3.2664362076691633, "percentage": 65.33, "elapsed_time": "2:14:58", "remaining_time": "1:11:38", "throughput": 19832.43, "total_tokens": 160616000} +{"current_steps": 51030, "total_steps": 78105, "loss": 0.1702, "lr": 1.61765190576675e-06, "epoch": 3.2667562896101403, "percentage": 65.34, "elapsed_time": "2:14:59", "remaining_time": "1:11:37", "throughput": 19832.77, "total_tokens": 160633024} +{"current_steps": 51035, "total_steps": 78105, "loss": 0.2605, "lr": 1.6171292268294908e-06, "epoch": 3.2670763715511173, "percentage": 65.34, "elapsed_time": "2:15:00", "remaining_time": "1:11:36", "throughput": 19833.01, "total_tokens": 160648128} +{"current_steps": 51040, "total_steps": 78105, "loss": 0.1063, "lr": 1.6166065919782004e-06, "epoch": 3.267396453492094, "percentage": 65.35, "elapsed_time": "2:15:00", "remaining_time": "1:11:35", "throughput": 19833.25, "total_tokens": 160663360} +{"current_steps": 51045, "total_steps": 78105, "loss": 0.1636, "lr": 1.616084001238974e-06, "epoch": 3.267716535433071, "percentage": 65.35, "elapsed_time": "2:15:01", "remaining_time": "1:11:34", "throughput": 19833.51, "total_tokens": 160678848} +{"current_steps": 51050, "total_steps": 78105, "loss": 0.2143, "lr": 1.6155614546379083e-06, "epoch": 3.268036617374048, "percentage": 65.36, "elapsed_time": "2:15:02", "remaining_time": "1:11:33", "throughput": 19833.78, "total_tokens": 160694400} +{"current_steps": 51055, "total_steps": 78105, "loss": 0.1625, "lr": 1.6150389522010975e-06, "epoch": 3.268356699315025, "percentage": 65.37, "elapsed_time": "2:15:02", "remaining_time": "1:11:33", "throughput": 19834.09, "total_tokens": 160710912} +{"current_steps": 51060, "total_steps": 78105, "loss": 0.1617, "lr": 1.6145164939546315e-06, "epoch": 3.2686767812560014, "percentage": 65.37, "elapsed_time": "2:15:03", "remaining_time": "1:11:32", "throughput": 19834.34, "total_tokens": 160726016} +{"current_steps": 51065, "total_steps": 78105, "loss": 0.176, "lr": 1.6139940799246001e-06, "epoch": 3.2689968631969784, "percentage": 65.38, "elapsed_time": "2:15:04", "remaining_time": "1:11:31", "throughput": 19834.61, "total_tokens": 160741824} +{"current_steps": 51070, "total_steps": 78105, "loss": 0.1164, "lr": 1.6134717101370886e-06, "epoch": 3.2693169451379553, "percentage": 65.39, "elapsed_time": "2:15:04", "remaining_time": "1:11:30", "throughput": 19834.85, "total_tokens": 160757120} +{"current_steps": 51075, "total_steps": 78105, "loss": 0.1337, "lr": 1.6129493846181827e-06, "epoch": 3.2696370270789323, "percentage": 65.39, "elapsed_time": "2:15:05", "remaining_time": "1:11:29", "throughput": 19835.1, "total_tokens": 160772800} +{"current_steps": 51080, "total_steps": 78105, "loss": 0.1944, "lr": 1.612427103393963e-06, "epoch": 3.2699571090199093, "percentage": 65.4, "elapsed_time": "2:15:06", "remaining_time": "1:11:28", "throughput": 19835.33, "total_tokens": 160787584} +{"current_steps": 51085, "total_steps": 78105, "loss": 0.1293, "lr": 1.6119048664905122e-06, "epoch": 3.270277190960886, "percentage": 65.41, "elapsed_time": "2:15:06", "remaining_time": "1:11:27", "throughput": 19835.65, "total_tokens": 160804224} +{"current_steps": 51090, "total_steps": 78105, "loss": 0.1281, "lr": 1.611382673933905e-06, "epoch": 3.270597272901863, "percentage": 65.41, "elapsed_time": "2:15:07", "remaining_time": "1:11:27", "throughput": 19835.94, "total_tokens": 160820160} +{"current_steps": 51095, "total_steps": 78105, "loss": 0.1661, "lr": 1.6108605257502196e-06, "epoch": 3.27091735484284, "percentage": 65.42, "elapsed_time": "2:15:08", "remaining_time": "1:11:26", "throughput": 19836.17, "total_tokens": 160835008} +{"current_steps": 51100, "total_steps": 78105, "loss": 0.0908, "lr": 1.6103384219655271e-06, "epoch": 3.271237436783817, "percentage": 65.42, "elapsed_time": "2:15:08", "remaining_time": "1:11:25", "throughput": 19836.45, "total_tokens": 160850944} +{"current_steps": 51105, "total_steps": 78105, "loss": 0.11, "lr": 1.6098163626059e-06, "epoch": 3.2715575187247934, "percentage": 65.43, "elapsed_time": "2:15:09", "remaining_time": "1:11:24", "throughput": 19836.78, "total_tokens": 160867328} +{"current_steps": 51110, "total_steps": 78105, "loss": 0.1708, "lr": 1.609294347697407e-06, "epoch": 3.2718776006657704, "percentage": 65.44, "elapsed_time": "2:15:10", "remaining_time": "1:11:23", "throughput": 19837.01, "total_tokens": 160881984} +{"current_steps": 51115, "total_steps": 78105, "loss": 0.2074, "lr": 1.608772377266115e-06, "epoch": 3.2721976826067474, "percentage": 65.44, "elapsed_time": "2:15:10", "remaining_time": "1:11:22", "throughput": 19837.24, "total_tokens": 160896768} +{"current_steps": 51120, "total_steps": 78105, "loss": 0.1457, "lr": 1.6082504513380886e-06, "epoch": 3.2725177645477244, "percentage": 65.45, "elapsed_time": "2:15:11", "remaining_time": "1:11:21", "throughput": 19837.56, "total_tokens": 160912960} +{"current_steps": 51125, "total_steps": 78105, "loss": 0.1962, "lr": 1.607728569939389e-06, "epoch": 3.2728378464887014, "percentage": 65.46, "elapsed_time": "2:15:12", "remaining_time": "1:11:21", "throughput": 19837.96, "total_tokens": 160930944} +{"current_steps": 51130, "total_steps": 78105, "loss": 0.154, "lr": 1.6072067330960777e-06, "epoch": 3.273157928429678, "percentage": 65.46, "elapsed_time": "2:15:12", "remaining_time": "1:11:20", "throughput": 19838.22, "total_tokens": 160945920} +{"current_steps": 51135, "total_steps": 78105, "loss": 0.1215, "lr": 1.6066849408342112e-06, "epoch": 3.273478010370655, "percentage": 65.47, "elapsed_time": "2:15:13", "remaining_time": "1:11:19", "throughput": 19838.44, "total_tokens": 160960576} +{"current_steps": 51140, "total_steps": 78105, "loss": 0.1596, "lr": 1.6061631931798454e-06, "epoch": 3.273798092311632, "percentage": 65.48, "elapsed_time": "2:15:14", "remaining_time": "1:11:18", "throughput": 19838.73, "total_tokens": 160976576} +{"current_steps": 51145, "total_steps": 78105, "loss": 0.2135, "lr": 1.605641490159034e-06, "epoch": 3.2741181742526084, "percentage": 65.48, "elapsed_time": "2:15:14", "remaining_time": "1:11:17", "throughput": 19839.04, "total_tokens": 160992768} +{"current_steps": 51150, "total_steps": 78105, "loss": 0.1276, "lr": 1.605119831797829e-06, "epoch": 3.2744382561935854, "percentage": 65.49, "elapsed_time": "2:15:15", "remaining_time": "1:11:16", "throughput": 19839.28, "total_tokens": 161008128} +{"current_steps": 51155, "total_steps": 78105, "loss": 0.1963, "lr": 1.6045982181222772e-06, "epoch": 3.2747583381345624, "percentage": 65.5, "elapsed_time": "2:15:16", "remaining_time": "1:11:15", "throughput": 19839.6, "total_tokens": 161024512} +{"current_steps": 51160, "total_steps": 78105, "loss": 0.1377, "lr": 1.6040766491584264e-06, "epoch": 3.2750784200755394, "percentage": 65.5, "elapsed_time": "2:15:16", "remaining_time": "1:11:15", "throughput": 19839.83, "total_tokens": 161039232} +{"current_steps": 51165, "total_steps": 78105, "loss": 0.1584, "lr": 1.6035551249323216e-06, "epoch": 3.2753985020165164, "percentage": 65.51, "elapsed_time": "2:15:17", "remaining_time": "1:11:14", "throughput": 19840.08, "total_tokens": 161054528} +{"current_steps": 51170, "total_steps": 78105, "loss": 0.1557, "lr": 1.603033645470004e-06, "epoch": 3.275718583957493, "percentage": 65.51, "elapsed_time": "2:15:18", "remaining_time": "1:11:13", "throughput": 19840.33, "total_tokens": 161069312} +{"current_steps": 51175, "total_steps": 78105, "loss": 0.1792, "lr": 1.602512210797515e-06, "epoch": 3.27603866589847, "percentage": 65.52, "elapsed_time": "2:15:18", "remaining_time": "1:11:12", "throughput": 19840.58, "total_tokens": 161084352} +{"current_steps": 51180, "total_steps": 78105, "loss": 0.1318, "lr": 1.6019908209408902e-06, "epoch": 3.276358747839447, "percentage": 65.53, "elapsed_time": "2:15:19", "remaining_time": "1:11:11", "throughput": 19840.88, "total_tokens": 161100672} +{"current_steps": 51185, "total_steps": 78105, "loss": 0.1644, "lr": 1.6014694759261674e-06, "epoch": 3.276678829780424, "percentage": 65.53, "elapsed_time": "2:15:20", "remaining_time": "1:11:10", "throughput": 19841.14, "total_tokens": 161116224} +{"current_steps": 51190, "total_steps": 78105, "loss": 0.1413, "lr": 1.6009481757793776e-06, "epoch": 3.2769989117214005, "percentage": 65.54, "elapsed_time": "2:15:21", "remaining_time": "1:11:09", "throughput": 19841.46, "total_tokens": 161132608} +{"current_steps": 51195, "total_steps": 78105, "loss": 0.1782, "lr": 1.6004269205265532e-06, "epoch": 3.2773189936623774, "percentage": 65.55, "elapsed_time": "2:15:21", "remaining_time": "1:11:09", "throughput": 19841.8, "total_tokens": 161149312} +{"current_steps": 51200, "total_steps": 78105, "loss": 0.1237, "lr": 1.5999057101937227e-06, "epoch": 3.2776390756033544, "percentage": 65.55, "elapsed_time": "2:15:22", "remaining_time": "1:11:08", "throughput": 19842.11, "total_tokens": 161165504} +{"current_steps": 51205, "total_steps": 78105, "loss": 0.1619, "lr": 1.5993845448069134e-06, "epoch": 3.2779591575443314, "percentage": 65.56, "elapsed_time": "2:15:23", "remaining_time": "1:11:07", "throughput": 19842.35, "total_tokens": 161180800} +{"current_steps": 51210, "total_steps": 78105, "loss": 0.1661, "lr": 1.5988634243921474e-06, "epoch": 3.2782792394853084, "percentage": 65.57, "elapsed_time": "2:15:23", "remaining_time": "1:11:06", "throughput": 19842.64, "total_tokens": 161196864} +{"current_steps": 51215, "total_steps": 78105, "loss": 0.1789, "lr": 1.598342348975449e-06, "epoch": 3.278599321426285, "percentage": 65.57, "elapsed_time": "2:15:24", "remaining_time": "1:11:05", "throughput": 19842.9, "total_tokens": 161211776} +{"current_steps": 51220, "total_steps": 78105, "loss": 0.1678, "lr": 1.5978213185828368e-06, "epoch": 3.278919403367262, "percentage": 65.58, "elapsed_time": "2:15:25", "remaining_time": "1:11:04", "throughput": 19843.16, "total_tokens": 161227008} +{"current_steps": 51225, "total_steps": 78105, "loss": 0.1313, "lr": 1.5973003332403288e-06, "epoch": 3.279239485308239, "percentage": 65.58, "elapsed_time": "2:15:25", "remaining_time": "1:11:03", "throughput": 19843.46, "total_tokens": 161243200} +{"current_steps": 51230, "total_steps": 78105, "loss": 0.159, "lr": 1.5967793929739406e-06, "epoch": 3.279559567249216, "percentage": 65.59, "elapsed_time": "2:15:26", "remaining_time": "1:11:03", "throughput": 19843.68, "total_tokens": 161257856} +{"current_steps": 51235, "total_steps": 78105, "loss": 0.1918, "lr": 1.596258497809684e-06, "epoch": 3.2798796491901925, "percentage": 65.6, "elapsed_time": "2:15:27", "remaining_time": "1:11:02", "throughput": 19843.96, "total_tokens": 161273536} +{"current_steps": 51240, "total_steps": 78105, "loss": 0.1254, "lr": 1.5957376477735714e-06, "epoch": 3.2801997311311695, "percentage": 65.6, "elapsed_time": "2:15:27", "remaining_time": "1:11:01", "throughput": 19844.19, "total_tokens": 161288192} +{"current_steps": 51245, "total_steps": 78105, "loss": 0.2384, "lr": 1.5952168428916098e-06, "epoch": 3.2805198130721465, "percentage": 65.61, "elapsed_time": "2:15:28", "remaining_time": "1:11:00", "throughput": 19844.52, "total_tokens": 161304832} +{"current_steps": 51250, "total_steps": 78105, "loss": 0.1288, "lr": 1.5946960831898068e-06, "epoch": 3.2808398950131235, "percentage": 65.62, "elapsed_time": "2:15:29", "remaining_time": "1:10:59", "throughput": 19844.82, "total_tokens": 161321472} +{"current_steps": 51255, "total_steps": 78105, "loss": 0.1756, "lr": 1.5941753686941652e-06, "epoch": 3.2811599769541004, "percentage": 65.62, "elapsed_time": "2:15:29", "remaining_time": "1:10:58", "throughput": 19845.11, "total_tokens": 161337152} +{"current_steps": 51260, "total_steps": 78105, "loss": 0.167, "lr": 1.5936546994306887e-06, "epoch": 3.281480058895077, "percentage": 65.63, "elapsed_time": "2:15:30", "remaining_time": "1:10:57", "throughput": 19845.42, "total_tokens": 161353472} +{"current_steps": 51265, "total_steps": 78105, "loss": 0.2041, "lr": 1.5931340754253743e-06, "epoch": 3.281800140836054, "percentage": 65.64, "elapsed_time": "2:15:31", "remaining_time": "1:10:57", "throughput": 19845.67, "total_tokens": 161368576} +{"current_steps": 51270, "total_steps": 78105, "loss": 0.2391, "lr": 1.5926134967042218e-06, "epoch": 3.282120222777031, "percentage": 65.64, "elapsed_time": "2:15:31", "remaining_time": "1:10:56", "throughput": 19845.98, "total_tokens": 161385024} +{"current_steps": 51275, "total_steps": 78105, "loss": 0.2544, "lr": 1.5920929632932233e-06, "epoch": 3.282440304718008, "percentage": 65.65, "elapsed_time": "2:15:32", "remaining_time": "1:10:55", "throughput": 19846.3, "total_tokens": 161401664} +{"current_steps": 51280, "total_steps": 78105, "loss": 0.2, "lr": 1.5915724752183733e-06, "epoch": 3.2827603866589845, "percentage": 65.66, "elapsed_time": "2:15:33", "remaining_time": "1:10:54", "throughput": 19846.51, "total_tokens": 161416000} +{"current_steps": 51285, "total_steps": 78105, "loss": 0.1225, "lr": 1.5910520325056627e-06, "epoch": 3.2830804685999615, "percentage": 65.66, "elapsed_time": "2:15:33", "remaining_time": "1:10:53", "throughput": 19846.78, "total_tokens": 161431424} +{"current_steps": 51290, "total_steps": 78105, "loss": 0.142, "lr": 1.5905316351810784e-06, "epoch": 3.2834005505409385, "percentage": 65.67, "elapsed_time": "2:15:34", "remaining_time": "1:10:52", "throughput": 19847.03, "total_tokens": 161446272} +{"current_steps": 51295, "total_steps": 78105, "loss": 0.1649, "lr": 1.5900112832706084e-06, "epoch": 3.2837206324819155, "percentage": 65.67, "elapsed_time": "2:15:35", "remaining_time": "1:10:51", "throughput": 19847.3, "total_tokens": 161461696} +{"current_steps": 51300, "total_steps": 78105, "loss": 0.1297, "lr": 1.5894909768002334e-06, "epoch": 3.2840407144228925, "percentage": 65.68, "elapsed_time": "2:15:35", "remaining_time": "1:10:51", "throughput": 19847.54, "total_tokens": 161476736} +{"current_steps": 51305, "total_steps": 78105, "loss": 0.1668, "lr": 1.5889707157959373e-06, "epoch": 3.284360796363869, "percentage": 65.69, "elapsed_time": "2:15:36", "remaining_time": "1:10:50", "throughput": 19847.75, "total_tokens": 161490944} +{"current_steps": 51310, "total_steps": 78105, "loss": 0.0876, "lr": 1.5884505002836975e-06, "epoch": 3.284680878304846, "percentage": 65.69, "elapsed_time": "2:15:37", "remaining_time": "1:10:49", "throughput": 19848.01, "total_tokens": 161505792} +{"current_steps": 51315, "total_steps": 78105, "loss": 0.202, "lr": 1.5879303302894928e-06, "epoch": 3.285000960245823, "percentage": 65.7, "elapsed_time": "2:15:37", "remaining_time": "1:10:48", "throughput": 19848.34, "total_tokens": 161522624} +{"current_steps": 51320, "total_steps": 78105, "loss": 0.2409, "lr": 1.5874102058392954e-06, "epoch": 3.2853210421868, "percentage": 65.71, "elapsed_time": "2:15:38", "remaining_time": "1:10:47", "throughput": 19848.59, "total_tokens": 161537152} +{"current_steps": 51325, "total_steps": 78105, "loss": 0.1001, "lr": 1.5868901269590797e-06, "epoch": 3.2856411241277765, "percentage": 65.71, "elapsed_time": "2:15:39", "remaining_time": "1:10:46", "throughput": 19848.88, "total_tokens": 161552832} +{"current_steps": 51330, "total_steps": 78105, "loss": 0.1714, "lr": 1.5863700936748142e-06, "epoch": 3.2859612060687535, "percentage": 65.72, "elapsed_time": "2:15:39", "remaining_time": "1:10:45", "throughput": 19849.19, "total_tokens": 161569024} +{"current_steps": 51335, "total_steps": 78105, "loss": 0.1252, "lr": 1.585850106012467e-06, "epoch": 3.2862812880097305, "percentage": 65.73, "elapsed_time": "2:15:40", "remaining_time": "1:10:45", "throughput": 19849.43, "total_tokens": 161584000} +{"current_steps": 51340, "total_steps": 78105, "loss": 0.2069, "lr": 1.5853301639980046e-06, "epoch": 3.2866013699507075, "percentage": 65.73, "elapsed_time": "2:15:41", "remaining_time": "1:10:44", "throughput": 19849.73, "total_tokens": 161600128} +{"current_steps": 51345, "total_steps": 78105, "loss": 0.17, "lr": 1.584810267657389e-06, "epoch": 3.2869214518916845, "percentage": 65.74, "elapsed_time": "2:15:41", "remaining_time": "1:10:43", "throughput": 19850.02, "total_tokens": 161615744} +{"current_steps": 51350, "total_steps": 78105, "loss": 0.1748, "lr": 1.5842904170165825e-06, "epoch": 3.287241533832661, "percentage": 65.74, "elapsed_time": "2:15:42", "remaining_time": "1:10:42", "throughput": 19850.28, "total_tokens": 161631040} +{"current_steps": 51355, "total_steps": 78105, "loss": 0.1542, "lr": 1.5837706121015418e-06, "epoch": 3.287561615773638, "percentage": 65.75, "elapsed_time": "2:15:43", "remaining_time": "1:10:41", "throughput": 19850.59, "total_tokens": 161647424} +{"current_steps": 51360, "total_steps": 78105, "loss": 0.1548, "lr": 1.5832508529382251e-06, "epoch": 3.287881697714615, "percentage": 65.76, "elapsed_time": "2:15:43", "remaining_time": "1:10:40", "throughput": 19850.88, "total_tokens": 161663424} +{"current_steps": 51365, "total_steps": 78105, "loss": 0.2099, "lr": 1.582731139552585e-06, "epoch": 3.288201779655592, "percentage": 65.76, "elapsed_time": "2:15:44", "remaining_time": "1:10:39", "throughput": 19851.14, "total_tokens": 161678720} +{"current_steps": 51370, "total_steps": 78105, "loss": 0.1522, "lr": 1.5822114719705738e-06, "epoch": 3.2885218615965686, "percentage": 65.77, "elapsed_time": "2:15:45", "remaining_time": "1:10:39", "throughput": 19851.42, "total_tokens": 161694720} +{"current_steps": 51375, "total_steps": 78105, "loss": 0.2042, "lr": 1.5816918502181412e-06, "epoch": 3.2888419435375456, "percentage": 65.78, "elapsed_time": "2:15:45", "remaining_time": "1:10:38", "throughput": 19851.72, "total_tokens": 161711104} +{"current_steps": 51380, "total_steps": 78105, "loss": 0.1617, "lr": 1.5811722743212351e-06, "epoch": 3.2891620254785225, "percentage": 65.78, "elapsed_time": "2:15:46", "remaining_time": "1:10:37", "throughput": 19852.02, "total_tokens": 161727168} +{"current_steps": 51385, "total_steps": 78105, "loss": 0.1495, "lr": 1.5806527443057987e-06, "epoch": 3.2894821074194995, "percentage": 65.79, "elapsed_time": "2:15:47", "remaining_time": "1:10:36", "throughput": 19852.29, "total_tokens": 161742656} +{"current_steps": 51390, "total_steps": 78105, "loss": 0.1643, "lr": 1.580133260197776e-06, "epoch": 3.2898021893604765, "percentage": 65.8, "elapsed_time": "2:15:48", "remaining_time": "1:10:35", "throughput": 19852.6, "total_tokens": 161759104} +{"current_steps": 51395, "total_steps": 78105, "loss": 0.1352, "lr": 1.5796138220231065e-06, "epoch": 3.290122271301453, "percentage": 65.8, "elapsed_time": "2:15:48", "remaining_time": "1:10:34", "throughput": 19852.88, "total_tokens": 161774720} +{"current_steps": 51400, "total_steps": 78105, "loss": 0.1548, "lr": 1.5790944298077281e-06, "epoch": 3.29044235324243, "percentage": 65.81, "elapsed_time": "2:15:49", "remaining_time": "1:10:34", "throughput": 19853.16, "total_tokens": 161790336} +{"current_steps": 51405, "total_steps": 78105, "loss": 0.1835, "lr": 1.5785750835775788e-06, "epoch": 3.290762435183407, "percentage": 65.82, "elapsed_time": "2:15:50", "remaining_time": "1:10:33", "throughput": 19853.41, "total_tokens": 161805568} +{"current_steps": 51410, "total_steps": 78105, "loss": 0.2432, "lr": 1.5780557833585886e-06, "epoch": 3.2910825171243836, "percentage": 65.82, "elapsed_time": "2:15:50", "remaining_time": "1:10:32", "throughput": 19853.69, "total_tokens": 161821376} +{"current_steps": 51415, "total_steps": 78105, "loss": 0.1006, "lr": 1.577536529176692e-06, "epoch": 3.2914025990653606, "percentage": 65.83, "elapsed_time": "2:15:51", "remaining_time": "1:10:31", "throughput": 19853.96, "total_tokens": 161836992} +{"current_steps": 51420, "total_steps": 78105, "loss": 0.2084, "lr": 1.5770173210578146e-06, "epoch": 3.2917226810063376, "percentage": 65.83, "elapsed_time": "2:15:52", "remaining_time": "1:10:30", "throughput": 19854.24, "total_tokens": 161852608} +{"current_steps": 51425, "total_steps": 78105, "loss": 0.1015, "lr": 1.5764981590278854e-06, "epoch": 3.2920427629473146, "percentage": 65.84, "elapsed_time": "2:15:52", "remaining_time": "1:10:29", "throughput": 19854.52, "total_tokens": 161868480} +{"current_steps": 51430, "total_steps": 78105, "loss": 0.1725, "lr": 1.5759790431128273e-06, "epoch": 3.2923628448882916, "percentage": 65.85, "elapsed_time": "2:15:53", "remaining_time": "1:10:28", "throughput": 19854.81, "total_tokens": 161884416} +{"current_steps": 51435, "total_steps": 78105, "loss": 0.1928, "lr": 1.5754599733385635e-06, "epoch": 3.292682926829268, "percentage": 65.85, "elapsed_time": "2:15:54", "remaining_time": "1:10:28", "throughput": 19855.11, "total_tokens": 161900288} +{"current_steps": 51440, "total_steps": 78105, "loss": 0.1744, "lr": 1.574940949731012e-06, "epoch": 3.293003008770245, "percentage": 65.86, "elapsed_time": "2:15:54", "remaining_time": "1:10:27", "throughput": 19855.38, "total_tokens": 161915712} +{"current_steps": 51445, "total_steps": 78105, "loss": 0.1224, "lr": 1.5744219723160913e-06, "epoch": 3.293323090711222, "percentage": 65.87, "elapsed_time": "2:15:55", "remaining_time": "1:10:26", "throughput": 19855.67, "total_tokens": 161931648} +{"current_steps": 51450, "total_steps": 78105, "loss": 0.3029, "lr": 1.5739030411197162e-06, "epoch": 3.293643172652199, "percentage": 65.87, "elapsed_time": "2:15:56", "remaining_time": "1:10:25", "throughput": 19855.94, "total_tokens": 161947072} +{"current_steps": 51455, "total_steps": 78105, "loss": 0.2028, "lr": 1.573384156167799e-06, "epoch": 3.2939632545931756, "percentage": 65.88, "elapsed_time": "2:15:56", "remaining_time": "1:10:24", "throughput": 19856.16, "total_tokens": 161961472} +{"current_steps": 51460, "total_steps": 78105, "loss": 0.2772, "lr": 1.5728653174862516e-06, "epoch": 3.2942833365341526, "percentage": 65.89, "elapsed_time": "2:15:57", "remaining_time": "1:10:23", "throughput": 19856.39, "total_tokens": 161976320} +{"current_steps": 51465, "total_steps": 78105, "loss": 0.2452, "lr": 1.5723465251009797e-06, "epoch": 3.2946034184751296, "percentage": 65.89, "elapsed_time": "2:15:58", "remaining_time": "1:10:22", "throughput": 19856.65, "total_tokens": 161991488} +{"current_steps": 51470, "total_steps": 78105, "loss": 0.1274, "lr": 1.5718277790378916e-06, "epoch": 3.2949235004161066, "percentage": 65.9, "elapsed_time": "2:15:58", "remaining_time": "1:10:22", "throughput": 19856.96, "total_tokens": 162007552} +{"current_steps": 51475, "total_steps": 78105, "loss": 0.1282, "lr": 1.5713090793228886e-06, "epoch": 3.2952435823570836, "percentage": 65.9, "elapsed_time": "2:15:59", "remaining_time": "1:10:21", "throughput": 19857.24, "total_tokens": 162023616} +{"current_steps": 51480, "total_steps": 78105, "loss": 0.1591, "lr": 1.5707904259818731e-06, "epoch": 3.29556366429806, "percentage": 65.91, "elapsed_time": "2:16:00", "remaining_time": "1:10:20", "throughput": 19857.5, "total_tokens": 162038848} +{"current_steps": 51485, "total_steps": 78105, "loss": 0.1127, "lr": 1.5702718190407435e-06, "epoch": 3.295883746239037, "percentage": 65.92, "elapsed_time": "2:16:00", "remaining_time": "1:10:19", "throughput": 19857.76, "total_tokens": 162053824} +{"current_steps": 51490, "total_steps": 78105, "loss": 0.1609, "lr": 1.5697532585253972e-06, "epoch": 3.296203828180014, "percentage": 65.92, "elapsed_time": "2:16:01", "remaining_time": "1:10:18", "throughput": 19858.03, "total_tokens": 162069120} +{"current_steps": 51495, "total_steps": 78105, "loss": 0.1923, "lr": 1.5692347444617267e-06, "epoch": 3.296523910120991, "percentage": 65.93, "elapsed_time": "2:16:02", "remaining_time": "1:10:17", "throughput": 19858.31, "total_tokens": 162084800} +{"current_steps": 51500, "total_steps": 78105, "loss": 0.1272, "lr": 1.5687162768756259e-06, "epoch": 3.2968439920619677, "percentage": 65.94, "elapsed_time": "2:16:02", "remaining_time": "1:10:16", "throughput": 19858.6, "total_tokens": 162100992} +{"current_steps": 51505, "total_steps": 78105, "loss": 0.1235, "lr": 1.5681978557929823e-06, "epoch": 3.2971640740029446, "percentage": 65.94, "elapsed_time": "2:16:03", "remaining_time": "1:10:16", "throughput": 19858.85, "total_tokens": 162115904} +{"current_steps": 51510, "total_steps": 78105, "loss": 0.2148, "lr": 1.5676794812396843e-06, "epoch": 3.2974841559439216, "percentage": 65.95, "elapsed_time": "2:16:04", "remaining_time": "1:10:15", "throughput": 19859.13, "total_tokens": 162131712} +{"current_steps": 51515, "total_steps": 78105, "loss": 0.1377, "lr": 1.567161153241617e-06, "epoch": 3.2978042378848986, "percentage": 65.96, "elapsed_time": "2:16:04", "remaining_time": "1:10:14", "throughput": 19859.42, "total_tokens": 162147584} +{"current_steps": 51520, "total_steps": 78105, "loss": 0.1467, "lr": 1.5666428718246623e-06, "epoch": 3.2981243198258756, "percentage": 65.96, "elapsed_time": "2:16:05", "remaining_time": "1:10:13", "throughput": 19859.67, "total_tokens": 162162496} +{"current_steps": 51525, "total_steps": 78105, "loss": 0.2769, "lr": 1.566124637014702e-06, "epoch": 3.298444401766852, "percentage": 65.97, "elapsed_time": "2:16:06", "remaining_time": "1:10:12", "throughput": 19859.9, "total_tokens": 162177408} +{"current_steps": 51530, "total_steps": 78105, "loss": 0.1259, "lr": 1.5656064488376122e-06, "epoch": 3.298764483707829, "percentage": 65.98, "elapsed_time": "2:16:06", "remaining_time": "1:10:11", "throughput": 19860.2, "total_tokens": 162193216} +{"current_steps": 51535, "total_steps": 78105, "loss": 0.1456, "lr": 1.5650883073192697e-06, "epoch": 3.299084565648806, "percentage": 65.98, "elapsed_time": "2:16:07", "remaining_time": "1:10:10", "throughput": 19860.47, "total_tokens": 162208960} +{"current_steps": 51540, "total_steps": 78105, "loss": 0.154, "lr": 1.5645702124855466e-06, "epoch": 3.299404647589783, "percentage": 65.99, "elapsed_time": "2:16:08", "remaining_time": "1:10:10", "throughput": 19860.86, "total_tokens": 162226944} +{"current_steps": 51545, "total_steps": 78105, "loss": 0.2134, "lr": 1.5640521643623157e-06, "epoch": 3.2997247295307597, "percentage": 65.99, "elapsed_time": "2:16:08", "remaining_time": "1:10:09", "throughput": 19861.15, "total_tokens": 162242816} +{"current_steps": 51550, "total_steps": 78105, "loss": 0.1578, "lr": 1.5635341629754435e-06, "epoch": 3.3000448114717367, "percentage": 66.0, "elapsed_time": "2:16:09", "remaining_time": "1:10:08", "throughput": 19861.44, "total_tokens": 162258880} +{"current_steps": 51555, "total_steps": 78105, "loss": 0.2403, "lr": 1.5630162083507983e-06, "epoch": 3.3003648934127137, "percentage": 66.01, "elapsed_time": "2:16:10", "remaining_time": "1:10:07", "throughput": 19861.73, "total_tokens": 162274752} +{"current_steps": 51560, "total_steps": 78105, "loss": 0.2098, "lr": 1.5624983005142422e-06, "epoch": 3.3006849753536907, "percentage": 66.01, "elapsed_time": "2:16:10", "remaining_time": "1:10:06", "throughput": 19861.97, "total_tokens": 162289856} +{"current_steps": 51565, "total_steps": 78105, "loss": 0.211, "lr": 1.5619804394916382e-06, "epoch": 3.3010050572946676, "percentage": 66.02, "elapsed_time": "2:16:11", "remaining_time": "1:10:05", "throughput": 19862.25, "total_tokens": 162305856} +{"current_steps": 51570, "total_steps": 78105, "loss": 0.1482, "lr": 1.5614626253088444e-06, "epoch": 3.301325139235644, "percentage": 66.03, "elapsed_time": "2:16:12", "remaining_time": "1:10:04", "throughput": 19862.54, "total_tokens": 162321920} +{"current_steps": 51575, "total_steps": 78105, "loss": 0.2047, "lr": 1.5609448579917186e-06, "epoch": 3.301645221176621, "percentage": 66.03, "elapsed_time": "2:16:12", "remaining_time": "1:10:04", "throughput": 19862.85, "total_tokens": 162338304} +{"current_steps": 51580, "total_steps": 78105, "loss": 0.1453, "lr": 1.5604271375661162e-06, "epoch": 3.301965303117598, "percentage": 66.04, "elapsed_time": "2:16:13", "remaining_time": "1:10:03", "throughput": 19863.06, "total_tokens": 162352768} +{"current_steps": 51585, "total_steps": 78105, "loss": 0.2241, "lr": 1.5599094640578876e-06, "epoch": 3.302285385058575, "percentage": 66.05, "elapsed_time": "2:16:14", "remaining_time": "1:10:02", "throughput": 19863.41, "total_tokens": 162369536} +{"current_steps": 51590, "total_steps": 78105, "loss": 0.1666, "lr": 1.5593918374928846e-06, "epoch": 3.3026054669995517, "percentage": 66.05, "elapsed_time": "2:16:14", "remaining_time": "1:10:01", "throughput": 19863.65, "total_tokens": 162384704} +{"current_steps": 51595, "total_steps": 78105, "loss": 0.2035, "lr": 1.5588742578969523e-06, "epoch": 3.3029255489405287, "percentage": 66.06, "elapsed_time": "2:16:15", "remaining_time": "1:10:00", "throughput": 19863.95, "total_tokens": 162400832} +{"current_steps": 51600, "total_steps": 78105, "loss": 0.125, "lr": 1.5583567252959387e-06, "epoch": 3.3032456308815057, "percentage": 66.06, "elapsed_time": "2:16:16", "remaining_time": "1:09:59", "throughput": 19864.18, "total_tokens": 162415552} +{"current_steps": 51605, "total_steps": 78105, "loss": 0.1395, "lr": 1.5578392397156845e-06, "epoch": 3.3035657128224827, "percentage": 66.07, "elapsed_time": "2:16:16", "remaining_time": "1:09:59", "throughput": 19864.47, "total_tokens": 162431680} +{"current_steps": 51610, "total_steps": 78105, "loss": 0.2007, "lr": 1.5573218011820317e-06, "epoch": 3.3038857947634597, "percentage": 66.08, "elapsed_time": "2:16:17", "remaining_time": "1:09:58", "throughput": 19864.73, "total_tokens": 162446912} +{"current_steps": 51615, "total_steps": 78105, "loss": 0.1321, "lr": 1.5568044097208177e-06, "epoch": 3.304205876704436, "percentage": 66.08, "elapsed_time": "2:16:18", "remaining_time": "1:09:57", "throughput": 19865.08, "total_tokens": 162463744} +{"current_steps": 51620, "total_steps": 78105, "loss": 0.1678, "lr": 1.5562870653578788e-06, "epoch": 3.304525958645413, "percentage": 66.09, "elapsed_time": "2:16:19", "remaining_time": "1:09:56", "throughput": 19865.36, "total_tokens": 162479232} +{"current_steps": 51625, "total_steps": 78105, "loss": 0.1444, "lr": 1.5557697681190475e-06, "epoch": 3.30484604058639, "percentage": 66.1, "elapsed_time": "2:16:19", "remaining_time": "1:09:55", "throughput": 19865.63, "total_tokens": 162494464} +{"current_steps": 51630, "total_steps": 78105, "loss": 0.1948, "lr": 1.555252518030156e-06, "epoch": 3.305166122527367, "percentage": 66.1, "elapsed_time": "2:16:20", "remaining_time": "1:09:54", "throughput": 19865.93, "total_tokens": 162510784} +{"current_steps": 51635, "total_steps": 78105, "loss": 0.1419, "lr": 1.5547353151170333e-06, "epoch": 3.3054862044683437, "percentage": 66.11, "elapsed_time": "2:16:21", "remaining_time": "1:09:53", "throughput": 19866.24, "total_tokens": 162527104} +{"current_steps": 51640, "total_steps": 78105, "loss": 0.1725, "lr": 1.5542181594055045e-06, "epoch": 3.3058062864093207, "percentage": 66.12, "elapsed_time": "2:16:21", "remaining_time": "1:09:53", "throughput": 19866.59, "total_tokens": 162544064} +{"current_steps": 51645, "total_steps": 78105, "loss": 0.1908, "lr": 1.5537010509213955e-06, "epoch": 3.3061263683502977, "percentage": 66.12, "elapsed_time": "2:16:22", "remaining_time": "1:09:52", "throughput": 19866.82, "total_tokens": 162558848} +{"current_steps": 51650, "total_steps": 78105, "loss": 0.1431, "lr": 1.5531839896905257e-06, "epoch": 3.3064464502912747, "percentage": 66.13, "elapsed_time": "2:16:23", "remaining_time": "1:09:51", "throughput": 19867.07, "total_tokens": 162574400} +{"current_steps": 51655, "total_steps": 78105, "loss": 0.146, "lr": 1.5526669757387164e-06, "epoch": 3.3067665322322517, "percentage": 66.14, "elapsed_time": "2:16:23", "remaining_time": "1:09:50", "throughput": 19867.37, "total_tokens": 162590464} +{"current_steps": 51660, "total_steps": 78105, "loss": 0.1786, "lr": 1.5521500090917832e-06, "epoch": 3.3070866141732282, "percentage": 66.14, "elapsed_time": "2:16:24", "remaining_time": "1:09:49", "throughput": 19867.74, "total_tokens": 162607424} +{"current_steps": 51665, "total_steps": 78105, "loss": 0.259, "lr": 1.5516330897755427e-06, "epoch": 3.3074066961142052, "percentage": 66.15, "elapsed_time": "2:16:25", "remaining_time": "1:09:48", "throughput": 19868.04, "total_tokens": 162623296} +{"current_steps": 51670, "total_steps": 78105, "loss": 0.1724, "lr": 1.5511162178158047e-06, "epoch": 3.3077267780551822, "percentage": 66.15, "elapsed_time": "2:16:25", "remaining_time": "1:09:47", "throughput": 19868.32, "total_tokens": 162639296} +{"current_steps": 51675, "total_steps": 78105, "loss": 0.1288, "lr": 1.5505993932383803e-06, "epoch": 3.3080468599961588, "percentage": 66.16, "elapsed_time": "2:16:26", "remaining_time": "1:09:47", "throughput": 19868.59, "total_tokens": 162654912} +{"current_steps": 51680, "total_steps": 78105, "loss": 0.1219, "lr": 1.5500826160690768e-06, "epoch": 3.3083669419371358, "percentage": 66.17, "elapsed_time": "2:16:27", "remaining_time": "1:09:46", "throughput": 19868.83, "total_tokens": 162669568} +{"current_steps": 51685, "total_steps": 78105, "loss": 0.1946, "lr": 1.5495658863336993e-06, "epoch": 3.3086870238781128, "percentage": 66.17, "elapsed_time": "2:16:27", "remaining_time": "1:09:45", "throughput": 19869.1, "total_tokens": 162685184} +{"current_steps": 51690, "total_steps": 78105, "loss": 0.1778, "lr": 1.5490492040580521e-06, "epoch": 3.3090071058190897, "percentage": 66.18, "elapsed_time": "2:16:28", "remaining_time": "1:09:44", "throughput": 19869.37, "total_tokens": 162700800} +{"current_steps": 51695, "total_steps": 78105, "loss": 0.2389, "lr": 1.5485325692679332e-06, "epoch": 3.3093271877600667, "percentage": 66.19, "elapsed_time": "2:16:29", "remaining_time": "1:09:43", "throughput": 19869.61, "total_tokens": 162715712} +{"current_steps": 51700, "total_steps": 78105, "loss": 0.1923, "lr": 1.5480159819891427e-06, "epoch": 3.3096472697010433, "percentage": 66.19, "elapsed_time": "2:16:29", "remaining_time": "1:09:42", "throughput": 19869.87, "total_tokens": 162731328} +{"current_steps": 51705, "total_steps": 78105, "loss": 0.1421, "lr": 1.5474994422474745e-06, "epoch": 3.3099673516420203, "percentage": 66.2, "elapsed_time": "2:16:30", "remaining_time": "1:09:41", "throughput": 19870.11, "total_tokens": 162746304} +{"current_steps": 51710, "total_steps": 78105, "loss": 0.1303, "lr": 1.5469829500687233e-06, "epoch": 3.3102874335829973, "percentage": 66.21, "elapsed_time": "2:16:31", "remaining_time": "1:09:41", "throughput": 19870.41, "total_tokens": 162762368} +{"current_steps": 51715, "total_steps": 78105, "loss": 0.1704, "lr": 1.5464665054786787e-06, "epoch": 3.3106075155239743, "percentage": 66.21, "elapsed_time": "2:16:31", "remaining_time": "1:09:40", "throughput": 19870.69, "total_tokens": 162777728} +{"current_steps": 51720, "total_steps": 78105, "loss": 0.0864, "lr": 1.5459501085031314e-06, "epoch": 3.310927597464951, "percentage": 66.22, "elapsed_time": "2:16:32", "remaining_time": "1:09:39", "throughput": 19871.04, "total_tokens": 162794944} +{"current_steps": 51725, "total_steps": 78105, "loss": 0.1593, "lr": 1.545433759167865e-06, "epoch": 3.311247679405928, "percentage": 66.22, "elapsed_time": "2:16:33", "remaining_time": "1:09:38", "throughput": 19871.3, "total_tokens": 162810304} +{"current_steps": 51730, "total_steps": 78105, "loss": 0.1378, "lr": 1.5449174574986658e-06, "epoch": 3.311567761346905, "percentage": 66.23, "elapsed_time": "2:16:33", "remaining_time": "1:09:37", "throughput": 19871.59, "total_tokens": 162826176} +{"current_steps": 51735, "total_steps": 78105, "loss": 0.2269, "lr": 1.5444012035213124e-06, "epoch": 3.3118878432878818, "percentage": 66.24, "elapsed_time": "2:16:34", "remaining_time": "1:09:36", "throughput": 19871.84, "total_tokens": 162841152} +{"current_steps": 51740, "total_steps": 78105, "loss": 0.1379, "lr": 1.5438849972615863e-06, "epoch": 3.3122079252288588, "percentage": 66.24, "elapsed_time": "2:16:35", "remaining_time": "1:09:36", "throughput": 19872.12, "total_tokens": 162856896} +{"current_steps": 51745, "total_steps": 78105, "loss": 0.112, "lr": 1.5433688387452627e-06, "epoch": 3.3125280071698353, "percentage": 66.25, "elapsed_time": "2:16:35", "remaining_time": "1:09:35", "throughput": 19872.41, "total_tokens": 162872768} +{"current_steps": 51750, "total_steps": 78105, "loss": 0.164, "lr": 1.5428527279981161e-06, "epoch": 3.3128480891108123, "percentage": 66.26, "elapsed_time": "2:16:36", "remaining_time": "1:09:34", "throughput": 19872.68, "total_tokens": 162888832} +{"current_steps": 51755, "total_steps": 78105, "loss": 0.1244, "lr": 1.54233666504592e-06, "epoch": 3.3131681710517893, "percentage": 66.26, "elapsed_time": "2:16:37", "remaining_time": "1:09:33", "throughput": 19872.96, "total_tokens": 162904512} +{"current_steps": 51760, "total_steps": 78105, "loss": 0.1858, "lr": 1.5418206499144411e-06, "epoch": 3.3134882529927663, "percentage": 66.27, "elapsed_time": "2:16:37", "remaining_time": "1:09:32", "throughput": 19873.23, "total_tokens": 162920000} +{"current_steps": 51765, "total_steps": 78105, "loss": 0.1148, "lr": 1.541304682629449e-06, "epoch": 3.313808334933743, "percentage": 66.28, "elapsed_time": "2:16:38", "remaining_time": "1:09:31", "throughput": 19873.45, "total_tokens": 162934912} +{"current_steps": 51770, "total_steps": 78105, "loss": 0.1397, "lr": 1.5407887632167065e-06, "epoch": 3.31412841687472, "percentage": 66.28, "elapsed_time": "2:16:39", "remaining_time": "1:09:30", "throughput": 19873.68, "total_tokens": 162949824} +{"current_steps": 51775, "total_steps": 78105, "loss": 0.1157, "lr": 1.540272891701978e-06, "epoch": 3.314448498815697, "percentage": 66.29, "elapsed_time": "2:16:39", "remaining_time": "1:09:30", "throughput": 19873.96, "total_tokens": 162965632} +{"current_steps": 51780, "total_steps": 78105, "loss": 0.2852, "lr": 1.5397570681110214e-06, "epoch": 3.314768580756674, "percentage": 66.3, "elapsed_time": "2:16:40", "remaining_time": "1:09:29", "throughput": 19874.2, "total_tokens": 162980672} +{"current_steps": 51785, "total_steps": 78105, "loss": 0.1575, "lr": 1.5392412924695958e-06, "epoch": 3.315088662697651, "percentage": 66.3, "elapsed_time": "2:16:41", "remaining_time": "1:09:28", "throughput": 19874.44, "total_tokens": 162995392} +{"current_steps": 51790, "total_steps": 78105, "loss": 0.1573, "lr": 1.5387255648034545e-06, "epoch": 3.3154087446386273, "percentage": 66.31, "elapsed_time": "2:16:41", "remaining_time": "1:09:27", "throughput": 19874.69, "total_tokens": 163010624} +{"current_steps": 51795, "total_steps": 78105, "loss": 0.0979, "lr": 1.5382098851383524e-06, "epoch": 3.3157288265796043, "percentage": 66.31, "elapsed_time": "2:16:42", "remaining_time": "1:09:26", "throughput": 19874.97, "total_tokens": 163026304} +{"current_steps": 51800, "total_steps": 78105, "loss": 0.1413, "lr": 1.537694253500038e-06, "epoch": 3.3160489085205813, "percentage": 66.32, "elapsed_time": "2:16:43", "remaining_time": "1:09:25", "throughput": 19875.3, "total_tokens": 163043136} +{"current_steps": 51805, "total_steps": 78105, "loss": 0.1279, "lr": 1.5371786699142604e-06, "epoch": 3.3163689904615583, "percentage": 66.33, "elapsed_time": "2:16:43", "remaining_time": "1:09:24", "throughput": 19875.58, "total_tokens": 163058432} +{"current_steps": 51810, "total_steps": 78105, "loss": 0.1982, "lr": 1.5366631344067657e-06, "epoch": 3.316689072402535, "percentage": 66.33, "elapsed_time": "2:16:44", "remaining_time": "1:09:24", "throughput": 19875.85, "total_tokens": 163074048} +{"current_steps": 51815, "total_steps": 78105, "loss": 0.1727, "lr": 1.5361476470032949e-06, "epoch": 3.317009154343512, "percentage": 66.34, "elapsed_time": "2:16:45", "remaining_time": "1:09:23", "throughput": 19876.11, "total_tokens": 163089216} +{"current_steps": 51820, "total_steps": 78105, "loss": 0.1093, "lr": 1.535632207729591e-06, "epoch": 3.317329236284489, "percentage": 66.35, "elapsed_time": "2:16:46", "remaining_time": "1:09:22", "throughput": 19876.44, "total_tokens": 163106176} +{"current_steps": 51825, "total_steps": 78105, "loss": 0.1487, "lr": 1.5351168166113915e-06, "epoch": 3.317649318225466, "percentage": 66.35, "elapsed_time": "2:16:46", "remaining_time": "1:09:21", "throughput": 19876.75, "total_tokens": 163122816} +{"current_steps": 51830, "total_steps": 78105, "loss": 0.1041, "lr": 1.5346014736744318e-06, "epoch": 3.317969400166443, "percentage": 66.36, "elapsed_time": "2:16:47", "remaining_time": "1:09:20", "throughput": 19877.01, "total_tokens": 163138240} +{"current_steps": 51835, "total_steps": 78105, "loss": 0.144, "lr": 1.534086178944446e-06, "epoch": 3.3182894821074194, "percentage": 66.37, "elapsed_time": "2:16:48", "remaining_time": "1:09:19", "throughput": 19877.27, "total_tokens": 163153600} +{"current_steps": 51840, "total_steps": 78105, "loss": 0.1569, "lr": 1.5335709324471659e-06, "epoch": 3.3186095640483964, "percentage": 66.37, "elapsed_time": "2:16:48", "remaining_time": "1:09:18", "throughput": 19877.53, "total_tokens": 163168832} +{"current_steps": 51845, "total_steps": 78105, "loss": 0.2149, "lr": 1.5330557342083186e-06, "epoch": 3.3189296459893733, "percentage": 66.38, "elapsed_time": "2:16:49", "remaining_time": "1:09:18", "throughput": 19877.77, "total_tokens": 163183616} +{"current_steps": 51850, "total_steps": 78105, "loss": 0.2786, "lr": 1.5325405842536317e-06, "epoch": 3.3192497279303503, "percentage": 66.38, "elapsed_time": "2:16:50", "remaining_time": "1:09:17", "throughput": 19878.04, "total_tokens": 163199040} +{"current_steps": 51855, "total_steps": 78105, "loss": 0.1501, "lr": 1.5320254826088282e-06, "epoch": 3.319569809871327, "percentage": 66.39, "elapsed_time": "2:16:50", "remaining_time": "1:09:16", "throughput": 19878.32, "total_tokens": 163214784} +{"current_steps": 51860, "total_steps": 78105, "loss": 0.1559, "lr": 1.5315104292996302e-06, "epoch": 3.319889891812304, "percentage": 66.4, "elapsed_time": "2:16:51", "remaining_time": "1:09:15", "throughput": 19878.63, "total_tokens": 163231360} +{"current_steps": 51865, "total_steps": 78105, "loss": 0.2074, "lr": 1.5309954243517575e-06, "epoch": 3.320209973753281, "percentage": 66.4, "elapsed_time": "2:16:52", "remaining_time": "1:09:14", "throughput": 19878.89, "total_tokens": 163246656} +{"current_steps": 51870, "total_steps": 78105, "loss": 0.2297, "lr": 1.5304804677909253e-06, "epoch": 3.320530055694258, "percentage": 66.41, "elapsed_time": "2:16:52", "remaining_time": "1:09:13", "throughput": 19879.14, "total_tokens": 163262016} +{"current_steps": 51875, "total_steps": 78105, "loss": 0.1756, "lr": 1.529965559642849e-06, "epoch": 3.320850137635235, "percentage": 66.42, "elapsed_time": "2:16:53", "remaining_time": "1:09:12", "throughput": 19879.34, "total_tokens": 163276288} +{"current_steps": 51880, "total_steps": 78105, "loss": 0.1887, "lr": 1.5294506999332392e-06, "epoch": 3.3211702195762114, "percentage": 66.42, "elapsed_time": "2:16:54", "remaining_time": "1:09:12", "throughput": 19879.61, "total_tokens": 163291904} +{"current_steps": 51885, "total_steps": 78105, "loss": 0.1598, "lr": 1.5289358886878065e-06, "epoch": 3.3214903015171884, "percentage": 66.43, "elapsed_time": "2:16:54", "remaining_time": "1:09:11", "throughput": 19879.81, "total_tokens": 163306304} +{"current_steps": 51890, "total_steps": 78105, "loss": 0.1919, "lr": 1.5284211259322569e-06, "epoch": 3.3218103834581654, "percentage": 66.44, "elapsed_time": "2:16:55", "remaining_time": "1:09:10", "throughput": 19880.14, "total_tokens": 163323072} +{"current_steps": 51895, "total_steps": 78105, "loss": 0.1985, "lr": 1.5279064116922964e-06, "epoch": 3.3221304653991424, "percentage": 66.44, "elapsed_time": "2:16:56", "remaining_time": "1:09:09", "throughput": 19880.34, "total_tokens": 163337344} +{"current_steps": 51900, "total_steps": 78105, "loss": 0.1853, "lr": 1.5273917459936255e-06, "epoch": 3.322450547340119, "percentage": 66.45, "elapsed_time": "2:16:56", "remaining_time": "1:09:08", "throughput": 19880.68, "total_tokens": 163353984} +{"current_steps": 51905, "total_steps": 78105, "loss": 0.1524, "lr": 1.526877128861945e-06, "epoch": 3.322770629281096, "percentage": 66.46, "elapsed_time": "2:16:57", "remaining_time": "1:09:07", "throughput": 19880.92, "total_tokens": 163368960} +{"current_steps": 51910, "total_steps": 78105, "loss": 0.2759, "lr": 1.5263625603229515e-06, "epoch": 3.323090711222073, "percentage": 66.46, "elapsed_time": "2:16:58", "remaining_time": "1:09:07", "throughput": 19881.15, "total_tokens": 163383616} +{"current_steps": 51915, "total_steps": 78105, "loss": 0.1216, "lr": 1.5258480404023413e-06, "epoch": 3.32341079316305, "percentage": 66.47, "elapsed_time": "2:16:58", "remaining_time": "1:09:06", "throughput": 19881.38, "total_tokens": 163398464} +{"current_steps": 51920, "total_steps": 78105, "loss": 0.2778, "lr": 1.5253335691258049e-06, "epoch": 3.323730875104027, "percentage": 66.47, "elapsed_time": "2:16:59", "remaining_time": "1:09:05", "throughput": 19881.61, "total_tokens": 163413504} +{"current_steps": 51925, "total_steps": 78105, "loss": 0.1836, "lr": 1.5248191465190326e-06, "epoch": 3.3240509570450034, "percentage": 66.48, "elapsed_time": "2:17:00", "remaining_time": "1:09:04", "throughput": 19881.92, "total_tokens": 163429888} +{"current_steps": 51930, "total_steps": 78105, "loss": 0.1867, "lr": 1.524304772607714e-06, "epoch": 3.3243710389859804, "percentage": 66.49, "elapsed_time": "2:17:00", "remaining_time": "1:09:03", "throughput": 19882.12, "total_tokens": 163444288} +{"current_steps": 51935, "total_steps": 78105, "loss": 0.1265, "lr": 1.5237904474175319e-06, "epoch": 3.3246911209269574, "percentage": 66.49, "elapsed_time": "2:17:01", "remaining_time": "1:09:02", "throughput": 19882.47, "total_tokens": 163461504} +{"current_steps": 51940, "total_steps": 78105, "loss": 0.1699, "lr": 1.5232761709741703e-06, "epoch": 3.325011202867934, "percentage": 66.5, "elapsed_time": "2:17:02", "remaining_time": "1:09:01", "throughput": 19882.79, "total_tokens": 163477952} +{"current_steps": 51945, "total_steps": 78105, "loss": 0.3431, "lr": 1.5227619433033087e-06, "epoch": 3.325331284808911, "percentage": 66.51, "elapsed_time": "2:17:02", "remaining_time": "1:09:01", "throughput": 19883.07, "total_tokens": 163493760} +{"current_steps": 51950, "total_steps": 78105, "loss": 0.1206, "lr": 1.5222477644306265e-06, "epoch": 3.325651366749888, "percentage": 66.51, "elapsed_time": "2:17:03", "remaining_time": "1:09:00", "throughput": 19883.34, "total_tokens": 163509568} +{"current_steps": 51955, "total_steps": 78105, "loss": 0.1551, "lr": 1.5217336343817967e-06, "epoch": 3.325971448690865, "percentage": 66.52, "elapsed_time": "2:17:04", "remaining_time": "1:08:59", "throughput": 19883.65, "total_tokens": 163525888} +{"current_steps": 51960, "total_steps": 78105, "loss": 0.2371, "lr": 1.5212195531824952e-06, "epoch": 3.326291530631842, "percentage": 66.53, "elapsed_time": "2:17:04", "remaining_time": "1:08:58", "throughput": 19883.95, "total_tokens": 163542016} +{"current_steps": 51965, "total_steps": 78105, "loss": 0.1392, "lr": 1.5207055208583893e-06, "epoch": 3.3266116125728185, "percentage": 66.53, "elapsed_time": "2:17:05", "remaining_time": "1:08:57", "throughput": 19884.33, "total_tokens": 163559872} +{"current_steps": 51970, "total_steps": 78105, "loss": 0.2296, "lr": 1.5201915374351498e-06, "epoch": 3.3269316945137954, "percentage": 66.54, "elapsed_time": "2:17:06", "remaining_time": "1:08:56", "throughput": 19884.67, "total_tokens": 163576832} +{"current_steps": 51975, "total_steps": 78105, "loss": 0.1343, "lr": 1.5196776029384402e-06, "epoch": 3.3272517764547724, "percentage": 66.55, "elapsed_time": "2:17:06", "remaining_time": "1:08:56", "throughput": 19884.93, "total_tokens": 163592448} +{"current_steps": 51980, "total_steps": 78105, "loss": 0.1818, "lr": 1.5191637173939254e-06, "epoch": 3.3275718583957494, "percentage": 66.55, "elapsed_time": "2:17:07", "remaining_time": "1:08:55", "throughput": 19885.17, "total_tokens": 163607680} +{"current_steps": 51985, "total_steps": 78105, "loss": 0.102, "lr": 1.5186498808272662e-06, "epoch": 3.327891940336726, "percentage": 66.56, "elapsed_time": "2:17:08", "remaining_time": "1:08:54", "throughput": 19885.43, "total_tokens": 163623104} +{"current_steps": 51990, "total_steps": 78105, "loss": 0.2705, "lr": 1.5181360932641194e-06, "epoch": 3.328212022277703, "percentage": 66.56, "elapsed_time": "2:17:08", "remaining_time": "1:08:53", "throughput": 19885.67, "total_tokens": 163638272} +{"current_steps": 51995, "total_steps": 78105, "loss": 0.1754, "lr": 1.5176223547301423e-06, "epoch": 3.32853210421868, "percentage": 66.57, "elapsed_time": "2:17:09", "remaining_time": "1:08:52", "throughput": 19885.97, "total_tokens": 163654720} +{"current_steps": 52000, "total_steps": 78105, "loss": 0.1243, "lr": 1.517108665250987e-06, "epoch": 3.328852186159657, "percentage": 66.58, "elapsed_time": "2:17:10", "remaining_time": "1:08:51", "throughput": 19886.24, "total_tokens": 163670400} +{"current_steps": 52005, "total_steps": 78105, "loss": 0.1807, "lr": 1.5165950248523065e-06, "epoch": 3.329172268100634, "percentage": 66.58, "elapsed_time": "2:17:11", "remaining_time": "1:08:50", "throughput": 19886.54, "total_tokens": 163686400} +{"current_steps": 52010, "total_steps": 78105, "loss": 0.1709, "lr": 1.5160814335597468e-06, "epoch": 3.3294923500416105, "percentage": 66.59, "elapsed_time": "2:17:11", "remaining_time": "1:08:50", "throughput": 19886.81, "total_tokens": 163702208} +{"current_steps": 52015, "total_steps": 78105, "loss": 0.1957, "lr": 1.5155678913989568e-06, "epoch": 3.3298124319825875, "percentage": 66.6, "elapsed_time": "2:17:12", "remaining_time": "1:08:49", "throughput": 19887.15, "total_tokens": 163719296} +{"current_steps": 52020, "total_steps": 78105, "loss": 0.2161, "lr": 1.5150543983955774e-06, "epoch": 3.3301325139235645, "percentage": 66.6, "elapsed_time": "2:17:13", "remaining_time": "1:08:48", "throughput": 19887.45, "total_tokens": 163735616} +{"current_steps": 52025, "total_steps": 78105, "loss": 0.2133, "lr": 1.514540954575251e-06, "epoch": 3.3304525958645415, "percentage": 66.61, "elapsed_time": "2:17:13", "remaining_time": "1:08:47", "throughput": 19887.65, "total_tokens": 163749952} +{"current_steps": 52030, "total_steps": 78105, "loss": 0.1288, "lr": 1.5140275599636162e-06, "epoch": 3.330772677805518, "percentage": 66.62, "elapsed_time": "2:17:14", "remaining_time": "1:08:46", "throughput": 19887.94, "total_tokens": 163765952} +{"current_steps": 52035, "total_steps": 78105, "loss": 0.1351, "lr": 1.513514214586309e-06, "epoch": 3.331092759746495, "percentage": 66.62, "elapsed_time": "2:17:15", "remaining_time": "1:08:45", "throughput": 19888.17, "total_tokens": 163781056} +{"current_steps": 52040, "total_steps": 78105, "loss": 0.15, "lr": 1.5130009184689648e-06, "epoch": 3.331412841687472, "percentage": 66.63, "elapsed_time": "2:17:15", "remaining_time": "1:08:44", "throughput": 19888.38, "total_tokens": 163795584} +{"current_steps": 52045, "total_steps": 78105, "loss": 0.2191, "lr": 1.5124876716372127e-06, "epoch": 3.331732923628449, "percentage": 66.63, "elapsed_time": "2:17:16", "remaining_time": "1:08:44", "throughput": 19888.66, "total_tokens": 163811392} +{"current_steps": 52050, "total_steps": 78105, "loss": 0.1737, "lr": 1.511974474116683e-06, "epoch": 3.332053005569426, "percentage": 66.64, "elapsed_time": "2:17:17", "remaining_time": "1:08:43", "throughput": 19888.97, "total_tokens": 163827904} +{"current_steps": 52055, "total_steps": 78105, "loss": 0.1451, "lr": 1.5114613259330007e-06, "epoch": 3.3323730875104025, "percentage": 66.65, "elapsed_time": "2:17:17", "remaining_time": "1:08:42", "throughput": 19889.21, "total_tokens": 163843072} +{"current_steps": 52060, "total_steps": 78105, "loss": 0.1819, "lr": 1.510948227111792e-06, "epoch": 3.3326931694513795, "percentage": 66.65, "elapsed_time": "2:17:18", "remaining_time": "1:08:41", "throughput": 19889.46, "total_tokens": 163858240} +{"current_steps": 52065, "total_steps": 78105, "loss": 0.1353, "lr": 1.5104351776786762e-06, "epoch": 3.3330132513923565, "percentage": 66.66, "elapsed_time": "2:17:19", "remaining_time": "1:08:40", "throughput": 19889.75, "total_tokens": 163874176} +{"current_steps": 52070, "total_steps": 78105, "loss": 0.147, "lr": 1.509922177659274e-06, "epoch": 3.3333333333333335, "percentage": 66.67, "elapsed_time": "2:17:19", "remaining_time": "1:08:39", "throughput": 19890.05, "total_tokens": 163890176} +{"current_steps": 52075, "total_steps": 78105, "loss": 0.1087, "lr": 1.5094092270792001e-06, "epoch": 3.33365341527431, "percentage": 66.67, "elapsed_time": "2:17:20", "remaining_time": "1:08:39", "throughput": 19890.3, "total_tokens": 163905472} +{"current_steps": 52080, "total_steps": 78105, "loss": 0.2845, "lr": 1.5088963259640702e-06, "epoch": 3.333973497215287, "percentage": 66.68, "elapsed_time": "2:17:21", "remaining_time": "1:08:38", "throughput": 19890.59, "total_tokens": 163921600} +{"current_steps": 52085, "total_steps": 78105, "loss": 0.1563, "lr": 1.5083834743394948e-06, "epoch": 3.334293579156264, "percentage": 66.69, "elapsed_time": "2:17:21", "remaining_time": "1:08:37", "throughput": 19890.86, "total_tokens": 163937280} +{"current_steps": 52090, "total_steps": 78105, "loss": 0.1064, "lr": 1.5078706722310843e-06, "epoch": 3.334613661097241, "percentage": 66.69, "elapsed_time": "2:17:22", "remaining_time": "1:08:36", "throughput": 19891.13, "total_tokens": 163952960} +{"current_steps": 52095, "total_steps": 78105, "loss": 0.1586, "lr": 1.5073579196644439e-06, "epoch": 3.334933743038218, "percentage": 66.7, "elapsed_time": "2:17:23", "remaining_time": "1:08:35", "throughput": 19891.37, "total_tokens": 163967680} +{"current_steps": 52100, "total_steps": 78105, "loss": 0.1476, "lr": 1.5068452166651784e-06, "epoch": 3.3352538249791945, "percentage": 66.71, "elapsed_time": "2:17:23", "remaining_time": "1:08:34", "throughput": 19891.65, "total_tokens": 163983616} +{"current_steps": 52105, "total_steps": 78105, "loss": 0.1312, "lr": 1.5063325632588905e-06, "epoch": 3.3355739069201715, "percentage": 66.71, "elapsed_time": "2:17:24", "remaining_time": "1:08:33", "throughput": 19891.88, "total_tokens": 163998528} +{"current_steps": 52110, "total_steps": 78105, "loss": 0.1513, "lr": 1.5058199594711779e-06, "epoch": 3.3358939888611485, "percentage": 66.72, "elapsed_time": "2:17:25", "remaining_time": "1:08:33", "throughput": 19892.13, "total_tokens": 164013952} +{"current_steps": 52115, "total_steps": 78105, "loss": 0.1451, "lr": 1.5053074053276384e-06, "epoch": 3.3362140708021255, "percentage": 66.72, "elapsed_time": "2:17:25", "remaining_time": "1:08:32", "throughput": 19892.34, "total_tokens": 164028416} +{"current_steps": 52120, "total_steps": 78105, "loss": 0.1569, "lr": 1.504794900853865e-06, "epoch": 3.336534152743102, "percentage": 66.73, "elapsed_time": "2:17:26", "remaining_time": "1:08:31", "throughput": 19892.57, "total_tokens": 164043456} +{"current_steps": 52125, "total_steps": 78105, "loss": 0.1808, "lr": 1.5042824460754518e-06, "epoch": 3.336854234684079, "percentage": 66.74, "elapsed_time": "2:17:27", "remaining_time": "1:08:30", "throughput": 19892.83, "total_tokens": 164058816} +{"current_steps": 52130, "total_steps": 78105, "loss": 0.2024, "lr": 1.5037700410179856e-06, "epoch": 3.337174316625056, "percentage": 66.74, "elapsed_time": "2:17:27", "remaining_time": "1:08:29", "throughput": 19893.11, "total_tokens": 164074624} +{"current_steps": 52135, "total_steps": 78105, "loss": 0.2193, "lr": 1.5032576857070552e-06, "epoch": 3.337494398566033, "percentage": 66.75, "elapsed_time": "2:17:28", "remaining_time": "1:08:28", "throughput": 19893.36, "total_tokens": 164090112} +{"current_steps": 52140, "total_steps": 78105, "loss": 0.2415, "lr": 1.5027453801682435e-06, "epoch": 3.33781448050701, "percentage": 66.76, "elapsed_time": "2:17:29", "remaining_time": "1:08:27", "throughput": 19893.63, "total_tokens": 164105664} +{"current_steps": 52145, "total_steps": 78105, "loss": 0.157, "lr": 1.5022331244271338e-06, "epoch": 3.3381345624479866, "percentage": 66.76, "elapsed_time": "2:17:29", "remaining_time": "1:08:27", "throughput": 19893.87, "total_tokens": 164120512} +{"current_steps": 52150, "total_steps": 78105, "loss": 0.1663, "lr": 1.5017209185093038e-06, "epoch": 3.3384546443889636, "percentage": 66.77, "elapsed_time": "2:17:30", "remaining_time": "1:08:26", "throughput": 19894.17, "total_tokens": 164136576} +{"current_steps": 52155, "total_steps": 78105, "loss": 0.2125, "lr": 1.5012087624403313e-06, "epoch": 3.3387747263299405, "percentage": 66.78, "elapsed_time": "2:17:31", "remaining_time": "1:08:25", "throughput": 19894.42, "total_tokens": 164151552} +{"current_steps": 52160, "total_steps": 78105, "loss": 0.2435, "lr": 1.5006966562457916e-06, "epoch": 3.3390948082709175, "percentage": 66.78, "elapsed_time": "2:17:31", "remaining_time": "1:08:24", "throughput": 19894.79, "total_tokens": 164169088} +{"current_steps": 52165, "total_steps": 78105, "loss": 0.119, "lr": 1.5001845999512548e-06, "epoch": 3.339414890211894, "percentage": 66.79, "elapsed_time": "2:17:32", "remaining_time": "1:08:23", "throughput": 19895.05, "total_tokens": 164184448} +{"current_steps": 52170, "total_steps": 78105, "loss": 0.1432, "lr": 1.4996725935822918e-06, "epoch": 3.339734972152871, "percentage": 66.79, "elapsed_time": "2:17:33", "remaining_time": "1:08:22", "throughput": 19895.39, "total_tokens": 164201664} +{"current_steps": 52175, "total_steps": 78105, "loss": 0.1176, "lr": 1.4991606371644684e-06, "epoch": 3.340055054093848, "percentage": 66.8, "elapsed_time": "2:17:33", "remaining_time": "1:08:22", "throughput": 19895.64, "total_tokens": 164216640} +{"current_steps": 52180, "total_steps": 78105, "loss": 0.2017, "lr": 1.4986487307233506e-06, "epoch": 3.340375136034825, "percentage": 66.81, "elapsed_time": "2:17:34", "remaining_time": "1:08:21", "throughput": 19895.86, "total_tokens": 164231616} +{"current_steps": 52185, "total_steps": 78105, "loss": 0.1749, "lr": 1.498136874284498e-06, "epoch": 3.340695217975802, "percentage": 66.81, "elapsed_time": "2:17:35", "remaining_time": "1:08:20", "throughput": 19896.13, "total_tokens": 164247040} +{"current_steps": 52190, "total_steps": 78105, "loss": 0.1614, "lr": 1.4976250678734727e-06, "epoch": 3.3410152999167786, "percentage": 66.82, "elapsed_time": "2:17:35", "remaining_time": "1:08:19", "throughput": 19896.36, "total_tokens": 164262080} +{"current_steps": 52195, "total_steps": 78105, "loss": 0.1396, "lr": 1.4971133115158297e-06, "epoch": 3.3413353818577556, "percentage": 66.83, "elapsed_time": "2:17:36", "remaining_time": "1:08:18", "throughput": 19896.61, "total_tokens": 164277312} +{"current_steps": 52200, "total_steps": 78105, "loss": 0.2273, "lr": 1.4966016052371241e-06, "epoch": 3.3416554637987326, "percentage": 66.83, "elapsed_time": "2:17:37", "remaining_time": "1:08:17", "throughput": 19896.89, "total_tokens": 164293312} +{"current_steps": 52205, "total_steps": 78105, "loss": 0.2192, "lr": 1.4960899490629073e-06, "epoch": 3.341975545739709, "percentage": 66.84, "elapsed_time": "2:17:37", "remaining_time": "1:08:16", "throughput": 19897.17, "total_tokens": 164309440} +{"current_steps": 52210, "total_steps": 78105, "loss": 0.1269, "lr": 1.4955783430187305e-06, "epoch": 3.342295627680686, "percentage": 66.85, "elapsed_time": "2:17:38", "remaining_time": "1:08:16", "throughput": 19897.4, "total_tokens": 164323968} +{"current_steps": 52215, "total_steps": 78105, "loss": 0.2158, "lr": 1.4950667871301384e-06, "epoch": 3.342615709621663, "percentage": 66.85, "elapsed_time": "2:17:39", "remaining_time": "1:08:15", "throughput": 19897.63, "total_tokens": 164338752} +{"current_steps": 52220, "total_steps": 78105, "loss": 0.1074, "lr": 1.494555281422676e-06, "epoch": 3.34293579156264, "percentage": 66.86, "elapsed_time": "2:17:39", "remaining_time": "1:08:14", "throughput": 19897.9, "total_tokens": 164354560} +{"current_steps": 52225, "total_steps": 78105, "loss": 0.2454, "lr": 1.4940438259218868e-06, "epoch": 3.343255873503617, "percentage": 66.87, "elapsed_time": "2:17:40", "remaining_time": "1:08:13", "throughput": 19898.15, "total_tokens": 164369920} +{"current_steps": 52230, "total_steps": 78105, "loss": 0.1124, "lr": 1.493532420653308e-06, "epoch": 3.3435759554445936, "percentage": 66.87, "elapsed_time": "2:17:41", "remaining_time": "1:08:12", "throughput": 19898.37, "total_tokens": 164384640} +{"current_steps": 52235, "total_steps": 78105, "loss": 0.2178, "lr": 1.4930210656424794e-06, "epoch": 3.3438960373855706, "percentage": 66.88, "elapsed_time": "2:17:41", "remaining_time": "1:08:11", "throughput": 19898.64, "total_tokens": 164400448} +{"current_steps": 52240, "total_steps": 78105, "loss": 0.1355, "lr": 1.4925097609149319e-06, "epoch": 3.3442161193265476, "percentage": 66.88, "elapsed_time": "2:17:42", "remaining_time": "1:08:10", "throughput": 19898.9, "total_tokens": 164416064} +{"current_steps": 52245, "total_steps": 78105, "loss": 0.1537, "lr": 1.4919985064962003e-06, "epoch": 3.3445362012675246, "percentage": 66.89, "elapsed_time": "2:17:43", "remaining_time": "1:08:10", "throughput": 19899.15, "total_tokens": 164431680} +{"current_steps": 52250, "total_steps": 78105, "loss": 0.2147, "lr": 1.491487302411812e-06, "epoch": 3.344856283208501, "percentage": 66.9, "elapsed_time": "2:17:43", "remaining_time": "1:08:09", "throughput": 19899.37, "total_tokens": 164446464} +{"current_steps": 52255, "total_steps": 78105, "loss": 0.1536, "lr": 1.4909761486872947e-06, "epoch": 3.345176365149478, "percentage": 66.9, "elapsed_time": "2:17:44", "remaining_time": "1:08:08", "throughput": 19899.68, "total_tokens": 164462656} +{"current_steps": 52260, "total_steps": 78105, "loss": 0.1829, "lr": 1.4904650453481728e-06, "epoch": 3.345496447090455, "percentage": 66.91, "elapsed_time": "2:17:45", "remaining_time": "1:08:07", "throughput": 19899.89, "total_tokens": 164477248} +{"current_steps": 52265, "total_steps": 78105, "loss": 0.119, "lr": 1.4899539924199688e-06, "epoch": 3.345816529031432, "percentage": 66.92, "elapsed_time": "2:17:45", "remaining_time": "1:08:06", "throughput": 19900.23, "total_tokens": 164494272} +{"current_steps": 52270, "total_steps": 78105, "loss": 0.1539, "lr": 1.4894429899282004e-06, "epoch": 3.346136610972409, "percentage": 66.92, "elapsed_time": "2:17:46", "remaining_time": "1:08:05", "throughput": 19900.47, "total_tokens": 164509312} +{"current_steps": 52275, "total_steps": 78105, "loss": 0.0911, "lr": 1.488932037898385e-06, "epoch": 3.3464566929133857, "percentage": 66.93, "elapsed_time": "2:17:47", "remaining_time": "1:08:05", "throughput": 19900.75, "total_tokens": 164525120} +{"current_steps": 52280, "total_steps": 78105, "loss": 0.2595, "lr": 1.488421136356038e-06, "epoch": 3.3467767748543626, "percentage": 66.94, "elapsed_time": "2:17:47", "remaining_time": "1:08:04", "throughput": 19901.08, "total_tokens": 164542016} +{"current_steps": 52285, "total_steps": 78105, "loss": 0.1644, "lr": 1.4879102853266696e-06, "epoch": 3.3470968567953396, "percentage": 66.94, "elapsed_time": "2:17:48", "remaining_time": "1:08:03", "throughput": 19901.32, "total_tokens": 164557056} +{"current_steps": 52290, "total_steps": 78105, "loss": 0.1882, "lr": 1.4873994848357909e-06, "epoch": 3.3474169387363166, "percentage": 66.95, "elapsed_time": "2:17:49", "remaining_time": "1:08:02", "throughput": 19901.6, "total_tokens": 164572608} +{"current_steps": 52295, "total_steps": 78105, "loss": 0.1258, "lr": 1.4868887349089065e-06, "epoch": 3.347737020677293, "percentage": 66.95, "elapsed_time": "2:17:50", "remaining_time": "1:08:01", "throughput": 19901.93, "total_tokens": 164589184} +{"current_steps": 52300, "total_steps": 78105, "loss": 0.1126, "lr": 1.4863780355715225e-06, "epoch": 3.34805710261827, "percentage": 66.96, "elapsed_time": "2:17:50", "remaining_time": "1:08:00", "throughput": 19902.16, "total_tokens": 164604160} +{"current_steps": 52305, "total_steps": 78105, "loss": 0.2105, "lr": 1.4858673868491386e-06, "epoch": 3.348377184559247, "percentage": 66.97, "elapsed_time": "2:17:51", "remaining_time": "1:07:59", "throughput": 19902.42, "total_tokens": 164619712} +{"current_steps": 52310, "total_steps": 78105, "loss": 0.2556, "lr": 1.4853567887672555e-06, "epoch": 3.348697266500224, "percentage": 66.97, "elapsed_time": "2:17:52", "remaining_time": "1:07:59", "throughput": 19902.7, "total_tokens": 164635904} +{"current_steps": 52315, "total_steps": 78105, "loss": 0.1156, "lr": 1.4848462413513686e-06, "epoch": 3.349017348441201, "percentage": 66.98, "elapsed_time": "2:17:52", "remaining_time": "1:07:58", "throughput": 19903.02, "total_tokens": 164652288} +{"current_steps": 52320, "total_steps": 78105, "loss": 0.1689, "lr": 1.484335744626974e-06, "epoch": 3.3493374303821777, "percentage": 66.99, "elapsed_time": "2:17:53", "remaining_time": "1:07:57", "throughput": 19903.26, "total_tokens": 164667776} +{"current_steps": 52325, "total_steps": 78105, "loss": 0.1575, "lr": 1.4838252986195605e-06, "epoch": 3.3496575123231547, "percentage": 66.99, "elapsed_time": "2:17:54", "remaining_time": "1:07:56", "throughput": 19903.5, "total_tokens": 164682752} +{"current_steps": 52330, "total_steps": 78105, "loss": 0.1663, "lr": 1.483314903354619e-06, "epoch": 3.3499775942641317, "percentage": 67.0, "elapsed_time": "2:17:54", "remaining_time": "1:07:55", "throughput": 19903.79, "total_tokens": 164698688} +{"current_steps": 52335, "total_steps": 78105, "loss": 0.1392, "lr": 1.4828045588576361e-06, "epoch": 3.3502976762051087, "percentage": 67.01, "elapsed_time": "2:17:55", "remaining_time": "1:07:54", "throughput": 19904.05, "total_tokens": 164714560} +{"current_steps": 52340, "total_steps": 78105, "loss": 0.1694, "lr": 1.4822942651540944e-06, "epoch": 3.350617758146085, "percentage": 67.01, "elapsed_time": "2:17:56", "remaining_time": "1:07:54", "throughput": 19904.29, "total_tokens": 164729920} +{"current_steps": 52345, "total_steps": 78105, "loss": 0.1632, "lr": 1.4817840222694768e-06, "epoch": 3.350937840087062, "percentage": 67.02, "elapsed_time": "2:17:56", "remaining_time": "1:07:53", "throughput": 19904.59, "total_tokens": 164745920} +{"current_steps": 52350, "total_steps": 78105, "loss": 0.1718, "lr": 1.4812738302292605e-06, "epoch": 3.351257922028039, "percentage": 67.03, "elapsed_time": "2:17:57", "remaining_time": "1:07:52", "throughput": 19904.84, "total_tokens": 164761344} +{"current_steps": 52355, "total_steps": 78105, "loss": 0.186, "lr": 1.4807636890589238e-06, "epoch": 3.351578003969016, "percentage": 67.03, "elapsed_time": "2:17:58", "remaining_time": "1:07:51", "throughput": 19905.13, "total_tokens": 164777728} +{"current_steps": 52360, "total_steps": 78105, "loss": 0.1383, "lr": 1.480253598783939e-06, "epoch": 3.351898085909993, "percentage": 67.04, "elapsed_time": "2:17:58", "remaining_time": "1:07:50", "throughput": 19905.39, "total_tokens": 164792896} +{"current_steps": 52365, "total_steps": 78105, "loss": 0.1454, "lr": 1.4797435594297777e-06, "epoch": 3.3522181678509697, "percentage": 67.04, "elapsed_time": "2:17:59", "remaining_time": "1:07:49", "throughput": 19905.69, "total_tokens": 164809216} +{"current_steps": 52370, "total_steps": 78105, "loss": 0.1029, "lr": 1.4792335710219086e-06, "epoch": 3.3525382497919467, "percentage": 67.05, "elapsed_time": "2:18:00", "remaining_time": "1:07:48", "throughput": 19905.9, "total_tokens": 164824000} +{"current_steps": 52375, "total_steps": 78105, "loss": 0.1583, "lr": 1.4787236335857984e-06, "epoch": 3.3528583317329237, "percentage": 67.06, "elapsed_time": "2:18:00", "remaining_time": "1:07:48", "throughput": 19906.12, "total_tokens": 164838784} +{"current_steps": 52380, "total_steps": 78105, "loss": 0.1767, "lr": 1.47821374714691e-06, "epoch": 3.3531784136739007, "percentage": 67.06, "elapsed_time": "2:18:01", "remaining_time": "1:07:47", "throughput": 19906.34, "total_tokens": 164853376} +{"current_steps": 52385, "total_steps": 78105, "loss": 0.155, "lr": 1.4777039117307054e-06, "epoch": 3.3534984956148772, "percentage": 67.07, "elapsed_time": "2:18:02", "remaining_time": "1:07:46", "throughput": 19906.56, "total_tokens": 164868416} +{"current_steps": 52390, "total_steps": 78105, "loss": 0.1631, "lr": 1.4771941273626417e-06, "epoch": 3.353818577555854, "percentage": 67.08, "elapsed_time": "2:18:02", "remaining_time": "1:07:45", "throughput": 19906.8, "total_tokens": 164883392} +{"current_steps": 52395, "total_steps": 78105, "loss": 0.1535, "lr": 1.4766843940681755e-06, "epoch": 3.354138659496831, "percentage": 67.08, "elapsed_time": "2:18:03", "remaining_time": "1:07:44", "throughput": 19907.13, "total_tokens": 164900224} +{"current_steps": 52400, "total_steps": 78105, "loss": 0.1386, "lr": 1.476174711872761e-06, "epoch": 3.354458741437808, "percentage": 67.09, "elapsed_time": "2:18:04", "remaining_time": "1:07:43", "throughput": 19907.37, "total_tokens": 164915456} +{"current_steps": 52405, "total_steps": 78105, "loss": 0.158, "lr": 1.475665080801848e-06, "epoch": 3.354778823378785, "percentage": 67.1, "elapsed_time": "2:18:04", "remaining_time": "1:07:42", "throughput": 19907.69, "total_tokens": 164932096} +{"current_steps": 52410, "total_steps": 78105, "loss": 0.1584, "lr": 1.475155500880886e-06, "epoch": 3.3550989053197617, "percentage": 67.1, "elapsed_time": "2:18:05", "remaining_time": "1:07:42", "throughput": 19908.12, "total_tokens": 164951168} +{"current_steps": 52415, "total_steps": 78105, "loss": 0.1488, "lr": 1.4746459721353196e-06, "epoch": 3.3554189872607387, "percentage": 67.11, "elapsed_time": "2:18:06", "remaining_time": "1:07:41", "throughput": 19908.33, "total_tokens": 164965568} +{"current_steps": 52420, "total_steps": 78105, "loss": 0.1788, "lr": 1.4741364945905925e-06, "epoch": 3.3557390692017157, "percentage": 67.11, "elapsed_time": "2:18:06", "remaining_time": "1:07:40", "throughput": 19908.61, "total_tokens": 164981504} +{"current_steps": 52425, "total_steps": 78105, "loss": 0.2274, "lr": 1.4736270682721454e-06, "epoch": 3.3560591511426927, "percentage": 67.12, "elapsed_time": "2:18:07", "remaining_time": "1:07:39", "throughput": 19908.95, "total_tokens": 164998400} +{"current_steps": 52430, "total_steps": 78105, "loss": 0.1673, "lr": 1.4731176932054163e-06, "epoch": 3.3563792330836693, "percentage": 67.13, "elapsed_time": "2:18:08", "remaining_time": "1:07:38", "throughput": 19909.15, "total_tokens": 165012736} +{"current_steps": 52435, "total_steps": 78105, "loss": 0.1393, "lr": 1.4726083694158407e-06, "epoch": 3.3566993150246462, "percentage": 67.13, "elapsed_time": "2:18:08", "remaining_time": "1:07:37", "throughput": 19909.39, "total_tokens": 165027968} +{"current_steps": 52440, "total_steps": 78105, "loss": 0.167, "lr": 1.4720990969288523e-06, "epoch": 3.3570193969656232, "percentage": 67.14, "elapsed_time": "2:18:09", "remaining_time": "1:07:37", "throughput": 19909.7, "total_tokens": 165044096} +{"current_steps": 52445, "total_steps": 78105, "loss": 0.1592, "lr": 1.47158987576988e-06, "epoch": 3.3573394789066002, "percentage": 67.15, "elapsed_time": "2:18:10", "remaining_time": "1:07:36", "throughput": 19909.99, "total_tokens": 165060096} +{"current_steps": 52450, "total_steps": 78105, "loss": 0.1531, "lr": 1.4710807059643523e-06, "epoch": 3.357659560847577, "percentage": 67.15, "elapsed_time": "2:18:11", "remaining_time": "1:07:35", "throughput": 19910.25, "total_tokens": 165075904} +{"current_steps": 52455, "total_steps": 78105, "loss": 0.1206, "lr": 1.4705715875376956e-06, "epoch": 3.3579796427885538, "percentage": 67.16, "elapsed_time": "2:18:11", "remaining_time": "1:07:34", "throughput": 19910.47, "total_tokens": 165090432} +{"current_steps": 52460, "total_steps": 78105, "loss": 0.1642, "lr": 1.4700625205153307e-06, "epoch": 3.3582997247295308, "percentage": 67.17, "elapsed_time": "2:18:12", "remaining_time": "1:07:33", "throughput": 19910.7, "total_tokens": 165105344} +{"current_steps": 52465, "total_steps": 78105, "loss": 0.1409, "lr": 1.46955350492268e-06, "epoch": 3.3586198066705077, "percentage": 67.17, "elapsed_time": "2:18:12", "remaining_time": "1:07:32", "throughput": 19910.97, "total_tokens": 165120896} +{"current_steps": 52470, "total_steps": 78105, "loss": 0.1729, "lr": 1.469044540785159e-06, "epoch": 3.3589398886114843, "percentage": 67.18, "elapsed_time": "2:18:13", "remaining_time": "1:07:31", "throughput": 19911.28, "total_tokens": 165137408} +{"current_steps": 52475, "total_steps": 78105, "loss": 0.1621, "lr": 1.4685356281281845e-06, "epoch": 3.3592599705524613, "percentage": 67.19, "elapsed_time": "2:18:14", "remaining_time": "1:07:31", "throughput": 19911.53, "total_tokens": 165152896} +{"current_steps": 52480, "total_steps": 78105, "loss": 0.2257, "lr": 1.468026766977167e-06, "epoch": 3.3595800524934383, "percentage": 67.19, "elapsed_time": "2:18:15", "remaining_time": "1:07:30", "throughput": 19911.84, "total_tokens": 165169600} +{"current_steps": 52485, "total_steps": 78105, "loss": 0.1312, "lr": 1.467517957357518e-06, "epoch": 3.3599001344344153, "percentage": 67.2, "elapsed_time": "2:18:15", "remaining_time": "1:07:29", "throughput": 19912.15, "total_tokens": 165185920} +{"current_steps": 52490, "total_steps": 78105, "loss": 0.1837, "lr": 1.4670091992946433e-06, "epoch": 3.3602202163753923, "percentage": 67.2, "elapsed_time": "2:18:16", "remaining_time": "1:07:28", "throughput": 19912.41, "total_tokens": 165201728} +{"current_steps": 52495, "total_steps": 78105, "loss": 0.1726, "lr": 1.4665004928139501e-06, "epoch": 3.360540298316369, "percentage": 67.21, "elapsed_time": "2:18:17", "remaining_time": "1:07:27", "throughput": 19912.68, "total_tokens": 165217216} +{"current_steps": 52500, "total_steps": 78105, "loss": 0.1053, "lr": 1.4659918379408378e-06, "epoch": 3.360860380257346, "percentage": 67.22, "elapsed_time": "2:18:17", "remaining_time": "1:07:26", "throughput": 19912.97, "total_tokens": 165233088} +{"current_steps": 52505, "total_steps": 78105, "loss": 0.1588, "lr": 1.4654832347007069e-06, "epoch": 3.361180462198323, "percentage": 67.22, "elapsed_time": "2:18:18", "remaining_time": "1:07:26", "throughput": 19913.27, "total_tokens": 165249472} +{"current_steps": 52510, "total_steps": 78105, "loss": 0.1919, "lr": 1.4649746831189553e-06, "epoch": 3.3615005441392998, "percentage": 67.23, "elapsed_time": "2:18:19", "remaining_time": "1:07:25", "throughput": 19913.55, "total_tokens": 165265664} +{"current_steps": 52515, "total_steps": 78105, "loss": 0.2331, "lr": 1.4644661832209767e-06, "epoch": 3.3618206260802763, "percentage": 67.24, "elapsed_time": "2:18:19", "remaining_time": "1:07:24", "throughput": 19913.83, "total_tokens": 165281856} +{"current_steps": 52520, "total_steps": 78105, "loss": 0.1156, "lr": 1.4639577350321635e-06, "epoch": 3.3621407080212533, "percentage": 67.24, "elapsed_time": "2:18:20", "remaining_time": "1:07:23", "throughput": 19914.09, "total_tokens": 165297280} +{"current_steps": 52525, "total_steps": 78105, "loss": 0.1516, "lr": 1.4634493385779036e-06, "epoch": 3.3624607899622303, "percentage": 67.25, "elapsed_time": "2:18:21", "remaining_time": "1:07:22", "throughput": 19914.4, "total_tokens": 165313664} +{"current_steps": 52530, "total_steps": 78105, "loss": 0.1599, "lr": 1.4629409938835853e-06, "epoch": 3.3627808719032073, "percentage": 67.26, "elapsed_time": "2:18:21", "remaining_time": "1:07:21", "throughput": 19914.65, "total_tokens": 165328960} +{"current_steps": 52535, "total_steps": 78105, "loss": 0.139, "lr": 1.4624327009745915e-06, "epoch": 3.3631009538441843, "percentage": 67.26, "elapsed_time": "2:18:22", "remaining_time": "1:07:21", "throughput": 19914.95, "total_tokens": 165344960} +{"current_steps": 52540, "total_steps": 78105, "loss": 0.1452, "lr": 1.4619244598763041e-06, "epoch": 3.363421035785161, "percentage": 67.27, "elapsed_time": "2:18:23", "remaining_time": "1:07:20", "throughput": 19915.27, "total_tokens": 165361536} +{"current_steps": 52545, "total_steps": 78105, "loss": 0.15, "lr": 1.4614162706141017e-06, "epoch": 3.363741117726138, "percentage": 67.27, "elapsed_time": "2:18:23", "remaining_time": "1:07:19", "throughput": 19915.49, "total_tokens": 165376320} +{"current_steps": 52550, "total_steps": 78105, "loss": 0.1734, "lr": 1.4609081332133621e-06, "epoch": 3.364061199667115, "percentage": 67.28, "elapsed_time": "2:18:24", "remaining_time": "1:07:18", "throughput": 19915.76, "total_tokens": 165392128} +{"current_steps": 52555, "total_steps": 78105, "loss": 0.1577, "lr": 1.4604000476994568e-06, "epoch": 3.364381281608092, "percentage": 67.29, "elapsed_time": "2:18:25", "remaining_time": "1:07:17", "throughput": 19916.07, "total_tokens": 165408704} +{"current_steps": 52560, "total_steps": 78105, "loss": 0.1585, "lr": 1.459892014097759e-06, "epoch": 3.3647013635490683, "percentage": 67.29, "elapsed_time": "2:18:25", "remaining_time": "1:07:16", "throughput": 19916.31, "total_tokens": 165423872} +{"current_steps": 52565, "total_steps": 78105, "loss": 0.1832, "lr": 1.459384032433635e-06, "epoch": 3.3650214454900453, "percentage": 67.3, "elapsed_time": "2:18:26", "remaining_time": "1:07:15", "throughput": 19916.57, "total_tokens": 165439168} +{"current_steps": 52570, "total_steps": 78105, "loss": 0.2009, "lr": 1.458876102732452e-06, "epoch": 3.3653415274310223, "percentage": 67.31, "elapsed_time": "2:18:27", "remaining_time": "1:07:15", "throughput": 19916.84, "total_tokens": 165455040} +{"current_steps": 52575, "total_steps": 78105, "loss": 0.1912, "lr": 1.4583682250195753e-06, "epoch": 3.3656616093719993, "percentage": 67.31, "elapsed_time": "2:18:27", "remaining_time": "1:07:14", "throughput": 19917.11, "total_tokens": 165470656} +{"current_steps": 52580, "total_steps": 78105, "loss": 0.1539, "lr": 1.4578603993203617e-06, "epoch": 3.3659816913129763, "percentage": 67.32, "elapsed_time": "2:18:28", "remaining_time": "1:07:13", "throughput": 19917.08, "total_tokens": 165486080} +{"current_steps": 52585, "total_steps": 78105, "loss": 0.167, "lr": 1.4573526256601728e-06, "epoch": 3.366301773253953, "percentage": 67.33, "elapsed_time": "2:18:29", "remaining_time": "1:07:12", "throughput": 19917.36, "total_tokens": 165501568} +{"current_steps": 52590, "total_steps": 78105, "loss": 0.1771, "lr": 1.4568449040643622e-06, "epoch": 3.36662185519493, "percentage": 67.33, "elapsed_time": "2:18:30", "remaining_time": "1:07:11", "throughput": 19917.6, "total_tokens": 165516800} +{"current_steps": 52595, "total_steps": 78105, "loss": 0.1523, "lr": 1.4563372345582838e-06, "epoch": 3.366941937135907, "percentage": 67.34, "elapsed_time": "2:18:30", "remaining_time": "1:07:10", "throughput": 19917.91, "total_tokens": 165533248} +{"current_steps": 52600, "total_steps": 78105, "loss": 0.2097, "lr": 1.4558296171672878e-06, "epoch": 3.367262019076884, "percentage": 67.35, "elapsed_time": "2:18:31", "remaining_time": "1:07:10", "throughput": 19918.14, "total_tokens": 165548288} +{"current_steps": 52605, "total_steps": 78105, "loss": 0.2918, "lr": 1.4553220519167216e-06, "epoch": 3.3675821010178604, "percentage": 67.35, "elapsed_time": "2:18:32", "remaining_time": "1:07:09", "throughput": 19918.39, "total_tokens": 165563456} +{"current_steps": 52610, "total_steps": 78105, "loss": 0.1531, "lr": 1.4548145388319306e-06, "epoch": 3.3679021829588374, "percentage": 67.36, "elapsed_time": "2:18:32", "remaining_time": "1:07:08", "throughput": 19918.76, "total_tokens": 165581248} +{"current_steps": 52615, "total_steps": 78105, "loss": 0.2317, "lr": 1.454307077938258e-06, "epoch": 3.3682222648998144, "percentage": 67.36, "elapsed_time": "2:18:33", "remaining_time": "1:07:07", "throughput": 19919.04, "total_tokens": 165597824} +{"current_steps": 52620, "total_steps": 78105, "loss": 0.1681, "lr": 1.4537996692610413e-06, "epoch": 3.3685423468407913, "percentage": 67.37, "elapsed_time": "2:18:34", "remaining_time": "1:07:06", "throughput": 19919.35, "total_tokens": 165614016} +{"current_steps": 52625, "total_steps": 78105, "loss": 0.1565, "lr": 1.453292312825621e-06, "epoch": 3.3688624287817683, "percentage": 67.38, "elapsed_time": "2:18:34", "remaining_time": "1:07:05", "throughput": 19919.61, "total_tokens": 165629568} +{"current_steps": 52630, "total_steps": 78105, "loss": 0.1623, "lr": 1.4527850086573303e-06, "epoch": 3.369182510722745, "percentage": 67.38, "elapsed_time": "2:18:35", "remaining_time": "1:07:05", "throughput": 19919.87, "total_tokens": 165645120} +{"current_steps": 52635, "total_steps": 78105, "loss": 0.1302, "lr": 1.4522777567815017e-06, "epoch": 3.369502592663722, "percentage": 67.39, "elapsed_time": "2:18:36", "remaining_time": "1:07:04", "throughput": 19920.13, "total_tokens": 165660352} +{"current_steps": 52640, "total_steps": 78105, "loss": 0.1674, "lr": 1.4517705572234647e-06, "epoch": 3.369822674604699, "percentage": 67.4, "elapsed_time": "2:18:36", "remaining_time": "1:07:03", "throughput": 19920.41, "total_tokens": 165676160} +{"current_steps": 52645, "total_steps": 78105, "loss": 0.1841, "lr": 1.4512634100085449e-06, "epoch": 3.370142756545676, "percentage": 67.4, "elapsed_time": "2:18:37", "remaining_time": "1:07:02", "throughput": 19920.69, "total_tokens": 165691968} +{"current_steps": 52650, "total_steps": 78105, "loss": 0.1902, "lr": 1.4507563151620696e-06, "epoch": 3.3704628384866524, "percentage": 67.41, "elapsed_time": "2:18:38", "remaining_time": "1:07:01", "throughput": 19920.98, "total_tokens": 165707968} +{"current_steps": 52655, "total_steps": 78105, "loss": 0.1807, "lr": 1.4502492727093565e-06, "epoch": 3.3707829204276294, "percentage": 67.42, "elapsed_time": "2:18:38", "remaining_time": "1:07:00", "throughput": 19921.23, "total_tokens": 165723456} +{"current_steps": 52660, "total_steps": 78105, "loss": 0.1911, "lr": 1.449742282675729e-06, "epoch": 3.3711030023686064, "percentage": 67.42, "elapsed_time": "2:18:39", "remaining_time": "1:06:59", "throughput": 19921.46, "total_tokens": 165738432} +{"current_steps": 52665, "total_steps": 78105, "loss": 0.1701, "lr": 1.4492353450864992e-06, "epoch": 3.3714230843095834, "percentage": 67.43, "elapsed_time": "2:18:40", "remaining_time": "1:06:59", "throughput": 19921.75, "total_tokens": 165754624} +{"current_steps": 52670, "total_steps": 78105, "loss": 0.1334, "lr": 1.4487284599669842e-06, "epoch": 3.3717431662505604, "percentage": 67.43, "elapsed_time": "2:18:40", "remaining_time": "1:06:58", "throughput": 19922.02, "total_tokens": 165770432} +{"current_steps": 52675, "total_steps": 78105, "loss": 0.1972, "lr": 1.448221627342494e-06, "epoch": 3.372063248191537, "percentage": 67.44, "elapsed_time": "2:18:41", "remaining_time": "1:06:57", "throughput": 19922.31, "total_tokens": 165786368} +{"current_steps": 52680, "total_steps": 78105, "loss": 0.162, "lr": 1.4477148472383363e-06, "epoch": 3.372383330132514, "percentage": 67.45, "elapsed_time": "2:18:42", "remaining_time": "1:06:56", "throughput": 19922.54, "total_tokens": 165801088} +{"current_steps": 52685, "total_steps": 78105, "loss": 0.249, "lr": 1.44720811967982e-06, "epoch": 3.372703412073491, "percentage": 67.45, "elapsed_time": "2:18:42", "remaining_time": "1:06:55", "throughput": 19922.75, "total_tokens": 165815744} +{"current_steps": 52690, "total_steps": 78105, "loss": 0.1416, "lr": 1.446701444692244e-06, "epoch": 3.373023494014468, "percentage": 67.46, "elapsed_time": "2:18:43", "remaining_time": "1:06:54", "throughput": 19923.02, "total_tokens": 165831104} +{"current_steps": 52695, "total_steps": 78105, "loss": 0.1625, "lr": 1.4461948223009126e-06, "epoch": 3.3733435759554444, "percentage": 67.47, "elapsed_time": "2:18:44", "remaining_time": "1:06:54", "throughput": 19923.23, "total_tokens": 165845760} +{"current_steps": 52700, "total_steps": 78105, "loss": 0.1882, "lr": 1.4456882525311228e-06, "epoch": 3.3736636578964214, "percentage": 67.47, "elapsed_time": "2:18:44", "remaining_time": "1:06:53", "throughput": 19923.52, "total_tokens": 165861952} +{"current_steps": 52705, "total_steps": 78105, "loss": 0.2035, "lr": 1.4451817354081698e-06, "epoch": 3.3739837398373984, "percentage": 67.48, "elapsed_time": "2:18:45", "remaining_time": "1:06:52", "throughput": 19923.8, "total_tokens": 165877760} +{"current_steps": 52710, "total_steps": 78105, "loss": 0.1695, "lr": 1.444675270957347e-06, "epoch": 3.3743038217783754, "percentage": 67.49, "elapsed_time": "2:18:46", "remaining_time": "1:06:51", "throughput": 19924.02, "total_tokens": 165892288} +{"current_steps": 52715, "total_steps": 78105, "loss": 0.1716, "lr": 1.4441688592039438e-06, "epoch": 3.3746239037193524, "percentage": 67.49, "elapsed_time": "2:18:46", "remaining_time": "1:06:50", "throughput": 19924.25, "total_tokens": 165906880} +{"current_steps": 52720, "total_steps": 78105, "loss": 0.2366, "lr": 1.4436625001732483e-06, "epoch": 3.374943985660329, "percentage": 67.5, "elapsed_time": "2:18:47", "remaining_time": "1:06:49", "throughput": 19924.55, "total_tokens": 165923456} +{"current_steps": 52725, "total_steps": 78105, "loss": 0.1386, "lr": 1.4431561938905453e-06, "epoch": 3.375264067601306, "percentage": 67.51, "elapsed_time": "2:18:48", "remaining_time": "1:06:48", "throughput": 19924.84, "total_tokens": 165939584} +{"current_steps": 52730, "total_steps": 78105, "loss": 0.1757, "lr": 1.4426499403811156e-06, "epoch": 3.375584149542283, "percentage": 67.51, "elapsed_time": "2:18:48", "remaining_time": "1:06:48", "throughput": 19925.12, "total_tokens": 165955392} +{"current_steps": 52735, "total_steps": 78105, "loss": 0.1456, "lr": 1.442143739670243e-06, "epoch": 3.3759042314832595, "percentage": 67.52, "elapsed_time": "2:18:49", "remaining_time": "1:06:47", "throughput": 19925.43, "total_tokens": 165971840} +{"current_steps": 52740, "total_steps": 78105, "loss": 0.1885, "lr": 1.4416375917831995e-06, "epoch": 3.3762243134242365, "percentage": 67.52, "elapsed_time": "2:18:50", "remaining_time": "1:06:46", "throughput": 19925.69, "total_tokens": 165987392} +{"current_steps": 52745, "total_steps": 78105, "loss": 0.1549, "lr": 1.441131496745263e-06, "epoch": 3.3765443953652134, "percentage": 67.53, "elapsed_time": "2:18:50", "remaining_time": "1:06:45", "throughput": 19925.91, "total_tokens": 166002432} +{"current_steps": 52750, "total_steps": 78105, "loss": 0.1498, "lr": 1.4406254545817041e-06, "epoch": 3.3768644773061904, "percentage": 67.54, "elapsed_time": "2:18:51", "remaining_time": "1:06:44", "throughput": 19926.15, "total_tokens": 166017856} +{"current_steps": 52755, "total_steps": 78105, "loss": 0.1363, "lr": 1.440119465317792e-06, "epoch": 3.3771845592471674, "percentage": 67.54, "elapsed_time": "2:18:52", "remaining_time": "1:06:43", "throughput": 19926.34, "total_tokens": 166032192} +{"current_steps": 52760, "total_steps": 78105, "loss": 0.1525, "lr": 1.4396135289787926e-06, "epoch": 3.377504641188144, "percentage": 67.55, "elapsed_time": "2:18:52", "remaining_time": "1:06:43", "throughput": 19926.57, "total_tokens": 166047424} +{"current_steps": 52765, "total_steps": 78105, "loss": 0.1357, "lr": 1.4391076455899699e-06, "epoch": 3.377824723129121, "percentage": 67.56, "elapsed_time": "2:18:53", "remaining_time": "1:06:42", "throughput": 19926.83, "total_tokens": 166062976} +{"current_steps": 52770, "total_steps": 78105, "loss": 0.2519, "lr": 1.438601815176587e-06, "epoch": 3.378144805070098, "percentage": 67.56, "elapsed_time": "2:18:54", "remaining_time": "1:06:41", "throughput": 19927.1, "total_tokens": 166078720} +{"current_steps": 52775, "total_steps": 78105, "loss": 0.1971, "lr": 1.4380960377638992e-06, "epoch": 3.378464887011075, "percentage": 67.57, "elapsed_time": "2:18:55", "remaining_time": "1:06:40", "throughput": 19927.38, "total_tokens": 166094720} +{"current_steps": 52780, "total_steps": 78105, "loss": 0.166, "lr": 1.4375903133771646e-06, "epoch": 3.3787849689520515, "percentage": 67.58, "elapsed_time": "2:18:55", "remaining_time": "1:06:39", "throughput": 19927.64, "total_tokens": 166110208} +{"current_steps": 52785, "total_steps": 78105, "loss": 0.223, "lr": 1.4370846420416359e-06, "epoch": 3.3791050508930285, "percentage": 67.58, "elapsed_time": "2:18:56", "remaining_time": "1:06:38", "throughput": 19927.87, "total_tokens": 166125184} +{"current_steps": 52790, "total_steps": 78105, "loss": 0.1478, "lr": 1.436579023782564e-06, "epoch": 3.3794251328340055, "percentage": 67.59, "elapsed_time": "2:18:57", "remaining_time": "1:06:37", "throughput": 19928.14, "total_tokens": 166140992} +{"current_steps": 52795, "total_steps": 78105, "loss": 0.1542, "lr": 1.4360734586251958e-06, "epoch": 3.3797452147749825, "percentage": 67.59, "elapsed_time": "2:18:57", "remaining_time": "1:06:37", "throughput": 19928.4, "total_tokens": 166156672} +{"current_steps": 52800, "total_steps": 78105, "loss": 0.1927, "lr": 1.4355679465947768e-06, "epoch": 3.3800652967159595, "percentage": 67.6, "elapsed_time": "2:18:58", "remaining_time": "1:06:36", "throughput": 19928.66, "total_tokens": 166172224} +{"current_steps": 52805, "total_steps": 78105, "loss": 0.1953, "lr": 1.435062487716552e-06, "epoch": 3.380385378656936, "percentage": 67.61, "elapsed_time": "2:18:59", "remaining_time": "1:06:35", "throughput": 19928.93, "total_tokens": 166187904} +{"current_steps": 52810, "total_steps": 78105, "loss": 0.1237, "lr": 1.4345570820157577e-06, "epoch": 3.380705460597913, "percentage": 67.61, "elapsed_time": "2:18:59", "remaining_time": "1:06:34", "throughput": 19929.21, "total_tokens": 166203840} +{"current_steps": 52815, "total_steps": 78105, "loss": 0.1942, "lr": 1.4340517295176338e-06, "epoch": 3.38102554253889, "percentage": 67.62, "elapsed_time": "2:19:00", "remaining_time": "1:06:33", "throughput": 19929.47, "total_tokens": 166219712} +{"current_steps": 52820, "total_steps": 78105, "loss": 0.1575, "lr": 1.4335464302474145e-06, "epoch": 3.381345624479867, "percentage": 67.63, "elapsed_time": "2:19:01", "remaining_time": "1:06:32", "throughput": 19929.72, "total_tokens": 166234880} +{"current_steps": 52825, "total_steps": 78105, "loss": 0.1432, "lr": 1.4330411842303311e-06, "epoch": 3.3816657064208435, "percentage": 67.63, "elapsed_time": "2:19:01", "remaining_time": "1:06:32", "throughput": 19930.03, "total_tokens": 166251648} +{"current_steps": 52830, "total_steps": 78105, "loss": 0.2069, "lr": 1.4325359914916137e-06, "epoch": 3.3819857883618205, "percentage": 67.64, "elapsed_time": "2:19:02", "remaining_time": "1:06:31", "throughput": 19930.24, "total_tokens": 166266240} +{"current_steps": 52835, "total_steps": 78105, "loss": 0.2206, "lr": 1.4320308520564885e-06, "epoch": 3.3823058703027975, "percentage": 67.65, "elapsed_time": "2:19:03", "remaining_time": "1:06:30", "throughput": 19930.46, "total_tokens": 166281280} +{"current_steps": 52840, "total_steps": 78105, "loss": 0.1591, "lr": 1.4315257659501797e-06, "epoch": 3.3826259522437745, "percentage": 67.65, "elapsed_time": "2:19:03", "remaining_time": "1:06:29", "throughput": 19930.72, "total_tokens": 166296960} +{"current_steps": 52845, "total_steps": 78105, "loss": 0.1698, "lr": 1.4310207331979087e-06, "epoch": 3.3829460341847515, "percentage": 67.66, "elapsed_time": "2:19:04", "remaining_time": "1:06:28", "throughput": 19931.01, "total_tokens": 166312832} +{"current_steps": 52850, "total_steps": 78105, "loss": 0.1811, "lr": 1.4305157538248932e-06, "epoch": 3.383266116125728, "percentage": 67.67, "elapsed_time": "2:19:05", "remaining_time": "1:06:27", "throughput": 19931.28, "total_tokens": 166328704} +{"current_steps": 52855, "total_steps": 78105, "loss": 0.1651, "lr": 1.4300108278563512e-06, "epoch": 3.383586198066705, "percentage": 67.67, "elapsed_time": "2:19:05", "remaining_time": "1:06:26", "throughput": 19931.57, "total_tokens": 166345280} +{"current_steps": 52860, "total_steps": 78105, "loss": 0.1416, "lr": 1.4295059553174954e-06, "epoch": 3.383906280007682, "percentage": 67.68, "elapsed_time": "2:19:06", "remaining_time": "1:06:26", "throughput": 19931.82, "total_tokens": 166360704} +{"current_steps": 52865, "total_steps": 78105, "loss": 0.1792, "lr": 1.4290011362335358e-06, "epoch": 3.384226361948659, "percentage": 67.68, "elapsed_time": "2:19:07", "remaining_time": "1:06:25", "throughput": 19932.12, "total_tokens": 166377408} +{"current_steps": 52870, "total_steps": 78105, "loss": 0.1122, "lr": 1.4284963706296808e-06, "epoch": 3.3845464438896355, "percentage": 67.69, "elapsed_time": "2:19:07", "remaining_time": "1:06:24", "throughput": 19932.37, "total_tokens": 166392768} +{"current_steps": 52875, "total_steps": 78105, "loss": 0.212, "lr": 1.427991658531135e-06, "epoch": 3.3848665258306125, "percentage": 67.7, "elapsed_time": "2:19:08", "remaining_time": "1:06:23", "throughput": 19932.66, "total_tokens": 166408960} +{"current_steps": 52880, "total_steps": 78105, "loss": 0.1707, "lr": 1.4274869999631036e-06, "epoch": 3.3851866077715895, "percentage": 67.7, "elapsed_time": "2:19:09", "remaining_time": "1:06:22", "throughput": 19932.92, "total_tokens": 166424320} +{"current_steps": 52885, "total_steps": 78105, "loss": 0.1575, "lr": 1.4269823949507832e-06, "epoch": 3.3855066897125665, "percentage": 67.71, "elapsed_time": "2:19:09", "remaining_time": "1:06:21", "throughput": 19933.17, "total_tokens": 166439616} +{"current_steps": 52890, "total_steps": 78105, "loss": 0.116, "lr": 1.426477843519375e-06, "epoch": 3.3858267716535435, "percentage": 67.72, "elapsed_time": "2:19:10", "remaining_time": "1:06:21", "throughput": 19933.45, "total_tokens": 166455552} +{"current_steps": 52895, "total_steps": 78105, "loss": 0.2972, "lr": 1.4259733456940691e-06, "epoch": 3.38614685359452, "percentage": 67.72, "elapsed_time": "2:19:11", "remaining_time": "1:06:20", "throughput": 19933.7, "total_tokens": 166471232} +{"current_steps": 52900, "total_steps": 78105, "loss": 0.1586, "lr": 1.425468901500061e-06, "epoch": 3.386466935535497, "percentage": 67.73, "elapsed_time": "2:19:11", "remaining_time": "1:06:19", "throughput": 19933.93, "total_tokens": 166486144} +{"current_steps": 52905, "total_steps": 78105, "loss": 0.1247, "lr": 1.4249645109625394e-06, "epoch": 3.386787017476474, "percentage": 67.74, "elapsed_time": "2:19:12", "remaining_time": "1:06:18", "throughput": 19934.14, "total_tokens": 166500608} +{"current_steps": 52910, "total_steps": 78105, "loss": 0.1257, "lr": 1.42446017410669e-06, "epoch": 3.387107099417451, "percentage": 67.74, "elapsed_time": "2:19:13", "remaining_time": "1:06:17", "throughput": 19934.47, "total_tokens": 166517504} +{"current_steps": 52915, "total_steps": 78105, "loss": 0.1979, "lr": 1.4239558909576973e-06, "epoch": 3.3874271813584276, "percentage": 67.75, "elapsed_time": "2:19:13", "remaining_time": "1:06:16", "throughput": 19934.71, "total_tokens": 166532608} +{"current_steps": 52920, "total_steps": 78105, "loss": 0.1649, "lr": 1.4234516615407413e-06, "epoch": 3.3877472632994046, "percentage": 67.75, "elapsed_time": "2:19:14", "remaining_time": "1:06:16", "throughput": 19935.1, "total_tokens": 166550784} +{"current_steps": 52925, "total_steps": 78105, "loss": 0.0975, "lr": 1.422947485881003e-06, "epoch": 3.3880673452403816, "percentage": 67.76, "elapsed_time": "2:19:15", "remaining_time": "1:06:15", "throughput": 19935.36, "total_tokens": 166566464} +{"current_steps": 52930, "total_steps": 78105, "loss": 0.1885, "lr": 1.4224433640036572e-06, "epoch": 3.3883874271813585, "percentage": 67.77, "elapsed_time": "2:19:15", "remaining_time": "1:06:14", "throughput": 19935.64, "total_tokens": 166581952} +{"current_steps": 52935, "total_steps": 78105, "loss": 0.1934, "lr": 1.4219392959338767e-06, "epoch": 3.3887075091223355, "percentage": 67.77, "elapsed_time": "2:19:16", "remaining_time": "1:06:13", "throughput": 19935.91, "total_tokens": 166598080} +{"current_steps": 52940, "total_steps": 78105, "loss": 0.2168, "lr": 1.4214352816968327e-06, "epoch": 3.389027591063312, "percentage": 67.78, "elapsed_time": "2:19:17", "remaining_time": "1:06:12", "throughput": 19936.14, "total_tokens": 166613120} +{"current_steps": 52945, "total_steps": 78105, "loss": 0.1545, "lr": 1.4209313213176922e-06, "epoch": 3.389347673004289, "percentage": 67.79, "elapsed_time": "2:19:17", "remaining_time": "1:06:11", "throughput": 19936.36, "total_tokens": 166628032} +{"current_steps": 52950, "total_steps": 78105, "loss": 0.1895, "lr": 1.4204274148216217e-06, "epoch": 3.389667754945266, "percentage": 67.79, "elapsed_time": "2:19:18", "remaining_time": "1:06:10", "throughput": 19936.61, "total_tokens": 166643328} +{"current_steps": 52955, "total_steps": 78105, "loss": 0.2368, "lr": 1.419923562233782e-06, "epoch": 3.389987836886243, "percentage": 67.8, "elapsed_time": "2:19:19", "remaining_time": "1:06:10", "throughput": 19936.89, "total_tokens": 166659328} +{"current_steps": 52960, "total_steps": 78105, "loss": 0.1384, "lr": 1.419419763579333e-06, "epoch": 3.3903079188272196, "percentage": 67.81, "elapsed_time": "2:19:19", "remaining_time": "1:06:09", "throughput": 19937.13, "total_tokens": 166674176} +{"current_steps": 52965, "total_steps": 78105, "loss": 0.1512, "lr": 1.4189160188834344e-06, "epoch": 3.3906280007681966, "percentage": 67.81, "elapsed_time": "2:19:20", "remaining_time": "1:06:08", "throughput": 19937.46, "total_tokens": 166690816} +{"current_steps": 52970, "total_steps": 78105, "loss": 0.2621, "lr": 1.418412328171237e-06, "epoch": 3.3909480827091736, "percentage": 67.82, "elapsed_time": "2:19:21", "remaining_time": "1:06:07", "throughput": 19937.75, "total_tokens": 166707264} +{"current_steps": 52975, "total_steps": 78105, "loss": 0.1546, "lr": 1.4179086914678947e-06, "epoch": 3.3912681646501506, "percentage": 67.83, "elapsed_time": "2:19:22", "remaining_time": "1:06:06", "throughput": 19937.98, "total_tokens": 166722432} +{"current_steps": 52980, "total_steps": 78105, "loss": 0.1353, "lr": 1.4174051087985563e-06, "epoch": 3.3915882465911276, "percentage": 67.83, "elapsed_time": "2:19:22", "remaining_time": "1:06:05", "throughput": 19938.23, "total_tokens": 166737536} +{"current_steps": 52985, "total_steps": 78105, "loss": 0.1705, "lr": 1.4169015801883674e-06, "epoch": 3.391908328532104, "percentage": 67.84, "elapsed_time": "2:19:23", "remaining_time": "1:06:05", "throughput": 19938.46, "total_tokens": 166752768} +{"current_steps": 52990, "total_steps": 78105, "loss": 0.1519, "lr": 1.416398105662472e-06, "epoch": 3.392228410473081, "percentage": 67.84, "elapsed_time": "2:19:24", "remaining_time": "1:06:04", "throughput": 19938.67, "total_tokens": 166767168} +{"current_steps": 52995, "total_steps": 78105, "loss": 0.1357, "lr": 1.41589468524601e-06, "epoch": 3.392548492414058, "percentage": 67.85, "elapsed_time": "2:19:24", "remaining_time": "1:06:03", "throughput": 19938.94, "total_tokens": 166782848} +{"current_steps": 53000, "total_steps": 78105, "loss": 0.2156, "lr": 1.4153913189641226e-06, "epoch": 3.392868574355035, "percentage": 67.86, "elapsed_time": "2:19:25", "remaining_time": "1:06:02", "throughput": 19939.26, "total_tokens": 166799552} +{"current_steps": 53005, "total_steps": 78105, "loss": 0.141, "lr": 1.4148880068419413e-06, "epoch": 3.3931886562960116, "percentage": 67.86, "elapsed_time": "2:19:26", "remaining_time": "1:06:01", "throughput": 19939.58, "total_tokens": 166815936} +{"current_steps": 53010, "total_steps": 78105, "loss": 0.156, "lr": 1.4143847489046014e-06, "epoch": 3.3935087382369886, "percentage": 67.87, "elapsed_time": "2:19:26", "remaining_time": "1:06:00", "throughput": 19939.88, "total_tokens": 166832320} +{"current_steps": 53015, "total_steps": 78105, "loss": 0.1561, "lr": 1.4138815451772325e-06, "epoch": 3.3938288201779656, "percentage": 67.88, "elapsed_time": "2:19:27", "remaining_time": "1:05:59", "throughput": 19940.13, "total_tokens": 166847680} +{"current_steps": 53020, "total_steps": 78105, "loss": 0.1001, "lr": 1.4133783956849623e-06, "epoch": 3.3941489021189426, "percentage": 67.88, "elapsed_time": "2:19:28", "remaining_time": "1:05:59", "throughput": 19940.35, "total_tokens": 166862400} +{"current_steps": 53025, "total_steps": 78105, "loss": 0.1729, "lr": 1.4128753004529145e-06, "epoch": 3.394468984059919, "percentage": 67.89, "elapsed_time": "2:19:28", "remaining_time": "1:05:58", "throughput": 19940.61, "total_tokens": 166878144} +{"current_steps": 53030, "total_steps": 78105, "loss": 0.1541, "lr": 1.4123722595062107e-06, "epoch": 3.394789066000896, "percentage": 67.9, "elapsed_time": "2:19:29", "remaining_time": "1:05:57", "throughput": 19940.83, "total_tokens": 166893120} +{"current_steps": 53035, "total_steps": 78105, "loss": 0.1637, "lr": 1.4118692728699733e-06, "epoch": 3.395109147941873, "percentage": 67.9, "elapsed_time": "2:19:30", "remaining_time": "1:05:56", "throughput": 19941.19, "total_tokens": 166910976} +{"current_steps": 53040, "total_steps": 78105, "loss": 0.1321, "lr": 1.411366340569314e-06, "epoch": 3.39542922988285, "percentage": 67.91, "elapsed_time": "2:19:30", "remaining_time": "1:05:55", "throughput": 19941.56, "total_tokens": 166928832} +{"current_steps": 53045, "total_steps": 78105, "loss": 0.1172, "lr": 1.4108634626293505e-06, "epoch": 3.3957493118238267, "percentage": 67.91, "elapsed_time": "2:19:31", "remaining_time": "1:05:54", "throughput": 19941.82, "total_tokens": 166944576} +{"current_steps": 53050, "total_steps": 78105, "loss": 0.1946, "lr": 1.4103606390751925e-06, "epoch": 3.3960693937648037, "percentage": 67.92, "elapsed_time": "2:19:32", "remaining_time": "1:05:54", "throughput": 19942.06, "total_tokens": 166959936} +{"current_steps": 53055, "total_steps": 78105, "loss": 0.142, "lr": 1.4098578699319486e-06, "epoch": 3.3963894757057806, "percentage": 67.93, "elapsed_time": "2:19:32", "remaining_time": "1:05:53", "throughput": 19942.32, "total_tokens": 166975424} +{"current_steps": 53060, "total_steps": 78105, "loss": 0.1515, "lr": 1.4093551552247243e-06, "epoch": 3.3967095576467576, "percentage": 67.93, "elapsed_time": "2:19:33", "remaining_time": "1:05:52", "throughput": 19942.52, "total_tokens": 166989952} +{"current_steps": 53065, "total_steps": 78105, "loss": 0.1446, "lr": 1.4088524949786225e-06, "epoch": 3.3970296395877346, "percentage": 67.94, "elapsed_time": "2:19:34", "remaining_time": "1:05:51", "throughput": 19942.74, "total_tokens": 167004864} +{"current_steps": 53070, "total_steps": 78105, "loss": 0.1743, "lr": 1.4083498892187424e-06, "epoch": 3.397349721528711, "percentage": 67.95, "elapsed_time": "2:19:34", "remaining_time": "1:05:50", "throughput": 19943.0, "total_tokens": 167020352} +{"current_steps": 53075, "total_steps": 78105, "loss": 0.1583, "lr": 1.407847337970185e-06, "epoch": 3.397669803469688, "percentage": 67.95, "elapsed_time": "2:19:35", "remaining_time": "1:05:49", "throughput": 19943.28, "total_tokens": 167036032} +{"current_steps": 53080, "total_steps": 78105, "loss": 0.2474, "lr": 1.4073448412580403e-06, "epoch": 3.397989885410665, "percentage": 67.96, "elapsed_time": "2:19:36", "remaining_time": "1:05:49", "throughput": 19943.55, "total_tokens": 167051520} +{"current_steps": 53085, "total_steps": 78105, "loss": 0.1208, "lr": 1.406842399107405e-06, "epoch": 3.398309967351642, "percentage": 67.97, "elapsed_time": "2:19:36", "remaining_time": "1:05:48", "throughput": 19943.83, "total_tokens": 167067392} +{"current_steps": 53090, "total_steps": 78105, "loss": 0.1728, "lr": 1.4063400115433642e-06, "epoch": 3.3986300492926187, "percentage": 67.97, "elapsed_time": "2:19:37", "remaining_time": "1:05:47", "throughput": 19944.09, "total_tokens": 167082816} +{"current_steps": 53095, "total_steps": 78105, "loss": 0.1649, "lr": 1.4058376785910078e-06, "epoch": 3.3989501312335957, "percentage": 67.98, "elapsed_time": "2:19:38", "remaining_time": "1:05:46", "throughput": 19944.32, "total_tokens": 167097920} +{"current_steps": 53100, "total_steps": 78105, "loss": 0.1787, "lr": 1.4053354002754183e-06, "epoch": 3.3992702131745727, "percentage": 67.99, "elapsed_time": "2:19:38", "remaining_time": "1:05:45", "throughput": 19944.57, "total_tokens": 167113152} +{"current_steps": 53105, "total_steps": 78105, "loss": 0.1523, "lr": 1.4048331766216757e-06, "epoch": 3.3995902951155497, "percentage": 67.99, "elapsed_time": "2:19:39", "remaining_time": "1:05:44", "throughput": 19944.86, "total_tokens": 167129472} +{"current_steps": 53110, "total_steps": 78105, "loss": 0.2269, "lr": 1.4043310076548621e-06, "epoch": 3.3999103770565267, "percentage": 68.0, "elapsed_time": "2:19:40", "remaining_time": "1:05:43", "throughput": 19945.08, "total_tokens": 167144192} +{"current_steps": 53115, "total_steps": 78105, "loss": 0.1375, "lr": 1.403828893400049e-06, "epoch": 3.400230458997503, "percentage": 68.0, "elapsed_time": "2:19:40", "remaining_time": "1:05:43", "throughput": 19945.31, "total_tokens": 167158976} +{"current_steps": 53120, "total_steps": 78105, "loss": 0.1697, "lr": 1.4033268338823125e-06, "epoch": 3.40055054093848, "percentage": 68.01, "elapsed_time": "2:19:41", "remaining_time": "1:05:42", "throughput": 19945.58, "total_tokens": 167174656} +{"current_steps": 53125, "total_steps": 78105, "loss": 0.1128, "lr": 1.4028248291267203e-06, "epoch": 3.400870622879457, "percentage": 68.02, "elapsed_time": "2:19:42", "remaining_time": "1:05:41", "throughput": 19945.82, "total_tokens": 167189632} +{"current_steps": 53130, "total_steps": 78105, "loss": 0.1365, "lr": 1.4023228791583416e-06, "epoch": 3.401190704820434, "percentage": 68.02, "elapsed_time": "2:19:42", "remaining_time": "1:05:40", "throughput": 19946.08, "total_tokens": 167205440} +{"current_steps": 53135, "total_steps": 78105, "loss": 0.1522, "lr": 1.4018209840022412e-06, "epoch": 3.4015107867614107, "percentage": 68.03, "elapsed_time": "2:19:43", "remaining_time": "1:05:39", "throughput": 19946.35, "total_tokens": 167221120} +{"current_steps": 53140, "total_steps": 78105, "loss": 0.1633, "lr": 1.4013191436834805e-06, "epoch": 3.4018308687023877, "percentage": 68.04, "elapsed_time": "2:19:44", "remaining_time": "1:05:38", "throughput": 19946.58, "total_tokens": 167236160} +{"current_steps": 53145, "total_steps": 78105, "loss": 0.2166, "lr": 1.4008173582271186e-06, "epoch": 3.4021509506433647, "percentage": 68.04, "elapsed_time": "2:19:44", "remaining_time": "1:05:38", "throughput": 19946.85, "total_tokens": 167252224} +{"current_steps": 53150, "total_steps": 78105, "loss": 0.1682, "lr": 1.400315627658212e-06, "epoch": 3.4024710325843417, "percentage": 68.05, "elapsed_time": "2:19:45", "remaining_time": "1:05:37", "throughput": 19947.12, "total_tokens": 167268352} +{"current_steps": 53155, "total_steps": 78105, "loss": 0.1205, "lr": 1.3998139520018155e-06, "epoch": 3.4027911145253187, "percentage": 68.06, "elapsed_time": "2:19:46", "remaining_time": "1:05:36", "throughput": 19947.33, "total_tokens": 167283072} +{"current_steps": 53160, "total_steps": 78105, "loss": 0.1569, "lr": 1.3993123312829796e-06, "epoch": 3.4031111964662952, "percentage": 68.06, "elapsed_time": "2:19:46", "remaining_time": "1:05:35", "throughput": 19947.69, "total_tokens": 167300736} +{"current_steps": 53165, "total_steps": 78105, "loss": 0.1232, "lr": 1.3988107655267527e-06, "epoch": 3.403431278407272, "percentage": 68.07, "elapsed_time": "2:19:47", "remaining_time": "1:05:34", "throughput": 19947.98, "total_tokens": 167316800} +{"current_steps": 53170, "total_steps": 78105, "loss": 0.2691, "lr": 1.39830925475818e-06, "epoch": 3.403751360348249, "percentage": 68.08, "elapsed_time": "2:19:48", "remaining_time": "1:05:33", "throughput": 19948.2, "total_tokens": 167331520} +{"current_steps": 53175, "total_steps": 78105, "loss": 0.1229, "lr": 1.397807799002305e-06, "epoch": 3.404071442289226, "percentage": 68.08, "elapsed_time": "2:19:48", "remaining_time": "1:05:32", "throughput": 19948.46, "total_tokens": 167347072} +{"current_steps": 53180, "total_steps": 78105, "loss": 0.155, "lr": 1.3973063982841673e-06, "epoch": 3.4043915242302027, "percentage": 68.09, "elapsed_time": "2:19:49", "remaining_time": "1:05:32", "throughput": 19948.73, "total_tokens": 167362880} +{"current_steps": 53185, "total_steps": 78105, "loss": 0.135, "lr": 1.3968050526288042e-06, "epoch": 3.4047116061711797, "percentage": 68.09, "elapsed_time": "2:19:50", "remaining_time": "1:05:31", "throughput": 19948.98, "total_tokens": 167378304} +{"current_steps": 53190, "total_steps": 78105, "loss": 0.1764, "lr": 1.3963037620612496e-06, "epoch": 3.4050316881121567, "percentage": 68.1, "elapsed_time": "2:19:50", "remaining_time": "1:05:30", "throughput": 19949.2, "total_tokens": 167393408} +{"current_steps": 53195, "total_steps": 78105, "loss": 0.1792, "lr": 1.3958025266065384e-06, "epoch": 3.4053517700531337, "percentage": 68.11, "elapsed_time": "2:19:51", "remaining_time": "1:05:29", "throughput": 19949.45, "total_tokens": 167408960} +{"current_steps": 53200, "total_steps": 78105, "loss": 0.1613, "lr": 1.3953013462896952e-06, "epoch": 3.4056718519941107, "percentage": 68.11, "elapsed_time": "2:19:52", "remaining_time": "1:05:28", "throughput": 19949.74, "total_tokens": 167424896} +{"current_steps": 53205, "total_steps": 78105, "loss": 0.1138, "lr": 1.3948002211357496e-06, "epoch": 3.4059919339350873, "percentage": 68.12, "elapsed_time": "2:19:53", "remaining_time": "1:05:27", "throughput": 19950.03, "total_tokens": 167441088} +{"current_steps": 53210, "total_steps": 78105, "loss": 0.1472, "lr": 1.394299151169724e-06, "epoch": 3.4063120158760642, "percentage": 68.13, "elapsed_time": "2:19:53", "remaining_time": "1:05:27", "throughput": 19950.3, "total_tokens": 167457024} +{"current_steps": 53215, "total_steps": 78105, "loss": 0.121, "lr": 1.3937981364166397e-06, "epoch": 3.4066320978170412, "percentage": 68.13, "elapsed_time": "2:19:54", "remaining_time": "1:05:26", "throughput": 19950.52, "total_tokens": 167471936} +{"current_steps": 53220, "total_steps": 78105, "loss": 0.1657, "lr": 1.3932971769015143e-06, "epoch": 3.4069521797580182, "percentage": 68.14, "elapsed_time": "2:19:55", "remaining_time": "1:05:25", "throughput": 19950.77, "total_tokens": 167487104} +{"current_steps": 53225, "total_steps": 78105, "loss": 0.1632, "lr": 1.3927962726493624e-06, "epoch": 3.4072722616989948, "percentage": 68.15, "elapsed_time": "2:19:55", "remaining_time": "1:05:24", "throughput": 19951.03, "total_tokens": 167502848} +{"current_steps": 53230, "total_steps": 78105, "loss": 0.2176, "lr": 1.3922954236851993e-06, "epoch": 3.4075923436399718, "percentage": 68.15, "elapsed_time": "2:19:56", "remaining_time": "1:05:23", "throughput": 19951.36, "total_tokens": 167520192} +{"current_steps": 53235, "total_steps": 78105, "loss": 0.1436, "lr": 1.3917946300340312e-06, "epoch": 3.4079124255809488, "percentage": 68.16, "elapsed_time": "2:19:57", "remaining_time": "1:05:22", "throughput": 19951.62, "total_tokens": 167535680} +{"current_steps": 53240, "total_steps": 78105, "loss": 0.248, "lr": 1.3912938917208677e-06, "epoch": 3.4082325075219257, "percentage": 68.16, "elapsed_time": "2:19:57", "remaining_time": "1:05:22", "throughput": 19951.95, "total_tokens": 167552768} +{"current_steps": 53245, "total_steps": 78105, "loss": 0.1496, "lr": 1.3907932087707121e-06, "epoch": 3.4085525894629027, "percentage": 68.17, "elapsed_time": "2:19:58", "remaining_time": "1:05:21", "throughput": 19952.19, "total_tokens": 167568320} +{"current_steps": 53250, "total_steps": 78105, "loss": 0.172, "lr": 1.3902925812085661e-06, "epoch": 3.4088726714038793, "percentage": 68.18, "elapsed_time": "2:19:59", "remaining_time": "1:05:20", "throughput": 19952.44, "total_tokens": 167583616} +{"current_steps": 53255, "total_steps": 78105, "loss": 0.1069, "lr": 1.389792009059428e-06, "epoch": 3.4091927533448563, "percentage": 68.18, "elapsed_time": "2:19:59", "remaining_time": "1:05:19", "throughput": 19952.73, "total_tokens": 167599616} +{"current_steps": 53260, "total_steps": 78105, "loss": 0.2235, "lr": 1.3892914923482947e-06, "epoch": 3.4095128352858333, "percentage": 68.19, "elapsed_time": "2:20:00", "remaining_time": "1:05:18", "throughput": 19953.03, "total_tokens": 167615616} +{"current_steps": 53265, "total_steps": 78105, "loss": 0.1071, "lr": 1.3887910311001584e-06, "epoch": 3.4098329172268103, "percentage": 68.2, "elapsed_time": "2:20:01", "remaining_time": "1:05:17", "throughput": 19953.26, "total_tokens": 167630464} +{"current_steps": 53270, "total_steps": 78105, "loss": 0.2736, "lr": 1.388290625340009e-06, "epoch": 3.410152999167787, "percentage": 68.2, "elapsed_time": "2:20:01", "remaining_time": "1:05:17", "throughput": 19953.59, "total_tokens": 167647424} +{"current_steps": 53275, "total_steps": 78105, "loss": 0.1264, "lr": 1.3877902750928364e-06, "epoch": 3.410473081108764, "percentage": 68.21, "elapsed_time": "2:20:02", "remaining_time": "1:05:16", "throughput": 19953.87, "total_tokens": 167663424} +{"current_steps": 53280, "total_steps": 78105, "loss": 0.2118, "lr": 1.3872899803836237e-06, "epoch": 3.410793163049741, "percentage": 68.22, "elapsed_time": "2:20:03", "remaining_time": "1:05:15", "throughput": 19954.09, "total_tokens": 167678208} +{"current_steps": 53285, "total_steps": 78105, "loss": 0.1357, "lr": 1.3867897412373538e-06, "epoch": 3.4111132449907178, "percentage": 68.22, "elapsed_time": "2:20:03", "remaining_time": "1:05:14", "throughput": 19954.33, "total_tokens": 167693696} +{"current_steps": 53290, "total_steps": 78105, "loss": 0.1305, "lr": 1.3862895576790052e-06, "epoch": 3.4114333269316943, "percentage": 68.23, "elapsed_time": "2:20:04", "remaining_time": "1:05:13", "throughput": 19954.56, "total_tokens": 167708608} +{"current_steps": 53295, "total_steps": 78105, "loss": 0.2431, "lr": 1.3857894297335555e-06, "epoch": 3.4117534088726713, "percentage": 68.24, "elapsed_time": "2:20:05", "remaining_time": "1:05:12", "throughput": 19954.86, "total_tokens": 167724992} +{"current_steps": 53300, "total_steps": 78105, "loss": 0.1511, "lr": 1.385289357425977e-06, "epoch": 3.4120734908136483, "percentage": 68.24, "elapsed_time": "2:20:05", "remaining_time": "1:05:11", "throughput": 19955.11, "total_tokens": 167740288} +{"current_steps": 53305, "total_steps": 78105, "loss": 0.1275, "lr": 1.3847893407812434e-06, "epoch": 3.4123935727546253, "percentage": 68.25, "elapsed_time": "2:20:06", "remaining_time": "1:05:11", "throughput": 19955.34, "total_tokens": 167755328} +{"current_steps": 53310, "total_steps": 78105, "loss": 0.1331, "lr": 1.3842893798243195e-06, "epoch": 3.412713654695602, "percentage": 68.25, "elapsed_time": "2:20:07", "remaining_time": "1:05:10", "throughput": 19955.58, "total_tokens": 167770752} +{"current_steps": 53315, "total_steps": 78105, "loss": 0.2498, "lr": 1.3837894745801745e-06, "epoch": 3.413033736636579, "percentage": 68.26, "elapsed_time": "2:20:07", "remaining_time": "1:05:09", "throughput": 19955.83, "total_tokens": 167785984} +{"current_steps": 53320, "total_steps": 78105, "loss": 0.1911, "lr": 1.3832896250737665e-06, "epoch": 3.413353818577556, "percentage": 68.27, "elapsed_time": "2:20:08", "remaining_time": "1:05:08", "throughput": 19956.08, "total_tokens": 167801792} +{"current_steps": 53325, "total_steps": 78105, "loss": 0.1828, "lr": 1.382789831330059e-06, "epoch": 3.413673900518533, "percentage": 68.27, "elapsed_time": "2:20:09", "remaining_time": "1:05:07", "throughput": 19956.31, "total_tokens": 167816896} +{"current_steps": 53330, "total_steps": 78105, "loss": 0.1473, "lr": 1.3822900933740085e-06, "epoch": 3.41399398245951, "percentage": 68.28, "elapsed_time": "2:20:09", "remaining_time": "1:05:06", "throughput": 19956.56, "total_tokens": 167832320} +{"current_steps": 53335, "total_steps": 78105, "loss": 0.2207, "lr": 1.3817904112305676e-06, "epoch": 3.4143140644004863, "percentage": 68.29, "elapsed_time": "2:20:10", "remaining_time": "1:05:06", "throughput": 19956.85, "total_tokens": 167848512} +{"current_steps": 53340, "total_steps": 78105, "loss": 0.2879, "lr": 1.3812907849246907e-06, "epoch": 3.4146341463414633, "percentage": 68.29, "elapsed_time": "2:20:11", "remaining_time": "1:05:05", "throughput": 19957.09, "total_tokens": 167863680} +{"current_steps": 53345, "total_steps": 78105, "loss": 0.1084, "lr": 1.380791214481323e-06, "epoch": 3.4149542282824403, "percentage": 68.3, "elapsed_time": "2:20:11", "remaining_time": "1:05:04", "throughput": 19957.36, "total_tokens": 167879488} +{"current_steps": 53350, "total_steps": 78105, "loss": 0.2407, "lr": 1.3802916999254146e-06, "epoch": 3.4152743102234173, "percentage": 68.31, "elapsed_time": "2:20:12", "remaining_time": "1:05:03", "throughput": 19957.64, "total_tokens": 167895296} +{"current_steps": 53355, "total_steps": 78105, "loss": 0.2134, "lr": 1.379792241281904e-06, "epoch": 3.415594392164394, "percentage": 68.31, "elapsed_time": "2:20:13", "remaining_time": "1:05:02", "throughput": 19957.89, "total_tokens": 167910528} +{"current_steps": 53360, "total_steps": 78105, "loss": 0.1455, "lr": 1.3792928385757348e-06, "epoch": 3.415914474105371, "percentage": 68.32, "elapsed_time": "2:20:13", "remaining_time": "1:05:01", "throughput": 19958.16, "total_tokens": 167926400} +{"current_steps": 53365, "total_steps": 78105, "loss": 0.1733, "lr": 1.378793491831844e-06, "epoch": 3.416234556046348, "percentage": 68.32, "elapsed_time": "2:20:14", "remaining_time": "1:05:01", "throughput": 19958.47, "total_tokens": 167942976} +{"current_steps": 53370, "total_steps": 78105, "loss": 0.1124, "lr": 1.378294201075166e-06, "epoch": 3.416554637987325, "percentage": 68.33, "elapsed_time": "2:20:15", "remaining_time": "1:05:00", "throughput": 19958.74, "total_tokens": 167958656} +{"current_steps": 53375, "total_steps": 78105, "loss": 0.1323, "lr": 1.3777949663306329e-06, "epoch": 3.416874719928302, "percentage": 68.34, "elapsed_time": "2:20:16", "remaining_time": "1:04:59", "throughput": 19959.06, "total_tokens": 167975552} +{"current_steps": 53380, "total_steps": 78105, "loss": 0.1302, "lr": 1.377295787623173e-06, "epoch": 3.4171948018692784, "percentage": 68.34, "elapsed_time": "2:20:16", "remaining_time": "1:04:58", "throughput": 19959.3, "total_tokens": 167990720} +{"current_steps": 53385, "total_steps": 78105, "loss": 0.1207, "lr": 1.3767966649777143e-06, "epoch": 3.4175148838102554, "percentage": 68.35, "elapsed_time": "2:20:17", "remaining_time": "1:04:57", "throughput": 19959.53, "total_tokens": 168005632} +{"current_steps": 53390, "total_steps": 78105, "loss": 0.0946, "lr": 1.3762975984191795e-06, "epoch": 3.4178349657512324, "percentage": 68.36, "elapsed_time": "2:20:17", "remaining_time": "1:04:56", "throughput": 19959.79, "total_tokens": 168020992} +{"current_steps": 53395, "total_steps": 78105, "loss": 0.1956, "lr": 1.3757985879724894e-06, "epoch": 3.4181550476922093, "percentage": 68.36, "elapsed_time": "2:20:18", "remaining_time": "1:04:55", "throughput": 19960.09, "total_tokens": 168037440} +{"current_steps": 53400, "total_steps": 78105, "loss": 0.1871, "lr": 1.3752996336625623e-06, "epoch": 3.418475129633186, "percentage": 68.37, "elapsed_time": "2:20:19", "remaining_time": "1:04:55", "throughput": 19960.43, "total_tokens": 168054528} +{"current_steps": 53405, "total_steps": 78105, "loss": 0.1581, "lr": 1.3748007355143129e-06, "epoch": 3.418795211574163, "percentage": 68.38, "elapsed_time": "2:20:20", "remaining_time": "1:04:54", "throughput": 19960.78, "total_tokens": 168071744} +{"current_steps": 53410, "total_steps": 78105, "loss": 0.1422, "lr": 1.3743018935526542e-06, "epoch": 3.41911529351514, "percentage": 68.38, "elapsed_time": "2:20:20", "remaining_time": "1:04:53", "throughput": 19961.06, "total_tokens": 168087744} +{"current_steps": 53415, "total_steps": 78105, "loss": 0.1392, "lr": 1.373803107802495e-06, "epoch": 3.419435375456117, "percentage": 68.39, "elapsed_time": "2:20:21", "remaining_time": "1:04:52", "throughput": 19961.37, "total_tokens": 168104256} +{"current_steps": 53420, "total_steps": 78105, "loss": 0.1463, "lr": 1.3733043782887417e-06, "epoch": 3.419755457397094, "percentage": 68.4, "elapsed_time": "2:20:22", "remaining_time": "1:04:51", "throughput": 19961.72, "total_tokens": 168121472} +{"current_steps": 53425, "total_steps": 78105, "loss": 0.1322, "lr": 1.372805705036301e-06, "epoch": 3.4200755393380704, "percentage": 68.4, "elapsed_time": "2:20:22", "remaining_time": "1:04:50", "throughput": 19961.98, "total_tokens": 168137344} +{"current_steps": 53430, "total_steps": 78105, "loss": 0.2022, "lr": 1.37230708807007e-06, "epoch": 3.4203956212790474, "percentage": 68.41, "elapsed_time": "2:20:23", "remaining_time": "1:04:50", "throughput": 19962.23, "total_tokens": 168152768} +{"current_steps": 53435, "total_steps": 78105, "loss": 0.1392, "lr": 1.37180852741495e-06, "epoch": 3.4207157032200244, "percentage": 68.41, "elapsed_time": "2:20:24", "remaining_time": "1:04:49", "throughput": 19962.55, "total_tokens": 168169984} +{"current_steps": 53440, "total_steps": 78105, "loss": 0.1241, "lr": 1.3713100230958356e-06, "epoch": 3.4210357851610014, "percentage": 68.42, "elapsed_time": "2:20:24", "remaining_time": "1:04:48", "throughput": 19962.88, "total_tokens": 168186560} +{"current_steps": 53445, "total_steps": 78105, "loss": 0.1573, "lr": 1.3708115751376194e-06, "epoch": 3.421355867101978, "percentage": 68.43, "elapsed_time": "2:20:25", "remaining_time": "1:04:47", "throughput": 19963.12, "total_tokens": 168201728} +{"current_steps": 53450, "total_steps": 78105, "loss": 0.136, "lr": 1.3703131835651917e-06, "epoch": 3.421675949042955, "percentage": 68.43, "elapsed_time": "2:20:26", "remaining_time": "1:04:46", "throughput": 19963.39, "total_tokens": 168217408} +{"current_steps": 53455, "total_steps": 78105, "loss": 0.1847, "lr": 1.3698148484034385e-06, "epoch": 3.421996030983932, "percentage": 68.44, "elapsed_time": "2:20:26", "remaining_time": "1:04:45", "throughput": 19963.67, "total_tokens": 168233408} +{"current_steps": 53460, "total_steps": 78105, "loss": 0.1363, "lr": 1.3693165696772465e-06, "epoch": 3.422316112924909, "percentage": 68.45, "elapsed_time": "2:20:27", "remaining_time": "1:04:45", "throughput": 19963.93, "total_tokens": 168249472} +{"current_steps": 53465, "total_steps": 78105, "loss": 0.1404, "lr": 1.3688183474114936e-06, "epoch": 3.422636194865886, "percentage": 68.45, "elapsed_time": "2:20:28", "remaining_time": "1:04:44", "throughput": 19964.13, "total_tokens": 168263808} +{"current_steps": 53470, "total_steps": 78105, "loss": 0.2012, "lr": 1.3683201816310609e-06, "epoch": 3.4229562768068624, "percentage": 68.46, "elapsed_time": "2:20:28", "remaining_time": "1:04:43", "throughput": 19964.36, "total_tokens": 168279104} +{"current_steps": 53475, "total_steps": 78105, "loss": 0.147, "lr": 1.367822072360824e-06, "epoch": 3.4232763587478394, "percentage": 68.47, "elapsed_time": "2:20:29", "remaining_time": "1:04:42", "throughput": 19964.61, "total_tokens": 168294720} +{"current_steps": 53480, "total_steps": 78105, "loss": 0.265, "lr": 1.3673240196256554e-06, "epoch": 3.4235964406888164, "percentage": 68.47, "elapsed_time": "2:20:30", "remaining_time": "1:04:41", "throughput": 19964.9, "total_tokens": 168310848} +{"current_steps": 53485, "total_steps": 78105, "loss": 0.1805, "lr": 1.3668260234504255e-06, "epoch": 3.4239165226297934, "percentage": 68.48, "elapsed_time": "2:20:31", "remaining_time": "1:04:40", "throughput": 19965.14, "total_tokens": 168326208} +{"current_steps": 53490, "total_steps": 78105, "loss": 0.1349, "lr": 1.3663280838600015e-06, "epoch": 3.42423660457077, "percentage": 68.48, "elapsed_time": "2:20:31", "remaining_time": "1:04:40", "throughput": 19965.38, "total_tokens": 168341568} +{"current_steps": 53495, "total_steps": 78105, "loss": 0.1316, "lr": 1.365830200879248e-06, "epoch": 3.424556686511747, "percentage": 68.49, "elapsed_time": "2:20:32", "remaining_time": "1:04:39", "throughput": 19965.68, "total_tokens": 168358016} +{"current_steps": 53500, "total_steps": 78105, "loss": 0.1207, "lr": 1.3653323745330255e-06, "epoch": 3.424876768452724, "percentage": 68.5, "elapsed_time": "2:20:33", "remaining_time": "1:04:38", "throughput": 19965.89, "total_tokens": 168372672} +{"current_steps": 53505, "total_steps": 78105, "loss": 0.1532, "lr": 1.3648346048461946e-06, "epoch": 3.425196850393701, "percentage": 68.5, "elapsed_time": "2:20:33", "remaining_time": "1:04:37", "throughput": 19966.11, "total_tokens": 168387584} +{"current_steps": 53510, "total_steps": 78105, "loss": 0.1866, "lr": 1.3643368918436112e-06, "epoch": 3.425516932334678, "percentage": 68.51, "elapsed_time": "2:20:34", "remaining_time": "1:04:36", "throughput": 19966.38, "total_tokens": 168403264} +{"current_steps": 53515, "total_steps": 78105, "loss": 0.127, "lr": 1.3638392355501279e-06, "epoch": 3.4258370142756545, "percentage": 68.52, "elapsed_time": "2:20:34", "remaining_time": "1:04:35", "throughput": 19966.59, "total_tokens": 168417856} +{"current_steps": 53520, "total_steps": 78105, "loss": 0.1511, "lr": 1.363341635990595e-06, "epoch": 3.4261570962166314, "percentage": 68.52, "elapsed_time": "2:20:35", "remaining_time": "1:04:35", "throughput": 19966.98, "total_tokens": 168436224} +{"current_steps": 53525, "total_steps": 78105, "loss": 0.2167, "lr": 1.3628440931898602e-06, "epoch": 3.4264771781576084, "percentage": 68.53, "elapsed_time": "2:20:36", "remaining_time": "1:04:34", "throughput": 19967.19, "total_tokens": 168450816} +{"current_steps": 53530, "total_steps": 78105, "loss": 0.1586, "lr": 1.362346607172767e-06, "epoch": 3.4267972600985854, "percentage": 68.54, "elapsed_time": "2:20:37", "remaining_time": "1:04:33", "throughput": 19967.47, "total_tokens": 168466880} +{"current_steps": 53535, "total_steps": 78105, "loss": 0.1427, "lr": 1.3618491779641603e-06, "epoch": 3.427117342039562, "percentage": 68.54, "elapsed_time": "2:20:37", "remaining_time": "1:04:32", "throughput": 19967.78, "total_tokens": 168483648} +{"current_steps": 53540, "total_steps": 78105, "loss": 0.1957, "lr": 1.3613518055888755e-06, "epoch": 3.427437423980539, "percentage": 68.55, "elapsed_time": "2:20:38", "remaining_time": "1:04:31", "throughput": 19968.05, "total_tokens": 168499456} +{"current_steps": 53545, "total_steps": 78105, "loss": 0.1738, "lr": 1.3608544900717524e-06, "epoch": 3.427757505921516, "percentage": 68.56, "elapsed_time": "2:20:39", "remaining_time": "1:04:30", "throughput": 19968.28, "total_tokens": 168515008} +{"current_steps": 53550, "total_steps": 78105, "loss": 0.1664, "lr": 1.3603572314376207e-06, "epoch": 3.428077587862493, "percentage": 68.56, "elapsed_time": "2:20:39", "remaining_time": "1:04:30", "throughput": 19968.57, "total_tokens": 168531200} +{"current_steps": 53555, "total_steps": 78105, "loss": 0.1422, "lr": 1.3598600297113135e-06, "epoch": 3.4283976698034695, "percentage": 68.57, "elapsed_time": "2:20:40", "remaining_time": "1:04:29", "throughput": 19968.86, "total_tokens": 168547328} +{"current_steps": 53560, "total_steps": 78105, "loss": 0.159, "lr": 1.3593628849176576e-06, "epoch": 3.4287177517444465, "percentage": 68.57, "elapsed_time": "2:20:41", "remaining_time": "1:04:28", "throughput": 19969.13, "total_tokens": 168563328} +{"current_steps": 53565, "total_steps": 78105, "loss": 0.1592, "lr": 1.3588657970814769e-06, "epoch": 3.4290378336854235, "percentage": 68.58, "elapsed_time": "2:20:41", "remaining_time": "1:04:27", "throughput": 19969.4, "total_tokens": 168579264} +{"current_steps": 53570, "total_steps": 78105, "loss": 0.1542, "lr": 1.3583687662275962e-06, "epoch": 3.4293579156264005, "percentage": 68.59, "elapsed_time": "2:20:42", "remaining_time": "1:04:26", "throughput": 19969.69, "total_tokens": 168595712} +{"current_steps": 53575, "total_steps": 78105, "loss": 0.1631, "lr": 1.3578717923808312e-06, "epoch": 3.429677997567377, "percentage": 68.59, "elapsed_time": "2:20:43", "remaining_time": "1:04:25", "throughput": 19970.02, "total_tokens": 168612672} +{"current_steps": 53580, "total_steps": 78105, "loss": 0.1705, "lr": 1.3573748755660015e-06, "epoch": 3.429998079508354, "percentage": 68.6, "elapsed_time": "2:20:43", "remaining_time": "1:04:25", "throughput": 19970.24, "total_tokens": 168627776} +{"current_steps": 53585, "total_steps": 78105, "loss": 0.1229, "lr": 1.356878015807917e-06, "epoch": 3.430318161449331, "percentage": 68.61, "elapsed_time": "2:20:44", "remaining_time": "1:04:24", "throughput": 19970.48, "total_tokens": 168642816} +{"current_steps": 53590, "total_steps": 78105, "loss": 0.2726, "lr": 1.3563812131313909e-06, "epoch": 3.430638243390308, "percentage": 68.61, "elapsed_time": "2:20:45", "remaining_time": "1:04:23", "throughput": 19970.16, "total_tokens": 168658432} +{"current_steps": 53595, "total_steps": 78105, "loss": 0.2198, "lr": 1.3558844675612297e-06, "epoch": 3.430958325331285, "percentage": 68.62, "elapsed_time": "2:20:46", "remaining_time": "1:04:22", "throughput": 19970.4, "total_tokens": 168673472} +{"current_steps": 53600, "total_steps": 78105, "loss": 0.1386, "lr": 1.3553877791222394e-06, "epoch": 3.4312784072722615, "percentage": 68.63, "elapsed_time": "2:20:46", "remaining_time": "1:04:21", "throughput": 19970.69, "total_tokens": 168689664} +{"current_steps": 53605, "total_steps": 78105, "loss": 0.1974, "lr": 1.3548911478392213e-06, "epoch": 3.4315984892132385, "percentage": 68.63, "elapsed_time": "2:20:47", "remaining_time": "1:04:20", "throughput": 19970.98, "total_tokens": 168706112} +{"current_steps": 53610, "total_steps": 78105, "loss": 0.2249, "lr": 1.354394573736974e-06, "epoch": 3.4319185711542155, "percentage": 68.64, "elapsed_time": "2:20:48", "remaining_time": "1:04:20", "throughput": 19971.21, "total_tokens": 168721024} +{"current_steps": 53615, "total_steps": 78105, "loss": 0.1364, "lr": 1.353898056840294e-06, "epoch": 3.4322386530951925, "percentage": 68.64, "elapsed_time": "2:20:48", "remaining_time": "1:04:19", "throughput": 19971.45, "total_tokens": 168736448} +{"current_steps": 53620, "total_steps": 78105, "loss": 0.1433, "lr": 1.3534015971739761e-06, "epoch": 3.432558735036169, "percentage": 68.65, "elapsed_time": "2:20:49", "remaining_time": "1:04:18", "throughput": 19971.72, "total_tokens": 168751936} +{"current_steps": 53625, "total_steps": 78105, "loss": 0.2215, "lr": 1.35290519476281e-06, "epoch": 3.432878816977146, "percentage": 68.66, "elapsed_time": "2:20:50", "remaining_time": "1:04:17", "throughput": 19971.96, "total_tokens": 168767360} +{"current_steps": 53630, "total_steps": 78105, "loss": 0.2581, "lr": 1.3524088496315835e-06, "epoch": 3.433198898918123, "percentage": 68.66, "elapsed_time": "2:20:50", "remaining_time": "1:04:16", "throughput": 19972.23, "total_tokens": 168783104} +{"current_steps": 53635, "total_steps": 78105, "loss": 0.2828, "lr": 1.3519125618050814e-06, "epoch": 3.4335189808591, "percentage": 68.67, "elapsed_time": "2:20:51", "remaining_time": "1:04:15", "throughput": 19972.52, "total_tokens": 168799616} +{"current_steps": 53640, "total_steps": 78105, "loss": 0.1831, "lr": 1.3514163313080857e-06, "epoch": 3.433839062800077, "percentage": 68.68, "elapsed_time": "2:20:52", "remaining_time": "1:04:15", "throughput": 19972.76, "total_tokens": 168814656} +{"current_steps": 53645, "total_steps": 78105, "loss": 0.1593, "lr": 1.350920158165376e-06, "epoch": 3.4341591447410535, "percentage": 68.68, "elapsed_time": "2:20:53", "remaining_time": "1:04:14", "throughput": 19973.15, "total_tokens": 168833152} +{"current_steps": 53650, "total_steps": 78105, "loss": 0.1567, "lr": 1.3504240424017268e-06, "epoch": 3.4344792266820305, "percentage": 68.69, "elapsed_time": "2:20:53", "remaining_time": "1:04:13", "throughput": 19973.42, "total_tokens": 168849216} +{"current_steps": 53655, "total_steps": 78105, "loss": 0.1661, "lr": 1.3499279840419155e-06, "epoch": 3.4347993086230075, "percentage": 68.7, "elapsed_time": "2:20:54", "remaining_time": "1:04:12", "throughput": 19973.67, "total_tokens": 168864512} +{"current_steps": 53660, "total_steps": 78105, "loss": 0.1892, "lr": 1.3494319831107079e-06, "epoch": 3.4351193905639845, "percentage": 68.7, "elapsed_time": "2:20:55", "remaining_time": "1:04:11", "throughput": 19973.91, "total_tokens": 168879616} +{"current_steps": 53665, "total_steps": 78105, "loss": 0.1741, "lr": 1.348936039632875e-06, "epoch": 3.435439472504961, "percentage": 68.71, "elapsed_time": "2:20:55", "remaining_time": "1:04:10", "throughput": 19974.18, "total_tokens": 168895616} +{"current_steps": 53670, "total_steps": 78105, "loss": 0.1418, "lr": 1.3484401536331804e-06, "epoch": 3.435759554445938, "percentage": 68.72, "elapsed_time": "2:20:56", "remaining_time": "1:04:10", "throughput": 19974.42, "total_tokens": 168910912} +{"current_steps": 53675, "total_steps": 78105, "loss": 0.257, "lr": 1.3479443251363867e-06, "epoch": 3.436079636386915, "percentage": 68.72, "elapsed_time": "2:20:57", "remaining_time": "1:04:09", "throughput": 19974.66, "total_tokens": 168926336} +{"current_steps": 53680, "total_steps": 78105, "loss": 0.1206, "lr": 1.3474485541672521e-06, "epoch": 3.436399718327892, "percentage": 68.73, "elapsed_time": "2:20:57", "remaining_time": "1:04:08", "throughput": 19974.87, "total_tokens": 168940800} +{"current_steps": 53685, "total_steps": 78105, "loss": 0.2162, "lr": 1.3469528407505326e-06, "epoch": 3.436719800268869, "percentage": 68.73, "elapsed_time": "2:20:58", "remaining_time": "1:04:07", "throughput": 19975.13, "total_tokens": 168956544} +{"current_steps": 53690, "total_steps": 78105, "loss": 0.1356, "lr": 1.3464571849109842e-06, "epoch": 3.4370398822098456, "percentage": 68.74, "elapsed_time": "2:20:59", "remaining_time": "1:04:06", "throughput": 19975.36, "total_tokens": 168971776} +{"current_steps": 53695, "total_steps": 78105, "loss": 0.1781, "lr": 1.3459615866733534e-06, "epoch": 3.4373599641508226, "percentage": 68.75, "elapsed_time": "2:20:59", "remaining_time": "1:04:05", "throughput": 19975.58, "total_tokens": 168986560} +{"current_steps": 53700, "total_steps": 78105, "loss": 0.1384, "lr": 1.3454660460623905e-06, "epoch": 3.4376800460917996, "percentage": 68.75, "elapsed_time": "2:21:00", "remaining_time": "1:04:04", "throughput": 19975.79, "total_tokens": 169001344} +{"current_steps": 53705, "total_steps": 78105, "loss": 0.1207, "lr": 1.3449705631028398e-06, "epoch": 3.4380001280327765, "percentage": 68.76, "elapsed_time": "2:21:01", "remaining_time": "1:04:04", "throughput": 19976.1, "total_tokens": 169017856} +{"current_steps": 53710, "total_steps": 78105, "loss": 0.2258, "lr": 1.3444751378194427e-06, "epoch": 3.438320209973753, "percentage": 68.77, "elapsed_time": "2:21:01", "remaining_time": "1:04:03", "throughput": 19976.42, "total_tokens": 169034816} +{"current_steps": 53715, "total_steps": 78105, "loss": 0.1529, "lr": 1.3439797702369385e-06, "epoch": 3.43864029191473, "percentage": 68.77, "elapsed_time": "2:21:02", "remaining_time": "1:04:02", "throughput": 19976.67, "total_tokens": 169049920} +{"current_steps": 53720, "total_steps": 78105, "loss": 0.2182, "lr": 1.343484460380063e-06, "epoch": 3.438960373855707, "percentage": 68.78, "elapsed_time": "2:21:03", "remaining_time": "1:04:01", "throughput": 19976.94, "total_tokens": 169065664} +{"current_steps": 53725, "total_steps": 78105, "loss": 0.2003, "lr": 1.3429892082735496e-06, "epoch": 3.439280455796684, "percentage": 68.79, "elapsed_time": "2:21:03", "remaining_time": "1:04:00", "throughput": 19977.19, "total_tokens": 169081216} +{"current_steps": 53730, "total_steps": 78105, "loss": 0.2119, "lr": 1.3424940139421273e-06, "epoch": 3.439600537737661, "percentage": 68.79, "elapsed_time": "2:21:04", "remaining_time": "1:03:59", "throughput": 19977.37, "total_tokens": 169095296} +{"current_steps": 53735, "total_steps": 78105, "loss": 0.1635, "lr": 1.3419988774105258e-06, "epoch": 3.4399206196786376, "percentage": 68.8, "elapsed_time": "2:21:05", "remaining_time": "1:03:59", "throughput": 19977.65, "total_tokens": 169111680} +{"current_steps": 53740, "total_steps": 78105, "loss": 0.0987, "lr": 1.3415037987034685e-06, "epoch": 3.4402407016196146, "percentage": 68.8, "elapsed_time": "2:21:05", "remaining_time": "1:03:58", "throughput": 19977.93, "total_tokens": 169127872} +{"current_steps": 53745, "total_steps": 78105, "loss": 0.1418, "lr": 1.341008777845677e-06, "epoch": 3.4405607835605916, "percentage": 68.81, "elapsed_time": "2:21:06", "remaining_time": "1:03:57", "throughput": 19978.16, "total_tokens": 169142848} +{"current_steps": 53750, "total_steps": 78105, "loss": 0.1778, "lr": 1.3405138148618703e-06, "epoch": 3.4408808655015686, "percentage": 68.82, "elapsed_time": "2:21:07", "remaining_time": "1:03:56", "throughput": 19978.51, "total_tokens": 169160256} +{"current_steps": 53755, "total_steps": 78105, "loss": 0.1404, "lr": 1.3400189097767641e-06, "epoch": 3.441200947442545, "percentage": 68.82, "elapsed_time": "2:21:07", "remaining_time": "1:03:55", "throughput": 19978.74, "total_tokens": 169175936} +{"current_steps": 53760, "total_steps": 78105, "loss": 0.1682, "lr": 1.3395240626150707e-06, "epoch": 3.441521029383522, "percentage": 68.83, "elapsed_time": "2:21:08", "remaining_time": "1:03:54", "throughput": 19978.96, "total_tokens": 169190912} +{"current_steps": 53765, "total_steps": 78105, "loss": 0.1384, "lr": 1.3390292734015025e-06, "epoch": 3.441841111324499, "percentage": 68.84, "elapsed_time": "2:21:09", "remaining_time": "1:03:54", "throughput": 19979.19, "total_tokens": 169205760} +{"current_steps": 53770, "total_steps": 78105, "loss": 0.2142, "lr": 1.3385345421607632e-06, "epoch": 3.442161193265476, "percentage": 68.84, "elapsed_time": "2:21:09", "remaining_time": "1:03:53", "throughput": 19979.48, "total_tokens": 169222144} +{"current_steps": 53775, "total_steps": 78105, "loss": 0.1446, "lr": 1.338039868917561e-06, "epoch": 3.442481275206453, "percentage": 68.85, "elapsed_time": "2:21:10", "remaining_time": "1:03:52", "throughput": 19979.8, "total_tokens": 169239104} +{"current_steps": 53780, "total_steps": 78105, "loss": 0.2169, "lr": 1.3375452536965938e-06, "epoch": 3.4428013571474296, "percentage": 68.86, "elapsed_time": "2:21:11", "remaining_time": "1:03:51", "throughput": 19980.05, "total_tokens": 169254400} +{"current_steps": 53785, "total_steps": 78105, "loss": 0.1781, "lr": 1.3370506965225621e-06, "epoch": 3.4431214390884066, "percentage": 68.86, "elapsed_time": "2:21:11", "remaining_time": "1:03:50", "throughput": 19980.28, "total_tokens": 169269632} +{"current_steps": 53790, "total_steps": 78105, "loss": 0.1089, "lr": 1.3365561974201612e-06, "epoch": 3.4434415210293836, "percentage": 68.87, "elapsed_time": "2:21:12", "remaining_time": "1:03:49", "throughput": 19980.52, "total_tokens": 169284800} +{"current_steps": 53795, "total_steps": 78105, "loss": 0.1775, "lr": 1.3360617564140826e-06, "epoch": 3.4437616029703606, "percentage": 68.88, "elapsed_time": "2:21:13", "remaining_time": "1:03:49", "throughput": 19980.74, "total_tokens": 169299648} +{"current_steps": 53800, "total_steps": 78105, "loss": 0.2354, "lr": 1.3355673735290192e-06, "epoch": 3.444081684911337, "percentage": 68.88, "elapsed_time": "2:21:13", "remaining_time": "1:03:48", "throughput": 19981.01, "total_tokens": 169315200} +{"current_steps": 53805, "total_steps": 78105, "loss": 0.1418, "lr": 1.335073048789654e-06, "epoch": 3.444401766852314, "percentage": 68.89, "elapsed_time": "2:21:14", "remaining_time": "1:03:47", "throughput": 19981.33, "total_tokens": 169331904} +{"current_steps": 53810, "total_steps": 78105, "loss": 0.1935, "lr": 1.3345787822206745e-06, "epoch": 3.444721848793291, "percentage": 68.89, "elapsed_time": "2:21:15", "remaining_time": "1:03:46", "throughput": 19981.62, "total_tokens": 169348032} +{"current_steps": 53815, "total_steps": 78105, "loss": 0.1424, "lr": 1.3340845738467584e-06, "epoch": 3.445041930734268, "percentage": 68.9, "elapsed_time": "2:21:15", "remaining_time": "1:03:45", "throughput": 19981.89, "total_tokens": 169363776} +{"current_steps": 53820, "total_steps": 78105, "loss": 0.19, "lr": 1.3335904236925868e-06, "epoch": 3.445362012675245, "percentage": 68.91, "elapsed_time": "2:21:16", "remaining_time": "1:03:44", "throughput": 19982.1, "total_tokens": 169378368} +{"current_steps": 53825, "total_steps": 78105, "loss": 0.1903, "lr": 1.3330963317828333e-06, "epoch": 3.4456820946162217, "percentage": 68.91, "elapsed_time": "2:21:17", "remaining_time": "1:03:43", "throughput": 19982.32, "total_tokens": 169393280} +{"current_steps": 53830, "total_steps": 78105, "loss": 0.1758, "lr": 1.3326022981421713e-06, "epoch": 3.4460021765571986, "percentage": 68.92, "elapsed_time": "2:21:17", "remaining_time": "1:03:43", "throughput": 19982.63, "total_tokens": 169409984} +{"current_steps": 53835, "total_steps": 78105, "loss": 0.1885, "lr": 1.3321083227952697e-06, "epoch": 3.4463222584981756, "percentage": 68.93, "elapsed_time": "2:21:18", "remaining_time": "1:03:42", "throughput": 19982.88, "total_tokens": 169425024} +{"current_steps": 53840, "total_steps": 78105, "loss": 0.1566, "lr": 1.3316144057667951e-06, "epoch": 3.446642340439152, "percentage": 68.93, "elapsed_time": "2:21:19", "remaining_time": "1:03:41", "throughput": 19983.15, "total_tokens": 169440960} +{"current_steps": 53845, "total_steps": 78105, "loss": 0.1346, "lr": 1.3311205470814102e-06, "epoch": 3.446962422380129, "percentage": 68.94, "elapsed_time": "2:21:19", "remaining_time": "1:03:40", "throughput": 19983.38, "total_tokens": 169455936} +{"current_steps": 53850, "total_steps": 78105, "loss": 0.1984, "lr": 1.3306267467637773e-06, "epoch": 3.447282504321106, "percentage": 68.95, "elapsed_time": "2:21:20", "remaining_time": "1:03:39", "throughput": 19983.63, "total_tokens": 169471168} +{"current_steps": 53855, "total_steps": 78105, "loss": 0.1497, "lr": 1.3301330048385542e-06, "epoch": 3.447602586262083, "percentage": 68.95, "elapsed_time": "2:21:21", "remaining_time": "1:03:38", "throughput": 19983.87, "total_tokens": 169486336} +{"current_steps": 53860, "total_steps": 78105, "loss": 0.163, "lr": 1.329639321330395e-06, "epoch": 3.44792266820306, "percentage": 68.96, "elapsed_time": "2:21:21", "remaining_time": "1:03:38", "throughput": 19984.11, "total_tokens": 169501440} +{"current_steps": 53865, "total_steps": 78105, "loss": 0.0954, "lr": 1.3291456962639518e-06, "epoch": 3.4482427501440367, "percentage": 68.96, "elapsed_time": "2:21:22", "remaining_time": "1:03:37", "throughput": 19984.34, "total_tokens": 169516480} +{"current_steps": 53870, "total_steps": 78105, "loss": 0.1782, "lr": 1.328652129663874e-06, "epoch": 3.4485628320850137, "percentage": 68.97, "elapsed_time": "2:21:23", "remaining_time": "1:03:36", "throughput": 19984.63, "total_tokens": 169532864} +{"current_steps": 53875, "total_steps": 78105, "loss": 0.2489, "lr": 1.3281586215548072e-06, "epoch": 3.4488829140259907, "percentage": 68.98, "elapsed_time": "2:21:23", "remaining_time": "1:03:35", "throughput": 19984.87, "total_tokens": 169548032} +{"current_steps": 53880, "total_steps": 78105, "loss": 0.166, "lr": 1.3276651719613942e-06, "epoch": 3.4492029959669677, "percentage": 68.98, "elapsed_time": "2:21:24", "remaining_time": "1:03:34", "throughput": 19985.16, "total_tokens": 169564352} +{"current_steps": 53885, "total_steps": 78105, "loss": 0.1573, "lr": 1.327171780908278e-06, "epoch": 3.449523077907944, "percentage": 68.99, "elapsed_time": "2:21:25", "remaining_time": "1:03:33", "throughput": 19985.4, "total_tokens": 169579456} +{"current_steps": 53890, "total_steps": 78105, "loss": 0.1914, "lr": 1.326678448420092e-06, "epoch": 3.449843159848921, "percentage": 69.0, "elapsed_time": "2:21:25", "remaining_time": "1:03:33", "throughput": 19985.66, "total_tokens": 169595392} +{"current_steps": 53895, "total_steps": 78105, "loss": 0.1444, "lr": 1.3261851745214738e-06, "epoch": 3.450163241789898, "percentage": 69.0, "elapsed_time": "2:21:26", "remaining_time": "1:03:32", "throughput": 19986.0, "total_tokens": 169612928} +{"current_steps": 53900, "total_steps": 78105, "loss": 0.1282, "lr": 1.3256919592370538e-06, "epoch": 3.450483323730875, "percentage": 69.01, "elapsed_time": "2:21:27", "remaining_time": "1:03:31", "throughput": 19986.25, "total_tokens": 169628608} +{"current_steps": 53905, "total_steps": 78105, "loss": 0.1949, "lr": 1.3251988025914604e-06, "epoch": 3.450803405671852, "percentage": 69.02, "elapsed_time": "2:21:27", "remaining_time": "1:03:30", "throughput": 19986.48, "total_tokens": 169643840} +{"current_steps": 53910, "total_steps": 78105, "loss": 0.0894, "lr": 1.3247057046093192e-06, "epoch": 3.4511234876128287, "percentage": 69.02, "elapsed_time": "2:21:28", "remaining_time": "1:03:29", "throughput": 19986.76, "total_tokens": 169659904} +{"current_steps": 53915, "total_steps": 78105, "loss": 0.1221, "lr": 1.3242126653152526e-06, "epoch": 3.4514435695538057, "percentage": 69.03, "elapsed_time": "2:21:29", "remaining_time": "1:03:28", "throughput": 19987.04, "total_tokens": 169676288} +{"current_steps": 53920, "total_steps": 78105, "loss": 0.2169, "lr": 1.3237196847338833e-06, "epoch": 3.4517636514947827, "percentage": 69.04, "elapsed_time": "2:21:30", "remaining_time": "1:03:28", "throughput": 19987.29, "total_tokens": 169692224} +{"current_steps": 53925, "total_steps": 78105, "loss": 0.1808, "lr": 1.3232267628898238e-06, "epoch": 3.4520837334357597, "percentage": 69.04, "elapsed_time": "2:21:30", "remaining_time": "1:03:27", "throughput": 19987.59, "total_tokens": 169708864} +{"current_steps": 53930, "total_steps": 78105, "loss": 0.1437, "lr": 1.3227338998076907e-06, "epoch": 3.4524038153767362, "percentage": 69.05, "elapsed_time": "2:21:31", "remaining_time": "1:03:26", "throughput": 19987.83, "total_tokens": 169724288} +{"current_steps": 53935, "total_steps": 78105, "loss": 0.1715, "lr": 1.322241095512095e-06, "epoch": 3.4527238973177132, "percentage": 69.05, "elapsed_time": "2:21:32", "remaining_time": "1:03:25", "throughput": 19988.08, "total_tokens": 169739776} +{"current_steps": 53940, "total_steps": 78105, "loss": 0.1788, "lr": 1.321748350027644e-06, "epoch": 3.45304397925869, "percentage": 69.06, "elapsed_time": "2:21:32", "remaining_time": "1:03:24", "throughput": 19988.35, "total_tokens": 169755904} +{"current_steps": 53945, "total_steps": 78105, "loss": 0.174, "lr": 1.321255663378943e-06, "epoch": 3.453364061199667, "percentage": 69.07, "elapsed_time": "2:21:33", "remaining_time": "1:03:23", "throughput": 19988.58, "total_tokens": 169771200} +{"current_steps": 53950, "total_steps": 78105, "loss": 0.1106, "lr": 1.3207630355905948e-06, "epoch": 3.453684143140644, "percentage": 69.07, "elapsed_time": "2:21:34", "remaining_time": "1:03:23", "throughput": 19988.81, "total_tokens": 169785984} +{"current_steps": 53955, "total_steps": 78105, "loss": 0.2024, "lr": 1.320270466687198e-06, "epoch": 3.4540042250816207, "percentage": 69.08, "elapsed_time": "2:21:34", "remaining_time": "1:03:22", "throughput": 19989.1, "total_tokens": 169802560} +{"current_steps": 53960, "total_steps": 78105, "loss": 0.0919, "lr": 1.319777956693349e-06, "epoch": 3.4543243070225977, "percentage": 69.09, "elapsed_time": "2:21:35", "remaining_time": "1:03:21", "throughput": 19989.32, "total_tokens": 169817152} +{"current_steps": 53965, "total_steps": 78105, "loss": 0.1311, "lr": 1.3192855056336402e-06, "epoch": 3.4546443889635747, "percentage": 69.09, "elapsed_time": "2:21:36", "remaining_time": "1:03:20", "throughput": 19989.61, "total_tokens": 169833408} +{"current_steps": 53970, "total_steps": 78105, "loss": 0.1746, "lr": 1.3187931135326642e-06, "epoch": 3.4549644709045517, "percentage": 69.1, "elapsed_time": "2:21:36", "remaining_time": "1:03:19", "throughput": 19989.87, "total_tokens": 169849088} +{"current_steps": 53975, "total_steps": 78105, "loss": 0.1401, "lr": 1.3183007804150074e-06, "epoch": 3.4552845528455283, "percentage": 69.11, "elapsed_time": "2:21:37", "remaining_time": "1:03:18", "throughput": 19990.1, "total_tokens": 169864192} +{"current_steps": 53980, "total_steps": 78105, "loss": 0.2488, "lr": 1.3178085063052546e-06, "epoch": 3.4556046347865053, "percentage": 69.11, "elapsed_time": "2:21:38", "remaining_time": "1:03:18", "throughput": 19990.35, "total_tokens": 169879488} +{"current_steps": 53985, "total_steps": 78105, "loss": 0.1743, "lr": 1.317316291227987e-06, "epoch": 3.4559247167274822, "percentage": 69.12, "elapsed_time": "2:21:38", "remaining_time": "1:03:17", "throughput": 19990.61, "total_tokens": 169894976} +{"current_steps": 53990, "total_steps": 78105, "loss": 0.1497, "lr": 1.3168241352077823e-06, "epoch": 3.4562447986684592, "percentage": 69.12, "elapsed_time": "2:21:39", "remaining_time": "1:03:16", "throughput": 19990.84, "total_tokens": 169910080} +{"current_steps": 53995, "total_steps": 78105, "loss": 0.1819, "lr": 1.3163320382692196e-06, "epoch": 3.4565648806094362, "percentage": 69.13, "elapsed_time": "2:21:40", "remaining_time": "1:03:15", "throughput": 19991.09, "total_tokens": 169925120} +{"current_steps": 54000, "total_steps": 78105, "loss": 0.1647, "lr": 1.3158400004368672e-06, "epoch": 3.4568849625504128, "percentage": 69.14, "elapsed_time": "2:21:40", "remaining_time": "1:03:14", "throughput": 19991.31, "total_tokens": 169940096} +{"current_steps": 54005, "total_steps": 78105, "loss": 0.1532, "lr": 1.3153480217352993e-06, "epoch": 3.4572050444913898, "percentage": 69.14, "elapsed_time": "2:21:41", "remaining_time": "1:03:13", "throughput": 19991.57, "total_tokens": 169955520} +{"current_steps": 54010, "total_steps": 78105, "loss": 0.1127, "lr": 1.3148561021890784e-06, "epoch": 3.4575251264323668, "percentage": 69.15, "elapsed_time": "2:21:42", "remaining_time": "1:03:12", "throughput": 19991.87, "total_tokens": 169972160} +{"current_steps": 54015, "total_steps": 78105, "loss": 0.2193, "lr": 1.314364241822772e-06, "epoch": 3.4578452083733437, "percentage": 69.16, "elapsed_time": "2:21:42", "remaining_time": "1:03:12", "throughput": 19992.12, "total_tokens": 169987712} +{"current_steps": 54020, "total_steps": 78105, "loss": 0.1667, "lr": 1.313872440660939e-06, "epoch": 3.4581652903143203, "percentage": 69.16, "elapsed_time": "2:21:43", "remaining_time": "1:03:11", "throughput": 19992.4, "total_tokens": 170003968} +{"current_steps": 54025, "total_steps": 78105, "loss": 0.1623, "lr": 1.3133806987281373e-06, "epoch": 3.4584853722552973, "percentage": 69.17, "elapsed_time": "2:21:44", "remaining_time": "1:03:10", "throughput": 19992.64, "total_tokens": 170019008} +{"current_steps": 54030, "total_steps": 78105, "loss": 0.1978, "lr": 1.3128890160489248e-06, "epoch": 3.4588054541962743, "percentage": 69.18, "elapsed_time": "2:21:44", "remaining_time": "1:03:09", "throughput": 19992.88, "total_tokens": 170034368} +{"current_steps": 54035, "total_steps": 78105, "loss": 0.1493, "lr": 1.312397392647849e-06, "epoch": 3.4591255361372513, "percentage": 69.18, "elapsed_time": "2:21:45", "remaining_time": "1:03:08", "throughput": 19993.14, "total_tokens": 170050368} +{"current_steps": 54040, "total_steps": 78105, "loss": 0.2024, "lr": 1.3119058285494638e-06, "epoch": 3.4594456180782283, "percentage": 69.19, "elapsed_time": "2:21:46", "remaining_time": "1:03:07", "throughput": 19993.41, "total_tokens": 170066560} +{"current_steps": 54045, "total_steps": 78105, "loss": 0.1027, "lr": 1.3114143237783106e-06, "epoch": 3.459765700019205, "percentage": 69.2, "elapsed_time": "2:21:46", "remaining_time": "1:03:07", "throughput": 19993.72, "total_tokens": 170083520} +{"current_steps": 54050, "total_steps": 78105, "loss": 0.1372, "lr": 1.3109228783589361e-06, "epoch": 3.460085781960182, "percentage": 69.2, "elapsed_time": "2:21:47", "remaining_time": "1:03:06", "throughput": 19994.01, "total_tokens": 170099904} +{"current_steps": 54055, "total_steps": 78105, "loss": 0.1701, "lr": 1.3104314923158794e-06, "epoch": 3.460405863901159, "percentage": 69.21, "elapsed_time": "2:21:48", "remaining_time": "1:03:05", "throughput": 19994.3, "total_tokens": 170116736} +{"current_steps": 54060, "total_steps": 78105, "loss": 0.1479, "lr": 1.309940165673678e-06, "epoch": 3.4607259458421358, "percentage": 69.21, "elapsed_time": "2:21:48", "remaining_time": "1:03:04", "throughput": 19994.51, "total_tokens": 170131328} +{"current_steps": 54065, "total_steps": 78105, "loss": 0.1405, "lr": 1.3094488984568657e-06, "epoch": 3.4610460277831123, "percentage": 69.22, "elapsed_time": "2:21:49", "remaining_time": "1:03:03", "throughput": 19994.79, "total_tokens": 170147584} +{"current_steps": 54070, "total_steps": 78105, "loss": 0.1724, "lr": 1.308957690689974e-06, "epoch": 3.4613661097240893, "percentage": 69.23, "elapsed_time": "2:21:50", "remaining_time": "1:03:02", "throughput": 19995.1, "total_tokens": 170164288} +{"current_steps": 54075, "total_steps": 78105, "loss": 0.2134, "lr": 1.3084665423975304e-06, "epoch": 3.4616861916650663, "percentage": 69.23, "elapsed_time": "2:21:50", "remaining_time": "1:03:02", "throughput": 19995.35, "total_tokens": 170179776} +{"current_steps": 54080, "total_steps": 78105, "loss": 0.1867, "lr": 1.3079754536040624e-06, "epoch": 3.4620062736060433, "percentage": 69.24, "elapsed_time": "2:21:51", "remaining_time": "1:03:01", "throughput": 19995.62, "total_tokens": 170195648} +{"current_steps": 54085, "total_steps": 78105, "loss": 0.0758, "lr": 1.3074844243340906e-06, "epoch": 3.4623263555470203, "percentage": 69.25, "elapsed_time": "2:21:52", "remaining_time": "1:03:00", "throughput": 19995.85, "total_tokens": 170210816} +{"current_steps": 54090, "total_steps": 78105, "loss": 0.2233, "lr": 1.3069934546121354e-06, "epoch": 3.462646437487997, "percentage": 69.25, "elapsed_time": "2:21:52", "remaining_time": "1:02:59", "throughput": 19996.07, "total_tokens": 170225408} +{"current_steps": 54095, "total_steps": 78105, "loss": 0.1863, "lr": 1.3065025444627134e-06, "epoch": 3.462966519428974, "percentage": 69.26, "elapsed_time": "2:21:53", "remaining_time": "1:02:58", "throughput": 19996.33, "total_tokens": 170241088} +{"current_steps": 54100, "total_steps": 78105, "loss": 0.1855, "lr": 1.3060116939103368e-06, "epoch": 3.463286601369951, "percentage": 69.27, "elapsed_time": "2:21:54", "remaining_time": "1:02:57", "throughput": 19996.55, "total_tokens": 170255936} +{"current_steps": 54105, "total_steps": 78105, "loss": 0.0975, "lr": 1.3055209029795174e-06, "epoch": 3.4636066833109274, "percentage": 69.27, "elapsed_time": "2:21:54", "remaining_time": "1:02:57", "throughput": 19996.77, "total_tokens": 170271040} +{"current_steps": 54110, "total_steps": 78105, "loss": 0.1701, "lr": 1.3050301716947613e-06, "epoch": 3.4639267652519043, "percentage": 69.28, "elapsed_time": "2:21:55", "remaining_time": "1:02:56", "throughput": 19997.07, "total_tokens": 170287552} +{"current_steps": 54115, "total_steps": 78105, "loss": 0.1511, "lr": 1.304539500080576e-06, "epoch": 3.4642468471928813, "percentage": 69.28, "elapsed_time": "2:21:56", "remaining_time": "1:02:55", "throughput": 19997.35, "total_tokens": 170303744} +{"current_steps": 54120, "total_steps": 78105, "loss": 0.1382, "lr": 1.3040488881614593e-06, "epoch": 3.4645669291338583, "percentage": 69.29, "elapsed_time": "2:21:57", "remaining_time": "1:02:54", "throughput": 19997.73, "total_tokens": 170321728} +{"current_steps": 54125, "total_steps": 78105, "loss": 0.2174, "lr": 1.3035583359619125e-06, "epoch": 3.4648870110748353, "percentage": 69.3, "elapsed_time": "2:21:57", "remaining_time": "1:02:53", "throughput": 19997.96, "total_tokens": 170336768} +{"current_steps": 54130, "total_steps": 78105, "loss": 0.2128, "lr": 1.3030678435064304e-06, "epoch": 3.465207093015812, "percentage": 69.3, "elapsed_time": "2:21:58", "remaining_time": "1:02:52", "throughput": 19998.22, "total_tokens": 170352576} +{"current_steps": 54135, "total_steps": 78105, "loss": 0.1207, "lr": 1.3025774108195055e-06, "epoch": 3.465527174956789, "percentage": 69.31, "elapsed_time": "2:21:59", "remaining_time": "1:02:52", "throughput": 19998.49, "total_tokens": 170368832} +{"current_steps": 54140, "total_steps": 78105, "loss": 0.1648, "lr": 1.3020870379256273e-06, "epoch": 3.465847256897766, "percentage": 69.32, "elapsed_time": "2:21:59", "remaining_time": "1:02:51", "throughput": 19998.75, "total_tokens": 170384256} +{"current_steps": 54145, "total_steps": 78105, "loss": 0.2808, "lr": 1.3015967248492822e-06, "epoch": 3.466167338838743, "percentage": 69.32, "elapsed_time": "2:22:00", "remaining_time": "1:02:50", "throughput": 19998.98, "total_tokens": 170399360} +{"current_steps": 54150, "total_steps": 78105, "loss": 0.1558, "lr": 1.3011064716149563e-06, "epoch": 3.4664874207797194, "percentage": 69.33, "elapsed_time": "2:22:01", "remaining_time": "1:02:49", "throughput": 19999.25, "total_tokens": 170415360} +{"current_steps": 54155, "total_steps": 78105, "loss": 0.1096, "lr": 1.3006162782471264e-06, "epoch": 3.4668075027206964, "percentage": 69.34, "elapsed_time": "2:22:01", "remaining_time": "1:02:48", "throughput": 19999.48, "total_tokens": 170430528} +{"current_steps": 54160, "total_steps": 78105, "loss": 0.1795, "lr": 1.300126144770273e-06, "epoch": 3.4671275846616734, "percentage": 69.34, "elapsed_time": "2:22:02", "remaining_time": "1:02:47", "throughput": 19999.82, "total_tokens": 170448000} +{"current_steps": 54165, "total_steps": 78105, "loss": 0.1462, "lr": 1.2996360712088702e-06, "epoch": 3.4674476666026504, "percentage": 69.35, "elapsed_time": "2:22:03", "remaining_time": "1:02:47", "throughput": 20000.07, "total_tokens": 170463488} +{"current_steps": 54170, "total_steps": 78105, "loss": 0.1631, "lr": 1.2991460575873893e-06, "epoch": 3.4677677485436273, "percentage": 69.36, "elapsed_time": "2:22:03", "remaining_time": "1:02:46", "throughput": 20000.34, "total_tokens": 170479680} +{"current_steps": 54175, "total_steps": 78105, "loss": 0.2201, "lr": 1.2986561039302997e-06, "epoch": 3.468087830484604, "percentage": 69.36, "elapsed_time": "2:22:04", "remaining_time": "1:02:45", "throughput": 20000.62, "total_tokens": 170495616} +{"current_steps": 54180, "total_steps": 78105, "loss": 0.1784, "lr": 1.2981662102620662e-06, "epoch": 3.468407912425581, "percentage": 69.37, "elapsed_time": "2:22:05", "remaining_time": "1:02:44", "throughput": 20000.95, "total_tokens": 170512960} +{"current_steps": 54185, "total_steps": 78105, "loss": 0.2556, "lr": 1.2976763766071525e-06, "epoch": 3.468727994366558, "percentage": 69.37, "elapsed_time": "2:22:05", "remaining_time": "1:02:43", "throughput": 20001.21, "total_tokens": 170529024} +{"current_steps": 54190, "total_steps": 78105, "loss": 0.1448, "lr": 1.2971866029900176e-06, "epoch": 3.469048076307535, "percentage": 69.38, "elapsed_time": "2:22:06", "remaining_time": "1:02:42", "throughput": 20001.45, "total_tokens": 170544832} +{"current_steps": 54195, "total_steps": 78105, "loss": 0.1645, "lr": 1.2966968894351175e-06, "epoch": 3.4693681582485114, "percentage": 69.39, "elapsed_time": "2:22:07", "remaining_time": "1:02:42", "throughput": 20001.68, "total_tokens": 170559808} +{"current_steps": 54200, "total_steps": 78105, "loss": 0.1558, "lr": 1.296207235966908e-06, "epoch": 3.4696882401894884, "percentage": 69.39, "elapsed_time": "2:22:07", "remaining_time": "1:02:41", "throughput": 20001.94, "total_tokens": 170575424} +{"current_steps": 54205, "total_steps": 78105, "loss": 0.1591, "lr": 1.295717642609839e-06, "epoch": 3.4700083221304654, "percentage": 69.4, "elapsed_time": "2:22:08", "remaining_time": "1:02:40", "throughput": 20002.2, "total_tokens": 170591232} +{"current_steps": 54210, "total_steps": 78105, "loss": 0.1287, "lr": 1.295228109388358e-06, "epoch": 3.4703284040714424, "percentage": 69.41, "elapsed_time": "2:22:09", "remaining_time": "1:02:39", "throughput": 20002.47, "total_tokens": 170607232} +{"current_steps": 54215, "total_steps": 78105, "loss": 0.1459, "lr": 1.2947386363269097e-06, "epoch": 3.4706484860124194, "percentage": 69.41, "elapsed_time": "2:22:09", "remaining_time": "1:02:38", "throughput": 20002.7, "total_tokens": 170622272} +{"current_steps": 54220, "total_steps": 78105, "loss": 0.1371, "lr": 1.2942492234499348e-06, "epoch": 3.470968567953396, "percentage": 69.42, "elapsed_time": "2:22:10", "remaining_time": "1:02:37", "throughput": 20002.95, "total_tokens": 170637824} +{"current_steps": 54225, "total_steps": 78105, "loss": 0.1282, "lr": 1.2937598707818755e-06, "epoch": 3.471288649894373, "percentage": 69.43, "elapsed_time": "2:22:11", "remaining_time": "1:02:37", "throughput": 20003.19, "total_tokens": 170652928} +{"current_steps": 54230, "total_steps": 78105, "loss": 0.1599, "lr": 1.2932705783471628e-06, "epoch": 3.47160873183535, "percentage": 69.43, "elapsed_time": "2:22:11", "remaining_time": "1:02:36", "throughput": 20003.44, "total_tokens": 170668864} +{"current_steps": 54235, "total_steps": 78105, "loss": 0.134, "lr": 1.2927813461702337e-06, "epoch": 3.471928813776327, "percentage": 69.44, "elapsed_time": "2:22:12", "remaining_time": "1:02:35", "throughput": 20003.69, "total_tokens": 170684416} +{"current_steps": 54240, "total_steps": 78105, "loss": 0.1454, "lr": 1.2922921742755145e-06, "epoch": 3.4722488957173034, "percentage": 69.44, "elapsed_time": "2:22:13", "remaining_time": "1:02:34", "throughput": 20004.08, "total_tokens": 170702720} +{"current_steps": 54245, "total_steps": 78105, "loss": 0.1346, "lr": 1.2918030626874339e-06, "epoch": 3.4725689776582804, "percentage": 69.45, "elapsed_time": "2:22:14", "remaining_time": "1:02:33", "throughput": 20004.31, "total_tokens": 170717568} +{"current_steps": 54250, "total_steps": 78105, "loss": 0.1632, "lr": 1.2913140114304157e-06, "epoch": 3.4728890595992574, "percentage": 69.46, "elapsed_time": "2:22:14", "remaining_time": "1:02:32", "throughput": 20004.54, "total_tokens": 170732416} +{"current_steps": 54255, "total_steps": 78105, "loss": 0.1729, "lr": 1.2908250205288786e-06, "epoch": 3.4732091415402344, "percentage": 69.46, "elapsed_time": "2:22:15", "remaining_time": "1:02:32", "throughput": 20004.75, "total_tokens": 170747008} +{"current_steps": 54260, "total_steps": 78105, "loss": 0.1996, "lr": 1.2903360900072434e-06, "epoch": 3.4735292234812114, "percentage": 69.47, "elapsed_time": "2:22:15", "remaining_time": "1:02:31", "throughput": 20005.01, "total_tokens": 170762624} +{"current_steps": 54265, "total_steps": 78105, "loss": 0.1166, "lr": 1.2898472198899212e-06, "epoch": 3.473849305422188, "percentage": 69.48, "elapsed_time": "2:22:16", "remaining_time": "1:02:30", "throughput": 20005.22, "total_tokens": 170777216} +{"current_steps": 54270, "total_steps": 78105, "loss": 0.1858, "lr": 1.2893584102013262e-06, "epoch": 3.474169387363165, "percentage": 69.48, "elapsed_time": "2:22:17", "remaining_time": "1:02:29", "throughput": 20005.45, "total_tokens": 170792128} +{"current_steps": 54275, "total_steps": 78105, "loss": 0.1894, "lr": 1.2888696609658663e-06, "epoch": 3.474489469304142, "percentage": 69.49, "elapsed_time": "2:22:17", "remaining_time": "1:02:28", "throughput": 20005.72, "total_tokens": 170808128} +{"current_steps": 54280, "total_steps": 78105, "loss": 0.1623, "lr": 1.288380972207947e-06, "epoch": 3.474809551245119, "percentage": 69.5, "elapsed_time": "2:22:18", "remaining_time": "1:02:27", "throughput": 20006.01, "total_tokens": 170824448} +{"current_steps": 54285, "total_steps": 78105, "loss": 0.224, "lr": 1.2878923439519709e-06, "epoch": 3.4751296331860955, "percentage": 69.5, "elapsed_time": "2:22:19", "remaining_time": "1:02:27", "throughput": 20006.27, "total_tokens": 170840512} +{"current_steps": 54290, "total_steps": 78105, "loss": 0.1499, "lr": 1.2874037762223373e-06, "epoch": 3.4754497151270725, "percentage": 69.51, "elapsed_time": "2:22:20", "remaining_time": "1:02:26", "throughput": 20006.56, "total_tokens": 170857088} +{"current_steps": 54295, "total_steps": 78105, "loss": 0.1297, "lr": 1.2869152690434434e-06, "epoch": 3.4757697970680494, "percentage": 69.52, "elapsed_time": "2:22:20", "remaining_time": "1:02:25", "throughput": 20006.82, "total_tokens": 170872576} +{"current_steps": 54300, "total_steps": 78105, "loss": 0.1237, "lr": 1.286426822439682e-06, "epoch": 3.4760898790090264, "percentage": 69.52, "elapsed_time": "2:22:21", "remaining_time": "1:02:24", "throughput": 20007.06, "total_tokens": 170887872} +{"current_steps": 54305, "total_steps": 78105, "loss": 0.1636, "lr": 1.2859384364354426e-06, "epoch": 3.4764099609500034, "percentage": 69.53, "elapsed_time": "2:22:22", "remaining_time": "1:02:23", "throughput": 20007.3, "total_tokens": 170903168} +{"current_steps": 54310, "total_steps": 78105, "loss": 0.1947, "lr": 1.285450111055116e-06, "epoch": 3.47673004289098, "percentage": 69.53, "elapsed_time": "2:22:22", "remaining_time": "1:02:22", "throughput": 20007.6, "total_tokens": 170919424} +{"current_steps": 54315, "total_steps": 78105, "loss": 0.1981, "lr": 1.2849618463230826e-06, "epoch": 3.477050124831957, "percentage": 69.54, "elapsed_time": "2:22:23", "remaining_time": "1:02:22", "throughput": 20007.82, "total_tokens": 170934336} +{"current_steps": 54320, "total_steps": 78105, "loss": 0.199, "lr": 1.2844736422637267e-06, "epoch": 3.477370206772934, "percentage": 69.55, "elapsed_time": "2:22:24", "remaining_time": "1:02:21", "throughput": 20008.22, "total_tokens": 170953024} +{"current_steps": 54325, "total_steps": 78105, "loss": 0.1898, "lr": 1.2839854989014256e-06, "epoch": 3.477690288713911, "percentage": 69.55, "elapsed_time": "2:22:24", "remaining_time": "1:02:20", "throughput": 20008.43, "total_tokens": 170967808} +{"current_steps": 54330, "total_steps": 78105, "loss": 0.111, "lr": 1.2834974162605546e-06, "epoch": 3.4780103706548875, "percentage": 69.56, "elapsed_time": "2:22:25", "remaining_time": "1:02:19", "throughput": 20008.68, "total_tokens": 170983360} +{"current_steps": 54335, "total_steps": 78105, "loss": 0.0923, "lr": 1.2830093943654858e-06, "epoch": 3.4783304525958645, "percentage": 69.57, "elapsed_time": "2:22:26", "remaining_time": "1:02:18", "throughput": 20008.91, "total_tokens": 170998784} +{"current_steps": 54340, "total_steps": 78105, "loss": 0.128, "lr": 1.2825214332405884e-06, "epoch": 3.4786505345368415, "percentage": 69.57, "elapsed_time": "2:22:26", "remaining_time": "1:02:17", "throughput": 20009.11, "total_tokens": 171013440} +{"current_steps": 54345, "total_steps": 78105, "loss": 0.1324, "lr": 1.2820335329102308e-06, "epoch": 3.4789706164778185, "percentage": 69.58, "elapsed_time": "2:22:27", "remaining_time": "1:02:17", "throughput": 20009.36, "total_tokens": 171028928} +{"current_steps": 54350, "total_steps": 78105, "loss": 0.1495, "lr": 1.2815456933987725e-06, "epoch": 3.4792906984187955, "percentage": 69.59, "elapsed_time": "2:22:28", "remaining_time": "1:02:16", "throughput": 20009.61, "total_tokens": 171044672} +{"current_steps": 54355, "total_steps": 78105, "loss": 0.1096, "lr": 1.2810579147305762e-06, "epoch": 3.479610780359772, "percentage": 69.59, "elapsed_time": "2:22:28", "remaining_time": "1:02:15", "throughput": 20009.87, "total_tokens": 171060480} +{"current_steps": 54360, "total_steps": 78105, "loss": 0.1723, "lr": 1.280570196929999e-06, "epoch": 3.479930862300749, "percentage": 69.6, "elapsed_time": "2:22:29", "remaining_time": "1:02:14", "throughput": 20010.11, "total_tokens": 171075968} +{"current_steps": 54365, "total_steps": 78105, "loss": 0.1517, "lr": 1.2800825400213937e-06, "epoch": 3.480250944241726, "percentage": 69.61, "elapsed_time": "2:22:30", "remaining_time": "1:02:13", "throughput": 20010.37, "total_tokens": 171091840} +{"current_steps": 54370, "total_steps": 78105, "loss": 0.1621, "lr": 1.2795949440291122e-06, "epoch": 3.4805710261827025, "percentage": 69.61, "elapsed_time": "2:22:30", "remaining_time": "1:02:12", "throughput": 20010.61, "total_tokens": 171107008} +{"current_steps": 54375, "total_steps": 78105, "loss": 0.123, "lr": 1.2791074089775018e-06, "epoch": 3.4808911081236795, "percentage": 69.62, "elapsed_time": "2:22:31", "remaining_time": "1:02:11", "throughput": 20010.83, "total_tokens": 171122176} +{"current_steps": 54380, "total_steps": 78105, "loss": 0.1391, "lr": 1.2786199348909095e-06, "epoch": 3.4812111900646565, "percentage": 69.62, "elapsed_time": "2:22:32", "remaining_time": "1:02:11", "throughput": 20011.07, "total_tokens": 171137600} +{"current_steps": 54385, "total_steps": 78105, "loss": 0.123, "lr": 1.2781325217936741e-06, "epoch": 3.4815312720056335, "percentage": 69.63, "elapsed_time": "2:22:32", "remaining_time": "1:02:10", "throughput": 20011.34, "total_tokens": 171153536} +{"current_steps": 54390, "total_steps": 78105, "loss": 0.0861, "lr": 1.2776451697101367e-06, "epoch": 3.4818513539466105, "percentage": 69.64, "elapsed_time": "2:22:33", "remaining_time": "1:02:09", "throughput": 20011.65, "total_tokens": 171170048} +{"current_steps": 54395, "total_steps": 78105, "loss": 0.1005, "lr": 1.2771578786646328e-06, "epoch": 3.482171435887587, "percentage": 69.64, "elapsed_time": "2:22:34", "remaining_time": "1:02:08", "throughput": 20011.89, "total_tokens": 171185664} +{"current_steps": 54400, "total_steps": 78105, "loss": 0.1379, "lr": 1.2766706486814948e-06, "epoch": 3.482491517828564, "percentage": 69.65, "elapsed_time": "2:22:34", "remaining_time": "1:02:07", "throughput": 20012.1, "total_tokens": 171200448} +{"current_steps": 54405, "total_steps": 78105, "loss": 0.19, "lr": 1.2761834797850524e-06, "epoch": 3.482811599769541, "percentage": 69.66, "elapsed_time": "2:22:35", "remaining_time": "1:02:06", "throughput": 20012.33, "total_tokens": 171216000} +{"current_steps": 54410, "total_steps": 78105, "loss": 0.109, "lr": 1.2756963719996323e-06, "epoch": 3.483131681710518, "percentage": 69.66, "elapsed_time": "2:22:36", "remaining_time": "1:02:06", "throughput": 20012.58, "total_tokens": 171231424} +{"current_steps": 54415, "total_steps": 78105, "loss": 0.146, "lr": 1.2752093253495584e-06, "epoch": 3.4834517636514946, "percentage": 69.67, "elapsed_time": "2:22:36", "remaining_time": "1:02:05", "throughput": 20012.79, "total_tokens": 171246272} +{"current_steps": 54420, "total_steps": 78105, "loss": 0.172, "lr": 1.2747223398591513e-06, "epoch": 3.4837718455924715, "percentage": 69.68, "elapsed_time": "2:22:37", "remaining_time": "1:02:04", "throughput": 20013.03, "total_tokens": 171261440} +{"current_steps": 54425, "total_steps": 78105, "loss": 0.1479, "lr": 1.274235415552727e-06, "epoch": 3.4840919275334485, "percentage": 69.68, "elapsed_time": "2:22:38", "remaining_time": "1:02:03", "throughput": 20013.19, "total_tokens": 171275264} +{"current_steps": 54430, "total_steps": 78105, "loss": 0.2251, "lr": 1.2737485524546025e-06, "epoch": 3.4844120094744255, "percentage": 69.69, "elapsed_time": "2:22:38", "remaining_time": "1:02:02", "throughput": 20013.46, "total_tokens": 171291392} +{"current_steps": 54435, "total_steps": 78105, "loss": 0.1986, "lr": 1.2732617505890878e-06, "epoch": 3.4847320914154025, "percentage": 69.69, "elapsed_time": "2:22:39", "remaining_time": "1:02:01", "throughput": 20013.72, "total_tokens": 171306944} +{"current_steps": 54440, "total_steps": 78105, "loss": 0.2145, "lr": 1.2727750099804915e-06, "epoch": 3.485052173356379, "percentage": 69.7, "elapsed_time": "2:22:40", "remaining_time": "1:02:01", "throughput": 20014.0, "total_tokens": 171323136} +{"current_steps": 54445, "total_steps": 78105, "loss": 0.1658, "lr": 1.2722883306531188e-06, "epoch": 3.485372255297356, "percentage": 69.71, "elapsed_time": "2:22:40", "remaining_time": "1:02:00", "throughput": 20014.27, "total_tokens": 171339264} +{"current_steps": 54450, "total_steps": 78105, "loss": 0.1896, "lr": 1.271801712631271e-06, "epoch": 3.485692337238333, "percentage": 69.71, "elapsed_time": "2:22:41", "remaining_time": "1:01:59", "throughput": 20014.56, "total_tokens": 171355392} +{"current_steps": 54455, "total_steps": 78105, "loss": 0.2414, "lr": 1.27131515593925e-06, "epoch": 3.48601241917931, "percentage": 69.72, "elapsed_time": "2:22:42", "remaining_time": "1:01:58", "throughput": 20014.82, "total_tokens": 171371456} +{"current_steps": 54460, "total_steps": 78105, "loss": 0.1557, "lr": 1.2708286606013487e-06, "epoch": 3.4863325011202866, "percentage": 69.73, "elapsed_time": "2:22:42", "remaining_time": "1:01:57", "throughput": 20015.04, "total_tokens": 171386304} +{"current_steps": 54465, "total_steps": 78105, "loss": 0.1722, "lr": 1.270342226641863e-06, "epoch": 3.4866525830612636, "percentage": 69.73, "elapsed_time": "2:22:43", "remaining_time": "1:01:56", "throughput": 20015.25, "total_tokens": 171400896} +{"current_steps": 54470, "total_steps": 78105, "loss": 0.1206, "lr": 1.2698558540850797e-06, "epoch": 3.4869726650022406, "percentage": 69.74, "elapsed_time": "2:22:44", "remaining_time": "1:01:56", "throughput": 20015.45, "total_tokens": 171415488} +{"current_steps": 54475, "total_steps": 78105, "loss": 0.1393, "lr": 1.2693695429552883e-06, "epoch": 3.4872927469432176, "percentage": 69.75, "elapsed_time": "2:22:44", "remaining_time": "1:01:55", "throughput": 20015.7, "total_tokens": 171430976} +{"current_steps": 54480, "total_steps": 78105, "loss": 0.1704, "lr": 1.268883293276772e-06, "epoch": 3.4876128288841945, "percentage": 69.75, "elapsed_time": "2:22:45", "remaining_time": "1:01:54", "throughput": 20015.99, "total_tokens": 171447040} +{"current_steps": 54485, "total_steps": 78105, "loss": 0.1885, "lr": 1.268397105073811e-06, "epoch": 3.487932910825171, "percentage": 69.76, "elapsed_time": "2:22:46", "remaining_time": "1:01:53", "throughput": 20016.3, "total_tokens": 171463616} +{"current_steps": 54490, "total_steps": 78105, "loss": 0.2525, "lr": 1.2679109783706838e-06, "epoch": 3.488252992766148, "percentage": 69.77, "elapsed_time": "2:22:46", "remaining_time": "1:01:52", "throughput": 20016.59, "total_tokens": 171480128} +{"current_steps": 54495, "total_steps": 78105, "loss": 0.1436, "lr": 1.2674249131916636e-06, "epoch": 3.488573074707125, "percentage": 69.77, "elapsed_time": "2:22:47", "remaining_time": "1:01:51", "throughput": 20016.83, "total_tokens": 171495744} +{"current_steps": 54500, "total_steps": 78105, "loss": 0.183, "lr": 1.2669389095610237e-06, "epoch": 3.488893156648102, "percentage": 69.78, "elapsed_time": "2:22:48", "remaining_time": "1:01:51", "throughput": 20017.04, "total_tokens": 171510272} +{"current_steps": 54505, "total_steps": 78105, "loss": 0.1267, "lr": 1.2664529675030321e-06, "epoch": 3.4892132385890786, "percentage": 69.78, "elapsed_time": "2:22:48", "remaining_time": "1:01:50", "throughput": 20017.28, "total_tokens": 171525568} +{"current_steps": 54510, "total_steps": 78105, "loss": 0.2177, "lr": 1.2659670870419537e-06, "epoch": 3.4895333205300556, "percentage": 69.79, "elapsed_time": "2:22:49", "remaining_time": "1:01:49", "throughput": 20017.54, "total_tokens": 171541376} +{"current_steps": 54515, "total_steps": 78105, "loss": 0.1468, "lr": 1.2654812682020517e-06, "epoch": 3.4898534024710326, "percentage": 69.8, "elapsed_time": "2:22:50", "remaining_time": "1:01:48", "throughput": 20017.82, "total_tokens": 171557312} +{"current_steps": 54520, "total_steps": 78105, "loss": 0.0905, "lr": 1.2649955110075846e-06, "epoch": 3.4901734844120096, "percentage": 69.8, "elapsed_time": "2:22:50", "remaining_time": "1:01:47", "throughput": 20018.1, "total_tokens": 171573696} +{"current_steps": 54525, "total_steps": 78105, "loss": 0.1485, "lr": 1.2645098154828084e-06, "epoch": 3.4904935663529866, "percentage": 69.81, "elapsed_time": "2:22:51", "remaining_time": "1:01:46", "throughput": 20018.33, "total_tokens": 171588928} +{"current_steps": 54530, "total_steps": 78105, "loss": 0.1302, "lr": 1.2640241816519765e-06, "epoch": 3.490813648293963, "percentage": 69.82, "elapsed_time": "2:22:52", "remaining_time": "1:01:46", "throughput": 20018.57, "total_tokens": 171604160} +{"current_steps": 54535, "total_steps": 78105, "loss": 0.1761, "lr": 1.2635386095393387e-06, "epoch": 3.49113373023494, "percentage": 69.82, "elapsed_time": "2:22:52", "remaining_time": "1:01:45", "throughput": 20018.82, "total_tokens": 171619584} +{"current_steps": 54540, "total_steps": 78105, "loss": 0.2513, "lr": 1.2630530991691436e-06, "epoch": 3.491453812175917, "percentage": 69.83, "elapsed_time": "2:22:53", "remaining_time": "1:01:44", "throughput": 20019.09, "total_tokens": 171635456} +{"current_steps": 54545, "total_steps": 78105, "loss": 0.1897, "lr": 1.262567650565632e-06, "epoch": 3.491773894116894, "percentage": 69.84, "elapsed_time": "2:22:54", "remaining_time": "1:01:43", "throughput": 20019.34, "total_tokens": 171650880} +{"current_steps": 54550, "total_steps": 78105, "loss": 0.1038, "lr": 1.2620822637530475e-06, "epoch": 3.4920939760578706, "percentage": 69.84, "elapsed_time": "2:22:54", "remaining_time": "1:01:42", "throughput": 20019.57, "total_tokens": 171666304} +{"current_steps": 54555, "total_steps": 78105, "loss": 0.1508, "lr": 1.2615969387556265e-06, "epoch": 3.4924140579988476, "percentage": 69.85, "elapsed_time": "2:22:55", "remaining_time": "1:01:41", "throughput": 20019.82, "total_tokens": 171681856} +{"current_steps": 54560, "total_steps": 78105, "loss": 0.2129, "lr": 1.2611116755976037e-06, "epoch": 3.4927341399398246, "percentage": 69.85, "elapsed_time": "2:22:56", "remaining_time": "1:01:41", "throughput": 20020.06, "total_tokens": 171697600} +{"current_steps": 54565, "total_steps": 78105, "loss": 0.1515, "lr": 1.2606264743032108e-06, "epoch": 3.4930542218808016, "percentage": 69.86, "elapsed_time": "2:22:56", "remaining_time": "1:01:40", "throughput": 20020.3, "total_tokens": 171713024} +{"current_steps": 54570, "total_steps": 78105, "loss": 0.2095, "lr": 1.260141334896675e-06, "epoch": 3.4933743038217786, "percentage": 69.87, "elapsed_time": "2:22:57", "remaining_time": "1:01:39", "throughput": 20020.51, "total_tokens": 171728128} +{"current_steps": 54575, "total_steps": 78105, "loss": 0.1379, "lr": 1.259656257402225e-06, "epoch": 3.493694385762755, "percentage": 69.87, "elapsed_time": "2:22:58", "remaining_time": "1:01:38", "throughput": 20020.76, "total_tokens": 171743744} +{"current_steps": 54580, "total_steps": 78105, "loss": 0.2119, "lr": 1.2591712418440788e-06, "epoch": 3.494014467703732, "percentage": 69.88, "elapsed_time": "2:22:58", "remaining_time": "1:01:37", "throughput": 20021.02, "total_tokens": 171759616} +{"current_steps": 54585, "total_steps": 78105, "loss": 0.1256, "lr": 1.2586862882464584e-06, "epoch": 3.494334549644709, "percentage": 69.89, "elapsed_time": "2:22:59", "remaining_time": "1:01:36", "throughput": 20021.34, "total_tokens": 171776192} +{"current_steps": 54590, "total_steps": 78105, "loss": 0.1207, "lr": 1.2582013966335788e-06, "epoch": 3.494654631585686, "percentage": 69.89, "elapsed_time": "2:23:00", "remaining_time": "1:01:36", "throughput": 20021.55, "total_tokens": 171791168} +{"current_steps": 54595, "total_steps": 78105, "loss": 0.1626, "lr": 1.2577165670296535e-06, "epoch": 3.4949747135266627, "percentage": 69.9, "elapsed_time": "2:23:00", "remaining_time": "1:01:35", "throughput": 20021.79, "total_tokens": 171806528} +{"current_steps": 54600, "total_steps": 78105, "loss": 0.1652, "lr": 1.2572317994588918e-06, "epoch": 3.4952947954676397, "percentage": 69.91, "elapsed_time": "2:23:01", "remaining_time": "1:01:34", "throughput": 20022.01, "total_tokens": 171821504} +{"current_steps": 54605, "total_steps": 78105, "loss": 0.1281, "lr": 1.2567470939454995e-06, "epoch": 3.4956148774086166, "percentage": 69.91, "elapsed_time": "2:23:02", "remaining_time": "1:01:33", "throughput": 20022.26, "total_tokens": 171837312} +{"current_steps": 54610, "total_steps": 78105, "loss": 0.164, "lr": 1.2562624505136839e-06, "epoch": 3.4959349593495936, "percentage": 69.92, "elapsed_time": "2:23:02", "remaining_time": "1:01:32", "throughput": 20022.48, "total_tokens": 171852416} +{"current_steps": 54615, "total_steps": 78105, "loss": 0.1751, "lr": 1.255777869187641e-06, "epoch": 3.4962550412905706, "percentage": 69.93, "elapsed_time": "2:23:03", "remaining_time": "1:01:31", "throughput": 20022.72, "total_tokens": 171867840} +{"current_steps": 54620, "total_steps": 78105, "loss": 0.2423, "lr": 1.2552933499915715e-06, "epoch": 3.496575123231547, "percentage": 69.93, "elapsed_time": "2:23:04", "remaining_time": "1:01:30", "throughput": 20022.94, "total_tokens": 171882816} +{"current_steps": 54625, "total_steps": 78105, "loss": 0.2101, "lr": 1.2548088929496682e-06, "epoch": 3.496895205172524, "percentage": 69.94, "elapsed_time": "2:23:05", "remaining_time": "1:01:30", "throughput": 20023.23, "total_tokens": 171899520} +{"current_steps": 54630, "total_steps": 78105, "loss": 0.1913, "lr": 1.254324498086123e-06, "epoch": 3.497215287113501, "percentage": 69.94, "elapsed_time": "2:23:05", "remaining_time": "1:01:29", "throughput": 20023.5, "total_tokens": 171915584} +{"current_steps": 54635, "total_steps": 78105, "loss": 0.1486, "lr": 1.2538401654251242e-06, "epoch": 3.4975353690544777, "percentage": 69.95, "elapsed_time": "2:23:06", "remaining_time": "1:01:28", "throughput": 20023.71, "total_tokens": 171930624} +{"current_steps": 54640, "total_steps": 78105, "loss": 0.1292, "lr": 1.2533558949908564e-06, "epoch": 3.4978554509954547, "percentage": 69.96, "elapsed_time": "2:23:07", "remaining_time": "1:01:27", "throughput": 20023.94, "total_tokens": 171946112} +{"current_steps": 54645, "total_steps": 78105, "loss": 0.183, "lr": 1.2528716868075014e-06, "epoch": 3.4981755329364317, "percentage": 69.96, "elapsed_time": "2:23:07", "remaining_time": "1:01:26", "throughput": 20024.19, "total_tokens": 171961856} +{"current_steps": 54650, "total_steps": 78105, "loss": 0.1333, "lr": 1.2523875408992387e-06, "epoch": 3.4984956148774087, "percentage": 69.97, "elapsed_time": "2:23:08", "remaining_time": "1:01:26", "throughput": 20024.5, "total_tokens": 171978624} +{"current_steps": 54655, "total_steps": 78105, "loss": 0.1237, "lr": 1.2519034572902422e-06, "epoch": 3.4988156968183857, "percentage": 69.98, "elapsed_time": "2:23:09", "remaining_time": "1:01:25", "throughput": 20024.78, "total_tokens": 171994688} +{"current_steps": 54660, "total_steps": 78105, "loss": 0.1462, "lr": 1.2514194360046883e-06, "epoch": 3.499135778759362, "percentage": 69.98, "elapsed_time": "2:23:09", "remaining_time": "1:01:24", "throughput": 20025.03, "total_tokens": 172010048} +{"current_steps": 54665, "total_steps": 78105, "loss": 0.19, "lr": 1.250935477066742e-06, "epoch": 3.499455860700339, "percentage": 69.99, "elapsed_time": "2:23:10", "remaining_time": "1:01:23", "throughput": 20025.26, "total_tokens": 172025152} +{"current_steps": 54670, "total_steps": 78105, "loss": 0.169, "lr": 1.250451580500573e-06, "epoch": 3.499775942641316, "percentage": 70.0, "elapsed_time": "2:23:11", "remaining_time": "1:01:22", "throughput": 20025.56, "total_tokens": 172041856} +{"current_steps": 54675, "total_steps": 78105, "loss": 0.1667, "lr": 1.249967746330343e-06, "epoch": 3.500096024582293, "percentage": 70.0, "elapsed_time": "2:23:11", "remaining_time": "1:01:21", "throughput": 20025.76, "total_tokens": 172056576} +{"current_steps": 54680, "total_steps": 78105, "loss": 0.159, "lr": 1.249483974580212e-06, "epoch": 3.5004161065232697, "percentage": 70.01, "elapsed_time": "2:23:12", "remaining_time": "1:01:21", "throughput": 20026.0, "total_tokens": 172072256} +{"current_steps": 54684, "total_steps": 78105, "eval_loss": 0.5747029781341553, "epoch": 3.5006721720760514, "percentage": 70.01, "elapsed_time": "2:24:03", "remaining_time": "1:01:42", "throughput": 19908.04, "total_tokens": 172084032} +{"current_steps": 54685, "total_steps": 78105, "loss": 0.1904, "lr": 1.2490002652743394e-06, "epoch": 3.5007361884642467, "percentage": 70.01, "elapsed_time": "2:24:38", "remaining_time": "1:01:56", "throughput": 19829.66, "total_tokens": 172086912} +{"current_steps": 54690, "total_steps": 78105, "loss": 0.1551, "lr": 1.2485166184368756e-06, "epoch": 3.5010562704052237, "percentage": 70.02, "elapsed_time": "2:24:38", "remaining_time": "1:01:55", "throughput": 19829.9, "total_tokens": 172102144} +{"current_steps": 54695, "total_steps": 78105, "loss": 0.1735, "lr": 1.2480330340919746e-06, "epoch": 3.5013763523462007, "percentage": 70.03, "elapsed_time": "2:24:39", "remaining_time": "1:01:54", "throughput": 19830.12, "total_tokens": 172117184} +{"current_steps": 54700, "total_steps": 78105, "loss": 0.1312, "lr": 1.2475495122637813e-06, "epoch": 3.5016964342871777, "percentage": 70.03, "elapsed_time": "2:24:40", "remaining_time": "1:01:54", "throughput": 19830.38, "total_tokens": 172132736} +{"current_steps": 54705, "total_steps": 78105, "loss": 0.1723, "lr": 1.247066052976442e-06, "epoch": 3.5020165162281542, "percentage": 70.04, "elapsed_time": "2:24:40", "remaining_time": "1:01:53", "throughput": 19830.64, "total_tokens": 172148160} +{"current_steps": 54710, "total_steps": 78105, "loss": 0.2084, "lr": 1.246582656254098e-06, "epoch": 3.5023365981691312, "percentage": 70.05, "elapsed_time": "2:24:41", "remaining_time": "1:01:52", "throughput": 19830.9, "total_tokens": 172163392} +{"current_steps": 54715, "total_steps": 78105, "loss": 0.2715, "lr": 1.2460993221208872e-06, "epoch": 3.5026566801101082, "percentage": 70.05, "elapsed_time": "2:24:42", "remaining_time": "1:01:51", "throughput": 19831.15, "total_tokens": 172178688} +{"current_steps": 54720, "total_steps": 78105, "loss": 0.1911, "lr": 1.245616050600945e-06, "epoch": 3.502976762051085, "percentage": 70.06, "elapsed_time": "2:24:42", "remaining_time": "1:01:50", "throughput": 19831.48, "total_tokens": 172195712} +{"current_steps": 54725, "total_steps": 78105, "loss": 0.1843, "lr": 1.2451328417184025e-06, "epoch": 3.5032968439920618, "percentage": 70.07, "elapsed_time": "2:24:43", "remaining_time": "1:01:49", "throughput": 19831.76, "total_tokens": 172211712} +{"current_steps": 54730, "total_steps": 78105, "loss": 0.2073, "lr": 1.2446496954973903e-06, "epoch": 3.5036169259330388, "percentage": 70.07, "elapsed_time": "2:24:44", "remaining_time": "1:01:49", "throughput": 19832.07, "total_tokens": 172228288} +{"current_steps": 54735, "total_steps": 78105, "loss": 0.185, "lr": 1.2441666119620335e-06, "epoch": 3.5039370078740157, "percentage": 70.08, "elapsed_time": "2:24:45", "remaining_time": "1:01:48", "throughput": 19832.39, "total_tokens": 172244864} +{"current_steps": 54740, "total_steps": 78105, "loss": 0.3193, "lr": 1.2436835911364548e-06, "epoch": 3.5042570898149927, "percentage": 70.09, "elapsed_time": "2:24:45", "remaining_time": "1:01:47", "throughput": 19832.63, "total_tokens": 172260288} +{"current_steps": 54745, "total_steps": 78105, "loss": 0.2089, "lr": 1.2432006330447736e-06, "epoch": 3.5045771717559697, "percentage": 70.09, "elapsed_time": "2:24:46", "remaining_time": "1:01:46", "throughput": 19832.86, "total_tokens": 172275008} +{"current_steps": 54750, "total_steps": 78105, "loss": 0.1869, "lr": 1.2427177377111062e-06, "epoch": 3.5048972536969463, "percentage": 70.1, "elapsed_time": "2:24:47", "remaining_time": "1:01:45", "throughput": 19833.16, "total_tokens": 172291520} +{"current_steps": 54755, "total_steps": 78105, "loss": 0.1421, "lr": 1.2422349051595662e-06, "epoch": 3.5052173356379233, "percentage": 70.1, "elapsed_time": "2:24:47", "remaining_time": "1:01:44", "throughput": 19833.42, "total_tokens": 172306816} +{"current_steps": 54760, "total_steps": 78105, "loss": 0.1578, "lr": 1.2417521354142637e-06, "epoch": 3.5055374175789002, "percentage": 70.11, "elapsed_time": "2:24:48", "remaining_time": "1:01:43", "throughput": 19833.69, "total_tokens": 172322752} +{"current_steps": 54765, "total_steps": 78105, "loss": 0.2266, "lr": 1.2412694284993043e-06, "epoch": 3.5058574995198772, "percentage": 70.12, "elapsed_time": "2:24:49", "remaining_time": "1:01:43", "throughput": 19833.93, "total_tokens": 172337984} +{"current_steps": 54770, "total_steps": 78105, "loss": 0.1267, "lr": 1.2407867844387952e-06, "epoch": 3.506177581460854, "percentage": 70.12, "elapsed_time": "2:24:49", "remaining_time": "1:01:42", "throughput": 19834.2, "total_tokens": 172353728} +{"current_steps": 54775, "total_steps": 78105, "loss": 0.1485, "lr": 1.2403042032568332e-06, "epoch": 3.5064976634018308, "percentage": 70.13, "elapsed_time": "2:24:50", "remaining_time": "1:01:41", "throughput": 19834.47, "total_tokens": 172369472} +{"current_steps": 54780, "total_steps": 78105, "loss": 0.1988, "lr": 1.2398216849775189e-06, "epoch": 3.5068177453428078, "percentage": 70.14, "elapsed_time": "2:24:51", "remaining_time": "1:01:40", "throughput": 19834.7, "total_tokens": 172384768} +{"current_steps": 54785, "total_steps": 78105, "loss": 0.1131, "lr": 1.2393392296249454e-06, "epoch": 3.5071378272837848, "percentage": 70.14, "elapsed_time": "2:24:51", "remaining_time": "1:01:39", "throughput": 19834.93, "total_tokens": 172399680} +{"current_steps": 54790, "total_steps": 78105, "loss": 0.2084, "lr": 1.2388568372232043e-06, "epoch": 3.5074579092247617, "percentage": 70.15, "elapsed_time": "2:24:52", "remaining_time": "1:01:38", "throughput": 19835.17, "total_tokens": 172415040} +{"current_steps": 54795, "total_steps": 78105, "loss": 0.2005, "lr": 1.2383745077963836e-06, "epoch": 3.5077779911657383, "percentage": 70.16, "elapsed_time": "2:24:53", "remaining_time": "1:01:38", "throughput": 19835.41, "total_tokens": 172429888} +{"current_steps": 54800, "total_steps": 78105, "loss": 0.1536, "lr": 1.2378922413685677e-06, "epoch": 3.5080980731067153, "percentage": 70.16, "elapsed_time": "2:24:53", "remaining_time": "1:01:37", "throughput": 19835.62, "total_tokens": 172444416} +{"current_steps": 54805, "total_steps": 78105, "loss": 0.2097, "lr": 1.2374100379638406e-06, "epoch": 3.5084181550476923, "percentage": 70.17, "elapsed_time": "2:24:54", "remaining_time": "1:01:36", "throughput": 19835.87, "total_tokens": 172459968} +{"current_steps": 54810, "total_steps": 78105, "loss": 0.1501, "lr": 1.2369278976062783e-06, "epoch": 3.508738236988669, "percentage": 70.17, "elapsed_time": "2:24:55", "remaining_time": "1:01:35", "throughput": 19836.08, "total_tokens": 172474816} +{"current_steps": 54815, "total_steps": 78105, "loss": 0.162, "lr": 1.2364458203199583e-06, "epoch": 3.509058318929646, "percentage": 70.18, "elapsed_time": "2:24:55", "remaining_time": "1:01:34", "throughput": 19836.38, "total_tokens": 172491392} +{"current_steps": 54820, "total_steps": 78105, "loss": 0.1621, "lr": 1.2359638061289524e-06, "epoch": 3.509378400870623, "percentage": 70.19, "elapsed_time": "2:24:56", "remaining_time": "1:01:33", "throughput": 19836.66, "total_tokens": 172507072} +{"current_steps": 54825, "total_steps": 78105, "loss": 0.1915, "lr": 1.23548185505733e-06, "epoch": 3.5096984828116, "percentage": 70.19, "elapsed_time": "2:24:57", "remaining_time": "1:01:32", "throughput": 19836.89, "total_tokens": 172522368} +{"current_steps": 54830, "total_steps": 78105, "loss": 0.1609, "lr": 1.234999967129157e-06, "epoch": 3.510018564752577, "percentage": 70.2, "elapsed_time": "2:24:57", "remaining_time": "1:01:32", "throughput": 19837.24, "total_tokens": 172540032} +{"current_steps": 54835, "total_steps": 78105, "loss": 0.1412, "lr": 1.2345181423684968e-06, "epoch": 3.5103386466935538, "percentage": 70.21, "elapsed_time": "2:24:58", "remaining_time": "1:01:31", "throughput": 19837.49, "total_tokens": 172555840} +{"current_steps": 54840, "total_steps": 78105, "loss": 0.1772, "lr": 1.2340363807994087e-06, "epoch": 3.5106587286345303, "percentage": 70.21, "elapsed_time": "2:24:59", "remaining_time": "1:01:30", "throughput": 19837.76, "total_tokens": 172571712} +{"current_steps": 54845, "total_steps": 78105, "loss": 0.1899, "lr": 1.2335546824459487e-06, "epoch": 3.5109788105755073, "percentage": 70.22, "elapsed_time": "2:24:59", "remaining_time": "1:01:29", "throughput": 19838.05, "total_tokens": 172588032} +{"current_steps": 54850, "total_steps": 78105, "loss": 0.1741, "lr": 1.233073047332172e-06, "epoch": 3.5112988925164843, "percentage": 70.23, "elapsed_time": "2:25:00", "remaining_time": "1:01:28", "throughput": 19838.28, "total_tokens": 172603200} +{"current_steps": 54855, "total_steps": 78105, "loss": 0.1684, "lr": 1.2325914754821284e-06, "epoch": 3.511618974457461, "percentage": 70.23, "elapsed_time": "2:25:01", "remaining_time": "1:01:27", "throughput": 19838.54, "total_tokens": 172618880} +{"current_steps": 54860, "total_steps": 78105, "loss": 0.1823, "lr": 1.2321099669198646e-06, "epoch": 3.511939056398438, "percentage": 70.24, "elapsed_time": "2:25:01", "remaining_time": "1:01:27", "throughput": 19838.84, "total_tokens": 172635456} +{"current_steps": 54865, "total_steps": 78105, "loss": 0.0929, "lr": 1.231628521669425e-06, "epoch": 3.512259138339415, "percentage": 70.25, "elapsed_time": "2:25:02", "remaining_time": "1:01:26", "throughput": 19839.11, "total_tokens": 172651456} +{"current_steps": 54870, "total_steps": 78105, "loss": 0.2091, "lr": 1.2311471397548503e-06, "epoch": 3.512579220280392, "percentage": 70.25, "elapsed_time": "2:25:03", "remaining_time": "1:01:25", "throughput": 19839.41, "total_tokens": 172668096} +{"current_steps": 54875, "total_steps": 78105, "loss": 0.1772, "lr": 1.230665821200178e-06, "epoch": 3.512899302221369, "percentage": 70.26, "elapsed_time": "2:25:03", "remaining_time": "1:01:24", "throughput": 19839.66, "total_tokens": 172683456} +{"current_steps": 54880, "total_steps": 78105, "loss": 0.1792, "lr": 1.2301845660294429e-06, "epoch": 3.513219384162346, "percentage": 70.26, "elapsed_time": "2:25:04", "remaining_time": "1:01:23", "throughput": 19839.92, "total_tokens": 172699712} +{"current_steps": 54885, "total_steps": 78105, "loss": 0.1578, "lr": 1.2297033742666756e-06, "epoch": 3.5135394661033224, "percentage": 70.27, "elapsed_time": "2:25:05", "remaining_time": "1:01:22", "throughput": 19840.17, "total_tokens": 172715200} +{"current_steps": 54890, "total_steps": 78105, "loss": 0.2734, "lr": 1.2292222459359068e-06, "epoch": 3.5138595480442993, "percentage": 70.28, "elapsed_time": "2:25:05", "remaining_time": "1:01:22", "throughput": 19840.4, "total_tokens": 172730176} +{"current_steps": 54895, "total_steps": 78105, "loss": 0.1462, "lr": 1.2287411810611579e-06, "epoch": 3.5141796299852763, "percentage": 70.28, "elapsed_time": "2:25:06", "remaining_time": "1:01:21", "throughput": 19840.64, "total_tokens": 172745728} +{"current_steps": 54900, "total_steps": 78105, "loss": 0.1788, "lr": 1.228260179666454e-06, "epoch": 3.514499711926253, "percentage": 70.29, "elapsed_time": "2:25:07", "remaining_time": "1:01:20", "throughput": 19840.89, "total_tokens": 172761536} +{"current_steps": 54905, "total_steps": 78105, "loss": 0.1697, "lr": 1.227779241775812e-06, "epoch": 3.51481979386723, "percentage": 70.3, "elapsed_time": "2:25:08", "remaining_time": "1:01:19", "throughput": 19841.15, "total_tokens": 172777280} +{"current_steps": 54910, "total_steps": 78105, "loss": 0.2206, "lr": 1.2272983674132472e-06, "epoch": 3.515139875808207, "percentage": 70.3, "elapsed_time": "2:25:08", "remaining_time": "1:01:18", "throughput": 19841.39, "total_tokens": 172792704} +{"current_steps": 54915, "total_steps": 78105, "loss": 0.1639, "lr": 1.2268175566027745e-06, "epoch": 3.515459957749184, "percentage": 70.31, "elapsed_time": "2:25:09", "remaining_time": "1:01:17", "throughput": 19841.67, "total_tokens": 172809088} +{"current_steps": 54920, "total_steps": 78105, "loss": 0.11, "lr": 1.2263368093683994e-06, "epoch": 3.515780039690161, "percentage": 70.32, "elapsed_time": "2:25:10", "remaining_time": "1:01:17", "throughput": 19841.88, "total_tokens": 172823744} +{"current_steps": 54925, "total_steps": 78105, "loss": 0.2312, "lr": 1.2258561257341317e-06, "epoch": 3.516100121631138, "percentage": 70.32, "elapsed_time": "2:25:10", "remaining_time": "1:01:16", "throughput": 19842.2, "total_tokens": 172840896} +{"current_steps": 54930, "total_steps": 78105, "loss": 0.1506, "lr": 1.2253755057239702e-06, "epoch": 3.5164202035721144, "percentage": 70.33, "elapsed_time": "2:25:11", "remaining_time": "1:01:15", "throughput": 19842.5, "total_tokens": 172857472} +{"current_steps": 54935, "total_steps": 78105, "loss": 0.1447, "lr": 1.2248949493619178e-06, "epoch": 3.5167402855130914, "percentage": 70.33, "elapsed_time": "2:25:12", "remaining_time": "1:01:14", "throughput": 19842.71, "total_tokens": 172872384} +{"current_steps": 54940, "total_steps": 78105, "loss": 0.2104, "lr": 1.2244144566719699e-06, "epoch": 3.5170603674540684, "percentage": 70.34, "elapsed_time": "2:25:12", "remaining_time": "1:01:13", "throughput": 19842.94, "total_tokens": 172887680} +{"current_steps": 54945, "total_steps": 78105, "loss": 0.1787, "lr": 1.2239340276781195e-06, "epoch": 3.517380449395045, "percentage": 70.35, "elapsed_time": "2:25:13", "remaining_time": "1:01:12", "throughput": 19843.2, "total_tokens": 172903488} +{"current_steps": 54950, "total_steps": 78105, "loss": 0.1806, "lr": 1.2234536624043573e-06, "epoch": 3.517700531336022, "percentage": 70.35, "elapsed_time": "2:25:14", "remaining_time": "1:01:11", "throughput": 19843.42, "total_tokens": 172918464} +{"current_steps": 54955, "total_steps": 78105, "loss": 0.2439, "lr": 1.2229733608746696e-06, "epoch": 3.518020613276999, "percentage": 70.36, "elapsed_time": "2:25:14", "remaining_time": "1:01:11", "throughput": 19843.73, "total_tokens": 172935488} +{"current_steps": 54960, "total_steps": 78105, "loss": 0.2157, "lr": 1.2224931231130396e-06, "epoch": 3.518340695217976, "percentage": 70.37, "elapsed_time": "2:25:15", "remaining_time": "1:01:10", "throughput": 19844.0, "total_tokens": 172951488} +{"current_steps": 54965, "total_steps": 78105, "loss": 0.2159, "lr": 1.2220129491434499e-06, "epoch": 3.518660777158953, "percentage": 70.37, "elapsed_time": "2:25:16", "remaining_time": "1:01:09", "throughput": 19844.24, "total_tokens": 172967360} +{"current_steps": 54970, "total_steps": 78105, "loss": 0.1599, "lr": 1.2215328389898766e-06, "epoch": 3.51898085909993, "percentage": 70.38, "elapsed_time": "2:25:16", "remaining_time": "1:01:08", "throughput": 19844.46, "total_tokens": 172982336} +{"current_steps": 54975, "total_steps": 78105, "loss": 0.2029, "lr": 1.2210527926762939e-06, "epoch": 3.5193009410409064, "percentage": 70.39, "elapsed_time": "2:25:17", "remaining_time": "1:01:07", "throughput": 19844.74, "total_tokens": 172998464} +{"current_steps": 54980, "total_steps": 78105, "loss": 0.1688, "lr": 1.220572810226673e-06, "epoch": 3.5196210229818834, "percentage": 70.39, "elapsed_time": "2:25:18", "remaining_time": "1:01:06", "throughput": 19844.96, "total_tokens": 173013824} +{"current_steps": 54985, "total_steps": 78105, "loss": 0.148, "lr": 1.2200928916649818e-06, "epoch": 3.5199411049228604, "percentage": 70.4, "elapsed_time": "2:25:18", "remaining_time": "1:01:06", "throughput": 19845.22, "total_tokens": 173029504} +{"current_steps": 54990, "total_steps": 78105, "loss": 0.1436, "lr": 1.2196130370151847e-06, "epoch": 3.520261186863837, "percentage": 70.41, "elapsed_time": "2:25:19", "remaining_time": "1:01:05", "throughput": 19845.44, "total_tokens": 173045056} +{"current_steps": 54995, "total_steps": 78105, "loss": 0.1622, "lr": 1.2191332463012424e-06, "epoch": 3.520581268804814, "percentage": 70.41, "elapsed_time": "2:25:20", "remaining_time": "1:01:04", "throughput": 19845.75, "total_tokens": 173062144} +{"current_steps": 55000, "total_steps": 78105, "loss": 0.0933, "lr": 1.2186535195471159e-06, "epoch": 3.520901350745791, "percentage": 70.42, "elapsed_time": "2:25:21", "remaining_time": "1:01:03", "throughput": 19845.95, "total_tokens": 173076544} +{"current_steps": 55005, "total_steps": 78105, "loss": 0.1678, "lr": 1.2181738567767562e-06, "epoch": 3.521221432686768, "percentage": 70.42, "elapsed_time": "2:25:21", "remaining_time": "1:01:02", "throughput": 19846.23, "total_tokens": 173092864} +{"current_steps": 55010, "total_steps": 78105, "loss": 0.1625, "lr": 1.2176942580141193e-06, "epoch": 3.521541514627745, "percentage": 70.43, "elapsed_time": "2:25:22", "remaining_time": "1:01:01", "throughput": 19846.5, "total_tokens": 173108736} +{"current_steps": 55015, "total_steps": 78105, "loss": 0.1287, "lr": 1.2172147232831499e-06, "epoch": 3.5218615965687214, "percentage": 70.44, "elapsed_time": "2:25:23", "remaining_time": "1:01:01", "throughput": 19846.82, "total_tokens": 173125952} +{"current_steps": 55020, "total_steps": 78105, "loss": 0.1979, "lr": 1.2167352526077967e-06, "epoch": 3.5221816785096984, "percentage": 70.44, "elapsed_time": "2:25:23", "remaining_time": "1:01:00", "throughput": 19847.05, "total_tokens": 173141376} +{"current_steps": 55025, "total_steps": 78105, "loss": 0.1789, "lr": 1.2162558460120002e-06, "epoch": 3.5225017604506754, "percentage": 70.45, "elapsed_time": "2:25:24", "remaining_time": "1:00:59", "throughput": 19847.33, "total_tokens": 173157312} +{"current_steps": 55030, "total_steps": 78105, "loss": 0.1455, "lr": 1.2157765035196995e-06, "epoch": 3.5228218423916524, "percentage": 70.46, "elapsed_time": "2:25:25", "remaining_time": "1:00:58", "throughput": 19847.66, "total_tokens": 173174464} +{"current_steps": 55035, "total_steps": 78105, "loss": 0.1821, "lr": 1.2152972251548326e-06, "epoch": 3.523141924332629, "percentage": 70.46, "elapsed_time": "2:25:25", "remaining_time": "1:00:57", "throughput": 19847.88, "total_tokens": 173189824} +{"current_steps": 55040, "total_steps": 78105, "loss": 0.1983, "lr": 1.2148180109413288e-06, "epoch": 3.523462006273606, "percentage": 70.47, "elapsed_time": "2:25:26", "remaining_time": "1:00:56", "throughput": 19848.11, "total_tokens": 173205056} +{"current_steps": 55045, "total_steps": 78105, "loss": 0.1334, "lr": 1.2143388609031202e-06, "epoch": 3.523782088214583, "percentage": 70.48, "elapsed_time": "2:25:27", "remaining_time": "1:00:56", "throughput": 19848.34, "total_tokens": 173220288} +{"current_steps": 55050, "total_steps": 78105, "loss": 0.2197, "lr": 1.2138597750641319e-06, "epoch": 3.52410217015556, "percentage": 70.48, "elapsed_time": "2:25:27", "remaining_time": "1:00:55", "throughput": 19848.58, "total_tokens": 173235904} +{"current_steps": 55055, "total_steps": 78105, "loss": 0.1482, "lr": 1.2133807534482872e-06, "epoch": 3.524422252096537, "percentage": 70.49, "elapsed_time": "2:25:28", "remaining_time": "1:00:54", "throughput": 19848.79, "total_tokens": 173250816} +{"current_steps": 55060, "total_steps": 78105, "loss": 0.1846, "lr": 1.212901796079506e-06, "epoch": 3.5247423340375135, "percentage": 70.49, "elapsed_time": "2:25:29", "remaining_time": "1:00:53", "throughput": 19849.03, "total_tokens": 173266048} +{"current_steps": 55065, "total_steps": 78105, "loss": 0.1462, "lr": 1.2124229029817047e-06, "epoch": 3.5250624159784905, "percentage": 70.5, "elapsed_time": "2:25:29", "remaining_time": "1:00:52", "throughput": 19849.27, "total_tokens": 173281408} +{"current_steps": 55070, "total_steps": 78105, "loss": 0.1719, "lr": 1.2119440741787972e-06, "epoch": 3.5253824979194675, "percentage": 70.51, "elapsed_time": "2:25:30", "remaining_time": "1:00:51", "throughput": 19849.59, "total_tokens": 173298560} +{"current_steps": 55075, "total_steps": 78105, "loss": 0.1421, "lr": 1.2114653096946922e-06, "epoch": 3.525702579860444, "percentage": 70.51, "elapsed_time": "2:25:31", "remaining_time": "1:00:51", "throughput": 19849.82, "total_tokens": 173313920} +{"current_steps": 55080, "total_steps": 78105, "loss": 0.1484, "lr": 1.2109866095532988e-06, "epoch": 3.526022661801421, "percentage": 70.52, "elapsed_time": "2:25:31", "remaining_time": "1:00:50", "throughput": 19850.13, "total_tokens": 173330304} +{"current_steps": 55085, "total_steps": 78105, "loss": 0.1517, "lr": 1.21050797377852e-06, "epoch": 3.526342743742398, "percentage": 70.53, "elapsed_time": "2:25:32", "remaining_time": "1:00:49", "throughput": 19850.39, "total_tokens": 173345664} +{"current_steps": 55090, "total_steps": 78105, "loss": 0.1688, "lr": 1.2100294023942563e-06, "epoch": 3.526662825683375, "percentage": 70.53, "elapsed_time": "2:25:33", "remaining_time": "1:00:48", "throughput": 19850.64, "total_tokens": 173361408} +{"current_steps": 55095, "total_steps": 78105, "loss": 0.2171, "lr": 1.2095508954244051e-06, "epoch": 3.526982907624352, "percentage": 70.54, "elapsed_time": "2:25:33", "remaining_time": "1:00:47", "throughput": 19850.95, "total_tokens": 173378112} +{"current_steps": 55100, "total_steps": 78105, "loss": 0.1459, "lr": 1.2090724528928604e-06, "epoch": 3.527302989565329, "percentage": 70.55, "elapsed_time": "2:25:34", "remaining_time": "1:00:46", "throughput": 19851.21, "total_tokens": 173393856} +{"current_steps": 55105, "total_steps": 78105, "loss": 0.1535, "lr": 1.208594074823513e-06, "epoch": 3.5276230715063055, "percentage": 70.55, "elapsed_time": "2:25:35", "remaining_time": "1:00:46", "throughput": 19851.46, "total_tokens": 173409920} +{"current_steps": 55110, "total_steps": 78105, "loss": 0.1757, "lr": 1.2081157612402512e-06, "epoch": 3.5279431534472825, "percentage": 70.56, "elapsed_time": "2:25:36", "remaining_time": "1:00:45", "throughput": 19851.7, "total_tokens": 173425472} +{"current_steps": 55115, "total_steps": 78105, "loss": 0.1457, "lr": 1.2076375121669578e-06, "epoch": 3.5282632353882595, "percentage": 70.57, "elapsed_time": "2:25:36", "remaining_time": "1:00:44", "throughput": 19851.99, "total_tokens": 173441408} +{"current_steps": 55120, "total_steps": 78105, "loss": 0.1543, "lr": 1.2071593276275176e-06, "epoch": 3.528583317329236, "percentage": 70.57, "elapsed_time": "2:25:37", "remaining_time": "1:00:43", "throughput": 19852.34, "total_tokens": 173458688} +{"current_steps": 55125, "total_steps": 78105, "loss": 0.1125, "lr": 1.2066812076458043e-06, "epoch": 3.528903399270213, "percentage": 70.58, "elapsed_time": "2:25:38", "remaining_time": "1:00:42", "throughput": 19852.58, "total_tokens": 173474048} +{"current_steps": 55130, "total_steps": 78105, "loss": 0.1115, "lr": 1.206203152245696e-06, "epoch": 3.52922348121119, "percentage": 70.58, "elapsed_time": "2:25:38", "remaining_time": "1:00:41", "throughput": 19852.8, "total_tokens": 173488704} +{"current_steps": 55135, "total_steps": 78105, "loss": 0.1522, "lr": 1.2057251614510629e-06, "epoch": 3.529543563152167, "percentage": 70.59, "elapsed_time": "2:25:39", "remaining_time": "1:00:40", "throughput": 19853.02, "total_tokens": 173503808} +{"current_steps": 55140, "total_steps": 78105, "loss": 0.2235, "lr": 1.2052472352857727e-06, "epoch": 3.529863645093144, "percentage": 70.6, "elapsed_time": "2:25:40", "remaining_time": "1:00:40", "throughput": 19853.33, "total_tokens": 173520256} +{"current_steps": 55145, "total_steps": 78105, "loss": 0.1624, "lr": 1.2047693737736935e-06, "epoch": 3.530183727034121, "percentage": 70.6, "elapsed_time": "2:25:40", "remaining_time": "1:00:39", "throughput": 19853.7, "total_tokens": 173538816} +{"current_steps": 55150, "total_steps": 78105, "loss": 0.1122, "lr": 1.2042915769386832e-06, "epoch": 3.5305038089750975, "percentage": 70.61, "elapsed_time": "2:25:41", "remaining_time": "1:00:38", "throughput": 19853.96, "total_tokens": 173554624} +{"current_steps": 55155, "total_steps": 78105, "loss": 0.1504, "lr": 1.203813844804604e-06, "epoch": 3.5308238909160745, "percentage": 70.62, "elapsed_time": "2:25:42", "remaining_time": "1:00:37", "throughput": 19854.25, "total_tokens": 173570880} +{"current_steps": 55160, "total_steps": 78105, "loss": 0.2002, "lr": 1.2033361773953084e-06, "epoch": 3.5311439728570515, "percentage": 70.62, "elapsed_time": "2:25:42", "remaining_time": "1:00:36", "throughput": 19854.53, "total_tokens": 173587008} +{"current_steps": 55165, "total_steps": 78105, "loss": 0.1973, "lr": 1.2028585747346508e-06, "epoch": 3.531464054798028, "percentage": 70.63, "elapsed_time": "2:25:43", "remaining_time": "1:00:35", "throughput": 19854.76, "total_tokens": 173602432} +{"current_steps": 55170, "total_steps": 78105, "loss": 0.1014, "lr": 1.2023810368464791e-06, "epoch": 3.531784136739005, "percentage": 70.64, "elapsed_time": "2:25:44", "remaining_time": "1:00:35", "throughput": 19855.03, "total_tokens": 173618560} +{"current_steps": 55175, "total_steps": 78105, "loss": 0.1761, "lr": 1.2019035637546396e-06, "epoch": 3.532104218679982, "percentage": 70.64, "elapsed_time": "2:25:44", "remaining_time": "1:00:34", "throughput": 19855.27, "total_tokens": 173633664} +{"current_steps": 55180, "total_steps": 78105, "loss": 0.1754, "lr": 1.2014261554829748e-06, "epoch": 3.532424300620959, "percentage": 70.65, "elapsed_time": "2:25:45", "remaining_time": "1:00:33", "throughput": 19855.6, "total_tokens": 173650240} +{"current_steps": 55185, "total_steps": 78105, "loss": 0.1137, "lr": 1.2009488120553236e-06, "epoch": 3.532744382561936, "percentage": 70.65, "elapsed_time": "2:25:46", "remaining_time": "1:00:32", "throughput": 19855.82, "total_tokens": 173665088} +{"current_steps": 55190, "total_steps": 78105, "loss": 0.154, "lr": 1.2004715334955214e-06, "epoch": 3.533064464502913, "percentage": 70.66, "elapsed_time": "2:25:46", "remaining_time": "1:00:31", "throughput": 19856.05, "total_tokens": 173680256} +{"current_steps": 55195, "total_steps": 78105, "loss": 0.2658, "lr": 1.1999943198274027e-06, "epoch": 3.5333845464438896, "percentage": 70.67, "elapsed_time": "2:25:47", "remaining_time": "1:00:30", "throughput": 19856.3, "total_tokens": 173695872} +{"current_steps": 55200, "total_steps": 78105, "loss": 0.1829, "lr": 1.199517171074796e-06, "epoch": 3.5337046283848665, "percentage": 70.67, "elapsed_time": "2:25:48", "remaining_time": "1:00:30", "throughput": 19856.54, "total_tokens": 173711104} +{"current_steps": 55205, "total_steps": 78105, "loss": 0.1435, "lr": 1.199040087261528e-06, "epoch": 3.5340247103258435, "percentage": 70.68, "elapsed_time": "2:25:49", "remaining_time": "1:00:29", "throughput": 19856.86, "total_tokens": 173728128} +{"current_steps": 55210, "total_steps": 78105, "loss": 0.1638, "lr": 1.1985630684114216e-06, "epoch": 3.53434479226682, "percentage": 70.69, "elapsed_time": "2:25:49", "remaining_time": "1:00:28", "throughput": 19857.18, "total_tokens": 173744640} +{"current_steps": 55215, "total_steps": 78105, "loss": 0.1891, "lr": 1.1980861145482968e-06, "epoch": 3.534664874207797, "percentage": 70.69, "elapsed_time": "2:25:50", "remaining_time": "1:00:27", "throughput": 19857.43, "total_tokens": 173760384} +{"current_steps": 55220, "total_steps": 78105, "loss": 0.1534, "lr": 1.1976092256959696e-06, "epoch": 3.534984956148774, "percentage": 70.7, "elapsed_time": "2:25:51", "remaining_time": "1:00:26", "throughput": 19857.67, "total_tokens": 173775680} +{"current_steps": 55225, "total_steps": 78105, "loss": 0.1374, "lr": 1.197132401878253e-06, "epoch": 3.535305038089751, "percentage": 70.71, "elapsed_time": "2:25:51", "remaining_time": "1:00:25", "throughput": 19858.01, "total_tokens": 173793472} +{"current_steps": 55230, "total_steps": 78105, "loss": 0.1977, "lr": 1.1966556431189597e-06, "epoch": 3.535625120030728, "percentage": 70.71, "elapsed_time": "2:25:52", "remaining_time": "1:00:25", "throughput": 19858.3, "total_tokens": 173809664} +{"current_steps": 55235, "total_steps": 78105, "loss": 0.1825, "lr": 1.196178949441893e-06, "epoch": 3.535945201971705, "percentage": 70.72, "elapsed_time": "2:25:53", "remaining_time": "1:00:24", "throughput": 19858.52, "total_tokens": 173824832} +{"current_steps": 55240, "total_steps": 78105, "loss": 0.1694, "lr": 1.1957023208708598e-06, "epoch": 3.5362652839126816, "percentage": 70.73, "elapsed_time": "2:25:53", "remaining_time": "1:00:23", "throughput": 19858.75, "total_tokens": 173839808} +{"current_steps": 55245, "total_steps": 78105, "loss": 0.1843, "lr": 1.1952257574296572e-06, "epoch": 3.5365853658536586, "percentage": 70.73, "elapsed_time": "2:25:54", "remaining_time": "1:00:22", "throughput": 19858.99, "total_tokens": 173855424} +{"current_steps": 55250, "total_steps": 78105, "loss": 0.1045, "lr": 1.1947492591420847e-06, "epoch": 3.5369054477946356, "percentage": 70.74, "elapsed_time": "2:25:55", "remaining_time": "1:00:21", "throughput": 19859.19, "total_tokens": 173870144} +{"current_steps": 55255, "total_steps": 78105, "loss": 0.2019, "lr": 1.1942728260319355e-06, "epoch": 3.537225529735612, "percentage": 70.74, "elapsed_time": "2:25:55", "remaining_time": "1:00:20", "throughput": 19859.45, "total_tokens": 173885824} +{"current_steps": 55260, "total_steps": 78105, "loss": 0.1624, "lr": 1.1937964581229993e-06, "epoch": 3.537545611676589, "percentage": 70.75, "elapsed_time": "2:25:56", "remaining_time": "1:00:20", "throughput": 19859.68, "total_tokens": 173901056} +{"current_steps": 55265, "total_steps": 78105, "loss": 0.2457, "lr": 1.1933201554390661e-06, "epoch": 3.537865693617566, "percentage": 70.76, "elapsed_time": "2:25:57", "remaining_time": "1:00:19", "throughput": 19859.95, "total_tokens": 173917376} +{"current_steps": 55270, "total_steps": 78105, "loss": 0.0853, "lr": 1.192843918003916e-06, "epoch": 3.538185775558543, "percentage": 70.76, "elapsed_time": "2:25:57", "remaining_time": "1:00:18", "throughput": 19860.18, "total_tokens": 173932224} +{"current_steps": 55275, "total_steps": 78105, "loss": 0.1423, "lr": 1.192367745841333e-06, "epoch": 3.53850585749952, "percentage": 70.77, "elapsed_time": "2:25:58", "remaining_time": "1:00:17", "throughput": 19860.43, "total_tokens": 173947520} +{"current_steps": 55280, "total_steps": 78105, "loss": 0.2957, "lr": 1.1918916389750934e-06, "epoch": 3.5388259394404966, "percentage": 70.78, "elapsed_time": "2:25:59", "remaining_time": "1:00:16", "throughput": 19860.66, "total_tokens": 173962944} +{"current_steps": 55285, "total_steps": 78105, "loss": 0.1717, "lr": 1.1914155974289719e-06, "epoch": 3.5391460213814736, "percentage": 70.78, "elapsed_time": "2:25:59", "remaining_time": "1:00:15", "throughput": 19860.88, "total_tokens": 173977472} +{"current_steps": 55290, "total_steps": 78105, "loss": 0.1313, "lr": 1.190939621226739e-06, "epoch": 3.5394661033224506, "percentage": 70.79, "elapsed_time": "2:26:00", "remaining_time": "1:00:14", "throughput": 19861.14, "total_tokens": 173993408} +{"current_steps": 55295, "total_steps": 78105, "loss": 0.1651, "lr": 1.1904637103921629e-06, "epoch": 3.5397861852634276, "percentage": 70.8, "elapsed_time": "2:26:01", "remaining_time": "1:00:14", "throughput": 19861.42, "total_tokens": 174009536} +{"current_steps": 55300, "total_steps": 78105, "loss": 0.2041, "lr": 1.189987864949008e-06, "epoch": 3.540106267204404, "percentage": 70.8, "elapsed_time": "2:26:01", "remaining_time": "1:00:13", "throughput": 19861.66, "total_tokens": 174025024} +{"current_steps": 55305, "total_steps": 78105, "loss": 0.157, "lr": 1.1895120849210355e-06, "epoch": 3.540426349145381, "percentage": 70.81, "elapsed_time": "2:26:02", "remaining_time": "1:00:12", "throughput": 19861.97, "total_tokens": 174041536} +{"current_steps": 55310, "total_steps": 78105, "loss": 0.2251, "lr": 1.1890363703320024e-06, "epoch": 3.540746431086358, "percentage": 70.81, "elapsed_time": "2:26:03", "remaining_time": "1:00:11", "throughput": 19862.4, "total_tokens": 174061184} +{"current_steps": 55315, "total_steps": 78105, "loss": 0.162, "lr": 1.1885607212056654e-06, "epoch": 3.541066513027335, "percentage": 70.82, "elapsed_time": "2:26:04", "remaining_time": "1:00:10", "throughput": 19862.64, "total_tokens": 174076864} +{"current_steps": 55320, "total_steps": 78105, "loss": 0.1838, "lr": 1.1880851375657751e-06, "epoch": 3.541386594968312, "percentage": 70.83, "elapsed_time": "2:26:04", "remaining_time": "1:00:09", "throughput": 19862.86, "total_tokens": 174091776} +{"current_steps": 55325, "total_steps": 78105, "loss": 0.1919, "lr": 1.1876096194360792e-06, "epoch": 3.5417066769092886, "percentage": 70.83, "elapsed_time": "2:26:05", "remaining_time": "1:00:09", "throughput": 19863.23, "total_tokens": 174109632} +{"current_steps": 55330, "total_steps": 78105, "loss": 0.1432, "lr": 1.1871341668403233e-06, "epoch": 3.5420267588502656, "percentage": 70.84, "elapsed_time": "2:26:06", "remaining_time": "1:00:08", "throughput": 19863.47, "total_tokens": 174124992} +{"current_steps": 55335, "total_steps": 78105, "loss": 0.221, "lr": 1.1866587798022474e-06, "epoch": 3.5423468407912426, "percentage": 70.85, "elapsed_time": "2:26:06", "remaining_time": "1:00:07", "throughput": 19863.69, "total_tokens": 174139968} +{"current_steps": 55340, "total_steps": 78105, "loss": 0.1514, "lr": 1.1861834583455931e-06, "epoch": 3.5426669227322196, "percentage": 70.85, "elapsed_time": "2:26:07", "remaining_time": "1:00:06", "throughput": 19863.98, "total_tokens": 174156224} +{"current_steps": 55345, "total_steps": 78105, "loss": 0.136, "lr": 1.1857082024940917e-06, "epoch": 3.542987004673196, "percentage": 70.86, "elapsed_time": "2:26:08", "remaining_time": "1:00:05", "throughput": 19864.34, "total_tokens": 174174144} +{"current_steps": 55350, "total_steps": 78105, "loss": 0.1773, "lr": 1.1852330122714783e-06, "epoch": 3.543307086614173, "percentage": 70.87, "elapsed_time": "2:26:08", "remaining_time": "1:00:04", "throughput": 19864.68, "total_tokens": 174191168} +{"current_steps": 55355, "total_steps": 78105, "loss": 0.226, "lr": 1.184757887701478e-06, "epoch": 3.54362716855515, "percentage": 70.87, "elapsed_time": "2:26:09", "remaining_time": "1:00:04", "throughput": 19864.91, "total_tokens": 174206208} +{"current_steps": 55360, "total_steps": 78105, "loss": 0.1229, "lr": 1.1842828288078193e-06, "epoch": 3.543947250496127, "percentage": 70.88, "elapsed_time": "2:26:10", "remaining_time": "1:00:03", "throughput": 19865.15, "total_tokens": 174222016} +{"current_steps": 55365, "total_steps": 78105, "loss": 0.1571, "lr": 1.1838078356142227e-06, "epoch": 3.544267332437104, "percentage": 70.89, "elapsed_time": "2:26:10", "remaining_time": "1:00:02", "throughput": 19865.46, "total_tokens": 174239104} +{"current_steps": 55370, "total_steps": 78105, "loss": 0.1274, "lr": 1.1833329081444059e-06, "epoch": 3.5445874143780807, "percentage": 70.89, "elapsed_time": "2:26:11", "remaining_time": "1:00:01", "throughput": 19865.7, "total_tokens": 174254784} +{"current_steps": 55375, "total_steps": 78105, "loss": 0.1939, "lr": 1.1828580464220873e-06, "epoch": 3.5449074963190577, "percentage": 70.9, "elapsed_time": "2:26:12", "remaining_time": "1:00:00", "throughput": 19865.94, "total_tokens": 174270336} +{"current_steps": 55380, "total_steps": 78105, "loss": 0.1964, "lr": 1.1823832504709754e-06, "epoch": 3.5452275782600347, "percentage": 70.9, "elapsed_time": "2:26:12", "remaining_time": "0:59:59", "throughput": 19866.18, "total_tokens": 174285696} +{"current_steps": 55385, "total_steps": 78105, "loss": 0.1029, "lr": 1.1819085203147823e-06, "epoch": 3.545547660201011, "percentage": 70.91, "elapsed_time": "2:26:13", "remaining_time": "0:59:59", "throughput": 19866.39, "total_tokens": 174300672} +{"current_steps": 55390, "total_steps": 78105, "loss": 0.1274, "lr": 1.1814338559772104e-06, "epoch": 3.545867742141988, "percentage": 70.92, "elapsed_time": "2:26:14", "remaining_time": "0:59:58", "throughput": 19866.66, "total_tokens": 174316672} +{"current_steps": 55395, "total_steps": 78105, "loss": 0.1423, "lr": 1.1809592574819644e-06, "epoch": 3.546187824082965, "percentage": 70.92, "elapsed_time": "2:26:15", "remaining_time": "0:59:57", "throughput": 19866.95, "total_tokens": 174332928} +{"current_steps": 55400, "total_steps": 78105, "loss": 0.1825, "lr": 1.1804847248527426e-06, "epoch": 3.546507906023942, "percentage": 70.93, "elapsed_time": "2:26:15", "remaining_time": "0:59:56", "throughput": 19867.17, "total_tokens": 174347712} +{"current_steps": 55405, "total_steps": 78105, "loss": 0.1215, "lr": 1.1800102581132405e-06, "epoch": 3.546827987964919, "percentage": 70.94, "elapsed_time": "2:26:16", "remaining_time": "0:59:55", "throughput": 19867.39, "total_tokens": 174362752} +{"current_steps": 55410, "total_steps": 78105, "loss": 0.159, "lr": 1.1795358572871504e-06, "epoch": 3.547148069905896, "percentage": 70.94, "elapsed_time": "2:26:17", "remaining_time": "0:59:54", "throughput": 19867.65, "total_tokens": 174378880} +{"current_steps": 55415, "total_steps": 78105, "loss": 0.1177, "lr": 1.1790615223981616e-06, "epoch": 3.5474681518468727, "percentage": 70.95, "elapsed_time": "2:26:17", "remaining_time": "0:59:54", "throughput": 19867.95, "total_tokens": 174395520} +{"current_steps": 55420, "total_steps": 78105, "loss": 0.1959, "lr": 1.1785872534699591e-06, "epoch": 3.5477882337878497, "percentage": 70.96, "elapsed_time": "2:26:18", "remaining_time": "0:59:53", "throughput": 19868.27, "total_tokens": 174412672} +{"current_steps": 55425, "total_steps": 78105, "loss": 0.1277, "lr": 1.178113050526227e-06, "epoch": 3.5481083157288267, "percentage": 70.96, "elapsed_time": "2:26:19", "remaining_time": "0:59:52", "throughput": 19868.51, "total_tokens": 174428096} +{"current_steps": 55430, "total_steps": 78105, "loss": 0.1724, "lr": 1.1776389135906435e-06, "epoch": 3.5484283976698032, "percentage": 70.97, "elapsed_time": "2:26:19", "remaining_time": "0:59:51", "throughput": 19868.71, "total_tokens": 174442880} +{"current_steps": 55435, "total_steps": 78105, "loss": 0.2434, "lr": 1.1771648426868847e-06, "epoch": 3.54874847961078, "percentage": 70.97, "elapsed_time": "2:26:20", "remaining_time": "0:59:50", "throughput": 19868.92, "total_tokens": 174457600} +{"current_steps": 55440, "total_steps": 78105, "loss": 0.1841, "lr": 1.1766908378386236e-06, "epoch": 3.549068561551757, "percentage": 70.98, "elapsed_time": "2:26:21", "remaining_time": "0:59:49", "throughput": 19869.18, "total_tokens": 174473152} +{"current_steps": 55445, "total_steps": 78105, "loss": 0.1522, "lr": 1.1762168990695292e-06, "epoch": 3.549388643492734, "percentage": 70.99, "elapsed_time": "2:26:21", "remaining_time": "0:59:49", "throughput": 19869.48, "total_tokens": 174489728} +{"current_steps": 55450, "total_steps": 78105, "loss": 0.2114, "lr": 1.1757430264032671e-06, "epoch": 3.549708725433711, "percentage": 70.99, "elapsed_time": "2:26:22", "remaining_time": "0:59:48", "throughput": 19869.74, "total_tokens": 174505792} +{"current_steps": 55455, "total_steps": 78105, "loss": 0.1655, "lr": 1.1752692198635e-06, "epoch": 3.550028807374688, "percentage": 71.0, "elapsed_time": "2:26:23", "remaining_time": "0:59:47", "throughput": 19869.97, "total_tokens": 174520896} +{"current_steps": 55460, "total_steps": 78105, "loss": 0.1716, "lr": 1.1747954794738895e-06, "epoch": 3.5503488893156647, "percentage": 71.01, "elapsed_time": "2:26:23", "remaining_time": "0:59:46", "throughput": 19870.19, "total_tokens": 174535488} +{"current_steps": 55465, "total_steps": 78105, "loss": 0.1682, "lr": 1.174321805258088e-06, "epoch": 3.5506689712566417, "percentage": 71.01, "elapsed_time": "2:26:24", "remaining_time": "0:59:45", "throughput": 19870.41, "total_tokens": 174550208} +{"current_steps": 55470, "total_steps": 78105, "loss": 0.1313, "lr": 1.1738481972397522e-06, "epoch": 3.5509890531976187, "percentage": 71.02, "elapsed_time": "2:26:25", "remaining_time": "0:59:44", "throughput": 19870.64, "total_tokens": 174565824} +{"current_steps": 55475, "total_steps": 78105, "loss": 0.1355, "lr": 1.1733746554425278e-06, "epoch": 3.5513091351385953, "percentage": 71.03, "elapsed_time": "2:26:25", "remaining_time": "0:59:44", "throughput": 19870.91, "total_tokens": 174582208} +{"current_steps": 55480, "total_steps": 78105, "loss": 0.2098, "lr": 1.172901179890064e-06, "epoch": 3.5516292170795722, "percentage": 71.03, "elapsed_time": "2:26:26", "remaining_time": "0:59:43", "throughput": 19871.23, "total_tokens": 174599232} +{"current_steps": 55485, "total_steps": 78105, "loss": 0.1394, "lr": 1.172427770606003e-06, "epoch": 3.5519492990205492, "percentage": 71.04, "elapsed_time": "2:26:27", "remaining_time": "0:59:42", "throughput": 19871.56, "total_tokens": 174616320} +{"current_steps": 55490, "total_steps": 78105, "loss": 0.1705, "lr": 1.171954427613983e-06, "epoch": 3.5522693809615262, "percentage": 71.05, "elapsed_time": "2:26:27", "remaining_time": "0:59:41", "throughput": 19871.81, "total_tokens": 174631808} +{"current_steps": 55495, "total_steps": 78105, "loss": 0.1855, "lr": 1.1714811509376432e-06, "epoch": 3.552589462902503, "percentage": 71.05, "elapsed_time": "2:26:28", "remaining_time": "0:59:40", "throughput": 19872.02, "total_tokens": 174646784} +{"current_steps": 55500, "total_steps": 78105, "loss": 0.1401, "lr": 1.1710079406006125e-06, "epoch": 3.55290954484348, "percentage": 71.06, "elapsed_time": "2:26:29", "remaining_time": "0:59:39", "throughput": 19872.28, "total_tokens": 174662592} +{"current_steps": 55505, "total_steps": 78105, "loss": 0.1048, "lr": 1.170534796626524e-06, "epoch": 3.5532296267844568, "percentage": 71.06, "elapsed_time": "2:26:29", "remaining_time": "0:59:39", "throughput": 19872.56, "total_tokens": 174678720} +{"current_steps": 55510, "total_steps": 78105, "loss": 0.157, "lr": 1.1700617190390029e-06, "epoch": 3.5535497087254337, "percentage": 71.07, "elapsed_time": "2:26:30", "remaining_time": "0:59:38", "throughput": 19872.83, "total_tokens": 174694848} +{"current_steps": 55515, "total_steps": 78105, "loss": 0.1073, "lr": 1.1695887078616718e-06, "epoch": 3.5538697906664107, "percentage": 71.08, "elapsed_time": "2:26:31", "remaining_time": "0:59:37", "throughput": 19873.13, "total_tokens": 174711488} +{"current_steps": 55520, "total_steps": 78105, "loss": 0.195, "lr": 1.169115763118151e-06, "epoch": 3.5541898726073873, "percentage": 71.08, "elapsed_time": "2:26:32", "remaining_time": "0:59:36", "throughput": 19873.54, "total_tokens": 174730240} +{"current_steps": 55525, "total_steps": 78105, "loss": 0.2061, "lr": 1.1686428848320568e-06, "epoch": 3.5545099545483643, "percentage": 71.09, "elapsed_time": "2:26:32", "remaining_time": "0:59:35", "throughput": 19873.82, "total_tokens": 174746112} +{"current_steps": 55530, "total_steps": 78105, "loss": 0.1502, "lr": 1.168170073027002e-06, "epoch": 3.5548300364893413, "percentage": 71.1, "elapsed_time": "2:26:33", "remaining_time": "0:59:34", "throughput": 19874.09, "total_tokens": 174762048} +{"current_steps": 55535, "total_steps": 78105, "loss": 0.2336, "lr": 1.1676973277265965e-06, "epoch": 3.5551501184303183, "percentage": 71.1, "elapsed_time": "2:26:34", "remaining_time": "0:59:34", "throughput": 19874.32, "total_tokens": 174777344} +{"current_steps": 55540, "total_steps": 78105, "loss": 0.1855, "lr": 1.167224648954446e-06, "epoch": 3.5554702003712952, "percentage": 71.11, "elapsed_time": "2:26:34", "remaining_time": "0:59:33", "throughput": 19874.63, "total_tokens": 174793792} +{"current_steps": 55545, "total_steps": 78105, "loss": 0.2353, "lr": 1.166752036734155e-06, "epoch": 3.555790282312272, "percentage": 71.12, "elapsed_time": "2:26:35", "remaining_time": "0:59:32", "throughput": 19874.92, "total_tokens": 174810112} +{"current_steps": 55550, "total_steps": 78105, "loss": 0.1519, "lr": 1.1662794910893228e-06, "epoch": 3.556110364253249, "percentage": 71.12, "elapsed_time": "2:26:36", "remaining_time": "0:59:31", "throughput": 19875.17, "total_tokens": 174825344} +{"current_steps": 55555, "total_steps": 78105, "loss": 0.1605, "lr": 1.1658070120435457e-06, "epoch": 3.5564304461942258, "percentage": 71.13, "elapsed_time": "2:26:36", "remaining_time": "0:59:30", "throughput": 19875.4, "total_tokens": 174840448} +{"current_steps": 55560, "total_steps": 78105, "loss": 0.2179, "lr": 1.1653345996204168e-06, "epoch": 3.5567505281352028, "percentage": 71.14, "elapsed_time": "2:26:37", "remaining_time": "0:59:29", "throughput": 19875.61, "total_tokens": 174855168} +{"current_steps": 55565, "total_steps": 78105, "loss": 0.176, "lr": 1.1648622538435252e-06, "epoch": 3.5570706100761793, "percentage": 71.14, "elapsed_time": "2:26:38", "remaining_time": "0:59:28", "throughput": 19875.86, "total_tokens": 174870400} +{"current_steps": 55570, "total_steps": 78105, "loss": 0.2301, "lr": 1.1643899747364599e-06, "epoch": 3.5573906920171563, "percentage": 71.15, "elapsed_time": "2:26:38", "remaining_time": "0:59:28", "throughput": 19876.08, "total_tokens": 174885312} +{"current_steps": 55575, "total_steps": 78105, "loss": 0.1409, "lr": 1.1639177623228005e-06, "epoch": 3.5577107739581333, "percentage": 71.15, "elapsed_time": "2:26:39", "remaining_time": "0:59:27", "throughput": 19876.31, "total_tokens": 174900288} +{"current_steps": 55580, "total_steps": 78105, "loss": 0.202, "lr": 1.1634456166261304e-06, "epoch": 3.5580308558991103, "percentage": 71.16, "elapsed_time": "2:26:40", "remaining_time": "0:59:26", "throughput": 19876.6, "total_tokens": 174916672} +{"current_steps": 55585, "total_steps": 78105, "loss": 0.1512, "lr": 1.1629735376700227e-06, "epoch": 3.5583509378400873, "percentage": 71.17, "elapsed_time": "2:26:40", "remaining_time": "0:59:25", "throughput": 19876.87, "total_tokens": 174932800} +{"current_steps": 55590, "total_steps": 78105, "loss": 0.1533, "lr": 1.1625015254780531e-06, "epoch": 3.558671019781064, "percentage": 71.17, "elapsed_time": "2:26:41", "remaining_time": "0:59:24", "throughput": 19877.13, "total_tokens": 174948608} +{"current_steps": 55595, "total_steps": 78105, "loss": 0.1162, "lr": 1.1620295800737905e-06, "epoch": 3.558991101722041, "percentage": 71.18, "elapsed_time": "2:26:42", "remaining_time": "0:59:23", "throughput": 19877.3, "total_tokens": 174962816} +{"current_steps": 55600, "total_steps": 78105, "loss": 0.2085, "lr": 1.1615577014808005e-06, "epoch": 3.559311183663018, "percentage": 71.19, "elapsed_time": "2:26:42", "remaining_time": "0:59:23", "throughput": 19877.58, "total_tokens": 174979200} +{"current_steps": 55605, "total_steps": 78105, "loss": 0.1848, "lr": 1.161085889722649e-06, "epoch": 3.559631265603995, "percentage": 71.19, "elapsed_time": "2:26:43", "remaining_time": "0:59:22", "throughput": 19877.84, "total_tokens": 174995008} +{"current_steps": 55610, "total_steps": 78105, "loss": 0.0892, "lr": 1.160614144822892e-06, "epoch": 3.5599513475449713, "percentage": 71.2, "elapsed_time": "2:26:44", "remaining_time": "0:59:21", "throughput": 19878.08, "total_tokens": 175010560} +{"current_steps": 55615, "total_steps": 78105, "loss": 0.1707, "lr": 1.16014246680509e-06, "epoch": 3.5602714294859483, "percentage": 71.21, "elapsed_time": "2:26:44", "remaining_time": "0:59:20", "throughput": 19878.31, "total_tokens": 175026240} +{"current_steps": 55620, "total_steps": 78105, "loss": 0.1431, "lr": 1.1596708556927924e-06, "epoch": 3.5605915114269253, "percentage": 71.21, "elapsed_time": "2:26:45", "remaining_time": "0:59:19", "throughput": 19878.6, "total_tokens": 175042752} +{"current_steps": 55625, "total_steps": 78105, "loss": 0.166, "lr": 1.1591993115095517e-06, "epoch": 3.5609115933679023, "percentage": 71.22, "elapsed_time": "2:26:46", "remaining_time": "0:59:18", "throughput": 19878.82, "total_tokens": 175057856} +{"current_steps": 55630, "total_steps": 78105, "loss": 0.2111, "lr": 1.1587278342789135e-06, "epoch": 3.5612316753088793, "percentage": 71.22, "elapsed_time": "2:26:46", "remaining_time": "0:59:18", "throughput": 19879.07, "total_tokens": 175073472} +{"current_steps": 55635, "total_steps": 78105, "loss": 0.1581, "lr": 1.1582564240244205e-06, "epoch": 3.561551757249856, "percentage": 71.23, "elapsed_time": "2:26:47", "remaining_time": "0:59:17", "throughput": 19879.4, "total_tokens": 175090880} +{"current_steps": 55640, "total_steps": 78105, "loss": 0.1837, "lr": 1.1577850807696131e-06, "epoch": 3.561871839190833, "percentage": 71.24, "elapsed_time": "2:26:48", "remaining_time": "0:59:16", "throughput": 19879.69, "total_tokens": 175107072} +{"current_steps": 55645, "total_steps": 78105, "loss": 0.1625, "lr": 1.1573138045380273e-06, "epoch": 3.56219192113181, "percentage": 71.24, "elapsed_time": "2:26:49", "remaining_time": "0:59:15", "throughput": 19879.96, "total_tokens": 175123008} +{"current_steps": 55650, "total_steps": 78105, "loss": 0.1638, "lr": 1.1568425953531953e-06, "epoch": 3.5625120030727864, "percentage": 71.25, "elapsed_time": "2:26:49", "remaining_time": "0:59:14", "throughput": 19880.26, "total_tokens": 175139584} +{"current_steps": 55655, "total_steps": 78105, "loss": 0.2076, "lr": 1.1563714532386498e-06, "epoch": 3.5628320850137634, "percentage": 71.26, "elapsed_time": "2:26:50", "remaining_time": "0:59:13", "throughput": 19880.47, "total_tokens": 175154240} +{"current_steps": 55660, "total_steps": 78105, "loss": 0.2162, "lr": 1.1559003782179131e-06, "epoch": 3.5631521669547404, "percentage": 71.26, "elapsed_time": "2:26:51", "remaining_time": "0:59:13", "throughput": 19880.68, "total_tokens": 175168896} +{"current_steps": 55665, "total_steps": 78105, "loss": 0.0951, "lr": 1.1554293703145114e-06, "epoch": 3.5634722488957173, "percentage": 71.27, "elapsed_time": "2:26:51", "remaining_time": "0:59:12", "throughput": 19880.91, "total_tokens": 175183552} +{"current_steps": 55670, "total_steps": 78105, "loss": 0.1163, "lr": 1.1549584295519635e-06, "epoch": 3.5637923308366943, "percentage": 71.28, "elapsed_time": "2:26:52", "remaining_time": "0:59:11", "throughput": 19881.13, "total_tokens": 175198528} +{"current_steps": 55675, "total_steps": 78105, "loss": 0.1664, "lr": 1.1544875559537856e-06, "epoch": 3.5641124127776713, "percentage": 71.28, "elapsed_time": "2:26:52", "remaining_time": "0:59:10", "throughput": 19881.36, "total_tokens": 175213952} +{"current_steps": 55680, "total_steps": 78105, "loss": 0.1836, "lr": 1.1540167495434904e-06, "epoch": 3.564432494718648, "percentage": 71.29, "elapsed_time": "2:26:53", "remaining_time": "0:59:09", "throughput": 19881.6, "total_tokens": 175229184} +{"current_steps": 55685, "total_steps": 78105, "loss": 0.1354, "lr": 1.153546010344587e-06, "epoch": 3.564752576659625, "percentage": 71.3, "elapsed_time": "2:26:54", "remaining_time": "0:59:08", "throughput": 19881.88, "total_tokens": 175245312} +{"current_steps": 55690, "total_steps": 78105, "loss": 0.1861, "lr": 1.1530753383805843e-06, "epoch": 3.565072658600602, "percentage": 71.3, "elapsed_time": "2:26:54", "remaining_time": "0:59:07", "throughput": 19882.13, "total_tokens": 175260416} +{"current_steps": 55695, "total_steps": 78105, "loss": 0.2018, "lr": 1.1526047336749812e-06, "epoch": 3.5653927405415784, "percentage": 71.31, "elapsed_time": "2:26:55", "remaining_time": "0:59:07", "throughput": 19882.34, "total_tokens": 175275200} +{"current_steps": 55700, "total_steps": 78105, "loss": 0.1482, "lr": 1.1521341962512805e-06, "epoch": 3.5657128224825554, "percentage": 71.31, "elapsed_time": "2:26:56", "remaining_time": "0:59:06", "throughput": 19882.59, "total_tokens": 175291072} +{"current_steps": 55705, "total_steps": 78105, "loss": 0.15, "lr": 1.1516637261329774e-06, "epoch": 3.5660329044235324, "percentage": 71.32, "elapsed_time": "2:26:57", "remaining_time": "0:59:05", "throughput": 19882.91, "total_tokens": 175307968} +{"current_steps": 55710, "total_steps": 78105, "loss": 0.2098, "lr": 1.1511933233435643e-06, "epoch": 3.5663529863645094, "percentage": 71.33, "elapsed_time": "2:26:57", "remaining_time": "0:59:04", "throughput": 19883.1, "total_tokens": 175322496} +{"current_steps": 55715, "total_steps": 78105, "loss": 0.238, "lr": 1.1507229879065312e-06, "epoch": 3.5666730683054864, "percentage": 71.33, "elapsed_time": "2:26:58", "remaining_time": "0:59:03", "throughput": 19883.37, "total_tokens": 175338368} +{"current_steps": 55720, "total_steps": 78105, "loss": 0.2193, "lr": 1.1502527198453632e-06, "epoch": 3.5669931502464634, "percentage": 71.34, "elapsed_time": "2:26:59", "remaining_time": "0:59:02", "throughput": 19883.66, "total_tokens": 175354688} +{"current_steps": 55725, "total_steps": 78105, "loss": 0.2203, "lr": 1.1497825191835453e-06, "epoch": 3.56731323218744, "percentage": 71.35, "elapsed_time": "2:26:59", "remaining_time": "0:59:02", "throughput": 19883.98, "total_tokens": 175372032} +{"current_steps": 55730, "total_steps": 78105, "loss": 0.1978, "lr": 1.1493123859445534e-06, "epoch": 3.567633314128417, "percentage": 71.35, "elapsed_time": "2:27:00", "remaining_time": "0:59:01", "throughput": 19884.22, "total_tokens": 175387392} +{"current_steps": 55735, "total_steps": 78105, "loss": 0.1106, "lr": 1.1488423201518668e-06, "epoch": 3.567953396069394, "percentage": 71.36, "elapsed_time": "2:27:01", "remaining_time": "0:59:00", "throughput": 19884.46, "total_tokens": 175402624} +{"current_steps": 55740, "total_steps": 78105, "loss": 0.2546, "lr": 1.1483723218289564e-06, "epoch": 3.5682734780103704, "percentage": 71.37, "elapsed_time": "2:27:01", "remaining_time": "0:58:59", "throughput": 19884.7, "total_tokens": 175418368} +{"current_steps": 55745, "total_steps": 78105, "loss": 0.1019, "lr": 1.147902390999292e-06, "epoch": 3.5685935599513474, "percentage": 71.37, "elapsed_time": "2:27:02", "remaining_time": "0:58:58", "throughput": 19884.96, "total_tokens": 175434112} +{"current_steps": 55750, "total_steps": 78105, "loss": 0.1804, "lr": 1.1474325276863395e-06, "epoch": 3.5689136418923244, "percentage": 71.38, "elapsed_time": "2:27:03", "remaining_time": "0:58:57", "throughput": 19885.26, "total_tokens": 175450752} +{"current_steps": 55755, "total_steps": 78105, "loss": 0.232, "lr": 1.1469627319135612e-06, "epoch": 3.5692337238333014, "percentage": 71.38, "elapsed_time": "2:27:03", "remaining_time": "0:58:57", "throughput": 19885.54, "total_tokens": 175467008} +{"current_steps": 55760, "total_steps": 78105, "loss": 0.1739, "lr": 1.1464930037044164e-06, "epoch": 3.5695538057742784, "percentage": 71.39, "elapsed_time": "2:27:04", "remaining_time": "0:58:56", "throughput": 19885.8, "total_tokens": 175482752} +{"current_steps": 55765, "total_steps": 78105, "loss": 0.1691, "lr": 1.1460233430823608e-06, "epoch": 3.5698738877152554, "percentage": 71.4, "elapsed_time": "2:27:05", "remaining_time": "0:58:55", "throughput": 19886.05, "total_tokens": 175498368} +{"current_steps": 55770, "total_steps": 78105, "loss": 0.1854, "lr": 1.145553750070846e-06, "epoch": 3.570193969656232, "percentage": 71.4, "elapsed_time": "2:27:06", "remaining_time": "0:58:54", "throughput": 19886.48, "total_tokens": 175518400} +{"current_steps": 55775, "total_steps": 78105, "loss": 0.1446, "lr": 1.1450842246933227e-06, "epoch": 3.570514051597209, "percentage": 71.41, "elapsed_time": "2:27:06", "remaining_time": "0:58:53", "throughput": 19886.74, "total_tokens": 175534144} +{"current_steps": 55780, "total_steps": 78105, "loss": 0.1739, "lr": 1.144614766973236e-06, "epoch": 3.570834133538186, "percentage": 71.42, "elapsed_time": "2:27:07", "remaining_time": "0:58:53", "throughput": 19886.98, "total_tokens": 175549952} +{"current_steps": 55785, "total_steps": 78105, "loss": 0.1334, "lr": 1.1441453769340275e-06, "epoch": 3.5711542154791625, "percentage": 71.42, "elapsed_time": "2:27:08", "remaining_time": "0:58:52", "throughput": 19887.26, "total_tokens": 175566336} +{"current_steps": 55790, "total_steps": 78105, "loss": 0.1545, "lr": 1.1436760545991368e-06, "epoch": 3.5714742974201394, "percentage": 71.43, "elapsed_time": "2:27:08", "remaining_time": "0:58:51", "throughput": 19887.53, "total_tokens": 175582400} +{"current_steps": 55795, "total_steps": 78105, "loss": 0.166, "lr": 1.1432067999919979e-06, "epoch": 3.5717943793611164, "percentage": 71.44, "elapsed_time": "2:27:09", "remaining_time": "0:58:50", "throughput": 19887.83, "total_tokens": 175598784} +{"current_steps": 55800, "total_steps": 78105, "loss": 0.2502, "lr": 1.1427376131360462e-06, "epoch": 3.5721144613020934, "percentage": 71.44, "elapsed_time": "2:27:10", "remaining_time": "0:58:49", "throughput": 19888.2, "total_tokens": 175617152} +{"current_steps": 55805, "total_steps": 78105, "loss": 0.1629, "lr": 1.1422684940547064e-06, "epoch": 3.5724345432430704, "percentage": 71.45, "elapsed_time": "2:27:10", "remaining_time": "0:58:48", "throughput": 19888.48, "total_tokens": 175633664} +{"current_steps": 55810, "total_steps": 78105, "loss": 0.1444, "lr": 1.1417994427714076e-06, "epoch": 3.572754625184047, "percentage": 71.46, "elapsed_time": "2:27:11", "remaining_time": "0:58:48", "throughput": 19888.7, "total_tokens": 175648448} +{"current_steps": 55815, "total_steps": 78105, "loss": 0.1474, "lr": 1.1413304593095683e-06, "epoch": 3.573074707125024, "percentage": 71.46, "elapsed_time": "2:27:12", "remaining_time": "0:58:47", "throughput": 19888.99, "total_tokens": 175664640} +{"current_steps": 55820, "total_steps": 78105, "loss": 0.0936, "lr": 1.1408615436926098e-06, "epoch": 3.573394789066001, "percentage": 71.47, "elapsed_time": "2:27:12", "remaining_time": "0:58:46", "throughput": 19889.2, "total_tokens": 175679488} +{"current_steps": 55825, "total_steps": 78105, "loss": 0.1603, "lr": 1.1403926959439462e-06, "epoch": 3.573714871006978, "percentage": 71.47, "elapsed_time": "2:27:13", "remaining_time": "0:58:45", "throughput": 19889.49, "total_tokens": 175695744} +{"current_steps": 55830, "total_steps": 78105, "loss": 0.1229, "lr": 1.1399239160869897e-06, "epoch": 3.5740349529479545, "percentage": 71.48, "elapsed_time": "2:27:14", "remaining_time": "0:58:44", "throughput": 19889.71, "total_tokens": 175710848} +{"current_steps": 55835, "total_steps": 78105, "loss": 0.1613, "lr": 1.1394552041451482e-06, "epoch": 3.5743550348889315, "percentage": 71.49, "elapsed_time": "2:27:14", "remaining_time": "0:58:43", "throughput": 19889.96, "total_tokens": 175726592} +{"current_steps": 55840, "total_steps": 78105, "loss": 0.2505, "lr": 1.138986560141826e-06, "epoch": 3.5746751168299085, "percentage": 71.49, "elapsed_time": "2:27:15", "remaining_time": "0:58:43", "throughput": 19890.19, "total_tokens": 175741632} +{"current_steps": 55845, "total_steps": 78105, "loss": 0.1542, "lr": 1.1385179841004276e-06, "epoch": 3.5749951987708855, "percentage": 71.5, "elapsed_time": "2:27:16", "remaining_time": "0:58:42", "throughput": 19890.39, "total_tokens": 175756608} +{"current_steps": 55850, "total_steps": 78105, "loss": 0.2027, "lr": 1.1380494760443475e-06, "epoch": 3.5753152807118624, "percentage": 71.51, "elapsed_time": "2:27:16", "remaining_time": "0:58:41", "throughput": 19890.63, "total_tokens": 175771904} +{"current_steps": 55855, "total_steps": 78105, "loss": 0.2972, "lr": 1.1375810359969834e-06, "epoch": 3.575635362652839, "percentage": 71.51, "elapsed_time": "2:27:17", "remaining_time": "0:58:40", "throughput": 19890.88, "total_tokens": 175787264} +{"current_steps": 55860, "total_steps": 78105, "loss": 0.2696, "lr": 1.137112663981726e-06, "epoch": 3.575955444593816, "percentage": 71.52, "elapsed_time": "2:27:18", "remaining_time": "0:58:39", "throughput": 19891.1, "total_tokens": 175802240} +{"current_steps": 55865, "total_steps": 78105, "loss": 0.1377, "lr": 1.136644360021963e-06, "epoch": 3.576275526534793, "percentage": 71.53, "elapsed_time": "2:27:18", "remaining_time": "0:58:38", "throughput": 19891.34, "total_tokens": 175817728} +{"current_steps": 55870, "total_steps": 78105, "loss": 0.1294, "lr": 1.136176124141079e-06, "epoch": 3.57659560847577, "percentage": 71.53, "elapsed_time": "2:27:19", "remaining_time": "0:58:37", "throughput": 19891.59, "total_tokens": 175833344} +{"current_steps": 55875, "total_steps": 78105, "loss": 0.1668, "lr": 1.1357079563624556e-06, "epoch": 3.5769156904167465, "percentage": 71.54, "elapsed_time": "2:27:20", "remaining_time": "0:58:37", "throughput": 19891.86, "total_tokens": 175849600} +{"current_steps": 55880, "total_steps": 78105, "loss": 0.1578, "lr": 1.1352398567094696e-06, "epoch": 3.5772357723577235, "percentage": 71.54, "elapsed_time": "2:27:20", "remaining_time": "0:58:36", "throughput": 19892.07, "total_tokens": 175864064} +{"current_steps": 55885, "total_steps": 78105, "loss": 0.1167, "lr": 1.1347718252054981e-06, "epoch": 3.5775558542987005, "percentage": 71.55, "elapsed_time": "2:27:21", "remaining_time": "0:58:35", "throughput": 19892.29, "total_tokens": 175879104} +{"current_steps": 55890, "total_steps": 78105, "loss": 0.1646, "lr": 1.134303861873909e-06, "epoch": 3.5778759362396775, "percentage": 71.56, "elapsed_time": "2:27:22", "remaining_time": "0:58:34", "throughput": 19892.63, "total_tokens": 175896320} +{"current_steps": 55895, "total_steps": 78105, "loss": 0.2506, "lr": 1.133835966738072e-06, "epoch": 3.5781960181806545, "percentage": 71.56, "elapsed_time": "2:27:22", "remaining_time": "0:58:33", "throughput": 19892.83, "total_tokens": 175910784} +{"current_steps": 55900, "total_steps": 78105, "loss": 0.1526, "lr": 1.1333681398213509e-06, "epoch": 3.578516100121631, "percentage": 71.57, "elapsed_time": "2:27:23", "remaining_time": "0:58:32", "throughput": 19893.04, "total_tokens": 175925760} +{"current_steps": 55905, "total_steps": 78105, "loss": 0.1295, "lr": 1.1329003811471061e-06, "epoch": 3.578836182062608, "percentage": 71.58, "elapsed_time": "2:27:24", "remaining_time": "0:58:32", "throughput": 19893.29, "total_tokens": 175941568} +{"current_steps": 55910, "total_steps": 78105, "loss": 0.1643, "lr": 1.1324326907386955e-06, "epoch": 3.579156264003585, "percentage": 71.58, "elapsed_time": "2:27:24", "remaining_time": "0:58:31", "throughput": 19893.5, "total_tokens": 175956352} +{"current_steps": 55915, "total_steps": 78105, "loss": 0.1576, "lr": 1.1319650686194719e-06, "epoch": 3.5794763459445615, "percentage": 71.59, "elapsed_time": "2:27:25", "remaining_time": "0:58:30", "throughput": 19893.74, "total_tokens": 175971648} +{"current_steps": 55920, "total_steps": 78105, "loss": 0.1446, "lr": 1.1314975148127889e-06, "epoch": 3.5797964278855385, "percentage": 71.6, "elapsed_time": "2:27:26", "remaining_time": "0:58:29", "throughput": 19893.96, "total_tokens": 175986752} +{"current_steps": 55925, "total_steps": 78105, "loss": 0.15, "lr": 1.1310300293419898e-06, "epoch": 3.5801165098265155, "percentage": 71.6, "elapsed_time": "2:27:26", "remaining_time": "0:58:28", "throughput": 19894.2, "total_tokens": 176002368} +{"current_steps": 55930, "total_steps": 78105, "loss": 0.1144, "lr": 1.130562612230421e-06, "epoch": 3.5804365917674925, "percentage": 71.61, "elapsed_time": "2:27:27", "remaining_time": "0:58:27", "throughput": 19894.45, "total_tokens": 176018304} +{"current_steps": 55935, "total_steps": 78105, "loss": 0.1389, "lr": 1.1300952635014226e-06, "epoch": 3.5807566737084695, "percentage": 71.62, "elapsed_time": "2:27:28", "remaining_time": "0:58:27", "throughput": 19894.72, "total_tokens": 176034560} +{"current_steps": 55940, "total_steps": 78105, "loss": 0.2644, "lr": 1.129627983178331e-06, "epoch": 3.5810767556494465, "percentage": 71.62, "elapsed_time": "2:27:28", "remaining_time": "0:58:26", "throughput": 19894.93, "total_tokens": 176049472} +{"current_steps": 55945, "total_steps": 78105, "loss": 0.1385, "lr": 1.12916077128448e-06, "epoch": 3.581396837590423, "percentage": 71.63, "elapsed_time": "2:27:29", "remaining_time": "0:58:25", "throughput": 19895.24, "total_tokens": 176066560} +{"current_steps": 55950, "total_steps": 78105, "loss": 0.1688, "lr": 1.1286936278431986e-06, "epoch": 3.5817169195314, "percentage": 71.63, "elapsed_time": "2:27:30", "remaining_time": "0:58:24", "throughput": 19895.5, "total_tokens": 176082432} +{"current_steps": 55955, "total_steps": 78105, "loss": 0.161, "lr": 1.1282265528778167e-06, "epoch": 3.582037001472377, "percentage": 71.64, "elapsed_time": "2:27:31", "remaining_time": "0:58:23", "throughput": 19895.74, "total_tokens": 176097664} +{"current_steps": 55960, "total_steps": 78105, "loss": 0.1433, "lr": 1.1277595464116534e-06, "epoch": 3.5823570834133536, "percentage": 71.65, "elapsed_time": "2:27:31", "remaining_time": "0:58:22", "throughput": 19895.99, "total_tokens": 176113024} +{"current_steps": 55965, "total_steps": 78105, "loss": 0.1655, "lr": 1.1272926084680317e-06, "epoch": 3.5826771653543306, "percentage": 71.65, "elapsed_time": "2:27:32", "remaining_time": "0:58:22", "throughput": 19896.2, "total_tokens": 176128064} +{"current_steps": 55970, "total_steps": 78105, "loss": 0.216, "lr": 1.1268257390702667e-06, "epoch": 3.5829972472953076, "percentage": 71.66, "elapsed_time": "2:27:33", "remaining_time": "0:58:21", "throughput": 19896.5, "total_tokens": 176144512} +{"current_steps": 55975, "total_steps": 78105, "loss": 0.2733, "lr": 1.1263589382416717e-06, "epoch": 3.5833173292362845, "percentage": 71.67, "elapsed_time": "2:27:33", "remaining_time": "0:58:20", "throughput": 19896.75, "total_tokens": 176160128} +{"current_steps": 55980, "total_steps": 78105, "loss": 0.1822, "lr": 1.1258922060055563e-06, "epoch": 3.5836374111772615, "percentage": 71.67, "elapsed_time": "2:27:34", "remaining_time": "0:58:19", "throughput": 19897.0, "total_tokens": 176175616} +{"current_steps": 55985, "total_steps": 78105, "loss": 0.1332, "lr": 1.1254255423852267e-06, "epoch": 3.5839574931182385, "percentage": 71.68, "elapsed_time": "2:27:35", "remaining_time": "0:58:18", "throughput": 19897.26, "total_tokens": 176191616} +{"current_steps": 55990, "total_steps": 78105, "loss": 0.1587, "lr": 1.1249589474039857e-06, "epoch": 3.584277575059215, "percentage": 71.69, "elapsed_time": "2:27:35", "remaining_time": "0:58:17", "throughput": 19897.49, "total_tokens": 176206976} +{"current_steps": 55995, "total_steps": 78105, "loss": 0.1667, "lr": 1.1244924210851324e-06, "epoch": 3.584597657000192, "percentage": 71.69, "elapsed_time": "2:27:36", "remaining_time": "0:58:17", "throughput": 19897.8, "total_tokens": 176223552} +{"current_steps": 56000, "total_steps": 78105, "loss": 0.1975, "lr": 1.1240259634519622e-06, "epoch": 3.584917738941169, "percentage": 71.7, "elapsed_time": "2:27:37", "remaining_time": "0:58:16", "throughput": 19898.01, "total_tokens": 176238400} +{"current_steps": 56005, "total_steps": 78105, "loss": 0.1504, "lr": 1.1235595745277697e-06, "epoch": 3.5852378208821456, "percentage": 71.7, "elapsed_time": "2:27:37", "remaining_time": "0:58:15", "throughput": 19898.27, "total_tokens": 176254208} +{"current_steps": 56010, "total_steps": 78105, "loss": 0.1632, "lr": 1.1230932543358405e-06, "epoch": 3.5855579028231226, "percentage": 71.71, "elapsed_time": "2:27:38", "remaining_time": "0:58:14", "throughput": 19898.55, "total_tokens": 176270208} +{"current_steps": 56015, "total_steps": 78105, "loss": 0.1512, "lr": 1.1226270028994634e-06, "epoch": 3.5858779847640996, "percentage": 71.72, "elapsed_time": "2:27:39", "remaining_time": "0:58:13", "throughput": 19898.75, "total_tokens": 176284736} +{"current_steps": 56020, "total_steps": 78105, "loss": 0.1237, "lr": 1.1221608202419194e-06, "epoch": 3.5861980667050766, "percentage": 71.72, "elapsed_time": "2:27:39", "remaining_time": "0:58:12", "throughput": 19898.99, "total_tokens": 176299968} +{"current_steps": 56025, "total_steps": 78105, "loss": 0.175, "lr": 1.1216947063864857e-06, "epoch": 3.5865181486460536, "percentage": 71.73, "elapsed_time": "2:27:40", "remaining_time": "0:58:11", "throughput": 19899.31, "total_tokens": 176317056} +{"current_steps": 56030, "total_steps": 78105, "loss": 0.1639, "lr": 1.1212286613564413e-06, "epoch": 3.5868382305870306, "percentage": 71.74, "elapsed_time": "2:27:41", "remaining_time": "0:58:11", "throughput": 19899.55, "total_tokens": 176332288} +{"current_steps": 56035, "total_steps": 78105, "loss": 0.1594, "lr": 1.1207626851750535e-06, "epoch": 3.587158312528007, "percentage": 71.74, "elapsed_time": "2:27:41", "remaining_time": "0:58:10", "throughput": 19899.87, "total_tokens": 176349440} +{"current_steps": 56040, "total_steps": 78105, "loss": 0.2285, "lr": 1.1202967778655954e-06, "epoch": 3.587478394468984, "percentage": 71.75, "elapsed_time": "2:27:42", "remaining_time": "0:58:09", "throughput": 19900.08, "total_tokens": 176364672} +{"current_steps": 56045, "total_steps": 78105, "loss": 0.1599, "lr": 1.1198309394513277e-06, "epoch": 3.587798476409961, "percentage": 71.76, "elapsed_time": "2:27:43", "remaining_time": "0:58:08", "throughput": 19900.35, "total_tokens": 176380480} +{"current_steps": 56050, "total_steps": 78105, "loss": 0.1678, "lr": 1.1193651699555144e-06, "epoch": 3.5881185583509376, "percentage": 71.76, "elapsed_time": "2:27:43", "remaining_time": "0:58:07", "throughput": 19900.58, "total_tokens": 176395712} +{"current_steps": 56055, "total_steps": 78105, "loss": 0.18, "lr": 1.1188994694014132e-06, "epoch": 3.5884386402919146, "percentage": 71.77, "elapsed_time": "2:27:44", "remaining_time": "0:58:06", "throughput": 19900.79, "total_tokens": 176410880} +{"current_steps": 56060, "total_steps": 78105, "loss": 0.0902, "lr": 1.1184338378122785e-06, "epoch": 3.5887587222328916, "percentage": 71.78, "elapsed_time": "2:27:45", "remaining_time": "0:58:06", "throughput": 19901.05, "total_tokens": 176426496} +{"current_steps": 56065, "total_steps": 78105, "loss": 0.1495, "lr": 1.1179682752113622e-06, "epoch": 3.5890788041738686, "percentage": 71.78, "elapsed_time": "2:27:45", "remaining_time": "0:58:05", "throughput": 19901.27, "total_tokens": 176441472} +{"current_steps": 56070, "total_steps": 78105, "loss": 0.1449, "lr": 1.11750278162191e-06, "epoch": 3.5893988861148456, "percentage": 71.79, "elapsed_time": "2:27:46", "remaining_time": "0:58:04", "throughput": 19901.58, "total_tokens": 176458176} +{"current_steps": 56075, "total_steps": 78105, "loss": 0.2215, "lr": 1.1170373570671697e-06, "epoch": 3.589718968055822, "percentage": 71.79, "elapsed_time": "2:27:47", "remaining_time": "0:58:03", "throughput": 19901.83, "total_tokens": 176473856} +{"current_steps": 56080, "total_steps": 78105, "loss": 0.1884, "lr": 1.1165720015703784e-06, "epoch": 3.590039049996799, "percentage": 71.8, "elapsed_time": "2:27:47", "remaining_time": "0:58:02", "throughput": 19902.06, "total_tokens": 176489216} +{"current_steps": 56085, "total_steps": 78105, "loss": 0.1233, "lr": 1.1161067151547761e-06, "epoch": 3.590359131937776, "percentage": 71.81, "elapsed_time": "2:27:48", "remaining_time": "0:58:01", "throughput": 19902.27, "total_tokens": 176504192} +{"current_steps": 56090, "total_steps": 78105, "loss": 0.1553, "lr": 1.1156414978435961e-06, "epoch": 3.590679213878753, "percentage": 71.81, "elapsed_time": "2:27:49", "remaining_time": "0:58:01", "throughput": 19902.48, "total_tokens": 176518912} +{"current_steps": 56095, "total_steps": 78105, "loss": 0.1501, "lr": 1.115176349660069e-06, "epoch": 3.5909992958197297, "percentage": 71.82, "elapsed_time": "2:27:49", "remaining_time": "0:58:00", "throughput": 19902.78, "total_tokens": 176535744} +{"current_steps": 56100, "total_steps": 78105, "loss": 0.2381, "lr": 1.1147112706274216e-06, "epoch": 3.5913193777607066, "percentage": 71.83, "elapsed_time": "2:27:50", "remaining_time": "0:57:59", "throughput": 19903.01, "total_tokens": 176550784} +{"current_steps": 56105, "total_steps": 78105, "loss": 0.3078, "lr": 1.1142462607688776e-06, "epoch": 3.5916394597016836, "percentage": 71.83, "elapsed_time": "2:27:51", "remaining_time": "0:57:58", "throughput": 19903.26, "total_tokens": 176566464} +{"current_steps": 56110, "total_steps": 78105, "loss": 0.1107, "lr": 1.1137813201076564e-06, "epoch": 3.5919595416426606, "percentage": 71.84, "elapsed_time": "2:27:51", "remaining_time": "0:57:57", "throughput": 19903.54, "total_tokens": 176582592} +{"current_steps": 56115, "total_steps": 78105, "loss": 0.1347, "lr": 1.113316448666977e-06, "epoch": 3.5922796235836376, "percentage": 71.85, "elapsed_time": "2:27:52", "remaining_time": "0:57:56", "throughput": 19903.79, "total_tokens": 176598528} +{"current_steps": 56120, "total_steps": 78105, "loss": 0.1809, "lr": 1.1128516464700498e-06, "epoch": 3.592599705524614, "percentage": 71.85, "elapsed_time": "2:27:53", "remaining_time": "0:57:56", "throughput": 19904.1, "total_tokens": 176615552} +{"current_steps": 56125, "total_steps": 78105, "loss": 0.1755, "lr": 1.1123869135400866e-06, "epoch": 3.592919787465591, "percentage": 71.86, "elapsed_time": "2:27:53", "remaining_time": "0:57:55", "throughput": 19904.36, "total_tokens": 176631232} +{"current_steps": 56130, "total_steps": 78105, "loss": 0.238, "lr": 1.1119222499002931e-06, "epoch": 3.593239869406568, "percentage": 71.86, "elapsed_time": "2:27:54", "remaining_time": "0:57:54", "throughput": 19904.59, "total_tokens": 176646400} +{"current_steps": 56135, "total_steps": 78105, "loss": 0.2218, "lr": 1.1114576555738725e-06, "epoch": 3.593559951347545, "percentage": 71.87, "elapsed_time": "2:27:55", "remaining_time": "0:57:53", "throughput": 19904.88, "total_tokens": 176662528} +{"current_steps": 56140, "total_steps": 78105, "loss": 0.1394, "lr": 1.1109931305840238e-06, "epoch": 3.5938800332885217, "percentage": 71.88, "elapsed_time": "2:27:56", "remaining_time": "0:57:52", "throughput": 19905.15, "total_tokens": 176678720} +{"current_steps": 56145, "total_steps": 78105, "loss": 0.1446, "lr": 1.110528674953942e-06, "epoch": 3.5942001152294987, "percentage": 71.88, "elapsed_time": "2:27:56", "remaining_time": "0:57:51", "throughput": 19905.46, "total_tokens": 176695552} +{"current_steps": 56150, "total_steps": 78105, "loss": 0.1611, "lr": 1.1100642887068228e-06, "epoch": 3.5945201971704757, "percentage": 71.89, "elapsed_time": "2:27:57", "remaining_time": "0:57:51", "throughput": 19905.63, "total_tokens": 176709760} +{"current_steps": 56155, "total_steps": 78105, "loss": 0.1788, "lr": 1.1095999718658512e-06, "epoch": 3.5948402791114527, "percentage": 71.9, "elapsed_time": "2:27:58", "remaining_time": "0:57:50", "throughput": 19905.85, "total_tokens": 176724544} +{"current_steps": 56160, "total_steps": 78105, "loss": 0.1416, "lr": 1.1091357244542156e-06, "epoch": 3.5951603610524296, "percentage": 71.9, "elapsed_time": "2:27:58", "remaining_time": "0:57:49", "throughput": 19906.14, "total_tokens": 176740864} +{"current_steps": 56165, "total_steps": 78105, "loss": 0.1585, "lr": 1.108671546495097e-06, "epoch": 3.595480442993406, "percentage": 71.91, "elapsed_time": "2:27:59", "remaining_time": "0:57:48", "throughput": 19906.46, "total_tokens": 176757504} +{"current_steps": 56170, "total_steps": 78105, "loss": 0.2267, "lr": 1.1082074380116746e-06, "epoch": 3.595800524934383, "percentage": 71.92, "elapsed_time": "2:28:00", "remaining_time": "0:57:47", "throughput": 19906.75, "total_tokens": 176773888} +{"current_steps": 56175, "total_steps": 78105, "loss": 0.1493, "lr": 1.1077433990271228e-06, "epoch": 3.59612060687536, "percentage": 71.92, "elapsed_time": "2:28:00", "remaining_time": "0:57:46", "throughput": 19906.98, "total_tokens": 176789568} +{"current_steps": 56180, "total_steps": 78105, "loss": 0.1922, "lr": 1.1072794295646135e-06, "epoch": 3.5964406888163367, "percentage": 71.93, "elapsed_time": "2:28:01", "remaining_time": "0:57:46", "throughput": 19907.27, "total_tokens": 176805696} +{"current_steps": 56185, "total_steps": 78105, "loss": 0.1178, "lr": 1.1068155296473153e-06, "epoch": 3.5967607707573137, "percentage": 71.94, "elapsed_time": "2:28:02", "remaining_time": "0:57:45", "throughput": 19907.65, "total_tokens": 176824064} +{"current_steps": 56190, "total_steps": 78105, "loss": 0.1523, "lr": 1.1063516992983918e-06, "epoch": 3.5970808526982907, "percentage": 71.94, "elapsed_time": "2:28:02", "remaining_time": "0:57:44", "throughput": 19907.86, "total_tokens": 176839040} +{"current_steps": 56195, "total_steps": 78105, "loss": 0.1325, "lr": 1.1058879385410062e-06, "epoch": 3.5974009346392677, "percentage": 71.95, "elapsed_time": "2:28:03", "remaining_time": "0:57:43", "throughput": 19908.11, "total_tokens": 176854720} +{"current_steps": 56200, "total_steps": 78105, "loss": 0.2025, "lr": 1.1054242473983151e-06, "epoch": 3.5977210165802447, "percentage": 71.95, "elapsed_time": "2:28:04", "remaining_time": "0:57:42", "throughput": 19908.31, "total_tokens": 176869376} +{"current_steps": 56205, "total_steps": 78105, "loss": 0.0986, "lr": 1.104960625893473e-06, "epoch": 3.5980410985212217, "percentage": 71.96, "elapsed_time": "2:28:04", "remaining_time": "0:57:41", "throughput": 19908.63, "total_tokens": 176886784} +{"current_steps": 56210, "total_steps": 78105, "loss": 0.1185, "lr": 1.1044970740496308e-06, "epoch": 3.598361180462198, "percentage": 71.97, "elapsed_time": "2:28:05", "remaining_time": "0:57:41", "throughput": 19908.83, "total_tokens": 176901376} +{"current_steps": 56215, "total_steps": 78105, "loss": 0.148, "lr": 1.104033591889936e-06, "epoch": 3.598681262403175, "percentage": 71.97, "elapsed_time": "2:28:06", "remaining_time": "0:57:40", "throughput": 19909.07, "total_tokens": 176916928} +{"current_steps": 56220, "total_steps": 78105, "loss": 0.184, "lr": 1.1035701794375322e-06, "epoch": 3.599001344344152, "percentage": 71.98, "elapsed_time": "2:28:06", "remaining_time": "0:57:39", "throughput": 19909.32, "total_tokens": 176932288} +{"current_steps": 56225, "total_steps": 78105, "loss": 0.1429, "lr": 1.1031068367155598e-06, "epoch": 3.5993214262851287, "percentage": 71.99, "elapsed_time": "2:28:07", "remaining_time": "0:57:38", "throughput": 19909.62, "total_tokens": 176948800} +{"current_steps": 56230, "total_steps": 78105, "loss": 0.2124, "lr": 1.102643563747155e-06, "epoch": 3.5996415082261057, "percentage": 71.99, "elapsed_time": "2:28:08", "remaining_time": "0:57:37", "throughput": 19909.83, "total_tokens": 176963648} +{"current_steps": 56235, "total_steps": 78105, "loss": 0.1687, "lr": 1.1021803605554538e-06, "epoch": 3.5999615901670827, "percentage": 72.0, "elapsed_time": "2:28:08", "remaining_time": "0:57:36", "throughput": 19910.07, "total_tokens": 176979200} +{"current_steps": 56240, "total_steps": 78105, "loss": 0.1796, "lr": 1.1017172271635825e-06, "epoch": 3.6002816721080597, "percentage": 72.01, "elapsed_time": "2:28:09", "remaining_time": "0:57:36", "throughput": 19910.32, "total_tokens": 176994880} +{"current_steps": 56245, "total_steps": 78105, "loss": 0.1608, "lr": 1.1012541635946705e-06, "epoch": 3.6006017540490367, "percentage": 72.01, "elapsed_time": "2:28:10", "remaining_time": "0:57:35", "throughput": 19910.54, "total_tokens": 177009792} +{"current_steps": 56250, "total_steps": 78105, "loss": 0.1098, "lr": 1.10079116987184e-06, "epoch": 3.6009218359900137, "percentage": 72.02, "elapsed_time": "2:28:10", "remaining_time": "0:57:34", "throughput": 19910.77, "total_tokens": 177025024} +{"current_steps": 56255, "total_steps": 78105, "loss": 0.2529, "lr": 1.1003282460182091e-06, "epoch": 3.6012419179309902, "percentage": 72.02, "elapsed_time": "2:28:11", "remaining_time": "0:57:33", "throughput": 19910.98, "total_tokens": 177039872} +{"current_steps": 56260, "total_steps": 78105, "loss": 0.1617, "lr": 1.099865392056897e-06, "epoch": 3.6015619998719672, "percentage": 72.03, "elapsed_time": "2:28:12", "remaining_time": "0:57:32", "throughput": 19911.25, "total_tokens": 177055872} +{"current_steps": 56265, "total_steps": 78105, "loss": 0.204, "lr": 1.099402608011012e-06, "epoch": 3.6018820818129442, "percentage": 72.04, "elapsed_time": "2:28:12", "remaining_time": "0:57:31", "throughput": 19911.53, "total_tokens": 177072320} +{"current_steps": 56270, "total_steps": 78105, "loss": 0.1869, "lr": 1.0989398939036674e-06, "epoch": 3.6022021637539208, "percentage": 72.04, "elapsed_time": "2:28:13", "remaining_time": "0:57:31", "throughput": 19911.8, "total_tokens": 177088512} +{"current_steps": 56275, "total_steps": 78105, "loss": 0.2186, "lr": 1.0984772497579648e-06, "epoch": 3.6025222456948978, "percentage": 72.05, "elapsed_time": "2:28:14", "remaining_time": "0:57:30", "throughput": 19912.03, "total_tokens": 177103744} +{"current_steps": 56280, "total_steps": 78105, "loss": 0.1175, "lr": 1.0980146755970088e-06, "epoch": 3.6028423276358748, "percentage": 72.06, "elapsed_time": "2:28:14", "remaining_time": "0:57:29", "throughput": 19912.25, "total_tokens": 177118400} +{"current_steps": 56285, "total_steps": 78105, "loss": 0.1222, "lr": 1.0975521714438975e-06, "epoch": 3.6031624095768517, "percentage": 72.06, "elapsed_time": "2:28:15", "remaining_time": "0:57:28", "throughput": 19912.49, "total_tokens": 177133696} +{"current_steps": 56290, "total_steps": 78105, "loss": 0.1266, "lr": 1.0970897373217258e-06, "epoch": 3.6034824915178287, "percentage": 72.07, "elapsed_time": "2:28:16", "remaining_time": "0:57:27", "throughput": 19912.7, "total_tokens": 177148608} +{"current_steps": 56295, "total_steps": 78105, "loss": 0.2266, "lr": 1.0966273732535846e-06, "epoch": 3.6038025734588057, "percentage": 72.08, "elapsed_time": "2:28:16", "remaining_time": "0:57:26", "throughput": 19912.93, "total_tokens": 177163904} +{"current_steps": 56300, "total_steps": 78105, "loss": 0.21, "lr": 1.0961650792625618e-06, "epoch": 3.6041226553997823, "percentage": 72.08, "elapsed_time": "2:28:17", "remaining_time": "0:57:26", "throughput": 19913.16, "total_tokens": 177179008} +{"current_steps": 56305, "total_steps": 78105, "loss": 0.2096, "lr": 1.0957028553717448e-06, "epoch": 3.6044427373407593, "percentage": 72.09, "elapsed_time": "2:28:18", "remaining_time": "0:57:25", "throughput": 19913.44, "total_tokens": 177195584} +{"current_steps": 56310, "total_steps": 78105, "loss": 0.1885, "lr": 1.0952407016042103e-06, "epoch": 3.6047628192817363, "percentage": 72.1, "elapsed_time": "2:28:18", "remaining_time": "0:57:24", "throughput": 19913.7, "total_tokens": 177211200} +{"current_steps": 56315, "total_steps": 78105, "loss": 0.1208, "lr": 1.0947786179830388e-06, "epoch": 3.605082901222713, "percentage": 72.1, "elapsed_time": "2:28:19", "remaining_time": "0:57:23", "throughput": 19913.95, "total_tokens": 177226624} +{"current_steps": 56320, "total_steps": 78105, "loss": 0.1208, "lr": 1.0943166045313039e-06, "epoch": 3.60540298316369, "percentage": 72.11, "elapsed_time": "2:28:20", "remaining_time": "0:57:22", "throughput": 19914.19, "total_tokens": 177242240} +{"current_steps": 56325, "total_steps": 78105, "loss": 0.1427, "lr": 1.0938546612720756e-06, "epoch": 3.605723065104667, "percentage": 72.11, "elapsed_time": "2:28:20", "remaining_time": "0:57:21", "throughput": 19914.5, "total_tokens": 177258944} +{"current_steps": 56330, "total_steps": 78105, "loss": 0.1503, "lr": 1.0933927882284216e-06, "epoch": 3.6060431470456438, "percentage": 72.12, "elapsed_time": "2:28:21", "remaining_time": "0:57:21", "throughput": 19914.74, "total_tokens": 177274240} +{"current_steps": 56335, "total_steps": 78105, "loss": 0.1458, "lr": 1.0929309854234047e-06, "epoch": 3.6063632289866208, "percentage": 72.13, "elapsed_time": "2:28:22", "remaining_time": "0:57:20", "throughput": 19914.95, "total_tokens": 177289024} +{"current_steps": 56340, "total_steps": 78105, "loss": 0.1496, "lr": 1.0924692528800844e-06, "epoch": 3.6066833109275973, "percentage": 72.13, "elapsed_time": "2:28:22", "remaining_time": "0:57:19", "throughput": 19915.17, "total_tokens": 177303744} +{"current_steps": 56345, "total_steps": 78105, "loss": 0.1769, "lr": 1.0920075906215196e-06, "epoch": 3.6070033928685743, "percentage": 72.14, "elapsed_time": "2:28:23", "remaining_time": "0:57:18", "throughput": 19915.41, "total_tokens": 177319488} +{"current_steps": 56350, "total_steps": 78105, "loss": 0.1742, "lr": 1.0915459986707601e-06, "epoch": 3.6073234748095513, "percentage": 72.15, "elapsed_time": "2:28:24", "remaining_time": "0:57:17", "throughput": 19915.64, "total_tokens": 177334400} +{"current_steps": 56355, "total_steps": 78105, "loss": 0.2224, "lr": 1.0910844770508588e-06, "epoch": 3.6076435567505283, "percentage": 72.15, "elapsed_time": "2:28:24", "remaining_time": "0:57:16", "throughput": 19915.9, "total_tokens": 177350336} +{"current_steps": 56360, "total_steps": 78105, "loss": 0.1737, "lr": 1.0906230257848586e-06, "epoch": 3.607963638691505, "percentage": 72.16, "elapsed_time": "2:28:25", "remaining_time": "0:57:16", "throughput": 19916.18, "total_tokens": 177366784} +{"current_steps": 56365, "total_steps": 78105, "loss": 0.1851, "lr": 1.0901616448958036e-06, "epoch": 3.608283720632482, "percentage": 72.17, "elapsed_time": "2:28:26", "remaining_time": "0:57:15", "throughput": 19916.38, "total_tokens": 177381504} +{"current_steps": 56370, "total_steps": 78105, "loss": 0.23, "lr": 1.0897003344067328e-06, "epoch": 3.608603802573459, "percentage": 72.17, "elapsed_time": "2:28:26", "remaining_time": "0:57:14", "throughput": 19916.63, "total_tokens": 177396864} +{"current_steps": 56375, "total_steps": 78105, "loss": 0.124, "lr": 1.0892390943406803e-06, "epoch": 3.608923884514436, "percentage": 72.18, "elapsed_time": "2:28:27", "remaining_time": "0:57:13", "throughput": 19916.89, "total_tokens": 177412800} +{"current_steps": 56380, "total_steps": 78105, "loss": 0.1778, "lr": 1.0887779247206809e-06, "epoch": 3.609243966455413, "percentage": 72.18, "elapsed_time": "2:28:28", "remaining_time": "0:57:12", "throughput": 19917.15, "total_tokens": 177428736} +{"current_steps": 56385, "total_steps": 78105, "loss": 0.114, "lr": 1.0883168255697596e-06, "epoch": 3.6095640483963893, "percentage": 72.19, "elapsed_time": "2:28:29", "remaining_time": "0:57:11", "throughput": 19917.4, "total_tokens": 177444544} +{"current_steps": 56390, "total_steps": 78105, "loss": 0.1327, "lr": 1.0878557969109432e-06, "epoch": 3.6098841303373663, "percentage": 72.2, "elapsed_time": "2:28:29", "remaining_time": "0:57:10", "throughput": 19917.65, "total_tokens": 177460224} +{"current_steps": 56395, "total_steps": 78105, "loss": 0.1233, "lr": 1.0873948387672531e-06, "epoch": 3.6102042122783433, "percentage": 72.2, "elapsed_time": "2:28:30", "remaining_time": "0:57:10", "throughput": 19917.89, "total_tokens": 177475776} +{"current_steps": 56400, "total_steps": 78105, "loss": 0.1733, "lr": 1.0869339511617066e-06, "epoch": 3.6105242942193203, "percentage": 72.21, "elapsed_time": "2:28:31", "remaining_time": "0:57:09", "throughput": 19918.14, "total_tokens": 177491520} +{"current_steps": 56405, "total_steps": 78105, "loss": 0.1546, "lr": 1.0864731341173186e-06, "epoch": 3.610844376160297, "percentage": 72.22, "elapsed_time": "2:28:31", "remaining_time": "0:57:08", "throughput": 19918.34, "total_tokens": 177506048} +{"current_steps": 56410, "total_steps": 78105, "loss": 0.1158, "lr": 1.0860123876570991e-06, "epoch": 3.611164458101274, "percentage": 72.22, "elapsed_time": "2:28:32", "remaining_time": "0:57:07", "throughput": 19918.68, "total_tokens": 177523904} +{"current_steps": 56415, "total_steps": 78105, "loss": 0.162, "lr": 1.0855517118040556e-06, "epoch": 3.611484540042251, "percentage": 72.23, "elapsed_time": "2:28:33", "remaining_time": "0:57:06", "throughput": 19918.92, "total_tokens": 177539264} +{"current_steps": 56420, "total_steps": 78105, "loss": 0.255, "lr": 1.0850911065811914e-06, "epoch": 3.611804621983228, "percentage": 72.24, "elapsed_time": "2:28:33", "remaining_time": "0:57:06", "throughput": 19919.2, "total_tokens": 177555392} +{"current_steps": 56425, "total_steps": 78105, "loss": 0.1543, "lr": 1.0846305720115083e-06, "epoch": 3.612124703924205, "percentage": 72.24, "elapsed_time": "2:28:34", "remaining_time": "0:57:05", "throughput": 19919.44, "total_tokens": 177570816} +{"current_steps": 56430, "total_steps": 78105, "loss": 0.136, "lr": 1.0841701081180018e-06, "epoch": 3.6124447858651814, "percentage": 72.25, "elapsed_time": "2:28:35", "remaining_time": "0:57:04", "throughput": 19919.7, "total_tokens": 177587136} +{"current_steps": 56435, "total_steps": 78105, "loss": 0.2711, "lr": 1.0837097149236655e-06, "epoch": 3.6127648678061584, "percentage": 72.26, "elapsed_time": "2:28:35", "remaining_time": "0:57:03", "throughput": 19919.93, "total_tokens": 177602752} +{"current_steps": 56440, "total_steps": 78105, "loss": 0.2058, "lr": 1.0832493924514886e-06, "epoch": 3.6130849497471353, "percentage": 72.26, "elapsed_time": "2:28:36", "remaining_time": "0:57:02", "throughput": 19920.24, "total_tokens": 177619712} +{"current_steps": 56445, "total_steps": 78105, "loss": 0.183, "lr": 1.0827891407244578e-06, "epoch": 3.613405031688112, "percentage": 72.27, "elapsed_time": "2:28:37", "remaining_time": "0:57:01", "throughput": 19920.48, "total_tokens": 177635008} +{"current_steps": 56450, "total_steps": 78105, "loss": 0.1528, "lr": 1.082328959765555e-06, "epoch": 3.613725113629089, "percentage": 72.27, "elapsed_time": "2:28:37", "remaining_time": "0:57:01", "throughput": 19920.72, "total_tokens": 177650688} +{"current_steps": 56455, "total_steps": 78105, "loss": 0.1042, "lr": 1.08186884959776e-06, "epoch": 3.614045195570066, "percentage": 72.28, "elapsed_time": "2:28:38", "remaining_time": "0:57:00", "throughput": 19920.95, "total_tokens": 177665792} +{"current_steps": 56460, "total_steps": 78105, "loss": 0.14, "lr": 1.0814088102440464e-06, "epoch": 3.614365277511043, "percentage": 72.29, "elapsed_time": "2:28:39", "remaining_time": "0:56:59", "throughput": 19921.22, "total_tokens": 177681792} +{"current_steps": 56465, "total_steps": 78105, "loss": 0.1633, "lr": 1.08094884172739e-06, "epoch": 3.61468535945202, "percentage": 72.29, "elapsed_time": "2:28:39", "remaining_time": "0:56:58", "throughput": 19921.4, "total_tokens": 177696128} +{"current_steps": 56470, "total_steps": 78105, "loss": 0.1264, "lr": 1.0804889440707548e-06, "epoch": 3.615005441392997, "percentage": 72.3, "elapsed_time": "2:28:40", "remaining_time": "0:56:57", "throughput": 19921.7, "total_tokens": 177712960} +{"current_steps": 56475, "total_steps": 78105, "loss": 0.1892, "lr": 1.0800291172971085e-06, "epoch": 3.6153255233339734, "percentage": 72.31, "elapsed_time": "2:28:41", "remaining_time": "0:56:56", "throughput": 19921.92, "total_tokens": 177728128} +{"current_steps": 56480, "total_steps": 78105, "loss": 0.1293, "lr": 1.0795693614294122e-06, "epoch": 3.6156456052749504, "percentage": 72.31, "elapsed_time": "2:28:41", "remaining_time": "0:56:56", "throughput": 19922.21, "total_tokens": 177744832} +{"current_steps": 56485, "total_steps": 78105, "loss": 0.1196, "lr": 1.079109676490622e-06, "epoch": 3.6159656872159274, "percentage": 72.32, "elapsed_time": "2:28:42", "remaining_time": "0:56:55", "throughput": 19922.47, "total_tokens": 177760512} +{"current_steps": 56490, "total_steps": 78105, "loss": 0.136, "lr": 1.0786500625036952e-06, "epoch": 3.616285769156904, "percentage": 72.33, "elapsed_time": "2:28:43", "remaining_time": "0:56:54", "throughput": 19922.75, "total_tokens": 177776896} +{"current_steps": 56495, "total_steps": 78105, "loss": 0.2233, "lr": 1.0781905194915793e-06, "epoch": 3.616605851097881, "percentage": 72.33, "elapsed_time": "2:28:43", "remaining_time": "0:56:53", "throughput": 19922.98, "total_tokens": 177792256} +{"current_steps": 56500, "total_steps": 78105, "loss": 0.1567, "lr": 1.0777310474772242e-06, "epoch": 3.616925933038858, "percentage": 72.34, "elapsed_time": "2:28:44", "remaining_time": "0:56:52", "throughput": 19923.21, "total_tokens": 177807616} +{"current_steps": 56505, "total_steps": 78105, "loss": 0.1297, "lr": 1.0772716464835708e-06, "epoch": 3.617246014979835, "percentage": 72.34, "elapsed_time": "2:28:45", "remaining_time": "0:56:51", "throughput": 19923.44, "total_tokens": 177822848} +{"current_steps": 56510, "total_steps": 78105, "loss": 0.177, "lr": 1.0768123165335615e-06, "epoch": 3.617566096920812, "percentage": 72.35, "elapsed_time": "2:28:45", "remaining_time": "0:56:51", "throughput": 19923.69, "total_tokens": 177838656} +{"current_steps": 56515, "total_steps": 78105, "loss": 0.1451, "lr": 1.076353057650132e-06, "epoch": 3.617886178861789, "percentage": 72.36, "elapsed_time": "2:28:46", "remaining_time": "0:56:50", "throughput": 19923.99, "total_tokens": 177855744} +{"current_steps": 56520, "total_steps": 78105, "loss": 0.1705, "lr": 1.0758938698562152e-06, "epoch": 3.6182062608027654, "percentage": 72.36, "elapsed_time": "2:28:47", "remaining_time": "0:56:49", "throughput": 19924.22, "total_tokens": 177870720} +{"current_steps": 56525, "total_steps": 78105, "loss": 0.2157, "lr": 1.0754347531747406e-06, "epoch": 3.6185263427437424, "percentage": 72.37, "elapsed_time": "2:28:48", "remaining_time": "0:56:48", "throughput": 19924.44, "total_tokens": 177886144} +{"current_steps": 56530, "total_steps": 78105, "loss": 0.2933, "lr": 1.0749757076286343e-06, "epoch": 3.6188464246847194, "percentage": 72.38, "elapsed_time": "2:28:48", "remaining_time": "0:56:47", "throughput": 19924.71, "total_tokens": 177902208} +{"current_steps": 56535, "total_steps": 78105, "loss": 0.1728, "lr": 1.0745167332408175e-06, "epoch": 3.619166506625696, "percentage": 72.38, "elapsed_time": "2:28:49", "remaining_time": "0:56:46", "throughput": 19924.96, "total_tokens": 177918272} +{"current_steps": 56540, "total_steps": 78105, "loss": 0.0814, "lr": 1.0740578300342107e-06, "epoch": 3.619486588566673, "percentage": 72.39, "elapsed_time": "2:28:50", "remaining_time": "0:56:46", "throughput": 19925.25, "total_tokens": 177934592} +{"current_steps": 56545, "total_steps": 78105, "loss": 0.1128, "lr": 1.0735989980317287e-06, "epoch": 3.61980667050765, "percentage": 72.4, "elapsed_time": "2:28:50", "remaining_time": "0:56:45", "throughput": 19925.5, "total_tokens": 177950528} +{"current_steps": 56550, "total_steps": 78105, "loss": 0.1652, "lr": 1.0731402372562826e-06, "epoch": 3.620126752448627, "percentage": 72.4, "elapsed_time": "2:28:51", "remaining_time": "0:56:44", "throughput": 19925.74, "total_tokens": 177965696} +{"current_steps": 56555, "total_steps": 78105, "loss": 0.1763, "lr": 1.072681547730781e-06, "epoch": 3.620446834389604, "percentage": 72.41, "elapsed_time": "2:28:52", "remaining_time": "0:56:43", "throughput": 19925.98, "total_tokens": 177981056} +{"current_steps": 56560, "total_steps": 78105, "loss": 0.1637, "lr": 1.0722229294781284e-06, "epoch": 3.620766916330581, "percentage": 72.42, "elapsed_time": "2:28:52", "remaining_time": "0:56:42", "throughput": 19926.27, "total_tokens": 177997696} +{"current_steps": 56565, "total_steps": 78105, "loss": 0.1245, "lr": 1.0717643825212253e-06, "epoch": 3.6210869982715574, "percentage": 72.42, "elapsed_time": "2:28:53", "remaining_time": "0:56:41", "throughput": 19926.51, "total_tokens": 178013760} +{"current_steps": 56570, "total_steps": 78105, "loss": 0.1651, "lr": 1.071305906882969e-06, "epoch": 3.6214070802125344, "percentage": 72.43, "elapsed_time": "2:28:54", "remaining_time": "0:56:41", "throughput": 19926.78, "total_tokens": 178029952} +{"current_steps": 56575, "total_steps": 78105, "loss": 0.1644, "lr": 1.0708475025862558e-06, "epoch": 3.6217271621535114, "percentage": 72.43, "elapsed_time": "2:28:54", "remaining_time": "0:56:40", "throughput": 19927.09, "total_tokens": 178046976} +{"current_steps": 56580, "total_steps": 78105, "loss": 0.0867, "lr": 1.070389169653972e-06, "epoch": 3.622047244094488, "percentage": 72.44, "elapsed_time": "2:28:55", "remaining_time": "0:56:39", "throughput": 19927.36, "total_tokens": 178063040} +{"current_steps": 56585, "total_steps": 78105, "loss": 0.1882, "lr": 1.0699309081090085e-06, "epoch": 3.622367326035465, "percentage": 72.45, "elapsed_time": "2:28:56", "remaining_time": "0:56:38", "throughput": 19927.62, "total_tokens": 178078912} +{"current_steps": 56590, "total_steps": 78105, "loss": 0.1782, "lr": 1.0694727179742445e-06, "epoch": 3.622687407976442, "percentage": 72.45, "elapsed_time": "2:28:56", "remaining_time": "0:56:37", "throughput": 19927.84, "total_tokens": 178093888} +{"current_steps": 56595, "total_steps": 78105, "loss": 0.1866, "lr": 1.0690145992725625e-06, "epoch": 3.623007489917419, "percentage": 72.46, "elapsed_time": "2:28:57", "remaining_time": "0:56:36", "throughput": 19928.05, "total_tokens": 178108992} +{"current_steps": 56600, "total_steps": 78105, "loss": 0.2354, "lr": 1.068556552026838e-06, "epoch": 3.623327571858396, "percentage": 72.47, "elapsed_time": "2:28:58", "remaining_time": "0:56:36", "throughput": 19928.27, "total_tokens": 178124352} +{"current_steps": 56605, "total_steps": 78105, "loss": 0.1695, "lr": 1.0680985762599418e-06, "epoch": 3.6236476537993725, "percentage": 72.47, "elapsed_time": "2:28:58", "remaining_time": "0:56:35", "throughput": 19928.58, "total_tokens": 178141568} +{"current_steps": 56610, "total_steps": 78105, "loss": 0.2273, "lr": 1.0676406719947466e-06, "epoch": 3.6239677357403495, "percentage": 72.48, "elapsed_time": "2:28:59", "remaining_time": "0:56:34", "throughput": 19928.9, "total_tokens": 178158720} +{"current_steps": 56615, "total_steps": 78105, "loss": 0.1764, "lr": 1.067182839254113e-06, "epoch": 3.6242878176813265, "percentage": 72.49, "elapsed_time": "2:29:00", "remaining_time": "0:56:33", "throughput": 19929.2, "total_tokens": 178175424} +{"current_steps": 56620, "total_steps": 78105, "loss": 0.2011, "lr": 1.0667250780609065e-06, "epoch": 3.6246078996223035, "percentage": 72.49, "elapsed_time": "2:29:01", "remaining_time": "0:56:32", "throughput": 19929.5, "total_tokens": 178192000} +{"current_steps": 56625, "total_steps": 78105, "loss": 0.2361, "lr": 1.0662673884379837e-06, "epoch": 3.62492798156328, "percentage": 72.5, "elapsed_time": "2:29:01", "remaining_time": "0:56:31", "throughput": 19929.71, "total_tokens": 178206528} +{"current_steps": 56630, "total_steps": 78105, "loss": 0.2, "lr": 1.0658097704081993e-06, "epoch": 3.625248063504257, "percentage": 72.5, "elapsed_time": "2:29:02", "remaining_time": "0:56:31", "throughput": 19929.97, "total_tokens": 178222528} +{"current_steps": 56635, "total_steps": 78105, "loss": 0.114, "lr": 1.0653522239944048e-06, "epoch": 3.625568145445234, "percentage": 72.51, "elapsed_time": "2:29:03", "remaining_time": "0:56:30", "throughput": 19930.2, "total_tokens": 178237696} +{"current_steps": 56640, "total_steps": 78105, "loss": 0.1414, "lr": 1.0648947492194473e-06, "epoch": 3.625888227386211, "percentage": 72.52, "elapsed_time": "2:29:03", "remaining_time": "0:56:29", "throughput": 19930.45, "total_tokens": 178253376} +{"current_steps": 56645, "total_steps": 78105, "loss": 0.138, "lr": 1.064437346106171e-06, "epoch": 3.626208309327188, "percentage": 72.52, "elapsed_time": "2:29:04", "remaining_time": "0:56:28", "throughput": 19930.92, "total_tokens": 178274368} +{"current_steps": 56650, "total_steps": 78105, "loss": 0.1887, "lr": 1.063980014677415e-06, "epoch": 3.6265283912681645, "percentage": 72.53, "elapsed_time": "2:29:05", "remaining_time": "0:56:27", "throughput": 19931.15, "total_tokens": 178289664} +{"current_steps": 56655, "total_steps": 78105, "loss": 0.1833, "lr": 1.0635227549560182e-06, "epoch": 3.6268484732091415, "percentage": 72.54, "elapsed_time": "2:29:05", "remaining_time": "0:56:27", "throughput": 19931.47, "total_tokens": 178306816} +{"current_steps": 56660, "total_steps": 78105, "loss": 0.1544, "lr": 1.0630655669648123e-06, "epoch": 3.6271685551501185, "percentage": 72.54, "elapsed_time": "2:29:06", "remaining_time": "0:56:26", "throughput": 19931.71, "total_tokens": 178322688} +{"current_steps": 56665, "total_steps": 78105, "loss": 0.2436, "lr": 1.0626084507266276e-06, "epoch": 3.6274886370910955, "percentage": 72.55, "elapsed_time": "2:29:07", "remaining_time": "0:56:25", "throughput": 19931.98, "total_tokens": 178338752} +{"current_steps": 56670, "total_steps": 78105, "loss": 0.1756, "lr": 1.0621514062642898e-06, "epoch": 3.627808719032072, "percentage": 72.56, "elapsed_time": "2:29:08", "remaining_time": "0:56:24", "throughput": 19932.24, "total_tokens": 178354496} +{"current_steps": 56675, "total_steps": 78105, "loss": 0.1773, "lr": 1.061694433600621e-06, "epoch": 3.628128800973049, "percentage": 72.56, "elapsed_time": "2:29:08", "remaining_time": "0:56:23", "throughput": 19932.46, "total_tokens": 178369984} +{"current_steps": 56680, "total_steps": 78105, "loss": 0.197, "lr": 1.0612375327584406e-06, "epoch": 3.628448882914026, "percentage": 72.57, "elapsed_time": "2:29:09", "remaining_time": "0:56:22", "throughput": 19932.8, "total_tokens": 178387392} +{"current_steps": 56685, "total_steps": 78105, "loss": 0.1712, "lr": 1.0607807037605637e-06, "epoch": 3.628768964855003, "percentage": 72.58, "elapsed_time": "2:29:10", "remaining_time": "0:56:22", "throughput": 19933.04, "total_tokens": 178403072} +{"current_steps": 56690, "total_steps": 78105, "loss": 0.1992, "lr": 1.0603239466298004e-06, "epoch": 3.62908904679598, "percentage": 72.58, "elapsed_time": "2:29:10", "remaining_time": "0:56:21", "throughput": 19933.27, "total_tokens": 178418240} +{"current_steps": 56695, "total_steps": 78105, "loss": 0.1193, "lr": 1.0598672613889623e-06, "epoch": 3.6294091287369565, "percentage": 72.59, "elapsed_time": "2:29:11", "remaining_time": "0:56:20", "throughput": 19933.48, "total_tokens": 178433280} +{"current_steps": 56700, "total_steps": 78105, "loss": 0.1284, "lr": 1.05941064806085e-06, "epoch": 3.6297292106779335, "percentage": 72.59, "elapsed_time": "2:29:12", "remaining_time": "0:56:19", "throughput": 19933.7, "total_tokens": 178448512} +{"current_steps": 56705, "total_steps": 78105, "loss": 0.2032, "lr": 1.0589541066682674e-06, "epoch": 3.6300492926189105, "percentage": 72.6, "elapsed_time": "2:29:12", "remaining_time": "0:56:18", "throughput": 19933.96, "total_tokens": 178464320} +{"current_steps": 56710, "total_steps": 78105, "loss": 0.1425, "lr": 1.05849763723401e-06, "epoch": 3.630369374559887, "percentage": 72.61, "elapsed_time": "2:29:13", "remaining_time": "0:56:17", "throughput": 19934.18, "total_tokens": 178479488} +{"current_steps": 56715, "total_steps": 78105, "loss": 0.1901, "lr": 1.0580412397808715e-06, "epoch": 3.630689456500864, "percentage": 72.61, "elapsed_time": "2:29:14", "remaining_time": "0:56:17", "throughput": 19934.35, "total_tokens": 178493632} +{"current_steps": 56720, "total_steps": 78105, "loss": 0.176, "lr": 1.0575849143316445e-06, "epoch": 3.631009538441841, "percentage": 72.62, "elapsed_time": "2:29:14", "remaining_time": "0:56:16", "throughput": 19934.55, "total_tokens": 178508608} +{"current_steps": 56725, "total_steps": 78105, "loss": 0.1989, "lr": 1.0571286609091116e-06, "epoch": 3.631329620382818, "percentage": 72.63, "elapsed_time": "2:29:15", "remaining_time": "0:56:15", "throughput": 19934.76, "total_tokens": 178523392} +{"current_steps": 56730, "total_steps": 78105, "loss": 0.2151, "lr": 1.0566724795360594e-06, "epoch": 3.631649702323795, "percentage": 72.63, "elapsed_time": "2:29:16", "remaining_time": "0:56:14", "throughput": 19935.05, "total_tokens": 178540096} +{"current_steps": 56735, "total_steps": 78105, "loss": 0.203, "lr": 1.056216370235264e-06, "epoch": 3.631969784264772, "percentage": 72.64, "elapsed_time": "2:29:16", "remaining_time": "0:56:13", "throughput": 19935.3, "total_tokens": 178555904} +{"current_steps": 56740, "total_steps": 78105, "loss": 0.2156, "lr": 1.0557603330295035e-06, "epoch": 3.6322898662057486, "percentage": 72.65, "elapsed_time": "2:29:17", "remaining_time": "0:56:12", "throughput": 19935.53, "total_tokens": 178571200} +{"current_steps": 56745, "total_steps": 78105, "loss": 0.2445, "lr": 1.0553043679415493e-06, "epoch": 3.6326099481467256, "percentage": 72.65, "elapsed_time": "2:29:18", "remaining_time": "0:56:12", "throughput": 19935.91, "total_tokens": 178589696} +{"current_steps": 56750, "total_steps": 78105, "loss": 0.1446, "lr": 1.05484847499417e-06, "epoch": 3.6329300300877025, "percentage": 72.66, "elapsed_time": "2:29:18", "remaining_time": "0:56:11", "throughput": 19936.17, "total_tokens": 178605376} +{"current_steps": 56755, "total_steps": 78105, "loss": 0.1405, "lr": 1.0543926542101299e-06, "epoch": 3.633250112028679, "percentage": 72.67, "elapsed_time": "2:29:19", "remaining_time": "0:56:10", "throughput": 19936.42, "total_tokens": 178621440} +{"current_steps": 56760, "total_steps": 78105, "loss": 0.1387, "lr": 1.0539369056121909e-06, "epoch": 3.633570193969656, "percentage": 72.67, "elapsed_time": "2:29:20", "remaining_time": "0:56:09", "throughput": 19936.66, "total_tokens": 178636864} +{"current_steps": 56765, "total_steps": 78105, "loss": 0.1275, "lr": 1.0534812292231097e-06, "epoch": 3.633890275910633, "percentage": 72.68, "elapsed_time": "2:29:20", "remaining_time": "0:56:08", "throughput": 19936.86, "total_tokens": 178651328} +{"current_steps": 56770, "total_steps": 78105, "loss": 0.1511, "lr": 1.053025625065642e-06, "epoch": 3.63421035785161, "percentage": 72.68, "elapsed_time": "2:29:21", "remaining_time": "0:56:07", "throughput": 19937.11, "total_tokens": 178666624} +{"current_steps": 56775, "total_steps": 78105, "loss": 0.1657, "lr": 1.052570093162538e-06, "epoch": 3.634530439792587, "percentage": 72.69, "elapsed_time": "2:29:22", "remaining_time": "0:56:07", "throughput": 19937.32, "total_tokens": 178681536} +{"current_steps": 56780, "total_steps": 78105, "loss": 0.1717, "lr": 1.0521146335365438e-06, "epoch": 3.634850521733564, "percentage": 72.7, "elapsed_time": "2:29:22", "remaining_time": "0:56:06", "throughput": 19937.53, "total_tokens": 178696512} +{"current_steps": 56785, "total_steps": 78105, "loss": 0.1864, "lr": 1.051659246210403e-06, "epoch": 3.6351706036745406, "percentage": 72.7, "elapsed_time": "2:29:23", "remaining_time": "0:56:05", "throughput": 19937.75, "total_tokens": 178711680} +{"current_steps": 56790, "total_steps": 78105, "loss": 0.1241, "lr": 1.0512039312068556e-06, "epoch": 3.6354906856155176, "percentage": 72.71, "elapsed_time": "2:29:24", "remaining_time": "0:56:04", "throughput": 19938.06, "total_tokens": 178728832} +{"current_steps": 56795, "total_steps": 78105, "loss": 0.2808, "lr": 1.0507486885486374e-06, "epoch": 3.6358107675564946, "percentage": 72.72, "elapsed_time": "2:29:24", "remaining_time": "0:56:03", "throughput": 19938.29, "total_tokens": 178743872} +{"current_steps": 56800, "total_steps": 78105, "loss": 0.1577, "lr": 1.0502935182584794e-06, "epoch": 3.636130849497471, "percentage": 72.72, "elapsed_time": "2:29:25", "remaining_time": "0:56:02", "throughput": 19938.57, "total_tokens": 178760192} +{"current_steps": 56805, "total_steps": 78105, "loss": 0.0967, "lr": 1.0498384203591141e-06, "epoch": 3.636450931438448, "percentage": 72.73, "elapsed_time": "2:29:26", "remaining_time": "0:56:02", "throughput": 19938.77, "total_tokens": 178774784} +{"current_steps": 56810, "total_steps": 78105, "loss": 0.1555, "lr": 1.0493833948732626e-06, "epoch": 3.636771013379425, "percentage": 72.74, "elapsed_time": "2:29:26", "remaining_time": "0:56:01", "throughput": 19939.0, "total_tokens": 178790208} +{"current_steps": 56815, "total_steps": 78105, "loss": 0.1846, "lr": 1.0489284418236504e-06, "epoch": 3.637091095320402, "percentage": 72.74, "elapsed_time": "2:29:27", "remaining_time": "0:56:00", "throughput": 19939.26, "total_tokens": 178806016} +{"current_steps": 56820, "total_steps": 78105, "loss": 0.1632, "lr": 1.0484735612329914e-06, "epoch": 3.637411177261379, "percentage": 72.75, "elapsed_time": "2:29:28", "remaining_time": "0:55:59", "throughput": 19939.52, "total_tokens": 178821760} +{"current_steps": 56825, "total_steps": 78105, "loss": 0.1746, "lr": 1.0480187531240028e-06, "epoch": 3.637731259202356, "percentage": 72.75, "elapsed_time": "2:29:28", "remaining_time": "0:55:58", "throughput": 19939.72, "total_tokens": 178836160} +{"current_steps": 56830, "total_steps": 78105, "loss": 0.2226, "lr": 1.0475640175193946e-06, "epoch": 3.6380513411433326, "percentage": 72.76, "elapsed_time": "2:29:29", "remaining_time": "0:55:57", "throughput": 19939.93, "total_tokens": 178850880} +{"current_steps": 56835, "total_steps": 78105, "loss": 0.1764, "lr": 1.047109354441873e-06, "epoch": 3.6383714230843096, "percentage": 72.77, "elapsed_time": "2:29:30", "remaining_time": "0:55:57", "throughput": 19940.18, "total_tokens": 178866816} +{"current_steps": 56840, "total_steps": 78105, "loss": 0.1911, "lr": 1.0466547639141444e-06, "epoch": 3.6386915050252866, "percentage": 72.77, "elapsed_time": "2:29:30", "remaining_time": "0:55:56", "throughput": 19940.44, "total_tokens": 178882752} +{"current_steps": 56845, "total_steps": 78105, "loss": 0.1797, "lr": 1.046200245958905e-06, "epoch": 3.639011586966263, "percentage": 72.78, "elapsed_time": "2:29:31", "remaining_time": "0:55:55", "throughput": 19940.66, "total_tokens": 178897664} +{"current_steps": 56850, "total_steps": 78105, "loss": 0.1386, "lr": 1.0457458005988533e-06, "epoch": 3.63933166890724, "percentage": 72.79, "elapsed_time": "2:29:32", "remaining_time": "0:55:54", "throughput": 19940.99, "total_tokens": 178915072} +{"current_steps": 56855, "total_steps": 78105, "loss": 0.1424, "lr": 1.0452914278566816e-06, "epoch": 3.639651750848217, "percentage": 72.79, "elapsed_time": "2:29:32", "remaining_time": "0:55:53", "throughput": 19941.22, "total_tokens": 178930368} +{"current_steps": 56860, "total_steps": 78105, "loss": 0.1593, "lr": 1.0448371277550787e-06, "epoch": 3.639971832789194, "percentage": 72.8, "elapsed_time": "2:29:33", "remaining_time": "0:55:52", "throughput": 19941.46, "total_tokens": 178945984} +{"current_steps": 56865, "total_steps": 78105, "loss": 0.1889, "lr": 1.0443829003167299e-06, "epoch": 3.640291914730171, "percentage": 72.81, "elapsed_time": "2:29:34", "remaining_time": "0:55:52", "throughput": 19941.89, "total_tokens": 178965632} +{"current_steps": 56870, "total_steps": 78105, "loss": 0.199, "lr": 1.043928745564317e-06, "epoch": 3.6406119966711477, "percentage": 72.81, "elapsed_time": "2:29:35", "remaining_time": "0:55:51", "throughput": 19942.11, "total_tokens": 178980864} +{"current_steps": 56875, "total_steps": 78105, "loss": 0.1782, "lr": 1.0434746635205182e-06, "epoch": 3.6409320786121246, "percentage": 72.82, "elapsed_time": "2:29:35", "remaining_time": "0:55:50", "throughput": 19942.37, "total_tokens": 178996864} +{"current_steps": 56880, "total_steps": 78105, "loss": 0.0972, "lr": 1.043020654208008e-06, "epoch": 3.6412521605531016, "percentage": 72.83, "elapsed_time": "2:29:36", "remaining_time": "0:55:49", "throughput": 19942.62, "total_tokens": 179012288} +{"current_steps": 56885, "total_steps": 78105, "loss": 0.1872, "lr": 1.042566717649456e-06, "epoch": 3.6415722424940786, "percentage": 72.83, "elapsed_time": "2:29:37", "remaining_time": "0:55:48", "throughput": 19942.88, "total_tokens": 179028224} +{"current_steps": 56890, "total_steps": 78105, "loss": 0.1648, "lr": 1.0421128538675319e-06, "epoch": 3.641892324435055, "percentage": 72.84, "elapsed_time": "2:29:37", "remaining_time": "0:55:47", "throughput": 19943.17, "total_tokens": 179044992} +{"current_steps": 56895, "total_steps": 78105, "loss": 0.1396, "lr": 1.0416590628848977e-06, "epoch": 3.642212406376032, "percentage": 72.84, "elapsed_time": "2:29:38", "remaining_time": "0:55:47", "throughput": 19943.38, "total_tokens": 179060032} +{"current_steps": 56900, "total_steps": 78105, "loss": 0.2087, "lr": 1.041205344724214e-06, "epoch": 3.642532488317009, "percentage": 72.85, "elapsed_time": "2:29:39", "remaining_time": "0:55:46", "throughput": 19943.65, "total_tokens": 179076160} +{"current_steps": 56905, "total_steps": 78105, "loss": 0.2219, "lr": 1.0407516994081365e-06, "epoch": 3.642852570257986, "percentage": 72.86, "elapsed_time": "2:29:39", "remaining_time": "0:55:45", "throughput": 19943.9, "total_tokens": 179091584} +{"current_steps": 56910, "total_steps": 78105, "loss": 0.1018, "lr": 1.0402981269593184e-06, "epoch": 3.643172652198963, "percentage": 72.86, "elapsed_time": "2:29:40", "remaining_time": "0:55:44", "throughput": 19944.17, "total_tokens": 179107520} +{"current_steps": 56915, "total_steps": 78105, "loss": 0.1515, "lr": 1.0398446274004083e-06, "epoch": 3.6434927341399397, "percentage": 72.87, "elapsed_time": "2:29:41", "remaining_time": "0:55:43", "throughput": 19944.42, "total_tokens": 179123712} +{"current_steps": 56920, "total_steps": 78105, "loss": 0.1506, "lr": 1.0393912007540514e-06, "epoch": 3.6438128160809167, "percentage": 72.88, "elapsed_time": "2:29:41", "remaining_time": "0:55:42", "throughput": 19944.63, "total_tokens": 179138688} +{"current_steps": 56925, "total_steps": 78105, "loss": 0.1346, "lr": 1.0389378470428913e-06, "epoch": 3.6441328980218937, "percentage": 72.88, "elapsed_time": "2:29:42", "remaining_time": "0:55:42", "throughput": 19944.86, "total_tokens": 179154368} +{"current_steps": 56930, "total_steps": 78105, "loss": 0.1626, "lr": 1.0384845662895629e-06, "epoch": 3.6444529799628707, "percentage": 72.89, "elapsed_time": "2:29:43", "remaining_time": "0:55:41", "throughput": 19945.09, "total_tokens": 179169728} +{"current_steps": 56935, "total_steps": 78105, "loss": 0.1644, "lr": 1.0380313585167037e-06, "epoch": 3.644773061903847, "percentage": 72.9, "elapsed_time": "2:29:43", "remaining_time": "0:55:40", "throughput": 19945.34, "total_tokens": 179185536} +{"current_steps": 56940, "total_steps": 78105, "loss": 0.184, "lr": 1.0375782237469432e-06, "epoch": 3.645093143844824, "percentage": 72.9, "elapsed_time": "2:29:44", "remaining_time": "0:55:39", "throughput": 19945.63, "total_tokens": 179202048} +{"current_steps": 56945, "total_steps": 78105, "loss": 0.1639, "lr": 1.0371251620029076e-06, "epoch": 3.645413225785801, "percentage": 72.91, "elapsed_time": "2:29:45", "remaining_time": "0:55:38", "throughput": 19945.87, "total_tokens": 179217920} +{"current_steps": 56950, "total_steps": 78105, "loss": 0.2135, "lr": 1.0366721733072239e-06, "epoch": 3.645733307726778, "percentage": 72.91, "elapsed_time": "2:29:45", "remaining_time": "0:55:37", "throughput": 19946.19, "total_tokens": 179234752} +{"current_steps": 56955, "total_steps": 78105, "loss": 0.1203, "lr": 1.0362192576825072e-06, "epoch": 3.646053389667755, "percentage": 72.92, "elapsed_time": "2:29:46", "remaining_time": "0:55:37", "throughput": 19946.48, "total_tokens": 179251264} +{"current_steps": 56960, "total_steps": 78105, "loss": 0.1495, "lr": 1.0357664151513785e-06, "epoch": 3.6463734716087317, "percentage": 72.93, "elapsed_time": "2:29:47", "remaining_time": "0:55:36", "throughput": 19946.76, "total_tokens": 179267712} +{"current_steps": 56965, "total_steps": 78105, "loss": 0.1079, "lr": 1.035313645736446e-06, "epoch": 3.6466935535497087, "percentage": 72.93, "elapsed_time": "2:29:47", "remaining_time": "0:55:35", "throughput": 19947.0, "total_tokens": 179283072} +{"current_steps": 56970, "total_steps": 78105, "loss": 0.1446, "lr": 1.0348609494603221e-06, "epoch": 3.6470136354906857, "percentage": 72.94, "elapsed_time": "2:29:48", "remaining_time": "0:55:34", "throughput": 19947.23, "total_tokens": 179298560} +{"current_steps": 56975, "total_steps": 78105, "loss": 0.203, "lr": 1.0344083263456103e-06, "epoch": 3.6473337174316622, "percentage": 72.95, "elapsed_time": "2:29:49", "remaining_time": "0:55:33", "throughput": 19947.43, "total_tokens": 179313152} +{"current_steps": 56980, "total_steps": 78105, "loss": 0.1554, "lr": 1.033955776414913e-06, "epoch": 3.6476537993726392, "percentage": 72.95, "elapsed_time": "2:29:49", "remaining_time": "0:55:32", "throughput": 19947.7, "total_tokens": 179329024} +{"current_steps": 56985, "total_steps": 78105, "loss": 0.1955, "lr": 1.0335032996908275e-06, "epoch": 3.647973881313616, "percentage": 72.96, "elapsed_time": "2:29:50", "remaining_time": "0:55:32", "throughput": 19947.95, "total_tokens": 179344640} +{"current_steps": 56990, "total_steps": 78105, "loss": 0.1811, "lr": 1.033050896195949e-06, "epoch": 3.648293963254593, "percentage": 72.97, "elapsed_time": "2:29:51", "remaining_time": "0:55:31", "throughput": 19948.24, "total_tokens": 179361216} +{"current_steps": 56995, "total_steps": 78105, "loss": 0.1391, "lr": 1.0325985659528664e-06, "epoch": 3.64861404519557, "percentage": 72.97, "elapsed_time": "2:29:51", "remaining_time": "0:55:30", "throughput": 19948.46, "total_tokens": 179376128} +{"current_steps": 57000, "total_steps": 78105, "loss": 0.1746, "lr": 1.0321463089841692e-06, "epoch": 3.648934127136547, "percentage": 72.98, "elapsed_time": "2:29:52", "remaining_time": "0:55:29", "throughput": 19948.72, "total_tokens": 179392064} +{"current_steps": 57005, "total_steps": 78105, "loss": 0.1963, "lr": 1.0316941253124393e-06, "epoch": 3.6492542090775237, "percentage": 72.99, "elapsed_time": "2:29:53", "remaining_time": "0:55:28", "throughput": 19948.96, "total_tokens": 179407616} +{"current_steps": 57010, "total_steps": 78105, "loss": 0.1602, "lr": 1.0312420149602568e-06, "epoch": 3.6495742910185007, "percentage": 72.99, "elapsed_time": "2:29:54", "remaining_time": "0:55:27", "throughput": 19949.23, "total_tokens": 179423872} +{"current_steps": 57015, "total_steps": 78105, "loss": 0.1398, "lr": 1.0307899779501976e-06, "epoch": 3.6498943729594777, "percentage": 73.0, "elapsed_time": "2:29:54", "remaining_time": "0:55:27", "throughput": 19949.43, "total_tokens": 179438656} +{"current_steps": 57020, "total_steps": 78105, "loss": 0.1756, "lr": 1.030338014304834e-06, "epoch": 3.6502144549004543, "percentage": 73.0, "elapsed_time": "2:29:55", "remaining_time": "0:55:26", "throughput": 19949.64, "total_tokens": 179453184} +{"current_steps": 57025, "total_steps": 78105, "loss": 0.1713, "lr": 1.0298861240467348e-06, "epoch": 3.6505345368414313, "percentage": 73.01, "elapsed_time": "2:29:55", "remaining_time": "0:55:25", "throughput": 19949.9, "total_tokens": 179469120} +{"current_steps": 57030, "total_steps": 78105, "loss": 0.1347, "lr": 1.0294343071984641e-06, "epoch": 3.6508546187824082, "percentage": 73.02, "elapsed_time": "2:29:56", "remaining_time": "0:55:24", "throughput": 19950.21, "total_tokens": 179485952} +{"current_steps": 57035, "total_steps": 78105, "loss": 0.1821, "lr": 1.028982563782586e-06, "epoch": 3.6511747007233852, "percentage": 73.02, "elapsed_time": "2:29:57", "remaining_time": "0:55:23", "throughput": 19950.45, "total_tokens": 179501696} +{"current_steps": 57040, "total_steps": 78105, "loss": 0.1825, "lr": 1.0285308938216545e-06, "epoch": 3.6514947826643622, "percentage": 73.03, "elapsed_time": "2:29:58", "remaining_time": "0:55:23", "throughput": 19950.7, "total_tokens": 179517568} +{"current_steps": 57045, "total_steps": 78105, "loss": 0.2007, "lr": 1.0280792973382276e-06, "epoch": 3.651814864605339, "percentage": 73.04, "elapsed_time": "2:29:58", "remaining_time": "0:55:22", "throughput": 19951.02, "total_tokens": 179534912} +{"current_steps": 57050, "total_steps": 78105, "loss": 0.1787, "lr": 1.0276277743548518e-06, "epoch": 3.6521349465463158, "percentage": 73.04, "elapsed_time": "2:29:59", "remaining_time": "0:55:21", "throughput": 19951.27, "total_tokens": 179550656} +{"current_steps": 57055, "total_steps": 78105, "loss": 0.2023, "lr": 1.0271763248940763e-06, "epoch": 3.6524550284872928, "percentage": 73.05, "elapsed_time": "2:30:00", "remaining_time": "0:55:20", "throughput": 19951.54, "total_tokens": 179566912} +{"current_steps": 57060, "total_steps": 78105, "loss": 0.2051, "lr": 1.0267249489784437e-06, "epoch": 3.6527751104282697, "percentage": 73.06, "elapsed_time": "2:30:00", "remaining_time": "0:55:19", "throughput": 19951.81, "total_tokens": 179583040} +{"current_steps": 57065, "total_steps": 78105, "loss": 0.2087, "lr": 1.0262736466304923e-06, "epoch": 3.6530951923692463, "percentage": 73.06, "elapsed_time": "2:30:01", "remaining_time": "0:55:18", "throughput": 19952.09, "total_tokens": 179599424} +{"current_steps": 57070, "total_steps": 78105, "loss": 0.1832, "lr": 1.0258224178727602e-06, "epoch": 3.6534152743102233, "percentage": 73.07, "elapsed_time": "2:30:02", "remaining_time": "0:55:18", "throughput": 19952.28, "total_tokens": 179614272} +{"current_steps": 57075, "total_steps": 78105, "loss": 0.2102, "lr": 1.0253712627277761e-06, "epoch": 3.6537353562512003, "percentage": 73.07, "elapsed_time": "2:30:02", "remaining_time": "0:55:17", "throughput": 19952.53, "total_tokens": 179630144} +{"current_steps": 57080, "total_steps": 78105, "loss": 0.1381, "lr": 1.0249201812180711e-06, "epoch": 3.6540554381921773, "percentage": 73.08, "elapsed_time": "2:30:03", "remaining_time": "0:55:16", "throughput": 19952.86, "total_tokens": 179647744} +{"current_steps": 57085, "total_steps": 78105, "loss": 0.2123, "lr": 1.0244691733661687e-06, "epoch": 3.6543755201331543, "percentage": 73.09, "elapsed_time": "2:30:04", "remaining_time": "0:55:15", "throughput": 19953.11, "total_tokens": 179663744} +{"current_steps": 57090, "total_steps": 78105, "loss": 0.1327, "lr": 1.0240182391945902e-06, "epoch": 3.6546956020741312, "percentage": 73.09, "elapsed_time": "2:30:04", "remaining_time": "0:55:14", "throughput": 19953.35, "total_tokens": 179679360} +{"current_steps": 57095, "total_steps": 78105, "loss": 0.2272, "lr": 1.0235673787258527e-06, "epoch": 3.655015684015108, "percentage": 73.1, "elapsed_time": "2:30:05", "remaining_time": "0:55:13", "throughput": 19953.56, "total_tokens": 179694272} +{"current_steps": 57100, "total_steps": 78105, "loss": 0.2296, "lr": 1.0231165919824699e-06, "epoch": 3.655335765956085, "percentage": 73.11, "elapsed_time": "2:30:06", "remaining_time": "0:55:13", "throughput": 19953.79, "total_tokens": 179709632} +{"current_steps": 57105, "total_steps": 78105, "loss": 0.2083, "lr": 1.0226658789869514e-06, "epoch": 3.6556558478970618, "percentage": 73.11, "elapsed_time": "2:30:06", "remaining_time": "0:55:12", "throughput": 19954.0, "total_tokens": 179724736} +{"current_steps": 57110, "total_steps": 78105, "loss": 0.1711, "lr": 1.0222152397618037e-06, "epoch": 3.6559759298380383, "percentage": 73.12, "elapsed_time": "2:30:07", "remaining_time": "0:55:11", "throughput": 19954.3, "total_tokens": 179741440} +{"current_steps": 57115, "total_steps": 78105, "loss": 0.1709, "lr": 1.021764674329529e-06, "epoch": 3.6562960117790153, "percentage": 73.13, "elapsed_time": "2:30:08", "remaining_time": "0:55:10", "throughput": 19954.54, "total_tokens": 179756992} +{"current_steps": 57120, "total_steps": 78105, "loss": 0.1895, "lr": 1.021314182712627e-06, "epoch": 3.6566160937199923, "percentage": 73.13, "elapsed_time": "2:30:08", "remaining_time": "0:55:09", "throughput": 19954.76, "total_tokens": 179771968} +{"current_steps": 57125, "total_steps": 78105, "loss": 0.1812, "lr": 1.0208637649335928e-06, "epoch": 3.6569361756609693, "percentage": 73.14, "elapsed_time": "2:30:09", "remaining_time": "0:55:08", "throughput": 19955.02, "total_tokens": 179788096} +{"current_steps": 57130, "total_steps": 78105, "loss": 0.1849, "lr": 1.0204134210149178e-06, "epoch": 3.6572562576019463, "percentage": 73.15, "elapsed_time": "2:30:10", "remaining_time": "0:55:08", "throughput": 19955.32, "total_tokens": 179804864} +{"current_steps": 57135, "total_steps": 78105, "loss": 0.1623, "lr": 1.0199631509790893e-06, "epoch": 3.657576339542923, "percentage": 73.15, "elapsed_time": "2:30:11", "remaining_time": "0:55:07", "throughput": 19955.56, "total_tokens": 179820544} +{"current_steps": 57140, "total_steps": 78105, "loss": 0.119, "lr": 1.019512954848592e-06, "epoch": 3.6578964214839, "percentage": 73.16, "elapsed_time": "2:30:11", "remaining_time": "0:55:06", "throughput": 19955.82, "total_tokens": 179836544} +{"current_steps": 57145, "total_steps": 78105, "loss": 0.1459, "lr": 1.0190628326459062e-06, "epoch": 3.658216503424877, "percentage": 73.16, "elapsed_time": "2:30:12", "remaining_time": "0:55:05", "throughput": 19956.02, "total_tokens": 179851648} +{"current_steps": 57150, "total_steps": 78105, "loss": 0.1805, "lr": 1.0186127843935079e-06, "epoch": 3.658536585365854, "percentage": 73.17, "elapsed_time": "2:30:13", "remaining_time": "0:55:04", "throughput": 19956.26, "total_tokens": 179867072} +{"current_steps": 57155, "total_steps": 78105, "loss": 0.1724, "lr": 1.0181628101138724e-06, "epoch": 3.6588566673068303, "percentage": 73.18, "elapsed_time": "2:30:13", "remaining_time": "0:55:03", "throughput": 19956.51, "total_tokens": 179883008} +{"current_steps": 57160, "total_steps": 78105, "loss": 0.1869, "lr": 1.0177129098294658e-06, "epoch": 3.6591767492478073, "percentage": 73.18, "elapsed_time": "2:30:14", "remaining_time": "0:55:03", "throughput": 19956.77, "total_tokens": 179898560} +{"current_steps": 57165, "total_steps": 78105, "loss": 0.1669, "lr": 1.0172630835627564e-06, "epoch": 3.6594968311887843, "percentage": 73.19, "elapsed_time": "2:30:15", "remaining_time": "0:55:02", "throughput": 19957.01, "total_tokens": 179913856} +{"current_steps": 57170, "total_steps": 78105, "loss": 0.1365, "lr": 1.0168133313362053e-06, "epoch": 3.6598169131297613, "percentage": 73.2, "elapsed_time": "2:30:15", "remaining_time": "0:55:01", "throughput": 19957.33, "total_tokens": 179931008} +{"current_steps": 57175, "total_steps": 78105, "loss": 0.1969, "lr": 1.01636365317227e-06, "epoch": 3.6601369950707383, "percentage": 73.2, "elapsed_time": "2:30:16", "remaining_time": "0:55:00", "throughput": 19957.56, "total_tokens": 179946240} +{"current_steps": 57180, "total_steps": 78105, "loss": 0.1953, "lr": 1.0159140490934077e-06, "epoch": 3.660457077011715, "percentage": 73.21, "elapsed_time": "2:30:17", "remaining_time": "0:54:59", "throughput": 19957.77, "total_tokens": 179961728} +{"current_steps": 57185, "total_steps": 78105, "loss": 0.1594, "lr": 1.0154645191220655e-06, "epoch": 3.660777158952692, "percentage": 73.22, "elapsed_time": "2:30:17", "remaining_time": "0:54:58", "throughput": 19957.99, "total_tokens": 179976576} +{"current_steps": 57190, "total_steps": 78105, "loss": 0.1153, "lr": 1.0150150632806944e-06, "epoch": 3.661097240893669, "percentage": 73.22, "elapsed_time": "2:30:18", "remaining_time": "0:54:58", "throughput": 19958.24, "total_tokens": 179992384} +{"current_steps": 57195, "total_steps": 78105, "loss": 0.144, "lr": 1.0145656815917344e-06, "epoch": 3.661417322834646, "percentage": 73.23, "elapsed_time": "2:30:19", "remaining_time": "0:54:57", "throughput": 19958.56, "total_tokens": 180009600} +{"current_steps": 57200, "total_steps": 78105, "loss": 0.1512, "lr": 1.0141163740776277e-06, "epoch": 3.6617374047756224, "percentage": 73.23, "elapsed_time": "2:30:19", "remaining_time": "0:54:56", "throughput": 19958.8, "total_tokens": 180025152} +{"current_steps": 57205, "total_steps": 78105, "loss": 0.1425, "lr": 1.0136671407608096e-06, "epoch": 3.6620574867165994, "percentage": 73.24, "elapsed_time": "2:30:20", "remaining_time": "0:54:55", "throughput": 19959.02, "total_tokens": 180040320} +{"current_steps": 57210, "total_steps": 78105, "loss": 0.117, "lr": 1.0132179816637126e-06, "epoch": 3.6623775686575764, "percentage": 73.25, "elapsed_time": "2:30:21", "remaining_time": "0:54:54", "throughput": 19959.36, "total_tokens": 180058112} +{"current_steps": 57215, "total_steps": 78105, "loss": 0.1732, "lr": 1.0127688968087647e-06, "epoch": 3.6626976505985533, "percentage": 73.25, "elapsed_time": "2:30:21", "remaining_time": "0:54:54", "throughput": 19959.54, "total_tokens": 180072640} +{"current_steps": 57220, "total_steps": 78105, "loss": 0.2144, "lr": 1.012319886218392e-06, "epoch": 3.6630177325395303, "percentage": 73.26, "elapsed_time": "2:30:22", "remaining_time": "0:54:53", "throughput": 19959.74, "total_tokens": 180087424} +{"current_steps": 57225, "total_steps": 78105, "loss": 0.1504, "lr": 1.0118709499150139e-06, "epoch": 3.663337814480507, "percentage": 73.27, "elapsed_time": "2:30:23", "remaining_time": "0:54:52", "throughput": 19960.12, "total_tokens": 180106176} +{"current_steps": 57230, "total_steps": 78105, "loss": 0.1149, "lr": 1.0114220879210513e-06, "epoch": 3.663657896421484, "percentage": 73.27, "elapsed_time": "2:30:23", "remaining_time": "0:54:51", "throughput": 19960.33, "total_tokens": 180121408} +{"current_steps": 57235, "total_steps": 78105, "loss": 0.1256, "lr": 1.0109733002589137e-06, "epoch": 3.663977978362461, "percentage": 73.28, "elapsed_time": "2:30:24", "remaining_time": "0:54:50", "throughput": 19960.56, "total_tokens": 180137024} +{"current_steps": 57240, "total_steps": 78105, "loss": 0.1808, "lr": 1.0105245869510146e-06, "epoch": 3.6642980603034374, "percentage": 73.29, "elapsed_time": "2:30:25", "remaining_time": "0:54:49", "throughput": 19960.81, "total_tokens": 180153152} +{"current_steps": 57245, "total_steps": 78105, "loss": 0.1366, "lr": 1.0100759480197592e-06, "epoch": 3.6646181422444144, "percentage": 73.29, "elapsed_time": "2:30:26", "remaining_time": "0:54:49", "throughput": 19961.02, "total_tokens": 180168320} +{"current_steps": 57250, "total_steps": 78105, "loss": 0.1463, "lr": 1.0096273834875502e-06, "epoch": 3.6649382241853914, "percentage": 73.3, "elapsed_time": "2:30:26", "remaining_time": "0:54:48", "throughput": 19961.21, "total_tokens": 180182784} +{"current_steps": 57255, "total_steps": 78105, "loss": 0.2409, "lr": 1.0091788933767865e-06, "epoch": 3.6652583061263684, "percentage": 73.31, "elapsed_time": "2:30:27", "remaining_time": "0:54:47", "throughput": 19961.48, "total_tokens": 180198848} +{"current_steps": 57260, "total_steps": 78105, "loss": 0.1213, "lr": 1.0087304777098628e-06, "epoch": 3.6655783880673454, "percentage": 73.31, "elapsed_time": "2:30:27", "remaining_time": "0:54:46", "throughput": 19961.68, "total_tokens": 180213568} +{"current_steps": 57265, "total_steps": 78105, "loss": 0.1878, "lr": 1.008282136509173e-06, "epoch": 3.6658984700083224, "percentage": 73.32, "elapsed_time": "2:30:28", "remaining_time": "0:54:45", "throughput": 19961.95, "total_tokens": 180229888} +{"current_steps": 57270, "total_steps": 78105, "loss": 0.1587, "lr": 1.0078338697971013e-06, "epoch": 3.666218551949299, "percentage": 73.32, "elapsed_time": "2:30:29", "remaining_time": "0:54:44", "throughput": 19962.16, "total_tokens": 180244864} +{"current_steps": 57275, "total_steps": 78105, "loss": 0.1306, "lr": 1.0073856775960356e-06, "epoch": 3.666538633890276, "percentage": 73.33, "elapsed_time": "2:30:30", "remaining_time": "0:54:44", "throughput": 19962.44, "total_tokens": 180261184} +{"current_steps": 57280, "total_steps": 78105, "loss": 0.1935, "lr": 1.0069375599283526e-06, "epoch": 3.666858715831253, "percentage": 73.34, "elapsed_time": "2:30:30", "remaining_time": "0:54:43", "throughput": 19962.66, "total_tokens": 180276224} +{"current_steps": 57285, "total_steps": 78105, "loss": 0.2373, "lr": 1.0064895168164319e-06, "epoch": 3.6671787977722294, "percentage": 73.34, "elapsed_time": "2:30:31", "remaining_time": "0:54:42", "throughput": 19962.92, "total_tokens": 180292096} +{"current_steps": 57290, "total_steps": 78105, "loss": 0.1558, "lr": 1.0060415482826449e-06, "epoch": 3.6674988797132064, "percentage": 73.35, "elapsed_time": "2:30:32", "remaining_time": "0:54:41", "throughput": 19963.19, "total_tokens": 180308160} +{"current_steps": 57295, "total_steps": 78105, "loss": 0.1774, "lr": 1.0055936543493609e-06, "epoch": 3.6678189616541834, "percentage": 73.36, "elapsed_time": "2:30:32", "remaining_time": "0:54:40", "throughput": 19963.4, "total_tokens": 180323200} +{"current_steps": 57300, "total_steps": 78105, "loss": 0.2222, "lr": 1.005145835038947e-06, "epoch": 3.6681390435951604, "percentage": 73.36, "elapsed_time": "2:30:33", "remaining_time": "0:54:39", "throughput": 19963.69, "total_tokens": 180340032} +{"current_steps": 57305, "total_steps": 78105, "loss": 0.2245, "lr": 1.0046980903737621e-06, "epoch": 3.6684591255361374, "percentage": 73.37, "elapsed_time": "2:30:34", "remaining_time": "0:54:39", "throughput": 19963.89, "total_tokens": 180354752} +{"current_steps": 57310, "total_steps": 78105, "loss": 0.1197, "lr": 1.0042504203761667e-06, "epoch": 3.6687792074771144, "percentage": 73.38, "elapsed_time": "2:30:34", "remaining_time": "0:54:38", "throughput": 19964.16, "total_tokens": 180370944} +{"current_steps": 57315, "total_steps": 78105, "loss": 0.1151, "lr": 1.0038028250685142e-06, "epoch": 3.669099289418091, "percentage": 73.38, "elapsed_time": "2:30:35", "remaining_time": "0:54:37", "throughput": 19964.33, "total_tokens": 180385344} +{"current_steps": 57320, "total_steps": 78105, "loss": 0.1282, "lr": 1.0033553044731554e-06, "epoch": 3.669419371359068, "percentage": 73.39, "elapsed_time": "2:30:36", "remaining_time": "0:54:36", "throughput": 19964.54, "total_tokens": 180400256} +{"current_steps": 57325, "total_steps": 78105, "loss": 0.1913, "lr": 1.0029078586124367e-06, "epoch": 3.669739453300045, "percentage": 73.39, "elapsed_time": "2:30:36", "remaining_time": "0:54:35", "throughput": 19964.85, "total_tokens": 180417216} +{"current_steps": 57330, "total_steps": 78105, "loss": 0.1991, "lr": 1.0024604875087016e-06, "epoch": 3.6700595352410215, "percentage": 73.4, "elapsed_time": "2:30:37", "remaining_time": "0:54:34", "throughput": 19965.1, "total_tokens": 180433152} +{"current_steps": 57335, "total_steps": 78105, "loss": 0.1723, "lr": 1.0020131911842892e-06, "epoch": 3.6703796171819985, "percentage": 73.41, "elapsed_time": "2:30:38", "remaining_time": "0:54:34", "throughput": 19965.4, "total_tokens": 180450112} +{"current_steps": 57340, "total_steps": 78105, "loss": 0.1653, "lr": 1.0015659696615355e-06, "epoch": 3.6706996991229754, "percentage": 73.41, "elapsed_time": "2:30:38", "remaining_time": "0:54:33", "throughput": 19965.69, "total_tokens": 180466880} +{"current_steps": 57345, "total_steps": 78105, "loss": 0.1518, "lr": 1.001118822962771e-06, "epoch": 3.6710197810639524, "percentage": 73.42, "elapsed_time": "2:30:39", "remaining_time": "0:54:32", "throughput": 19965.96, "total_tokens": 180482944} +{"current_steps": 57350, "total_steps": 78105, "loss": 0.2415, "lr": 1.0006717511103264e-06, "epoch": 3.6713398630049294, "percentage": 73.43, "elapsed_time": "2:30:40", "remaining_time": "0:54:31", "throughput": 19966.18, "total_tokens": 180498112} +{"current_steps": 57355, "total_steps": 78105, "loss": 0.1801, "lr": 1.0002247541265247e-06, "epoch": 3.6716599449459064, "percentage": 73.43, "elapsed_time": "2:30:40", "remaining_time": "0:54:30", "throughput": 19966.37, "total_tokens": 180512704} +{"current_steps": 57360, "total_steps": 78105, "loss": 0.1716, "lr": 9.997778320336867e-07, "epoch": 3.671980026886883, "percentage": 73.44, "elapsed_time": "2:30:41", "remaining_time": "0:54:29", "throughput": 19966.6, "total_tokens": 180528000} +{"current_steps": 57365, "total_steps": 78105, "loss": 0.1643, "lr": 9.993309848541295e-07, "epoch": 3.67230010882786, "percentage": 73.45, "elapsed_time": "2:30:42", "remaining_time": "0:54:29", "throughput": 19966.87, "total_tokens": 180544768} +{"current_steps": 57370, "total_steps": 78105, "loss": 0.2062, "lr": 9.98884212610166e-07, "epoch": 3.672620190768837, "percentage": 73.45, "elapsed_time": "2:30:42", "remaining_time": "0:54:28", "throughput": 19967.1, "total_tokens": 180560384} +{"current_steps": 57375, "total_steps": 78105, "loss": 0.1647, "lr": 9.98437515324106e-07, "epoch": 3.6729402727098135, "percentage": 73.46, "elapsed_time": "2:30:43", "remaining_time": "0:54:27", "throughput": 19967.41, "total_tokens": 180577280} +{"current_steps": 57380, "total_steps": 78105, "loss": 0.1513, "lr": 9.979908930182543e-07, "epoch": 3.6732603546507905, "percentage": 73.47, "elapsed_time": "2:30:44", "remaining_time": "0:54:26", "throughput": 19967.69, "total_tokens": 180593920} +{"current_steps": 57385, "total_steps": 78105, "loss": 0.1824, "lr": 9.975443457149155e-07, "epoch": 3.6735804365917675, "percentage": 73.47, "elapsed_time": "2:30:44", "remaining_time": "0:54:25", "throughput": 19967.89, "total_tokens": 180608640} +{"current_steps": 57390, "total_steps": 78105, "loss": 0.2315, "lr": 9.970978734363843e-07, "epoch": 3.6739005185327445, "percentage": 73.48, "elapsed_time": "2:30:45", "remaining_time": "0:54:25", "throughput": 19968.1, "total_tokens": 180623680} +{"current_steps": 57395, "total_steps": 78105, "loss": 0.157, "lr": 9.96651476204958e-07, "epoch": 3.6742206004737215, "percentage": 73.48, "elapsed_time": "2:30:46", "remaining_time": "0:54:24", "throughput": 19968.34, "total_tokens": 180639296} +{"current_steps": 57400, "total_steps": 78105, "loss": 0.152, "lr": 9.962051540429257e-07, "epoch": 3.674540682414698, "percentage": 73.49, "elapsed_time": "2:30:46", "remaining_time": "0:54:23", "throughput": 19968.59, "total_tokens": 180655040} +{"current_steps": 57405, "total_steps": 78105, "loss": 0.1281, "lr": 9.957589069725756e-07, "epoch": 3.674860764355675, "percentage": 73.5, "elapsed_time": "2:30:47", "remaining_time": "0:54:22", "throughput": 19968.85, "total_tokens": 180670976} +{"current_steps": 57410, "total_steps": 78105, "loss": 0.273, "lr": 9.953127350161904e-07, "epoch": 3.675180846296652, "percentage": 73.5, "elapsed_time": "2:30:48", "remaining_time": "0:54:21", "throughput": 19969.07, "total_tokens": 180686400} +{"current_steps": 57415, "total_steps": 78105, "loss": 0.2331, "lr": 9.948666381960484e-07, "epoch": 3.675500928237629, "percentage": 73.51, "elapsed_time": "2:30:49", "remaining_time": "0:54:20", "throughput": 19969.38, "total_tokens": 180703488} +{"current_steps": 57420, "total_steps": 78105, "loss": 0.2869, "lr": 9.944206165344284e-07, "epoch": 3.6758210101786055, "percentage": 73.52, "elapsed_time": "2:30:49", "remaining_time": "0:54:20", "throughput": 19969.63, "total_tokens": 180719424} +{"current_steps": 57425, "total_steps": 78105, "loss": 0.1517, "lr": 9.939746700535987e-07, "epoch": 3.6761410921195825, "percentage": 73.52, "elapsed_time": "2:30:50", "remaining_time": "0:54:19", "throughput": 19969.88, "total_tokens": 180735168} +{"current_steps": 57430, "total_steps": 78105, "loss": 0.1296, "lr": 9.935287987758305e-07, "epoch": 3.6764611740605595, "percentage": 73.53, "elapsed_time": "2:30:51", "remaining_time": "0:54:18", "throughput": 19970.15, "total_tokens": 180751232} +{"current_steps": 57435, "total_steps": 78105, "loss": 0.1411, "lr": 9.930830027233871e-07, "epoch": 3.6767812560015365, "percentage": 73.54, "elapsed_time": "2:30:51", "remaining_time": "0:54:17", "throughput": 19970.38, "total_tokens": 180766784} +{"current_steps": 57440, "total_steps": 78105, "loss": 0.2098, "lr": 9.926372819185292e-07, "epoch": 3.6771013379425135, "percentage": 73.54, "elapsed_time": "2:30:52", "remaining_time": "0:54:16", "throughput": 19970.59, "total_tokens": 180781888} +{"current_steps": 57445, "total_steps": 78105, "loss": 0.1277, "lr": 9.921916363835138e-07, "epoch": 3.67742141988349, "percentage": 73.55, "elapsed_time": "2:30:53", "remaining_time": "0:54:15", "throughput": 19970.85, "total_tokens": 180798016} +{"current_steps": 57450, "total_steps": 78105, "loss": 0.1431, "lr": 9.917460661405945e-07, "epoch": 3.677741501824467, "percentage": 73.55, "elapsed_time": "2:30:53", "remaining_time": "0:54:15", "throughput": 19971.11, "total_tokens": 180814016} +{"current_steps": 57455, "total_steps": 78105, "loss": 0.1803, "lr": 9.913005712120191e-07, "epoch": 3.678061583765444, "percentage": 73.56, "elapsed_time": "2:30:54", "remaining_time": "0:54:14", "throughput": 19971.35, "total_tokens": 180829824} +{"current_steps": 57460, "total_steps": 78105, "loss": 0.1095, "lr": 9.908551516200368e-07, "epoch": 3.678381665706421, "percentage": 73.57, "elapsed_time": "2:30:55", "remaining_time": "0:54:13", "throughput": 19971.55, "total_tokens": 180844928} +{"current_steps": 57465, "total_steps": 78105, "loss": 0.2054, "lr": 9.904098073868854e-07, "epoch": 3.6787017476473975, "percentage": 73.57, "elapsed_time": "2:30:55", "remaining_time": "0:54:12", "throughput": 19971.81, "total_tokens": 180860800} +{"current_steps": 57470, "total_steps": 78105, "loss": 0.1437, "lr": 9.899645385348062e-07, "epoch": 3.6790218295883745, "percentage": 73.58, "elapsed_time": "2:30:56", "remaining_time": "0:54:11", "throughput": 19972.01, "total_tokens": 180875712} +{"current_steps": 57475, "total_steps": 78105, "loss": 0.1594, "lr": 9.89519345086032e-07, "epoch": 3.6793419115293515, "percentage": 73.59, "elapsed_time": "2:30:57", "remaining_time": "0:54:10", "throughput": 19972.31, "total_tokens": 180892672} +{"current_steps": 57480, "total_steps": 78105, "loss": 0.1879, "lr": 9.890742270627943e-07, "epoch": 3.6796619934703285, "percentage": 73.59, "elapsed_time": "2:30:57", "remaining_time": "0:54:10", "throughput": 19972.55, "total_tokens": 180907968} +{"current_steps": 57485, "total_steps": 78105, "loss": 0.2108, "lr": 9.886291844873194e-07, "epoch": 3.6799820754113055, "percentage": 73.6, "elapsed_time": "2:30:58", "remaining_time": "0:54:09", "throughput": 19972.72, "total_tokens": 180921984} +{"current_steps": 57490, "total_steps": 78105, "loss": 0.1362, "lr": 9.8818421738183e-07, "epoch": 3.680302157352282, "percentage": 73.61, "elapsed_time": "2:30:59", "remaining_time": "0:54:08", "throughput": 19972.96, "total_tokens": 180937280} +{"current_steps": 57495, "total_steps": 78105, "loss": 0.207, "lr": 9.877393257685474e-07, "epoch": 3.680622239293259, "percentage": 73.61, "elapsed_time": "2:30:59", "remaining_time": "0:54:07", "throughput": 19973.21, "total_tokens": 180952960} +{"current_steps": 57500, "total_steps": 78105, "loss": 0.192, "lr": 9.872945096696843e-07, "epoch": 3.680942321234236, "percentage": 73.62, "elapsed_time": "2:31:00", "remaining_time": "0:54:06", "throughput": 19973.45, "total_tokens": 180968768} +{"current_steps": 57505, "total_steps": 78105, "loss": 0.1509, "lr": 9.868497691074555e-07, "epoch": 3.6812624031752126, "percentage": 73.63, "elapsed_time": "2:31:01", "remaining_time": "0:54:05", "throughput": 19973.73, "total_tokens": 180984960} +{"current_steps": 57510, "total_steps": 78105, "loss": 0.1594, "lr": 9.86405104104066e-07, "epoch": 3.6815824851161896, "percentage": 73.63, "elapsed_time": "2:31:01", "remaining_time": "0:54:05", "throughput": 19973.97, "total_tokens": 181001088} +{"current_steps": 57515, "total_steps": 78105, "loss": 0.1653, "lr": 9.859605146817223e-07, "epoch": 3.6819025670571666, "percentage": 73.64, "elapsed_time": "2:31:02", "remaining_time": "0:54:04", "throughput": 19974.21, "total_tokens": 181016576} +{"current_steps": 57520, "total_steps": 78105, "loss": 0.1897, "lr": 9.85516000862624e-07, "epoch": 3.6822226489981436, "percentage": 73.64, "elapsed_time": "2:31:03", "remaining_time": "0:54:03", "throughput": 19974.43, "total_tokens": 181031488} +{"current_steps": 57525, "total_steps": 78105, "loss": 0.1544, "lr": 9.850715626689674e-07, "epoch": 3.6825427309391205, "percentage": 73.65, "elapsed_time": "2:31:03", "remaining_time": "0:54:02", "throughput": 19974.71, "total_tokens": 181047616} +{"current_steps": 57530, "total_steps": 78105, "loss": 0.108, "lr": 9.846272001229475e-07, "epoch": 3.6828628128800975, "percentage": 73.66, "elapsed_time": "2:31:04", "remaining_time": "0:54:01", "throughput": 19974.9, "total_tokens": 181062144} +{"current_steps": 57535, "total_steps": 78105, "loss": 0.1709, "lr": 9.841829132467504e-07, "epoch": 3.683182894821074, "percentage": 73.66, "elapsed_time": "2:31:05", "remaining_time": "0:54:00", "throughput": 19975.16, "total_tokens": 181077952} +{"current_steps": 57540, "total_steps": 78105, "loss": 0.1293, "lr": 9.837387020625638e-07, "epoch": 3.683502976762051, "percentage": 73.67, "elapsed_time": "2:31:05", "remaining_time": "0:54:00", "throughput": 19975.42, "total_tokens": 181094080} +{"current_steps": 57545, "total_steps": 78105, "loss": 0.1193, "lr": 9.832945665925685e-07, "epoch": 3.683823058703028, "percentage": 73.68, "elapsed_time": "2:31:06", "remaining_time": "0:53:59", "throughput": 19975.66, "total_tokens": 181109696} +{"current_steps": 57550, "total_steps": 78105, "loss": 0.2667, "lr": 9.828505068589422e-07, "epoch": 3.6841431406440046, "percentage": 73.68, "elapsed_time": "2:31:07", "remaining_time": "0:53:58", "throughput": 19975.91, "total_tokens": 181125440} +{"current_steps": 57555, "total_steps": 78105, "loss": 0.1779, "lr": 9.824065228838592e-07, "epoch": 3.6844632225849816, "percentage": 73.69, "elapsed_time": "2:31:07", "remaining_time": "0:53:57", "throughput": 19976.09, "total_tokens": 181139840} +{"current_steps": 57560, "total_steps": 78105, "loss": 0.181, "lr": 9.819626146894895e-07, "epoch": 3.6847833045259586, "percentage": 73.7, "elapsed_time": "2:31:08", "remaining_time": "0:53:56", "throughput": 19976.32, "total_tokens": 181154880} +{"current_steps": 57565, "total_steps": 78105, "loss": 0.1546, "lr": 9.815187822979995e-07, "epoch": 3.6851033864669356, "percentage": 73.7, "elapsed_time": "2:31:09", "remaining_time": "0:53:56", "throughput": 19976.57, "total_tokens": 181170752} +{"current_steps": 57570, "total_steps": 78105, "loss": 0.1677, "lr": 9.810750257315524e-07, "epoch": 3.6854234684079126, "percentage": 73.71, "elapsed_time": "2:31:09", "remaining_time": "0:53:55", "throughput": 19976.82, "total_tokens": 181186688} +{"current_steps": 57575, "total_steps": 78105, "loss": 0.1408, "lr": 9.806313450123056e-07, "epoch": 3.6857435503488896, "percentage": 73.71, "elapsed_time": "2:31:10", "remaining_time": "0:53:54", "throughput": 19977.04, "total_tokens": 181201984} +{"current_steps": 57580, "total_steps": 78105, "loss": 0.1191, "lr": 9.801877401624175e-07, "epoch": 3.686063632289866, "percentage": 73.72, "elapsed_time": "2:31:11", "remaining_time": "0:53:53", "throughput": 19977.31, "total_tokens": 181218048} +{"current_steps": 57585, "total_steps": 78105, "loss": 0.1615, "lr": 9.797442112040349e-07, "epoch": 3.686383714230843, "percentage": 73.73, "elapsed_time": "2:31:11", "remaining_time": "0:53:52", "throughput": 19977.59, "total_tokens": 181234368} +{"current_steps": 57590, "total_steps": 78105, "loss": 0.1587, "lr": 9.79300758159309e-07, "epoch": 3.68670379617182, "percentage": 73.73, "elapsed_time": "2:31:12", "remaining_time": "0:53:51", "throughput": 19977.83, "total_tokens": 181249792} +{"current_steps": 57595, "total_steps": 78105, "loss": 0.1392, "lr": 9.788573810503819e-07, "epoch": 3.6870238781127966, "percentage": 73.74, "elapsed_time": "2:31:13", "remaining_time": "0:53:51", "throughput": 19978.05, "total_tokens": 181265152} +{"current_steps": 57600, "total_steps": 78105, "loss": 0.1723, "lr": 9.784140798993929e-07, "epoch": 3.6873439600537736, "percentage": 73.75, "elapsed_time": "2:31:13", "remaining_time": "0:53:50", "throughput": 19978.34, "total_tokens": 181281728} +{"current_steps": 57605, "total_steps": 78105, "loss": 0.1786, "lr": 9.77970854728481e-07, "epoch": 3.6876640419947506, "percentage": 73.75, "elapsed_time": "2:31:14", "remaining_time": "0:53:49", "throughput": 19978.56, "total_tokens": 181297344} +{"current_steps": 57610, "total_steps": 78105, "loss": 0.1465, "lr": 9.775277055597748e-07, "epoch": 3.6879841239357276, "percentage": 73.76, "elapsed_time": "2:31:15", "remaining_time": "0:53:48", "throughput": 19978.83, "total_tokens": 181313536} +{"current_steps": 57615, "total_steps": 78105, "loss": 0.1422, "lr": 9.770846324154069e-07, "epoch": 3.6883042058767046, "percentage": 73.77, "elapsed_time": "2:31:15", "remaining_time": "0:53:47", "throughput": 19979.08, "total_tokens": 181329536} +{"current_steps": 57620, "total_steps": 78105, "loss": 0.2226, "lr": 9.766416353174976e-07, "epoch": 3.6886242878176816, "percentage": 73.77, "elapsed_time": "2:31:16", "remaining_time": "0:53:46", "throughput": 19979.3, "total_tokens": 181344832} +{"current_steps": 57625, "total_steps": 78105, "loss": 0.1559, "lr": 9.761987142881713e-07, "epoch": 3.688944369758658, "percentage": 73.78, "elapsed_time": "2:31:17", "remaining_time": "0:53:46", "throughput": 19979.53, "total_tokens": 181360768} +{"current_steps": 57630, "total_steps": 78105, "loss": 0.2096, "lr": 9.75755869349544e-07, "epoch": 3.689264451699635, "percentage": 73.79, "elapsed_time": "2:31:18", "remaining_time": "0:53:45", "throughput": 19979.79, "total_tokens": 181376576} +{"current_steps": 57635, "total_steps": 78105, "loss": 0.146, "lr": 9.75313100523729e-07, "epoch": 3.689584533640612, "percentage": 73.79, "elapsed_time": "2:31:18", "remaining_time": "0:53:44", "throughput": 19980.01, "total_tokens": 181391680} +{"current_steps": 57640, "total_steps": 78105, "loss": 0.1516, "lr": 9.74870407832836e-07, "epoch": 3.6899046155815887, "percentage": 73.8, "elapsed_time": "2:31:19", "remaining_time": "0:53:43", "throughput": 19980.2, "total_tokens": 181406400} +{"current_steps": 57645, "total_steps": 78105, "loss": 0.0999, "lr": 9.744277912989697e-07, "epoch": 3.6902246975225657, "percentage": 73.8, "elapsed_time": "2:31:19", "remaining_time": "0:53:42", "throughput": 19980.44, "total_tokens": 181422144} +{"current_steps": 57650, "total_steps": 78105, "loss": 0.1618, "lr": 9.739852509442352e-07, "epoch": 3.6905447794635426, "percentage": 73.81, "elapsed_time": "2:31:20", "remaining_time": "0:53:41", "throughput": 19980.72, "total_tokens": 181438400} +{"current_steps": 57655, "total_steps": 78105, "loss": 0.1733, "lr": 9.735427867907265e-07, "epoch": 3.6908648614045196, "percentage": 73.82, "elapsed_time": "2:31:21", "remaining_time": "0:53:41", "throughput": 19980.98, "total_tokens": 181454400} +{"current_steps": 57660, "total_steps": 78105, "loss": 0.1211, "lr": 9.731003988605408e-07, "epoch": 3.6911849433454966, "percentage": 73.82, "elapsed_time": "2:31:22", "remaining_time": "0:53:40", "throughput": 19981.24, "total_tokens": 181470400} +{"current_steps": 57665, "total_steps": 78105, "loss": 0.1932, "lr": 9.726580871757681e-07, "epoch": 3.691505025286473, "percentage": 73.83, "elapsed_time": "2:31:22", "remaining_time": "0:53:39", "throughput": 19981.46, "total_tokens": 181485632} +{"current_steps": 57670, "total_steps": 78105, "loss": 0.1826, "lr": 9.72215851758495e-07, "epoch": 3.69182510722745, "percentage": 73.84, "elapsed_time": "2:31:23", "remaining_time": "0:53:38", "throughput": 19981.73, "total_tokens": 181502208} +{"current_steps": 57675, "total_steps": 78105, "loss": 0.2884, "lr": 9.717736926308039e-07, "epoch": 3.692145189168427, "percentage": 73.84, "elapsed_time": "2:31:24", "remaining_time": "0:53:37", "throughput": 19981.96, "total_tokens": 181517888} +{"current_steps": 57680, "total_steps": 78105, "loss": 0.1474, "lr": 9.713316098147747e-07, "epoch": 3.692465271109404, "percentage": 73.85, "elapsed_time": "2:31:24", "remaining_time": "0:53:36", "throughput": 19982.18, "total_tokens": 181532992} +{"current_steps": 57685, "total_steps": 78105, "loss": 0.1147, "lr": 9.708896033324814e-07, "epoch": 3.6927853530503807, "percentage": 73.86, "elapsed_time": "2:31:25", "remaining_time": "0:53:36", "throughput": 19982.42, "total_tokens": 181548672} +{"current_steps": 57690, "total_steps": 78105, "loss": 0.1595, "lr": 9.70447673205998e-07, "epoch": 3.6931054349913577, "percentage": 73.86, "elapsed_time": "2:31:26", "remaining_time": "0:53:35", "throughput": 19982.61, "total_tokens": 181563328} +{"current_steps": 57695, "total_steps": 78105, "loss": 0.1967, "lr": 9.700058194573892e-07, "epoch": 3.6934255169323347, "percentage": 73.87, "elapsed_time": "2:31:26", "remaining_time": "0:53:34", "throughput": 19982.87, "total_tokens": 181579520} +{"current_steps": 57700, "total_steps": 78105, "loss": 0.122, "lr": 9.69564042108721e-07, "epoch": 3.6937455988733117, "percentage": 73.87, "elapsed_time": "2:31:27", "remaining_time": "0:53:33", "throughput": 19983.07, "total_tokens": 181594112} +{"current_steps": 57705, "total_steps": 78105, "loss": 0.1416, "lr": 9.691223411820528e-07, "epoch": 3.6940656808142887, "percentage": 73.88, "elapsed_time": "2:31:28", "remaining_time": "0:53:32", "throughput": 19983.3, "total_tokens": 181609536} +{"current_steps": 57710, "total_steps": 78105, "loss": 0.1628, "lr": 9.686807166994407e-07, "epoch": 3.694385762755265, "percentage": 73.89, "elapsed_time": "2:31:28", "remaining_time": "0:53:31", "throughput": 19983.49, "total_tokens": 181624192} +{"current_steps": 57715, "total_steps": 78105, "loss": 0.2056, "lr": 9.682391686829376e-07, "epoch": 3.694705844696242, "percentage": 73.89, "elapsed_time": "2:31:29", "remaining_time": "0:53:31", "throughput": 19983.79, "total_tokens": 181641216} +{"current_steps": 57720, "total_steps": 78105, "loss": 0.1575, "lr": 9.677976971545908e-07, "epoch": 3.695025926637219, "percentage": 73.9, "elapsed_time": "2:31:30", "remaining_time": "0:53:30", "throughput": 19984.01, "total_tokens": 181656832} +{"current_steps": 57725, "total_steps": 78105, "loss": 0.1759, "lr": 9.673563021364476e-07, "epoch": 3.695346008578196, "percentage": 73.91, "elapsed_time": "2:31:30", "remaining_time": "0:53:29", "throughput": 19984.22, "total_tokens": 181671808} +{"current_steps": 57730, "total_steps": 78105, "loss": 0.1817, "lr": 9.66914983650546e-07, "epoch": 3.6956660905191727, "percentage": 73.91, "elapsed_time": "2:31:31", "remaining_time": "0:53:28", "throughput": 19984.46, "total_tokens": 181687360} +{"current_steps": 57735, "total_steps": 78105, "loss": 0.129, "lr": 9.66473741718926e-07, "epoch": 3.6959861724601497, "percentage": 73.92, "elapsed_time": "2:31:32", "remaining_time": "0:53:27", "throughput": 19984.68, "total_tokens": 181702720} +{"current_steps": 57740, "total_steps": 78105, "loss": 0.174, "lr": 9.66032576363618e-07, "epoch": 3.6963062544011267, "percentage": 73.93, "elapsed_time": "2:31:32", "remaining_time": "0:53:27", "throughput": 19984.94, "total_tokens": 181718464} +{"current_steps": 57745, "total_steps": 78105, "loss": 0.1462, "lr": 9.655914876066537e-07, "epoch": 3.6966263363421037, "percentage": 73.93, "elapsed_time": "2:31:33", "remaining_time": "0:53:26", "throughput": 19985.2, "total_tokens": 181734592} +{"current_steps": 57750, "total_steps": 78105, "loss": 0.1505, "lr": 9.65150475470058e-07, "epoch": 3.6969464182830807, "percentage": 73.94, "elapsed_time": "2:31:34", "remaining_time": "0:53:25", "throughput": 19985.44, "total_tokens": 181750464} +{"current_steps": 57755, "total_steps": 78105, "loss": 0.1542, "lr": 9.64709539975853e-07, "epoch": 3.6972665002240572, "percentage": 73.95, "elapsed_time": "2:31:34", "remaining_time": "0:53:24", "throughput": 19985.66, "total_tokens": 181765568} +{"current_steps": 57760, "total_steps": 78105, "loss": 0.1164, "lr": 9.642686811460565e-07, "epoch": 3.697586582165034, "percentage": 73.95, "elapsed_time": "2:31:35", "remaining_time": "0:53:23", "throughput": 19985.91, "total_tokens": 181781888} +{"current_steps": 57765, "total_steps": 78105, "loss": 0.2066, "lr": 9.638278990026818e-07, "epoch": 3.697906664106011, "percentage": 73.96, "elapsed_time": "2:31:36", "remaining_time": "0:53:22", "throughput": 19986.15, "total_tokens": 181797504} +{"current_steps": 57770, "total_steps": 78105, "loss": 0.1559, "lr": 9.633871935677412e-07, "epoch": 3.6982267460469878, "percentage": 73.96, "elapsed_time": "2:31:36", "remaining_time": "0:53:22", "throughput": 19986.37, "total_tokens": 181812800} +{"current_steps": 57775, "total_steps": 78105, "loss": 0.1546, "lr": 9.6294656486324e-07, "epoch": 3.6985468279879647, "percentage": 73.97, "elapsed_time": "2:31:37", "remaining_time": "0:53:21", "throughput": 19986.62, "total_tokens": 181828544} +{"current_steps": 57780, "total_steps": 78105, "loss": 0.1759, "lr": 9.625060129111812e-07, "epoch": 3.6988669099289417, "percentage": 73.98, "elapsed_time": "2:31:38", "remaining_time": "0:53:20", "throughput": 19986.86, "total_tokens": 181844160} +{"current_steps": 57785, "total_steps": 78105, "loss": 0.1689, "lr": 9.620655377335637e-07, "epoch": 3.6991869918699187, "percentage": 73.98, "elapsed_time": "2:31:38", "remaining_time": "0:53:19", "throughput": 19987.07, "total_tokens": 181859392} +{"current_steps": 57790, "total_steps": 78105, "loss": 0.1344, "lr": 9.616251393523823e-07, "epoch": 3.6995070738108957, "percentage": 73.99, "elapsed_time": "2:31:39", "remaining_time": "0:53:18", "throughput": 19987.33, "total_tokens": 181875200} +{"current_steps": 57795, "total_steps": 78105, "loss": 0.1284, "lr": 9.611848177896286e-07, "epoch": 3.6998271557518727, "percentage": 74.0, "elapsed_time": "2:31:40", "remaining_time": "0:53:17", "throughput": 19987.55, "total_tokens": 181890496} +{"current_steps": 57800, "total_steps": 78105, "loss": 0.1392, "lr": 9.607445730672895e-07, "epoch": 3.7001472376928493, "percentage": 74.0, "elapsed_time": "2:31:40", "remaining_time": "0:53:17", "throughput": 19987.76, "total_tokens": 181905472} +{"current_steps": 57805, "total_steps": 78105, "loss": 0.1796, "lr": 9.603044052073479e-07, "epoch": 3.7004673196338262, "percentage": 74.01, "elapsed_time": "2:31:41", "remaining_time": "0:53:16", "throughput": 19987.98, "total_tokens": 181920768} +{"current_steps": 57810, "total_steps": 78105, "loss": 0.1486, "lr": 9.59864314231786e-07, "epoch": 3.7007874015748032, "percentage": 74.02, "elapsed_time": "2:31:42", "remaining_time": "0:53:15", "throughput": 19988.2, "total_tokens": 181935872} +{"current_steps": 57815, "total_steps": 78105, "loss": 0.2066, "lr": 9.594243001625765e-07, "epoch": 3.70110748351578, "percentage": 74.02, "elapsed_time": "2:31:42", "remaining_time": "0:53:14", "throughput": 19988.46, "total_tokens": 181952064} +{"current_steps": 57820, "total_steps": 78105, "loss": 0.1551, "lr": 9.589843630216933e-07, "epoch": 3.7014275654567568, "percentage": 74.03, "elapsed_time": "2:31:43", "remaining_time": "0:53:13", "throughput": 19988.72, "total_tokens": 181968000} +{"current_steps": 57825, "total_steps": 78105, "loss": 0.1295, "lr": 9.585445028311044e-07, "epoch": 3.7017476473977338, "percentage": 74.03, "elapsed_time": "2:31:44", "remaining_time": "0:53:13", "throughput": 19989.1, "total_tokens": 181986816} +{"current_steps": 57830, "total_steps": 78105, "loss": 0.2857, "lr": 9.58104719612773e-07, "epoch": 3.7020677293387108, "percentage": 74.04, "elapsed_time": "2:31:44", "remaining_time": "0:53:12", "throughput": 19989.35, "total_tokens": 182002496} +{"current_steps": 57835, "total_steps": 78105, "loss": 0.1325, "lr": 9.576650133886623e-07, "epoch": 3.7023878112796877, "percentage": 74.05, "elapsed_time": "2:31:45", "remaining_time": "0:53:11", "throughput": 19989.57, "total_tokens": 182017728} +{"current_steps": 57840, "total_steps": 78105, "loss": 0.1165, "lr": 9.572253841807252e-07, "epoch": 3.7027078932206647, "percentage": 74.05, "elapsed_time": "2:31:46", "remaining_time": "0:53:10", "throughput": 19989.81, "total_tokens": 182033536} +{"current_steps": 57845, "total_steps": 78105, "loss": 0.1179, "lr": 9.567858320109182e-07, "epoch": 3.7030279751616413, "percentage": 74.06, "elapsed_time": "2:31:46", "remaining_time": "0:53:09", "throughput": 19990.04, "total_tokens": 182048384} +{"current_steps": 57850, "total_steps": 78105, "loss": 0.2408, "lr": 9.563463569011866e-07, "epoch": 3.7033480571026183, "percentage": 74.07, "elapsed_time": "2:31:47", "remaining_time": "0:53:08", "throughput": 19990.27, "total_tokens": 182063680} +{"current_steps": 57855, "total_steps": 78105, "loss": 0.1413, "lr": 9.559069588734784e-07, "epoch": 3.7036681390435953, "percentage": 74.07, "elapsed_time": "2:31:48", "remaining_time": "0:53:08", "throughput": 19990.49, "total_tokens": 182079168} +{"current_steps": 57860, "total_steps": 78105, "loss": 0.1944, "lr": 9.554676379497338e-07, "epoch": 3.703988220984572, "percentage": 74.08, "elapsed_time": "2:31:48", "remaining_time": "0:53:07", "throughput": 19990.75, "total_tokens": 182095360} +{"current_steps": 57865, "total_steps": 78105, "loss": 0.175, "lr": 9.5502839415189e-07, "epoch": 3.704308302925549, "percentage": 74.09, "elapsed_time": "2:31:49", "remaining_time": "0:53:06", "throughput": 19991.08, "total_tokens": 182112832} +{"current_steps": 57870, "total_steps": 78105, "loss": 0.2135, "lr": 9.54589227501881e-07, "epoch": 3.704628384866526, "percentage": 74.09, "elapsed_time": "2:31:50", "remaining_time": "0:53:05", "throughput": 19991.27, "total_tokens": 182127168} +{"current_steps": 57875, "total_steps": 78105, "loss": 0.1375, "lr": 9.541501380216358e-07, "epoch": 3.704948466807503, "percentage": 74.1, "elapsed_time": "2:31:50", "remaining_time": "0:53:04", "throughput": 19991.51, "total_tokens": 182142656} +{"current_steps": 57880, "total_steps": 78105, "loss": 0.1446, "lr": 9.53711125733081e-07, "epoch": 3.7052685487484798, "percentage": 74.11, "elapsed_time": "2:31:51", "remaining_time": "0:53:03", "throughput": 19991.81, "total_tokens": 182159680} +{"current_steps": 57885, "total_steps": 78105, "loss": 0.233, "lr": 9.532721906581375e-07, "epoch": 3.7055886306894568, "percentage": 74.11, "elapsed_time": "2:31:52", "remaining_time": "0:53:03", "throughput": 19992.02, "total_tokens": 182174464} +{"current_steps": 57890, "total_steps": 78105, "loss": 0.2154, "lr": 9.528333328187248e-07, "epoch": 3.7059087126304333, "percentage": 74.12, "elapsed_time": "2:31:53", "remaining_time": "0:53:02", "throughput": 19992.27, "total_tokens": 182190592} +{"current_steps": 57895, "total_steps": 78105, "loss": 0.2132, "lr": 9.523945522367564e-07, "epoch": 3.7062287945714103, "percentage": 74.12, "elapsed_time": "2:31:53", "remaining_time": "0:53:01", "throughput": 19992.49, "total_tokens": 182205888} +{"current_steps": 57900, "total_steps": 78105, "loss": 0.1342, "lr": 9.519558489341432e-07, "epoch": 3.7065488765123873, "percentage": 74.13, "elapsed_time": "2:31:54", "remaining_time": "0:53:00", "throughput": 19992.76, "total_tokens": 182222208} +{"current_steps": 57905, "total_steps": 78105, "loss": 0.1714, "lr": 9.515172229327912e-07, "epoch": 3.706868958453364, "percentage": 74.14, "elapsed_time": "2:31:55", "remaining_time": "0:52:59", "throughput": 19992.98, "total_tokens": 182237312} +{"current_steps": 57910, "total_steps": 78105, "loss": 0.1067, "lr": 9.510786742546035e-07, "epoch": 3.707189040394341, "percentage": 74.14, "elapsed_time": "2:31:55", "remaining_time": "0:52:58", "throughput": 19993.2, "total_tokens": 182252352} +{"current_steps": 57915, "total_steps": 78105, "loss": 0.148, "lr": 9.506402029214778e-07, "epoch": 3.707509122335318, "percentage": 74.15, "elapsed_time": "2:31:56", "remaining_time": "0:52:58", "throughput": 19993.42, "total_tokens": 182267328} +{"current_steps": 57920, "total_steps": 78105, "loss": 0.1358, "lr": 9.502018089553119e-07, "epoch": 3.707829204276295, "percentage": 74.16, "elapsed_time": "2:31:57", "remaining_time": "0:52:57", "throughput": 19993.66, "total_tokens": 182283200} +{"current_steps": 57925, "total_steps": 78105, "loss": 0.1286, "lr": 9.497634923779933e-07, "epoch": 3.708149286217272, "percentage": 74.16, "elapsed_time": "2:31:57", "remaining_time": "0:52:56", "throughput": 19993.88, "total_tokens": 182298432} +{"current_steps": 57930, "total_steps": 78105, "loss": 0.1499, "lr": 9.493252532114127e-07, "epoch": 3.7084693681582483, "percentage": 74.17, "elapsed_time": "2:31:58", "remaining_time": "0:52:55", "throughput": 19994.11, "total_tokens": 182314048} +{"current_steps": 57935, "total_steps": 78105, "loss": 0.1604, "lr": 9.4888709147745e-07, "epoch": 3.7087894500992253, "percentage": 74.18, "elapsed_time": "2:31:59", "remaining_time": "0:52:54", "throughput": 19994.36, "total_tokens": 182330112} +{"current_steps": 57940, "total_steps": 78105, "loss": 0.1404, "lr": 9.484490071979877e-07, "epoch": 3.7091095320402023, "percentage": 74.18, "elapsed_time": "2:31:59", "remaining_time": "0:52:53", "throughput": 19994.66, "total_tokens": 182347200} +{"current_steps": 57945, "total_steps": 78105, "loss": 0.1375, "lr": 9.480110003948997e-07, "epoch": 3.7094296139811793, "percentage": 74.19, "elapsed_time": "2:32:00", "remaining_time": "0:52:53", "throughput": 19994.9, "total_tokens": 182362816} +{"current_steps": 57950, "total_steps": 78105, "loss": 0.0855, "lr": 9.47573071090058e-07, "epoch": 3.709749695922156, "percentage": 74.19, "elapsed_time": "2:32:01", "remaining_time": "0:52:52", "throughput": 19995.12, "total_tokens": 182377984} +{"current_steps": 57955, "total_steps": 78105, "loss": 0.2059, "lr": 9.47135219305332e-07, "epoch": 3.710069777863133, "percentage": 74.2, "elapsed_time": "2:32:01", "remaining_time": "0:52:51", "throughput": 19995.34, "total_tokens": 182393344} +{"current_steps": 57960, "total_steps": 78105, "loss": 0.1385, "lr": 9.466974450625832e-07, "epoch": 3.71038985980411, "percentage": 74.21, "elapsed_time": "2:32:02", "remaining_time": "0:52:50", "throughput": 19995.57, "total_tokens": 182408832} +{"current_steps": 57965, "total_steps": 78105, "loss": 0.1468, "lr": 9.462597483836741e-07, "epoch": 3.710709941745087, "percentage": 74.21, "elapsed_time": "2:32:03", "remaining_time": "0:52:49", "throughput": 19995.87, "total_tokens": 182426112} +{"current_steps": 57970, "total_steps": 78105, "loss": 0.217, "lr": 9.458221292904599e-07, "epoch": 3.711030023686064, "percentage": 74.22, "elapsed_time": "2:32:03", "remaining_time": "0:52:49", "throughput": 19996.15, "total_tokens": 182442944} +{"current_steps": 57975, "total_steps": 78105, "loss": 0.1142, "lr": 9.453845878047929e-07, "epoch": 3.7113501056270404, "percentage": 74.23, "elapsed_time": "2:32:04", "remaining_time": "0:52:48", "throughput": 19996.39, "total_tokens": 182458496} +{"current_steps": 57980, "total_steps": 78105, "loss": 0.1684, "lr": 9.449471239485222e-07, "epoch": 3.7116701875680174, "percentage": 74.23, "elapsed_time": "2:32:05", "remaining_time": "0:52:47", "throughput": 19996.63, "total_tokens": 182473920} +{"current_steps": 57985, "total_steps": 78105, "loss": 0.1379, "lr": 9.445097377434919e-07, "epoch": 3.7119902695089944, "percentage": 74.24, "elapsed_time": "2:32:05", "remaining_time": "0:52:46", "throughput": 19996.91, "total_tokens": 182490496} +{"current_steps": 57990, "total_steps": 78105, "loss": 0.1118, "lr": 9.440724292115433e-07, "epoch": 3.7123103514499713, "percentage": 74.25, "elapsed_time": "2:32:06", "remaining_time": "0:52:45", "throughput": 19997.13, "total_tokens": 182505920} +{"current_steps": 57995, "total_steps": 78105, "loss": 0.2433, "lr": 9.436351983745121e-07, "epoch": 3.712630433390948, "percentage": 74.25, "elapsed_time": "2:32:07", "remaining_time": "0:52:44", "throughput": 19997.33, "total_tokens": 182520832} +{"current_steps": 58000, "total_steps": 78105, "loss": 0.1167, "lr": 9.431980452542333e-07, "epoch": 3.712950515331925, "percentage": 74.26, "elapsed_time": "2:32:07", "remaining_time": "0:52:44", "throughput": 19997.6, "total_tokens": 182537216} +{"current_steps": 58005, "total_steps": 78105, "loss": 0.1445, "lr": 9.427609698725349e-07, "epoch": 3.713270597272902, "percentage": 74.27, "elapsed_time": "2:32:08", "remaining_time": "0:52:43", "throughput": 19997.8, "total_tokens": 182552320} +{"current_steps": 58010, "total_steps": 78105, "loss": 0.1567, "lr": 9.423239722512422e-07, "epoch": 3.713590679213879, "percentage": 74.27, "elapsed_time": "2:32:09", "remaining_time": "0:52:42", "throughput": 19998.07, "total_tokens": 182568576} +{"current_steps": 58015, "total_steps": 78105, "loss": 0.1741, "lr": 9.418870524121768e-07, "epoch": 3.713910761154856, "percentage": 74.28, "elapsed_time": "2:32:10", "remaining_time": "0:52:41", "throughput": 19998.36, "total_tokens": 182585408} +{"current_steps": 58020, "total_steps": 78105, "loss": 0.1087, "lr": 9.414502103771562e-07, "epoch": 3.7142308430958324, "percentage": 74.28, "elapsed_time": "2:32:10", "remaining_time": "0:52:40", "throughput": 19998.58, "total_tokens": 182601024} +{"current_steps": 58025, "total_steps": 78105, "loss": 0.1428, "lr": 9.410134461679935e-07, "epoch": 3.7145509250368094, "percentage": 74.29, "elapsed_time": "2:32:11", "remaining_time": "0:52:39", "throughput": 19998.78, "total_tokens": 182615936} +{"current_steps": 58030, "total_steps": 78105, "loss": 0.2219, "lr": 9.40576759806499e-07, "epoch": 3.7148710069777864, "percentage": 74.3, "elapsed_time": "2:32:12", "remaining_time": "0:52:39", "throughput": 19999.09, "total_tokens": 182632960} +{"current_steps": 58035, "total_steps": 78105, "loss": 0.1462, "lr": 9.401401513144778e-07, "epoch": 3.715191088918763, "percentage": 74.3, "elapsed_time": "2:32:12", "remaining_time": "0:52:38", "throughput": 19999.34, "total_tokens": 182649216} +{"current_steps": 58040, "total_steps": 78105, "loss": 0.1371, "lr": 9.397036207137339e-07, "epoch": 3.71551117085974, "percentage": 74.31, "elapsed_time": "2:32:13", "remaining_time": "0:52:37", "throughput": 19999.54, "total_tokens": 182664000} +{"current_steps": 58045, "total_steps": 78105, "loss": 0.2014, "lr": 9.392671680260623e-07, "epoch": 3.715831252800717, "percentage": 74.32, "elapsed_time": "2:32:14", "remaining_time": "0:52:36", "throughput": 19999.76, "total_tokens": 182679296} +{"current_steps": 58050, "total_steps": 78105, "loss": 0.1677, "lr": 9.388307932732596e-07, "epoch": 3.716151334741694, "percentage": 74.32, "elapsed_time": "2:32:14", "remaining_time": "0:52:35", "throughput": 20000.02, "total_tokens": 182695360} +{"current_steps": 58055, "total_steps": 78105, "loss": 0.211, "lr": 9.383944964771153e-07, "epoch": 3.716471416682671, "percentage": 74.33, "elapsed_time": "2:32:15", "remaining_time": "0:52:35", "throughput": 20000.26, "total_tokens": 182711040} +{"current_steps": 58060, "total_steps": 78105, "loss": 0.1923, "lr": 9.379582776594145e-07, "epoch": 3.716791498623648, "percentage": 74.34, "elapsed_time": "2:32:16", "remaining_time": "0:52:34", "throughput": 20000.53, "total_tokens": 182727488} +{"current_steps": 58065, "total_steps": 78105, "loss": 0.2335, "lr": 9.37522136841943e-07, "epoch": 3.7171115805646244, "percentage": 74.34, "elapsed_time": "2:32:16", "remaining_time": "0:52:33", "throughput": 20000.75, "total_tokens": 182742656} +{"current_steps": 58070, "total_steps": 78105, "loss": 0.1594, "lr": 9.370860740464757e-07, "epoch": 3.7174316625056014, "percentage": 74.35, "elapsed_time": "2:32:17", "remaining_time": "0:52:32", "throughput": 20001.0, "total_tokens": 182758528} +{"current_steps": 58075, "total_steps": 78105, "loss": 0.1927, "lr": 9.366500892947905e-07, "epoch": 3.7177517444465784, "percentage": 74.36, "elapsed_time": "2:32:18", "remaining_time": "0:52:31", "throughput": 20001.29, "total_tokens": 182775104} +{"current_steps": 58080, "total_steps": 78105, "loss": 0.2175, "lr": 9.362141826086549e-07, "epoch": 3.718071826387555, "percentage": 74.36, "elapsed_time": "2:32:18", "remaining_time": "0:52:30", "throughput": 20001.48, "total_tokens": 182789568} +{"current_steps": 58085, "total_steps": 78105, "loss": 0.1658, "lr": 9.357783540098383e-07, "epoch": 3.718391908328532, "percentage": 74.37, "elapsed_time": "2:32:19", "remaining_time": "0:52:30", "throughput": 20001.68, "total_tokens": 182804800} +{"current_steps": 58090, "total_steps": 78105, "loss": 0.1647, "lr": 9.353426035201032e-07, "epoch": 3.718711990269509, "percentage": 74.37, "elapsed_time": "2:32:20", "remaining_time": "0:52:29", "throughput": 20001.97, "total_tokens": 182821696} +{"current_steps": 58095, "total_steps": 78105, "loss": 0.1604, "lr": 9.349069311612086e-07, "epoch": 3.719032072210486, "percentage": 74.38, "elapsed_time": "2:32:20", "remaining_time": "0:52:28", "throughput": 20002.21, "total_tokens": 182837312} +{"current_steps": 58100, "total_steps": 78105, "loss": 0.1641, "lr": 9.344713369549094e-07, "epoch": 3.719352154151463, "percentage": 74.39, "elapsed_time": "2:32:21", "remaining_time": "0:52:27", "throughput": 20002.46, "total_tokens": 182853184} +{"current_steps": 58105, "total_steps": 78105, "loss": 0.1184, "lr": 9.34035820922957e-07, "epoch": 3.71967223609244, "percentage": 74.39, "elapsed_time": "2:32:22", "remaining_time": "0:52:26", "throughput": 20002.74, "total_tokens": 182869824} +{"current_steps": 58110, "total_steps": 78105, "loss": 0.1285, "lr": 9.336003830870991e-07, "epoch": 3.7199923180334165, "percentage": 74.4, "elapsed_time": "2:32:22", "remaining_time": "0:52:25", "throughput": 20002.99, "total_tokens": 182886016} +{"current_steps": 58115, "total_steps": 78105, "loss": 0.1813, "lr": 9.331650234690778e-07, "epoch": 3.7203123999743934, "percentage": 74.41, "elapsed_time": "2:32:23", "remaining_time": "0:52:25", "throughput": 20003.26, "total_tokens": 182902336} +{"current_steps": 58120, "total_steps": 78105, "loss": 0.1766, "lr": 9.32729742090635e-07, "epoch": 3.7206324819153704, "percentage": 74.41, "elapsed_time": "2:32:24", "remaining_time": "0:52:24", "throughput": 20003.54, "total_tokens": 182919040} +{"current_steps": 58125, "total_steps": 78105, "loss": 0.1951, "lr": 9.322945389735053e-07, "epoch": 3.720952563856347, "percentage": 74.42, "elapsed_time": "2:32:24", "remaining_time": "0:52:23", "throughput": 20003.77, "total_tokens": 182934208} +{"current_steps": 58130, "total_steps": 78105, "loss": 0.1802, "lr": 9.318594141394202e-07, "epoch": 3.721272645797324, "percentage": 74.43, "elapsed_time": "2:32:25", "remaining_time": "0:52:22", "throughput": 20004.01, "total_tokens": 182949888} +{"current_steps": 58135, "total_steps": 78105, "loss": 0.1623, "lr": 9.314243676101082e-07, "epoch": 3.721592727738301, "percentage": 74.43, "elapsed_time": "2:32:26", "remaining_time": "0:52:21", "throughput": 20004.26, "total_tokens": 182966080} +{"current_steps": 58140, "total_steps": 78105, "loss": 0.165, "lr": 9.309893994072924e-07, "epoch": 3.721912809679278, "percentage": 74.44, "elapsed_time": "2:32:27", "remaining_time": "0:52:21", "throughput": 20004.45, "total_tokens": 182980928} +{"current_steps": 58145, "total_steps": 78105, "loss": 0.1839, "lr": 9.305545095526927e-07, "epoch": 3.722232891620255, "percentage": 74.44, "elapsed_time": "2:32:27", "remaining_time": "0:52:20", "throughput": 20004.68, "total_tokens": 182996224} +{"current_steps": 58150, "total_steps": 78105, "loss": 0.1195, "lr": 9.301196980680277e-07, "epoch": 3.722552973561232, "percentage": 74.45, "elapsed_time": "2:32:28", "remaining_time": "0:52:19", "throughput": 20004.88, "total_tokens": 183011008} +{"current_steps": 58155, "total_steps": 78105, "loss": 0.1878, "lr": 9.296849649750059e-07, "epoch": 3.7228730555022085, "percentage": 74.46, "elapsed_time": "2:32:29", "remaining_time": "0:52:18", "throughput": 20005.14, "total_tokens": 183027136} +{"current_steps": 58160, "total_steps": 78105, "loss": 0.2403, "lr": 9.292503102953393e-07, "epoch": 3.7231931374431855, "percentage": 74.46, "elapsed_time": "2:32:29", "remaining_time": "0:52:17", "throughput": 20005.45, "total_tokens": 183044736} +{"current_steps": 58165, "total_steps": 78105, "loss": 0.1359, "lr": 9.288157340507287e-07, "epoch": 3.7235132193841625, "percentage": 74.47, "elapsed_time": "2:32:30", "remaining_time": "0:52:17", "throughput": 20006.03, "total_tokens": 183073088} +{"current_steps": 58170, "total_steps": 78105, "loss": 0.1782, "lr": 9.283812362628772e-07, "epoch": 3.723833301325139, "percentage": 74.48, "elapsed_time": "2:32:31", "remaining_time": "0:52:16", "throughput": 20006.24, "total_tokens": 183088192} +{"current_steps": 58175, "total_steps": 78105, "loss": 0.2739, "lr": 9.279468169534803e-07, "epoch": 3.724153383266116, "percentage": 74.48, "elapsed_time": "2:32:32", "remaining_time": "0:52:15", "throughput": 20006.45, "total_tokens": 183103616} +{"current_steps": 58180, "total_steps": 78105, "loss": 0.1555, "lr": 9.275124761442303e-07, "epoch": 3.724473465207093, "percentage": 74.49, "elapsed_time": "2:32:32", "remaining_time": "0:52:14", "throughput": 20006.7, "total_tokens": 183119680} +{"current_steps": 58185, "total_steps": 78105, "loss": 0.1637, "lr": 9.270782138568179e-07, "epoch": 3.72479354714807, "percentage": 74.5, "elapsed_time": "2:32:33", "remaining_time": "0:52:13", "throughput": 20006.96, "total_tokens": 183135744} +{"current_steps": 58190, "total_steps": 78105, "loss": 0.2175, "lr": 9.266440301129248e-07, "epoch": 3.725113629089047, "percentage": 74.5, "elapsed_time": "2:32:34", "remaining_time": "0:52:12", "throughput": 20007.21, "total_tokens": 183152000} +{"current_steps": 58195, "total_steps": 78105, "loss": 0.1934, "lr": 9.262099249342344e-07, "epoch": 3.7254337110300235, "percentage": 74.51, "elapsed_time": "2:32:34", "remaining_time": "0:52:12", "throughput": 20007.43, "total_tokens": 183167232} +{"current_steps": 58200, "total_steps": 78105, "loss": 0.1466, "lr": 9.257758983424228e-07, "epoch": 3.7257537929710005, "percentage": 74.52, "elapsed_time": "2:32:35", "remaining_time": "0:52:11", "throughput": 20007.64, "total_tokens": 183182464} +{"current_steps": 58205, "total_steps": 78105, "loss": 0.1633, "lr": 9.253419503591632e-07, "epoch": 3.7260738749119775, "percentage": 74.52, "elapsed_time": "2:32:36", "remaining_time": "0:52:10", "throughput": 20007.89, "total_tokens": 183198464} +{"current_steps": 58210, "total_steps": 78105, "loss": 0.2352, "lr": 9.249080810061243e-07, "epoch": 3.7263939568529545, "percentage": 74.53, "elapsed_time": "2:32:36", "remaining_time": "0:52:09", "throughput": 20008.13, "total_tokens": 183214336} +{"current_steps": 58215, "total_steps": 78105, "loss": 0.1492, "lr": 9.244742903049714e-07, "epoch": 3.726714038793931, "percentage": 74.53, "elapsed_time": "2:32:37", "remaining_time": "0:52:08", "throughput": 20008.39, "total_tokens": 183229952} +{"current_steps": 58220, "total_steps": 78105, "loss": 0.193, "lr": 9.240405782773659e-07, "epoch": 3.727034120734908, "percentage": 74.54, "elapsed_time": "2:32:38", "remaining_time": "0:52:08", "throughput": 20008.6, "total_tokens": 183245120} +{"current_steps": 58225, "total_steps": 78105, "loss": 0.1448, "lr": 9.23606944944965e-07, "epoch": 3.727354202675885, "percentage": 74.55, "elapsed_time": "2:32:39", "remaining_time": "0:52:07", "throughput": 20008.85, "total_tokens": 183261248} +{"current_steps": 58230, "total_steps": 78105, "loss": 0.3069, "lr": 9.231733903294213e-07, "epoch": 3.727674284616862, "percentage": 74.55, "elapsed_time": "2:32:39", "remaining_time": "0:52:06", "throughput": 20009.06, "total_tokens": 183276352} +{"current_steps": 58235, "total_steps": 78105, "loss": 0.1813, "lr": 9.22739914452386e-07, "epoch": 3.727994366557839, "percentage": 74.56, "elapsed_time": "2:32:40", "remaining_time": "0:52:05", "throughput": 20009.27, "total_tokens": 183291072} +{"current_steps": 58240, "total_steps": 78105, "loss": 0.1631, "lr": 9.223065173355039e-07, "epoch": 3.7283144484988155, "percentage": 74.57, "elapsed_time": "2:32:40", "remaining_time": "0:52:04", "throughput": 20009.52, "total_tokens": 183306944} +{"current_steps": 58245, "total_steps": 78105, "loss": 0.1864, "lr": 9.218731990004162e-07, "epoch": 3.7286345304397925, "percentage": 74.57, "elapsed_time": "2:32:41", "remaining_time": "0:52:03", "throughput": 20009.72, "total_tokens": 183321664} +{"current_steps": 58250, "total_steps": 78105, "loss": 0.2093, "lr": 9.214399594687609e-07, "epoch": 3.7289546123807695, "percentage": 74.58, "elapsed_time": "2:32:42", "remaining_time": "0:52:03", "throughput": 20009.95, "total_tokens": 183336896} +{"current_steps": 58255, "total_steps": 78105, "loss": 0.2773, "lr": 9.210067987621715e-07, "epoch": 3.7292746943217465, "percentage": 74.59, "elapsed_time": "2:32:43", "remaining_time": "0:52:02", "throughput": 20010.25, "total_tokens": 183354176} +{"current_steps": 58260, "total_steps": 78105, "loss": 0.1509, "lr": 9.205737169022777e-07, "epoch": 3.729594776262723, "percentage": 74.59, "elapsed_time": "2:32:43", "remaining_time": "0:52:01", "throughput": 20010.49, "total_tokens": 183370304} +{"current_steps": 58265, "total_steps": 78105, "loss": 0.1581, "lr": 9.201407139107049e-07, "epoch": 3.7299148582037, "percentage": 74.6, "elapsed_time": "2:32:44", "remaining_time": "0:52:00", "throughput": 20010.65, "total_tokens": 183384000} +{"current_steps": 58270, "total_steps": 78105, "loss": 0.1703, "lr": 9.197077898090776e-07, "epoch": 3.730234940144677, "percentage": 74.6, "elapsed_time": "2:32:45", "remaining_time": "0:51:59", "throughput": 20010.95, "total_tokens": 183401344} +{"current_steps": 58275, "total_steps": 78105, "loss": 0.2102, "lr": 9.1927494461901e-07, "epoch": 3.730555022085654, "percentage": 74.61, "elapsed_time": "2:32:45", "remaining_time": "0:51:58", "throughput": 20011.24, "total_tokens": 183417920} +{"current_steps": 58280, "total_steps": 78105, "loss": 0.1473, "lr": 9.188421783621184e-07, "epoch": 3.730875104026631, "percentage": 74.62, "elapsed_time": "2:32:46", "remaining_time": "0:51:58", "throughput": 20011.61, "total_tokens": 183436544} +{"current_steps": 58285, "total_steps": 78105, "loss": 0.2144, "lr": 9.18409491060013e-07, "epoch": 3.7311951859676076, "percentage": 74.62, "elapsed_time": "2:32:47", "remaining_time": "0:51:57", "throughput": 20011.82, "total_tokens": 183451520} +{"current_steps": 58290, "total_steps": 78105, "loss": 0.141, "lr": 9.179768827342983e-07, "epoch": 3.7315152679085846, "percentage": 74.63, "elapsed_time": "2:32:47", "remaining_time": "0:51:56", "throughput": 20012.11, "total_tokens": 183468352} +{"current_steps": 58295, "total_steps": 78105, "loss": 0.1781, "lr": 9.175443534065795e-07, "epoch": 3.7318353498495616, "percentage": 74.64, "elapsed_time": "2:32:48", "remaining_time": "0:51:55", "throughput": 20012.35, "total_tokens": 183483648} +{"current_steps": 58300, "total_steps": 78105, "loss": 0.142, "lr": 9.17111903098451e-07, "epoch": 3.732155431790538, "percentage": 74.64, "elapsed_time": "2:32:49", "remaining_time": "0:51:54", "throughput": 20012.57, "total_tokens": 183499264} +{"current_steps": 58305, "total_steps": 78105, "loss": 0.134, "lr": 9.166795318315111e-07, "epoch": 3.732475513731515, "percentage": 74.65, "elapsed_time": "2:32:49", "remaining_time": "0:51:54", "throughput": 20012.8, "total_tokens": 183514816} +{"current_steps": 58310, "total_steps": 78105, "loss": 0.1455, "lr": 9.162472396273461e-07, "epoch": 3.732795595672492, "percentage": 74.66, "elapsed_time": "2:32:50", "remaining_time": "0:51:53", "throughput": 20013.11, "total_tokens": 183531840} +{"current_steps": 58315, "total_steps": 78105, "loss": 0.1455, "lr": 9.158150265075458e-07, "epoch": 3.733115677613469, "percentage": 74.66, "elapsed_time": "2:32:51", "remaining_time": "0:51:52", "throughput": 20013.35, "total_tokens": 183547712} +{"current_steps": 58320, "total_steps": 78105, "loss": 0.1859, "lr": 9.153828924936911e-07, "epoch": 3.733435759554446, "percentage": 74.67, "elapsed_time": "2:32:51", "remaining_time": "0:51:51", "throughput": 20013.59, "total_tokens": 183563136} +{"current_steps": 58325, "total_steps": 78105, "loss": 0.1411, "lr": 9.149508376073607e-07, "epoch": 3.733755841495423, "percentage": 74.68, "elapsed_time": "2:32:52", "remaining_time": "0:51:50", "throughput": 20013.79, "total_tokens": 183578176} +{"current_steps": 58330, "total_steps": 78105, "loss": 0.1877, "lr": 9.145188618701292e-07, "epoch": 3.7340759234363996, "percentage": 74.68, "elapsed_time": "2:32:53", "remaining_time": "0:51:49", "throughput": 20014.02, "total_tokens": 183593728} +{"current_steps": 58335, "total_steps": 78105, "loss": 0.2938, "lr": 9.140869653035675e-07, "epoch": 3.7343960053773766, "percentage": 74.69, "elapsed_time": "2:32:53", "remaining_time": "0:51:49", "throughput": 20014.21, "total_tokens": 183608192} +{"current_steps": 58340, "total_steps": 78105, "loss": 0.1158, "lr": 9.136551479292419e-07, "epoch": 3.7347160873183536, "percentage": 74.69, "elapsed_time": "2:32:54", "remaining_time": "0:51:48", "throughput": 20014.44, "total_tokens": 183623616} +{"current_steps": 58345, "total_steps": 78105, "loss": 0.2017, "lr": 9.132234097687146e-07, "epoch": 3.73503616925933, "percentage": 74.7, "elapsed_time": "2:32:55", "remaining_time": "0:51:47", "throughput": 20014.68, "total_tokens": 183639424} +{"current_steps": 58350, "total_steps": 78105, "loss": 0.1849, "lr": 9.127917508435455e-07, "epoch": 3.735356251200307, "percentage": 74.71, "elapsed_time": "2:32:55", "remaining_time": "0:51:46", "throughput": 20014.92, "total_tokens": 183655104} +{"current_steps": 58355, "total_steps": 78105, "loss": 0.1435, "lr": 9.123601711752891e-07, "epoch": 3.735676333141284, "percentage": 74.71, "elapsed_time": "2:32:56", "remaining_time": "0:51:45", "throughput": 20015.18, "total_tokens": 183671040} +{"current_steps": 58360, "total_steps": 78105, "loss": 0.1367, "lr": 9.11928670785496e-07, "epoch": 3.735996415082261, "percentage": 74.72, "elapsed_time": "2:32:57", "remaining_time": "0:51:44", "throughput": 20015.37, "total_tokens": 183685504} +{"current_steps": 58365, "total_steps": 78105, "loss": 0.1668, "lr": 9.114972496957133e-07, "epoch": 3.736316497023238, "percentage": 74.73, "elapsed_time": "2:32:57", "remaining_time": "0:51:44", "throughput": 20015.62, "total_tokens": 183701568} +{"current_steps": 58370, "total_steps": 78105, "loss": 0.1892, "lr": 9.110659079274837e-07, "epoch": 3.736636578964215, "percentage": 74.73, "elapsed_time": "2:32:58", "remaining_time": "0:51:43", "throughput": 20015.88, "total_tokens": 183717760} +{"current_steps": 58375, "total_steps": 78105, "loss": 0.1631, "lr": 9.106346455023452e-07, "epoch": 3.7369566609051916, "percentage": 74.74, "elapsed_time": "2:32:59", "remaining_time": "0:51:42", "throughput": 20016.11, "total_tokens": 183733376} +{"current_steps": 58380, "total_steps": 78105, "loss": 0.1023, "lr": 9.102034624418354e-07, "epoch": 3.7372767428461686, "percentage": 74.75, "elapsed_time": "2:32:59", "remaining_time": "0:51:41", "throughput": 20016.31, "total_tokens": 183748416} +{"current_steps": 58385, "total_steps": 78105, "loss": 0.1932, "lr": 9.097723587674823e-07, "epoch": 3.7375968247871456, "percentage": 74.75, "elapsed_time": "2:33:00", "remaining_time": "0:51:40", "throughput": 20016.55, "total_tokens": 183764032} +{"current_steps": 58390, "total_steps": 78105, "loss": 0.1381, "lr": 9.093413345008159e-07, "epoch": 3.737916906728122, "percentage": 74.76, "elapsed_time": "2:33:01", "remaining_time": "0:51:39", "throughput": 20016.74, "total_tokens": 183779008} +{"current_steps": 58395, "total_steps": 78105, "loss": 0.2475, "lr": 9.089103896633561e-07, "epoch": 3.738236988669099, "percentage": 74.76, "elapsed_time": "2:33:01", "remaining_time": "0:51:39", "throughput": 20016.96, "total_tokens": 183794048} +{"current_steps": 58400, "total_steps": 78105, "loss": 0.1339, "lr": 9.084795242766248e-07, "epoch": 3.738557070610076, "percentage": 74.77, "elapsed_time": "2:33:02", "remaining_time": "0:51:38", "throughput": 20017.19, "total_tokens": 183809664} +{"current_steps": 58405, "total_steps": 78105, "loss": 0.135, "lr": 9.080487383621358e-07, "epoch": 3.738877152551053, "percentage": 74.78, "elapsed_time": "2:33:03", "remaining_time": "0:51:37", "throughput": 20017.44, "total_tokens": 183825472} +{"current_steps": 58410, "total_steps": 78105, "loss": 0.1617, "lr": 9.076180319414002e-07, "epoch": 3.73919723449203, "percentage": 74.78, "elapsed_time": "2:33:03", "remaining_time": "0:51:36", "throughput": 20017.69, "total_tokens": 183841664} +{"current_steps": 58415, "total_steps": 78105, "loss": 0.2482, "lr": 9.071874050359272e-07, "epoch": 3.739517316433007, "percentage": 74.79, "elapsed_time": "2:33:04", "remaining_time": "0:51:35", "throughput": 20017.92, "total_tokens": 183857088} +{"current_steps": 58420, "total_steps": 78105, "loss": 0.2929, "lr": 9.067568576672167e-07, "epoch": 3.7398373983739837, "percentage": 74.8, "elapsed_time": "2:33:05", "remaining_time": "0:51:35", "throughput": 20018.15, "total_tokens": 183872384} +{"current_steps": 58425, "total_steps": 78105, "loss": 0.1294, "lr": 9.063263898567709e-07, "epoch": 3.7401574803149606, "percentage": 74.8, "elapsed_time": "2:33:05", "remaining_time": "0:51:34", "throughput": 20018.38, "total_tokens": 183887680} +{"current_steps": 58430, "total_steps": 78105, "loss": 0.1621, "lr": 9.05896001626084e-07, "epoch": 3.7404775622559376, "percentage": 74.81, "elapsed_time": "2:33:06", "remaining_time": "0:51:33", "throughput": 20018.6, "total_tokens": 183903424} +{"current_steps": 58435, "total_steps": 78105, "loss": 0.1688, "lr": 9.054656929966474e-07, "epoch": 3.740797644196914, "percentage": 74.82, "elapsed_time": "2:33:07", "remaining_time": "0:51:32", "throughput": 20018.85, "total_tokens": 183919296} +{"current_steps": 58440, "total_steps": 78105, "loss": 0.1352, "lr": 9.050354639899484e-07, "epoch": 3.741117726137891, "percentage": 74.82, "elapsed_time": "2:33:07", "remaining_time": "0:51:31", "throughput": 20019.03, "total_tokens": 183933760} +{"current_steps": 58445, "total_steps": 78105, "loss": 0.1704, "lr": 9.046053146274702e-07, "epoch": 3.741437808078868, "percentage": 74.83, "elapsed_time": "2:33:08", "remaining_time": "0:51:30", "throughput": 20019.25, "total_tokens": 183948992} +{"current_steps": 58450, "total_steps": 78105, "loss": 0.2059, "lr": 9.041752449306928e-07, "epoch": 3.741757890019845, "percentage": 74.84, "elapsed_time": "2:33:09", "remaining_time": "0:51:30", "throughput": 20019.52, "total_tokens": 183965312} +{"current_steps": 58455, "total_steps": 78105, "loss": 0.113, "lr": 9.037452549210915e-07, "epoch": 3.742077971960822, "percentage": 74.84, "elapsed_time": "2:33:09", "remaining_time": "0:51:29", "throughput": 20019.74, "total_tokens": 183980480} +{"current_steps": 58460, "total_steps": 78105, "loss": 0.1869, "lr": 9.033153446201365e-07, "epoch": 3.742398053901799, "percentage": 74.85, "elapsed_time": "2:33:10", "remaining_time": "0:51:28", "throughput": 20019.97, "total_tokens": 183996096} +{"current_steps": 58465, "total_steps": 78105, "loss": 0.1666, "lr": 9.02885514049297e-07, "epoch": 3.7427181358427757, "percentage": 74.85, "elapsed_time": "2:33:11", "remaining_time": "0:51:27", "throughput": 20020.18, "total_tokens": 184011264} +{"current_steps": 58470, "total_steps": 78105, "loss": 0.1623, "lr": 9.024557632300362e-07, "epoch": 3.7430382177837527, "percentage": 74.86, "elapsed_time": "2:33:11", "remaining_time": "0:51:26", "throughput": 20020.41, "total_tokens": 184026688} +{"current_steps": 58475, "total_steps": 78105, "loss": 0.1249, "lr": 9.020260921838131e-07, "epoch": 3.7433582997247297, "percentage": 74.87, "elapsed_time": "2:33:12", "remaining_time": "0:51:25", "throughput": 20020.61, "total_tokens": 184042048} +{"current_steps": 58480, "total_steps": 78105, "loss": 0.1691, "lr": 9.015965009320834e-07, "epoch": 3.743678381665706, "percentage": 74.87, "elapsed_time": "2:33:13", "remaining_time": "0:51:25", "throughput": 20020.83, "total_tokens": 184057472} +{"current_steps": 58485, "total_steps": 78105, "loss": 0.1338, "lr": 9.011669894962988e-07, "epoch": 3.743998463606683, "percentage": 74.88, "elapsed_time": "2:33:13", "remaining_time": "0:51:24", "throughput": 20021.07, "total_tokens": 184073344} +{"current_steps": 58490, "total_steps": 78105, "loss": 0.1715, "lr": 9.007375578979066e-07, "epoch": 3.74431854554766, "percentage": 74.89, "elapsed_time": "2:33:14", "remaining_time": "0:51:23", "throughput": 20021.3, "total_tokens": 184088704} +{"current_steps": 58495, "total_steps": 78105, "loss": 0.1331, "lr": 9.003082061583498e-07, "epoch": 3.744638627488637, "percentage": 74.89, "elapsed_time": "2:33:15", "remaining_time": "0:51:22", "throughput": 20021.53, "total_tokens": 184104064} +{"current_steps": 58500, "total_steps": 78105, "loss": 0.1823, "lr": 8.998789342990702e-07, "epoch": 3.744958709429614, "percentage": 74.9, "elapsed_time": "2:33:15", "remaining_time": "0:51:21", "throughput": 20021.74, "total_tokens": 184119232} +{"current_steps": 58505, "total_steps": 78105, "loss": 0.1629, "lr": 8.994497423415005e-07, "epoch": 3.7452787913705907, "percentage": 74.91, "elapsed_time": "2:33:16", "remaining_time": "0:51:21", "throughput": 20021.99, "total_tokens": 184135360} +{"current_steps": 58510, "total_steps": 78105, "loss": 0.1444, "lr": 8.990206303070742e-07, "epoch": 3.7455988733115677, "percentage": 74.91, "elapsed_time": "2:33:17", "remaining_time": "0:51:20", "throughput": 20022.28, "total_tokens": 184152192} +{"current_steps": 58515, "total_steps": 78105, "loss": 0.1326, "lr": 8.985915982172186e-07, "epoch": 3.7459189552525447, "percentage": 74.92, "elapsed_time": "2:33:18", "remaining_time": "0:51:19", "throughput": 20022.52, "total_tokens": 184168256} +{"current_steps": 58520, "total_steps": 78105, "loss": 0.2129, "lr": 8.981626460933562e-07, "epoch": 3.7462390371935217, "percentage": 74.92, "elapsed_time": "2:33:18", "remaining_time": "0:51:18", "throughput": 20022.76, "total_tokens": 184184320} +{"current_steps": 58525, "total_steps": 78105, "loss": 0.1328, "lr": 8.977337739569094e-07, "epoch": 3.7465591191344982, "percentage": 74.93, "elapsed_time": "2:33:19", "remaining_time": "0:51:17", "throughput": 20022.97, "total_tokens": 184199552} +{"current_steps": 58530, "total_steps": 78105, "loss": 0.0902, "lr": 8.973049818292903e-07, "epoch": 3.7468792010754752, "percentage": 74.94, "elapsed_time": "2:33:20", "remaining_time": "0:51:16", "throughput": 20023.21, "total_tokens": 184215104} +{"current_steps": 58535, "total_steps": 78105, "loss": 0.2247, "lr": 8.968762697319141e-07, "epoch": 3.747199283016452, "percentage": 74.94, "elapsed_time": "2:33:20", "remaining_time": "0:51:16", "throughput": 20023.44, "total_tokens": 184230592} +{"current_steps": 58540, "total_steps": 78105, "loss": 0.1407, "lr": 8.964476376861847e-07, "epoch": 3.747519364957429, "percentage": 74.95, "elapsed_time": "2:33:21", "remaining_time": "0:51:15", "throughput": 20023.7, "total_tokens": 184246848} +{"current_steps": 58545, "total_steps": 78105, "loss": 0.2023, "lr": 8.960190857135088e-07, "epoch": 3.747839446898406, "percentage": 74.96, "elapsed_time": "2:33:22", "remaining_time": "0:51:14", "throughput": 20023.93, "total_tokens": 184262656} +{"current_steps": 58550, "total_steps": 78105, "loss": 0.2425, "lr": 8.955906138352846e-07, "epoch": 3.7481595288393827, "percentage": 74.96, "elapsed_time": "2:33:22", "remaining_time": "0:51:13", "throughput": 20024.23, "total_tokens": 184279744} +{"current_steps": 58555, "total_steps": 78105, "loss": 0.2124, "lr": 8.951622220729084e-07, "epoch": 3.7484796107803597, "percentage": 74.97, "elapsed_time": "2:33:23", "remaining_time": "0:51:12", "throughput": 20024.41, "total_tokens": 184294336} +{"current_steps": 58560, "total_steps": 78105, "loss": 0.2263, "lr": 8.947339104477712e-07, "epoch": 3.7487996927213367, "percentage": 74.98, "elapsed_time": "2:33:24", "remaining_time": "0:51:11", "throughput": 20024.67, "total_tokens": 184310336} +{"current_steps": 58565, "total_steps": 78105, "loss": 0.1727, "lr": 8.943056789812612e-07, "epoch": 3.7491197746623133, "percentage": 74.98, "elapsed_time": "2:33:24", "remaining_time": "0:51:11", "throughput": 20024.84, "total_tokens": 184324864} +{"current_steps": 58570, "total_steps": 78105, "loss": 0.1008, "lr": 8.938775276947618e-07, "epoch": 3.7494398566032903, "percentage": 74.99, "elapsed_time": "2:33:25", "remaining_time": "0:51:10", "throughput": 20025.08, "total_tokens": 184340800} +{"current_steps": 58575, "total_steps": 78105, "loss": 0.1586, "lr": 8.934494566096527e-07, "epoch": 3.7497599385442673, "percentage": 75.0, "elapsed_time": "2:33:26", "remaining_time": "0:51:09", "throughput": 20025.31, "total_tokens": 184356096} +{"current_steps": 58580, "total_steps": 78105, "loss": 0.1676, "lr": 8.930214657473085e-07, "epoch": 3.7500800204852442, "percentage": 75.0, "elapsed_time": "2:33:26", "remaining_time": "0:51:08", "throughput": 20025.54, "total_tokens": 184371968} +{"current_steps": 58585, "total_steps": 78105, "loss": 0.1847, "lr": 8.925935551291029e-07, "epoch": 3.7504001024262212, "percentage": 75.01, "elapsed_time": "2:33:27", "remaining_time": "0:51:07", "throughput": 20025.76, "total_tokens": 184387328} +{"current_steps": 58590, "total_steps": 78105, "loss": 0.1324, "lr": 8.921657247764027e-07, "epoch": 3.7507201843671982, "percentage": 75.01, "elapsed_time": "2:33:28", "remaining_time": "0:51:07", "throughput": 20025.98, "total_tokens": 184402752} +{"current_steps": 58590, "total_steps": 78105, "eval_loss": 0.5466420650482178, "epoch": 3.7507201843671982, "percentage": 75.01, "elapsed_time": "2:34:19", "remaining_time": "0:51:24", "throughput": 19915.64, "total_tokens": 184402752} +{"current_steps": 58595, "total_steps": 78105, "loss": 0.1472, "lr": 8.917379747105709e-07, "epoch": 3.7510402663081748, "percentage": 75.02, "elapsed_time": "2:34:53", "remaining_time": "0:51:34", "throughput": 19843.04, "total_tokens": 184417984} +{"current_steps": 58600, "total_steps": 78105, "loss": 0.1734, "lr": 8.913103049529676e-07, "epoch": 3.7513603482491518, "percentage": 75.03, "elapsed_time": "2:34:54", "remaining_time": "0:51:33", "throughput": 19843.3, "total_tokens": 184434176} +{"current_steps": 58605, "total_steps": 78105, "loss": 0.1154, "lr": 8.908827155249475e-07, "epoch": 3.7516804301901288, "percentage": 75.03, "elapsed_time": "2:34:55", "remaining_time": "0:51:32", "throughput": 19843.53, "total_tokens": 184449792} +{"current_steps": 58610, "total_steps": 78105, "loss": 0.1888, "lr": 8.904552064478647e-07, "epoch": 3.7520005121311053, "percentage": 75.04, "elapsed_time": "2:34:55", "remaining_time": "0:51:32", "throughput": 19843.78, "total_tokens": 184465472} +{"current_steps": 58615, "total_steps": 78105, "loss": 0.1765, "lr": 8.900277777430636e-07, "epoch": 3.7523205940720823, "percentage": 75.05, "elapsed_time": "2:34:56", "remaining_time": "0:51:31", "throughput": 19844.0, "total_tokens": 184480832} +{"current_steps": 58620, "total_steps": 78105, "loss": 0.1907, "lr": 8.896004294318906e-07, "epoch": 3.7526406760130593, "percentage": 75.05, "elapsed_time": "2:34:57", "remaining_time": "0:51:30", "throughput": 19844.23, "total_tokens": 184496000} +{"current_steps": 58625, "total_steps": 78105, "loss": 0.1515, "lr": 8.891731615356822e-07, "epoch": 3.7529607579540363, "percentage": 75.06, "elapsed_time": "2:34:57", "remaining_time": "0:51:29", "throughput": 19844.44, "total_tokens": 184510720} +{"current_steps": 58630, "total_steps": 78105, "loss": 0.16, "lr": 8.887459740757767e-07, "epoch": 3.7532808398950133, "percentage": 75.07, "elapsed_time": "2:34:58", "remaining_time": "0:51:28", "throughput": 19844.62, "total_tokens": 184525120} +{"current_steps": 58635, "total_steps": 78105, "loss": 0.1823, "lr": 8.883188670735043e-07, "epoch": 3.7536009218359903, "percentage": 75.07, "elapsed_time": "2:34:59", "remaining_time": "0:51:27", "throughput": 19844.85, "total_tokens": 184540160} +{"current_steps": 58640, "total_steps": 78105, "loss": 0.1471, "lr": 8.878918405501921e-07, "epoch": 3.753921003776967, "percentage": 75.08, "elapsed_time": "2:34:59", "remaining_time": "0:51:26", "throughput": 19845.06, "total_tokens": 184554944} +{"current_steps": 58645, "total_steps": 78105, "loss": 0.156, "lr": 8.874648945271658e-07, "epoch": 3.754241085717944, "percentage": 75.08, "elapsed_time": "2:35:00", "remaining_time": "0:51:26", "throughput": 19845.34, "total_tokens": 184571584} +{"current_steps": 58650, "total_steps": 78105, "loss": 0.1775, "lr": 8.870380290257416e-07, "epoch": 3.754561167658921, "percentage": 75.09, "elapsed_time": "2:35:01", "remaining_time": "0:51:25", "throughput": 19845.54, "total_tokens": 184586240} +{"current_steps": 58655, "total_steps": 78105, "loss": 0.1593, "lr": 8.866112440672375e-07, "epoch": 3.7548812495998973, "percentage": 75.1, "elapsed_time": "2:35:01", "remaining_time": "0:51:24", "throughput": 19845.74, "total_tokens": 184600832} +{"current_steps": 58660, "total_steps": 78105, "loss": 0.1339, "lr": 8.86184539672964e-07, "epoch": 3.7552013315408743, "percentage": 75.1, "elapsed_time": "2:35:02", "remaining_time": "0:51:23", "throughput": 19846.0, "total_tokens": 184616960} +{"current_steps": 58665, "total_steps": 78105, "loss": 0.1753, "lr": 8.857579158642285e-07, "epoch": 3.7555214134818513, "percentage": 75.11, "elapsed_time": "2:35:03", "remaining_time": "0:51:22", "throughput": 19846.23, "total_tokens": 184632576} +{"current_steps": 58670, "total_steps": 78105, "loss": 0.1055, "lr": 8.853313726623344e-07, "epoch": 3.7558414954228283, "percentage": 75.12, "elapsed_time": "2:35:03", "remaining_time": "0:51:21", "throughput": 19846.5, "total_tokens": 184648896} +{"current_steps": 58675, "total_steps": 78105, "loss": 0.1847, "lr": 8.849049100885812e-07, "epoch": 3.7561615773638053, "percentage": 75.12, "elapsed_time": "2:35:04", "remaining_time": "0:51:21", "throughput": 19846.72, "total_tokens": 184664256} +{"current_steps": 58680, "total_steps": 78105, "loss": 0.4096, "lr": 8.84478528164264e-07, "epoch": 3.7564816593047823, "percentage": 75.13, "elapsed_time": "2:35:05", "remaining_time": "0:51:20", "throughput": 19846.94, "total_tokens": 184679488} +{"current_steps": 58685, "total_steps": 78105, "loss": 0.1922, "lr": 8.840522269106741e-07, "epoch": 3.756801741245759, "percentage": 75.14, "elapsed_time": "2:35:05", "remaining_time": "0:51:19", "throughput": 19847.18, "total_tokens": 184695424} +{"current_steps": 58690, "total_steps": 78105, "loss": 0.1835, "lr": 8.836260063490978e-07, "epoch": 3.757121823186736, "percentage": 75.14, "elapsed_time": "2:35:06", "remaining_time": "0:51:18", "throughput": 19847.38, "total_tokens": 184710336} +{"current_steps": 58695, "total_steps": 78105, "loss": 0.1775, "lr": 8.831998665008204e-07, "epoch": 3.757441905127713, "percentage": 75.15, "elapsed_time": "2:35:07", "remaining_time": "0:51:17", "throughput": 19847.64, "total_tokens": 184726272} +{"current_steps": 58700, "total_steps": 78105, "loss": 0.1546, "lr": 8.827738073871203e-07, "epoch": 3.7577619870686894, "percentage": 75.16, "elapsed_time": "2:35:07", "remaining_time": "0:51:16", "throughput": 19847.88, "total_tokens": 184741696} +{"current_steps": 58705, "total_steps": 78105, "loss": 0.2376, "lr": 8.823478290292719e-07, "epoch": 3.7580820690096663, "percentage": 75.16, "elapsed_time": "2:35:08", "remaining_time": "0:51:16", "throughput": 19848.08, "total_tokens": 184756416} +{"current_steps": 58710, "total_steps": 78105, "loss": 0.157, "lr": 8.819219314485472e-07, "epoch": 3.7584021509506433, "percentage": 75.17, "elapsed_time": "2:35:09", "remaining_time": "0:51:15", "throughput": 19848.33, "total_tokens": 184772032} +{"current_steps": 58715, "total_steps": 78105, "loss": 0.1521, "lr": 8.814961146662133e-07, "epoch": 3.7587222328916203, "percentage": 75.17, "elapsed_time": "2:35:09", "remaining_time": "0:51:14", "throughput": 19848.62, "total_tokens": 184788736} +{"current_steps": 58720, "total_steps": 78105, "loss": 0.1624, "lr": 8.810703787035324e-07, "epoch": 3.7590423148325973, "percentage": 75.18, "elapsed_time": "2:35:10", "remaining_time": "0:51:13", "throughput": 19848.85, "total_tokens": 184804224} +{"current_steps": 58725, "total_steps": 78105, "loss": 0.1646, "lr": 8.806447235817633e-07, "epoch": 3.7593623967735743, "percentage": 75.19, "elapsed_time": "2:35:11", "remaining_time": "0:51:12", "throughput": 19849.08, "total_tokens": 184819584} +{"current_steps": 58730, "total_steps": 78105, "loss": 0.1719, "lr": 8.802191493221638e-07, "epoch": 3.759682478714551, "percentage": 75.19, "elapsed_time": "2:35:11", "remaining_time": "0:51:11", "throughput": 19849.31, "total_tokens": 184835200} +{"current_steps": 58735, "total_steps": 78105, "loss": 0.1877, "lr": 8.79793655945981e-07, "epoch": 3.760002560655528, "percentage": 75.2, "elapsed_time": "2:35:12", "remaining_time": "0:51:11", "throughput": 19849.54, "total_tokens": 184850816} +{"current_steps": 58740, "total_steps": 78105, "loss": 0.1455, "lr": 8.793682434744647e-07, "epoch": 3.760322642596505, "percentage": 75.21, "elapsed_time": "2:35:13", "remaining_time": "0:51:10", "throughput": 19849.75, "total_tokens": 184866048} +{"current_steps": 58745, "total_steps": 78105, "loss": 0.2206, "lr": 8.789429119288564e-07, "epoch": 3.7606427245374814, "percentage": 75.21, "elapsed_time": "2:35:13", "remaining_time": "0:51:09", "throughput": 19849.95, "total_tokens": 184880960} +{"current_steps": 58750, "total_steps": 78105, "loss": 0.1905, "lr": 8.785176613303955e-07, "epoch": 3.7609628064784584, "percentage": 75.22, "elapsed_time": "2:35:14", "remaining_time": "0:51:08", "throughput": 19850.22, "total_tokens": 184897344} +{"current_steps": 58755, "total_steps": 78105, "loss": 0.162, "lr": 8.780924917003164e-07, "epoch": 3.7612828884194354, "percentage": 75.23, "elapsed_time": "2:35:15", "remaining_time": "0:51:07", "throughput": 19850.39, "total_tokens": 184911808} +{"current_steps": 58760, "total_steps": 78105, "loss": 0.2049, "lr": 8.776674030598494e-07, "epoch": 3.7616029703604124, "percentage": 75.23, "elapsed_time": "2:35:15", "remaining_time": "0:51:06", "throughput": 19850.62, "total_tokens": 184926976} +{"current_steps": 58765, "total_steps": 78105, "loss": 0.1463, "lr": 8.772423954302237e-07, "epoch": 3.7619230523013893, "percentage": 75.24, "elapsed_time": "2:35:16", "remaining_time": "0:51:06", "throughput": 19850.84, "total_tokens": 184942656} +{"current_steps": 58770, "total_steps": 78105, "loss": 0.1993, "lr": 8.768174688326583e-07, "epoch": 3.762243134242366, "percentage": 75.24, "elapsed_time": "2:35:17", "remaining_time": "0:51:05", "throughput": 19851.12, "total_tokens": 184959232} +{"current_steps": 58775, "total_steps": 78105, "loss": 0.1141, "lr": 8.763926232883743e-07, "epoch": 3.762563216183343, "percentage": 75.25, "elapsed_time": "2:35:17", "remaining_time": "0:51:04", "throughput": 19851.35, "total_tokens": 184974592} +{"current_steps": 58780, "total_steps": 78105, "loss": 0.1266, "lr": 8.75967858818586e-07, "epoch": 3.76288329812432, "percentage": 75.26, "elapsed_time": "2:35:18", "remaining_time": "0:51:03", "throughput": 19851.54, "total_tokens": 184989632} +{"current_steps": 58785, "total_steps": 78105, "loss": 0.2549, "lr": 8.755431754445035e-07, "epoch": 3.763203380065297, "percentage": 75.26, "elapsed_time": "2:35:19", "remaining_time": "0:51:02", "throughput": 19851.83, "total_tokens": 185006464} +{"current_steps": 58790, "total_steps": 78105, "loss": 0.1331, "lr": 8.751185731873329e-07, "epoch": 3.7635234620062734, "percentage": 75.27, "elapsed_time": "2:35:20", "remaining_time": "0:51:02", "throughput": 19852.03, "total_tokens": 185021376} +{"current_steps": 58795, "total_steps": 78105, "loss": 0.2821, "lr": 8.746940520682775e-07, "epoch": 3.7638435439472504, "percentage": 75.28, "elapsed_time": "2:35:20", "remaining_time": "0:51:01", "throughput": 19852.23, "total_tokens": 185036032} +{"current_steps": 58800, "total_steps": 78105, "loss": 0.1156, "lr": 8.742696121085343e-07, "epoch": 3.7641636258882274, "percentage": 75.28, "elapsed_time": "2:35:21", "remaining_time": "0:51:00", "throughput": 19852.46, "total_tokens": 185051520} +{"current_steps": 58805, "total_steps": 78105, "loss": 0.1415, "lr": 8.738452533293002e-07, "epoch": 3.7644837078292044, "percentage": 75.29, "elapsed_time": "2:35:22", "remaining_time": "0:50:59", "throughput": 19852.73, "total_tokens": 185067776} +{"current_steps": 58810, "total_steps": 78105, "loss": 0.1482, "lr": 8.73420975751762e-07, "epoch": 3.7648037897701814, "percentage": 75.3, "elapsed_time": "2:35:22", "remaining_time": "0:50:58", "throughput": 19852.93, "total_tokens": 185082944} +{"current_steps": 58815, "total_steps": 78105, "loss": 0.1886, "lr": 8.72996779397109e-07, "epoch": 3.765123871711158, "percentage": 75.3, "elapsed_time": "2:35:23", "remaining_time": "0:50:57", "throughput": 19853.19, "total_tokens": 185098944} +{"current_steps": 58820, "total_steps": 78105, "loss": 0.178, "lr": 8.725726642865215e-07, "epoch": 3.765443953652135, "percentage": 75.31, "elapsed_time": "2:35:24", "remaining_time": "0:50:57", "throughput": 19853.34, "total_tokens": 185112896} +{"current_steps": 58825, "total_steps": 78105, "loss": 0.2024, "lr": 8.721486304411786e-07, "epoch": 3.765764035593112, "percentage": 75.32, "elapsed_time": "2:35:24", "remaining_time": "0:50:56", "throughput": 19853.52, "total_tokens": 185127296} +{"current_steps": 58830, "total_steps": 78105, "loss": 0.1694, "lr": 8.717246778822538e-07, "epoch": 3.766084117534089, "percentage": 75.32, "elapsed_time": "2:35:25", "remaining_time": "0:50:55", "throughput": 19853.75, "total_tokens": 185143040} +{"current_steps": 58835, "total_steps": 78105, "loss": 0.1773, "lr": 8.713008066309164e-07, "epoch": 3.7664041994750654, "percentage": 75.33, "elapsed_time": "2:35:26", "remaining_time": "0:50:54", "throughput": 19854.07, "total_tokens": 185160768} +{"current_steps": 58840, "total_steps": 78105, "loss": 0.0715, "lr": 8.708770167083346e-07, "epoch": 3.7667242814160424, "percentage": 75.33, "elapsed_time": "2:35:26", "remaining_time": "0:50:53", "throughput": 19854.28, "total_tokens": 185175936} +{"current_steps": 58845, "total_steps": 78105, "loss": 0.1279, "lr": 8.704533081356676e-07, "epoch": 3.7670443633570194, "percentage": 75.34, "elapsed_time": "2:35:27", "remaining_time": "0:50:52", "throughput": 19854.48, "total_tokens": 185190720} +{"current_steps": 58850, "total_steps": 78105, "loss": 0.1493, "lr": 8.700296809340756e-07, "epoch": 3.7673644452979964, "percentage": 75.35, "elapsed_time": "2:35:28", "remaining_time": "0:50:52", "throughput": 19854.75, "total_tokens": 185207296} +{"current_steps": 58855, "total_steps": 78105, "loss": 0.2158, "lr": 8.696061351247095e-07, "epoch": 3.7676845272389734, "percentage": 75.35, "elapsed_time": "2:35:28", "remaining_time": "0:50:51", "throughput": 19854.96, "total_tokens": 185222272} +{"current_steps": 58860, "total_steps": 78105, "loss": 0.129, "lr": 8.691826707287215e-07, "epoch": 3.76800460917995, "percentage": 75.36, "elapsed_time": "2:35:29", "remaining_time": "0:50:50", "throughput": 19855.19, "total_tokens": 185237760} +{"current_steps": 58865, "total_steps": 78105, "loss": 0.1215, "lr": 8.687592877672566e-07, "epoch": 3.768324691120927, "percentage": 75.37, "elapsed_time": "2:35:30", "remaining_time": "0:50:49", "throughput": 19855.41, "total_tokens": 185252992} +{"current_steps": 58870, "total_steps": 78105, "loss": 0.204, "lr": 8.683359862614548e-07, "epoch": 3.768644773061904, "percentage": 75.37, "elapsed_time": "2:35:30", "remaining_time": "0:50:48", "throughput": 19855.63, "total_tokens": 185268288} +{"current_steps": 58875, "total_steps": 78105, "loss": 0.1527, "lr": 8.679127662324569e-07, "epoch": 3.7689648550028805, "percentage": 75.38, "elapsed_time": "2:35:31", "remaining_time": "0:50:47", "throughput": 19855.92, "total_tokens": 185285312} +{"current_steps": 58880, "total_steps": 78105, "loss": 0.1456, "lr": 8.674896277013922e-07, "epoch": 3.7692849369438575, "percentage": 75.39, "elapsed_time": "2:35:32", "remaining_time": "0:50:47", "throughput": 19856.17, "total_tokens": 185301312} +{"current_steps": 58885, "total_steps": 78105, "loss": 0.1527, "lr": 8.67066570689393e-07, "epoch": 3.7696050188848345, "percentage": 75.39, "elapsed_time": "2:35:32", "remaining_time": "0:50:46", "throughput": 19856.36, "total_tokens": 185315776} +{"current_steps": 58890, "total_steps": 78105, "loss": 0.114, "lr": 8.666435952175837e-07, "epoch": 3.7699251008258114, "percentage": 75.4, "elapsed_time": "2:35:33", "remaining_time": "0:50:45", "throughput": 19856.63, "total_tokens": 185332032} +{"current_steps": 58895, "total_steps": 78105, "loss": 0.1257, "lr": 8.662207013070853e-07, "epoch": 3.7702451827667884, "percentage": 75.4, "elapsed_time": "2:35:34", "remaining_time": "0:50:44", "throughput": 19856.85, "total_tokens": 185347456} +{"current_steps": 58900, "total_steps": 78105, "loss": 0.169, "lr": 8.65797888979015e-07, "epoch": 3.7705652647077654, "percentage": 75.41, "elapsed_time": "2:35:34", "remaining_time": "0:50:43", "throughput": 19857.13, "total_tokens": 185363904} +{"current_steps": 58905, "total_steps": 78105, "loss": 0.1429, "lr": 8.653751582544859e-07, "epoch": 3.770885346648742, "percentage": 75.42, "elapsed_time": "2:35:35", "remaining_time": "0:50:42", "throughput": 19857.36, "total_tokens": 185379392} +{"current_steps": 58910, "total_steps": 78105, "loss": 0.1909, "lr": 8.649525091546071e-07, "epoch": 3.771205428589719, "percentage": 75.42, "elapsed_time": "2:35:36", "remaining_time": "0:50:42", "throughput": 19857.66, "total_tokens": 185395968} +{"current_steps": 58915, "total_steps": 78105, "loss": 0.2106, "lr": 8.645299417004832e-07, "epoch": 3.771525510530696, "percentage": 75.43, "elapsed_time": "2:35:36", "remaining_time": "0:50:41", "throughput": 19857.88, "total_tokens": 185410944} +{"current_steps": 58920, "total_steps": 78105, "loss": 0.1437, "lr": 8.641074559132142e-07, "epoch": 3.7718455924716725, "percentage": 75.44, "elapsed_time": "2:35:37", "remaining_time": "0:50:40", "throughput": 19858.12, "total_tokens": 185426816} +{"current_steps": 58925, "total_steps": 78105, "loss": 0.2352, "lr": 8.636850518138997e-07, "epoch": 3.7721656744126495, "percentage": 75.44, "elapsed_time": "2:35:38", "remaining_time": "0:50:39", "throughput": 19858.32, "total_tokens": 185441600} +{"current_steps": 58930, "total_steps": 78105, "loss": 0.1437, "lr": 8.632627294236287e-07, "epoch": 3.7724857563536265, "percentage": 75.45, "elapsed_time": "2:35:38", "remaining_time": "0:50:38", "throughput": 19858.52, "total_tokens": 185456192} +{"current_steps": 58935, "total_steps": 78105, "loss": 0.1591, "lr": 8.628404887634926e-07, "epoch": 3.7728058382946035, "percentage": 75.46, "elapsed_time": "2:35:39", "remaining_time": "0:50:37", "throughput": 19858.76, "total_tokens": 185471680} +{"current_steps": 58940, "total_steps": 78105, "loss": 0.1462, "lr": 8.624183298545744e-07, "epoch": 3.7731259202355805, "percentage": 75.46, "elapsed_time": "2:35:40", "remaining_time": "0:50:37", "throughput": 19858.98, "total_tokens": 185486528} +{"current_steps": 58945, "total_steps": 78105, "loss": 0.1322, "lr": 8.619962527179551e-07, "epoch": 3.7734460021765575, "percentage": 75.47, "elapsed_time": "2:35:40", "remaining_time": "0:50:36", "throughput": 19859.21, "total_tokens": 185502080} +{"current_steps": 58950, "total_steps": 78105, "loss": 0.2411, "lr": 8.61574257374711e-07, "epoch": 3.773766084117534, "percentage": 75.48, "elapsed_time": "2:35:41", "remaining_time": "0:50:35", "throughput": 19859.49, "total_tokens": 185518976} +{"current_steps": 58955, "total_steps": 78105, "loss": 0.1189, "lr": 8.611523438459135e-07, "epoch": 3.774086166058511, "percentage": 75.48, "elapsed_time": "2:35:42", "remaining_time": "0:50:34", "throughput": 19859.71, "total_tokens": 185534016} +{"current_steps": 58960, "total_steps": 78105, "loss": 0.1929, "lr": 8.60730512152633e-07, "epoch": 3.774406247999488, "percentage": 75.49, "elapsed_time": "2:35:42", "remaining_time": "0:50:33", "throughput": 19860.01, "total_tokens": 185550720} +{"current_steps": 58965, "total_steps": 78105, "loss": 0.1363, "lr": 8.603087623159303e-07, "epoch": 3.7747263299404645, "percentage": 75.49, "elapsed_time": "2:35:43", "remaining_time": "0:50:32", "throughput": 19860.24, "total_tokens": 185566080} +{"current_steps": 58970, "total_steps": 78105, "loss": 0.1328, "lr": 8.598870943568682e-07, "epoch": 3.7750464118814415, "percentage": 75.5, "elapsed_time": "2:35:44", "remaining_time": "0:50:32", "throughput": 19860.49, "total_tokens": 185581824} +{"current_steps": 58975, "total_steps": 78105, "loss": 0.2467, "lr": 8.594655082965017e-07, "epoch": 3.7753664938224185, "percentage": 75.51, "elapsed_time": "2:35:44", "remaining_time": "0:50:31", "throughput": 19860.72, "total_tokens": 185597056} +{"current_steps": 58980, "total_steps": 78105, "loss": 0.1799, "lr": 8.590440041558823e-07, "epoch": 3.7756865757633955, "percentage": 75.51, "elapsed_time": "2:35:45", "remaining_time": "0:50:30", "throughput": 19860.91, "total_tokens": 185611584} +{"current_steps": 58985, "total_steps": 78105, "loss": 0.2062, "lr": 8.586225819560578e-07, "epoch": 3.7760066577043725, "percentage": 75.52, "elapsed_time": "2:35:46", "remaining_time": "0:50:29", "throughput": 19861.16, "total_tokens": 185627712} +{"current_steps": 58990, "total_steps": 78105, "loss": 0.165, "lr": 8.582012417180713e-07, "epoch": 3.7763267396453495, "percentage": 75.53, "elapsed_time": "2:35:46", "remaining_time": "0:50:28", "throughput": 19861.42, "total_tokens": 185643520} +{"current_steps": 58995, "total_steps": 78105, "loss": 0.1311, "lr": 8.577799834629647e-07, "epoch": 3.776646821586326, "percentage": 75.53, "elapsed_time": "2:35:47", "remaining_time": "0:50:27", "throughput": 19861.65, "total_tokens": 185659008} +{"current_steps": 59000, "total_steps": 78105, "loss": 0.1303, "lr": 8.573588072117697e-07, "epoch": 3.776966903527303, "percentage": 75.54, "elapsed_time": "2:35:48", "remaining_time": "0:50:27", "throughput": 19861.89, "total_tokens": 185674368} +{"current_steps": 59005, "total_steps": 78105, "loss": 0.1685, "lr": 8.569377129855208e-07, "epoch": 3.77728698546828, "percentage": 75.55, "elapsed_time": "2:35:48", "remaining_time": "0:50:26", "throughput": 19862.16, "total_tokens": 185690816} +{"current_steps": 59010, "total_steps": 78105, "loss": 0.1551, "lr": 8.565167008052438e-07, "epoch": 3.7776070674092566, "percentage": 75.55, "elapsed_time": "2:35:49", "remaining_time": "0:50:25", "throughput": 19862.36, "total_tokens": 185705408} +{"current_steps": 59015, "total_steps": 78105, "loss": 0.1196, "lr": 8.560957706919626e-07, "epoch": 3.7779271493502335, "percentage": 75.56, "elapsed_time": "2:35:50", "remaining_time": "0:50:24", "throughput": 19862.58, "total_tokens": 185720448} +{"current_steps": 59020, "total_steps": 78105, "loss": 0.1554, "lr": 8.556749226666955e-07, "epoch": 3.7782472312912105, "percentage": 75.56, "elapsed_time": "2:35:50", "remaining_time": "0:50:23", "throughput": 19862.89, "total_tokens": 185737344} +{"current_steps": 59025, "total_steps": 78105, "loss": 0.1578, "lr": 8.55254156750458e-07, "epoch": 3.7785673132321875, "percentage": 75.57, "elapsed_time": "2:35:51", "remaining_time": "0:50:22", "throughput": 19863.21, "total_tokens": 185754624} +{"current_steps": 59030, "total_steps": 78105, "loss": 0.1406, "lr": 8.548334729642599e-07, "epoch": 3.7788873951731645, "percentage": 75.58, "elapsed_time": "2:35:52", "remaining_time": "0:50:22", "throughput": 19863.41, "total_tokens": 185769344} +{"current_steps": 59035, "total_steps": 78105, "loss": 0.1151, "lr": 8.544128713291102e-07, "epoch": 3.779207477114141, "percentage": 75.58, "elapsed_time": "2:35:53", "remaining_time": "0:50:21", "throughput": 19863.64, "total_tokens": 185784704} +{"current_steps": 59040, "total_steps": 78105, "loss": 0.2172, "lr": 8.539923518660087e-07, "epoch": 3.779527559055118, "percentage": 75.59, "elapsed_time": "2:35:53", "remaining_time": "0:50:20", "throughput": 19863.88, "total_tokens": 185799872} +{"current_steps": 59045, "total_steps": 78105, "loss": 0.2514, "lr": 8.535719145959561e-07, "epoch": 3.779847640996095, "percentage": 75.6, "elapsed_time": "2:35:54", "remaining_time": "0:50:19", "throughput": 19864.12, "total_tokens": 185815424} +{"current_steps": 59050, "total_steps": 78105, "loss": 0.2081, "lr": 8.531515595399464e-07, "epoch": 3.780167722937072, "percentage": 75.6, "elapsed_time": "2:35:54", "remaining_time": "0:50:18", "throughput": 19864.35, "total_tokens": 185830848} +{"current_steps": 59055, "total_steps": 78105, "loss": 0.1937, "lr": 8.527312867189694e-07, "epoch": 3.7804878048780486, "percentage": 75.61, "elapsed_time": "2:35:55", "remaining_time": "0:50:17", "throughput": 19864.57, "total_tokens": 185845760} +{"current_steps": 59060, "total_steps": 78105, "loss": 0.2577, "lr": 8.523110961540118e-07, "epoch": 3.7808078868190256, "percentage": 75.62, "elapsed_time": "2:35:56", "remaining_time": "0:50:17", "throughput": 19864.79, "total_tokens": 185861056} +{"current_steps": 59065, "total_steps": 78105, "loss": 0.1585, "lr": 8.518909878660547e-07, "epoch": 3.7811279687600026, "percentage": 75.62, "elapsed_time": "2:35:56", "remaining_time": "0:50:16", "throughput": 19865.04, "total_tokens": 185876928} +{"current_steps": 59070, "total_steps": 78105, "loss": 0.1739, "lr": 8.514709618760789e-07, "epoch": 3.7814480507009796, "percentage": 75.63, "elapsed_time": "2:35:57", "remaining_time": "0:50:15", "throughput": 19865.27, "total_tokens": 185892544} +{"current_steps": 59075, "total_steps": 78105, "loss": 0.1742, "lr": 8.510510182050544e-07, "epoch": 3.7817681326419565, "percentage": 75.64, "elapsed_time": "2:35:58", "remaining_time": "0:50:14", "throughput": 19865.55, "total_tokens": 185908608} +{"current_steps": 59080, "total_steps": 78105, "loss": 0.1917, "lr": 8.506311568739547e-07, "epoch": 3.782088214582933, "percentage": 75.64, "elapsed_time": "2:35:59", "remaining_time": "0:50:13", "throughput": 19865.81, "total_tokens": 185924800} +{"current_steps": 59085, "total_steps": 78105, "loss": 0.1868, "lr": 8.502113779037421e-07, "epoch": 3.78240829652391, "percentage": 75.65, "elapsed_time": "2:35:59", "remaining_time": "0:50:12", "throughput": 19866.09, "total_tokens": 185940864} +{"current_steps": 59090, "total_steps": 78105, "loss": 0.1681, "lr": 8.497916813153809e-07, "epoch": 3.782728378464887, "percentage": 75.65, "elapsed_time": "2:36:00", "remaining_time": "0:50:12", "throughput": 19866.35, "total_tokens": 185956928} +{"current_steps": 59095, "total_steps": 78105, "loss": 0.1297, "lr": 8.493720671298272e-07, "epoch": 3.783048460405864, "percentage": 75.66, "elapsed_time": "2:36:01", "remaining_time": "0:50:11", "throughput": 19866.6, "total_tokens": 185972864} +{"current_steps": 59100, "total_steps": 78105, "loss": 0.2017, "lr": 8.489525353680348e-07, "epoch": 3.7833685423468406, "percentage": 75.67, "elapsed_time": "2:36:01", "remaining_time": "0:50:10", "throughput": 19866.83, "total_tokens": 185988416} +{"current_steps": 59105, "total_steps": 78105, "loss": 0.1072, "lr": 8.485330860509527e-07, "epoch": 3.7836886242878176, "percentage": 75.67, "elapsed_time": "2:36:02", "remaining_time": "0:50:09", "throughput": 19867.07, "total_tokens": 186004224} +{"current_steps": 59110, "total_steps": 78105, "loss": 0.1486, "lr": 8.481137191995254e-07, "epoch": 3.7840087062287946, "percentage": 75.68, "elapsed_time": "2:36:03", "remaining_time": "0:50:08", "throughput": 19867.31, "total_tokens": 186019584} +{"current_steps": 59115, "total_steps": 78105, "loss": 0.1568, "lr": 8.476944348346955e-07, "epoch": 3.7843287881697716, "percentage": 75.69, "elapsed_time": "2:36:03", "remaining_time": "0:50:07", "throughput": 19867.52, "total_tokens": 186034752} +{"current_steps": 59120, "total_steps": 78105, "loss": 0.1745, "lr": 8.472752329773989e-07, "epoch": 3.7846488701107486, "percentage": 75.69, "elapsed_time": "2:36:04", "remaining_time": "0:50:07", "throughput": 19867.74, "total_tokens": 186049984} +{"current_steps": 59125, "total_steps": 78105, "loss": 0.1704, "lr": 8.468561136485684e-07, "epoch": 3.784968952051725, "percentage": 75.7, "elapsed_time": "2:36:05", "remaining_time": "0:50:06", "throughput": 19867.97, "total_tokens": 186065472} +{"current_steps": 59130, "total_steps": 78105, "loss": 0.1607, "lr": 8.464370768691327e-07, "epoch": 3.785289033992702, "percentage": 75.71, "elapsed_time": "2:36:05", "remaining_time": "0:50:05", "throughput": 19868.25, "total_tokens": 186081920} +{"current_steps": 59135, "total_steps": 78105, "loss": 0.1746, "lr": 8.460181226600161e-07, "epoch": 3.785609115933679, "percentage": 75.71, "elapsed_time": "2:36:06", "remaining_time": "0:50:04", "throughput": 19868.54, "total_tokens": 186098880} +{"current_steps": 59140, "total_steps": 78105, "loss": 0.1381, "lr": 8.455992510421396e-07, "epoch": 3.7859291978746556, "percentage": 75.72, "elapsed_time": "2:36:07", "remaining_time": "0:50:03", "throughput": 19868.79, "total_tokens": 186114752} +{"current_steps": 59145, "total_steps": 78105, "loss": 0.1937, "lr": 8.451804620364187e-07, "epoch": 3.7862492798156326, "percentage": 75.72, "elapsed_time": "2:36:07", "remaining_time": "0:50:03", "throughput": 19869.02, "total_tokens": 186130304} +{"current_steps": 59150, "total_steps": 78105, "loss": 0.2016, "lr": 8.447617556637652e-07, "epoch": 3.7865693617566096, "percentage": 75.73, "elapsed_time": "2:36:08", "remaining_time": "0:50:02", "throughput": 19869.27, "total_tokens": 186145984} +{"current_steps": 59155, "total_steps": 78105, "loss": 0.1861, "lr": 8.443431319450893e-07, "epoch": 3.7868894436975866, "percentage": 75.74, "elapsed_time": "2:36:09", "remaining_time": "0:50:01", "throughput": 19869.52, "total_tokens": 186162240} +{"current_steps": 59160, "total_steps": 78105, "loss": 0.13, "lr": 8.439245909012919e-07, "epoch": 3.7872095256385636, "percentage": 75.74, "elapsed_time": "2:36:09", "remaining_time": "0:50:00", "throughput": 19869.81, "total_tokens": 186179200} +{"current_steps": 59165, "total_steps": 78105, "loss": 0.1823, "lr": 8.435061325532751e-07, "epoch": 3.7875296075795406, "percentage": 75.75, "elapsed_time": "2:36:10", "remaining_time": "0:49:59", "throughput": 19870.01, "total_tokens": 186193920} +{"current_steps": 59170, "total_steps": 78105, "loss": 0.1573, "lr": 8.430877569219334e-07, "epoch": 3.787849689520517, "percentage": 75.76, "elapsed_time": "2:36:11", "remaining_time": "0:49:58", "throughput": 19870.23, "total_tokens": 186209216} +{"current_steps": 59175, "total_steps": 78105, "loss": 0.1624, "lr": 8.426694640281588e-07, "epoch": 3.788169771461494, "percentage": 75.76, "elapsed_time": "2:36:11", "remaining_time": "0:49:58", "throughput": 19870.48, "total_tokens": 186225216} +{"current_steps": 59180, "total_steps": 78105, "loss": 0.1514, "lr": 8.422512538928384e-07, "epoch": 3.788489853402471, "percentage": 75.77, "elapsed_time": "2:36:12", "remaining_time": "0:49:57", "throughput": 19870.7, "total_tokens": 186240832} +{"current_steps": 59185, "total_steps": 78105, "loss": 0.1732, "lr": 8.418331265368548e-07, "epoch": 3.7888099353434477, "percentage": 75.78, "elapsed_time": "2:36:13", "remaining_time": "0:49:56", "throughput": 19870.92, "total_tokens": 186256128} +{"current_steps": 59190, "total_steps": 78105, "loss": 0.1985, "lr": 8.414150819810893e-07, "epoch": 3.7891300172844247, "percentage": 75.78, "elapsed_time": "2:36:14", "remaining_time": "0:49:55", "throughput": 19871.23, "total_tokens": 186273600} +{"current_steps": 59195, "total_steps": 78105, "loss": 0.1339, "lr": 8.409971202464138e-07, "epoch": 3.7894500992254017, "percentage": 75.79, "elapsed_time": "2:36:14", "remaining_time": "0:49:54", "throughput": 19871.51, "total_tokens": 186289920} +{"current_steps": 59200, "total_steps": 78105, "loss": 0.1111, "lr": 8.405792413537014e-07, "epoch": 3.7897701811663786, "percentage": 75.8, "elapsed_time": "2:36:15", "remaining_time": "0:49:53", "throughput": 19871.82, "total_tokens": 186306816} +{"current_steps": 59205, "total_steps": 78105, "loss": 0.18, "lr": 8.40161445323818e-07, "epoch": 3.7900902631073556, "percentage": 75.8, "elapsed_time": "2:36:16", "remaining_time": "0:49:53", "throughput": 19872.14, "total_tokens": 186324160} +{"current_steps": 59210, "total_steps": 78105, "loss": 0.136, "lr": 8.397437321776264e-07, "epoch": 3.7904103450483326, "percentage": 75.81, "elapsed_time": "2:36:16", "remaining_time": "0:49:52", "throughput": 19872.39, "total_tokens": 186340224} +{"current_steps": 59215, "total_steps": 78105, "loss": 0.1257, "lr": 8.393261019359849e-07, "epoch": 3.790730426989309, "percentage": 75.81, "elapsed_time": "2:36:17", "remaining_time": "0:49:51", "throughput": 19872.68, "total_tokens": 186356928} +{"current_steps": 59220, "total_steps": 78105, "loss": 0.2072, "lr": 8.389085546197467e-07, "epoch": 3.791050508930286, "percentage": 75.82, "elapsed_time": "2:36:18", "remaining_time": "0:49:50", "throughput": 19872.9, "total_tokens": 186372288} +{"current_steps": 59225, "total_steps": 78105, "loss": 0.164, "lr": 8.384910902497648e-07, "epoch": 3.791370590871263, "percentage": 75.83, "elapsed_time": "2:36:18", "remaining_time": "0:49:49", "throughput": 19873.15, "total_tokens": 186388160} +{"current_steps": 59230, "total_steps": 78105, "loss": 0.194, "lr": 8.380737088468815e-07, "epoch": 3.7916906728122397, "percentage": 75.83, "elapsed_time": "2:36:19", "remaining_time": "0:49:49", "throughput": 19873.44, "total_tokens": 186404800} +{"current_steps": 59235, "total_steps": 78105, "loss": 0.1868, "lr": 8.376564104319415e-07, "epoch": 3.7920107547532167, "percentage": 75.84, "elapsed_time": "2:36:20", "remaining_time": "0:49:48", "throughput": 19873.65, "total_tokens": 186419584} +{"current_steps": 59240, "total_steps": 78105, "loss": 0.186, "lr": 8.372391950257816e-07, "epoch": 3.7923308366941937, "percentage": 75.85, "elapsed_time": "2:36:20", "remaining_time": "0:49:47", "throughput": 19873.88, "total_tokens": 186434816} +{"current_steps": 59245, "total_steps": 78105, "loss": 0.176, "lr": 8.368220626492352e-07, "epoch": 3.7926509186351707, "percentage": 75.85, "elapsed_time": "2:36:21", "remaining_time": "0:49:46", "throughput": 19874.14, "total_tokens": 186451136} +{"current_steps": 59250, "total_steps": 78105, "loss": 0.1469, "lr": 8.364050133231319e-07, "epoch": 3.7929710005761477, "percentage": 75.86, "elapsed_time": "2:36:22", "remaining_time": "0:49:45", "throughput": 19874.37, "total_tokens": 186466624} +{"current_steps": 59255, "total_steps": 78105, "loss": 0.2088, "lr": 8.359880470682969e-07, "epoch": 3.7932910825171247, "percentage": 75.87, "elapsed_time": "2:36:22", "remaining_time": "0:49:44", "throughput": 19874.6, "total_tokens": 186482048} +{"current_steps": 59260, "total_steps": 78105, "loss": 0.1554, "lr": 8.355711639055502e-07, "epoch": 3.793611164458101, "percentage": 75.87, "elapsed_time": "2:36:23", "remaining_time": "0:49:44", "throughput": 19874.84, "total_tokens": 186497856} +{"current_steps": 59265, "total_steps": 78105, "loss": 0.1703, "lr": 8.351543638557119e-07, "epoch": 3.793931246399078, "percentage": 75.88, "elapsed_time": "2:36:24", "remaining_time": "0:49:43", "throughput": 19875.11, "total_tokens": 186514176} +{"current_steps": 59270, "total_steps": 78105, "loss": 0.2763, "lr": 8.34737646939591e-07, "epoch": 3.794251328340055, "percentage": 75.89, "elapsed_time": "2:36:24", "remaining_time": "0:49:42", "throughput": 19875.32, "total_tokens": 186529344} +{"current_steps": 59275, "total_steps": 78105, "loss": 0.2097, "lr": 8.343210131779997e-07, "epoch": 3.7945714102810317, "percentage": 75.89, "elapsed_time": "2:36:25", "remaining_time": "0:49:41", "throughput": 19875.61, "total_tokens": 186546304} +{"current_steps": 59280, "total_steps": 78105, "loss": 0.1346, "lr": 8.339044625917389e-07, "epoch": 3.7948914922220087, "percentage": 75.9, "elapsed_time": "2:36:26", "remaining_time": "0:49:40", "throughput": 19875.84, "total_tokens": 186561728} +{"current_steps": 59285, "total_steps": 78105, "loss": 0.0934, "lr": 8.33487995201612e-07, "epoch": 3.7952115741629857, "percentage": 75.9, "elapsed_time": "2:36:27", "remaining_time": "0:49:39", "throughput": 19876.07, "total_tokens": 186577280} +{"current_steps": 59290, "total_steps": 78105, "loss": 0.2336, "lr": 8.330716110284137e-07, "epoch": 3.7955316561039627, "percentage": 75.91, "elapsed_time": "2:36:27", "remaining_time": "0:49:39", "throughput": 19876.36, "total_tokens": 186594048} +{"current_steps": 59295, "total_steps": 78105, "loss": 0.182, "lr": 8.32655310092936e-07, "epoch": 3.7958517380449397, "percentage": 75.92, "elapsed_time": "2:36:28", "remaining_time": "0:49:38", "throughput": 19876.62, "total_tokens": 186610432} +{"current_steps": 59300, "total_steps": 78105, "loss": 0.1275, "lr": 8.322390924159688e-07, "epoch": 3.7961718199859162, "percentage": 75.92, "elapsed_time": "2:36:29", "remaining_time": "0:49:37", "throughput": 19876.81, "total_tokens": 186625024} +{"current_steps": 59305, "total_steps": 78105, "loss": 0.2067, "lr": 8.318229580182924e-07, "epoch": 3.7964919019268932, "percentage": 75.93, "elapsed_time": "2:36:29", "remaining_time": "0:49:36", "throughput": 19877.05, "total_tokens": 186640256} +{"current_steps": 59310, "total_steps": 78105, "loss": 0.1439, "lr": 8.314069069206901e-07, "epoch": 3.79681198386787, "percentage": 75.94, "elapsed_time": "2:36:30", "remaining_time": "0:49:35", "throughput": 19877.33, "total_tokens": 186657152} +{"current_steps": 59315, "total_steps": 78105, "loss": 0.2074, "lr": 8.309909391439339e-07, "epoch": 3.797132065808847, "percentage": 75.94, "elapsed_time": "2:36:31", "remaining_time": "0:49:34", "throughput": 19877.57, "total_tokens": 186672640} +{"current_steps": 59320, "total_steps": 78105, "loss": 0.1296, "lr": 8.305750547087974e-07, "epoch": 3.7974521477498238, "percentage": 75.95, "elapsed_time": "2:36:31", "remaining_time": "0:49:34", "throughput": 19877.87, "total_tokens": 186689664} +{"current_steps": 59325, "total_steps": 78105, "loss": 0.174, "lr": 8.301592536360467e-07, "epoch": 3.7977722296908007, "percentage": 75.96, "elapsed_time": "2:36:32", "remaining_time": "0:49:33", "throughput": 19878.05, "total_tokens": 186704320} +{"current_steps": 59330, "total_steps": 78105, "loss": 0.2151, "lr": 8.297435359464453e-07, "epoch": 3.7980923116317777, "percentage": 75.96, "elapsed_time": "2:36:33", "remaining_time": "0:49:32", "throughput": 19878.25, "total_tokens": 186719040} +{"current_steps": 59335, "total_steps": 78105, "loss": 0.1068, "lr": 8.293279016607514e-07, "epoch": 3.7984123935727547, "percentage": 75.97, "elapsed_time": "2:36:33", "remaining_time": "0:49:31", "throughput": 19878.53, "total_tokens": 186735680} +{"current_steps": 59340, "total_steps": 78105, "loss": 0.1465, "lr": 8.289123507997193e-07, "epoch": 3.7987324755137317, "percentage": 75.97, "elapsed_time": "2:36:34", "remaining_time": "0:49:30", "throughput": 19878.74, "total_tokens": 186750720} +{"current_steps": 59345, "total_steps": 78105, "loss": 0.1444, "lr": 8.284968833841009e-07, "epoch": 3.7990525574547083, "percentage": 75.98, "elapsed_time": "2:36:35", "remaining_time": "0:49:29", "throughput": 19879.0, "total_tokens": 186766720} +{"current_steps": 59350, "total_steps": 78105, "loss": 0.1979, "lr": 8.280814994346415e-07, "epoch": 3.7993726393956853, "percentage": 75.99, "elapsed_time": "2:36:35", "remaining_time": "0:49:29", "throughput": 19879.32, "total_tokens": 186783808} +{"current_steps": 59355, "total_steps": 78105, "loss": 0.1956, "lr": 8.276661989720833e-07, "epoch": 3.7996927213366622, "percentage": 75.99, "elapsed_time": "2:36:36", "remaining_time": "0:49:28", "throughput": 19879.56, "total_tokens": 186799488} +{"current_steps": 59360, "total_steps": 78105, "loss": 0.1846, "lr": 8.27250982017164e-07, "epoch": 3.8000128032776392, "percentage": 76.0, "elapsed_time": "2:36:37", "remaining_time": "0:49:27", "throughput": 19879.82, "total_tokens": 186815488} +{"current_steps": 59365, "total_steps": 78105, "loss": 0.1437, "lr": 8.268358485906181e-07, "epoch": 3.800332885218616, "percentage": 76.01, "elapsed_time": "2:36:37", "remaining_time": "0:49:26", "throughput": 19879.98, "total_tokens": 186829632} +{"current_steps": 59370, "total_steps": 78105, "loss": 0.109, "lr": 8.264207987131745e-07, "epoch": 3.8006529671595928, "percentage": 76.01, "elapsed_time": "2:36:38", "remaining_time": "0:49:25", "throughput": 19880.19, "total_tokens": 186844736} +{"current_steps": 59375, "total_steps": 78105, "loss": 0.1567, "lr": 8.260058324055586e-07, "epoch": 3.8009730491005698, "percentage": 76.02, "elapsed_time": "2:36:39", "remaining_time": "0:49:25", "throughput": 19880.43, "total_tokens": 186860416} +{"current_steps": 59380, "total_steps": 78105, "loss": 0.2166, "lr": 8.255909496884914e-07, "epoch": 3.8012931310415468, "percentage": 76.03, "elapsed_time": "2:36:39", "remaining_time": "0:49:24", "throughput": 19880.66, "total_tokens": 186875776} +{"current_steps": 59385, "total_steps": 78105, "loss": 0.2555, "lr": 8.251761505826922e-07, "epoch": 3.8016132129825237, "percentage": 76.03, "elapsed_time": "2:36:40", "remaining_time": "0:49:23", "throughput": 19880.9, "total_tokens": 186891648} +{"current_steps": 59390, "total_steps": 78105, "loss": 0.1712, "lr": 8.247614351088703e-07, "epoch": 3.8019332949235003, "percentage": 76.04, "elapsed_time": "2:36:41", "remaining_time": "0:49:22", "throughput": 19881.13, "total_tokens": 186907072} +{"current_steps": 59395, "total_steps": 78105, "loss": 0.0999, "lr": 8.243468032877375e-07, "epoch": 3.8022533768644773, "percentage": 76.05, "elapsed_time": "2:36:41", "remaining_time": "0:49:21", "throughput": 19881.38, "total_tokens": 186923264} +{"current_steps": 59400, "total_steps": 78105, "loss": 0.1436, "lr": 8.239322551399967e-07, "epoch": 3.8025734588054543, "percentage": 76.05, "elapsed_time": "2:36:42", "remaining_time": "0:49:20", "throughput": 19881.61, "total_tokens": 186938688} +{"current_steps": 59405, "total_steps": 78105, "loss": 0.1095, "lr": 8.235177906863492e-07, "epoch": 3.802893540746431, "percentage": 76.06, "elapsed_time": "2:36:43", "remaining_time": "0:49:20", "throughput": 19881.84, "total_tokens": 186954304} +{"current_steps": 59410, "total_steps": 78105, "loss": 0.2212, "lr": 8.231034099474905e-07, "epoch": 3.803213622687408, "percentage": 76.06, "elapsed_time": "2:36:43", "remaining_time": "0:49:19", "throughput": 19882.05, "total_tokens": 186969024} +{"current_steps": 59415, "total_steps": 78105, "loss": 0.1865, "lr": 8.226891129441122e-07, "epoch": 3.803533704628385, "percentage": 76.07, "elapsed_time": "2:36:44", "remaining_time": "0:49:18", "throughput": 19882.27, "total_tokens": 186984512} +{"current_steps": 59420, "total_steps": 78105, "loss": 0.167, "lr": 8.222748996969044e-07, "epoch": 3.803853786569362, "percentage": 76.08, "elapsed_time": "2:36:45", "remaining_time": "0:49:17", "throughput": 19882.51, "total_tokens": 187000320} +{"current_steps": 59425, "total_steps": 78105, "loss": 0.2452, "lr": 8.218607702265474e-07, "epoch": 3.804173868510339, "percentage": 76.08, "elapsed_time": "2:36:45", "remaining_time": "0:49:16", "throughput": 19882.75, "total_tokens": 187015936} +{"current_steps": 59430, "total_steps": 78105, "loss": 0.1496, "lr": 8.214467245537233e-07, "epoch": 3.8044939504513158, "percentage": 76.09, "elapsed_time": "2:36:46", "remaining_time": "0:49:15", "throughput": 19882.97, "total_tokens": 187031232} +{"current_steps": 59435, "total_steps": 78105, "loss": 0.1514, "lr": 8.210327626991066e-07, "epoch": 3.8048140323922923, "percentage": 76.1, "elapsed_time": "2:36:47", "remaining_time": "0:49:15", "throughput": 19883.17, "total_tokens": 187045824} +{"current_steps": 59440, "total_steps": 78105, "loss": 0.1598, "lr": 8.206188846833685e-07, "epoch": 3.8051341143332693, "percentage": 76.1, "elapsed_time": "2:36:47", "remaining_time": "0:49:14", "throughput": 19883.43, "total_tokens": 187062208} +{"current_steps": 59445, "total_steps": 78105, "loss": 0.1775, "lr": 8.202050905271752e-07, "epoch": 3.8054541962742463, "percentage": 76.11, "elapsed_time": "2:36:48", "remaining_time": "0:49:13", "throughput": 19883.67, "total_tokens": 187077888} +{"current_steps": 59450, "total_steps": 78105, "loss": 0.1985, "lr": 8.197913802511903e-07, "epoch": 3.805774278215223, "percentage": 76.12, "elapsed_time": "2:36:49", "remaining_time": "0:49:12", "throughput": 19883.88, "total_tokens": 187093120} +{"current_steps": 59455, "total_steps": 78105, "loss": 0.1402, "lr": 8.193777538760719e-07, "epoch": 3.8060943601562, "percentage": 76.12, "elapsed_time": "2:36:49", "remaining_time": "0:49:11", "throughput": 19884.14, "total_tokens": 187109056} +{"current_steps": 59460, "total_steps": 78105, "loss": 0.1896, "lr": 8.189642114224739e-07, "epoch": 3.806414442097177, "percentage": 76.13, "elapsed_time": "2:36:50", "remaining_time": "0:49:10", "throughput": 19884.36, "total_tokens": 187124480} +{"current_steps": 59465, "total_steps": 78105, "loss": 0.1081, "lr": 8.185507529110473e-07, "epoch": 3.806734524038154, "percentage": 76.13, "elapsed_time": "2:36:51", "remaining_time": "0:49:10", "throughput": 19884.62, "total_tokens": 187140864} +{"current_steps": 59470, "total_steps": 78105, "loss": 0.1726, "lr": 8.18137378362438e-07, "epoch": 3.807054605979131, "percentage": 76.14, "elapsed_time": "2:36:52", "remaining_time": "0:49:09", "throughput": 19884.86, "total_tokens": 187156736} +{"current_steps": 59475, "total_steps": 78105, "loss": 0.1552, "lr": 8.177240877972875e-07, "epoch": 3.807374687920108, "percentage": 76.15, "elapsed_time": "2:36:52", "remaining_time": "0:49:08", "throughput": 19885.08, "total_tokens": 187171968} +{"current_steps": 59480, "total_steps": 78105, "loss": 0.1448, "lr": 8.173108812362334e-07, "epoch": 3.8076947698610843, "percentage": 76.15, "elapsed_time": "2:36:53", "remaining_time": "0:49:07", "throughput": 19885.32, "total_tokens": 187187648} +{"current_steps": 59485, "total_steps": 78105, "loss": 0.1607, "lr": 8.168977586999091e-07, "epoch": 3.8080148518020613, "percentage": 76.16, "elapsed_time": "2:36:54", "remaining_time": "0:49:06", "throughput": 19885.6, "total_tokens": 187204416} +{"current_steps": 59490, "total_steps": 78105, "loss": 0.1403, "lr": 8.164847202089429e-07, "epoch": 3.8083349337430383, "percentage": 76.17, "elapsed_time": "2:36:54", "remaining_time": "0:49:05", "throughput": 19885.81, "total_tokens": 187219328} +{"current_steps": 59495, "total_steps": 78105, "loss": 0.1597, "lr": 8.160717657839625e-07, "epoch": 3.808655015684015, "percentage": 76.17, "elapsed_time": "2:36:55", "remaining_time": "0:49:05", "throughput": 19886.03, "total_tokens": 187234560} +{"current_steps": 59500, "total_steps": 78105, "loss": 0.1466, "lr": 8.156588954455849e-07, "epoch": 3.808975097624992, "percentage": 76.18, "elapsed_time": "2:36:56", "remaining_time": "0:49:04", "throughput": 19886.28, "total_tokens": 187250688} +{"current_steps": 59505, "total_steps": 78105, "loss": 0.1375, "lr": 8.152461092144306e-07, "epoch": 3.809295179565969, "percentage": 76.19, "elapsed_time": "2:36:56", "remaining_time": "0:49:03", "throughput": 19886.54, "total_tokens": 187266432} +{"current_steps": 59510, "total_steps": 78105, "loss": 0.1379, "lr": 8.148334071111084e-07, "epoch": 3.809615261506946, "percentage": 76.19, "elapsed_time": "2:36:57", "remaining_time": "0:49:02", "throughput": 19886.83, "total_tokens": 187283200} +{"current_steps": 59515, "total_steps": 78105, "loss": 0.2232, "lr": 8.144207891562287e-07, "epoch": 3.809935343447923, "percentage": 76.2, "elapsed_time": "2:36:58", "remaining_time": "0:49:01", "throughput": 19887.08, "total_tokens": 187299136} +{"current_steps": 59520, "total_steps": 78105, "loss": 0.146, "lr": 8.140082553703948e-07, "epoch": 3.8102554253889, "percentage": 76.21, "elapsed_time": "2:36:58", "remaining_time": "0:49:00", "throughput": 19887.29, "total_tokens": 187314240} +{"current_steps": 59525, "total_steps": 78105, "loss": 0.1601, "lr": 8.135958057742061e-07, "epoch": 3.8105755073298764, "percentage": 76.21, "elapsed_time": "2:36:59", "remaining_time": "0:49:00", "throughput": 19887.51, "total_tokens": 187329984} +{"current_steps": 59530, "total_steps": 78105, "loss": 0.1516, "lr": 8.131834403882605e-07, "epoch": 3.8108955892708534, "percentage": 76.22, "elapsed_time": "2:37:00", "remaining_time": "0:48:59", "throughput": 19887.73, "total_tokens": 187344960} +{"current_steps": 59535, "total_steps": 78105, "loss": 0.194, "lr": 8.127711592331456e-07, "epoch": 3.8112156712118304, "percentage": 76.22, "elapsed_time": "2:37:00", "remaining_time": "0:48:58", "throughput": 19887.97, "total_tokens": 187360320} +{"current_steps": 59540, "total_steps": 78105, "loss": 0.1793, "lr": 8.123589623294522e-07, "epoch": 3.811535753152807, "percentage": 76.23, "elapsed_time": "2:37:01", "remaining_time": "0:48:57", "throughput": 19888.25, "total_tokens": 187377088} +{"current_steps": 59545, "total_steps": 78105, "loss": 0.2101, "lr": 8.1194684969776e-07, "epoch": 3.811855835093784, "percentage": 76.24, "elapsed_time": "2:37:02", "remaining_time": "0:48:56", "throughput": 19888.53, "total_tokens": 187394368} +{"current_steps": 59550, "total_steps": 78105, "loss": 0.1827, "lr": 8.115348213586505e-07, "epoch": 3.812175917034761, "percentage": 76.24, "elapsed_time": "2:37:02", "remaining_time": "0:48:56", "throughput": 19888.84, "total_tokens": 187412224} +{"current_steps": 59555, "total_steps": 78105, "loss": 0.1453, "lr": 8.111228773326968e-07, "epoch": 3.812495998975738, "percentage": 76.25, "elapsed_time": "2:37:03", "remaining_time": "0:48:55", "throughput": 19889.1, "total_tokens": 187428608} +{"current_steps": 59560, "total_steps": 78105, "loss": 0.1248, "lr": 8.107110176404697e-07, "epoch": 3.812816080916715, "percentage": 76.26, "elapsed_time": "2:37:04", "remaining_time": "0:48:54", "throughput": 19889.38, "total_tokens": 187445184} +{"current_steps": 59565, "total_steps": 78105, "loss": 0.1271, "lr": 8.102992423025352e-07, "epoch": 3.8131361628576914, "percentage": 76.26, "elapsed_time": "2:37:05", "remaining_time": "0:48:53", "throughput": 19889.59, "total_tokens": 187460416} +{"current_steps": 59570, "total_steps": 78105, "loss": 0.1845, "lr": 8.098875513394544e-07, "epoch": 3.8134562447986684, "percentage": 76.27, "elapsed_time": "2:37:05", "remaining_time": "0:48:52", "throughput": 19889.79, "total_tokens": 187475328} +{"current_steps": 59575, "total_steps": 78105, "loss": 0.1381, "lr": 8.094759447717865e-07, "epoch": 3.8137763267396454, "percentage": 76.28, "elapsed_time": "2:37:06", "remaining_time": "0:48:51", "throughput": 19890.04, "total_tokens": 187491392} +{"current_steps": 59580, "total_steps": 78105, "loss": 0.2322, "lr": 8.090644226200845e-07, "epoch": 3.8140964086806224, "percentage": 76.28, "elapsed_time": "2:37:07", "remaining_time": "0:48:51", "throughput": 19890.26, "total_tokens": 187506880} +{"current_steps": 59585, "total_steps": 78105, "loss": 0.1146, "lr": 8.086529849048974e-07, "epoch": 3.814416490621599, "percentage": 76.29, "elapsed_time": "2:37:07", "remaining_time": "0:48:50", "throughput": 19890.49, "total_tokens": 187522368} +{"current_steps": 59590, "total_steps": 78105, "loss": 0.141, "lr": 8.082416316467703e-07, "epoch": 3.814736572562576, "percentage": 76.29, "elapsed_time": "2:37:08", "remaining_time": "0:48:49", "throughput": 19890.75, "total_tokens": 187538560} +{"current_steps": 59595, "total_steps": 78105, "loss": 0.1886, "lr": 8.07830362866244e-07, "epoch": 3.815056654503553, "percentage": 76.3, "elapsed_time": "2:37:09", "remaining_time": "0:48:48", "throughput": 19891.09, "total_tokens": 187556608} +{"current_steps": 59600, "total_steps": 78105, "loss": 0.1766, "lr": 8.074191785838554e-07, "epoch": 3.81537673644453, "percentage": 76.31, "elapsed_time": "2:37:09", "remaining_time": "0:48:47", "throughput": 19891.38, "total_tokens": 187573568} +{"current_steps": 59605, "total_steps": 78105, "loss": 0.1894, "lr": 8.070080788201365e-07, "epoch": 3.815696818385507, "percentage": 76.31, "elapsed_time": "2:37:10", "remaining_time": "0:48:47", "throughput": 19891.69, "total_tokens": 187590720} +{"current_steps": 59610, "total_steps": 78105, "loss": 0.2042, "lr": 8.065970635956147e-07, "epoch": 3.8160169003264834, "percentage": 76.32, "elapsed_time": "2:37:11", "remaining_time": "0:48:46", "throughput": 19891.91, "total_tokens": 187606144} +{"current_steps": 59615, "total_steps": 78105, "loss": 0.2101, "lr": 8.061861329308168e-07, "epoch": 3.8163369822674604, "percentage": 76.33, "elapsed_time": "2:37:11", "remaining_time": "0:48:45", "throughput": 19892.19, "total_tokens": 187622592} +{"current_steps": 59620, "total_steps": 78105, "loss": 0.1257, "lr": 8.05775286846259e-07, "epoch": 3.8166570642084374, "percentage": 76.33, "elapsed_time": "2:37:12", "remaining_time": "0:48:44", "throughput": 19892.42, "total_tokens": 187638080} +{"current_steps": 59625, "total_steps": 78105, "loss": 0.117, "lr": 8.053645253624593e-07, "epoch": 3.8169771461494144, "percentage": 76.34, "elapsed_time": "2:37:13", "remaining_time": "0:48:43", "throughput": 19892.65, "total_tokens": 187654144} +{"current_steps": 59630, "total_steps": 78105, "loss": 0.176, "lr": 8.049538484999281e-07, "epoch": 3.817297228090391, "percentage": 76.35, "elapsed_time": "2:37:14", "remaining_time": "0:48:42", "throughput": 19892.94, "total_tokens": 187670720} +{"current_steps": 59635, "total_steps": 78105, "loss": 0.1196, "lr": 8.045432562791727e-07, "epoch": 3.817617310031368, "percentage": 76.35, "elapsed_time": "2:37:14", "remaining_time": "0:48:42", "throughput": 19893.17, "total_tokens": 187686016} +{"current_steps": 59640, "total_steps": 78105, "loss": 0.1555, "lr": 8.041327487206957e-07, "epoch": 3.817937391972345, "percentage": 76.36, "elapsed_time": "2:37:15", "remaining_time": "0:48:41", "throughput": 19893.29, "total_tokens": 187701312} +{"current_steps": 59645, "total_steps": 78105, "loss": 0.154, "lr": 8.037223258449947e-07, "epoch": 3.818257473913322, "percentage": 76.37, "elapsed_time": "2:37:16", "remaining_time": "0:48:40", "throughput": 19893.55, "total_tokens": 187717440} +{"current_steps": 59650, "total_steps": 78105, "loss": 0.1606, "lr": 8.03311987672567e-07, "epoch": 3.818577555854299, "percentage": 76.37, "elapsed_time": "2:37:16", "remaining_time": "0:48:39", "throughput": 19893.73, "total_tokens": 187732992} +{"current_steps": 59655, "total_steps": 78105, "loss": 0.1504, "lr": 8.029017342238995e-07, "epoch": 3.8188976377952755, "percentage": 76.38, "elapsed_time": "2:37:17", "remaining_time": "0:48:38", "throughput": 19893.98, "total_tokens": 187748736} +{"current_steps": 59660, "total_steps": 78105, "loss": 0.1484, "lr": 8.024915655194801e-07, "epoch": 3.8192177197362525, "percentage": 76.38, "elapsed_time": "2:37:18", "remaining_time": "0:48:37", "throughput": 19894.21, "total_tokens": 187764160} +{"current_steps": 59665, "total_steps": 78105, "loss": 0.1643, "lr": 8.0208148157979e-07, "epoch": 3.8195378016772294, "percentage": 76.39, "elapsed_time": "2:37:18", "remaining_time": "0:48:37", "throughput": 19894.48, "total_tokens": 187780800} +{"current_steps": 59670, "total_steps": 78105, "loss": 0.1844, "lr": 8.016714824253063e-07, "epoch": 3.819857883618206, "percentage": 76.4, "elapsed_time": "2:37:19", "remaining_time": "0:48:36", "throughput": 19894.7, "total_tokens": 187796160} +{"current_steps": 59675, "total_steps": 78105, "loss": 0.1807, "lr": 8.012615680765029e-07, "epoch": 3.820177965559183, "percentage": 76.4, "elapsed_time": "2:37:20", "remaining_time": "0:48:35", "throughput": 19894.92, "total_tokens": 187811840} +{"current_steps": 59680, "total_steps": 78105, "loss": 0.1676, "lr": 8.00851738553848e-07, "epoch": 3.82049804750016, "percentage": 76.41, "elapsed_time": "2:37:20", "remaining_time": "0:48:34", "throughput": 19895.14, "total_tokens": 187827584} +{"current_steps": 59685, "total_steps": 78105, "loss": 0.1598, "lr": 8.004419938778071e-07, "epoch": 3.820818129441137, "percentage": 76.42, "elapsed_time": "2:37:21", "remaining_time": "0:48:33", "throughput": 19895.35, "total_tokens": 187842816} +{"current_steps": 59690, "total_steps": 78105, "loss": 0.1937, "lr": 8.000323340688396e-07, "epoch": 3.821138211382114, "percentage": 76.42, "elapsed_time": "2:37:22", "remaining_time": "0:48:33", "throughput": 19895.55, "total_tokens": 187857536} +{"current_steps": 59695, "total_steps": 78105, "loss": 0.156, "lr": 7.996227591474029e-07, "epoch": 3.821458293323091, "percentage": 76.43, "elapsed_time": "2:37:22", "remaining_time": "0:48:32", "throughput": 19895.77, "total_tokens": 187872832} +{"current_steps": 59700, "total_steps": 78105, "loss": 0.1605, "lr": 7.992132691339491e-07, "epoch": 3.8217783752640675, "percentage": 76.44, "elapsed_time": "2:37:23", "remaining_time": "0:48:31", "throughput": 19895.96, "total_tokens": 187887168} +{"current_steps": 59705, "total_steps": 78105, "loss": 0.1874, "lr": 7.988038640489252e-07, "epoch": 3.8220984572050445, "percentage": 76.44, "elapsed_time": "2:37:24", "remaining_time": "0:48:30", "throughput": 19896.17, "total_tokens": 187902464} +{"current_steps": 59710, "total_steps": 78105, "loss": 0.172, "lr": 7.983945439127753e-07, "epoch": 3.8224185391460215, "percentage": 76.45, "elapsed_time": "2:37:24", "remaining_time": "0:48:29", "throughput": 19896.38, "total_tokens": 187917632} +{"current_steps": 59715, "total_steps": 78105, "loss": 0.2199, "lr": 7.979853087459386e-07, "epoch": 3.822738621086998, "percentage": 76.45, "elapsed_time": "2:37:25", "remaining_time": "0:48:28", "throughput": 19896.62, "total_tokens": 187933440} +{"current_steps": 59720, "total_steps": 78105, "loss": 0.1556, "lr": 7.97576158568849e-07, "epoch": 3.823058703027975, "percentage": 76.46, "elapsed_time": "2:37:26", "remaining_time": "0:48:28", "throughput": 19896.84, "total_tokens": 187949376} +{"current_steps": 59725, "total_steps": 78105, "loss": 0.2253, "lr": 7.971670934019405e-07, "epoch": 3.823378784968952, "percentage": 76.47, "elapsed_time": "2:37:26", "remaining_time": "0:48:27", "throughput": 19897.05, "total_tokens": 187964480} +{"current_steps": 59730, "total_steps": 78105, "loss": 0.2192, "lr": 7.967581132656357e-07, "epoch": 3.823698866909929, "percentage": 76.47, "elapsed_time": "2:37:27", "remaining_time": "0:48:26", "throughput": 19897.3, "total_tokens": 187980608} +{"current_steps": 59735, "total_steps": 78105, "loss": 0.193, "lr": 7.963492181803606e-07, "epoch": 3.824018948850906, "percentage": 76.48, "elapsed_time": "2:37:28", "remaining_time": "0:48:25", "throughput": 19897.55, "total_tokens": 187996416} +{"current_steps": 59740, "total_steps": 78105, "loss": 0.2356, "lr": 7.959404081665298e-07, "epoch": 3.824339030791883, "percentage": 76.49, "elapsed_time": "2:37:28", "remaining_time": "0:48:24", "throughput": 19897.79, "total_tokens": 188012032} +{"current_steps": 59745, "total_steps": 78105, "loss": 0.1795, "lr": 7.955316832445598e-07, "epoch": 3.8246591127328595, "percentage": 76.49, "elapsed_time": "2:37:29", "remaining_time": "0:48:23", "throughput": 19898.03, "total_tokens": 188028160} +{"current_steps": 59750, "total_steps": 78105, "loss": 0.1108, "lr": 7.951230434348595e-07, "epoch": 3.8249791946738365, "percentage": 76.5, "elapsed_time": "2:37:30", "remaining_time": "0:48:23", "throughput": 19898.25, "total_tokens": 188043264} +{"current_steps": 59755, "total_steps": 78105, "loss": 0.2085, "lr": 7.947144887578329e-07, "epoch": 3.8252992766148135, "percentage": 76.51, "elapsed_time": "2:37:30", "remaining_time": "0:48:22", "throughput": 19898.43, "total_tokens": 188058112} +{"current_steps": 59760, "total_steps": 78105, "loss": 0.1225, "lr": 7.943060192338839e-07, "epoch": 3.82561935855579, "percentage": 76.51, "elapsed_time": "2:37:31", "remaining_time": "0:48:21", "throughput": 19898.65, "total_tokens": 188073344} +{"current_steps": 59765, "total_steps": 78105, "loss": 0.1736, "lr": 7.938976348834059e-07, "epoch": 3.825939440496767, "percentage": 76.52, "elapsed_time": "2:37:32", "remaining_time": "0:48:20", "throughput": 19898.9, "total_tokens": 188088960} +{"current_steps": 59770, "total_steps": 78105, "loss": 0.1727, "lr": 7.934893357267953e-07, "epoch": 3.826259522437744, "percentage": 76.53, "elapsed_time": "2:37:32", "remaining_time": "0:48:19", "throughput": 19899.15, "total_tokens": 188105152} +{"current_steps": 59775, "total_steps": 78105, "loss": 0.2031, "lr": 7.930811217844365e-07, "epoch": 3.826579604378721, "percentage": 76.53, "elapsed_time": "2:37:33", "remaining_time": "0:48:18", "throughput": 19899.4, "total_tokens": 188121152} +{"current_steps": 59780, "total_steps": 78105, "loss": 0.1778, "lr": 7.926729930767163e-07, "epoch": 3.826899686319698, "percentage": 76.54, "elapsed_time": "2:37:34", "remaining_time": "0:48:18", "throughput": 19899.61, "total_tokens": 188136128} +{"current_steps": 59785, "total_steps": 78105, "loss": 0.1873, "lr": 7.922649496240137e-07, "epoch": 3.827219768260675, "percentage": 76.54, "elapsed_time": "2:37:34", "remaining_time": "0:48:17", "throughput": 19899.87, "total_tokens": 188152320} +{"current_steps": 59790, "total_steps": 78105, "loss": 0.1792, "lr": 7.918569914467041e-07, "epoch": 3.8275398502016516, "percentage": 76.55, "elapsed_time": "2:37:35", "remaining_time": "0:48:16", "throughput": 19900.19, "total_tokens": 188169728} +{"current_steps": 59795, "total_steps": 78105, "loss": 0.1481, "lr": 7.914491185651594e-07, "epoch": 3.8278599321426285, "percentage": 76.56, "elapsed_time": "2:37:36", "remaining_time": "0:48:15", "throughput": 19900.44, "total_tokens": 188185856} +{"current_steps": 59800, "total_steps": 78105, "loss": 0.2949, "lr": 7.910413309997456e-07, "epoch": 3.8281800140836055, "percentage": 76.56, "elapsed_time": "2:37:37", "remaining_time": "0:48:14", "throughput": 19900.7, "total_tokens": 188202176} +{"current_steps": 59805, "total_steps": 78105, "loss": 0.1485, "lr": 7.906336287708258e-07, "epoch": 3.828500096024582, "percentage": 76.57, "elapsed_time": "2:37:37", "remaining_time": "0:48:14", "throughput": 19900.9, "total_tokens": 188216832} +{"current_steps": 59810, "total_steps": 78105, "loss": 0.1764, "lr": 7.902260118987592e-07, "epoch": 3.828820177965559, "percentage": 76.58, "elapsed_time": "2:37:38", "remaining_time": "0:48:13", "throughput": 19901.13, "total_tokens": 188232064} +{"current_steps": 59815, "total_steps": 78105, "loss": 0.1855, "lr": 7.898184804039e-07, "epoch": 3.829140259906536, "percentage": 76.58, "elapsed_time": "2:37:38", "remaining_time": "0:48:12", "throughput": 19901.3, "total_tokens": 188246272} +{"current_steps": 59820, "total_steps": 78105, "loss": 0.1861, "lr": 7.894110343065978e-07, "epoch": 3.829460341847513, "percentage": 76.59, "elapsed_time": "2:37:39", "remaining_time": "0:48:11", "throughput": 19901.56, "total_tokens": 188262400} +{"current_steps": 59825, "total_steps": 78105, "loss": 0.1767, "lr": 7.890036736271983e-07, "epoch": 3.82978042378849, "percentage": 76.6, "elapsed_time": "2:37:40", "remaining_time": "0:48:10", "throughput": 19901.81, "total_tokens": 188278592} +{"current_steps": 59830, "total_steps": 78105, "loss": 0.1602, "lr": 7.885963983860431e-07, "epoch": 3.8301005057294666, "percentage": 76.6, "elapsed_time": "2:37:41", "remaining_time": "0:48:09", "throughput": 19902.04, "total_tokens": 188294272} +{"current_steps": 59835, "total_steps": 78105, "loss": 0.1602, "lr": 7.881892086034695e-07, "epoch": 3.8304205876704436, "percentage": 76.61, "elapsed_time": "2:37:41", "remaining_time": "0:48:09", "throughput": 19902.23, "total_tokens": 188308992} +{"current_steps": 59840, "total_steps": 78105, "loss": 0.1537, "lr": 7.877821042998093e-07, "epoch": 3.8307406696114206, "percentage": 76.61, "elapsed_time": "2:37:42", "remaining_time": "0:48:08", "throughput": 19902.45, "total_tokens": 188324224} +{"current_steps": 59845, "total_steps": 78105, "loss": 0.1458, "lr": 7.87375085495394e-07, "epoch": 3.8310607515523976, "percentage": 76.62, "elapsed_time": "2:37:43", "remaining_time": "0:48:07", "throughput": 19902.67, "total_tokens": 188339136} +{"current_steps": 59850, "total_steps": 78105, "loss": 0.1341, "lr": 7.869681522105446e-07, "epoch": 3.831380833493374, "percentage": 76.63, "elapsed_time": "2:37:43", "remaining_time": "0:48:06", "throughput": 19902.88, "total_tokens": 188354432} +{"current_steps": 59855, "total_steps": 78105, "loss": 0.2227, "lr": 7.865613044655835e-07, "epoch": 3.831700915434351, "percentage": 76.63, "elapsed_time": "2:37:44", "remaining_time": "0:48:05", "throughput": 19903.06, "total_tokens": 188368640} +{"current_steps": 59860, "total_steps": 78105, "loss": 0.0862, "lr": 7.861545422808259e-07, "epoch": 3.832020997375328, "percentage": 76.64, "elapsed_time": "2:37:45", "remaining_time": "0:48:04", "throughput": 19903.34, "total_tokens": 188385344} +{"current_steps": 59865, "total_steps": 78105, "loss": 0.1752, "lr": 7.857478656765826e-07, "epoch": 3.832341079316305, "percentage": 76.65, "elapsed_time": "2:37:45", "remaining_time": "0:48:04", "throughput": 19903.63, "total_tokens": 188401984} +{"current_steps": 59870, "total_steps": 78105, "loss": 0.2178, "lr": 7.853412746731634e-07, "epoch": 3.832661161257282, "percentage": 76.65, "elapsed_time": "2:37:46", "remaining_time": "0:48:03", "throughput": 19903.86, "total_tokens": 188417472} +{"current_steps": 59875, "total_steps": 78105, "loss": 0.1809, "lr": 7.84934769290868e-07, "epoch": 3.8329812431982586, "percentage": 76.66, "elapsed_time": "2:37:47", "remaining_time": "0:48:02", "throughput": 19904.06, "total_tokens": 188432192} +{"current_steps": 59880, "total_steps": 78105, "loss": 0.1476, "lr": 7.845283495499981e-07, "epoch": 3.8333013251392356, "percentage": 76.67, "elapsed_time": "2:37:47", "remaining_time": "0:48:01", "throughput": 19904.33, "total_tokens": 188448576} +{"current_steps": 59885, "total_steps": 78105, "loss": 0.1717, "lr": 7.841220154708454e-07, "epoch": 3.8336214070802126, "percentage": 76.67, "elapsed_time": "2:37:48", "remaining_time": "0:48:00", "throughput": 19904.53, "total_tokens": 188463744} +{"current_steps": 59890, "total_steps": 78105, "loss": 0.2148, "lr": 7.837157670737025e-07, "epoch": 3.8339414890211896, "percentage": 76.68, "elapsed_time": "2:37:49", "remaining_time": "0:47:59", "throughput": 19904.77, "total_tokens": 188479488} +{"current_steps": 59895, "total_steps": 78105, "loss": 0.1013, "lr": 7.833096043788543e-07, "epoch": 3.834261570962166, "percentage": 76.69, "elapsed_time": "2:37:49", "remaining_time": "0:47:59", "throughput": 19904.98, "total_tokens": 188494464} +{"current_steps": 59900, "total_steps": 78105, "loss": 0.1325, "lr": 7.829035274065824e-07, "epoch": 3.834581652903143, "percentage": 76.69, "elapsed_time": "2:37:50", "remaining_time": "0:47:58", "throughput": 19905.18, "total_tokens": 188509376} +{"current_steps": 59905, "total_steps": 78105, "loss": 0.122, "lr": 7.824975361771644e-07, "epoch": 3.83490173484412, "percentage": 76.7, "elapsed_time": "2:37:51", "remaining_time": "0:47:57", "throughput": 19905.37, "total_tokens": 188524032} +{"current_steps": 59910, "total_steps": 78105, "loss": 0.0974, "lr": 7.820916307108734e-07, "epoch": 3.835221816785097, "percentage": 76.7, "elapsed_time": "2:37:51", "remaining_time": "0:47:56", "throughput": 19905.6, "total_tokens": 188539392} +{"current_steps": 59915, "total_steps": 78105, "loss": 0.1441, "lr": 7.816858110279782e-07, "epoch": 3.835541898726074, "percentage": 76.71, "elapsed_time": "2:37:52", "remaining_time": "0:47:55", "throughput": 19905.81, "total_tokens": 188554240} +{"current_steps": 59920, "total_steps": 78105, "loss": 0.1766, "lr": 7.812800771487422e-07, "epoch": 3.8358619806670506, "percentage": 76.72, "elapsed_time": "2:37:53", "remaining_time": "0:47:54", "throughput": 19906.06, "total_tokens": 188570240} +{"current_steps": 59925, "total_steps": 78105, "loss": 0.1876, "lr": 7.808744290934275e-07, "epoch": 3.8361820626080276, "percentage": 76.72, "elapsed_time": "2:37:53", "remaining_time": "0:47:54", "throughput": 19906.26, "total_tokens": 188585024} +{"current_steps": 59930, "total_steps": 78105, "loss": 0.1244, "lr": 7.804688668822893e-07, "epoch": 3.8365021445490046, "percentage": 76.73, "elapsed_time": "2:37:54", "remaining_time": "0:47:53", "throughput": 19906.51, "total_tokens": 188600896} +{"current_steps": 59935, "total_steps": 78105, "loss": 0.1395, "lr": 7.800633905355792e-07, "epoch": 3.836822226489981, "percentage": 76.74, "elapsed_time": "2:37:54", "remaining_time": "0:47:52", "throughput": 19906.75, "total_tokens": 188616256} +{"current_steps": 59940, "total_steps": 78105, "loss": 0.1308, "lr": 7.796580000735443e-07, "epoch": 3.837142308430958, "percentage": 76.74, "elapsed_time": "2:37:55", "remaining_time": "0:47:51", "throughput": 19907.02, "total_tokens": 188633088} +{"current_steps": 59945, "total_steps": 78105, "loss": 0.1823, "lr": 7.792526955164281e-07, "epoch": 3.837462390371935, "percentage": 76.75, "elapsed_time": "2:37:56", "remaining_time": "0:47:50", "throughput": 19907.33, "total_tokens": 188650880} +{"current_steps": 59950, "total_steps": 78105, "loss": 0.1192, "lr": 7.788474768844686e-07, "epoch": 3.837782472312912, "percentage": 76.76, "elapsed_time": "2:37:57", "remaining_time": "0:47:50", "throughput": 19907.63, "total_tokens": 188668096} +{"current_steps": 59955, "total_steps": 78105, "loss": 0.1912, "lr": 7.784423441979025e-07, "epoch": 3.838102554253889, "percentage": 76.76, "elapsed_time": "2:37:57", "remaining_time": "0:47:49", "throughput": 19907.92, "total_tokens": 188685248} +{"current_steps": 59960, "total_steps": 78105, "loss": 0.1559, "lr": 7.780372974769565e-07, "epoch": 3.838422636194866, "percentage": 76.77, "elapsed_time": "2:37:58", "remaining_time": "0:47:48", "throughput": 19908.17, "total_tokens": 188700992} +{"current_steps": 59965, "total_steps": 78105, "loss": 0.1487, "lr": 7.776323367418606e-07, "epoch": 3.8387427181358427, "percentage": 76.77, "elapsed_time": "2:37:59", "remaining_time": "0:47:47", "throughput": 19908.39, "total_tokens": 188716288} +{"current_steps": 59970, "total_steps": 78105, "loss": 0.1905, "lr": 7.772274620128323e-07, "epoch": 3.8390628000768197, "percentage": 76.78, "elapsed_time": "2:37:59", "remaining_time": "0:47:46", "throughput": 19908.61, "total_tokens": 188731648} +{"current_steps": 59975, "total_steps": 78105, "loss": 0.2068, "lr": 7.76822673310092e-07, "epoch": 3.8393828820177966, "percentage": 76.79, "elapsed_time": "2:38:00", "remaining_time": "0:47:45", "throughput": 19908.84, "total_tokens": 188747648} +{"current_steps": 59980, "total_steps": 78105, "loss": 0.166, "lr": 7.764179706538516e-07, "epoch": 3.839702963958773, "percentage": 76.79, "elapsed_time": "2:38:01", "remaining_time": "0:47:45", "throughput": 19909.12, "total_tokens": 188764224} +{"current_steps": 59985, "total_steps": 78105, "loss": 0.1819, "lr": 7.760133540643191e-07, "epoch": 3.84002304589975, "percentage": 76.8, "elapsed_time": "2:38:02", "remaining_time": "0:47:44", "throughput": 19909.42, "total_tokens": 188782016} +{"current_steps": 59990, "total_steps": 78105, "loss": 0.2164, "lr": 7.756088235617015e-07, "epoch": 3.840343127840727, "percentage": 76.81, "elapsed_time": "2:38:02", "remaining_time": "0:47:43", "throughput": 19909.61, "total_tokens": 188797056} +{"current_steps": 59995, "total_steps": 78105, "loss": 0.0811, "lr": 7.752043791661956e-07, "epoch": 3.840663209781704, "percentage": 76.81, "elapsed_time": "2:38:03", "remaining_time": "0:47:42", "throughput": 19909.91, "total_tokens": 188813888} +{"current_steps": 60000, "total_steps": 78105, "loss": 0.188, "lr": 7.748000208980002e-07, "epoch": 3.840983291722681, "percentage": 76.82, "elapsed_time": "2:38:04", "remaining_time": "0:47:41", "throughput": 19910.12, "total_tokens": 188829440} +{"current_steps": 60005, "total_steps": 78105, "loss": 0.2041, "lr": 7.743957487773043e-07, "epoch": 3.841303373663658, "percentage": 76.83, "elapsed_time": "2:38:04", "remaining_time": "0:47:40", "throughput": 19910.34, "total_tokens": 188844352} +{"current_steps": 60010, "total_steps": 78105, "loss": 0.1179, "lr": 7.739915628242969e-07, "epoch": 3.8416234556046347, "percentage": 76.83, "elapsed_time": "2:38:05", "remaining_time": "0:47:40", "throughput": 19910.61, "total_tokens": 188860800} +{"current_steps": 60015, "total_steps": 78105, "loss": 0.209, "lr": 7.735874630591605e-07, "epoch": 3.8419435375456117, "percentage": 76.84, "elapsed_time": "2:38:06", "remaining_time": "0:47:39", "throughput": 19910.83, "total_tokens": 188875968} +{"current_steps": 60020, "total_steps": 78105, "loss": 0.1646, "lr": 7.731834495020738e-07, "epoch": 3.8422636194865887, "percentage": 76.85, "elapsed_time": "2:38:06", "remaining_time": "0:47:38", "throughput": 19911.08, "total_tokens": 188891776} +{"current_steps": 60025, "total_steps": 78105, "loss": 0.2128, "lr": 7.727795221732109e-07, "epoch": 3.8425837014275652, "percentage": 76.85, "elapsed_time": "2:38:07", "remaining_time": "0:47:37", "throughput": 19911.28, "total_tokens": 188906624} +{"current_steps": 60030, "total_steps": 78105, "loss": 0.0938, "lr": 7.723756810927418e-07, "epoch": 3.842903783368542, "percentage": 76.86, "elapsed_time": "2:38:08", "remaining_time": "0:47:36", "throughput": 19911.53, "total_tokens": 188922624} +{"current_steps": 60035, "total_steps": 78105, "loss": 0.1343, "lr": 7.719719262808314e-07, "epoch": 3.843223865309519, "percentage": 76.86, "elapsed_time": "2:38:08", "remaining_time": "0:47:36", "throughput": 19911.77, "total_tokens": 188938624} +{"current_steps": 60040, "total_steps": 78105, "loss": 0.14, "lr": 7.71568257757643e-07, "epoch": 3.843543947250496, "percentage": 76.87, "elapsed_time": "2:38:09", "remaining_time": "0:47:35", "throughput": 19911.99, "total_tokens": 188953984} +{"current_steps": 60045, "total_steps": 78105, "loss": 0.1468, "lr": 7.711646755433328e-07, "epoch": 3.843864029191473, "percentage": 76.88, "elapsed_time": "2:38:10", "remaining_time": "0:47:34", "throughput": 19912.32, "total_tokens": 188971904} +{"current_steps": 60050, "total_steps": 78105, "loss": 0.1918, "lr": 7.707611796580533e-07, "epoch": 3.84418411113245, "percentage": 76.88, "elapsed_time": "2:38:10", "remaining_time": "0:47:33", "throughput": 19912.53, "total_tokens": 188987008} +{"current_steps": 60055, "total_steps": 78105, "loss": 0.1181, "lr": 7.703577701219537e-07, "epoch": 3.8445041930734267, "percentage": 76.89, "elapsed_time": "2:38:11", "remaining_time": "0:47:32", "throughput": 19912.73, "total_tokens": 189001920} +{"current_steps": 60060, "total_steps": 78105, "loss": 0.1857, "lr": 7.69954446955177e-07, "epoch": 3.8448242750144037, "percentage": 76.9, "elapsed_time": "2:38:12", "remaining_time": "0:47:31", "throughput": 19912.94, "total_tokens": 189016960} +{"current_steps": 60065, "total_steps": 78105, "loss": 0.1907, "lr": 7.695512101778644e-07, "epoch": 3.8451443569553807, "percentage": 76.9, "elapsed_time": "2:38:12", "remaining_time": "0:47:31", "throughput": 19913.15, "total_tokens": 189031872} +{"current_steps": 60070, "total_steps": 78105, "loss": 0.195, "lr": 7.691480598101495e-07, "epoch": 3.8454644388963573, "percentage": 76.91, "elapsed_time": "2:38:13", "remaining_time": "0:47:30", "throughput": 19913.35, "total_tokens": 189047104} +{"current_steps": 60075, "total_steps": 78105, "loss": 0.2234, "lr": 7.687449958721665e-07, "epoch": 3.8457845208373342, "percentage": 76.92, "elapsed_time": "2:38:14", "remaining_time": "0:47:29", "throughput": 19913.53, "total_tokens": 189061376} +{"current_steps": 60080, "total_steps": 78105, "loss": 0.1447, "lr": 7.683420183840393e-07, "epoch": 3.8461046027783112, "percentage": 76.92, "elapsed_time": "2:38:14", "remaining_time": "0:47:28", "throughput": 19913.76, "total_tokens": 189077056} +{"current_steps": 60085, "total_steps": 78105, "loss": 0.122, "lr": 7.679391273658923e-07, "epoch": 3.8464246847192882, "percentage": 76.93, "elapsed_time": "2:38:15", "remaining_time": "0:47:27", "throughput": 19913.97, "total_tokens": 189092224} +{"current_steps": 60090, "total_steps": 78105, "loss": 0.1337, "lr": 7.675363228378435e-07, "epoch": 3.846744766660265, "percentage": 76.93, "elapsed_time": "2:38:16", "remaining_time": "0:47:26", "throughput": 19914.23, "total_tokens": 189108160} +{"current_steps": 60095, "total_steps": 78105, "loss": 0.1809, "lr": 7.671336048200057e-07, "epoch": 3.8470648486012418, "percentage": 76.94, "elapsed_time": "2:38:16", "remaining_time": "0:47:26", "throughput": 19914.47, "total_tokens": 189124224} +{"current_steps": 60100, "total_steps": 78105, "loss": 0.1052, "lr": 7.667309733324913e-07, "epoch": 3.8473849305422188, "percentage": 76.95, "elapsed_time": "2:38:17", "remaining_time": "0:47:25", "throughput": 19914.67, "total_tokens": 189138880} +{"current_steps": 60105, "total_steps": 78105, "loss": 0.1603, "lr": 7.66328428395402e-07, "epoch": 3.8477050124831957, "percentage": 76.95, "elapsed_time": "2:38:18", "remaining_time": "0:47:24", "throughput": 19914.92, "total_tokens": 189155520} +{"current_steps": 60110, "total_steps": 78105, "loss": 0.248, "lr": 7.659259700288423e-07, "epoch": 3.8480250944241727, "percentage": 76.96, "elapsed_time": "2:38:18", "remaining_time": "0:47:23", "throughput": 19915.13, "total_tokens": 189170688} +{"current_steps": 60115, "total_steps": 78105, "loss": 0.1931, "lr": 7.655235982529058e-07, "epoch": 3.8483451763651493, "percentage": 76.97, "elapsed_time": "2:38:19", "remaining_time": "0:47:22", "throughput": 19915.34, "total_tokens": 189185792} +{"current_steps": 60120, "total_steps": 78105, "loss": 0.1178, "lr": 7.651213130876867e-07, "epoch": 3.8486652583061263, "percentage": 76.97, "elapsed_time": "2:38:20", "remaining_time": "0:47:22", "throughput": 19915.61, "total_tokens": 189202240} +{"current_steps": 60125, "total_steps": 78105, "loss": 0.2771, "lr": 7.647191145532729e-07, "epoch": 3.8489853402471033, "percentage": 76.98, "elapsed_time": "2:38:20", "remaining_time": "0:47:21", "throughput": 19915.85, "total_tokens": 189217984} +{"current_steps": 60130, "total_steps": 78105, "loss": 0.1143, "lr": 7.643170026697475e-07, "epoch": 3.8493054221880803, "percentage": 76.99, "elapsed_time": "2:38:21", "remaining_time": "0:47:20", "throughput": 19916.09, "total_tokens": 189233728} +{"current_steps": 60135, "total_steps": 78105, "loss": 0.1361, "lr": 7.639149774571902e-07, "epoch": 3.8496255041290572, "percentage": 76.99, "elapsed_time": "2:38:22", "remaining_time": "0:47:19", "throughput": 19916.32, "total_tokens": 189249280} +{"current_steps": 60140, "total_steps": 78105, "loss": 0.1428, "lr": 7.635130389356763e-07, "epoch": 3.849945586070034, "percentage": 77.0, "elapsed_time": "2:38:22", "remaining_time": "0:47:18", "throughput": 19916.63, "total_tokens": 189266688} +{"current_steps": 60145, "total_steps": 78105, "loss": 0.1778, "lr": 7.631111871252758e-07, "epoch": 3.8502656680110108, "percentage": 77.01, "elapsed_time": "2:38:23", "remaining_time": "0:47:17", "throughput": 19916.85, "total_tokens": 189281792} +{"current_steps": 60150, "total_steps": 78105, "loss": 0.1246, "lr": 7.627094220460557e-07, "epoch": 3.8505857499519878, "percentage": 77.01, "elapsed_time": "2:38:24", "remaining_time": "0:47:17", "throughput": 19917.06, "total_tokens": 189296768} +{"current_steps": 60155, "total_steps": 78105, "loss": 0.1854, "lr": 7.623077437180771e-07, "epoch": 3.8509058318929648, "percentage": 77.02, "elapsed_time": "2:38:24", "remaining_time": "0:47:16", "throughput": 19917.29, "total_tokens": 189312384} +{"current_steps": 60160, "total_steps": 78105, "loss": 0.138, "lr": 7.619061521613991e-07, "epoch": 3.8512259138339413, "percentage": 77.02, "elapsed_time": "2:38:25", "remaining_time": "0:47:15", "throughput": 19917.54, "total_tokens": 189328640} +{"current_steps": 60165, "total_steps": 78105, "loss": 0.1848, "lr": 7.615046473960744e-07, "epoch": 3.8515459957749183, "percentage": 77.03, "elapsed_time": "2:38:26", "remaining_time": "0:47:14", "throughput": 19917.72, "total_tokens": 189343360} +{"current_steps": 60170, "total_steps": 78105, "loss": 0.1605, "lr": 7.611032294421522e-07, "epoch": 3.8518660777158953, "percentage": 77.04, "elapsed_time": "2:38:26", "remaining_time": "0:47:13", "throughput": 19917.92, "total_tokens": 189358336} +{"current_steps": 60175, "total_steps": 78105, "loss": 0.1258, "lr": 7.607018983196773e-07, "epoch": 3.8521861596568723, "percentage": 77.04, "elapsed_time": "2:38:27", "remaining_time": "0:47:12", "throughput": 19918.21, "total_tokens": 189375168} +{"current_steps": 60180, "total_steps": 78105, "loss": 0.214, "lr": 7.60300654048689e-07, "epoch": 3.8525062415978493, "percentage": 77.05, "elapsed_time": "2:38:28", "remaining_time": "0:47:12", "throughput": 19918.45, "total_tokens": 189391232} +{"current_steps": 60185, "total_steps": 78105, "loss": 0.1282, "lr": 7.59899496649226e-07, "epoch": 3.852826323538826, "percentage": 77.06, "elapsed_time": "2:38:29", "remaining_time": "0:47:11", "throughput": 19918.7, "total_tokens": 189407424} +{"current_steps": 60190, "total_steps": 78105, "loss": 0.163, "lr": 7.594984261413165e-07, "epoch": 3.853146405479803, "percentage": 77.06, "elapsed_time": "2:38:29", "remaining_time": "0:47:10", "throughput": 19918.92, "total_tokens": 189423040} +{"current_steps": 60195, "total_steps": 78105, "loss": 0.2409, "lr": 7.590974425449913e-07, "epoch": 3.85346648742078, "percentage": 77.07, "elapsed_time": "2:38:30", "remaining_time": "0:47:09", "throughput": 19919.19, "total_tokens": 189439296} +{"current_steps": 60200, "total_steps": 78105, "loss": 0.1665, "lr": 7.586965458802703e-07, "epoch": 3.8537865693617563, "percentage": 77.08, "elapsed_time": "2:38:31", "remaining_time": "0:47:08", "throughput": 19919.39, "total_tokens": 189453952} +{"current_steps": 60205, "total_steps": 78105, "loss": 0.1404, "lr": 7.582957361671744e-07, "epoch": 3.8541066513027333, "percentage": 77.08, "elapsed_time": "2:38:31", "remaining_time": "0:47:08", "throughput": 19919.69, "total_tokens": 189471168} +{"current_steps": 60210, "total_steps": 78105, "loss": 0.1323, "lr": 7.578950134257171e-07, "epoch": 3.8544267332437103, "percentage": 77.09, "elapsed_time": "2:38:32", "remaining_time": "0:47:07", "throughput": 19919.9, "total_tokens": 189486144} +{"current_steps": 60215, "total_steps": 78105, "loss": 0.1816, "lr": 7.57494377675908e-07, "epoch": 3.8547468151846873, "percentage": 77.09, "elapsed_time": "2:38:33", "remaining_time": "0:47:06", "throughput": 19920.11, "total_tokens": 189501376} +{"current_steps": 60220, "total_steps": 78105, "loss": 0.1773, "lr": 7.570938289377547e-07, "epoch": 3.8550668971256643, "percentage": 77.1, "elapsed_time": "2:38:33", "remaining_time": "0:47:05", "throughput": 19920.33, "total_tokens": 189516864} +{"current_steps": 60225, "total_steps": 78105, "loss": 0.131, "lr": 7.566933672312554e-07, "epoch": 3.8553869790666413, "percentage": 77.11, "elapsed_time": "2:38:34", "remaining_time": "0:47:04", "throughput": 19920.52, "total_tokens": 189531904} +{"current_steps": 60230, "total_steps": 78105, "loss": 0.1841, "lr": 7.562929925764098e-07, "epoch": 3.855707061007618, "percentage": 77.11, "elapsed_time": "2:38:35", "remaining_time": "0:47:03", "throughput": 19920.75, "total_tokens": 189547200} +{"current_steps": 60235, "total_steps": 78105, "loss": 0.1869, "lr": 7.558927049932097e-07, "epoch": 3.856027142948595, "percentage": 77.12, "elapsed_time": "2:38:35", "remaining_time": "0:47:03", "throughput": 19921.0, "total_tokens": 189563328} +{"current_steps": 60240, "total_steps": 78105, "loss": 0.1303, "lr": 7.554925045016429e-07, "epoch": 3.856347224889572, "percentage": 77.13, "elapsed_time": "2:38:36", "remaining_time": "0:47:02", "throughput": 19921.25, "total_tokens": 189579456} +{"current_steps": 60245, "total_steps": 78105, "loss": 0.1375, "lr": 7.550923911216937e-07, "epoch": 3.8566673068305484, "percentage": 77.13, "elapsed_time": "2:38:37", "remaining_time": "0:47:01", "throughput": 19921.52, "total_tokens": 189595904} +{"current_steps": 60250, "total_steps": 78105, "loss": 0.2064, "lr": 7.546923648733415e-07, "epoch": 3.8569873887715254, "percentage": 77.14, "elapsed_time": "2:38:37", "remaining_time": "0:47:00", "throughput": 19921.76, "total_tokens": 189612096} +{"current_steps": 60255, "total_steps": 78105, "loss": 0.1931, "lr": 7.542924257765616e-07, "epoch": 3.8573074707125024, "percentage": 77.15, "elapsed_time": "2:38:38", "remaining_time": "0:46:59", "throughput": 19921.97, "total_tokens": 189627584} +{"current_steps": 60260, "total_steps": 78105, "loss": 0.1254, "lr": 7.53892573851325e-07, "epoch": 3.8576275526534793, "percentage": 77.15, "elapsed_time": "2:38:39", "remaining_time": "0:46:58", "throughput": 19922.25, "total_tokens": 189644480} +{"current_steps": 60265, "total_steps": 78105, "loss": 0.2058, "lr": 7.534928091175974e-07, "epoch": 3.8579476345944563, "percentage": 77.16, "elapsed_time": "2:38:39", "remaining_time": "0:46:58", "throughput": 19922.48, "total_tokens": 189660160} +{"current_steps": 60270, "total_steps": 78105, "loss": 0.1398, "lr": 7.530931315953424e-07, "epoch": 3.8582677165354333, "percentage": 77.17, "elapsed_time": "2:38:40", "remaining_time": "0:46:57", "throughput": 19922.7, "total_tokens": 189675584} +{"current_steps": 60275, "total_steps": 78105, "loss": 0.1847, "lr": 7.526935413045172e-07, "epoch": 3.85858779847641, "percentage": 77.17, "elapsed_time": "2:38:41", "remaining_time": "0:46:56", "throughput": 19922.95, "total_tokens": 189691136} +{"current_steps": 60280, "total_steps": 78105, "loss": 0.1821, "lr": 7.522940382650751e-07, "epoch": 3.858907880417387, "percentage": 77.18, "elapsed_time": "2:38:41", "remaining_time": "0:46:55", "throughput": 19923.22, "total_tokens": 189707776} +{"current_steps": 60285, "total_steps": 78105, "loss": 0.1471, "lr": 7.518946224969651e-07, "epoch": 3.859227962358364, "percentage": 77.18, "elapsed_time": "2:38:42", "remaining_time": "0:46:54", "throughput": 19923.46, "total_tokens": 189723584} +{"current_steps": 60290, "total_steps": 78105, "loss": 0.1466, "lr": 7.514952940201323e-07, "epoch": 3.8595480442993404, "percentage": 77.19, "elapsed_time": "2:38:43", "remaining_time": "0:46:54", "throughput": 19923.69, "total_tokens": 189739264} +{"current_steps": 60295, "total_steps": 78105, "loss": 0.1721, "lr": 7.510960528545169e-07, "epoch": 3.8598681262403174, "percentage": 77.2, "elapsed_time": "2:38:43", "remaining_time": "0:46:53", "throughput": 19923.9, "total_tokens": 189754560} +{"current_steps": 60300, "total_steps": 78105, "loss": 0.1718, "lr": 7.506968990200539e-07, "epoch": 3.8601882081812944, "percentage": 77.2, "elapsed_time": "2:38:44", "remaining_time": "0:46:52", "throughput": 19924.09, "total_tokens": 189769600} +{"current_steps": 60305, "total_steps": 78105, "loss": 0.1877, "lr": 7.502978325366778e-07, "epoch": 3.8605082901222714, "percentage": 77.21, "elapsed_time": "2:38:45", "remaining_time": "0:46:51", "throughput": 19924.28, "total_tokens": 189784320} +{"current_steps": 60310, "total_steps": 78105, "loss": 0.1328, "lr": 7.498988534243123e-07, "epoch": 3.8608283720632484, "percentage": 77.22, "elapsed_time": "2:38:45", "remaining_time": "0:46:50", "throughput": 19924.49, "total_tokens": 189799552} +{"current_steps": 60315, "total_steps": 78105, "loss": 0.1716, "lr": 7.494999617028831e-07, "epoch": 3.8611484540042253, "percentage": 77.22, "elapsed_time": "2:38:46", "remaining_time": "0:46:49", "throughput": 19924.71, "total_tokens": 189815424} +{"current_steps": 60320, "total_steps": 78105, "loss": 0.1421, "lr": 7.491011573923077e-07, "epoch": 3.861468535945202, "percentage": 77.23, "elapsed_time": "2:38:47", "remaining_time": "0:46:49", "throughput": 19924.92, "total_tokens": 189830208} +{"current_steps": 60325, "total_steps": 78105, "loss": 0.1343, "lr": 7.487024405125004e-07, "epoch": 3.861788617886179, "percentage": 77.24, "elapsed_time": "2:38:47", "remaining_time": "0:46:48", "throughput": 19925.12, "total_tokens": 189845248} +{"current_steps": 60330, "total_steps": 78105, "loss": 0.199, "lr": 7.48303811083371e-07, "epoch": 3.862108699827156, "percentage": 77.24, "elapsed_time": "2:38:48", "remaining_time": "0:46:47", "throughput": 19925.31, "total_tokens": 189860224} +{"current_steps": 60335, "total_steps": 78105, "loss": 0.1605, "lr": 7.479052691248243e-07, "epoch": 3.8624287817681324, "percentage": 77.25, "elapsed_time": "2:38:49", "remaining_time": "0:46:46", "throughput": 19925.57, "total_tokens": 189876416} +{"current_steps": 60340, "total_steps": 78105, "loss": 0.144, "lr": 7.475068146567635e-07, "epoch": 3.8627488637091094, "percentage": 77.25, "elapsed_time": "2:38:49", "remaining_time": "0:46:45", "throughput": 19925.81, "total_tokens": 189892032} +{"current_steps": 60345, "total_steps": 78105, "loss": 0.2088, "lr": 7.471084476990825e-07, "epoch": 3.8630689456500864, "percentage": 77.26, "elapsed_time": "2:38:50", "remaining_time": "0:46:44", "throughput": 19926.06, "total_tokens": 189908288} +{"current_steps": 60350, "total_steps": 78105, "loss": 0.2278, "lr": 7.467101682716762e-07, "epoch": 3.8633890275910634, "percentage": 77.27, "elapsed_time": "2:38:51", "remaining_time": "0:46:44", "throughput": 19926.24, "total_tokens": 189922880} +{"current_steps": 60355, "total_steps": 78105, "loss": 0.1796, "lr": 7.463119763944313e-07, "epoch": 3.8637091095320404, "percentage": 77.27, "elapsed_time": "2:38:51", "remaining_time": "0:46:43", "throughput": 19926.47, "total_tokens": 189938176} +{"current_steps": 60360, "total_steps": 78105, "loss": 0.1747, "lr": 7.459138720872316e-07, "epoch": 3.864029191473017, "percentage": 77.28, "elapsed_time": "2:38:52", "remaining_time": "0:46:42", "throughput": 19926.23, "total_tokens": 189955264} +{"current_steps": 60365, "total_steps": 78105, "loss": 0.1671, "lr": 7.455158553699568e-07, "epoch": 3.864349273413994, "percentage": 77.29, "elapsed_time": "2:38:53", "remaining_time": "0:46:41", "throughput": 19926.47, "total_tokens": 189971584} +{"current_steps": 60370, "total_steps": 78105, "loss": 0.1623, "lr": 7.451179262624811e-07, "epoch": 3.864669355354971, "percentage": 77.29, "elapsed_time": "2:38:54", "remaining_time": "0:46:40", "throughput": 19926.71, "total_tokens": 189987520} +{"current_steps": 60375, "total_steps": 78105, "loss": 0.1078, "lr": 7.447200847846753e-07, "epoch": 3.864989437295948, "percentage": 77.3, "elapsed_time": "2:38:54", "remaining_time": "0:46:40", "throughput": 19926.94, "total_tokens": 190003072} +{"current_steps": 60380, "total_steps": 78105, "loss": 0.1921, "lr": 7.443223309564057e-07, "epoch": 3.8653095192369245, "percentage": 77.31, "elapsed_time": "2:38:55", "remaining_time": "0:46:39", "throughput": 19927.22, "total_tokens": 190019584} +{"current_steps": 60385, "total_steps": 78105, "loss": 0.0837, "lr": 7.439246647975329e-07, "epoch": 3.8656296011779014, "percentage": 77.31, "elapsed_time": "2:38:56", "remaining_time": "0:46:38", "throughput": 19927.43, "total_tokens": 190034688} +{"current_steps": 60390, "total_steps": 78105, "loss": 0.1779, "lr": 7.435270863279162e-07, "epoch": 3.8659496831188784, "percentage": 77.32, "elapsed_time": "2:38:57", "remaining_time": "0:46:37", "throughput": 19927.66, "total_tokens": 190050624} +{"current_steps": 60395, "total_steps": 78105, "loss": 0.2802, "lr": 7.431295955674078e-07, "epoch": 3.8662697650598554, "percentage": 77.33, "elapsed_time": "2:38:57", "remaining_time": "0:46:36", "throughput": 19927.96, "total_tokens": 190067328} +{"current_steps": 60400, "total_steps": 78105, "loss": 0.1764, "lr": 7.42732192535856e-07, "epoch": 3.8665898470008324, "percentage": 77.33, "elapsed_time": "2:38:58", "remaining_time": "0:46:35", "throughput": 19928.18, "total_tokens": 190082944} +{"current_steps": 60405, "total_steps": 78105, "loss": 0.1618, "lr": 7.423348772531053e-07, "epoch": 3.866909928941809, "percentage": 77.34, "elapsed_time": "2:38:59", "remaining_time": "0:46:35", "throughput": 19928.44, "total_tokens": 190099072} +{"current_steps": 60410, "total_steps": 78105, "loss": 0.202, "lr": 7.419376497389947e-07, "epoch": 3.867230010882786, "percentage": 77.34, "elapsed_time": "2:38:59", "remaining_time": "0:46:34", "throughput": 19928.64, "total_tokens": 190113984} +{"current_steps": 60415, "total_steps": 78105, "loss": 0.1491, "lr": 7.415405100133622e-07, "epoch": 3.867550092823763, "percentage": 77.35, "elapsed_time": "2:39:00", "remaining_time": "0:46:33", "throughput": 19928.88, "total_tokens": 190129664} +{"current_steps": 60420, "total_steps": 78105, "loss": 0.164, "lr": 7.411434580960353e-07, "epoch": 3.86787017476474, "percentage": 77.36, "elapsed_time": "2:39:01", "remaining_time": "0:46:32", "throughput": 19929.08, "total_tokens": 190144896} +{"current_steps": 60425, "total_steps": 78105, "loss": 0.1218, "lr": 7.40746494006844e-07, "epoch": 3.8681902567057165, "percentage": 77.36, "elapsed_time": "2:39:01", "remaining_time": "0:46:31", "throughput": 19929.28, "total_tokens": 190159744} +{"current_steps": 60430, "total_steps": 78105, "loss": 0.101, "lr": 7.40349617765608e-07, "epoch": 3.8685103386466935, "percentage": 77.37, "elapsed_time": "2:39:02", "remaining_time": "0:46:31", "throughput": 19929.49, "total_tokens": 190175040} +{"current_steps": 60435, "total_steps": 78105, "loss": 0.1493, "lr": 7.39952829392147e-07, "epoch": 3.8688304205876705, "percentage": 77.38, "elapsed_time": "2:39:03", "remaining_time": "0:46:30", "throughput": 19929.82, "total_tokens": 190192768} +{"current_steps": 60440, "total_steps": 78105, "loss": 0.1294, "lr": 7.395561289062739e-07, "epoch": 3.8691505025286475, "percentage": 77.38, "elapsed_time": "2:39:04", "remaining_time": "0:46:29", "throughput": 19930.45, "total_tokens": 190223232} +{"current_steps": 60445, "total_steps": 78105, "loss": 0.1829, "lr": 7.391595163277974e-07, "epoch": 3.8694705844696244, "percentage": 77.39, "elapsed_time": "2:39:05", "remaining_time": "0:46:28", "throughput": 19930.74, "total_tokens": 190240128} +{"current_steps": 60450, "total_steps": 78105, "loss": 0.1744, "lr": 7.387629916765241e-07, "epoch": 3.869790666410601, "percentage": 77.4, "elapsed_time": "2:39:05", "remaining_time": "0:46:27", "throughput": 19930.94, "total_tokens": 190255360} +{"current_steps": 60455, "total_steps": 78105, "loss": 0.1547, "lr": 7.383665549722513e-07, "epoch": 3.870110748351578, "percentage": 77.4, "elapsed_time": "2:39:06", "remaining_time": "0:46:27", "throughput": 19931.16, "total_tokens": 190270784} +{"current_steps": 60460, "total_steps": 78105, "loss": 0.1979, "lr": 7.379702062347776e-07, "epoch": 3.870430830292555, "percentage": 77.41, "elapsed_time": "2:39:07", "remaining_time": "0:46:26", "throughput": 19931.39, "total_tokens": 190286784} +{"current_steps": 60465, "total_steps": 78105, "loss": 0.1682, "lr": 7.37573945483894e-07, "epoch": 3.8707509122335315, "percentage": 77.42, "elapsed_time": "2:39:07", "remaining_time": "0:46:25", "throughput": 19931.61, "total_tokens": 190302208} +{"current_steps": 60470, "total_steps": 78105, "loss": 0.1857, "lr": 7.371777727393872e-07, "epoch": 3.8710709941745085, "percentage": 77.42, "elapsed_time": "2:39:08", "remaining_time": "0:46:24", "throughput": 19931.87, "total_tokens": 190318464} +{"current_steps": 60475, "total_steps": 78105, "loss": 0.1751, "lr": 7.367816880210404e-07, "epoch": 3.8713910761154855, "percentage": 77.43, "elapsed_time": "2:39:09", "remaining_time": "0:46:23", "throughput": 19932.09, "total_tokens": 190333824} +{"current_steps": 60480, "total_steps": 78105, "loss": 0.1397, "lr": 7.363856913486317e-07, "epoch": 3.8717111580564625, "percentage": 77.43, "elapsed_time": "2:39:09", "remaining_time": "0:46:22", "throughput": 19932.34, "total_tokens": 190349632} +{"current_steps": 60485, "total_steps": 78105, "loss": 0.152, "lr": 7.359897827419351e-07, "epoch": 3.8720312399974395, "percentage": 77.44, "elapsed_time": "2:39:10", "remaining_time": "0:46:22", "throughput": 19932.65, "total_tokens": 190366976} +{"current_steps": 60490, "total_steps": 78105, "loss": 0.1419, "lr": 7.355939622207206e-07, "epoch": 3.8723513219384165, "percentage": 77.45, "elapsed_time": "2:39:11", "remaining_time": "0:46:21", "throughput": 19932.89, "total_tokens": 190382848} +{"current_steps": 60495, "total_steps": 78105, "loss": 0.1415, "lr": 7.351982298047525e-07, "epoch": 3.872671403879393, "percentage": 77.45, "elapsed_time": "2:39:11", "remaining_time": "0:46:20", "throughput": 19933.09, "total_tokens": 190397824} +{"current_steps": 60500, "total_steps": 78105, "loss": 0.1781, "lr": 7.348025855137936e-07, "epoch": 3.87299148582037, "percentage": 77.46, "elapsed_time": "2:39:12", "remaining_time": "0:46:19", "throughput": 19933.4, "total_tokens": 190415424} +{"current_steps": 60505, "total_steps": 78105, "loss": 0.1485, "lr": 7.344070293675976e-07, "epoch": 3.873311567761347, "percentage": 77.47, "elapsed_time": "2:39:13", "remaining_time": "0:46:18", "throughput": 19933.6, "total_tokens": 190430080} +{"current_steps": 60510, "total_steps": 78105, "loss": 0.1483, "lr": 7.340115613859184e-07, "epoch": 3.8736316497023235, "percentage": 77.47, "elapsed_time": "2:39:13", "remaining_time": "0:46:18", "throughput": 19933.86, "total_tokens": 190446528} +{"current_steps": 60515, "total_steps": 78105, "loss": 0.2556, "lr": 7.336161815885034e-07, "epoch": 3.8739517316433005, "percentage": 77.48, "elapsed_time": "2:39:14", "remaining_time": "0:46:17", "throughput": 19934.09, "total_tokens": 190462144} +{"current_steps": 60520, "total_steps": 78105, "loss": 0.1332, "lr": 7.332208899950955e-07, "epoch": 3.8742718135842775, "percentage": 77.49, "elapsed_time": "2:39:15", "remaining_time": "0:46:16", "throughput": 19934.38, "total_tokens": 190478720} +{"current_steps": 60525, "total_steps": 78105, "loss": 0.1721, "lr": 7.328256866254336e-07, "epoch": 3.8745918955252545, "percentage": 77.49, "elapsed_time": "2:39:15", "remaining_time": "0:46:15", "throughput": 19934.65, "total_tokens": 190495232} +{"current_steps": 60530, "total_steps": 78105, "loss": 0.1546, "lr": 7.324305714992511e-07, "epoch": 3.8749119774662315, "percentage": 77.5, "elapsed_time": "2:39:16", "remaining_time": "0:46:14", "throughput": 19934.89, "total_tokens": 190511360} +{"current_steps": 60535, "total_steps": 78105, "loss": 0.2121, "lr": 7.320355446362804e-07, "epoch": 3.8752320594072085, "percentage": 77.5, "elapsed_time": "2:39:17", "remaining_time": "0:46:13", "throughput": 19935.12, "total_tokens": 190527104} +{"current_steps": 60540, "total_steps": 78105, "loss": 0.1911, "lr": 7.316406060562442e-07, "epoch": 3.875552141348185, "percentage": 77.51, "elapsed_time": "2:39:17", "remaining_time": "0:46:13", "throughput": 19935.31, "total_tokens": 190541632} +{"current_steps": 60545, "total_steps": 78105, "loss": 0.1714, "lr": 7.312457557788658e-07, "epoch": 3.875872223289162, "percentage": 77.52, "elapsed_time": "2:39:18", "remaining_time": "0:46:12", "throughput": 19935.55, "total_tokens": 190557248} +{"current_steps": 60550, "total_steps": 78105, "loss": 0.1592, "lr": 7.308509938238612e-07, "epoch": 3.876192305230139, "percentage": 77.52, "elapsed_time": "2:39:19", "remaining_time": "0:46:11", "throughput": 19935.79, "total_tokens": 190572864} +{"current_steps": 60555, "total_steps": 78105, "loss": 0.1854, "lr": 7.30456320210943e-07, "epoch": 3.8765123871711156, "percentage": 77.53, "elapsed_time": "2:39:19", "remaining_time": "0:46:10", "throughput": 19935.99, "total_tokens": 190587648} +{"current_steps": 60560, "total_steps": 78105, "loss": 0.2262, "lr": 7.300617349598188e-07, "epoch": 3.8768324691120926, "percentage": 77.54, "elapsed_time": "2:39:20", "remaining_time": "0:46:09", "throughput": 19936.24, "total_tokens": 190603968} +{"current_steps": 60565, "total_steps": 78105, "loss": 0.1007, "lr": 7.296672380901918e-07, "epoch": 3.8771525510530696, "percentage": 77.54, "elapsed_time": "2:39:21", "remaining_time": "0:46:09", "throughput": 19936.46, "total_tokens": 190619520} +{"current_steps": 60570, "total_steps": 78105, "loss": 0.1625, "lr": 7.292728296217627e-07, "epoch": 3.8774726329940465, "percentage": 77.55, "elapsed_time": "2:39:22", "remaining_time": "0:46:08", "throughput": 19936.7, "total_tokens": 190635072} +{"current_steps": 60575, "total_steps": 78105, "loss": 0.1696, "lr": 7.288785095742237e-07, "epoch": 3.8777927149350235, "percentage": 77.56, "elapsed_time": "2:39:22", "remaining_time": "0:46:07", "throughput": 19936.99, "total_tokens": 190651712} +{"current_steps": 60580, "total_steps": 78105, "loss": 0.1997, "lr": 7.284842779672674e-07, "epoch": 3.8781127968760005, "percentage": 77.56, "elapsed_time": "2:39:23", "remaining_time": "0:46:06", "throughput": 19937.22, "total_tokens": 190667520} +{"current_steps": 60585, "total_steps": 78105, "loss": 0.175, "lr": 7.280901348205788e-07, "epoch": 3.878432878816977, "percentage": 77.57, "elapsed_time": "2:39:24", "remaining_time": "0:46:05", "throughput": 19937.47, "total_tokens": 190683584} +{"current_steps": 60590, "total_steps": 78105, "loss": 0.128, "lr": 7.276960801538394e-07, "epoch": 3.878752960757954, "percentage": 77.58, "elapsed_time": "2:39:24", "remaining_time": "0:46:04", "throughput": 19937.73, "total_tokens": 190699968} +{"current_steps": 60595, "total_steps": 78105, "loss": 0.185, "lr": 7.273021139867259e-07, "epoch": 3.879073042698931, "percentage": 77.58, "elapsed_time": "2:39:25", "remaining_time": "0:46:04", "throughput": 19937.94, "total_tokens": 190715008} +{"current_steps": 60600, "total_steps": 78105, "loss": 0.1773, "lr": 7.269082363389113e-07, "epoch": 3.8793931246399076, "percentage": 77.59, "elapsed_time": "2:39:26", "remaining_time": "0:46:03", "throughput": 19938.16, "total_tokens": 190730496} +{"current_steps": 60605, "total_steps": 78105, "loss": 0.1031, "lr": 7.265144472300636e-07, "epoch": 3.8797132065808846, "percentage": 77.59, "elapsed_time": "2:39:26", "remaining_time": "0:46:02", "throughput": 19938.47, "total_tokens": 190747776} +{"current_steps": 60610, "total_steps": 78105, "loss": 0.1581, "lr": 7.261207466798467e-07, "epoch": 3.8800332885218616, "percentage": 77.6, "elapsed_time": "2:39:27", "remaining_time": "0:46:01", "throughput": 19938.71, "total_tokens": 190763712} +{"current_steps": 60615, "total_steps": 78105, "loss": 0.1172, "lr": 7.257271347079192e-07, "epoch": 3.8803533704628386, "percentage": 77.61, "elapsed_time": "2:39:28", "remaining_time": "0:46:00", "throughput": 19938.95, "total_tokens": 190779328} +{"current_steps": 60620, "total_steps": 78105, "loss": 0.1333, "lr": 7.253336113339382e-07, "epoch": 3.8806734524038156, "percentage": 77.61, "elapsed_time": "2:39:28", "remaining_time": "0:46:00", "throughput": 19939.16, "total_tokens": 190794752} +{"current_steps": 60625, "total_steps": 78105, "loss": 0.1782, "lr": 7.249401765775513e-07, "epoch": 3.880993534344792, "percentage": 77.62, "elapsed_time": "2:39:29", "remaining_time": "0:45:59", "throughput": 19939.38, "total_tokens": 190810496} +{"current_steps": 60630, "total_steps": 78105, "loss": 0.1172, "lr": 7.245468304584067e-07, "epoch": 3.881313616285769, "percentage": 77.63, "elapsed_time": "2:39:30", "remaining_time": "0:45:58", "throughput": 19939.64, "total_tokens": 190826816} +{"current_steps": 60635, "total_steps": 78105, "loss": 0.1543, "lr": 7.241535729961455e-07, "epoch": 3.881633698226746, "percentage": 77.63, "elapsed_time": "2:39:30", "remaining_time": "0:45:57", "throughput": 19939.84, "total_tokens": 190841792} +{"current_steps": 60640, "total_steps": 78105, "loss": 0.1781, "lr": 7.23760404210404e-07, "epoch": 3.881953780167723, "percentage": 77.64, "elapsed_time": "2:39:31", "remaining_time": "0:45:56", "throughput": 19940.07, "total_tokens": 190857664} +{"current_steps": 60645, "total_steps": 78105, "loss": 0.1459, "lr": 7.233673241208173e-07, "epoch": 3.8822738621086996, "percentage": 77.65, "elapsed_time": "2:39:32", "remaining_time": "0:45:55", "throughput": 19940.33, "total_tokens": 190874304} +{"current_steps": 60650, "total_steps": 78105, "loss": 0.0941, "lr": 7.229743327470109e-07, "epoch": 3.8825939440496766, "percentage": 77.65, "elapsed_time": "2:39:32", "remaining_time": "0:45:55", "throughput": 19940.53, "total_tokens": 190889472} +{"current_steps": 60655, "total_steps": 78105, "loss": 0.1908, "lr": 7.225814301086115e-07, "epoch": 3.8829140259906536, "percentage": 77.66, "elapsed_time": "2:39:33", "remaining_time": "0:45:54", "throughput": 19940.74, "total_tokens": 190904512} +{"current_steps": 60660, "total_steps": 78105, "loss": 0.1529, "lr": 7.221886162252358e-07, "epoch": 3.8832341079316306, "percentage": 77.66, "elapsed_time": "2:39:34", "remaining_time": "0:45:53", "throughput": 19940.93, "total_tokens": 190919552} +{"current_steps": 60665, "total_steps": 78105, "loss": 0.1297, "lr": 7.21795891116501e-07, "epoch": 3.8835541898726076, "percentage": 77.67, "elapsed_time": "2:39:34", "remaining_time": "0:45:52", "throughput": 19941.13, "total_tokens": 190934464} +{"current_steps": 60670, "total_steps": 78105, "loss": 0.1908, "lr": 7.214032548020172e-07, "epoch": 3.883874271813584, "percentage": 77.68, "elapsed_time": "2:39:35", "remaining_time": "0:45:51", "throughput": 19941.32, "total_tokens": 190949376} +{"current_steps": 60675, "total_steps": 78105, "loss": 0.162, "lr": 7.210107073013906e-07, "epoch": 3.884194353754561, "percentage": 77.68, "elapsed_time": "2:39:36", "remaining_time": "0:45:50", "throughput": 19941.54, "total_tokens": 190964672} +{"current_steps": 60680, "total_steps": 78105, "loss": 0.2023, "lr": 7.206182486342225e-07, "epoch": 3.884514435695538, "percentage": 77.69, "elapsed_time": "2:39:36", "remaining_time": "0:45:50", "throughput": 19941.72, "total_tokens": 190979200} +{"current_steps": 60685, "total_steps": 78105, "loss": 0.2337, "lr": 7.2022587882011e-07, "epoch": 3.884834517636515, "percentage": 77.7, "elapsed_time": "2:39:37", "remaining_time": "0:45:49", "throughput": 19941.98, "total_tokens": 190995200} +{"current_steps": 60690, "total_steps": 78105, "loss": 0.2028, "lr": 7.198335978786475e-07, "epoch": 3.8851545995774917, "percentage": 77.7, "elapsed_time": "2:39:38", "remaining_time": "0:45:48", "throughput": 19942.23, "total_tokens": 191011584} +{"current_steps": 60695, "total_steps": 78105, "loss": 0.2709, "lr": 7.194414058294225e-07, "epoch": 3.8854746815184686, "percentage": 77.71, "elapsed_time": "2:39:38", "remaining_time": "0:45:47", "throughput": 19942.51, "total_tokens": 191028288} +{"current_steps": 60700, "total_steps": 78105, "loss": 0.2329, "lr": 7.19049302692019e-07, "epoch": 3.8857947634594456, "percentage": 77.72, "elapsed_time": "2:39:39", "remaining_time": "0:45:46", "throughput": 19942.72, "total_tokens": 191043840} +{"current_steps": 60705, "total_steps": 78105, "loss": 0.2638, "lr": 7.186572884860169e-07, "epoch": 3.8861148454004226, "percentage": 77.72, "elapsed_time": "2:39:40", "remaining_time": "0:45:46", "throughput": 19942.95, "total_tokens": 191059392} +{"current_steps": 60710, "total_steps": 78105, "loss": 0.1877, "lr": 7.182653632309908e-07, "epoch": 3.8864349273413996, "percentage": 77.73, "elapsed_time": "2:39:40", "remaining_time": "0:45:45", "throughput": 19943.21, "total_tokens": 191075264} +{"current_steps": 60715, "total_steps": 78105, "loss": 0.1368, "lr": 7.17873526946512e-07, "epoch": 3.886755009282376, "percentage": 77.74, "elapsed_time": "2:39:41", "remaining_time": "0:45:44", "throughput": 19943.49, "total_tokens": 191092032} +{"current_steps": 60720, "total_steps": 78105, "loss": 0.15, "lr": 7.174817796521466e-07, "epoch": 3.887075091223353, "percentage": 77.74, "elapsed_time": "2:39:42", "remaining_time": "0:45:43", "throughput": 19943.71, "total_tokens": 191107584} +{"current_steps": 60725, "total_steps": 78105, "loss": 0.1673, "lr": 7.170901213674552e-07, "epoch": 3.88739517316433, "percentage": 77.75, "elapsed_time": "2:39:43", "remaining_time": "0:45:42", "throughput": 19943.96, "total_tokens": 191123520} +{"current_steps": 60730, "total_steps": 78105, "loss": 0.1535, "lr": 7.166985521119982e-07, "epoch": 3.8877152551053067, "percentage": 77.75, "elapsed_time": "2:39:43", "remaining_time": "0:45:41", "throughput": 19944.23, "total_tokens": 191140096} +{"current_steps": 60735, "total_steps": 78105, "loss": 0.1902, "lr": 7.16307071905325e-07, "epoch": 3.8880353370462837, "percentage": 77.76, "elapsed_time": "2:39:44", "remaining_time": "0:45:41", "throughput": 19944.44, "total_tokens": 191154944} +{"current_steps": 60740, "total_steps": 78105, "loss": 0.1822, "lr": 7.159156807669862e-07, "epoch": 3.8883554189872607, "percentage": 77.77, "elapsed_time": "2:39:45", "remaining_time": "0:45:40", "throughput": 19944.64, "total_tokens": 191169856} +{"current_steps": 60745, "total_steps": 78105, "loss": 0.1245, "lr": 7.155243787165256e-07, "epoch": 3.8886755009282377, "percentage": 77.77, "elapsed_time": "2:39:45", "remaining_time": "0:45:39", "throughput": 19944.87, "total_tokens": 191185216} +{"current_steps": 60750, "total_steps": 78105, "loss": 0.131, "lr": 7.151331657734825e-07, "epoch": 3.8889955828692147, "percentage": 77.78, "elapsed_time": "2:39:46", "remaining_time": "0:45:38", "throughput": 19945.03, "total_tokens": 191199488} +{"current_steps": 60755, "total_steps": 78105, "loss": 0.2288, "lr": 7.147420419573922e-07, "epoch": 3.8893156648101916, "percentage": 77.79, "elapsed_time": "2:39:47", "remaining_time": "0:45:37", "throughput": 19945.32, "total_tokens": 191216384} +{"current_steps": 60760, "total_steps": 78105, "loss": 0.1724, "lr": 7.143510072877844e-07, "epoch": 3.889635746751168, "percentage": 77.79, "elapsed_time": "2:39:47", "remaining_time": "0:45:36", "throughput": 19945.51, "total_tokens": 191231680} +{"current_steps": 60765, "total_steps": 78105, "loss": 0.1791, "lr": 7.139600617841877e-07, "epoch": 3.889955828692145, "percentage": 77.8, "elapsed_time": "2:39:48", "remaining_time": "0:45:36", "throughput": 19945.71, "total_tokens": 191246848} +{"current_steps": 60770, "total_steps": 78105, "loss": 0.1297, "lr": 7.135692054661206e-07, "epoch": 3.890275910633122, "percentage": 77.81, "elapsed_time": "2:39:49", "remaining_time": "0:45:35", "throughput": 19945.98, "total_tokens": 191263552} +{"current_steps": 60775, "total_steps": 78105, "loss": 0.1783, "lr": 7.131784383531032e-07, "epoch": 3.8905959925740987, "percentage": 77.81, "elapsed_time": "2:39:49", "remaining_time": "0:45:34", "throughput": 19946.22, "total_tokens": 191279744} +{"current_steps": 60780, "total_steps": 78105, "loss": 0.1951, "lr": 7.127877604646474e-07, "epoch": 3.8909160745150757, "percentage": 77.82, "elapsed_time": "2:39:50", "remaining_time": "0:45:33", "throughput": 19946.41, "total_tokens": 191294464} +{"current_steps": 60785, "total_steps": 78105, "loss": 0.2041, "lr": 7.123971718202616e-07, "epoch": 3.8912361564560527, "percentage": 77.82, "elapsed_time": "2:39:51", "remaining_time": "0:45:32", "throughput": 19946.58, "total_tokens": 191308864} +{"current_steps": 60790, "total_steps": 78105, "loss": 0.2145, "lr": 7.120066724394498e-07, "epoch": 3.8915562383970297, "percentage": 77.83, "elapsed_time": "2:39:51", "remaining_time": "0:45:32", "throughput": 19946.82, "total_tokens": 191324352} +{"current_steps": 60795, "total_steps": 78105, "loss": 0.1404, "lr": 7.116162623417114e-07, "epoch": 3.8918763203380067, "percentage": 77.84, "elapsed_time": "2:39:52", "remaining_time": "0:45:31", "throughput": 19947.06, "total_tokens": 191340288} +{"current_steps": 60800, "total_steps": 78105, "loss": 0.1309, "lr": 7.112259415465414e-07, "epoch": 3.8921964022789837, "percentage": 77.84, "elapsed_time": "2:39:53", "remaining_time": "0:45:30", "throughput": 19947.27, "total_tokens": 191355584} +{"current_steps": 60805, "total_steps": 78105, "loss": 0.1976, "lr": 7.108357100734298e-07, "epoch": 3.89251648421996, "percentage": 77.85, "elapsed_time": "2:39:53", "remaining_time": "0:45:29", "throughput": 19947.5, "total_tokens": 191371328} +{"current_steps": 60810, "total_steps": 78105, "loss": 0.1907, "lr": 7.104455679418642e-07, "epoch": 3.892836566160937, "percentage": 77.86, "elapsed_time": "2:39:54", "remaining_time": "0:45:28", "throughput": 19947.7, "total_tokens": 191386368} +{"current_steps": 60815, "total_steps": 78105, "loss": 0.2097, "lr": 7.100555151713254e-07, "epoch": 3.893156648101914, "percentage": 77.86, "elapsed_time": "2:39:55", "remaining_time": "0:45:27", "throughput": 19947.86, "total_tokens": 191400448} +{"current_steps": 60820, "total_steps": 78105, "loss": 0.1963, "lr": 7.096655517812908e-07, "epoch": 3.8934767300428907, "percentage": 77.87, "elapsed_time": "2:39:55", "remaining_time": "0:45:27", "throughput": 19948.12, "total_tokens": 191416768} +{"current_steps": 60825, "total_steps": 78105, "loss": 0.1735, "lr": 7.09275677791233e-07, "epoch": 3.8937968119838677, "percentage": 77.88, "elapsed_time": "2:39:56", "remaining_time": "0:45:26", "throughput": 19948.34, "total_tokens": 191432128} +{"current_steps": 60830, "total_steps": 78105, "loss": 0.0954, "lr": 7.088858932206203e-07, "epoch": 3.8941168939248447, "percentage": 77.88, "elapsed_time": "2:39:57", "remaining_time": "0:45:25", "throughput": 19948.59, "total_tokens": 191448128} +{"current_steps": 60835, "total_steps": 78105, "loss": 0.16, "lr": 7.084961980889166e-07, "epoch": 3.8944369758658217, "percentage": 77.89, "elapsed_time": "2:39:57", "remaining_time": "0:45:24", "throughput": 19948.9, "total_tokens": 191465664} +{"current_steps": 60840, "total_steps": 78105, "loss": 0.2341, "lr": 7.081065924155811e-07, "epoch": 3.8947570578067987, "percentage": 77.9, "elapsed_time": "2:39:58", "remaining_time": "0:45:23", "throughput": 19949.08, "total_tokens": 191480256} +{"current_steps": 60845, "total_steps": 78105, "loss": 0.1444, "lr": 7.077170762200681e-07, "epoch": 3.8950771397477757, "percentage": 77.9, "elapsed_time": "2:39:59", "remaining_time": "0:45:22", "throughput": 19949.26, "total_tokens": 191495424} +{"current_steps": 60850, "total_steps": 78105, "loss": 0.1778, "lr": 7.073276495218298e-07, "epoch": 3.8953972216887522, "percentage": 77.91, "elapsed_time": "2:39:59", "remaining_time": "0:45:22", "throughput": 19949.51, "total_tokens": 191511744} +{"current_steps": 60855, "total_steps": 78105, "loss": 0.0944, "lr": 7.069383123403098e-07, "epoch": 3.8957173036297292, "percentage": 77.91, "elapsed_time": "2:40:00", "remaining_time": "0:45:21", "throughput": 19949.72, "total_tokens": 191526784} +{"current_steps": 60860, "total_steps": 78105, "loss": 0.1969, "lr": 7.065490646949516e-07, "epoch": 3.8960373855707062, "percentage": 77.92, "elapsed_time": "2:40:01", "remaining_time": "0:45:20", "throughput": 19949.97, "total_tokens": 191543040} +{"current_steps": 60865, "total_steps": 78105, "loss": 0.164, "lr": 7.061599066051911e-07, "epoch": 3.8963574675116828, "percentage": 77.93, "elapsed_time": "2:40:01", "remaining_time": "0:45:19", "throughput": 19950.26, "total_tokens": 191560512} +{"current_steps": 60870, "total_steps": 78105, "loss": 0.1916, "lr": 7.057708380904604e-07, "epoch": 3.8966775494526598, "percentage": 77.93, "elapsed_time": "2:40:02", "remaining_time": "0:45:18", "throughput": 19950.45, "total_tokens": 191575744} +{"current_steps": 60875, "total_steps": 78105, "loss": 0.2014, "lr": 7.0538185917019e-07, "epoch": 3.8969976313936368, "percentage": 77.94, "elapsed_time": "2:40:03", "remaining_time": "0:45:18", "throughput": 19950.66, "total_tokens": 191591232} +{"current_steps": 60880, "total_steps": 78105, "loss": 0.3399, "lr": 7.049929698638e-07, "epoch": 3.8973177133346137, "percentage": 77.95, "elapsed_time": "2:40:03", "remaining_time": "0:45:17", "throughput": 19950.89, "total_tokens": 191606848} +{"current_steps": 60885, "total_steps": 78105, "loss": 0.1631, "lr": 7.046041701907127e-07, "epoch": 3.8976377952755907, "percentage": 77.95, "elapsed_time": "2:40:04", "remaining_time": "0:45:16", "throughput": 19951.13, "total_tokens": 191623040} +{"current_steps": 60890, "total_steps": 78105, "loss": 0.1141, "lr": 7.0421546017034e-07, "epoch": 3.8979578772165673, "percentage": 77.96, "elapsed_time": "2:40:05", "remaining_time": "0:45:15", "throughput": 19951.28, "total_tokens": 191637376} +{"current_steps": 60895, "total_steps": 78105, "loss": 0.2313, "lr": 7.038268398220937e-07, "epoch": 3.8982779591575443, "percentage": 77.97, "elapsed_time": "2:40:05", "remaining_time": "0:45:14", "throughput": 19951.48, "total_tokens": 191652480} +{"current_steps": 60900, "total_steps": 78105, "loss": 0.1317, "lr": 7.034383091653793e-07, "epoch": 3.8985980410985213, "percentage": 77.97, "elapsed_time": "2:40:06", "remaining_time": "0:45:13", "throughput": 19951.73, "total_tokens": 191668800} +{"current_steps": 60905, "total_steps": 78105, "loss": 0.2489, "lr": 7.030498682195977e-07, "epoch": 3.8989181230394983, "percentage": 77.98, "elapsed_time": "2:40:07", "remaining_time": "0:45:13", "throughput": 19952.01, "total_tokens": 191685696} +{"current_steps": 60910, "total_steps": 78105, "loss": 0.1676, "lr": 7.026615170041459e-07, "epoch": 3.899238204980475, "percentage": 77.98, "elapsed_time": "2:40:08", "remaining_time": "0:45:12", "throughput": 19952.21, "total_tokens": 191700992} +{"current_steps": 60915, "total_steps": 78105, "loss": 0.2083, "lr": 7.022732555384151e-07, "epoch": 3.899558286921452, "percentage": 77.99, "elapsed_time": "2:40:08", "remaining_time": "0:45:11", "throughput": 19952.47, "total_tokens": 191717376} +{"current_steps": 60920, "total_steps": 78105, "loss": 0.1527, "lr": 7.018850838417946e-07, "epoch": 3.899878368862429, "percentage": 78.0, "elapsed_time": "2:40:09", "remaining_time": "0:45:10", "throughput": 19952.76, "total_tokens": 191734720} +{"current_steps": 60925, "total_steps": 78105, "loss": 0.1515, "lr": 7.01497001933667e-07, "epoch": 3.9001984508034058, "percentage": 78.0, "elapsed_time": "2:40:10", "remaining_time": "0:45:09", "throughput": 19952.98, "total_tokens": 191750400} +{"current_steps": 60930, "total_steps": 78105, "loss": 0.1293, "lr": 7.011090098334111e-07, "epoch": 3.9005185327443828, "percentage": 78.01, "elapsed_time": "2:40:10", "remaining_time": "0:45:09", "throughput": 19953.2, "total_tokens": 191766208} +{"current_steps": 60935, "total_steps": 78105, "loss": 0.1245, "lr": 7.007211075604012e-07, "epoch": 3.9008386146853593, "percentage": 78.02, "elapsed_time": "2:40:11", "remaining_time": "0:45:08", "throughput": 19953.44, "total_tokens": 191782592} +{"current_steps": 60940, "total_steps": 78105, "loss": 0.1701, "lr": 7.003332951340069e-07, "epoch": 3.9011586966263363, "percentage": 78.02, "elapsed_time": "2:40:12", "remaining_time": "0:45:07", "throughput": 19953.68, "total_tokens": 191798720} +{"current_steps": 60945, "total_steps": 78105, "loss": 0.1942, "lr": 6.99945572573594e-07, "epoch": 3.9014787785673133, "percentage": 78.03, "elapsed_time": "2:40:12", "remaining_time": "0:45:06", "throughput": 19953.86, "total_tokens": 191813376} +{"current_steps": 60950, "total_steps": 78105, "loss": 0.1709, "lr": 6.995579398985231e-07, "epoch": 3.9017988605082903, "percentage": 78.04, "elapsed_time": "2:40:13", "remaining_time": "0:45:05", "throughput": 19954.1, "total_tokens": 191828992} +{"current_steps": 60955, "total_steps": 78105, "loss": 0.1457, "lr": 6.991703971281494e-07, "epoch": 3.902118942449267, "percentage": 78.04, "elapsed_time": "2:40:14", "remaining_time": "0:45:04", "throughput": 19954.31, "total_tokens": 191844288} +{"current_steps": 60960, "total_steps": 78105, "loss": 0.1594, "lr": 6.987829442818275e-07, "epoch": 3.902439024390244, "percentage": 78.05, "elapsed_time": "2:40:14", "remaining_time": "0:45:04", "throughput": 19954.56, "total_tokens": 191860160} +{"current_steps": 60965, "total_steps": 78105, "loss": 0.2075, "lr": 6.983955813789018e-07, "epoch": 3.902759106331221, "percentage": 78.06, "elapsed_time": "2:40:15", "remaining_time": "0:45:03", "throughput": 19954.77, "total_tokens": 191875392} +{"current_steps": 60970, "total_steps": 78105, "loss": 0.1591, "lr": 6.980083084387182e-07, "epoch": 3.903079188272198, "percentage": 78.06, "elapsed_time": "2:40:16", "remaining_time": "0:45:02", "throughput": 19954.99, "total_tokens": 191891136} +{"current_steps": 60975, "total_steps": 78105, "loss": 0.173, "lr": 6.976211254806118e-07, "epoch": 3.903399270213175, "percentage": 78.07, "elapsed_time": "2:40:16", "remaining_time": "0:45:01", "throughput": 19955.22, "total_tokens": 191906880} +{"current_steps": 60980, "total_steps": 78105, "loss": 0.1801, "lr": 6.972340325239185e-07, "epoch": 3.9037193521541513, "percentage": 78.07, "elapsed_time": "2:40:17", "remaining_time": "0:45:00", "throughput": 19955.45, "total_tokens": 191922752} +{"current_steps": 60985, "total_steps": 78105, "loss": 0.1427, "lr": 6.968470295879679e-07, "epoch": 3.9040394340951283, "percentage": 78.08, "elapsed_time": "2:40:18", "remaining_time": "0:45:00", "throughput": 19955.8, "total_tokens": 191941184} +{"current_steps": 60990, "total_steps": 78105, "loss": 0.1502, "lr": 6.964601166920834e-07, "epoch": 3.9043595160361053, "percentage": 78.09, "elapsed_time": "2:40:19", "remaining_time": "0:44:59", "throughput": 19956.04, "total_tokens": 191957184} +{"current_steps": 60995, "total_steps": 78105, "loss": 0.1786, "lr": 6.960732938555878e-07, "epoch": 3.904679597977082, "percentage": 78.09, "elapsed_time": "2:40:19", "remaining_time": "0:44:58", "throughput": 19956.34, "total_tokens": 191974656} +{"current_steps": 61000, "total_steps": 78105, "loss": 0.1952, "lr": 6.956865610977942e-07, "epoch": 3.904999679918059, "percentage": 78.1, "elapsed_time": "2:40:20", "remaining_time": "0:44:57", "throughput": 19956.57, "total_tokens": 191990528} +{"current_steps": 61005, "total_steps": 78105, "loss": 0.1903, "lr": 6.95299918438016e-07, "epoch": 3.905319761859036, "percentage": 78.11, "elapsed_time": "2:40:21", "remaining_time": "0:44:56", "throughput": 19956.79, "total_tokens": 192005696} +{"current_steps": 61010, "total_steps": 78105, "loss": 0.2206, "lr": 6.949133658955598e-07, "epoch": 3.905639843800013, "percentage": 78.11, "elapsed_time": "2:40:21", "remaining_time": "0:44:56", "throughput": 19956.97, "total_tokens": 192020480} +{"current_steps": 61015, "total_steps": 78105, "loss": 0.1778, "lr": 6.945269034897276e-07, "epoch": 3.90595992574099, "percentage": 78.12, "elapsed_time": "2:40:22", "remaining_time": "0:44:55", "throughput": 19957.18, "total_tokens": 192035648} +{"current_steps": 61020, "total_steps": 78105, "loss": 0.1518, "lr": 6.941405312398175e-07, "epoch": 3.906280007681967, "percentage": 78.13, "elapsed_time": "2:40:23", "remaining_time": "0:44:54", "throughput": 19957.44, "total_tokens": 192051968} +{"current_steps": 61025, "total_steps": 78105, "loss": 0.1313, "lr": 6.937542491651228e-07, "epoch": 3.9066000896229434, "percentage": 78.13, "elapsed_time": "2:40:23", "remaining_time": "0:44:53", "throughput": 19957.65, "total_tokens": 192067392} +{"current_steps": 61030, "total_steps": 78105, "loss": 0.0996, "lr": 6.933680572849327e-07, "epoch": 3.9069201715639204, "percentage": 78.14, "elapsed_time": "2:40:24", "remaining_time": "0:44:52", "throughput": 19957.86, "total_tokens": 192082432} +{"current_steps": 61035, "total_steps": 78105, "loss": 0.1796, "lr": 6.929819556185305e-07, "epoch": 3.9072402535048973, "percentage": 78.14, "elapsed_time": "2:40:25", "remaining_time": "0:44:51", "throughput": 19958.09, "total_tokens": 192098048} +{"current_steps": 61040, "total_steps": 78105, "loss": 0.1404, "lr": 6.925959441851979e-07, "epoch": 3.907560335445874, "percentage": 78.15, "elapsed_time": "2:40:25", "remaining_time": "0:44:51", "throughput": 19958.27, "total_tokens": 192113152} +{"current_steps": 61045, "total_steps": 78105, "loss": 0.1074, "lr": 6.922100230042092e-07, "epoch": 3.907880417386851, "percentage": 78.16, "elapsed_time": "2:40:26", "remaining_time": "0:44:50", "throughput": 19958.53, "total_tokens": 192129600} +{"current_steps": 61050, "total_steps": 78105, "loss": 0.1495, "lr": 6.918241920948357e-07, "epoch": 3.908200499327828, "percentage": 78.16, "elapsed_time": "2:40:27", "remaining_time": "0:44:49", "throughput": 19958.76, "total_tokens": 192145344} +{"current_steps": 61055, "total_steps": 78105, "loss": 0.1553, "lr": 6.914384514763437e-07, "epoch": 3.908520581268805, "percentage": 78.17, "elapsed_time": "2:40:27", "remaining_time": "0:44:48", "throughput": 19958.96, "total_tokens": 192160384} +{"current_steps": 61060, "total_steps": 78105, "loss": 0.2117, "lr": 6.910528011679948e-07, "epoch": 3.908840663209782, "percentage": 78.18, "elapsed_time": "2:40:28", "remaining_time": "0:44:47", "throughput": 19959.22, "total_tokens": 192177024} +{"current_steps": 61065, "total_steps": 78105, "loss": 0.186, "lr": 6.906672411890459e-07, "epoch": 3.909160745150759, "percentage": 78.18, "elapsed_time": "2:40:29", "remaining_time": "0:44:46", "throughput": 19959.42, "total_tokens": 192192128} +{"current_steps": 61070, "total_steps": 78105, "loss": 0.1418, "lr": 6.902817715587518e-07, "epoch": 3.9094808270917354, "percentage": 78.19, "elapsed_time": "2:40:29", "remaining_time": "0:44:46", "throughput": 19959.65, "total_tokens": 192207680} +{"current_steps": 61075, "total_steps": 78105, "loss": 0.1739, "lr": 6.898963922963584e-07, "epoch": 3.9098009090327124, "percentage": 78.2, "elapsed_time": "2:40:30", "remaining_time": "0:44:45", "throughput": 19959.87, "total_tokens": 192223616} +{"current_steps": 61080, "total_steps": 78105, "loss": 0.2011, "lr": 6.895111034211119e-07, "epoch": 3.9101209909736894, "percentage": 78.2, "elapsed_time": "2:40:31", "remaining_time": "0:44:44", "throughput": 19960.07, "total_tokens": 192238720} +{"current_steps": 61085, "total_steps": 78105, "loss": 0.1026, "lr": 6.891259049522489e-07, "epoch": 3.910441072914666, "percentage": 78.21, "elapsed_time": "2:40:31", "remaining_time": "0:44:43", "throughput": 19960.28, "total_tokens": 192254080} +{"current_steps": 61090, "total_steps": 78105, "loss": 0.1356, "lr": 6.887407969090066e-07, "epoch": 3.910761154855643, "percentage": 78.22, "elapsed_time": "2:40:32", "remaining_time": "0:44:42", "throughput": 19960.54, "total_tokens": 192270720} +{"current_steps": 61095, "total_steps": 78105, "loss": 0.1405, "lr": 6.883557793106143e-07, "epoch": 3.91108123679662, "percentage": 78.22, "elapsed_time": "2:40:33", "remaining_time": "0:44:42", "throughput": 19960.75, "total_tokens": 192286400} +{"current_steps": 61100, "total_steps": 78105, "loss": 0.1561, "lr": 6.879708521762974e-07, "epoch": 3.911401318737597, "percentage": 78.23, "elapsed_time": "2:40:33", "remaining_time": "0:44:41", "throughput": 19960.95, "total_tokens": 192301504} +{"current_steps": 61105, "total_steps": 78105, "loss": 0.1513, "lr": 6.875860155252787e-07, "epoch": 3.911721400678574, "percentage": 78.23, "elapsed_time": "2:40:34", "remaining_time": "0:44:40", "throughput": 19961.18, "total_tokens": 192317184} +{"current_steps": 61110, "total_steps": 78105, "loss": 0.1592, "lr": 6.872012693767727e-07, "epoch": 3.912041482619551, "percentage": 78.24, "elapsed_time": "2:40:35", "remaining_time": "0:44:39", "throughput": 19961.44, "total_tokens": 192333760} +{"current_steps": 61115, "total_steps": 78105, "loss": 0.2469, "lr": 6.86816613749994e-07, "epoch": 3.9123615645605274, "percentage": 78.25, "elapsed_time": "2:40:35", "remaining_time": "0:44:38", "throughput": 19961.65, "total_tokens": 192349120} +{"current_steps": 61120, "total_steps": 78105, "loss": 0.2381, "lr": 6.864320486641476e-07, "epoch": 3.9126816465015044, "percentage": 78.25, "elapsed_time": "2:40:36", "remaining_time": "0:44:37", "throughput": 19961.88, "total_tokens": 192364544} +{"current_steps": 61125, "total_steps": 78105, "loss": 0.1527, "lr": 6.860475741384392e-07, "epoch": 3.9130017284424814, "percentage": 78.26, "elapsed_time": "2:40:37", "remaining_time": "0:44:37", "throughput": 19962.11, "total_tokens": 192380800} +{"current_steps": 61130, "total_steps": 78105, "loss": 0.1796, "lr": 6.856631901920662e-07, "epoch": 3.913321810383458, "percentage": 78.27, "elapsed_time": "2:40:38", "remaining_time": "0:44:36", "throughput": 19960.51, "total_tokens": 192396288} +{"current_steps": 61135, "total_steps": 78105, "loss": 0.2359, "lr": 6.852788968442232e-07, "epoch": 3.913641892324435, "percentage": 78.27, "elapsed_time": "2:40:39", "remaining_time": "0:44:35", "throughput": 19960.73, "total_tokens": 192411840} +{"current_steps": 61140, "total_steps": 78105, "loss": 0.223, "lr": 6.848946941140991e-07, "epoch": 3.913961974265412, "percentage": 78.28, "elapsed_time": "2:40:40", "remaining_time": "0:44:34", "throughput": 19960.99, "total_tokens": 192427968} +{"current_steps": 61145, "total_steps": 78105, "loss": 0.1878, "lr": 6.845105820208797e-07, "epoch": 3.914282056206389, "percentage": 78.29, "elapsed_time": "2:40:40", "remaining_time": "0:44:34", "throughput": 19961.18, "total_tokens": 192443072} +{"current_steps": 61150, "total_steps": 78105, "loss": 0.1909, "lr": 6.841265605837447e-07, "epoch": 3.914602138147366, "percentage": 78.29, "elapsed_time": "2:40:41", "remaining_time": "0:44:33", "throughput": 19961.4, "total_tokens": 192458240} +{"current_steps": 61155, "total_steps": 78105, "loss": 0.1758, "lr": 6.837426298218711e-07, "epoch": 3.9149222200883425, "percentage": 78.3, "elapsed_time": "2:40:42", "remaining_time": "0:44:32", "throughput": 19961.73, "total_tokens": 192476032} +{"current_steps": 61160, "total_steps": 78105, "loss": 0.1384, "lr": 6.833587897544302e-07, "epoch": 3.9152423020293194, "percentage": 78.3, "elapsed_time": "2:40:42", "remaining_time": "0:44:31", "throughput": 19961.94, "total_tokens": 192491264} +{"current_steps": 61165, "total_steps": 78105, "loss": 0.1865, "lr": 6.829750404005888e-07, "epoch": 3.9155623839702964, "percentage": 78.31, "elapsed_time": "2:40:43", "remaining_time": "0:44:30", "throughput": 19962.12, "total_tokens": 192505728} +{"current_steps": 61170, "total_steps": 78105, "loss": 0.0989, "lr": 6.825913817795096e-07, "epoch": 3.9158824659112734, "percentage": 78.32, "elapsed_time": "2:40:44", "remaining_time": "0:44:30", "throughput": 19962.34, "total_tokens": 192521344} +{"current_steps": 61175, "total_steps": 78105, "loss": 0.2037, "lr": 6.8220781391035e-07, "epoch": 3.91620254785225, "percentage": 78.32, "elapsed_time": "2:40:44", "remaining_time": "0:44:29", "throughput": 19962.62, "total_tokens": 192538176} +{"current_steps": 61180, "total_steps": 78105, "loss": 0.1416, "lr": 6.818243368122637e-07, "epoch": 3.916522629793227, "percentage": 78.33, "elapsed_time": "2:40:45", "remaining_time": "0:44:28", "throughput": 19962.81, "total_tokens": 192553024} +{"current_steps": 61185, "total_steps": 78105, "loss": 0.1883, "lr": 6.814409505043986e-07, "epoch": 3.916842711734204, "percentage": 78.34, "elapsed_time": "2:40:46", "remaining_time": "0:44:27", "throughput": 19963.08, "total_tokens": 192570112} +{"current_steps": 61190, "total_steps": 78105, "loss": 0.1616, "lr": 6.810576550059014e-07, "epoch": 3.917162793675181, "percentage": 78.34, "elapsed_time": "2:40:46", "remaining_time": "0:44:26", "throughput": 19963.27, "total_tokens": 192584768} +{"current_steps": 61195, "total_steps": 78105, "loss": 0.1655, "lr": 6.806744503359092e-07, "epoch": 3.917482875616158, "percentage": 78.35, "elapsed_time": "2:40:47", "remaining_time": "0:44:25", "throughput": 19963.48, "total_tokens": 192599616} +{"current_steps": 61200, "total_steps": 78105, "loss": 0.172, "lr": 6.802913365135597e-07, "epoch": 3.9178029575571345, "percentage": 78.36, "elapsed_time": "2:40:48", "remaining_time": "0:44:25", "throughput": 19963.7, "total_tokens": 192615168} +{"current_steps": 61205, "total_steps": 78105, "loss": 0.2143, "lr": 6.79908313557981e-07, "epoch": 3.9181230394981115, "percentage": 78.36, "elapsed_time": "2:40:48", "remaining_time": "0:44:24", "throughput": 19963.89, "total_tokens": 192630208} +{"current_steps": 61210, "total_steps": 78105, "loss": 0.1218, "lr": 6.79525381488301e-07, "epoch": 3.9184431214390885, "percentage": 78.37, "elapsed_time": "2:40:49", "remaining_time": "0:44:23", "throughput": 19964.11, "total_tokens": 192645888} +{"current_steps": 61215, "total_steps": 78105, "loss": 0.1763, "lr": 6.791425403236413e-07, "epoch": 3.9187632033800655, "percentage": 78.38, "elapsed_time": "2:40:50", "remaining_time": "0:44:22", "throughput": 19964.33, "total_tokens": 192661248} +{"current_steps": 61220, "total_steps": 78105, "loss": 0.1867, "lr": 6.787597900831178e-07, "epoch": 3.919083285321042, "percentage": 78.38, "elapsed_time": "2:40:51", "remaining_time": "0:44:21", "throughput": 19964.86, "total_tokens": 192689216} +{"current_steps": 61225, "total_steps": 78105, "loss": 0.114, "lr": 6.783771307858453e-07, "epoch": 3.919403367262019, "percentage": 78.39, "elapsed_time": "2:40:52", "remaining_time": "0:44:21", "throughput": 19965.1, "total_tokens": 192705280} +{"current_steps": 61230, "total_steps": 78105, "loss": 0.0957, "lr": 6.779945624509288e-07, "epoch": 3.919723449202996, "percentage": 78.39, "elapsed_time": "2:40:52", "remaining_time": "0:44:20", "throughput": 19965.28, "total_tokens": 192719744} +{"current_steps": 61235, "total_steps": 78105, "loss": 0.1738, "lr": 6.776120850974743e-07, "epoch": 3.920043531143973, "percentage": 78.4, "elapsed_time": "2:40:53", "remaining_time": "0:44:19", "throughput": 19965.52, "total_tokens": 192735680} +{"current_steps": 61240, "total_steps": 78105, "loss": 0.1434, "lr": 6.772296987445798e-07, "epoch": 3.92036361308495, "percentage": 78.41, "elapsed_time": "2:40:54", "remaining_time": "0:44:18", "throughput": 19965.74, "total_tokens": 192751104} +{"current_steps": 61245, "total_steps": 78105, "loss": 0.1488, "lr": 6.768474034113396e-07, "epoch": 3.9206836950259265, "percentage": 78.41, "elapsed_time": "2:40:54", "remaining_time": "0:44:17", "throughput": 19965.99, "total_tokens": 192767168} +{"current_steps": 61250, "total_steps": 78105, "loss": 0.1799, "lr": 6.764651991168436e-07, "epoch": 3.9210037769669035, "percentage": 78.42, "elapsed_time": "2:40:55", "remaining_time": "0:44:17", "throughput": 19966.21, "total_tokens": 192782720} +{"current_steps": 61255, "total_steps": 78105, "loss": 0.1952, "lr": 6.760830858801773e-07, "epoch": 3.9213238589078805, "percentage": 78.43, "elapsed_time": "2:40:56", "remaining_time": "0:44:16", "throughput": 19966.45, "total_tokens": 192798976} +{"current_steps": 61260, "total_steps": 78105, "loss": 0.1964, "lr": 6.757010637204212e-07, "epoch": 3.921643940848857, "percentage": 78.43, "elapsed_time": "2:40:56", "remaining_time": "0:44:15", "throughput": 19966.71, "total_tokens": 192815552} +{"current_steps": 61265, "total_steps": 78105, "loss": 0.1476, "lr": 6.753191326566508e-07, "epoch": 3.921964022789834, "percentage": 78.44, "elapsed_time": "2:40:57", "remaining_time": "0:44:14", "throughput": 19966.95, "total_tokens": 192831744} +{"current_steps": 61270, "total_steps": 78105, "loss": 0.1225, "lr": 6.74937292707939e-07, "epoch": 3.922284104730811, "percentage": 78.45, "elapsed_time": "2:40:58", "remaining_time": "0:44:13", "throughput": 19967.17, "total_tokens": 192847168} +{"current_steps": 61275, "total_steps": 78105, "loss": 0.167, "lr": 6.745555438933527e-07, "epoch": 3.922604186671788, "percentage": 78.45, "elapsed_time": "2:40:58", "remaining_time": "0:44:12", "throughput": 19967.39, "total_tokens": 192862784} +{"current_steps": 61280, "total_steps": 78105, "loss": 0.1852, "lr": 6.741738862319538e-07, "epoch": 3.922924268612765, "percentage": 78.46, "elapsed_time": "2:40:59", "remaining_time": "0:44:12", "throughput": 19967.62, "total_tokens": 192878656} +{"current_steps": 61285, "total_steps": 78105, "loss": 0.1745, "lr": 6.737923197428009e-07, "epoch": 3.923244350553742, "percentage": 78.46, "elapsed_time": "2:41:00", "remaining_time": "0:44:11", "throughput": 19967.8, "total_tokens": 192893376} +{"current_steps": 61290, "total_steps": 78105, "loss": 0.1699, "lr": 6.734108444449472e-07, "epoch": 3.9235644324947185, "percentage": 78.47, "elapsed_time": "2:41:00", "remaining_time": "0:44:10", "throughput": 19968.06, "total_tokens": 192909504} +{"current_steps": 61295, "total_steps": 78105, "loss": 0.1383, "lr": 6.730294603574408e-07, "epoch": 3.9238845144356955, "percentage": 78.48, "elapsed_time": "2:41:01", "remaining_time": "0:44:09", "throughput": 19968.28, "total_tokens": 192925440} +{"current_steps": 61300, "total_steps": 78105, "loss": 0.1798, "lr": 6.726481674993282e-07, "epoch": 3.9242045963766725, "percentage": 78.48, "elapsed_time": "2:41:02", "remaining_time": "0:44:08", "throughput": 19968.54, "total_tokens": 192941696} +{"current_steps": 61305, "total_steps": 78105, "loss": 0.1459, "lr": 6.722669658896464e-07, "epoch": 3.924524678317649, "percentage": 78.49, "elapsed_time": "2:41:02", "remaining_time": "0:44:08", "throughput": 19968.77, "total_tokens": 192957376} +{"current_steps": 61310, "total_steps": 78105, "loss": 0.2058, "lr": 6.718858555474336e-07, "epoch": 3.924844760258626, "percentage": 78.5, "elapsed_time": "2:41:03", "remaining_time": "0:44:07", "throughput": 19969.01, "total_tokens": 192973632} +{"current_steps": 61315, "total_steps": 78105, "loss": 0.2832, "lr": 6.71504836491717e-07, "epoch": 3.925164842199603, "percentage": 78.5, "elapsed_time": "2:41:04", "remaining_time": "0:44:06", "throughput": 19969.23, "total_tokens": 192988992} +{"current_steps": 61320, "total_steps": 78105, "loss": 0.1554, "lr": 6.711239087415258e-07, "epoch": 3.92548492414058, "percentage": 78.51, "elapsed_time": "2:41:04", "remaining_time": "0:44:05", "throughput": 19969.45, "total_tokens": 193004608} +{"current_steps": 61325, "total_steps": 78105, "loss": 0.1474, "lr": 6.7074307231588e-07, "epoch": 3.925805006081557, "percentage": 78.52, "elapsed_time": "2:41:05", "remaining_time": "0:44:04", "throughput": 19969.66, "total_tokens": 193020096} +{"current_steps": 61330, "total_steps": 78105, "loss": 0.1642, "lr": 6.703623272337964e-07, "epoch": 3.926125088022534, "percentage": 78.52, "elapsed_time": "2:41:06", "remaining_time": "0:44:03", "throughput": 19969.89, "total_tokens": 193036096} +{"current_steps": 61335, "total_steps": 78105, "loss": 0.2024, "lr": 6.699816735142891e-07, "epoch": 3.9264451699635106, "percentage": 78.53, "elapsed_time": "2:41:07", "remaining_time": "0:44:03", "throughput": 19970.09, "total_tokens": 193051264} +{"current_steps": 61340, "total_steps": 78105, "loss": 0.162, "lr": 6.696011111763637e-07, "epoch": 3.9267652519044876, "percentage": 78.54, "elapsed_time": "2:41:07", "remaining_time": "0:44:02", "throughput": 19970.27, "total_tokens": 193066176} +{"current_steps": 61345, "total_steps": 78105, "loss": 0.0706, "lr": 6.692206402390258e-07, "epoch": 3.9270853338454645, "percentage": 78.54, "elapsed_time": "2:41:08", "remaining_time": "0:44:01", "throughput": 19970.49, "total_tokens": 193081728} +{"current_steps": 61350, "total_steps": 78105, "loss": 0.1414, "lr": 6.688402607212715e-07, "epoch": 3.927405415786441, "percentage": 78.55, "elapsed_time": "2:41:09", "remaining_time": "0:44:00", "throughput": 19970.69, "total_tokens": 193097280} +{"current_steps": 61355, "total_steps": 78105, "loss": 0.1554, "lr": 6.684599726420973e-07, "epoch": 3.927725497727418, "percentage": 78.55, "elapsed_time": "2:41:09", "remaining_time": "0:43:59", "throughput": 19970.91, "total_tokens": 193112896} +{"current_steps": 61360, "total_steps": 78105, "loss": 0.1971, "lr": 6.680797760204921e-07, "epoch": 3.928045579668395, "percentage": 78.56, "elapsed_time": "2:41:10", "remaining_time": "0:43:59", "throughput": 19971.11, "total_tokens": 193127808} +{"current_steps": 61365, "total_steps": 78105, "loss": 0.189, "lr": 6.676996708754405e-07, "epoch": 3.928365661609372, "percentage": 78.57, "elapsed_time": "2:41:11", "remaining_time": "0:43:58", "throughput": 19971.33, "total_tokens": 193143744} +{"current_steps": 61370, "total_steps": 78105, "loss": 0.1005, "lr": 6.673196572259233e-07, "epoch": 3.928685743550349, "percentage": 78.57, "elapsed_time": "2:41:11", "remaining_time": "0:43:57", "throughput": 19971.65, "total_tokens": 193161472} +{"current_steps": 61375, "total_steps": 78105, "loss": 0.1638, "lr": 6.669397350909162e-07, "epoch": 3.929005825491326, "percentage": 78.58, "elapsed_time": "2:41:12", "remaining_time": "0:43:56", "throughput": 19971.85, "total_tokens": 193176512} +{"current_steps": 61380, "total_steps": 78105, "loss": 0.2051, "lr": 6.6655990448939e-07, "epoch": 3.9293259074323026, "percentage": 78.59, "elapsed_time": "2:41:13", "remaining_time": "0:43:55", "throughput": 19972.07, "total_tokens": 193192000} +{"current_steps": 61385, "total_steps": 78105, "loss": 0.1869, "lr": 6.66180165440313e-07, "epoch": 3.9296459893732796, "percentage": 78.59, "elapsed_time": "2:41:13", "remaining_time": "0:43:54", "throughput": 19972.31, "total_tokens": 193208128} +{"current_steps": 61390, "total_steps": 78105, "loss": 0.1991, "lr": 6.658005179626462e-07, "epoch": 3.9299660713142566, "percentage": 78.6, "elapsed_time": "2:41:14", "remaining_time": "0:43:54", "throughput": 19972.48, "total_tokens": 193222592} +{"current_steps": 61395, "total_steps": 78105, "loss": 0.1891, "lr": 6.65420962075348e-07, "epoch": 3.930286153255233, "percentage": 78.61, "elapsed_time": "2:41:15", "remaining_time": "0:43:53", "throughput": 19972.68, "total_tokens": 193237760} +{"current_steps": 61400, "total_steps": 78105, "loss": 0.1598, "lr": 6.650414977973707e-07, "epoch": 3.93060623519621, "percentage": 78.61, "elapsed_time": "2:41:15", "remaining_time": "0:43:52", "throughput": 19972.93, "total_tokens": 193254336} +{"current_steps": 61405, "total_steps": 78105, "loss": 0.1265, "lr": 6.646621251476628e-07, "epoch": 3.930926317137187, "percentage": 78.62, "elapsed_time": "2:41:16", "remaining_time": "0:43:51", "throughput": 19973.15, "total_tokens": 193269824} +{"current_steps": 61410, "total_steps": 78105, "loss": 0.2535, "lr": 6.642828441451687e-07, "epoch": 3.931246399078164, "percentage": 78.62, "elapsed_time": "2:41:17", "remaining_time": "0:43:50", "throughput": 19973.39, "total_tokens": 193286336} +{"current_steps": 61415, "total_steps": 78105, "loss": 0.1526, "lr": 6.639036548088268e-07, "epoch": 3.931566481019141, "percentage": 78.63, "elapsed_time": "2:41:17", "remaining_time": "0:43:50", "throughput": 19973.65, "total_tokens": 193302848} +{"current_steps": 61420, "total_steps": 78105, "loss": 0.1592, "lr": 6.635245571575741e-07, "epoch": 3.9318865629601176, "percentage": 78.64, "elapsed_time": "2:41:18", "remaining_time": "0:43:49", "throughput": 19973.9, "total_tokens": 193318976} +{"current_steps": 61425, "total_steps": 78105, "loss": 0.1106, "lr": 6.631455512103375e-07, "epoch": 3.9322066449010946, "percentage": 78.64, "elapsed_time": "2:41:19", "remaining_time": "0:43:48", "throughput": 19974.07, "total_tokens": 193333632} +{"current_steps": 61430, "total_steps": 78105, "loss": 0.1728, "lr": 6.62766636986045e-07, "epoch": 3.9325267268420716, "percentage": 78.65, "elapsed_time": "2:41:19", "remaining_time": "0:43:47", "throughput": 19974.3, "total_tokens": 193349568} +{"current_steps": 61435, "total_steps": 78105, "loss": 0.124, "lr": 6.623878145036166e-07, "epoch": 3.9328468087830486, "percentage": 78.66, "elapsed_time": "2:41:20", "remaining_time": "0:43:46", "throughput": 19974.49, "total_tokens": 193364288} +{"current_steps": 61440, "total_steps": 78105, "loss": 0.1758, "lr": 6.620090837819693e-07, "epoch": 3.933166890724025, "percentage": 78.66, "elapsed_time": "2:41:21", "remaining_time": "0:43:45", "throughput": 19974.7, "total_tokens": 193379648} +{"current_steps": 61445, "total_steps": 78105, "loss": 0.2264, "lr": 6.616304448400146e-07, "epoch": 3.933486972665002, "percentage": 78.67, "elapsed_time": "2:41:21", "remaining_time": "0:43:45", "throughput": 19974.9, "total_tokens": 193394944} +{"current_steps": 61450, "total_steps": 78105, "loss": 0.1586, "lr": 6.612518976966589e-07, "epoch": 3.933807054605979, "percentage": 78.68, "elapsed_time": "2:41:22", "remaining_time": "0:43:44", "throughput": 19975.12, "total_tokens": 193410432} +{"current_steps": 61455, "total_steps": 78105, "loss": 0.1256, "lr": 6.608734423708074e-07, "epoch": 3.934127136546956, "percentage": 78.68, "elapsed_time": "2:41:23", "remaining_time": "0:43:43", "throughput": 19975.34, "total_tokens": 193426304} +{"current_steps": 61460, "total_steps": 78105, "loss": 0.1297, "lr": 6.604950788813549e-07, "epoch": 3.934447218487933, "percentage": 78.69, "elapsed_time": "2:41:23", "remaining_time": "0:43:42", "throughput": 19975.57, "total_tokens": 193441664} +{"current_steps": 61465, "total_steps": 78105, "loss": 0.1317, "lr": 6.601168072471973e-07, "epoch": 3.9347673004289097, "percentage": 78.7, "elapsed_time": "2:41:24", "remaining_time": "0:43:41", "throughput": 19975.86, "total_tokens": 193458944} +{"current_steps": 61470, "total_steps": 78105, "loss": 0.1677, "lr": 6.597386274872228e-07, "epoch": 3.9350873823698866, "percentage": 78.7, "elapsed_time": "2:41:25", "remaining_time": "0:43:41", "throughput": 19976.07, "total_tokens": 193474112} +{"current_steps": 61475, "total_steps": 78105, "loss": 0.1896, "lr": 6.593605396203154e-07, "epoch": 3.9354074643108636, "percentage": 78.71, "elapsed_time": "2:41:25", "remaining_time": "0:43:40", "throughput": 19976.27, "total_tokens": 193489088} +{"current_steps": 61480, "total_steps": 78105, "loss": 0.1522, "lr": 6.589825436653554e-07, "epoch": 3.9357275462518406, "percentage": 78.71, "elapsed_time": "2:41:26", "remaining_time": "0:43:39", "throughput": 19976.46, "total_tokens": 193504384} +{"current_steps": 61485, "total_steps": 78105, "loss": 0.1566, "lr": 6.586046396412177e-07, "epoch": 3.936047628192817, "percentage": 78.72, "elapsed_time": "2:41:27", "remaining_time": "0:43:38", "throughput": 19976.77, "total_tokens": 193521664} +{"current_steps": 61490, "total_steps": 78105, "loss": 0.2193, "lr": 6.582268275667725e-07, "epoch": 3.936367710133794, "percentage": 78.73, "elapsed_time": "2:41:28", "remaining_time": "0:43:37", "throughput": 19977.02, "total_tokens": 193537920} +{"current_steps": 61495, "total_steps": 78105, "loss": 0.136, "lr": 6.578491074608864e-07, "epoch": 3.936687792074771, "percentage": 78.73, "elapsed_time": "2:41:28", "remaining_time": "0:43:36", "throughput": 19977.21, "total_tokens": 193553280} +{"current_steps": 61500, "total_steps": 78105, "loss": 0.1589, "lr": 6.574714793424195e-07, "epoch": 3.937007874015748, "percentage": 78.74, "elapsed_time": "2:41:29", "remaining_time": "0:43:36", "throughput": 19977.4, "total_tokens": 193568192} +{"current_steps": 61505, "total_steps": 78105, "loss": 0.1547, "lr": 6.570939432302303e-07, "epoch": 3.937327955956725, "percentage": 78.75, "elapsed_time": "2:41:30", "remaining_time": "0:43:35", "throughput": 19977.63, "total_tokens": 193584000} +{"current_steps": 61510, "total_steps": 78105, "loss": 0.1701, "lr": 6.567164991431704e-07, "epoch": 3.9376480378977017, "percentage": 78.75, "elapsed_time": "2:41:30", "remaining_time": "0:43:34", "throughput": 19977.85, "total_tokens": 193599872} +{"current_steps": 61515, "total_steps": 78105, "loss": 0.2501, "lr": 6.563391471000871e-07, "epoch": 3.9379681198386787, "percentage": 78.76, "elapsed_time": "2:41:31", "remaining_time": "0:43:33", "throughput": 19978.12, "total_tokens": 193616768} +{"current_steps": 61520, "total_steps": 78105, "loss": 0.1324, "lr": 6.559618871198237e-07, "epoch": 3.9382882017796557, "percentage": 78.77, "elapsed_time": "2:41:32", "remaining_time": "0:43:32", "throughput": 19978.35, "total_tokens": 193632768} +{"current_steps": 61525, "total_steps": 78105, "loss": 0.1442, "lr": 6.555847192212175e-07, "epoch": 3.938608283720632, "percentage": 78.77, "elapsed_time": "2:41:32", "remaining_time": "0:43:32", "throughput": 19978.57, "total_tokens": 193648320} +{"current_steps": 61530, "total_steps": 78105, "loss": 0.1977, "lr": 6.552076434231047e-07, "epoch": 3.938928365661609, "percentage": 78.78, "elapsed_time": "2:41:33", "remaining_time": "0:43:31", "throughput": 19978.79, "total_tokens": 193664128} +{"current_steps": 61535, "total_steps": 78105, "loss": 0.2114, "lr": 6.548306597443116e-07, "epoch": 3.939248447602586, "percentage": 78.78, "elapsed_time": "2:41:34", "remaining_time": "0:43:30", "throughput": 19978.99, "total_tokens": 193679488} +{"current_steps": 61540, "total_steps": 78105, "loss": 0.1585, "lr": 6.544537682036655e-07, "epoch": 3.939568529543563, "percentage": 78.79, "elapsed_time": "2:41:34", "remaining_time": "0:43:29", "throughput": 19979.19, "total_tokens": 193694848} +{"current_steps": 61545, "total_steps": 78105, "loss": 0.1558, "lr": 6.540769688199838e-07, "epoch": 3.93988861148454, "percentage": 78.8, "elapsed_time": "2:41:35", "remaining_time": "0:43:28", "throughput": 19979.42, "total_tokens": 193710464} +{"current_steps": 61550, "total_steps": 78105, "loss": 0.2872, "lr": 6.537002616120839e-07, "epoch": 3.940208693425517, "percentage": 78.8, "elapsed_time": "2:41:36", "remaining_time": "0:43:27", "throughput": 19979.6, "total_tokens": 193725248} +{"current_steps": 61555, "total_steps": 78105, "loss": 0.1698, "lr": 6.53323646598776e-07, "epoch": 3.9405287753664937, "percentage": 78.81, "elapsed_time": "2:41:36", "remaining_time": "0:43:27", "throughput": 19979.82, "total_tokens": 193740672} +{"current_steps": 61560, "total_steps": 78105, "loss": 0.1753, "lr": 6.529471237988654e-07, "epoch": 3.9408488573074707, "percentage": 78.82, "elapsed_time": "2:41:37", "remaining_time": "0:43:26", "throughput": 19980.02, "total_tokens": 193756160} +{"current_steps": 61565, "total_steps": 78105, "loss": 0.1949, "lr": 6.525706932311559e-07, "epoch": 3.9411689392484477, "percentage": 78.82, "elapsed_time": "2:41:38", "remaining_time": "0:43:25", "throughput": 19980.21, "total_tokens": 193770880} +{"current_steps": 61570, "total_steps": 78105, "loss": 0.1703, "lr": 6.521943549144416e-07, "epoch": 3.9414890211894242, "percentage": 78.83, "elapsed_time": "2:41:38", "remaining_time": "0:43:24", "throughput": 19980.43, "total_tokens": 193786368} +{"current_steps": 61575, "total_steps": 78105, "loss": 0.1146, "lr": 6.51818108867518e-07, "epoch": 3.9418091031304012, "percentage": 78.84, "elapsed_time": "2:41:39", "remaining_time": "0:43:23", "throughput": 19980.68, "total_tokens": 193802496} +{"current_steps": 61580, "total_steps": 78105, "loss": 0.3015, "lr": 6.514419551091697e-07, "epoch": 3.942129185071378, "percentage": 78.84, "elapsed_time": "2:41:40", "remaining_time": "0:43:23", "throughput": 19980.88, "total_tokens": 193817728} +{"current_steps": 61585, "total_steps": 78105, "loss": 0.1662, "lr": 6.51065893658182e-07, "epoch": 3.942449267012355, "percentage": 78.85, "elapsed_time": "2:41:40", "remaining_time": "0:43:22", "throughput": 19981.09, "total_tokens": 193833024} +{"current_steps": 61590, "total_steps": 78105, "loss": 0.1407, "lr": 6.506899245333329e-07, "epoch": 3.942769348953332, "percentage": 78.86, "elapsed_time": "2:41:41", "remaining_time": "0:43:21", "throughput": 19981.32, "total_tokens": 193848640} +{"current_steps": 61595, "total_steps": 78105, "loss": 0.1651, "lr": 6.503140477533962e-07, "epoch": 3.943089430894309, "percentage": 78.86, "elapsed_time": "2:41:42", "remaining_time": "0:43:20", "throughput": 19981.49, "total_tokens": 193863040} +{"current_steps": 61600, "total_steps": 78105, "loss": 0.109, "lr": 6.499382633371412e-07, "epoch": 3.9434095128352857, "percentage": 78.87, "elapsed_time": "2:41:42", "remaining_time": "0:43:19", "throughput": 19981.72, "total_tokens": 193879168} +{"current_steps": 61605, "total_steps": 78105, "loss": 0.1581, "lr": 6.495625713033329e-07, "epoch": 3.9437295947762627, "percentage": 78.87, "elapsed_time": "2:41:43", "remaining_time": "0:43:18", "throughput": 19981.95, "total_tokens": 193894720} +{"current_steps": 61610, "total_steps": 78105, "loss": 0.1539, "lr": 6.491869716707305e-07, "epoch": 3.9440496767172397, "percentage": 78.88, "elapsed_time": "2:41:44", "remaining_time": "0:43:18", "throughput": 19982.16, "total_tokens": 193910208} +{"current_steps": 61615, "total_steps": 78105, "loss": 0.2289, "lr": 6.488114644580909e-07, "epoch": 3.9443697586582163, "percentage": 78.89, "elapsed_time": "2:41:44", "remaining_time": "0:43:17", "throughput": 19982.41, "total_tokens": 193926528} +{"current_steps": 61620, "total_steps": 78105, "loss": 0.1422, "lr": 6.484360496841641e-07, "epoch": 3.9446898405991933, "percentage": 78.89, "elapsed_time": "2:41:45", "remaining_time": "0:43:16", "throughput": 19982.6, "total_tokens": 193941760} +{"current_steps": 61625, "total_steps": 78105, "loss": 0.1784, "lr": 6.480607273676967e-07, "epoch": 3.9450099225401702, "percentage": 78.9, "elapsed_time": "2:41:46", "remaining_time": "0:43:15", "throughput": 19982.83, "total_tokens": 193957376} +{"current_steps": 61630, "total_steps": 78105, "loss": 0.1341, "lr": 6.476854975274302e-07, "epoch": 3.9453300044811472, "percentage": 78.91, "elapsed_time": "2:41:46", "remaining_time": "0:43:14", "throughput": 19983.12, "total_tokens": 193974784} +{"current_steps": 61635, "total_steps": 78105, "loss": 0.1264, "lr": 6.473103601821015e-07, "epoch": 3.9456500864221242, "percentage": 78.91, "elapsed_time": "2:41:47", "remaining_time": "0:43:14", "throughput": 19983.38, "total_tokens": 193991168} +{"current_steps": 61640, "total_steps": 78105, "loss": 0.1234, "lr": 6.469353153504431e-07, "epoch": 3.945970168363101, "percentage": 78.92, "elapsed_time": "2:41:48", "remaining_time": "0:43:13", "throughput": 19983.57, "total_tokens": 194006144} +{"current_steps": 61645, "total_steps": 78105, "loss": 0.2083, "lr": 6.465603630511822e-07, "epoch": 3.9462902503040778, "percentage": 78.93, "elapsed_time": "2:41:48", "remaining_time": "0:43:12", "throughput": 19983.81, "total_tokens": 194022336} +{"current_steps": 61650, "total_steps": 78105, "loss": 0.2178, "lr": 6.461855033030437e-07, "epoch": 3.9466103322450548, "percentage": 78.93, "elapsed_time": "2:41:49", "remaining_time": "0:43:11", "throughput": 19984.0, "total_tokens": 194037504} +{"current_steps": 61655, "total_steps": 78105, "loss": 0.1497, "lr": 6.458107361247437e-07, "epoch": 3.9469304141860317, "percentage": 78.94, "elapsed_time": "2:41:50", "remaining_time": "0:43:10", "throughput": 19984.2, "total_tokens": 194052544} +{"current_steps": 61660, "total_steps": 78105, "loss": 0.1428, "lr": 6.454360615349983e-07, "epoch": 3.9472504961270083, "percentage": 78.95, "elapsed_time": "2:41:50", "remaining_time": "0:43:09", "throughput": 19984.41, "total_tokens": 194067968} +{"current_steps": 61665, "total_steps": 78105, "loss": 0.2099, "lr": 6.450614795525159e-07, "epoch": 3.9475705780679853, "percentage": 78.95, "elapsed_time": "2:41:51", "remaining_time": "0:43:09", "throughput": 19984.63, "total_tokens": 194083904} +{"current_steps": 61670, "total_steps": 78105, "loss": 0.1603, "lr": 6.44686990196001e-07, "epoch": 3.9478906600089623, "percentage": 78.96, "elapsed_time": "2:41:52", "remaining_time": "0:43:08", "throughput": 19984.83, "total_tokens": 194099136} +{"current_steps": 61675, "total_steps": 78105, "loss": 0.145, "lr": 6.44312593484154e-07, "epoch": 3.9482107419499393, "percentage": 78.96, "elapsed_time": "2:41:52", "remaining_time": "0:43:07", "throughput": 19985.02, "total_tokens": 194114176} +{"current_steps": 61680, "total_steps": 78105, "loss": 0.1703, "lr": 6.439382894356694e-07, "epoch": 3.9485308238909163, "percentage": 78.97, "elapsed_time": "2:41:53", "remaining_time": "0:43:06", "throughput": 19985.24, "total_tokens": 194129664} +{"current_steps": 61685, "total_steps": 78105, "loss": 0.1856, "lr": 6.435640780692401e-07, "epoch": 3.948850905831893, "percentage": 78.98, "elapsed_time": "2:41:54", "remaining_time": "0:43:05", "throughput": 19985.42, "total_tokens": 194144576} +{"current_steps": 61690, "total_steps": 78105, "loss": 0.2219, "lr": 6.431899594035495e-07, "epoch": 3.94917098777287, "percentage": 78.98, "elapsed_time": "2:41:54", "remaining_time": "0:43:05", "throughput": 19985.65, "total_tokens": 194160256} +{"current_steps": 61695, "total_steps": 78105, "loss": 0.1516, "lr": 6.428159334572814e-07, "epoch": 3.949491069713847, "percentage": 78.99, "elapsed_time": "2:41:55", "remaining_time": "0:43:04", "throughput": 19985.84, "total_tokens": 194175488} +{"current_steps": 61700, "total_steps": 78105, "loss": 0.1925, "lr": 6.424420002491116e-07, "epoch": 3.9498111516548238, "percentage": 79.0, "elapsed_time": "2:41:56", "remaining_time": "0:43:03", "throughput": 19986.03, "total_tokens": 194190464} +{"current_steps": 61705, "total_steps": 78105, "loss": 0.183, "lr": 6.420681597977129e-07, "epoch": 3.9501312335958003, "percentage": 79.0, "elapsed_time": "2:41:56", "remaining_time": "0:43:02", "throughput": 19986.25, "total_tokens": 194206208} +{"current_steps": 61710, "total_steps": 78105, "loss": 0.231, "lr": 6.416944121217527e-07, "epoch": 3.9504513155367773, "percentage": 79.01, "elapsed_time": "2:41:57", "remaining_time": "0:43:01", "throughput": 19986.47, "total_tokens": 194222208} +{"current_steps": 61715, "total_steps": 78105, "loss": 0.2852, "lr": 6.413207572398938e-07, "epoch": 3.9507713974777543, "percentage": 79.02, "elapsed_time": "2:41:58", "remaining_time": "0:43:00", "throughput": 19986.73, "total_tokens": 194238784} +{"current_steps": 61720, "total_steps": 78105, "loss": 0.2077, "lr": 6.40947195170795e-07, "epoch": 3.9510914794187313, "percentage": 79.02, "elapsed_time": "2:41:59", "remaining_time": "0:43:00", "throughput": 19986.95, "total_tokens": 194254272} +{"current_steps": 61725, "total_steps": 78105, "loss": 0.1523, "lr": 6.405737259331096e-07, "epoch": 3.9514115613597083, "percentage": 79.03, "elapsed_time": "2:41:59", "remaining_time": "0:42:59", "throughput": 19987.17, "total_tokens": 194269888} +{"current_steps": 61730, "total_steps": 78105, "loss": 0.1398, "lr": 6.402003495454864e-07, "epoch": 3.951731643300685, "percentage": 79.03, "elapsed_time": "2:42:00", "remaining_time": "0:42:58", "throughput": 19987.35, "total_tokens": 194284544} +{"current_steps": 61735, "total_steps": 78105, "loss": 0.1711, "lr": 6.398270660265709e-07, "epoch": 3.952051725241662, "percentage": 79.04, "elapsed_time": "2:42:01", "remaining_time": "0:42:57", "throughput": 19987.51, "total_tokens": 194298752} +{"current_steps": 61740, "total_steps": 78105, "loss": 0.1687, "lr": 6.394538753950028e-07, "epoch": 3.952371807182639, "percentage": 79.05, "elapsed_time": "2:42:01", "remaining_time": "0:42:56", "throughput": 19987.71, "total_tokens": 194313856} +{"current_steps": 61745, "total_steps": 78105, "loss": 0.1807, "lr": 6.390807776694166e-07, "epoch": 3.952691889123616, "percentage": 79.05, "elapsed_time": "2:42:02", "remaining_time": "0:42:56", "throughput": 19987.91, "total_tokens": 194329280} +{"current_steps": 61750, "total_steps": 78105, "loss": 0.1258, "lr": 6.387077728684432e-07, "epoch": 3.9530119710645923, "percentage": 79.06, "elapsed_time": "2:42:02", "remaining_time": "0:42:55", "throughput": 19988.11, "total_tokens": 194344320} +{"current_steps": 61755, "total_steps": 78105, "loss": 0.1547, "lr": 6.383348610107079e-07, "epoch": 3.9533320530055693, "percentage": 79.07, "elapsed_time": "2:42:03", "remaining_time": "0:42:54", "throughput": 19988.37, "total_tokens": 194361088} +{"current_steps": 61760, "total_steps": 78105, "loss": 0.1649, "lr": 6.379620421148342e-07, "epoch": 3.9536521349465463, "percentage": 79.07, "elapsed_time": "2:42:04", "remaining_time": "0:42:53", "throughput": 19988.67, "total_tokens": 194378240} +{"current_steps": 61765, "total_steps": 78105, "loss": 0.0695, "lr": 6.375893161994353e-07, "epoch": 3.9539722168875233, "percentage": 79.08, "elapsed_time": "2:42:05", "remaining_time": "0:42:52", "throughput": 19988.87, "total_tokens": 194393728} +{"current_steps": 61770, "total_steps": 78105, "loss": 0.1269, "lr": 6.372166832831267e-07, "epoch": 3.9542922988285003, "percentage": 79.09, "elapsed_time": "2:42:05", "remaining_time": "0:42:51", "throughput": 19989.13, "total_tokens": 194410432} +{"current_steps": 61775, "total_steps": 78105, "loss": 0.1109, "lr": 6.368441433845126e-07, "epoch": 3.954612380769477, "percentage": 79.09, "elapsed_time": "2:42:06", "remaining_time": "0:42:51", "throughput": 19989.36, "total_tokens": 194426240} +{"current_steps": 61780, "total_steps": 78105, "loss": 0.1346, "lr": 6.364716965221978e-07, "epoch": 3.954932462710454, "percentage": 79.1, "elapsed_time": "2:42:07", "remaining_time": "0:42:50", "throughput": 19989.57, "total_tokens": 194441408} +{"current_steps": 61785, "total_steps": 78105, "loss": 0.2338, "lr": 6.360993427147796e-07, "epoch": 3.955252544651431, "percentage": 79.11, "elapsed_time": "2:42:07", "remaining_time": "0:42:49", "throughput": 19989.87, "total_tokens": 194458752} +{"current_steps": 61790, "total_steps": 78105, "loss": 0.1724, "lr": 6.357270819808509e-07, "epoch": 3.9555726265924074, "percentage": 79.11, "elapsed_time": "2:42:08", "remaining_time": "0:42:48", "throughput": 19990.14, "total_tokens": 194475904} +{"current_steps": 61795, "total_steps": 78105, "loss": 0.1906, "lr": 6.353549143390025e-07, "epoch": 3.9558927085333844, "percentage": 79.12, "elapsed_time": "2:42:09", "remaining_time": "0:42:47", "throughput": 19990.36, "total_tokens": 194491264} +{"current_steps": 61800, "total_steps": 78105, "loss": 0.1908, "lr": 6.349828398078159e-07, "epoch": 3.9562127904743614, "percentage": 79.12, "elapsed_time": "2:42:09", "remaining_time": "0:42:47", "throughput": 19990.57, "total_tokens": 194506432} +{"current_steps": 61805, "total_steps": 78105, "loss": 0.1164, "lr": 6.34610858405873e-07, "epoch": 3.9565328724153384, "percentage": 79.13, "elapsed_time": "2:42:10", "remaining_time": "0:42:46", "throughput": 19990.8, "total_tokens": 194522240} +{"current_steps": 61810, "total_steps": 78105, "loss": 0.1896, "lr": 6.34238970151746e-07, "epoch": 3.9568529543563153, "percentage": 79.14, "elapsed_time": "2:42:11", "remaining_time": "0:42:45", "throughput": 19991.02, "total_tokens": 194537984} +{"current_steps": 61815, "total_steps": 78105, "loss": 0.2008, "lr": 6.338671750640068e-07, "epoch": 3.9571730362972923, "percentage": 79.14, "elapsed_time": "2:42:11", "remaining_time": "0:42:44", "throughput": 19991.24, "total_tokens": 194553344} +{"current_steps": 61820, "total_steps": 78105, "loss": 0.1522, "lr": 6.334954731612211e-07, "epoch": 3.957493118238269, "percentage": 79.15, "elapsed_time": "2:42:12", "remaining_time": "0:42:43", "throughput": 19991.46, "total_tokens": 194569216} +{"current_steps": 61825, "total_steps": 78105, "loss": 0.1726, "lr": 6.331238644619489e-07, "epoch": 3.957813200179246, "percentage": 79.16, "elapsed_time": "2:42:13", "remaining_time": "0:42:43", "throughput": 19991.67, "total_tokens": 194584448} +{"current_steps": 61830, "total_steps": 78105, "loss": 0.2159, "lr": 6.327523489847468e-07, "epoch": 3.958133282120223, "percentage": 79.16, "elapsed_time": "2:42:13", "remaining_time": "0:42:42", "throughput": 19991.88, "total_tokens": 194599680} +{"current_steps": 61835, "total_steps": 78105, "loss": 0.1385, "lr": 6.323809267481662e-07, "epoch": 3.9584533640611994, "percentage": 79.17, "elapsed_time": "2:42:14", "remaining_time": "0:42:41", "throughput": 19992.12, "total_tokens": 194615808} +{"current_steps": 61840, "total_steps": 78105, "loss": 0.2075, "lr": 6.320095977707533e-07, "epoch": 3.9587734460021764, "percentage": 79.18, "elapsed_time": "2:42:15", "remaining_time": "0:42:40", "throughput": 19992.33, "total_tokens": 194631232} +{"current_steps": 61845, "total_steps": 78105, "loss": 0.222, "lr": 6.316383620710526e-07, "epoch": 3.9590935279431534, "percentage": 79.18, "elapsed_time": "2:42:15", "remaining_time": "0:42:39", "throughput": 19992.58, "total_tokens": 194647680} +{"current_steps": 61850, "total_steps": 78105, "loss": 0.1296, "lr": 6.312672196675986e-07, "epoch": 3.9594136098841304, "percentage": 79.19, "elapsed_time": "2:42:16", "remaining_time": "0:42:38", "throughput": 19992.79, "total_tokens": 194663168} +{"current_steps": 61855, "total_steps": 78105, "loss": 0.1991, "lr": 6.308961705789265e-07, "epoch": 3.9597336918251074, "percentage": 79.19, "elapsed_time": "2:42:17", "remaining_time": "0:42:38", "throughput": 19993.01, "total_tokens": 194679168} +{"current_steps": 61860, "total_steps": 78105, "loss": 0.1225, "lr": 6.305252148235638e-07, "epoch": 3.9600537737660844, "percentage": 79.2, "elapsed_time": "2:42:18", "remaining_time": "0:42:37", "throughput": 19993.22, "total_tokens": 194694656} +{"current_steps": 61865, "total_steps": 78105, "loss": 0.1924, "lr": 6.301543524200343e-07, "epoch": 3.960373855707061, "percentage": 79.21, "elapsed_time": "2:42:18", "remaining_time": "0:42:36", "throughput": 19993.44, "total_tokens": 194710016} +{"current_steps": 61870, "total_steps": 78105, "loss": 0.182, "lr": 6.297835833868568e-07, "epoch": 3.960693937648038, "percentage": 79.21, "elapsed_time": "2:42:19", "remaining_time": "0:42:35", "throughput": 19993.64, "total_tokens": 194725376} +{"current_steps": 61875, "total_steps": 78105, "loss": 0.187, "lr": 6.294129077425445e-07, "epoch": 3.961014019589015, "percentage": 79.22, "elapsed_time": "2:42:20", "remaining_time": "0:42:34", "throughput": 19993.86, "total_tokens": 194741440} +{"current_steps": 61880, "total_steps": 78105, "loss": 0.1324, "lr": 6.2904232550561e-07, "epoch": 3.9613341015299914, "percentage": 79.23, "elapsed_time": "2:42:20", "remaining_time": "0:42:34", "throughput": 19994.06, "total_tokens": 194756800} +{"current_steps": 61885, "total_steps": 78105, "loss": 0.1479, "lr": 6.286718366945543e-07, "epoch": 3.9616541834709684, "percentage": 79.23, "elapsed_time": "2:42:21", "remaining_time": "0:42:33", "throughput": 19994.26, "total_tokens": 194771776} +{"current_steps": 61890, "total_steps": 78105, "loss": 0.1099, "lr": 6.28301441327881e-07, "epoch": 3.9619742654119454, "percentage": 79.24, "elapsed_time": "2:42:22", "remaining_time": "0:42:32", "throughput": 19994.44, "total_tokens": 194786432} +{"current_steps": 61895, "total_steps": 78105, "loss": 0.0988, "lr": 6.279311394240839e-07, "epoch": 3.9622943473529224, "percentage": 79.25, "elapsed_time": "2:42:22", "remaining_time": "0:42:31", "throughput": 19994.64, "total_tokens": 194801984} +{"current_steps": 61900, "total_steps": 78105, "loss": 0.1369, "lr": 6.275609310016544e-07, "epoch": 3.9626144292938994, "percentage": 79.25, "elapsed_time": "2:42:23", "remaining_time": "0:42:30", "throughput": 19994.8, "total_tokens": 194816064} +{"current_steps": 61905, "total_steps": 78105, "loss": 0.1972, "lr": 6.271908160790794e-07, "epoch": 3.9629345112348764, "percentage": 79.26, "elapsed_time": "2:42:23", "remaining_time": "0:42:29", "throughput": 19995.02, "total_tokens": 194831424} +{"current_steps": 61910, "total_steps": 78105, "loss": 0.1671, "lr": 6.268207946748387e-07, "epoch": 3.963254593175853, "percentage": 79.27, "elapsed_time": "2:42:24", "remaining_time": "0:42:29", "throughput": 19995.23, "total_tokens": 194846720} +{"current_steps": 61915, "total_steps": 78105, "loss": 0.1602, "lr": 6.26450866807412e-07, "epoch": 3.96357467511683, "percentage": 79.27, "elapsed_time": "2:42:25", "remaining_time": "0:42:28", "throughput": 19995.49, "total_tokens": 194863360} +{"current_steps": 61920, "total_steps": 78105, "loss": 0.249, "lr": 6.260810324952688e-07, "epoch": 3.963894757057807, "percentage": 79.28, "elapsed_time": "2:42:26", "remaining_time": "0:42:27", "throughput": 19995.72, "total_tokens": 194879104} +{"current_steps": 61925, "total_steps": 78105, "loss": 0.1858, "lr": 6.257112917568787e-07, "epoch": 3.9642148389987835, "percentage": 79.28, "elapsed_time": "2:42:26", "remaining_time": "0:42:26", "throughput": 19995.9, "total_tokens": 194893632} +{"current_steps": 61930, "total_steps": 78105, "loss": 0.1371, "lr": 6.253416446107036e-07, "epoch": 3.9645349209397605, "percentage": 79.29, "elapsed_time": "2:42:27", "remaining_time": "0:42:25", "throughput": 19996.14, "total_tokens": 194909632} +{"current_steps": 61935, "total_steps": 78105, "loss": 0.1251, "lr": 6.249720910752022e-07, "epoch": 3.9648550028807374, "percentage": 79.3, "elapsed_time": "2:42:28", "remaining_time": "0:42:25", "throughput": 19996.38, "total_tokens": 194925632} +{"current_steps": 61940, "total_steps": 78105, "loss": 0.1424, "lr": 6.24602631168828e-07, "epoch": 3.9651750848217144, "percentage": 79.3, "elapsed_time": "2:42:28", "remaining_time": "0:42:24", "throughput": 19996.66, "total_tokens": 194942784} +{"current_steps": 61945, "total_steps": 78105, "loss": 0.1561, "lr": 6.242332649100297e-07, "epoch": 3.9654951667626914, "percentage": 79.31, "elapsed_time": "2:42:29", "remaining_time": "0:42:23", "throughput": 19996.88, "total_tokens": 194958720} +{"current_steps": 61950, "total_steps": 78105, "loss": 0.1491, "lr": 6.238639923172515e-07, "epoch": 3.9658152487036684, "percentage": 79.32, "elapsed_time": "2:42:30", "remaining_time": "0:42:22", "throughput": 19997.18, "total_tokens": 194975872} +{"current_steps": 61955, "total_steps": 78105, "loss": 0.1673, "lr": 6.234948134089333e-07, "epoch": 3.966135330644645, "percentage": 79.32, "elapsed_time": "2:42:30", "remaining_time": "0:42:21", "throughput": 19997.41, "total_tokens": 194991808} +{"current_steps": 61960, "total_steps": 78105, "loss": 0.2584, "lr": 6.231257282035088e-07, "epoch": 3.966455412585622, "percentage": 79.33, "elapsed_time": "2:42:31", "remaining_time": "0:42:20", "throughput": 19997.61, "total_tokens": 195006976} +{"current_steps": 61965, "total_steps": 78105, "loss": 0.1614, "lr": 6.227567367194101e-07, "epoch": 3.966775494526599, "percentage": 79.34, "elapsed_time": "2:42:32", "remaining_time": "0:42:20", "throughput": 19997.86, "total_tokens": 195023360} +{"current_steps": 61970, "total_steps": 78105, "loss": 0.1806, "lr": 6.223878389750618e-07, "epoch": 3.9670955764675755, "percentage": 79.34, "elapsed_time": "2:42:32", "remaining_time": "0:42:19", "throughput": 19998.1, "total_tokens": 195039296} +{"current_steps": 61975, "total_steps": 78105, "loss": 0.1318, "lr": 6.220190349888843e-07, "epoch": 3.9674156584085525, "percentage": 79.35, "elapsed_time": "2:42:33", "remaining_time": "0:42:18", "throughput": 19998.29, "total_tokens": 195054336} +{"current_steps": 61980, "total_steps": 78105, "loss": 0.1347, "lr": 6.216503247792946e-07, "epoch": 3.9677357403495295, "percentage": 79.35, "elapsed_time": "2:42:34", "remaining_time": "0:42:17", "throughput": 19998.49, "total_tokens": 195069696} +{"current_steps": 61985, "total_steps": 78105, "loss": 0.1617, "lr": 6.21281708364703e-07, "epoch": 3.9680558222905065, "percentage": 79.36, "elapsed_time": "2:42:34", "remaining_time": "0:42:16", "throughput": 19998.69, "total_tokens": 195084992} +{"current_steps": 61990, "total_steps": 78105, "loss": 0.1866, "lr": 6.209131857635181e-07, "epoch": 3.9683759042314835, "percentage": 79.37, "elapsed_time": "2:42:35", "remaining_time": "0:42:16", "throughput": 19998.98, "total_tokens": 195102208} +{"current_steps": 61995, "total_steps": 78105, "loss": 0.1466, "lr": 6.205447569941398e-07, "epoch": 3.96869598617246, "percentage": 79.37, "elapsed_time": "2:42:36", "remaining_time": "0:42:15", "throughput": 19999.17, "total_tokens": 195117056} +{"current_steps": 62000, "total_steps": 78105, "loss": 0.1537, "lr": 6.201764220749678e-07, "epoch": 3.969016068113437, "percentage": 79.38, "elapsed_time": "2:42:36", "remaining_time": "0:42:14", "throughput": 19999.4, "total_tokens": 195132736} +{"current_steps": 62005, "total_steps": 78105, "loss": 0.1636, "lr": 6.198081810243922e-07, "epoch": 3.969336150054414, "percentage": 79.39, "elapsed_time": "2:42:37", "remaining_time": "0:42:13", "throughput": 19999.68, "total_tokens": 195149760} +{"current_steps": 62010, "total_steps": 78105, "loss": 0.1442, "lr": 6.194400338608034e-07, "epoch": 3.969656231995391, "percentage": 79.39, "elapsed_time": "2:42:38", "remaining_time": "0:42:12", "throughput": 19999.87, "total_tokens": 195165056} +{"current_steps": 62015, "total_steps": 78105, "loss": 0.1662, "lr": 6.190719806025835e-07, "epoch": 3.9699763139363675, "percentage": 79.4, "elapsed_time": "2:42:38", "remaining_time": "0:42:11", "throughput": 20000.08, "total_tokens": 195180224} +{"current_steps": 62020, "total_steps": 78105, "loss": 0.1082, "lr": 6.187040212681117e-07, "epoch": 3.9702963958773445, "percentage": 79.41, "elapsed_time": "2:42:39", "remaining_time": "0:42:11", "throughput": 20000.44, "total_tokens": 195199296} +{"current_steps": 62025, "total_steps": 78105, "loss": 0.22, "lr": 6.183361558757619e-07, "epoch": 3.9706164778183215, "percentage": 79.41, "elapsed_time": "2:42:40", "remaining_time": "0:42:10", "throughput": 20000.66, "total_tokens": 195215232} +{"current_steps": 62030, "total_steps": 78105, "loss": 0.1818, "lr": 6.179683844439025e-07, "epoch": 3.9709365597592985, "percentage": 79.42, "elapsed_time": "2:42:41", "remaining_time": "0:42:09", "throughput": 20000.84, "total_tokens": 195229632} +{"current_steps": 62035, "total_steps": 78105, "loss": 0.1776, "lr": 6.176007069909002e-07, "epoch": 3.9712566417002755, "percentage": 79.43, "elapsed_time": "2:42:41", "remaining_time": "0:42:08", "throughput": 20001.05, "total_tokens": 195245184} +{"current_steps": 62040, "total_steps": 78105, "loss": 0.1291, "lr": 6.17233123535112e-07, "epoch": 3.971576723641252, "percentage": 79.43, "elapsed_time": "2:42:42", "remaining_time": "0:42:07", "throughput": 20001.24, "total_tokens": 195260416} +{"current_steps": 62045, "total_steps": 78105, "loss": 0.1825, "lr": 6.168656340948953e-07, "epoch": 3.971896805582229, "percentage": 79.44, "elapsed_time": "2:42:43", "remaining_time": "0:42:07", "throughput": 20001.49, "total_tokens": 195276544} +{"current_steps": 62050, "total_steps": 78105, "loss": 0.1618, "lr": 6.164982386886001e-07, "epoch": 3.972216887523206, "percentage": 79.44, "elapsed_time": "2:42:43", "remaining_time": "0:42:06", "throughput": 20001.8, "total_tokens": 195294528} +{"current_steps": 62055, "total_steps": 78105, "loss": 0.1169, "lr": 6.16130937334572e-07, "epoch": 3.9725369694641826, "percentage": 79.45, "elapsed_time": "2:42:44", "remaining_time": "0:42:05", "throughput": 20002.01, "total_tokens": 195310080} +{"current_steps": 62060, "total_steps": 78105, "loss": 0.1439, "lr": 6.15763730051152e-07, "epoch": 3.9728570514051595, "percentage": 79.46, "elapsed_time": "2:42:45", "remaining_time": "0:42:04", "throughput": 20002.23, "total_tokens": 195326208} +{"current_steps": 62065, "total_steps": 78105, "loss": 0.1621, "lr": 6.153966168566769e-07, "epoch": 3.9731771333461365, "percentage": 79.46, "elapsed_time": "2:42:45", "remaining_time": "0:42:03", "throughput": 20002.47, "total_tokens": 195341824} +{"current_steps": 62070, "total_steps": 78105, "loss": 0.1332, "lr": 6.150295977694776e-07, "epoch": 3.9734972152871135, "percentage": 79.47, "elapsed_time": "2:42:46", "remaining_time": "0:42:03", "throughput": 20002.68, "total_tokens": 195357440} +{"current_steps": 62075, "total_steps": 78105, "loss": 0.1515, "lr": 6.14662672807883e-07, "epoch": 3.9738172972280905, "percentage": 79.48, "elapsed_time": "2:42:47", "remaining_time": "0:42:02", "throughput": 20002.91, "total_tokens": 195373312} +{"current_steps": 62080, "total_steps": 78105, "loss": 0.1444, "lr": 6.142958419902129e-07, "epoch": 3.9741373791690675, "percentage": 79.48, "elapsed_time": "2:42:47", "remaining_time": "0:42:01", "throughput": 20003.11, "total_tokens": 195388544} +{"current_steps": 62085, "total_steps": 78105, "loss": 0.2397, "lr": 6.139291053347868e-07, "epoch": 3.974457461110044, "percentage": 79.49, "elapsed_time": "2:42:48", "remaining_time": "0:42:00", "throughput": 20003.3, "total_tokens": 195403904} +{"current_steps": 62090, "total_steps": 78105, "loss": 0.148, "lr": 6.135624628599169e-07, "epoch": 3.974777543051021, "percentage": 79.5, "elapsed_time": "2:42:49", "remaining_time": "0:41:59", "throughput": 20003.56, "total_tokens": 195420288} +{"current_steps": 62095, "total_steps": 78105, "loss": 0.1955, "lr": 6.131959145839114e-07, "epoch": 3.975097624991998, "percentage": 79.5, "elapsed_time": "2:42:49", "remaining_time": "0:41:58", "throughput": 20003.81, "total_tokens": 195436608} +{"current_steps": 62100, "total_steps": 78105, "loss": 0.1525, "lr": 6.128294605250743e-07, "epoch": 3.9754177069329746, "percentage": 79.51, "elapsed_time": "2:42:50", "remaining_time": "0:41:58", "throughput": 20003.99, "total_tokens": 195451264} +{"current_steps": 62105, "total_steps": 78105, "loss": 0.1367, "lr": 6.124631007017029e-07, "epoch": 3.9757377888739516, "percentage": 79.51, "elapsed_time": "2:42:51", "remaining_time": "0:41:57", "throughput": 20004.17, "total_tokens": 195465728} +{"current_steps": 62110, "total_steps": 78105, "loss": 0.153, "lr": 6.120968351320941e-07, "epoch": 3.9760578708149286, "percentage": 79.52, "elapsed_time": "2:42:51", "remaining_time": "0:41:56", "throughput": 20004.43, "total_tokens": 195482112} +{"current_steps": 62115, "total_steps": 78105, "loss": 0.1983, "lr": 6.117306638345341e-07, "epoch": 3.9763779527559056, "percentage": 79.53, "elapsed_time": "2:42:52", "remaining_time": "0:41:55", "throughput": 20004.66, "total_tokens": 195498368} +{"current_steps": 62120, "total_steps": 78105, "loss": 0.1494, "lr": 6.113645868273099e-07, "epoch": 3.9766980346968825, "percentage": 79.53, "elapsed_time": "2:42:53", "remaining_time": "0:41:54", "throughput": 20004.86, "total_tokens": 195513280} +{"current_steps": 62125, "total_steps": 78105, "loss": 0.194, "lr": 6.109986041287008e-07, "epoch": 3.9770181166378595, "percentage": 79.54, "elapsed_time": "2:42:53", "remaining_time": "0:41:54", "throughput": 20005.06, "total_tokens": 195528512} +{"current_steps": 62130, "total_steps": 78105, "loss": 0.2468, "lr": 6.106327157569819e-07, "epoch": 3.977338198578836, "percentage": 79.55, "elapsed_time": "2:42:54", "remaining_time": "0:41:53", "throughput": 20005.23, "total_tokens": 195543104} +{"current_steps": 62135, "total_steps": 78105, "loss": 0.1486, "lr": 6.102669217304236e-07, "epoch": 3.977658280519813, "percentage": 79.55, "elapsed_time": "2:42:55", "remaining_time": "0:41:52", "throughput": 20005.42, "total_tokens": 195558272} +{"current_steps": 62140, "total_steps": 78105, "loss": 0.1698, "lr": 6.099012220672914e-07, "epoch": 3.97797836246079, "percentage": 79.56, "elapsed_time": "2:42:55", "remaining_time": "0:41:51", "throughput": 20005.63, "total_tokens": 195573632} +{"current_steps": 62145, "total_steps": 78105, "loss": 0.1665, "lr": 6.095356167858484e-07, "epoch": 3.9782984444017666, "percentage": 79.57, "elapsed_time": "2:42:56", "remaining_time": "0:41:50", "throughput": 20005.92, "total_tokens": 195590592} +{"current_steps": 62150, "total_steps": 78105, "loss": 0.2224, "lr": 6.091701059043484e-07, "epoch": 3.9786185263427436, "percentage": 79.57, "elapsed_time": "2:42:57", "remaining_time": "0:41:50", "throughput": 20006.15, "total_tokens": 195606592} +{"current_steps": 62155, "total_steps": 78105, "loss": 0.151, "lr": 6.088046894410448e-07, "epoch": 3.9789386082837206, "percentage": 79.58, "elapsed_time": "2:42:58", "remaining_time": "0:41:49", "throughput": 20006.39, "total_tokens": 195622656} +{"current_steps": 62160, "total_steps": 78105, "loss": 0.1325, "lr": 6.084393674141842e-07, "epoch": 3.9792586902246976, "percentage": 79.59, "elapsed_time": "2:42:58", "remaining_time": "0:41:48", "throughput": 20006.6, "total_tokens": 195638080} +{"current_steps": 62165, "total_steps": 78105, "loss": 0.1371, "lr": 6.080741398420089e-07, "epoch": 3.9795787721656746, "percentage": 79.59, "elapsed_time": "2:42:59", "remaining_time": "0:41:47", "throughput": 20006.83, "total_tokens": 195654016} +{"current_steps": 62170, "total_steps": 78105, "loss": 0.1487, "lr": 6.077090067427563e-07, "epoch": 3.9798988541066516, "percentage": 79.6, "elapsed_time": "2:43:00", "remaining_time": "0:41:46", "throughput": 20007.05, "total_tokens": 195669504} +{"current_steps": 62175, "total_steps": 78105, "loss": 0.1872, "lr": 6.073439681346593e-07, "epoch": 3.980218936047628, "percentage": 79.6, "elapsed_time": "2:43:00", "remaining_time": "0:41:45", "throughput": 20007.25, "total_tokens": 195684288} +{"current_steps": 62180, "total_steps": 78105, "loss": 0.2259, "lr": 6.069790240359461e-07, "epoch": 3.980539017988605, "percentage": 79.61, "elapsed_time": "2:43:01", "remaining_time": "0:41:45", "throughput": 20007.47, "total_tokens": 195700224} +{"current_steps": 62185, "total_steps": 78105, "loss": 0.2132, "lr": 6.066141744648401e-07, "epoch": 3.980859099929582, "percentage": 79.62, "elapsed_time": "2:43:02", "remaining_time": "0:41:44", "throughput": 20007.67, "total_tokens": 195715520} +{"current_steps": 62190, "total_steps": 78105, "loss": 0.1509, "lr": 6.062494194395591e-07, "epoch": 3.9811791818705586, "percentage": 79.62, "elapsed_time": "2:43:02", "remaining_time": "0:41:43", "throughput": 20007.89, "total_tokens": 195731264} +{"current_steps": 62195, "total_steps": 78105, "loss": 0.2026, "lr": 6.058847589783193e-07, "epoch": 3.9814992638115356, "percentage": 79.63, "elapsed_time": "2:43:03", "remaining_time": "0:41:42", "throughput": 20008.1, "total_tokens": 195746560} +{"current_steps": 62200, "total_steps": 78105, "loss": 0.1928, "lr": 6.055201930993274e-07, "epoch": 3.9818193457525126, "percentage": 79.64, "elapsed_time": "2:43:04", "remaining_time": "0:41:41", "throughput": 20008.32, "total_tokens": 195762304} +{"current_steps": 62205, "total_steps": 78105, "loss": 0.1464, "lr": 6.051557218207896e-07, "epoch": 3.9821394276934896, "percentage": 79.64, "elapsed_time": "2:43:04", "remaining_time": "0:41:41", "throughput": 20008.5, "total_tokens": 195777024} +{"current_steps": 62210, "total_steps": 78105, "loss": 0.1745, "lr": 6.04791345160905e-07, "epoch": 3.9824595096344666, "percentage": 79.65, "elapsed_time": "2:43:05", "remaining_time": "0:41:40", "throughput": 20008.72, "total_tokens": 195792768} +{"current_steps": 62215, "total_steps": 78105, "loss": 0.1862, "lr": 6.044270631378685e-07, "epoch": 3.9827795915754436, "percentage": 79.66, "elapsed_time": "2:43:06", "remaining_time": "0:41:39", "throughput": 20008.9, "total_tokens": 195807360} +{"current_steps": 62220, "total_steps": 78105, "loss": 0.2348, "lr": 6.040628757698719e-07, "epoch": 3.98309967351642, "percentage": 79.66, "elapsed_time": "2:43:06", "remaining_time": "0:41:38", "throughput": 20009.12, "total_tokens": 195823040} +{"current_steps": 62225, "total_steps": 78105, "loss": 0.2013, "lr": 6.036987830750984e-07, "epoch": 3.983419755457397, "percentage": 79.67, "elapsed_time": "2:43:07", "remaining_time": "0:41:37", "throughput": 20009.37, "total_tokens": 195839872} +{"current_steps": 62230, "total_steps": 78105, "loss": 0.1472, "lr": 6.033347850717316e-07, "epoch": 3.983739837398374, "percentage": 79.67, "elapsed_time": "2:43:08", "remaining_time": "0:41:36", "throughput": 20009.67, "total_tokens": 195857152} +{"current_steps": 62235, "total_steps": 78105, "loss": 0.206, "lr": 6.029708817779447e-07, "epoch": 3.9840599193393507, "percentage": 79.68, "elapsed_time": "2:43:08", "remaining_time": "0:41:36", "throughput": 20009.88, "total_tokens": 195872576} +{"current_steps": 62240, "total_steps": 78105, "loss": 0.1288, "lr": 6.026070732119115e-07, "epoch": 3.9843800012803277, "percentage": 79.69, "elapsed_time": "2:43:09", "remaining_time": "0:41:35", "throughput": 20010.09, "total_tokens": 195887744} +{"current_steps": 62245, "total_steps": 78105, "loss": 0.1633, "lr": 6.022433593917981e-07, "epoch": 3.9847000832213046, "percentage": 79.69, "elapsed_time": "2:43:10", "remaining_time": "0:41:34", "throughput": 20010.29, "total_tokens": 195903168} +{"current_steps": 62250, "total_steps": 78105, "loss": 0.0829, "lr": 6.018797403357662e-07, "epoch": 3.9850201651622816, "percentage": 79.7, "elapsed_time": "2:43:10", "remaining_time": "0:41:33", "throughput": 20010.56, "total_tokens": 195920192} +{"current_steps": 62255, "total_steps": 78105, "loss": 0.1791, "lr": 6.015162160619731e-07, "epoch": 3.9853402471032586, "percentage": 79.71, "elapsed_time": "2:43:11", "remaining_time": "0:41:32", "throughput": 20010.79, "total_tokens": 195935680} +{"current_steps": 62260, "total_steps": 78105, "loss": 0.1432, "lr": 6.011527865885705e-07, "epoch": 3.985660329044235, "percentage": 79.71, "elapsed_time": "2:43:12", "remaining_time": "0:41:32", "throughput": 20010.98, "total_tokens": 195950720} +{"current_steps": 62265, "total_steps": 78105, "loss": 0.186, "lr": 6.007894519337088e-07, "epoch": 3.985980410985212, "percentage": 79.72, "elapsed_time": "2:43:12", "remaining_time": "0:41:31", "throughput": 20011.23, "total_tokens": 195966976} +{"current_steps": 62270, "total_steps": 78105, "loss": 0.2086, "lr": 6.004262121155275e-07, "epoch": 3.986300492926189, "percentage": 79.73, "elapsed_time": "2:43:13", "remaining_time": "0:41:30", "throughput": 20011.43, "total_tokens": 195982080} +{"current_steps": 62275, "total_steps": 78105, "loss": 0.2099, "lr": 6.000630671521673e-07, "epoch": 3.986620574867166, "percentage": 79.73, "elapsed_time": "2:43:14", "remaining_time": "0:41:29", "throughput": 20011.66, "total_tokens": 195997952} +{"current_steps": 62280, "total_steps": 78105, "loss": 0.1217, "lr": 5.997000170617612e-07, "epoch": 3.9869406568081427, "percentage": 79.74, "elapsed_time": "2:43:14", "remaining_time": "0:41:28", "throughput": 20011.9, "total_tokens": 196014208} +{"current_steps": 62285, "total_steps": 78105, "loss": 0.1811, "lr": 5.993370618624378e-07, "epoch": 3.9872607387491197, "percentage": 79.75, "elapsed_time": "2:43:15", "remaining_time": "0:41:28", "throughput": 20012.18, "total_tokens": 196031296} +{"current_steps": 62290, "total_steps": 78105, "loss": 0.1576, "lr": 5.989742015723215e-07, "epoch": 3.9875808206900967, "percentage": 79.75, "elapsed_time": "2:43:16", "remaining_time": "0:41:27", "throughput": 20012.38, "total_tokens": 196046592} +{"current_steps": 62295, "total_steps": 78105, "loss": 0.1303, "lr": 5.986114362095316e-07, "epoch": 3.9879009026310737, "percentage": 79.76, "elapsed_time": "2:43:16", "remaining_time": "0:41:26", "throughput": 20012.58, "total_tokens": 196062016} +{"current_steps": 62300, "total_steps": 78105, "loss": 0.1843, "lr": 5.982487657921818e-07, "epoch": 3.9882209845720507, "percentage": 79.76, "elapsed_time": "2:43:17", "remaining_time": "0:41:25", "throughput": 20012.76, "total_tokens": 196077184} +{"current_steps": 62305, "total_steps": 78105, "loss": 0.1572, "lr": 5.978861903383843e-07, "epoch": 3.988541066513027, "percentage": 79.77, "elapsed_time": "2:43:18", "remaining_time": "0:41:24", "throughput": 20012.99, "total_tokens": 196092864} +{"current_steps": 62310, "total_steps": 78105, "loss": 0.1924, "lr": 5.975237098662409e-07, "epoch": 3.988861148454004, "percentage": 79.78, "elapsed_time": "2:43:18", "remaining_time": "0:41:23", "throughput": 20013.18, "total_tokens": 196107776} +{"current_steps": 62315, "total_steps": 78105, "loss": 0.1844, "lr": 5.971613243938548e-07, "epoch": 3.989181230394981, "percentage": 79.78, "elapsed_time": "2:43:19", "remaining_time": "0:41:23", "throughput": 20013.4, "total_tokens": 196123712} +{"current_steps": 62320, "total_steps": 78105, "loss": 0.17, "lr": 5.967990339393206e-07, "epoch": 3.9895013123359577, "percentage": 79.79, "elapsed_time": "2:43:20", "remaining_time": "0:41:22", "throughput": 20013.58, "total_tokens": 196138560} +{"current_steps": 62325, "total_steps": 78105, "loss": 0.2002, "lr": 5.964368385207292e-07, "epoch": 3.9898213942769347, "percentage": 79.8, "elapsed_time": "2:43:20", "remaining_time": "0:41:21", "throughput": 20013.77, "total_tokens": 196153664} +{"current_steps": 62330, "total_steps": 78105, "loss": 0.126, "lr": 5.960747381561669e-07, "epoch": 3.9901414762179117, "percentage": 79.8, "elapsed_time": "2:43:21", "remaining_time": "0:41:20", "throughput": 20014.0, "total_tokens": 196169536} +{"current_steps": 62335, "total_steps": 78105, "loss": 0.1623, "lr": 5.957127328637141e-07, "epoch": 3.9904615581588887, "percentage": 79.81, "elapsed_time": "2:43:22", "remaining_time": "0:41:19", "throughput": 20014.21, "total_tokens": 196184960} +{"current_steps": 62340, "total_steps": 78105, "loss": 0.2614, "lr": 5.953508226614499e-07, "epoch": 3.9907816400998657, "percentage": 79.82, "elapsed_time": "2:43:22", "remaining_time": "0:41:19", "throughput": 20014.46, "total_tokens": 196201152} +{"current_steps": 62345, "total_steps": 78105, "loss": 0.1714, "lr": 5.949890075674433e-07, "epoch": 3.9911017220408427, "percentage": 79.82, "elapsed_time": "2:43:23", "remaining_time": "0:41:18", "throughput": 20014.67, "total_tokens": 196216960} +{"current_steps": 62350, "total_steps": 78105, "loss": 0.153, "lr": 5.946272875997636e-07, "epoch": 3.9914218039818192, "percentage": 79.83, "elapsed_time": "2:43:24", "remaining_time": "0:41:17", "throughput": 20014.89, "total_tokens": 196232384} +{"current_steps": 62355, "total_steps": 78105, "loss": 0.1225, "lr": 5.942656627764723e-07, "epoch": 3.991741885922796, "percentage": 79.83, "elapsed_time": "2:43:24", "remaining_time": "0:41:16", "throughput": 20015.06, "total_tokens": 196247040} +{"current_steps": 62360, "total_steps": 78105, "loss": 0.2269, "lr": 5.93904133115627e-07, "epoch": 3.992061967863773, "percentage": 79.84, "elapsed_time": "2:43:25", "remaining_time": "0:41:15", "throughput": 20015.32, "total_tokens": 196263744} +{"current_steps": 62365, "total_steps": 78105, "loss": 0.1624, "lr": 5.935426986352811e-07, "epoch": 3.9923820498047498, "percentage": 79.85, "elapsed_time": "2:43:26", "remaining_time": "0:41:14", "throughput": 20015.53, "total_tokens": 196279168} +{"current_steps": 62370, "total_steps": 78105, "loss": 0.1554, "lr": 5.931813593534821e-07, "epoch": 3.9927021317457267, "percentage": 79.85, "elapsed_time": "2:43:27", "remaining_time": "0:41:14", "throughput": 20015.77, "total_tokens": 196295168} +{"current_steps": 62375, "total_steps": 78105, "loss": 0.1292, "lr": 5.92820115288274e-07, "epoch": 3.9930222136867037, "percentage": 79.86, "elapsed_time": "2:43:27", "remaining_time": "0:41:13", "throughput": 20016.02, "total_tokens": 196311616} +{"current_steps": 62380, "total_steps": 78105, "loss": 0.1403, "lr": 5.924589664576946e-07, "epoch": 3.9933422956276807, "percentage": 79.87, "elapsed_time": "2:43:28", "remaining_time": "0:41:12", "throughput": 20016.2, "total_tokens": 196326144} +{"current_steps": 62385, "total_steps": 78105, "loss": 0.1459, "lr": 5.920979128797791e-07, "epoch": 3.9936623775686577, "percentage": 79.87, "elapsed_time": "2:43:29", "remaining_time": "0:41:11", "throughput": 20016.46, "total_tokens": 196342464} +{"current_steps": 62390, "total_steps": 78105, "loss": 0.1534, "lr": 5.917369545725557e-07, "epoch": 3.9939824595096347, "percentage": 79.88, "elapsed_time": "2:43:29", "remaining_time": "0:41:10", "throughput": 20016.68, "total_tokens": 196358400} +{"current_steps": 62395, "total_steps": 78105, "loss": 0.1893, "lr": 5.913760915540493e-07, "epoch": 3.9943025414506113, "percentage": 79.89, "elapsed_time": "2:43:30", "remaining_time": "0:41:10", "throughput": 20016.88, "total_tokens": 196373760} +{"current_steps": 62400, "total_steps": 78105, "loss": 0.133, "lr": 5.910153238422789e-07, "epoch": 3.9946226233915882, "percentage": 79.89, "elapsed_time": "2:43:31", "remaining_time": "0:41:09", "throughput": 20017.1, "total_tokens": 196389632} +{"current_steps": 62405, "total_steps": 78105, "loss": 0.1228, "lr": 5.906546514552599e-07, "epoch": 3.9949427053325652, "percentage": 79.9, "elapsed_time": "2:43:31", "remaining_time": "0:41:08", "throughput": 20017.32, "total_tokens": 196405312} +{"current_steps": 62410, "total_steps": 78105, "loss": 0.1461, "lr": 5.90294074411002e-07, "epoch": 3.995262787273542, "percentage": 79.91, "elapsed_time": "2:43:32", "remaining_time": "0:41:07", "throughput": 20017.52, "total_tokens": 196420864} +{"current_steps": 62415, "total_steps": 78105, "loss": 0.2077, "lr": 5.89933592727511e-07, "epoch": 3.9955828692145188, "percentage": 79.91, "elapsed_time": "2:43:33", "remaining_time": "0:41:06", "throughput": 20017.73, "total_tokens": 196436224} +{"current_steps": 62420, "total_steps": 78105, "loss": 0.1708, "lr": 5.895732064227863e-07, "epoch": 3.9959029511554958, "percentage": 79.92, "elapsed_time": "2:43:33", "remaining_time": "0:41:06", "throughput": 20017.96, "total_tokens": 196452416} +{"current_steps": 62425, "total_steps": 78105, "loss": 0.1581, "lr": 5.89212915514826e-07, "epoch": 3.9962230330964728, "percentage": 79.92, "elapsed_time": "2:43:34", "remaining_time": "0:41:05", "throughput": 20018.18, "total_tokens": 196468416} +{"current_steps": 62430, "total_steps": 78105, "loss": 0.1456, "lr": 5.888527200216185e-07, "epoch": 3.9965431150374497, "percentage": 79.93, "elapsed_time": "2:43:35", "remaining_time": "0:41:04", "throughput": 20018.37, "total_tokens": 196483328} +{"current_steps": 62435, "total_steps": 78105, "loss": 0.2012, "lr": 5.884926199611521e-07, "epoch": 3.9968631969784267, "percentage": 79.94, "elapsed_time": "2:43:35", "remaining_time": "0:41:03", "throughput": 20018.58, "total_tokens": 196498752} +{"current_steps": 62440, "total_steps": 78105, "loss": 0.2059, "lr": 5.881326153514075e-07, "epoch": 3.9971832789194033, "percentage": 79.94, "elapsed_time": "2:43:36", "remaining_time": "0:41:02", "throughput": 20018.79, "total_tokens": 196514624} +{"current_steps": 62445, "total_steps": 78105, "loss": 0.2552, "lr": 5.877727062103608e-07, "epoch": 3.9975033608603803, "percentage": 79.95, "elapsed_time": "2:43:37", "remaining_time": "0:41:01", "throughput": 20018.97, "total_tokens": 196529088} +{"current_steps": 62450, "total_steps": 78105, "loss": 0.1713, "lr": 5.874128925559863e-07, "epoch": 3.9978234428013573, "percentage": 79.96, "elapsed_time": "2:43:37", "remaining_time": "0:41:01", "throughput": 20019.17, "total_tokens": 196544128} +{"current_steps": 62455, "total_steps": 78105, "loss": 0.2296, "lr": 5.870531744062479e-07, "epoch": 3.998143524742334, "percentage": 79.96, "elapsed_time": "2:43:38", "remaining_time": "0:41:00", "throughput": 20019.35, "total_tokens": 196558848} +{"current_steps": 62460, "total_steps": 78105, "loss": 0.1552, "lr": 5.866935517791114e-07, "epoch": 3.998463606683311, "percentage": 79.97, "elapsed_time": "2:43:39", "remaining_time": "0:40:59", "throughput": 20019.55, "total_tokens": 196574272} +{"current_steps": 62465, "total_steps": 78105, "loss": 0.2365, "lr": 5.863340246925317e-07, "epoch": 3.998783688624288, "percentage": 79.98, "elapsed_time": "2:43:39", "remaining_time": "0:40:58", "throughput": 20019.76, "total_tokens": 196589696} +{"current_steps": 62470, "total_steps": 78105, "loss": 0.124, "lr": 5.859745931644631e-07, "epoch": 3.999103770565265, "percentage": 79.98, "elapsed_time": "2:43:40", "remaining_time": "0:40:57", "throughput": 20020.03, "total_tokens": 196606656} +{"current_steps": 62475, "total_steps": 78105, "loss": 0.1343, "lr": 5.85615257212854e-07, "epoch": 3.9994238525062418, "percentage": 79.99, "elapsed_time": "2:43:41", "remaining_time": "0:40:57", "throughput": 20020.26, "total_tokens": 196622336} +{"current_steps": 62480, "total_steps": 78105, "loss": 0.1703, "lr": 5.85256016855647e-07, "epoch": 3.9997439344472188, "percentage": 79.99, "elapsed_time": "2:43:41", "remaining_time": "0:40:56", "throughput": 20020.46, "total_tokens": 196637888} +{"current_steps": 62485, "total_steps": 78105, "loss": 0.1188, "lr": 5.848968721107812e-07, "epoch": 4.000064016388196, "percentage": 80.0, "elapsed_time": "2:43:42", "remaining_time": "0:40:55", "throughput": 20020.47, "total_tokens": 196653696} +{"current_steps": 62490, "total_steps": 78105, "loss": 0.1062, "lr": 5.845378229961893e-07, "epoch": 4.000384098329172, "percentage": 80.01, "elapsed_time": "2:43:43", "remaining_time": "0:40:54", "throughput": 20020.69, "total_tokens": 196669312} +{"current_steps": 62495, "total_steps": 78105, "loss": 0.1773, "lr": 5.841788695298017e-07, "epoch": 4.000704180270149, "percentage": 80.01, "elapsed_time": "2:43:43", "remaining_time": "0:40:53", "throughput": 20020.91, "total_tokens": 196685312} +{"current_steps": 62496, "total_steps": 78105, "eval_loss": 0.5554755926132202, "epoch": 4.000768196658345, "percentage": 80.02, "elapsed_time": "2:44:35", "remaining_time": "0:41:06", "throughput": 19917.52, "total_tokens": 196687936} +{"current_steps": 62500, "total_steps": 78105, "loss": 0.1117, "lr": 5.838200117295426e-07, "epoch": 4.001024262211126, "percentage": 80.02, "elapsed_time": "2:45:10", "remaining_time": "0:41:14", "throughput": 19848.34, "total_tokens": 196700928} +{"current_steps": 62505, "total_steps": 78105, "loss": 0.1308, "lr": 5.83461249613331e-07, "epoch": 4.001344344152103, "percentage": 80.03, "elapsed_time": "2:45:10", "remaining_time": "0:41:13", "throughput": 19848.57, "total_tokens": 196717120} +{"current_steps": 62510, "total_steps": 78105, "loss": 0.1165, "lr": 5.831025831990817e-07, "epoch": 4.00166442609308, "percentage": 80.03, "elapsed_time": "2:45:11", "remaining_time": "0:41:12", "throughput": 19848.81, "total_tokens": 196732608} +{"current_steps": 62515, "total_steps": 78105, "loss": 0.0882, "lr": 5.827440125047046e-07, "epoch": 4.001984508034057, "percentage": 80.04, "elapsed_time": "2:45:12", "remaining_time": "0:41:11", "throughput": 19849.04, "total_tokens": 196748416} +{"current_steps": 62520, "total_steps": 78105, "loss": 0.1229, "lr": 5.823855375481049e-07, "epoch": 4.002304589975034, "percentage": 80.05, "elapsed_time": "2:45:12", "remaining_time": "0:41:11", "throughput": 19849.37, "total_tokens": 196766272} +{"current_steps": 62525, "total_steps": 78105, "loss": 0.0923, "lr": 5.820271583471832e-07, "epoch": 4.002624671916011, "percentage": 80.05, "elapsed_time": "2:45:13", "remaining_time": "0:41:10", "throughput": 19849.59, "total_tokens": 196781632} +{"current_steps": 62530, "total_steps": 78105, "loss": 0.1239, "lr": 5.81668874919834e-07, "epoch": 4.002944753856988, "percentage": 80.06, "elapsed_time": "2:45:14", "remaining_time": "0:41:09", "throughput": 19849.83, "total_tokens": 196797312} +{"current_steps": 62535, "total_steps": 78105, "loss": 0.1743, "lr": 5.813106872839503e-07, "epoch": 4.003264835797964, "percentage": 80.07, "elapsed_time": "2:45:14", "remaining_time": "0:41:08", "throughput": 19850.06, "total_tokens": 196813184} +{"current_steps": 62540, "total_steps": 78105, "loss": 0.1132, "lr": 5.809525954574153e-07, "epoch": 4.003584917738941, "percentage": 80.07, "elapsed_time": "2:45:15", "remaining_time": "0:41:07", "throughput": 19850.3, "total_tokens": 196828992} +{"current_steps": 62545, "total_steps": 78105, "loss": 0.092, "lr": 5.805945994581133e-07, "epoch": 4.003904999679918, "percentage": 80.08, "elapsed_time": "2:45:16", "remaining_time": "0:41:06", "throughput": 19850.5, "total_tokens": 196844160} +{"current_steps": 62550, "total_steps": 78105, "loss": 0.1455, "lr": 5.802366993039176e-07, "epoch": 4.004225081620895, "percentage": 80.08, "elapsed_time": "2:45:16", "remaining_time": "0:41:06", "throughput": 19850.71, "total_tokens": 196859456} +{"current_steps": 62555, "total_steps": 78105, "loss": 0.1404, "lr": 5.798788950127024e-07, "epoch": 4.004545163561872, "percentage": 80.09, "elapsed_time": "2:45:17", "remaining_time": "0:41:05", "throughput": 19850.93, "total_tokens": 196875072} +{"current_steps": 62560, "total_steps": 78105, "loss": 0.1236, "lr": 5.795211866023334e-07, "epoch": 4.004865245502849, "percentage": 80.1, "elapsed_time": "2:45:18", "remaining_time": "0:41:04", "throughput": 19851.09, "total_tokens": 196889088} +{"current_steps": 62565, "total_steps": 78105, "loss": 0.0884, "lr": 5.791635740906725e-07, "epoch": 4.005185327443826, "percentage": 80.1, "elapsed_time": "2:45:18", "remaining_time": "0:41:03", "throughput": 19851.26, "total_tokens": 196903744} +{"current_steps": 62570, "total_steps": 78105, "loss": 0.1446, "lr": 5.788060574955787e-07, "epoch": 4.005505409384803, "percentage": 80.11, "elapsed_time": "2:45:19", "remaining_time": "0:41:02", "throughput": 19851.49, "total_tokens": 196919296} +{"current_steps": 62575, "total_steps": 78105, "loss": 0.1382, "lr": 5.784486368349018e-07, "epoch": 4.00582549132578, "percentage": 80.12, "elapsed_time": "2:45:20", "remaining_time": "0:41:02", "throughput": 19851.69, "total_tokens": 196934528} +{"current_steps": 62580, "total_steps": 78105, "loss": 0.1447, "lr": 5.780913121264916e-07, "epoch": 4.006145573266756, "percentage": 80.12, "elapsed_time": "2:45:20", "remaining_time": "0:41:01", "throughput": 19851.96, "total_tokens": 196951104} +{"current_steps": 62585, "total_steps": 78105, "loss": 0.1598, "lr": 5.777340833881906e-07, "epoch": 4.006465655207733, "percentage": 80.13, "elapsed_time": "2:45:21", "remaining_time": "0:41:00", "throughput": 19852.17, "total_tokens": 196966336} +{"current_steps": 62590, "total_steps": 78105, "loss": 0.1354, "lr": 5.773769506378369e-07, "epoch": 4.00678573714871, "percentage": 80.14, "elapsed_time": "2:45:22", "remaining_time": "0:40:59", "throughput": 19852.72, "total_tokens": 196995136} +{"current_steps": 62595, "total_steps": 78105, "loss": 0.1417, "lr": 5.770199138932633e-07, "epoch": 4.007105819089687, "percentage": 80.14, "elapsed_time": "2:45:23", "remaining_time": "0:40:58", "throughput": 19852.94, "total_tokens": 197010496} +{"current_steps": 62600, "total_steps": 78105, "loss": 0.1278, "lr": 5.76662973172299e-07, "epoch": 4.007425901030664, "percentage": 80.15, "elapsed_time": "2:45:24", "remaining_time": "0:40:58", "throughput": 19853.19, "total_tokens": 197026560} +{"current_steps": 62605, "total_steps": 78105, "loss": 0.0678, "lr": 5.763061284927677e-07, "epoch": 4.007745982971641, "percentage": 80.15, "elapsed_time": "2:45:24", "remaining_time": "0:40:57", "throughput": 19853.47, "total_tokens": 197043712} +{"current_steps": 62610, "total_steps": 78105, "loss": 0.1013, "lr": 5.759493798724872e-07, "epoch": 4.008066064912618, "percentage": 80.16, "elapsed_time": "2:45:25", "remaining_time": "0:40:56", "throughput": 19853.75, "total_tokens": 197060608} +{"current_steps": 62615, "total_steps": 78105, "loss": 0.1109, "lr": 5.755927273292733e-07, "epoch": 4.008386146853595, "percentage": 80.17, "elapsed_time": "2:45:26", "remaining_time": "0:40:55", "throughput": 19853.97, "total_tokens": 197076288} +{"current_steps": 62620, "total_steps": 78105, "loss": 0.1732, "lr": 5.752361708809348e-07, "epoch": 4.008706228794572, "percentage": 80.17, "elapsed_time": "2:45:26", "remaining_time": "0:40:54", "throughput": 19854.19, "total_tokens": 197091712} +{"current_steps": 62625, "total_steps": 78105, "loss": 0.1169, "lr": 5.748797105452764e-07, "epoch": 4.009026310735548, "percentage": 80.18, "elapsed_time": "2:45:27", "remaining_time": "0:40:53", "throughput": 19854.37, "total_tokens": 197106496} +{"current_steps": 62630, "total_steps": 78105, "loss": 0.1016, "lr": 5.745233463400977e-07, "epoch": 4.009346392676525, "percentage": 80.19, "elapsed_time": "2:45:28", "remaining_time": "0:40:53", "throughput": 19854.59, "total_tokens": 197122176} +{"current_steps": 62635, "total_steps": 78105, "loss": 0.1032, "lr": 5.741670782831931e-07, "epoch": 4.009666474617502, "percentage": 80.19, "elapsed_time": "2:45:28", "remaining_time": "0:40:52", "throughput": 19854.83, "total_tokens": 197138240} +{"current_steps": 62640, "total_steps": 78105, "loss": 0.1633, "lr": 5.738109063923539e-07, "epoch": 4.009986556558479, "percentage": 80.2, "elapsed_time": "2:45:29", "remaining_time": "0:40:51", "throughput": 19855.03, "total_tokens": 197153408} +{"current_steps": 62645, "total_steps": 78105, "loss": 0.1155, "lr": 5.734548306853646e-07, "epoch": 4.010306638499456, "percentage": 80.21, "elapsed_time": "2:45:30", "remaining_time": "0:40:50", "throughput": 19855.23, "total_tokens": 197168640} +{"current_steps": 62650, "total_steps": 78105, "loss": 0.1154, "lr": 5.730988511800053e-07, "epoch": 4.010626720440433, "percentage": 80.21, "elapsed_time": "2:45:30", "remaining_time": "0:40:49", "throughput": 19855.45, "total_tokens": 197184448} +{"current_steps": 62655, "total_steps": 78105, "loss": 0.1091, "lr": 5.727429678940541e-07, "epoch": 4.01094680238141, "percentage": 80.22, "elapsed_time": "2:45:31", "remaining_time": "0:40:49", "throughput": 19855.7, "total_tokens": 197201088} +{"current_steps": 62660, "total_steps": 78105, "loss": 0.0878, "lr": 5.723871808452788e-07, "epoch": 4.011266884322387, "percentage": 80.23, "elapsed_time": "2:45:32", "remaining_time": "0:40:48", "throughput": 19855.9, "total_tokens": 197216320} +{"current_steps": 62665, "total_steps": 78105, "loss": 0.1662, "lr": 5.720314900514481e-07, "epoch": 4.011586966263364, "percentage": 80.23, "elapsed_time": "2:45:33", "remaining_time": "0:40:47", "throughput": 19856.11, "total_tokens": 197231744} +{"current_steps": 62670, "total_steps": 78105, "loss": 0.136, "lr": 5.71675895530322e-07, "epoch": 4.01190704820434, "percentage": 80.24, "elapsed_time": "2:45:33", "remaining_time": "0:40:46", "throughput": 19856.36, "total_tokens": 197248128} +{"current_steps": 62675, "total_steps": 78105, "loss": 0.0906, "lr": 5.71320397299657e-07, "epoch": 4.012227130145317, "percentage": 80.24, "elapsed_time": "2:45:34", "remaining_time": "0:40:45", "throughput": 19856.64, "total_tokens": 197265408} +{"current_steps": 62680, "total_steps": 78105, "loss": 0.1032, "lr": 5.709649953772064e-07, "epoch": 4.012547212086294, "percentage": 80.25, "elapsed_time": "2:45:35", "remaining_time": "0:40:44", "throughput": 19856.82, "total_tokens": 197280384} +{"current_steps": 62685, "total_steps": 78105, "loss": 0.1049, "lr": 5.706096897807148e-07, "epoch": 4.012867294027271, "percentage": 80.26, "elapsed_time": "2:45:35", "remaining_time": "0:40:44", "throughput": 19857.02, "total_tokens": 197295808} +{"current_steps": 62690, "total_steps": 78105, "loss": 0.1803, "lr": 5.702544805279267e-07, "epoch": 4.013187375968248, "percentage": 80.26, "elapsed_time": "2:45:36", "remaining_time": "0:40:43", "throughput": 19857.2, "total_tokens": 197310272} +{"current_steps": 62695, "total_steps": 78105, "loss": 0.1273, "lr": 5.698993676365769e-07, "epoch": 4.013507457909225, "percentage": 80.27, "elapsed_time": "2:45:37", "remaining_time": "0:40:42", "throughput": 19857.39, "total_tokens": 197325440} +{"current_steps": 62700, "total_steps": 78105, "loss": 0.0859, "lr": 5.695443511243998e-07, "epoch": 4.013827539850202, "percentage": 80.28, "elapsed_time": "2:45:37", "remaining_time": "0:40:41", "throughput": 19857.58, "total_tokens": 197340608} +{"current_steps": 62705, "total_steps": 78105, "loss": 0.1413, "lr": 5.691894310091223e-07, "epoch": 4.014147621791179, "percentage": 80.28, "elapsed_time": "2:45:38", "remaining_time": "0:40:40", "throughput": 19857.74, "total_tokens": 197354816} +{"current_steps": 62710, "total_steps": 78105, "loss": 0.1179, "lr": 5.688346073084674e-07, "epoch": 4.014467703732155, "percentage": 80.29, "elapsed_time": "2:45:39", "remaining_time": "0:40:39", "throughput": 19857.96, "total_tokens": 197370112} +{"current_steps": 62715, "total_steps": 78105, "loss": 0.1382, "lr": 5.684798800401531e-07, "epoch": 4.014787785673132, "percentage": 80.3, "elapsed_time": "2:45:39", "remaining_time": "0:40:39", "throughput": 19858.17, "total_tokens": 197385728} +{"current_steps": 62720, "total_steps": 78105, "loss": 0.1136, "lr": 5.681252492218928e-07, "epoch": 4.015107867614109, "percentage": 80.3, "elapsed_time": "2:45:40", "remaining_time": "0:40:38", "throughput": 19858.36, "total_tokens": 197401088} +{"current_steps": 62725, "total_steps": 78105, "loss": 0.1547, "lr": 5.67770714871394e-07, "epoch": 4.015427949555086, "percentage": 80.31, "elapsed_time": "2:45:41", "remaining_time": "0:40:37", "throughput": 19858.61, "total_tokens": 197417216} +{"current_steps": 62730, "total_steps": 78105, "loss": 0.103, "lr": 5.674162770063621e-07, "epoch": 4.015748031496063, "percentage": 80.31, "elapsed_time": "2:45:41", "remaining_time": "0:40:36", "throughput": 19858.82, "total_tokens": 197432768} +{"current_steps": 62735, "total_steps": 78105, "loss": 0.1073, "lr": 5.670619356444945e-07, "epoch": 4.01606811343704, "percentage": 80.32, "elapsed_time": "2:45:42", "remaining_time": "0:40:35", "throughput": 19858.99, "total_tokens": 197447360} +{"current_steps": 62740, "total_steps": 78105, "loss": 0.0736, "lr": 5.667076908034858e-07, "epoch": 4.016388195378017, "percentage": 80.33, "elapsed_time": "2:45:43", "remaining_time": "0:40:35", "throughput": 19859.22, "total_tokens": 197463232} +{"current_steps": 62745, "total_steps": 78105, "loss": 0.1192, "lr": 5.663535425010245e-07, "epoch": 4.016708277318994, "percentage": 80.33, "elapsed_time": "2:45:43", "remaining_time": "0:40:34", "throughput": 19859.48, "total_tokens": 197480000} +{"current_steps": 62750, "total_steps": 78105, "loss": 0.1443, "lr": 5.659994907547955e-07, "epoch": 4.017028359259971, "percentage": 80.34, "elapsed_time": "2:45:44", "remaining_time": "0:40:33", "throughput": 19859.69, "total_tokens": 197495424} +{"current_steps": 62755, "total_steps": 78105, "loss": 0.1448, "lr": 5.65645535582478e-07, "epoch": 4.017348441200947, "percentage": 80.35, "elapsed_time": "2:45:45", "remaining_time": "0:40:32", "throughput": 19859.91, "total_tokens": 197511168} +{"current_steps": 62760, "total_steps": 78105, "loss": 0.1295, "lr": 5.652916770017464e-07, "epoch": 4.017668523141924, "percentage": 80.35, "elapsed_time": "2:45:45", "remaining_time": "0:40:31", "throughput": 19860.16, "total_tokens": 197527552} +{"current_steps": 62765, "total_steps": 78105, "loss": 0.1673, "lr": 5.64937915030272e-07, "epoch": 4.017988605082901, "percentage": 80.36, "elapsed_time": "2:45:46", "remaining_time": "0:40:30", "throughput": 19860.39, "total_tokens": 197543488} +{"current_steps": 62770, "total_steps": 78105, "loss": 0.1176, "lr": 5.645842496857173e-07, "epoch": 4.018308687023878, "percentage": 80.37, "elapsed_time": "2:45:47", "remaining_time": "0:40:30", "throughput": 19860.6, "total_tokens": 197558656} +{"current_steps": 62775, "total_steps": 78105, "loss": 0.0851, "lr": 5.642306809857457e-07, "epoch": 4.018628768964855, "percentage": 80.37, "elapsed_time": "2:45:47", "remaining_time": "0:40:29", "throughput": 19860.78, "total_tokens": 197573312} +{"current_steps": 62780, "total_steps": 78105, "loss": 0.1452, "lr": 5.63877208948009e-07, "epoch": 4.018948850905832, "percentage": 80.38, "elapsed_time": "2:45:48", "remaining_time": "0:40:28", "throughput": 19860.98, "total_tokens": 197588672} +{"current_steps": 62785, "total_steps": 78105, "loss": 0.127, "lr": 5.635238335901605e-07, "epoch": 4.019268932846809, "percentage": 80.39, "elapsed_time": "2:45:49", "remaining_time": "0:40:27", "throughput": 19861.22, "total_tokens": 197604608} +{"current_steps": 62790, "total_steps": 78105, "loss": 0.1122, "lr": 5.631705549298452e-07, "epoch": 4.019589014787786, "percentage": 80.39, "elapsed_time": "2:45:49", "remaining_time": "0:40:26", "throughput": 19861.44, "total_tokens": 197620480} +{"current_steps": 62795, "total_steps": 78105, "loss": 0.112, "lr": 5.628173729847028e-07, "epoch": 4.019909096728763, "percentage": 80.4, "elapsed_time": "2:45:50", "remaining_time": "0:40:26", "throughput": 19861.63, "total_tokens": 197635456} +{"current_steps": 62800, "total_steps": 78105, "loss": 0.117, "lr": 5.624642877723721e-07, "epoch": 4.020229178669739, "percentage": 80.4, "elapsed_time": "2:45:51", "remaining_time": "0:40:25", "throughput": 19861.84, "total_tokens": 197650368} +{"current_steps": 62805, "total_steps": 78105, "loss": 0.179, "lr": 5.62111299310481e-07, "epoch": 4.020549260610716, "percentage": 80.41, "elapsed_time": "2:45:51", "remaining_time": "0:40:24", "throughput": 19862.07, "total_tokens": 197665920} +{"current_steps": 62810, "total_steps": 78105, "loss": 0.1436, "lr": 5.617584076166583e-07, "epoch": 4.020869342551693, "percentage": 80.42, "elapsed_time": "2:45:52", "remaining_time": "0:40:23", "throughput": 19862.27, "total_tokens": 197680704} +{"current_steps": 62815, "total_steps": 78105, "loss": 0.1605, "lr": 5.614056127085246e-07, "epoch": 4.02118942449267, "percentage": 80.42, "elapsed_time": "2:45:53", "remaining_time": "0:40:22", "throughput": 19862.45, "total_tokens": 197695168} +{"current_steps": 62820, "total_steps": 78105, "loss": 0.0953, "lr": 5.610529146036969e-07, "epoch": 4.021509506433647, "percentage": 80.43, "elapsed_time": "2:45:53", "remaining_time": "0:40:21", "throughput": 19862.64, "total_tokens": 197710016} +{"current_steps": 62825, "total_steps": 78105, "loss": 0.1414, "lr": 5.607003133197872e-07, "epoch": 4.021829588374624, "percentage": 80.44, "elapsed_time": "2:45:54", "remaining_time": "0:40:21", "throughput": 19862.82, "total_tokens": 197725056} +{"current_steps": 62830, "total_steps": 78105, "loss": 0.1614, "lr": 5.603478088744024e-07, "epoch": 4.022149670315601, "percentage": 80.44, "elapsed_time": "2:45:55", "remaining_time": "0:40:20", "throughput": 19863.04, "total_tokens": 197740672} +{"current_steps": 62835, "total_steps": 78105, "loss": 0.1611, "lr": 5.599954012851447e-07, "epoch": 4.022469752256578, "percentage": 80.45, "elapsed_time": "2:45:56", "remaining_time": "0:40:19", "throughput": 19863.53, "total_tokens": 197762816} +{"current_steps": 62840, "total_steps": 78105, "loss": 0.1423, "lr": 5.596430905696107e-07, "epoch": 4.022789834197555, "percentage": 80.46, "elapsed_time": "2:45:56", "remaining_time": "0:40:18", "throughput": 19863.71, "total_tokens": 197777472} +{"current_steps": 62845, "total_steps": 78105, "loss": 0.1156, "lr": 5.592908767453947e-07, "epoch": 4.023109916138531, "percentage": 80.46, "elapsed_time": "2:45:57", "remaining_time": "0:40:17", "throughput": 19863.93, "total_tokens": 197793152} +{"current_steps": 62850, "total_steps": 78105, "loss": 0.1107, "lr": 5.589387598300832e-07, "epoch": 4.023429998079508, "percentage": 80.47, "elapsed_time": "2:45:58", "remaining_time": "0:40:17", "throughput": 19864.12, "total_tokens": 197808256} +{"current_steps": 62855, "total_steps": 78105, "loss": 0.1397, "lr": 5.585867398412598e-07, "epoch": 4.023750080020485, "percentage": 80.48, "elapsed_time": "2:45:58", "remaining_time": "0:40:16", "throughput": 19864.33, "total_tokens": 197823616} +{"current_steps": 62860, "total_steps": 78105, "loss": 0.1653, "lr": 5.582348167965021e-07, "epoch": 4.024070161961462, "percentage": 80.48, "elapsed_time": "2:45:59", "remaining_time": "0:40:15", "throughput": 19864.56, "total_tokens": 197839168} +{"current_steps": 62865, "total_steps": 78105, "loss": 0.1231, "lr": 5.578829907133831e-07, "epoch": 4.024390243902439, "percentage": 80.49, "elapsed_time": "2:46:00", "remaining_time": "0:40:14", "throughput": 19864.86, "total_tokens": 197856320} +{"current_steps": 62870, "total_steps": 78105, "loss": 0.1525, "lr": 5.575312616094716e-07, "epoch": 4.024710325843416, "percentage": 80.49, "elapsed_time": "2:46:00", "remaining_time": "0:40:13", "throughput": 19865.05, "total_tokens": 197871104} +{"current_steps": 62875, "total_steps": 78105, "loss": 0.1495, "lr": 5.57179629502331e-07, "epoch": 4.025030407784393, "percentage": 80.5, "elapsed_time": "2:46:01", "remaining_time": "0:40:12", "throughput": 19865.29, "total_tokens": 197887104} +{"current_steps": 62880, "total_steps": 78105, "loss": 0.1425, "lr": 5.568280944095192e-07, "epoch": 4.02535048972537, "percentage": 80.51, "elapsed_time": "2:46:02", "remaining_time": "0:40:12", "throughput": 19865.49, "total_tokens": 197902144} +{"current_steps": 62885, "total_steps": 78105, "loss": 0.0992, "lr": 5.564766563485921e-07, "epoch": 4.025670571666347, "percentage": 80.51, "elapsed_time": "2:46:02", "remaining_time": "0:40:11", "throughput": 19865.73, "total_tokens": 197918208} +{"current_steps": 62890, "total_steps": 78105, "loss": 0.1317, "lr": 5.56125315337096e-07, "epoch": 4.025990653607323, "percentage": 80.52, "elapsed_time": "2:46:03", "remaining_time": "0:40:10", "throughput": 19865.95, "total_tokens": 197933504} +{"current_steps": 62895, "total_steps": 78105, "loss": 0.1354, "lr": 5.557740713925771e-07, "epoch": 4.0263107355483, "percentage": 80.53, "elapsed_time": "2:46:04", "remaining_time": "0:40:09", "throughput": 19866.18, "total_tokens": 197949248} +{"current_steps": 62900, "total_steps": 78105, "loss": 0.1555, "lr": 5.55422924532574e-07, "epoch": 4.026630817489277, "percentage": 80.53, "elapsed_time": "2:46:04", "remaining_time": "0:40:08", "throughput": 19866.39, "total_tokens": 197964672} +{"current_steps": 62905, "total_steps": 78105, "loss": 0.0964, "lr": 5.550718747746203e-07, "epoch": 4.026950899430254, "percentage": 80.54, "elapsed_time": "2:46:05", "remaining_time": "0:40:08", "throughput": 19866.62, "total_tokens": 197980544} +{"current_steps": 62910, "total_steps": 78105, "loss": 0.0851, "lr": 5.547209221362479e-07, "epoch": 4.027270981371231, "percentage": 80.55, "elapsed_time": "2:46:06", "remaining_time": "0:40:07", "throughput": 19866.82, "total_tokens": 197995520} +{"current_steps": 62915, "total_steps": 78105, "loss": 0.1104, "lr": 5.543700666349785e-07, "epoch": 4.027591063312208, "percentage": 80.55, "elapsed_time": "2:46:06", "remaining_time": "0:40:06", "throughput": 19867.17, "total_tokens": 198014272} +{"current_steps": 62920, "total_steps": 78105, "loss": 0.0735, "lr": 5.54019308288335e-07, "epoch": 4.027911145253185, "percentage": 80.56, "elapsed_time": "2:46:07", "remaining_time": "0:40:05", "throughput": 19867.35, "total_tokens": 198029248} +{"current_steps": 62925, "total_steps": 78105, "loss": 0.1291, "lr": 5.536686471138298e-07, "epoch": 4.028231227194162, "percentage": 80.56, "elapsed_time": "2:46:08", "remaining_time": "0:40:04", "throughput": 19867.56, "total_tokens": 198044224} +{"current_steps": 62930, "total_steps": 78105, "loss": 0.1579, "lr": 5.533180831289748e-07, "epoch": 4.028551309135139, "percentage": 80.57, "elapsed_time": "2:46:08", "remaining_time": "0:40:03", "throughput": 19867.81, "total_tokens": 198060736} +{"current_steps": 62935, "total_steps": 78105, "loss": 0.1227, "lr": 5.529676163512748e-07, "epoch": 4.028871391076115, "percentage": 80.58, "elapsed_time": "2:46:09", "remaining_time": "0:40:03", "throughput": 19868.05, "total_tokens": 198076672} +{"current_steps": 62940, "total_steps": 78105, "loss": 0.1297, "lr": 5.526172467982305e-07, "epoch": 4.029191473017092, "percentage": 80.58, "elapsed_time": "2:46:10", "remaining_time": "0:40:02", "throughput": 19868.31, "total_tokens": 198093312} +{"current_steps": 62945, "total_steps": 78105, "loss": 0.1562, "lr": 5.522669744873372e-07, "epoch": 4.029511554958069, "percentage": 80.59, "elapsed_time": "2:46:10", "remaining_time": "0:40:01", "throughput": 19868.5, "total_tokens": 198108032} +{"current_steps": 62950, "total_steps": 78105, "loss": 0.1627, "lr": 5.519167994360858e-07, "epoch": 4.029831636899046, "percentage": 80.6, "elapsed_time": "2:46:11", "remaining_time": "0:40:00", "throughput": 19868.73, "total_tokens": 198123904} +{"current_steps": 62955, "total_steps": 78105, "loss": 0.117, "lr": 5.515667216619617e-07, "epoch": 4.030151718840023, "percentage": 80.6, "elapsed_time": "2:46:12", "remaining_time": "0:39:59", "throughput": 19868.98, "total_tokens": 198140160} +{"current_steps": 62960, "total_steps": 78105, "loss": 0.0972, "lr": 5.51216741182447e-07, "epoch": 4.030471800781, "percentage": 80.61, "elapsed_time": "2:46:13", "remaining_time": "0:39:59", "throughput": 19869.23, "total_tokens": 198156544} +{"current_steps": 62965, "total_steps": 78105, "loss": 0.0985, "lr": 5.508668580150175e-07, "epoch": 4.030791882721977, "percentage": 80.62, "elapsed_time": "2:46:13", "remaining_time": "0:39:58", "throughput": 19869.5, "total_tokens": 198173504} +{"current_steps": 62970, "total_steps": 78105, "loss": 0.1219, "lr": 5.505170721771447e-07, "epoch": 4.031111964662954, "percentage": 80.62, "elapsed_time": "2:46:14", "remaining_time": "0:39:57", "throughput": 19869.71, "total_tokens": 198188416} +{"current_steps": 62975, "total_steps": 78105, "loss": 0.1055, "lr": 5.501673836862945e-07, "epoch": 4.03143204660393, "percentage": 80.63, "elapsed_time": "2:46:15", "remaining_time": "0:39:56", "throughput": 19869.89, "total_tokens": 198202944} +{"current_steps": 62980, "total_steps": 78105, "loss": 0.1133, "lr": 5.498177925599291e-07, "epoch": 4.031752128544907, "percentage": 80.64, "elapsed_time": "2:46:15", "remaining_time": "0:39:55", "throughput": 19870.09, "total_tokens": 198218240} +{"current_steps": 62985, "total_steps": 78105, "loss": 0.105, "lr": 5.494682988155048e-07, "epoch": 4.032072210485884, "percentage": 80.64, "elapsed_time": "2:46:16", "remaining_time": "0:39:54", "throughput": 19870.33, "total_tokens": 198233984} +{"current_steps": 62990, "total_steps": 78105, "loss": 0.065, "lr": 5.491189024704729e-07, "epoch": 4.032392292426861, "percentage": 80.65, "elapsed_time": "2:46:17", "remaining_time": "0:39:54", "throughput": 19870.52, "total_tokens": 198248640} +{"current_steps": 62995, "total_steps": 78105, "loss": 0.1014, "lr": 5.487696035422829e-07, "epoch": 4.032712374367838, "percentage": 80.65, "elapsed_time": "2:46:17", "remaining_time": "0:39:53", "throughput": 19870.74, "total_tokens": 198264256} +{"current_steps": 63000, "total_steps": 78105, "loss": 0.0645, "lr": 5.484204020483739e-07, "epoch": 4.033032456308815, "percentage": 80.66, "elapsed_time": "2:46:18", "remaining_time": "0:39:52", "throughput": 19870.95, "total_tokens": 198279552} +{"current_steps": 63005, "total_steps": 78105, "loss": 0.1114, "lr": 5.480712980061858e-07, "epoch": 4.033352538249792, "percentage": 80.67, "elapsed_time": "2:46:19", "remaining_time": "0:39:51", "throughput": 19871.21, "total_tokens": 198296192} +{"current_steps": 63010, "total_steps": 78105, "loss": 0.1338, "lr": 5.477222914331487e-07, "epoch": 4.033672620190769, "percentage": 80.67, "elapsed_time": "2:46:19", "remaining_time": "0:39:50", "throughput": 19871.41, "total_tokens": 198310976} +{"current_steps": 63015, "total_steps": 78105, "loss": 0.0964, "lr": 5.47373382346692e-07, "epoch": 4.033992702131746, "percentage": 80.68, "elapsed_time": "2:46:20", "remaining_time": "0:39:49", "throughput": 19871.6, "total_tokens": 198325824} +{"current_steps": 63020, "total_steps": 78105, "loss": 0.1392, "lr": 5.470245707642375e-07, "epoch": 4.034312784072722, "percentage": 80.69, "elapsed_time": "2:46:21", "remaining_time": "0:39:49", "throughput": 19871.81, "total_tokens": 198341248} +{"current_steps": 63025, "total_steps": 78105, "loss": 0.1401, "lr": 5.466758567032027e-07, "epoch": 4.034632866013699, "percentage": 80.69, "elapsed_time": "2:46:21", "remaining_time": "0:39:48", "throughput": 19872.01, "total_tokens": 198356544} +{"current_steps": 63030, "total_steps": 78105, "loss": 0.1066, "lr": 5.463272401810025e-07, "epoch": 4.034952947954676, "percentage": 80.7, "elapsed_time": "2:46:22", "remaining_time": "0:39:47", "throughput": 19872.24, "total_tokens": 198372416} +{"current_steps": 63035, "total_steps": 78105, "loss": 0.1044, "lr": 5.459787212150422e-07, "epoch": 4.035273029895653, "percentage": 80.71, "elapsed_time": "2:46:23", "remaining_time": "0:39:46", "throughput": 19872.48, "total_tokens": 198388096} +{"current_steps": 63040, "total_steps": 78105, "loss": 0.1113, "lr": 5.456302998227273e-07, "epoch": 4.03559311183663, "percentage": 80.71, "elapsed_time": "2:46:23", "remaining_time": "0:39:45", "throughput": 19872.68, "total_tokens": 198403136} +{"current_steps": 63045, "total_steps": 78105, "loss": 0.1056, "lr": 5.452819760214551e-07, "epoch": 4.035913193777607, "percentage": 80.72, "elapsed_time": "2:46:24", "remaining_time": "0:39:45", "throughput": 19872.9, "total_tokens": 198418752} +{"current_steps": 63050, "total_steps": 78105, "loss": 0.1626, "lr": 5.449337498286195e-07, "epoch": 4.036233275718584, "percentage": 80.72, "elapsed_time": "2:46:25", "remaining_time": "0:39:44", "throughput": 19873.12, "total_tokens": 198434240} +{"current_steps": 63055, "total_steps": 78105, "loss": 0.1379, "lr": 5.445856212616085e-07, "epoch": 4.036553357659561, "percentage": 80.73, "elapsed_time": "2:46:25", "remaining_time": "0:39:43", "throughput": 19873.34, "total_tokens": 198449920} +{"current_steps": 63060, "total_steps": 78105, "loss": 0.1009, "lr": 5.442375903378064e-07, "epoch": 4.036873439600538, "percentage": 80.74, "elapsed_time": "2:46:26", "remaining_time": "0:39:42", "throughput": 19873.51, "total_tokens": 198464576} +{"current_steps": 63065, "total_steps": 78105, "loss": 0.21, "lr": 5.438896570745919e-07, "epoch": 4.037193521541514, "percentage": 80.74, "elapsed_time": "2:46:27", "remaining_time": "0:39:41", "throughput": 19873.73, "total_tokens": 198480192} +{"current_steps": 63070, "total_steps": 78105, "loss": 0.1761, "lr": 5.435418214893387e-07, "epoch": 4.037513603482491, "percentage": 80.75, "elapsed_time": "2:46:27", "remaining_time": "0:39:40", "throughput": 19874.02, "total_tokens": 198497408} +{"current_steps": 63075, "total_steps": 78105, "loss": 0.118, "lr": 5.431940835994156e-07, "epoch": 4.037833685423468, "percentage": 80.76, "elapsed_time": "2:46:28", "remaining_time": "0:39:40", "throughput": 19874.19, "total_tokens": 198511808} +{"current_steps": 63080, "total_steps": 78105, "loss": 0.1109, "lr": 5.42846443422188e-07, "epoch": 4.038153767364445, "percentage": 80.76, "elapsed_time": "2:46:29", "remaining_time": "0:39:39", "throughput": 19874.45, "total_tokens": 198528128} +{"current_steps": 63085, "total_steps": 78105, "loss": 0.1732, "lr": 5.424989009750145e-07, "epoch": 4.038473849305422, "percentage": 80.77, "elapsed_time": "2:46:29", "remaining_time": "0:39:38", "throughput": 19874.66, "total_tokens": 198543424} +{"current_steps": 63090, "total_steps": 78105, "loss": 0.085, "lr": 5.421514562752498e-07, "epoch": 4.038793931246399, "percentage": 80.78, "elapsed_time": "2:46:30", "remaining_time": "0:39:37", "throughput": 19874.83, "total_tokens": 198558144} +{"current_steps": 63095, "total_steps": 78105, "loss": 0.1585, "lr": 5.418041093402432e-07, "epoch": 4.039114013187376, "percentage": 80.78, "elapsed_time": "2:46:31", "remaining_time": "0:39:36", "throughput": 19875.06, "total_tokens": 198573760} +{"current_steps": 63100, "total_steps": 78105, "loss": 0.1162, "lr": 5.414568601873396e-07, "epoch": 4.039434095128353, "percentage": 80.79, "elapsed_time": "2:46:31", "remaining_time": "0:39:36", "throughput": 19875.24, "total_tokens": 198588224} +{"current_steps": 63105, "total_steps": 78105, "loss": 0.2928, "lr": 5.411097088338785e-07, "epoch": 4.03975417706933, "percentage": 80.8, "elapsed_time": "2:46:32", "remaining_time": "0:39:35", "throughput": 19875.52, "total_tokens": 198605312} +{"current_steps": 63110, "total_steps": 78105, "loss": 0.1135, "lr": 5.407626552971946e-07, "epoch": 4.040074259010306, "percentage": 80.8, "elapsed_time": "2:46:33", "remaining_time": "0:39:34", "throughput": 19875.73, "total_tokens": 198621248} +{"current_steps": 63115, "total_steps": 78105, "loss": 0.1772, "lr": 5.404156995946197e-07, "epoch": 4.040394340951283, "percentage": 80.81, "elapsed_time": "2:46:33", "remaining_time": "0:39:33", "throughput": 19875.98, "total_tokens": 198637440} +{"current_steps": 63120, "total_steps": 78105, "loss": 0.1699, "lr": 5.400688417434763e-07, "epoch": 4.04071442289226, "percentage": 80.81, "elapsed_time": "2:46:34", "remaining_time": "0:39:32", "throughput": 19876.17, "total_tokens": 198652608} +{"current_steps": 63125, "total_steps": 78105, "loss": 0.1354, "lr": 5.397220817610866e-07, "epoch": 4.041034504833237, "percentage": 80.82, "elapsed_time": "2:46:35", "remaining_time": "0:39:31", "throughput": 19876.42, "total_tokens": 198668992} +{"current_steps": 63130, "total_steps": 78105, "loss": 0.1053, "lr": 5.393754196647655e-07, "epoch": 4.041354586774214, "percentage": 80.83, "elapsed_time": "2:46:35", "remaining_time": "0:39:31", "throughput": 19876.62, "total_tokens": 198684160} +{"current_steps": 63135, "total_steps": 78105, "loss": 0.134, "lr": 5.390288554718229e-07, "epoch": 4.041674668715191, "percentage": 80.83, "elapsed_time": "2:46:36", "remaining_time": "0:39:30", "throughput": 19876.85, "total_tokens": 198700160} +{"current_steps": 63140, "total_steps": 78105, "loss": 0.1397, "lr": 5.38682389199566e-07, "epoch": 4.041994750656168, "percentage": 80.84, "elapsed_time": "2:46:37", "remaining_time": "0:39:29", "throughput": 19877.05, "total_tokens": 198715328} +{"current_steps": 63145, "total_steps": 78105, "loss": 0.1005, "lr": 5.383360208652933e-07, "epoch": 4.042314832597145, "percentage": 80.85, "elapsed_time": "2:46:37", "remaining_time": "0:39:28", "throughput": 19877.25, "total_tokens": 198730624} +{"current_steps": 63150, "total_steps": 78105, "loss": 0.1437, "lr": 5.37989750486303e-07, "epoch": 4.042634914538122, "percentage": 80.85, "elapsed_time": "2:46:38", "remaining_time": "0:39:27", "throughput": 19877.44, "total_tokens": 198745280} +{"current_steps": 63155, "total_steps": 78105, "loss": 0.1493, "lr": 5.376435780798836e-07, "epoch": 4.042954996479098, "percentage": 80.86, "elapsed_time": "2:46:39", "remaining_time": "0:39:27", "throughput": 19877.71, "total_tokens": 198761792} +{"current_steps": 63160, "total_steps": 78105, "loss": 0.1642, "lr": 5.372975036633229e-07, "epoch": 4.043275078420075, "percentage": 80.87, "elapsed_time": "2:46:39", "remaining_time": "0:39:26", "throughput": 19877.99, "total_tokens": 198778560} +{"current_steps": 63165, "total_steps": 78105, "loss": 0.0871, "lr": 5.369515272539017e-07, "epoch": 4.043595160361052, "percentage": 80.87, "elapsed_time": "2:46:40", "remaining_time": "0:39:25", "throughput": 19878.21, "total_tokens": 198793792} +{"current_steps": 63170, "total_steps": 78105, "loss": 0.1275, "lr": 5.366056488688958e-07, "epoch": 4.043915242302029, "percentage": 80.88, "elapsed_time": "2:46:41", "remaining_time": "0:39:24", "throughput": 19878.46, "total_tokens": 198810304} +{"current_steps": 63175, "total_steps": 78105, "loss": 0.1562, "lr": 5.362598685255771e-07, "epoch": 4.044235324243006, "percentage": 80.88, "elapsed_time": "2:46:41", "remaining_time": "0:39:23", "throughput": 19878.69, "total_tokens": 198826240} +{"current_steps": 63180, "total_steps": 78105, "loss": 0.1165, "lr": 5.359141862412118e-07, "epoch": 4.044555406183983, "percentage": 80.89, "elapsed_time": "2:46:42", "remaining_time": "0:39:22", "throughput": 19878.89, "total_tokens": 198841600} +{"current_steps": 63185, "total_steps": 78105, "loss": 0.1027, "lr": 5.355686020330608e-07, "epoch": 4.04487548812496, "percentage": 80.9, "elapsed_time": "2:46:43", "remaining_time": "0:39:22", "throughput": 19879.12, "total_tokens": 198857280} +{"current_steps": 63190, "total_steps": 78105, "loss": 0.1533, "lr": 5.352231159183821e-07, "epoch": 4.045195570065937, "percentage": 80.9, "elapsed_time": "2:46:44", "remaining_time": "0:39:21", "throughput": 19879.43, "total_tokens": 198875008} +{"current_steps": 63195, "total_steps": 78105, "loss": 0.1865, "lr": 5.348777279144269e-07, "epoch": 4.045515652006914, "percentage": 80.91, "elapsed_time": "2:46:44", "remaining_time": "0:39:20", "throughput": 19879.62, "total_tokens": 198889792} +{"current_steps": 63200, "total_steps": 78105, "loss": 0.1483, "lr": 5.34532438038442e-07, "epoch": 4.04583573394789, "percentage": 80.92, "elapsed_time": "2:46:45", "remaining_time": "0:39:19", "throughput": 19879.81, "total_tokens": 198904384} +{"current_steps": 63205, "total_steps": 78105, "loss": 0.1277, "lr": 5.341872463076695e-07, "epoch": 4.046155815888867, "percentage": 80.92, "elapsed_time": "2:46:46", "remaining_time": "0:39:18", "throughput": 19880.07, "total_tokens": 198921152} +{"current_steps": 63210, "total_steps": 78105, "loss": 0.1328, "lr": 5.338421527393461e-07, "epoch": 4.046475897829844, "percentage": 80.93, "elapsed_time": "2:46:46", "remaining_time": "0:39:18", "throughput": 19880.4, "total_tokens": 198939264} +{"current_steps": 63215, "total_steps": 78105, "loss": 0.1319, "lr": 5.334971573507045e-07, "epoch": 4.046795979770821, "percentage": 80.94, "elapsed_time": "2:46:47", "remaining_time": "0:39:17", "throughput": 19880.66, "total_tokens": 198955584} +{"current_steps": 63220, "total_steps": 78105, "loss": 0.1289, "lr": 5.331522601589709e-07, "epoch": 4.047116061711798, "percentage": 80.94, "elapsed_time": "2:46:48", "remaining_time": "0:39:16", "throughput": 19880.88, "total_tokens": 198970880} +{"current_steps": 63225, "total_steps": 78105, "loss": 0.118, "lr": 5.328074611813699e-07, "epoch": 4.047436143652775, "percentage": 80.95, "elapsed_time": "2:46:48", "remaining_time": "0:39:15", "throughput": 19881.11, "total_tokens": 198986624} +{"current_steps": 63230, "total_steps": 78105, "loss": 0.1392, "lr": 5.324627604351163e-07, "epoch": 4.047756225593752, "percentage": 80.96, "elapsed_time": "2:46:49", "remaining_time": "0:39:14", "throughput": 19881.33, "total_tokens": 199002112} +{"current_steps": 63235, "total_steps": 78105, "loss": 0.0864, "lr": 5.321181579374252e-07, "epoch": 4.048076307534729, "percentage": 80.96, "elapsed_time": "2:46:50", "remaining_time": "0:39:13", "throughput": 19881.54, "total_tokens": 199017600} +{"current_steps": 63240, "total_steps": 78105, "loss": 0.1273, "lr": 5.317736537055012e-07, "epoch": 4.048396389475705, "percentage": 80.97, "elapsed_time": "2:46:50", "remaining_time": "0:39:13", "throughput": 19881.77, "total_tokens": 199033536} +{"current_steps": 63245, "total_steps": 78105, "loss": 0.1282, "lr": 5.314292477565499e-07, "epoch": 4.048716471416682, "percentage": 80.97, "elapsed_time": "2:46:51", "remaining_time": "0:39:12", "throughput": 19881.96, "total_tokens": 199048192} +{"current_steps": 63250, "total_steps": 78105, "loss": 0.1739, "lr": 5.310849401077683e-07, "epoch": 4.049036553357659, "percentage": 80.98, "elapsed_time": "2:46:52", "remaining_time": "0:39:11", "throughput": 19882.21, "total_tokens": 199064192} +{"current_steps": 63255, "total_steps": 78105, "loss": 0.1056, "lr": 5.307407307763479e-07, "epoch": 4.049356635298636, "percentage": 80.99, "elapsed_time": "2:46:52", "remaining_time": "0:39:10", "throughput": 19882.44, "total_tokens": 199080256} +{"current_steps": 63260, "total_steps": 78105, "loss": 0.1282, "lr": 5.303966197794797e-07, "epoch": 4.049676717239613, "percentage": 80.99, "elapsed_time": "2:46:53", "remaining_time": "0:39:09", "throughput": 19882.67, "total_tokens": 199096192} +{"current_steps": 63265, "total_steps": 78105, "loss": 0.1329, "lr": 5.300526071343434e-07, "epoch": 4.04999679918059, "percentage": 81.0, "elapsed_time": "2:46:54", "remaining_time": "0:39:09", "throughput": 19882.89, "total_tokens": 199112000} +{"current_steps": 63270, "total_steps": 78105, "loss": 0.1424, "lr": 5.297086928581196e-07, "epoch": 4.050316881121567, "percentage": 81.01, "elapsed_time": "2:46:54", "remaining_time": "0:39:08", "throughput": 19883.09, "total_tokens": 199127040} +{"current_steps": 63275, "total_steps": 78105, "loss": 0.1404, "lr": 5.293648769679807e-07, "epoch": 4.050636963062544, "percentage": 81.01, "elapsed_time": "2:46:55", "remaining_time": "0:39:07", "throughput": 19883.32, "total_tokens": 199142720} +{"current_steps": 63280, "total_steps": 78105, "loss": 0.1317, "lr": 5.290211594810954e-07, "epoch": 4.050957045003521, "percentage": 81.02, "elapsed_time": "2:46:56", "remaining_time": "0:39:06", "throughput": 19883.56, "total_tokens": 199158848} +{"current_steps": 63285, "total_steps": 78105, "loss": 0.1368, "lr": 5.286775404146266e-07, "epoch": 4.051277126944497, "percentage": 81.03, "elapsed_time": "2:46:56", "remaining_time": "0:39:05", "throughput": 19883.77, "total_tokens": 199173760} +{"current_steps": 63290, "total_steps": 78105, "loss": 0.1028, "lr": 5.283340197857334e-07, "epoch": 4.051597208885474, "percentage": 81.03, "elapsed_time": "2:46:57", "remaining_time": "0:39:04", "throughput": 19883.97, "total_tokens": 199188608} +{"current_steps": 63295, "total_steps": 78105, "loss": 0.1147, "lr": 5.279905976115693e-07, "epoch": 4.051917290826451, "percentage": 81.04, "elapsed_time": "2:46:58", "remaining_time": "0:39:04", "throughput": 19884.15, "total_tokens": 199203136} +{"current_steps": 63300, "total_steps": 78105, "loss": 0.1054, "lr": 5.276472739092828e-07, "epoch": 4.052237372767428, "percentage": 81.04, "elapsed_time": "2:46:58", "remaining_time": "0:39:03", "throughput": 19884.38, "total_tokens": 199219008} +{"current_steps": 63305, "total_steps": 78105, "loss": 0.1534, "lr": 5.273040486960171e-07, "epoch": 4.052557454708405, "percentage": 81.05, "elapsed_time": "2:46:59", "remaining_time": "0:39:02", "throughput": 19884.59, "total_tokens": 199234624} +{"current_steps": 63310, "total_steps": 78105, "loss": 0.1307, "lr": 5.269609219889127e-07, "epoch": 4.052877536649382, "percentage": 81.06, "elapsed_time": "2:47:00", "remaining_time": "0:39:01", "throughput": 19884.78, "total_tokens": 199249408} +{"current_steps": 63315, "total_steps": 78105, "loss": 0.1606, "lr": 5.266178938051026e-07, "epoch": 4.053197618590359, "percentage": 81.06, "elapsed_time": "2:47:00", "remaining_time": "0:39:00", "throughput": 19884.96, "total_tokens": 199263872} +{"current_steps": 63320, "total_steps": 78105, "loss": 0.1286, "lr": 5.262749641617157e-07, "epoch": 4.053517700531336, "percentage": 81.07, "elapsed_time": "2:47:01", "remaining_time": "0:38:59", "throughput": 19885.18, "total_tokens": 199279936} +{"current_steps": 63325, "total_steps": 78105, "loss": 0.1539, "lr": 5.259321330758766e-07, "epoch": 4.053837782472313, "percentage": 81.08, "elapsed_time": "2:47:02", "remaining_time": "0:38:59", "throughput": 19885.4, "total_tokens": 199295552} +{"current_steps": 63330, "total_steps": 78105, "loss": 0.0866, "lr": 5.255894005647033e-07, "epoch": 4.054157864413289, "percentage": 81.08, "elapsed_time": "2:47:02", "remaining_time": "0:38:58", "throughput": 19885.6, "total_tokens": 199311104} +{"current_steps": 63335, "total_steps": 78105, "loss": 0.113, "lr": 5.252467666453126e-07, "epoch": 4.054477946354266, "percentage": 81.09, "elapsed_time": "2:47:03", "remaining_time": "0:38:57", "throughput": 19885.81, "total_tokens": 199326656} +{"current_steps": 63340, "total_steps": 78105, "loss": 0.0873, "lr": 5.249042313348107e-07, "epoch": 4.054798028295243, "percentage": 81.1, "elapsed_time": "2:47:04", "remaining_time": "0:38:56", "throughput": 19886.01, "total_tokens": 199341760} +{"current_steps": 63345, "total_steps": 78105, "loss": 0.1428, "lr": 5.245617946503051e-07, "epoch": 4.05511811023622, "percentage": 81.1, "elapsed_time": "2:47:04", "remaining_time": "0:38:55", "throughput": 19886.2, "total_tokens": 199356480} +{"current_steps": 63350, "total_steps": 78105, "loss": 0.1471, "lr": 5.242194566088923e-07, "epoch": 4.055438192177197, "percentage": 81.11, "elapsed_time": "2:47:05", "remaining_time": "0:38:55", "throughput": 19886.41, "total_tokens": 199371712} +{"current_steps": 63355, "total_steps": 78105, "loss": 0.138, "lr": 5.238772172276693e-07, "epoch": 4.055758274118174, "percentage": 81.12, "elapsed_time": "2:47:06", "remaining_time": "0:38:54", "throughput": 19886.61, "total_tokens": 199387200} +{"current_steps": 63360, "total_steps": 78105, "loss": 0.1257, "lr": 5.235350765237246e-07, "epoch": 4.056078356059151, "percentage": 81.12, "elapsed_time": "2:47:06", "remaining_time": "0:38:53", "throughput": 19886.86, "total_tokens": 199403776} +{"current_steps": 63365, "total_steps": 78105, "loss": 0.1356, "lr": 5.231930345141431e-07, "epoch": 4.056398438000128, "percentage": 81.13, "elapsed_time": "2:47:07", "remaining_time": "0:38:52", "throughput": 19887.07, "total_tokens": 199419200} +{"current_steps": 63370, "total_steps": 78105, "loss": 0.1023, "lr": 5.228510912160048e-07, "epoch": 4.056718519941105, "percentage": 81.13, "elapsed_time": "2:47:08", "remaining_time": "0:38:51", "throughput": 19887.3, "total_tokens": 199434752} +{"current_steps": 63375, "total_steps": 78105, "loss": 0.1009, "lr": 5.225092466463838e-07, "epoch": 4.057038601882081, "percentage": 81.14, "elapsed_time": "2:47:08", "remaining_time": "0:38:50", "throughput": 19887.53, "total_tokens": 199450816} +{"current_steps": 63380, "total_steps": 78105, "loss": 0.1229, "lr": 5.221675008223517e-07, "epoch": 4.057358683823058, "percentage": 81.15, "elapsed_time": "2:47:09", "remaining_time": "0:38:50", "throughput": 19887.74, "total_tokens": 199466496} +{"current_steps": 63385, "total_steps": 78105, "loss": 0.0859, "lr": 5.218258537609713e-07, "epoch": 4.057678765764035, "percentage": 81.15, "elapsed_time": "2:47:10", "remaining_time": "0:38:49", "throughput": 19887.96, "total_tokens": 199482304} +{"current_steps": 63390, "total_steps": 78105, "loss": 0.0956, "lr": 5.214843054793045e-07, "epoch": 4.057998847705012, "percentage": 81.16, "elapsed_time": "2:47:10", "remaining_time": "0:38:48", "throughput": 19888.19, "total_tokens": 199498368} +{"current_steps": 63395, "total_steps": 78105, "loss": 0.0836, "lr": 5.21142855994406e-07, "epoch": 4.058318929645989, "percentage": 81.17, "elapsed_time": "2:47:11", "remaining_time": "0:38:47", "throughput": 19888.43, "total_tokens": 199514304} +{"current_steps": 63400, "total_steps": 78105, "loss": 0.1224, "lr": 5.208015053233253e-07, "epoch": 4.058639011586966, "percentage": 81.17, "elapsed_time": "2:47:12", "remaining_time": "0:38:46", "throughput": 19888.66, "total_tokens": 199530048} +{"current_steps": 63405, "total_steps": 78105, "loss": 0.1053, "lr": 5.204602534831085e-07, "epoch": 4.058959093527943, "percentage": 81.18, "elapsed_time": "2:47:12", "remaining_time": "0:38:46", "throughput": 19888.82, "total_tokens": 199544512} +{"current_steps": 63410, "total_steps": 78105, "loss": 0.1026, "lr": 5.201191004907955e-07, "epoch": 4.05927917546892, "percentage": 81.19, "elapsed_time": "2:47:13", "remaining_time": "0:38:45", "throughput": 19889.02, "total_tokens": 199559616} +{"current_steps": 63415, "total_steps": 78105, "loss": 0.1402, "lr": 5.197780463634211e-07, "epoch": 4.059599257409897, "percentage": 81.19, "elapsed_time": "2:47:14", "remaining_time": "0:38:44", "throughput": 19889.24, "total_tokens": 199575488} +{"current_steps": 63420, "total_steps": 78105, "loss": 0.1276, "lr": 5.194370911180177e-07, "epoch": 4.0599193393508735, "percentage": 81.2, "elapsed_time": "2:47:15", "remaining_time": "0:38:43", "throughput": 19889.46, "total_tokens": 199591232} +{"current_steps": 63425, "total_steps": 78105, "loss": 0.1628, "lr": 5.190962347716086e-07, "epoch": 4.0602394212918504, "percentage": 81.2, "elapsed_time": "2:47:15", "remaining_time": "0:38:42", "throughput": 19889.71, "total_tokens": 199607232} +{"current_steps": 63430, "total_steps": 78105, "loss": 0.1143, "lr": 5.187554773412157e-07, "epoch": 4.060559503232827, "percentage": 81.21, "elapsed_time": "2:47:16", "remaining_time": "0:38:41", "throughput": 19889.88, "total_tokens": 199621760} +{"current_steps": 63435, "total_steps": 78105, "loss": 0.0886, "lr": 5.184148188438545e-07, "epoch": 4.060879585173804, "percentage": 81.22, "elapsed_time": "2:47:17", "remaining_time": "0:38:41", "throughput": 19890.14, "total_tokens": 199638016} +{"current_steps": 63440, "total_steps": 78105, "loss": 0.1261, "lr": 5.180742592965354e-07, "epoch": 4.061199667114781, "percentage": 81.22, "elapsed_time": "2:47:17", "remaining_time": "0:38:40", "throughput": 19890.36, "total_tokens": 199653376} +{"current_steps": 63445, "total_steps": 78105, "loss": 0.1589, "lr": 5.177337987162645e-07, "epoch": 4.061519749055758, "percentage": 81.23, "elapsed_time": "2:47:18", "remaining_time": "0:38:39", "throughput": 19890.71, "total_tokens": 199671680} +{"current_steps": 63450, "total_steps": 78105, "loss": 0.1513, "lr": 5.173934371200418e-07, "epoch": 4.061839830996735, "percentage": 81.24, "elapsed_time": "2:47:19", "remaining_time": "0:38:38", "throughput": 19891.0, "total_tokens": 199689408} +{"current_steps": 63455, "total_steps": 78105, "loss": 0.1684, "lr": 5.17053174524865e-07, "epoch": 4.062159912937712, "percentage": 81.24, "elapsed_time": "2:47:19", "remaining_time": "0:38:37", "throughput": 19891.2, "total_tokens": 199704576} +{"current_steps": 63460, "total_steps": 78105, "loss": 0.1274, "lr": 5.167130109477228e-07, "epoch": 4.062479994878689, "percentage": 81.25, "elapsed_time": "2:47:20", "remaining_time": "0:38:37", "throughput": 19891.55, "total_tokens": 199723200} +{"current_steps": 63465, "total_steps": 78105, "loss": 0.0847, "lr": 5.163729464056033e-07, "epoch": 4.0628000768196655, "percentage": 81.26, "elapsed_time": "2:47:21", "remaining_time": "0:38:36", "throughput": 19891.71, "total_tokens": 199737344} +{"current_steps": 63470, "total_steps": 78105, "loss": 0.1188, "lr": 5.160329809154854e-07, "epoch": 4.0631201587606425, "percentage": 81.26, "elapsed_time": "2:47:21", "remaining_time": "0:38:35", "throughput": 19891.93, "total_tokens": 199753152} +{"current_steps": 63475, "total_steps": 78105, "loss": 0.1069, "lr": 5.156931144943466e-07, "epoch": 4.0634402407016195, "percentage": 81.27, "elapsed_time": "2:47:22", "remaining_time": "0:38:34", "throughput": 19892.1, "total_tokens": 199767808} +{"current_steps": 63480, "total_steps": 78105, "loss": 0.1627, "lr": 5.153533471591582e-07, "epoch": 4.0637603226425965, "percentage": 81.28, "elapsed_time": "2:47:23", "remaining_time": "0:38:33", "throughput": 19892.27, "total_tokens": 199782080} +{"current_steps": 63485, "total_steps": 78105, "loss": 0.1198, "lr": 5.150136789268853e-07, "epoch": 4.0640804045835734, "percentage": 81.28, "elapsed_time": "2:47:23", "remaining_time": "0:38:33", "throughput": 19892.48, "total_tokens": 199797312} +{"current_steps": 63490, "total_steps": 78105, "loss": 0.2233, "lr": 5.146741098144911e-07, "epoch": 4.06440048652455, "percentage": 81.29, "elapsed_time": "2:47:24", "remaining_time": "0:38:32", "throughput": 19892.68, "total_tokens": 199812544} +{"current_steps": 63495, "total_steps": 78105, "loss": 0.1031, "lr": 5.143346398389292e-07, "epoch": 4.064720568465527, "percentage": 81.29, "elapsed_time": "2:47:25", "remaining_time": "0:38:31", "throughput": 19892.88, "total_tokens": 199827776} +{"current_steps": 63500, "total_steps": 78105, "loss": 0.1403, "lr": 5.139952690171534e-07, "epoch": 4.065040650406504, "percentage": 81.3, "elapsed_time": "2:47:25", "remaining_time": "0:38:30", "throughput": 19893.09, "total_tokens": 199843264} +{"current_steps": 63505, "total_steps": 78105, "loss": 0.116, "lr": 5.13655997366109e-07, "epoch": 4.0653607323474805, "percentage": 81.31, "elapsed_time": "2:47:26", "remaining_time": "0:38:29", "throughput": 19893.3, "total_tokens": 199858752} +{"current_steps": 63510, "total_steps": 78105, "loss": 0.1519, "lr": 5.133168249027376e-07, "epoch": 4.0656808142884575, "percentage": 81.31, "elapsed_time": "2:47:27", "remaining_time": "0:38:28", "throughput": 19893.51, "total_tokens": 199874304} +{"current_steps": 63515, "total_steps": 78105, "loss": 0.1366, "lr": 5.129777516439757e-07, "epoch": 4.0660008962294345, "percentage": 81.32, "elapsed_time": "2:47:27", "remaining_time": "0:38:28", "throughput": 19893.79, "total_tokens": 199890944} +{"current_steps": 63520, "total_steps": 78105, "loss": 0.1564, "lr": 5.126387776067548e-07, "epoch": 4.0663209781704115, "percentage": 81.33, "elapsed_time": "2:47:28", "remaining_time": "0:38:27", "throughput": 19893.98, "total_tokens": 199905920} +{"current_steps": 63525, "total_steps": 78105, "loss": 0.1467, "lr": 5.122999028080017e-07, "epoch": 4.0666410601113885, "percentage": 81.33, "elapsed_time": "2:47:29", "remaining_time": "0:38:26", "throughput": 19894.22, "total_tokens": 199921728} +{"current_steps": 63530, "total_steps": 78105, "loss": 0.132, "lr": 5.119611272646377e-07, "epoch": 4.0669611420523655, "percentage": 81.34, "elapsed_time": "2:47:29", "remaining_time": "0:38:25", "throughput": 19894.41, "total_tokens": 199936896} +{"current_steps": 63535, "total_steps": 78105, "loss": 0.1072, "lr": 5.116224509935791e-07, "epoch": 4.0672812239933425, "percentage": 81.35, "elapsed_time": "2:47:30", "remaining_time": "0:38:24", "throughput": 19894.6, "total_tokens": 199951808} +{"current_steps": 63540, "total_steps": 78105, "loss": 0.1189, "lr": 5.112838740117396e-07, "epoch": 4.0676013059343195, "percentage": 81.35, "elapsed_time": "2:47:31", "remaining_time": "0:38:24", "throughput": 19894.81, "total_tokens": 199967360} +{"current_steps": 63545, "total_steps": 78105, "loss": 0.1474, "lr": 5.109453963360234e-07, "epoch": 4.0679213878752964, "percentage": 81.36, "elapsed_time": "2:47:31", "remaining_time": "0:38:23", "throughput": 19895.04, "total_tokens": 199983360} +{"current_steps": 63550, "total_steps": 78105, "loss": 0.1933, "lr": 5.106070179833339e-07, "epoch": 4.0682414698162725, "percentage": 81.36, "elapsed_time": "2:47:32", "remaining_time": "0:38:22", "throughput": 19895.26, "total_tokens": 199999040} +{"current_steps": 63555, "total_steps": 78105, "loss": 0.1199, "lr": 5.102687389705676e-07, "epoch": 4.0685615517572495, "percentage": 81.37, "elapsed_time": "2:47:33", "remaining_time": "0:38:21", "throughput": 19895.51, "total_tokens": 200015104} +{"current_steps": 63560, "total_steps": 78105, "loss": 0.1405, "lr": 5.099305593146159e-07, "epoch": 4.0688816336982265, "percentage": 81.38, "elapsed_time": "2:47:33", "remaining_time": "0:38:20", "throughput": 19895.74, "total_tokens": 200031168} +{"current_steps": 63565, "total_steps": 78105, "loss": 0.1306, "lr": 5.095924790323672e-07, "epoch": 4.0692017156392035, "percentage": 81.38, "elapsed_time": "2:47:34", "remaining_time": "0:38:19", "throughput": 19895.94, "total_tokens": 200046208} +{"current_steps": 63570, "total_steps": 78105, "loss": 0.1384, "lr": 5.092544981407013e-07, "epoch": 4.0695217975801805, "percentage": 81.39, "elapsed_time": "2:47:35", "remaining_time": "0:38:19", "throughput": 19896.15, "total_tokens": 200061696} +{"current_steps": 63575, "total_steps": 78105, "loss": 0.1234, "lr": 5.089166166564977e-07, "epoch": 4.0698418795211575, "percentage": 81.4, "elapsed_time": "2:47:35", "remaining_time": "0:38:18", "throughput": 19896.35, "total_tokens": 200076992} +{"current_steps": 63580, "total_steps": 78105, "loss": 0.1477, "lr": 5.085788345966255e-07, "epoch": 4.0701619614621345, "percentage": 81.4, "elapsed_time": "2:47:36", "remaining_time": "0:38:17", "throughput": 19896.55, "total_tokens": 200092224} +{"current_steps": 63585, "total_steps": 78105, "loss": 0.1053, "lr": 5.082411519779542e-07, "epoch": 4.0704820434031115, "percentage": 81.41, "elapsed_time": "2:47:37", "remaining_time": "0:38:16", "throughput": 19896.77, "total_tokens": 200107776} +{"current_steps": 63590, "total_steps": 78105, "loss": 0.0908, "lr": 5.079035688173454e-07, "epoch": 4.0708021253440885, "percentage": 81.42, "elapsed_time": "2:47:37", "remaining_time": "0:38:15", "throughput": 19896.99, "total_tokens": 200123136} +{"current_steps": 63595, "total_steps": 78105, "loss": 0.1172, "lr": 5.075660851316555e-07, "epoch": 4.071122207285065, "percentage": 81.42, "elapsed_time": "2:47:38", "remaining_time": "0:38:15", "throughput": 19897.2, "total_tokens": 200138688} +{"current_steps": 63600, "total_steps": 78105, "loss": 0.0586, "lr": 5.072287009377372e-07, "epoch": 4.071442289226042, "percentage": 81.43, "elapsed_time": "2:47:39", "remaining_time": "0:38:14", "throughput": 19897.47, "total_tokens": 200155456} +{"current_steps": 63605, "total_steps": 78105, "loss": 0.1135, "lr": 5.068914162524368e-07, "epoch": 4.071762371167019, "percentage": 81.44, "elapsed_time": "2:47:40", "remaining_time": "0:38:13", "throughput": 19897.65, "total_tokens": 200170560} +{"current_steps": 63610, "total_steps": 78105, "loss": 0.1052, "lr": 5.065542310925992e-07, "epoch": 4.0720824531079955, "percentage": 81.44, "elapsed_time": "2:47:40", "remaining_time": "0:38:12", "throughput": 19897.87, "total_tokens": 200186048} +{"current_steps": 63615, "total_steps": 78105, "loss": 0.1529, "lr": 5.062171454750581e-07, "epoch": 4.0724025350489725, "percentage": 81.45, "elapsed_time": "2:47:41", "remaining_time": "0:38:11", "throughput": 19898.08, "total_tokens": 200201216} +{"current_steps": 63620, "total_steps": 78105, "loss": 0.1382, "lr": 5.058801594166482e-07, "epoch": 4.0727226169899495, "percentage": 81.45, "elapsed_time": "2:47:42", "remaining_time": "0:38:10", "throughput": 19898.28, "total_tokens": 200216576} +{"current_steps": 63625, "total_steps": 78105, "loss": 0.1454, "lr": 5.05543272934196e-07, "epoch": 4.0730426989309265, "percentage": 81.46, "elapsed_time": "2:47:42", "remaining_time": "0:38:10", "throughput": 19898.51, "total_tokens": 200232640} +{"current_steps": 63630, "total_steps": 78105, "loss": 0.1427, "lr": 5.052064860445241e-07, "epoch": 4.0733627808719035, "percentage": 81.47, "elapsed_time": "2:47:43", "remaining_time": "0:38:09", "throughput": 19898.75, "total_tokens": 200249088} +{"current_steps": 63635, "total_steps": 78105, "loss": 0.1248, "lr": 5.048697987644499e-07, "epoch": 4.0736828628128805, "percentage": 81.47, "elapsed_time": "2:47:44", "remaining_time": "0:38:08", "throughput": 19898.95, "total_tokens": 200264320} +{"current_steps": 63640, "total_steps": 78105, "loss": 0.1284, "lr": 5.045332111107856e-07, "epoch": 4.074002944753857, "percentage": 81.48, "elapsed_time": "2:47:44", "remaining_time": "0:38:07", "throughput": 19899.2, "total_tokens": 200280832} +{"current_steps": 63645, "total_steps": 78105, "loss": 0.157, "lr": 5.041967231003383e-07, "epoch": 4.074323026694834, "percentage": 81.49, "elapsed_time": "2:47:45", "remaining_time": "0:38:06", "throughput": 19899.49, "total_tokens": 200298048} +{"current_steps": 63650, "total_steps": 78105, "loss": 0.1335, "lr": 5.038603347499121e-07, "epoch": 4.074643108635811, "percentage": 81.49, "elapsed_time": "2:47:46", "remaining_time": "0:38:06", "throughput": 19899.68, "total_tokens": 200312768} +{"current_steps": 63655, "total_steps": 78105, "loss": 0.1238, "lr": 5.035240460763019e-07, "epoch": 4.074963190576788, "percentage": 81.5, "elapsed_time": "2:47:46", "remaining_time": "0:38:05", "throughput": 19899.9, "total_tokens": 200328512} +{"current_steps": 63660, "total_steps": 78105, "loss": 0.1152, "lr": 5.031878570963019e-07, "epoch": 4.075283272517765, "percentage": 81.51, "elapsed_time": "2:47:47", "remaining_time": "0:38:04", "throughput": 19900.16, "total_tokens": 200345152} +{"current_steps": 63665, "total_steps": 78105, "loss": 0.1495, "lr": 5.028517678266997e-07, "epoch": 4.075603354458742, "percentage": 81.51, "elapsed_time": "2:47:48", "remaining_time": "0:38:03", "throughput": 19900.37, "total_tokens": 200360832} +{"current_steps": 63670, "total_steps": 78105, "loss": 0.1661, "lr": 5.025157782842774e-07, "epoch": 4.0759234363997185, "percentage": 81.52, "elapsed_time": "2:47:48", "remaining_time": "0:38:02", "throughput": 19900.61, "total_tokens": 200376960} +{"current_steps": 63675, "total_steps": 78105, "loss": 0.1434, "lr": 5.021798884858126e-07, "epoch": 4.0762435183406955, "percentage": 81.52, "elapsed_time": "2:47:49", "remaining_time": "0:38:01", "throughput": 19900.8, "total_tokens": 200392064} +{"current_steps": 63680, "total_steps": 78105, "loss": 0.1084, "lr": 5.018440984480775e-07, "epoch": 4.0765636002816725, "percentage": 81.53, "elapsed_time": "2:47:50", "remaining_time": "0:38:01", "throughput": 19901.03, "total_tokens": 200407936} +{"current_steps": 63685, "total_steps": 78105, "loss": 0.1253, "lr": 5.01508408187841e-07, "epoch": 4.076883682222649, "percentage": 81.54, "elapsed_time": "2:47:50", "remaining_time": "0:38:00", "throughput": 19901.27, "total_tokens": 200424192} +{"current_steps": 63690, "total_steps": 78105, "loss": 0.1007, "lr": 5.011728177218636e-07, "epoch": 4.077203764163626, "percentage": 81.54, "elapsed_time": "2:47:51", "remaining_time": "0:37:59", "throughput": 19901.5, "total_tokens": 200440576} +{"current_steps": 63695, "total_steps": 78105, "loss": 0.1397, "lr": 5.008373270669048e-07, "epoch": 4.077523846104603, "percentage": 81.55, "elapsed_time": "2:47:52", "remaining_time": "0:37:58", "throughput": 19901.69, "total_tokens": 200455680} +{"current_steps": 63700, "total_steps": 78105, "loss": 0.1862, "lr": 5.005019362397167e-07, "epoch": 4.07784392804558, "percentage": 81.56, "elapsed_time": "2:47:52", "remaining_time": "0:37:57", "throughput": 19901.95, "total_tokens": 200472064} +{"current_steps": 63705, "total_steps": 78105, "loss": 0.1342, "lr": 5.00166645257047e-07, "epoch": 4.078164009986557, "percentage": 81.56, "elapsed_time": "2:47:53", "remaining_time": "0:37:57", "throughput": 19902.16, "total_tokens": 200487360} +{"current_steps": 63710, "total_steps": 78105, "loss": 0.1402, "lr": 4.998314541356384e-07, "epoch": 4.078484091927534, "percentage": 81.57, "elapsed_time": "2:47:54", "remaining_time": "0:37:56", "throughput": 19902.37, "total_tokens": 200502464} +{"current_steps": 63715, "total_steps": 78105, "loss": 0.122, "lr": 4.994963628922281e-07, "epoch": 4.078804173868511, "percentage": 81.58, "elapsed_time": "2:47:54", "remaining_time": "0:37:55", "throughput": 19902.59, "total_tokens": 200518144} +{"current_steps": 63720, "total_steps": 78105, "loss": 0.1182, "lr": 4.991613715435495e-07, "epoch": 4.079124255809488, "percentage": 81.58, "elapsed_time": "2:47:55", "remaining_time": "0:37:54", "throughput": 19902.78, "total_tokens": 200533312} +{"current_steps": 63725, "total_steps": 78105, "loss": 0.0836, "lr": 4.988264801063291e-07, "epoch": 4.0794443377504646, "percentage": 81.59, "elapsed_time": "2:47:56", "remaining_time": "0:37:53", "throughput": 19903.02, "total_tokens": 200549248} +{"current_steps": 63730, "total_steps": 78105, "loss": 0.112, "lr": 4.984916885972915e-07, "epoch": 4.079764419691441, "percentage": 81.6, "elapsed_time": "2:47:57", "remaining_time": "0:37:52", "throughput": 19903.28, "total_tokens": 200565696} +{"current_steps": 63735, "total_steps": 78105, "loss": 0.1929, "lr": 4.981569970331534e-07, "epoch": 4.080084501632418, "percentage": 81.6, "elapsed_time": "2:47:57", "remaining_time": "0:37:52", "throughput": 19903.52, "total_tokens": 200581696} +{"current_steps": 63740, "total_steps": 78105, "loss": 0.1161, "lr": 4.978224054306274e-07, "epoch": 4.080404583573395, "percentage": 81.61, "elapsed_time": "2:47:58", "remaining_time": "0:37:51", "throughput": 19903.72, "total_tokens": 200597120} +{"current_steps": 63745, "total_steps": 78105, "loss": 0.0953, "lr": 4.974879138064217e-07, "epoch": 4.080724665514372, "percentage": 81.61, "elapsed_time": "2:47:59", "remaining_time": "0:37:50", "throughput": 19903.91, "total_tokens": 200611712} +{"current_steps": 63750, "total_steps": 78105, "loss": 0.0989, "lr": 4.971535221772391e-07, "epoch": 4.081044747455349, "percentage": 81.62, "elapsed_time": "2:47:59", "remaining_time": "0:37:49", "throughput": 19904.1, "total_tokens": 200626560} +{"current_steps": 63755, "total_steps": 78105, "loss": 0.128, "lr": 4.968192305597769e-07, "epoch": 4.081364829396326, "percentage": 81.63, "elapsed_time": "2:48:00", "remaining_time": "0:37:48", "throughput": 19904.31, "total_tokens": 200641600} +{"current_steps": 63760, "total_steps": 78105, "loss": 0.138, "lr": 4.964850389707282e-07, "epoch": 4.081684911337303, "percentage": 81.63, "elapsed_time": "2:48:00", "remaining_time": "0:37:48", "throughput": 19904.49, "total_tokens": 200656192} +{"current_steps": 63765, "total_steps": 78105, "loss": 0.1218, "lr": 4.9615094742678e-07, "epoch": 4.08200499327828, "percentage": 81.64, "elapsed_time": "2:48:01", "remaining_time": "0:37:47", "throughput": 19904.67, "total_tokens": 200670848} +{"current_steps": 63770, "total_steps": 78105, "loss": 0.1446, "lr": 4.958169559446171e-07, "epoch": 4.082325075219256, "percentage": 81.65, "elapsed_time": "2:48:02", "remaining_time": "0:37:46", "throughput": 19904.88, "total_tokens": 200686144} +{"current_steps": 63775, "total_steps": 78105, "loss": 0.1186, "lr": 4.954830645409147e-07, "epoch": 4.082645157160233, "percentage": 81.65, "elapsed_time": "2:48:02", "remaining_time": "0:37:45", "throughput": 19905.07, "total_tokens": 200701248} +{"current_steps": 63780, "total_steps": 78105, "loss": 0.1228, "lr": 4.951492732323479e-07, "epoch": 4.08296523910121, "percentage": 81.66, "elapsed_time": "2:48:03", "remaining_time": "0:37:44", "throughput": 19905.25, "total_tokens": 200715840} +{"current_steps": 63785, "total_steps": 78105, "loss": 0.1434, "lr": 4.948155820355832e-07, "epoch": 4.083285321042187, "percentage": 81.67, "elapsed_time": "2:48:04", "remaining_time": "0:37:43", "throughput": 19905.5, "total_tokens": 200732672} +{"current_steps": 63790, "total_steps": 78105, "loss": 0.1807, "lr": 4.944819909672832e-07, "epoch": 4.083605402983164, "percentage": 81.67, "elapsed_time": "2:48:04", "remaining_time": "0:37:43", "throughput": 19905.69, "total_tokens": 200747456} +{"current_steps": 63795, "total_steps": 78105, "loss": 0.1366, "lr": 4.941485000441074e-07, "epoch": 4.083925484924141, "percentage": 81.68, "elapsed_time": "2:48:05", "remaining_time": "0:37:42", "throughput": 19905.93, "total_tokens": 200763904} +{"current_steps": 63800, "total_steps": 78105, "loss": 0.1214, "lr": 4.938151092827059e-07, "epoch": 4.084245566865118, "percentage": 81.68, "elapsed_time": "2:48:06", "remaining_time": "0:37:41", "throughput": 19906.15, "total_tokens": 200779648} +{"current_steps": 63805, "total_steps": 78105, "loss": 0.1133, "lr": 4.934818186997298e-07, "epoch": 4.084565648806095, "percentage": 81.69, "elapsed_time": "2:48:06", "remaining_time": "0:37:40", "throughput": 19906.36, "total_tokens": 200795072} +{"current_steps": 63810, "total_steps": 78105, "loss": 0.1034, "lr": 4.931486283118183e-07, "epoch": 4.084885730747072, "percentage": 81.7, "elapsed_time": "2:48:07", "remaining_time": "0:37:39", "throughput": 19906.53, "total_tokens": 200809536} +{"current_steps": 63815, "total_steps": 78105, "loss": 0.1396, "lr": 4.92815538135612e-07, "epoch": 4.085205812688048, "percentage": 81.7, "elapsed_time": "2:48:08", "remaining_time": "0:37:39", "throughput": 19906.77, "total_tokens": 200825472} +{"current_steps": 63820, "total_steps": 78105, "loss": 0.0972, "lr": 4.924825481877427e-07, "epoch": 4.085525894629025, "percentage": 81.71, "elapsed_time": "2:48:08", "remaining_time": "0:37:38", "throughput": 19906.99, "total_tokens": 200841088} +{"current_steps": 63825, "total_steps": 78105, "loss": 0.1445, "lr": 4.921496584848379e-07, "epoch": 4.085845976570002, "percentage": 81.72, "elapsed_time": "2:48:09", "remaining_time": "0:37:37", "throughput": 19907.27, "total_tokens": 200857920} +{"current_steps": 63830, "total_steps": 78105, "loss": 0.1206, "lr": 4.918168690435208e-07, "epoch": 4.086166058510979, "percentage": 81.72, "elapsed_time": "2:48:10", "remaining_time": "0:37:36", "throughput": 19907.53, "total_tokens": 200874880} +{"current_steps": 63835, "total_steps": 78105, "loss": 0.1534, "lr": 4.914841798804085e-07, "epoch": 4.086486140451956, "percentage": 81.73, "elapsed_time": "2:48:11", "remaining_time": "0:37:35", "throughput": 19907.72, "total_tokens": 200889536} +{"current_steps": 63840, "total_steps": 78105, "loss": 0.1226, "lr": 4.911515910121153e-07, "epoch": 4.086806222392933, "percentage": 81.74, "elapsed_time": "2:48:11", "remaining_time": "0:37:34", "throughput": 19907.98, "total_tokens": 200906304} +{"current_steps": 63845, "total_steps": 78105, "loss": 0.1881, "lr": 4.908191024552466e-07, "epoch": 4.08712630433391, "percentage": 81.74, "elapsed_time": "2:48:12", "remaining_time": "0:37:34", "throughput": 19908.19, "total_tokens": 200921856} +{"current_steps": 63850, "total_steps": 78105, "loss": 0.1129, "lr": 4.904867142264074e-07, "epoch": 4.087446386274887, "percentage": 81.75, "elapsed_time": "2:48:13", "remaining_time": "0:37:33", "throughput": 19908.39, "total_tokens": 200937088} +{"current_steps": 63855, "total_steps": 78105, "loss": 0.0849, "lr": 4.901544263421945e-07, "epoch": 4.087766468215864, "percentage": 81.76, "elapsed_time": "2:48:13", "remaining_time": "0:37:32", "throughput": 19908.63, "total_tokens": 200953280} +{"current_steps": 63860, "total_steps": 78105, "loss": 0.1147, "lr": 4.898222388192005e-07, "epoch": 4.08808655015684, "percentage": 81.76, "elapsed_time": "2:48:14", "remaining_time": "0:37:31", "throughput": 19908.84, "total_tokens": 200968960} +{"current_steps": 63865, "total_steps": 78105, "loss": 0.1434, "lr": 4.894901516740133e-07, "epoch": 4.088406632097817, "percentage": 81.77, "elapsed_time": "2:48:15", "remaining_time": "0:37:30", "throughput": 19909.07, "total_tokens": 200984704} +{"current_steps": 63870, "total_steps": 78105, "loss": 0.1706, "lr": 4.891581649232155e-07, "epoch": 4.088726714038794, "percentage": 81.77, "elapsed_time": "2:48:15", "remaining_time": "0:37:30", "throughput": 19909.26, "total_tokens": 201000064} +{"current_steps": 63875, "total_steps": 78105, "loss": 0.1149, "lr": 4.888262785833844e-07, "epoch": 4.089046795979771, "percentage": 81.78, "elapsed_time": "2:48:16", "remaining_time": "0:37:29", "throughput": 19909.44, "total_tokens": 201014848} +{"current_steps": 63880, "total_steps": 78105, "loss": 0.1785, "lr": 4.884944926710943e-07, "epoch": 4.089366877920748, "percentage": 81.79, "elapsed_time": "2:48:17", "remaining_time": "0:37:28", "throughput": 19909.66, "total_tokens": 201030464} +{"current_steps": 63885, "total_steps": 78105, "loss": 0.1517, "lr": 4.881628072029105e-07, "epoch": 4.089686959861725, "percentage": 81.79, "elapsed_time": "2:48:17", "remaining_time": "0:37:27", "throughput": 19909.92, "total_tokens": 201047104} +{"current_steps": 63890, "total_steps": 78105, "loss": 0.1125, "lr": 4.878312221953979e-07, "epoch": 4.090007041802702, "percentage": 81.8, "elapsed_time": "2:48:18", "remaining_time": "0:37:26", "throughput": 19910.12, "total_tokens": 201062272} +{"current_steps": 63895, "total_steps": 78105, "loss": 0.1329, "lr": 4.87499737665112e-07, "epoch": 4.090327123743679, "percentage": 81.81, "elapsed_time": "2:48:19", "remaining_time": "0:37:26", "throughput": 19910.53, "total_tokens": 201082624} +{"current_steps": 63900, "total_steps": 78105, "loss": 0.1418, "lr": 4.871683536286068e-07, "epoch": 4.090647205684656, "percentage": 81.81, "elapsed_time": "2:48:19", "remaining_time": "0:37:25", "throughput": 19910.75, "total_tokens": 201098496} +{"current_steps": 63905, "total_steps": 78105, "loss": 0.1341, "lr": 4.868370701024299e-07, "epoch": 4.090967287625632, "percentage": 81.82, "elapsed_time": "2:48:20", "remaining_time": "0:37:24", "throughput": 19910.92, "total_tokens": 201113152} +{"current_steps": 63910, "total_steps": 78105, "loss": 0.0994, "lr": 4.865058871031228e-07, "epoch": 4.091287369566609, "percentage": 81.83, "elapsed_time": "2:48:21", "remaining_time": "0:37:23", "throughput": 19911.19, "total_tokens": 201129728} +{"current_steps": 63915, "total_steps": 78105, "loss": 0.1385, "lr": 4.86174804647225e-07, "epoch": 4.091607451507586, "percentage": 81.83, "elapsed_time": "2:48:21", "remaining_time": "0:37:22", "throughput": 19911.39, "total_tokens": 201144640} +{"current_steps": 63920, "total_steps": 78105, "loss": 0.1163, "lr": 4.858438227512666e-07, "epoch": 4.091927533448563, "percentage": 81.84, "elapsed_time": "2:48:22", "remaining_time": "0:37:21", "throughput": 19911.63, "total_tokens": 201161024} +{"current_steps": 63925, "total_steps": 78105, "loss": 0.1613, "lr": 4.855129414317769e-07, "epoch": 4.09224761538954, "percentage": 81.84, "elapsed_time": "2:48:23", "remaining_time": "0:37:21", "throughput": 19911.85, "total_tokens": 201176512} +{"current_steps": 63930, "total_steps": 78105, "loss": 0.0998, "lr": 4.851821607052779e-07, "epoch": 4.092567697330517, "percentage": 81.85, "elapsed_time": "2:48:23", "remaining_time": "0:37:20", "throughput": 19912.03, "total_tokens": 201191040} +{"current_steps": 63935, "total_steps": 78105, "loss": 0.1349, "lr": 4.84851480588287e-07, "epoch": 4.092887779271494, "percentage": 81.86, "elapsed_time": "2:48:24", "remaining_time": "0:37:19", "throughput": 19912.28, "total_tokens": 201207296} +{"current_steps": 63940, "total_steps": 78105, "loss": 0.0892, "lr": 4.845209010973164e-07, "epoch": 4.093207861212471, "percentage": 81.86, "elapsed_time": "2:48:25", "remaining_time": "0:37:18", "throughput": 19912.5, "total_tokens": 201223104} +{"current_steps": 63945, "total_steps": 78105, "loss": 0.178, "lr": 4.841904222488741e-07, "epoch": 4.093527943153448, "percentage": 81.87, "elapsed_time": "2:48:26", "remaining_time": "0:37:17", "throughput": 19912.76, "total_tokens": 201239296} +{"current_steps": 63950, "total_steps": 78105, "loss": 0.1348, "lr": 4.838600440594618e-07, "epoch": 4.093848025094424, "percentage": 81.88, "elapsed_time": "2:48:26", "remaining_time": "0:37:17", "throughput": 19912.96, "total_tokens": 201254272} +{"current_steps": 63955, "total_steps": 78105, "loss": 0.1422, "lr": 4.835297665455768e-07, "epoch": 4.094168107035401, "percentage": 81.88, "elapsed_time": "2:48:27", "remaining_time": "0:37:16", "throughput": 19913.13, "total_tokens": 201268544} +{"current_steps": 63960, "total_steps": 78105, "loss": 0.317, "lr": 4.831995897237124e-07, "epoch": 4.094488188976378, "percentage": 81.89, "elapsed_time": "2:48:27", "remaining_time": "0:37:15", "throughput": 19913.34, "total_tokens": 201283840} +{"current_steps": 63965, "total_steps": 78105, "loss": 0.0856, "lr": 4.828695136103557e-07, "epoch": 4.094808270917355, "percentage": 81.9, "elapsed_time": "2:48:28", "remaining_time": "0:37:14", "throughput": 19913.57, "total_tokens": 201300096} +{"current_steps": 63970, "total_steps": 78105, "loss": 0.1197, "lr": 4.825395382219883e-07, "epoch": 4.095128352858332, "percentage": 81.9, "elapsed_time": "2:48:29", "remaining_time": "0:37:13", "throughput": 19913.78, "total_tokens": 201315968} +{"current_steps": 63975, "total_steps": 78105, "loss": 0.1351, "lr": 4.822096635750879e-07, "epoch": 4.095448434799309, "percentage": 81.91, "elapsed_time": "2:48:30", "remaining_time": "0:37:12", "throughput": 19914.02, "total_tokens": 201331968} +{"current_steps": 63980, "total_steps": 78105, "loss": 0.1312, "lr": 4.818798896861265e-07, "epoch": 4.095768516740286, "percentage": 81.92, "elapsed_time": "2:48:30", "remaining_time": "0:37:12", "throughput": 19914.33, "total_tokens": 201349760} +{"current_steps": 63985, "total_steps": 78105, "loss": 0.1428, "lr": 4.815502165715713e-07, "epoch": 4.096088598681263, "percentage": 81.92, "elapsed_time": "2:48:31", "remaining_time": "0:37:11", "throughput": 19914.64, "total_tokens": 201367552} +{"current_steps": 63990, "total_steps": 78105, "loss": 0.1233, "lr": 4.812206442478848e-07, "epoch": 4.09640868062224, "percentage": 81.93, "elapsed_time": "2:48:32", "remaining_time": "0:37:10", "throughput": 19914.83, "total_tokens": 201382272} +{"current_steps": 63995, "total_steps": 78105, "loss": 0.1355, "lr": 4.808911727315233e-07, "epoch": 4.096728762563216, "percentage": 81.93, "elapsed_time": "2:48:32", "remaining_time": "0:37:09", "throughput": 19915.07, "total_tokens": 201398336} +{"current_steps": 64000, "total_steps": 78105, "loss": 0.1449, "lr": 4.805618020389405e-07, "epoch": 4.097048844504193, "percentage": 81.94, "elapsed_time": "2:48:33", "remaining_time": "0:37:08", "throughput": 19915.25, "total_tokens": 201412992} +{"current_steps": 64005, "total_steps": 78105, "loss": 0.2353, "lr": 4.802325321865814e-07, "epoch": 4.09736892644517, "percentage": 81.95, "elapsed_time": "2:48:34", "remaining_time": "0:37:08", "throughput": 19915.44, "total_tokens": 201427840} +{"current_steps": 64010, "total_steps": 78105, "loss": 0.1559, "lr": 4.799033631908894e-07, "epoch": 4.097689008386147, "percentage": 81.95, "elapsed_time": "2:48:34", "remaining_time": "0:37:07", "throughput": 19915.68, "total_tokens": 201444096} +{"current_steps": 64015, "total_steps": 78105, "loss": 0.1141, "lr": 4.795742950683013e-07, "epoch": 4.098009090327124, "percentage": 81.96, "elapsed_time": "2:48:35", "remaining_time": "0:37:06", "throughput": 19915.94, "total_tokens": 201460800} +{"current_steps": 64020, "total_steps": 78105, "loss": 0.1633, "lr": 4.792453278352485e-07, "epoch": 4.098329172268101, "percentage": 81.97, "elapsed_time": "2:48:36", "remaining_time": "0:37:05", "throughput": 19916.19, "total_tokens": 201477440} +{"current_steps": 64025, "total_steps": 78105, "loss": 0.1256, "lr": 4.789164615081593e-07, "epoch": 4.098649254209078, "percentage": 81.97, "elapsed_time": "2:48:36", "remaining_time": "0:37:04", "throughput": 19916.4, "total_tokens": 201492992} +{"current_steps": 64030, "total_steps": 78105, "loss": 0.1386, "lr": 4.785876961034533e-07, "epoch": 4.098969336150055, "percentage": 81.98, "elapsed_time": "2:48:37", "remaining_time": "0:37:04", "throughput": 19916.61, "total_tokens": 201508608} +{"current_steps": 64035, "total_steps": 78105, "loss": 0.0883, "lr": 4.782590316375499e-07, "epoch": 4.099289418091031, "percentage": 81.99, "elapsed_time": "2:48:38", "remaining_time": "0:37:03", "throughput": 19916.81, "total_tokens": 201523840} +{"current_steps": 64040, "total_steps": 78105, "loss": 0.2145, "lr": 4.779304681268584e-07, "epoch": 4.099609500032008, "percentage": 81.99, "elapsed_time": "2:48:38", "remaining_time": "0:37:02", "throughput": 19917.08, "total_tokens": 201540672} +{"current_steps": 64045, "total_steps": 78105, "loss": 0.1174, "lr": 4.776020055877872e-07, "epoch": 4.099929581972985, "percentage": 82.0, "elapsed_time": "2:48:39", "remaining_time": "0:37:01", "throughput": 19917.26, "total_tokens": 201555200} +{"current_steps": 64050, "total_steps": 78105, "loss": 0.121, "lr": 4.772736440367379e-07, "epoch": 4.100249663913962, "percentage": 82.0, "elapsed_time": "2:48:40", "remaining_time": "0:37:00", "throughput": 19917.49, "total_tokens": 201570880} +{"current_steps": 64055, "total_steps": 78105, "loss": 0.1114, "lr": 4.769453834901066e-07, "epoch": 4.100569745854939, "percentage": 82.01, "elapsed_time": "2:48:40", "remaining_time": "0:36:59", "throughput": 19917.68, "total_tokens": 201585664} +{"current_steps": 64060, "total_steps": 78105, "loss": 0.1461, "lr": 4.766172239642852e-07, "epoch": 4.100889827795916, "percentage": 82.02, "elapsed_time": "2:48:41", "remaining_time": "0:36:59", "throughput": 19917.91, "total_tokens": 201601792} +{"current_steps": 64065, "total_steps": 78105, "loss": 0.1063, "lr": 4.7628916547566017e-07, "epoch": 4.101209909736893, "percentage": 82.02, "elapsed_time": "2:48:42", "remaining_time": "0:36:58", "throughput": 19918.14, "total_tokens": 201617280} +{"current_steps": 64070, "total_steps": 78105, "loss": 0.1303, "lr": 4.7596120804061314e-07, "epoch": 4.10152999167787, "percentage": 82.03, "elapsed_time": "2:48:42", "remaining_time": "0:36:57", "throughput": 19918.38, "total_tokens": 201633472} +{"current_steps": 64075, "total_steps": 78105, "loss": 0.1477, "lr": 4.756333516755199e-07, "epoch": 4.101850073618847, "percentage": 82.04, "elapsed_time": "2:48:43", "remaining_time": "0:36:56", "throughput": 19918.58, "total_tokens": 201648512} +{"current_steps": 64080, "total_steps": 78105, "loss": 0.1502, "lr": 4.753055963967529e-07, "epoch": 4.102170155559823, "percentage": 82.04, "elapsed_time": "2:48:44", "remaining_time": "0:36:55", "throughput": 19918.84, "total_tokens": 201665152} +{"current_steps": 64085, "total_steps": 78105, "loss": 0.1126, "lr": 4.749779422206785e-07, "epoch": 4.1024902375008, "percentage": 82.05, "elapsed_time": "2:48:45", "remaining_time": "0:36:55", "throughput": 19919.07, "total_tokens": 201680896} +{"current_steps": 64090, "total_steps": 78105, "loss": 0.1152, "lr": 4.746503891636575e-07, "epoch": 4.102810319441777, "percentage": 82.06, "elapsed_time": "2:48:45", "remaining_time": "0:36:54", "throughput": 19919.26, "total_tokens": 201696064} +{"current_steps": 64095, "total_steps": 78105, "loss": 0.1107, "lr": 4.743229372420466e-07, "epoch": 4.103130401382754, "percentage": 82.06, "elapsed_time": "2:48:46", "remaining_time": "0:36:53", "throughput": 19919.46, "total_tokens": 201711104} +{"current_steps": 64100, "total_steps": 78105, "loss": 0.1282, "lr": 4.739955864721968e-07, "epoch": 4.103450483323731, "percentage": 82.07, "elapsed_time": "2:48:47", "remaining_time": "0:36:52", "throughput": 19919.71, "total_tokens": 201727232} +{"current_steps": 64105, "total_steps": 78105, "loss": 0.1236, "lr": 4.7366833687045365e-07, "epoch": 4.103770565264708, "percentage": 82.08, "elapsed_time": "2:48:47", "remaining_time": "0:36:51", "throughput": 19919.92, "total_tokens": 201742464} +{"current_steps": 64110, "total_steps": 78105, "loss": 0.1583, "lr": 4.7334118845316024e-07, "epoch": 4.104090647205685, "percentage": 82.08, "elapsed_time": "2:48:48", "remaining_time": "0:36:50", "throughput": 19920.13, "total_tokens": 201757824} +{"current_steps": 64115, "total_steps": 78105, "loss": 0.1226, "lr": 4.730141412366501e-07, "epoch": 4.104410729146662, "percentage": 82.09, "elapsed_time": "2:48:49", "remaining_time": "0:36:50", "throughput": 19920.34, "total_tokens": 201773440} +{"current_steps": 64120, "total_steps": 78105, "loss": 0.0671, "lr": 4.726871952372569e-07, "epoch": 4.104730811087639, "percentage": 82.09, "elapsed_time": "2:48:49", "remaining_time": "0:36:49", "throughput": 19920.56, "total_tokens": 201789248} +{"current_steps": 64125, "total_steps": 78105, "loss": 0.1372, "lr": 4.7236035047130425e-07, "epoch": 4.105050893028615, "percentage": 82.1, "elapsed_time": "2:48:50", "remaining_time": "0:36:48", "throughput": 19920.77, "total_tokens": 201804928} +{"current_steps": 64130, "total_steps": 78105, "loss": 0.1448, "lr": 4.720336069551143e-07, "epoch": 4.105370974969592, "percentage": 82.11, "elapsed_time": "2:48:51", "remaining_time": "0:36:47", "throughput": 19920.98, "total_tokens": 201820352} +{"current_steps": 64135, "total_steps": 78105, "loss": 0.103, "lr": 4.717069647050029e-07, "epoch": 4.105691056910569, "percentage": 82.11, "elapsed_time": "2:48:51", "remaining_time": "0:36:46", "throughput": 19921.22, "total_tokens": 201836160} +{"current_steps": 64140, "total_steps": 78105, "loss": 0.1144, "lr": 4.7138042373728016e-07, "epoch": 4.106011138851546, "percentage": 82.12, "elapsed_time": "2:48:52", "remaining_time": "0:36:46", "throughput": 19921.47, "total_tokens": 201852736} +{"current_steps": 64145, "total_steps": 78105, "loss": 0.0645, "lr": 4.710539840682538e-07, "epoch": 4.106331220792523, "percentage": 82.13, "elapsed_time": "2:48:53", "remaining_time": "0:36:45", "throughput": 19921.69, "total_tokens": 201868608} +{"current_steps": 64150, "total_steps": 78105, "loss": 0.0904, "lr": 4.7072764571422127e-07, "epoch": 4.1066513027335, "percentage": 82.13, "elapsed_time": "2:48:53", "remaining_time": "0:36:44", "throughput": 19921.91, "total_tokens": 201884864} +{"current_steps": 64155, "total_steps": 78105, "loss": 0.2092, "lr": 4.70401408691481e-07, "epoch": 4.106971384674477, "percentage": 82.14, "elapsed_time": "2:48:54", "remaining_time": "0:36:43", "throughput": 19922.13, "total_tokens": 201900352} +{"current_steps": 64160, "total_steps": 78105, "loss": 0.144, "lr": 4.7007527301632274e-07, "epoch": 4.107291466615454, "percentage": 82.15, "elapsed_time": "2:48:55", "remaining_time": "0:36:42", "throughput": 19922.35, "total_tokens": 201915968} +{"current_steps": 64165, "total_steps": 78105, "loss": 0.1251, "lr": 4.697492387050315e-07, "epoch": 4.107611548556431, "percentage": 82.15, "elapsed_time": "2:48:55", "remaining_time": "0:36:42", "throughput": 19922.59, "total_tokens": 201932032} +{"current_steps": 64170, "total_steps": 78105, "loss": 0.1085, "lr": 4.6942330577388837e-07, "epoch": 4.107931630497407, "percentage": 82.16, "elapsed_time": "2:48:56", "remaining_time": "0:36:41", "throughput": 19922.82, "total_tokens": 201947584} +{"current_steps": 64175, "total_steps": 78105, "loss": 0.1524, "lr": 4.6909747423916826e-07, "epoch": 4.108251712438384, "percentage": 82.17, "elapsed_time": "2:48:57", "remaining_time": "0:36:40", "throughput": 19923.03, "total_tokens": 201963264} +{"current_steps": 64180, "total_steps": 78105, "loss": 0.1561, "lr": 4.6877174411714186e-07, "epoch": 4.108571794379361, "percentage": 82.17, "elapsed_time": "2:48:57", "remaining_time": "0:36:39", "throughput": 19923.27, "total_tokens": 201979328} +{"current_steps": 64185, "total_steps": 78105, "loss": 0.1357, "lr": 4.6844611542407354e-07, "epoch": 4.108891876320338, "percentage": 82.18, "elapsed_time": "2:48:58", "remaining_time": "0:36:38", "throughput": 19923.48, "total_tokens": 201994688} +{"current_steps": 64190, "total_steps": 78105, "loss": 0.2014, "lr": 4.681205881762249e-07, "epoch": 4.109211958261315, "percentage": 82.18, "elapsed_time": "2:48:59", "remaining_time": "0:36:37", "throughput": 19923.66, "total_tokens": 202009536} +{"current_steps": 64195, "total_steps": 78105, "loss": 0.0846, "lr": 4.677951623898502e-07, "epoch": 4.109532040202292, "percentage": 82.19, "elapsed_time": "2:48:59", "remaining_time": "0:36:37", "throughput": 19923.9, "total_tokens": 202025600} +{"current_steps": 64200, "total_steps": 78105, "loss": 0.187, "lr": 4.674698380812001e-07, "epoch": 4.109852122143269, "percentage": 82.2, "elapsed_time": "2:49:00", "remaining_time": "0:36:36", "throughput": 19924.12, "total_tokens": 202041664} +{"current_steps": 64205, "total_steps": 78105, "loss": 0.1034, "lr": 4.671446152665191e-07, "epoch": 4.110172204084246, "percentage": 82.2, "elapsed_time": "2:49:01", "remaining_time": "0:36:35", "throughput": 19924.33, "total_tokens": 202057024} +{"current_steps": 64210, "total_steps": 78105, "loss": 0.1511, "lr": 4.668194939620471e-07, "epoch": 4.110492286025223, "percentage": 82.21, "elapsed_time": "2:49:01", "remaining_time": "0:36:34", "throughput": 19924.54, "total_tokens": 202072512} +{"current_steps": 64215, "total_steps": 78105, "loss": 0.1393, "lr": 4.664944741840191e-07, "epoch": 4.110812367966199, "percentage": 82.22, "elapsed_time": "2:49:02", "remaining_time": "0:36:33", "throughput": 19924.77, "total_tokens": 202088704} +{"current_steps": 64220, "total_steps": 78105, "loss": 0.0914, "lr": 4.6616955594866507e-07, "epoch": 4.111132449907176, "percentage": 82.22, "elapsed_time": "2:49:03", "remaining_time": "0:36:33", "throughput": 19924.95, "total_tokens": 202103744} +{"current_steps": 64225, "total_steps": 78105, "loss": 0.1151, "lr": 4.65844739272209e-07, "epoch": 4.111452531848153, "percentage": 82.23, "elapsed_time": "2:49:03", "remaining_time": "0:36:32", "throughput": 19925.13, "total_tokens": 202118528} +{"current_steps": 64230, "total_steps": 78105, "loss": 0.112, "lr": 4.6552002417087236e-07, "epoch": 4.11177261378913, "percentage": 82.24, "elapsed_time": "2:49:04", "remaining_time": "0:36:31", "throughput": 19925.33, "total_tokens": 202133696} +{"current_steps": 64235, "total_steps": 78105, "loss": 0.1342, "lr": 4.651954106608672e-07, "epoch": 4.112092695730107, "percentage": 82.24, "elapsed_time": "2:49:05", "remaining_time": "0:36:30", "throughput": 19925.63, "total_tokens": 202151296} +{"current_steps": 64240, "total_steps": 78105, "loss": 0.0695, "lr": 4.6487089875840496e-07, "epoch": 4.112412777671084, "percentage": 82.25, "elapsed_time": "2:49:05", "remaining_time": "0:36:29", "throughput": 19925.86, "total_tokens": 202167296} +{"current_steps": 64245, "total_steps": 78105, "loss": 0.1241, "lr": 4.6454648847968935e-07, "epoch": 4.112732859612061, "percentage": 82.25, "elapsed_time": "2:49:06", "remaining_time": "0:36:28", "throughput": 19926.07, "total_tokens": 202182272} +{"current_steps": 64250, "total_steps": 78105, "loss": 0.1315, "lr": 4.642221798409191e-07, "epoch": 4.113052941553038, "percentage": 82.26, "elapsed_time": "2:49:07", "remaining_time": "0:36:28", "throughput": 19926.33, "total_tokens": 202198784} +{"current_steps": 64255, "total_steps": 78105, "loss": 0.0888, "lr": 4.6389797285829067e-07, "epoch": 4.113373023494015, "percentage": 82.27, "elapsed_time": "2:49:07", "remaining_time": "0:36:27", "throughput": 19926.53, "total_tokens": 202213824} +{"current_steps": 64260, "total_steps": 78105, "loss": 0.1836, "lr": 4.635738675479906e-07, "epoch": 4.113693105434991, "percentage": 82.27, "elapsed_time": "2:49:08", "remaining_time": "0:36:26", "throughput": 19926.7, "total_tokens": 202228416} +{"current_steps": 64265, "total_steps": 78105, "loss": 0.1495, "lr": 4.6324986392620533e-07, "epoch": 4.114013187375968, "percentage": 82.28, "elapsed_time": "2:49:09", "remaining_time": "0:36:25", "throughput": 19926.9, "total_tokens": 202243648} +{"current_steps": 64270, "total_steps": 78105, "loss": 0.1623, "lr": 4.629259620091114e-07, "epoch": 4.114333269316945, "percentage": 82.29, "elapsed_time": "2:49:09", "remaining_time": "0:36:24", "throughput": 19927.08, "total_tokens": 202258624} +{"current_steps": 64275, "total_steps": 78105, "loss": 0.1202, "lr": 4.626021618128851e-07, "epoch": 4.114653351257922, "percentage": 82.29, "elapsed_time": "2:49:10", "remaining_time": "0:36:24", "throughput": 19927.28, "total_tokens": 202273792} +{"current_steps": 64280, "total_steps": 78105, "loss": 0.2068, "lr": 4.6227846335369445e-07, "epoch": 4.114973433198899, "percentage": 82.3, "elapsed_time": "2:49:11", "remaining_time": "0:36:23", "throughput": 19927.49, "total_tokens": 202289280} +{"current_steps": 64285, "total_steps": 78105, "loss": 0.1566, "lr": 4.6195486664770307e-07, "epoch": 4.115293515139876, "percentage": 82.31, "elapsed_time": "2:49:11", "remaining_time": "0:36:22", "throughput": 19927.7, "total_tokens": 202304448} +{"current_steps": 64290, "total_steps": 78105, "loss": 0.147, "lr": 4.6163137171106985e-07, "epoch": 4.115613597080853, "percentage": 82.31, "elapsed_time": "2:49:12", "remaining_time": "0:36:21", "throughput": 19927.89, "total_tokens": 202319808} +{"current_steps": 64295, "total_steps": 78105, "loss": 0.1479, "lr": 4.613079785599486e-07, "epoch": 4.11593367902183, "percentage": 82.32, "elapsed_time": "2:49:13", "remaining_time": "0:36:20", "throughput": 19928.11, "total_tokens": 202335360} +{"current_steps": 64300, "total_steps": 78105, "loss": 0.1095, "lr": 4.609846872104876e-07, "epoch": 4.116253760962806, "percentage": 82.33, "elapsed_time": "2:49:13", "remaining_time": "0:36:20", "throughput": 19928.35, "total_tokens": 202351488} +{"current_steps": 64305, "total_steps": 78105, "loss": 0.107, "lr": 4.6066149767882986e-07, "epoch": 4.116573842903783, "percentage": 82.33, "elapsed_time": "2:49:14", "remaining_time": "0:36:19", "throughput": 19928.55, "total_tokens": 202366592} +{"current_steps": 64310, "total_steps": 78105, "loss": 0.1555, "lr": 4.603384099811151e-07, "epoch": 4.11689392484476, "percentage": 82.34, "elapsed_time": "2:49:15", "remaining_time": "0:36:18", "throughput": 19928.75, "total_tokens": 202381760} +{"current_steps": 64315, "total_steps": 78105, "loss": 0.1152, "lr": 4.600154241334759e-07, "epoch": 4.117214006785737, "percentage": 82.34, "elapsed_time": "2:49:15", "remaining_time": "0:36:17", "throughput": 19928.97, "total_tokens": 202397696} +{"current_steps": 64320, "total_steps": 78105, "loss": 0.1218, "lr": 4.596925401520405e-07, "epoch": 4.117534088726714, "percentage": 82.35, "elapsed_time": "2:49:16", "remaining_time": "0:36:16", "throughput": 19929.15, "total_tokens": 202412608} +{"current_steps": 64325, "total_steps": 78105, "loss": 0.1414, "lr": 4.5936975805293213e-07, "epoch": 4.117854170667691, "percentage": 82.36, "elapsed_time": "2:49:17", "remaining_time": "0:36:15", "throughput": 19929.4, "total_tokens": 202429248} +{"current_steps": 64330, "total_steps": 78105, "loss": 0.1261, "lr": 4.590470778522688e-07, "epoch": 4.118174252608668, "percentage": 82.36, "elapsed_time": "2:49:18", "remaining_time": "0:36:15", "throughput": 19929.67, "total_tokens": 202445568} +{"current_steps": 64335, "total_steps": 78105, "loss": 0.1538, "lr": 4.58724499566163e-07, "epoch": 4.118494334549645, "percentage": 82.37, "elapsed_time": "2:49:18", "remaining_time": "0:36:14", "throughput": 19929.9, "total_tokens": 202461568} +{"current_steps": 64340, "total_steps": 78105, "loss": 0.1237, "lr": 4.584020232107239e-07, "epoch": 4.118814416490622, "percentage": 82.38, "elapsed_time": "2:49:19", "remaining_time": "0:36:13", "throughput": 19930.1, "total_tokens": 202476800} +{"current_steps": 64345, "total_steps": 78105, "loss": 0.1777, "lr": 4.580796488020525e-07, "epoch": 4.119134498431598, "percentage": 82.38, "elapsed_time": "2:49:20", "remaining_time": "0:36:12", "throughput": 19930.31, "total_tokens": 202492416} +{"current_steps": 64350, "total_steps": 78105, "loss": 0.1355, "lr": 4.577573763562487e-07, "epoch": 4.119454580372575, "percentage": 82.39, "elapsed_time": "2:49:20", "remaining_time": "0:36:11", "throughput": 19930.52, "total_tokens": 202507648} +{"current_steps": 64355, "total_steps": 78105, "loss": 0.0978, "lr": 4.574352058894027e-07, "epoch": 4.119774662313552, "percentage": 82.4, "elapsed_time": "2:49:21", "remaining_time": "0:36:11", "throughput": 19930.73, "total_tokens": 202523328} +{"current_steps": 64360, "total_steps": 78105, "loss": 0.0897, "lr": 4.5711313741760354e-07, "epoch": 4.120094744254529, "percentage": 82.4, "elapsed_time": "2:49:22", "remaining_time": "0:36:10", "throughput": 19931.0, "total_tokens": 202540736} +{"current_steps": 64365, "total_steps": 78105, "loss": 0.169, "lr": 4.5679117095693363e-07, "epoch": 4.120414826195506, "percentage": 82.41, "elapsed_time": "2:49:22", "remaining_time": "0:36:09", "throughput": 19931.21, "total_tokens": 202556160} +{"current_steps": 64370, "total_steps": 78105, "loss": 0.166, "lr": 4.5646930652346904e-07, "epoch": 4.120734908136483, "percentage": 82.41, "elapsed_time": "2:49:23", "remaining_time": "0:36:08", "throughput": 19931.39, "total_tokens": 202571072} +{"current_steps": 64375, "total_steps": 78105, "loss": 0.1584, "lr": 4.561475441332844e-07, "epoch": 4.12105499007746, "percentage": 82.42, "elapsed_time": "2:49:24", "remaining_time": "0:36:07", "throughput": 19931.57, "total_tokens": 202586048} +{"current_steps": 64380, "total_steps": 78105, "loss": 0.1511, "lr": 4.558258838024437e-07, "epoch": 4.121375072018437, "percentage": 82.43, "elapsed_time": "2:49:24", "remaining_time": "0:36:06", "throughput": 19931.78, "total_tokens": 202601408} +{"current_steps": 64385, "total_steps": 78105, "loss": 0.0973, "lr": 4.555043255470118e-07, "epoch": 4.121695153959414, "percentage": 82.43, "elapsed_time": "2:49:25", "remaining_time": "0:36:06", "throughput": 19932.03, "total_tokens": 202617920} +{"current_steps": 64390, "total_steps": 78105, "loss": 0.1305, "lr": 4.551828693830443e-07, "epoch": 4.12201523590039, "percentage": 82.44, "elapsed_time": "2:49:26", "remaining_time": "0:36:05", "throughput": 19932.26, "total_tokens": 202634176} +{"current_steps": 64395, "total_steps": 78105, "loss": 0.1261, "lr": 4.548615153265931e-07, "epoch": 4.122335317841367, "percentage": 82.45, "elapsed_time": "2:49:26", "remaining_time": "0:36:04", "throughput": 19932.5, "total_tokens": 202650432} +{"current_steps": 64400, "total_steps": 78105, "loss": 0.132, "lr": 4.5454026339370514e-07, "epoch": 4.122655399782344, "percentage": 82.45, "elapsed_time": "2:49:27", "remaining_time": "0:36:03", "throughput": 19932.69, "total_tokens": 202665280} +{"current_steps": 64405, "total_steps": 78105, "loss": 0.1161, "lr": 4.54219113600422e-07, "epoch": 4.122975481723321, "percentage": 82.46, "elapsed_time": "2:49:28", "remaining_time": "0:36:02", "throughput": 19932.91, "total_tokens": 202681472} +{"current_steps": 64410, "total_steps": 78105, "loss": 0.1338, "lr": 4.5389806596278035e-07, "epoch": 4.123295563664298, "percentage": 82.47, "elapsed_time": "2:49:28", "remaining_time": "0:36:02", "throughput": 19933.13, "total_tokens": 202697088} +{"current_steps": 64415, "total_steps": 78105, "loss": 0.1066, "lr": 4.5357712049681145e-07, "epoch": 4.123615645605275, "percentage": 82.47, "elapsed_time": "2:49:29", "remaining_time": "0:36:01", "throughput": 19933.38, "total_tokens": 202713536} +{"current_steps": 64420, "total_steps": 78105, "loss": 0.1069, "lr": 4.5325627721854114e-07, "epoch": 4.123935727546252, "percentage": 82.48, "elapsed_time": "2:49:30", "remaining_time": "0:36:00", "throughput": 19933.58, "total_tokens": 202728512} +{"current_steps": 64425, "total_steps": 78105, "loss": 0.1338, "lr": 4.529355361439919e-07, "epoch": 4.124255809487229, "percentage": 82.49, "elapsed_time": "2:49:30", "remaining_time": "0:35:59", "throughput": 19933.79, "total_tokens": 202744256} +{"current_steps": 64430, "total_steps": 78105, "loss": 0.1358, "lr": 4.5261489728917917e-07, "epoch": 4.124575891428206, "percentage": 82.49, "elapsed_time": "2:49:31", "remaining_time": "0:35:58", "throughput": 19933.98, "total_tokens": 202759424} +{"current_steps": 64435, "total_steps": 78105, "loss": 0.1301, "lr": 4.522943606701141e-07, "epoch": 4.124895973369182, "percentage": 82.5, "elapsed_time": "2:49:32", "remaining_time": "0:35:58", "throughput": 19934.18, "total_tokens": 202774784} +{"current_steps": 64440, "total_steps": 78105, "loss": 0.0963, "lr": 4.519739263028025e-07, "epoch": 4.125216055310159, "percentage": 82.5, "elapsed_time": "2:49:32", "remaining_time": "0:35:57", "throughput": 19934.52, "total_tokens": 202793536} +{"current_steps": 64445, "total_steps": 78105, "loss": 0.1323, "lr": 4.516535942032452e-07, "epoch": 4.125536137251136, "percentage": 82.51, "elapsed_time": "2:49:33", "remaining_time": "0:35:56", "throughput": 19934.75, "total_tokens": 202809536} +{"current_steps": 64450, "total_steps": 78105, "loss": 0.1842, "lr": 4.5133336438743793e-07, "epoch": 4.125856219192113, "percentage": 82.52, "elapsed_time": "2:49:34", "remaining_time": "0:35:55", "throughput": 19934.98, "total_tokens": 202825408} +{"current_steps": 64455, "total_steps": 78105, "loss": 0.0924, "lr": 4.5101323687137074e-07, "epoch": 4.12617630113309, "percentage": 82.52, "elapsed_time": "2:49:35", "remaining_time": "0:35:54", "throughput": 19935.18, "total_tokens": 202840448} +{"current_steps": 64460, "total_steps": 78105, "loss": 0.1397, "lr": 4.5069321167103106e-07, "epoch": 4.126496383074067, "percentage": 82.53, "elapsed_time": "2:49:35", "remaining_time": "0:35:54", "throughput": 19935.39, "total_tokens": 202856064} +{"current_steps": 64465, "total_steps": 78105, "loss": 0.1081, "lr": 4.503732888023968e-07, "epoch": 4.126816465015044, "percentage": 82.54, "elapsed_time": "2:49:36", "remaining_time": "0:35:53", "throughput": 19935.61, "total_tokens": 202871936} +{"current_steps": 64470, "total_steps": 78105, "loss": 0.1444, "lr": 4.5005346828144477e-07, "epoch": 4.127136546956021, "percentage": 82.54, "elapsed_time": "2:49:37", "remaining_time": "0:35:52", "throughput": 19935.82, "total_tokens": 202887232} +{"current_steps": 64475, "total_steps": 78105, "loss": 0.1141, "lr": 4.4973375012414474e-07, "epoch": 4.127456628896998, "percentage": 82.55, "elapsed_time": "2:49:37", "remaining_time": "0:35:51", "throughput": 19936.01, "total_tokens": 202902528} +{"current_steps": 64480, "total_steps": 78105, "loss": 0.1175, "lr": 4.4941413434646116e-07, "epoch": 4.127776710837974, "percentage": 82.56, "elapsed_time": "2:49:38", "remaining_time": "0:35:50", "throughput": 19936.21, "total_tokens": 202917632} +{"current_steps": 64485, "total_steps": 78105, "loss": 0.1459, "lr": 4.4909462096435594e-07, "epoch": 4.128096792778951, "percentage": 82.56, "elapsed_time": "2:49:39", "remaining_time": "0:35:49", "throughput": 19936.43, "total_tokens": 202933376} +{"current_steps": 64490, "total_steps": 78105, "loss": 0.1393, "lr": 4.4877520999378115e-07, "epoch": 4.128416874719928, "percentage": 82.57, "elapsed_time": "2:49:39", "remaining_time": "0:35:49", "throughput": 19936.65, "total_tokens": 202949056} +{"current_steps": 64495, "total_steps": 78105, "loss": 0.0822, "lr": 4.484559014506895e-07, "epoch": 4.128736956660905, "percentage": 82.57, "elapsed_time": "2:49:40", "remaining_time": "0:35:48", "throughput": 19936.87, "total_tokens": 202964736} +{"current_steps": 64500, "total_steps": 78105, "loss": 0.1429, "lr": 4.4813669535102243e-07, "epoch": 4.129057038601882, "percentage": 82.58, "elapsed_time": "2:49:41", "remaining_time": "0:35:47", "throughput": 19937.06, "total_tokens": 202979776} +{"current_steps": 64505, "total_steps": 78105, "loss": 0.1544, "lr": 4.4781759171072194e-07, "epoch": 4.129377120542859, "percentage": 82.59, "elapsed_time": "2:49:41", "remaining_time": "0:35:46", "throughput": 19937.29, "total_tokens": 202996096} +{"current_steps": 64510, "total_steps": 78105, "loss": 0.1191, "lr": 4.4749859054572133e-07, "epoch": 4.129697202483836, "percentage": 82.59, "elapsed_time": "2:49:42", "remaining_time": "0:35:45", "throughput": 19937.48, "total_tokens": 203011136} +{"current_steps": 64515, "total_steps": 78105, "loss": 0.0833, "lr": 4.471796918719501e-07, "epoch": 4.130017284424813, "percentage": 82.6, "elapsed_time": "2:49:43", "remaining_time": "0:35:45", "throughput": 19937.7, "total_tokens": 203026816} +{"current_steps": 64520, "total_steps": 78105, "loss": 0.1392, "lr": 4.4686089570533247e-07, "epoch": 4.13033736636579, "percentage": 82.61, "elapsed_time": "2:49:43", "remaining_time": "0:35:44", "throughput": 19937.95, "total_tokens": 203043456} +{"current_steps": 64525, "total_steps": 78105, "loss": 0.1316, "lr": 4.465422020617871e-07, "epoch": 4.130657448306766, "percentage": 82.61, "elapsed_time": "2:49:44", "remaining_time": "0:35:43", "throughput": 19938.17, "total_tokens": 203059456} +{"current_steps": 64530, "total_steps": 78105, "loss": 0.0822, "lr": 4.462236109572279e-07, "epoch": 4.130977530247743, "percentage": 82.62, "elapsed_time": "2:49:45", "remaining_time": "0:35:42", "throughput": 19938.4, "total_tokens": 203075328} +{"current_steps": 64535, "total_steps": 78105, "loss": 0.1321, "lr": 4.4590512240756334e-07, "epoch": 4.13129761218872, "percentage": 82.63, "elapsed_time": "2:49:45", "remaining_time": "0:35:41", "throughput": 19938.63, "total_tokens": 203091136} +{"current_steps": 64540, "total_steps": 78105, "loss": 0.1423, "lr": 4.455867364286984e-07, "epoch": 4.131617694129697, "percentage": 82.63, "elapsed_time": "2:49:46", "remaining_time": "0:35:40", "throughput": 19938.83, "total_tokens": 203106432} +{"current_steps": 64545, "total_steps": 78105, "loss": 0.1326, "lr": 4.452684530365306e-07, "epoch": 4.131937776070674, "percentage": 82.64, "elapsed_time": "2:49:47", "remaining_time": "0:35:40", "throughput": 19939.03, "total_tokens": 203121600} +{"current_steps": 64550, "total_steps": 78105, "loss": 0.1625, "lr": 4.449502722469537e-07, "epoch": 4.132257858011651, "percentage": 82.65, "elapsed_time": "2:49:48", "remaining_time": "0:35:39", "throughput": 19939.59, "total_tokens": 203150528} +{"current_steps": 64555, "total_steps": 78105, "loss": 0.1392, "lr": 4.4463219407585577e-07, "epoch": 4.132577939952628, "percentage": 82.65, "elapsed_time": "2:49:48", "remaining_time": "0:35:38", "throughput": 19939.82, "total_tokens": 203166848} +{"current_steps": 64560, "total_steps": 78105, "loss": 0.1145, "lr": 4.4431421853912e-07, "epoch": 4.132898021893605, "percentage": 82.66, "elapsed_time": "2:49:49", "remaining_time": "0:35:37", "throughput": 19940.03, "total_tokens": 203182336} +{"current_steps": 64565, "total_steps": 78105, "loss": 0.1261, "lr": 4.439963456526239e-07, "epoch": 4.133218103834581, "percentage": 82.66, "elapsed_time": "2:49:50", "remaining_time": "0:35:37", "throughput": 19940.24, "total_tokens": 203197504} +{"current_steps": 64570, "total_steps": 78105, "loss": 0.0983, "lr": 4.436785754322423e-07, "epoch": 4.133538185775558, "percentage": 82.67, "elapsed_time": "2:49:50", "remaining_time": "0:35:36", "throughput": 19940.42, "total_tokens": 203212032} +{"current_steps": 64575, "total_steps": 78105, "loss": 0.1492, "lr": 4.433609078938403e-07, "epoch": 4.133858267716535, "percentage": 82.68, "elapsed_time": "2:49:51", "remaining_time": "0:35:35", "throughput": 19940.65, "total_tokens": 203227520} +{"current_steps": 64580, "total_steps": 78105, "loss": 0.1942, "lr": 4.4304334305328296e-07, "epoch": 4.134178349657512, "percentage": 82.68, "elapsed_time": "2:49:52", "remaining_time": "0:35:34", "throughput": 19940.84, "total_tokens": 203242432} +{"current_steps": 64585, "total_steps": 78105, "loss": 0.142, "lr": 4.4272588092642596e-07, "epoch": 4.134498431598489, "percentage": 82.69, "elapsed_time": "2:49:52", "remaining_time": "0:35:33", "throughput": 19941.06, "total_tokens": 203257984} +{"current_steps": 64590, "total_steps": 78105, "loss": 0.1, "lr": 4.4240852152912267e-07, "epoch": 4.134818513539466, "percentage": 82.7, "elapsed_time": "2:49:53", "remaining_time": "0:35:32", "throughput": 19941.23, "total_tokens": 203272640} +{"current_steps": 64595, "total_steps": 78105, "loss": 0.0801, "lr": 4.420912648772205e-07, "epoch": 4.135138595480443, "percentage": 82.7, "elapsed_time": "2:49:54", "remaining_time": "0:35:32", "throughput": 19941.42, "total_tokens": 203287680} +{"current_steps": 64600, "total_steps": 78105, "loss": 0.1625, "lr": 4.417741109865606e-07, "epoch": 4.13545867742142, "percentage": 82.71, "elapsed_time": "2:49:54", "remaining_time": "0:35:31", "throughput": 19941.65, "total_tokens": 203303616} +{"current_steps": 64605, "total_steps": 78105, "loss": 0.1081, "lr": 4.4145705987298197e-07, "epoch": 4.135778759362397, "percentage": 82.72, "elapsed_time": "2:49:55", "remaining_time": "0:35:30", "throughput": 19941.85, "total_tokens": 203318784} +{"current_steps": 64610, "total_steps": 78105, "loss": 0.0931, "lr": 4.411401115523142e-07, "epoch": 4.136098841303373, "percentage": 82.72, "elapsed_time": "2:49:56", "remaining_time": "0:35:29", "throughput": 19942.08, "total_tokens": 203334848} +{"current_steps": 64615, "total_steps": 78105, "loss": 0.1182, "lr": 4.4082326604038525e-07, "epoch": 4.13641892324435, "percentage": 82.73, "elapsed_time": "2:49:56", "remaining_time": "0:35:28", "throughput": 19942.26, "total_tokens": 203349696} +{"current_steps": 64620, "total_steps": 78105, "loss": 0.1232, "lr": 4.4050652335301676e-07, "epoch": 4.136739005185327, "percentage": 82.73, "elapsed_time": "2:49:57", "remaining_time": "0:35:28", "throughput": 19942.48, "total_tokens": 203365376} +{"current_steps": 64625, "total_steps": 78105, "loss": 0.1279, "lr": 4.40189883506025e-07, "epoch": 4.137059087126304, "percentage": 82.74, "elapsed_time": "2:49:58", "remaining_time": "0:35:27", "throughput": 19942.72, "total_tokens": 203381376} +{"current_steps": 64630, "total_steps": 78105, "loss": 0.1339, "lr": 4.3987334651522134e-07, "epoch": 4.137379169067281, "percentage": 82.75, "elapsed_time": "2:49:59", "remaining_time": "0:35:26", "throughput": 19942.99, "total_tokens": 203398528} +{"current_steps": 64635, "total_steps": 78105, "loss": 0.1897, "lr": 4.395569123964119e-07, "epoch": 4.137699251008258, "percentage": 82.75, "elapsed_time": "2:49:59", "remaining_time": "0:35:25", "throughput": 19943.22, "total_tokens": 203414784} +{"current_steps": 64640, "total_steps": 78105, "loss": 0.17, "lr": 4.392405811653977e-07, "epoch": 4.138019332949235, "percentage": 82.76, "elapsed_time": "2:50:00", "remaining_time": "0:35:24", "throughput": 19943.48, "total_tokens": 203431168} +{"current_steps": 64645, "total_steps": 78105, "loss": 0.1441, "lr": 4.389243528379747e-07, "epoch": 4.138339414890212, "percentage": 82.77, "elapsed_time": "2:50:01", "remaining_time": "0:35:23", "throughput": 19943.67, "total_tokens": 203446144} +{"current_steps": 64650, "total_steps": 78105, "loss": 0.1901, "lr": 4.386082274299333e-07, "epoch": 4.138659496831189, "percentage": 82.77, "elapsed_time": "2:50:01", "remaining_time": "0:35:23", "throughput": 19943.84, "total_tokens": 203460992} +{"current_steps": 64655, "total_steps": 78105, "loss": 0.1195, "lr": 4.382922049570604e-07, "epoch": 4.138979578772165, "percentage": 82.78, "elapsed_time": "2:50:02", "remaining_time": "0:35:22", "throughput": 19944.06, "total_tokens": 203476608} +{"current_steps": 64660, "total_steps": 78105, "loss": 0.1279, "lr": 4.3797628543513547e-07, "epoch": 4.139299660713142, "percentage": 82.79, "elapsed_time": "2:50:03", "remaining_time": "0:35:21", "throughput": 19944.29, "total_tokens": 203492416} +{"current_steps": 64665, "total_steps": 78105, "loss": 0.154, "lr": 4.376604688799344e-07, "epoch": 4.139619742654119, "percentage": 82.79, "elapsed_time": "2:50:03", "remaining_time": "0:35:20", "throughput": 19944.51, "total_tokens": 203508160} +{"current_steps": 64670, "total_steps": 78105, "loss": 0.1593, "lr": 4.3734475530722686e-07, "epoch": 4.139939824595096, "percentage": 82.8, "elapsed_time": "2:50:04", "remaining_time": "0:35:19", "throughput": 19944.76, "total_tokens": 203524800} +{"current_steps": 64675, "total_steps": 78105, "loss": 0.1384, "lr": 4.370291447327785e-07, "epoch": 4.140259906536073, "percentage": 82.81, "elapsed_time": "2:50:05", "remaining_time": "0:35:19", "throughput": 19945.04, "total_tokens": 203541952} +{"current_steps": 64680, "total_steps": 78105, "loss": 0.156, "lr": 4.3671363717234877e-07, "epoch": 4.14057998847705, "percentage": 82.81, "elapsed_time": "2:50:05", "remaining_time": "0:35:18", "throughput": 19945.29, "total_tokens": 203558400} +{"current_steps": 64685, "total_steps": 78105, "loss": 0.1089, "lr": 4.363982326416924e-07, "epoch": 4.140900070418027, "percentage": 82.82, "elapsed_time": "2:50:06", "remaining_time": "0:35:17", "throughput": 19945.82, "total_tokens": 203586816} +{"current_steps": 64690, "total_steps": 78105, "loss": 0.1465, "lr": 4.3608293115656066e-07, "epoch": 4.141220152359004, "percentage": 82.82, "elapsed_time": "2:50:07", "remaining_time": "0:35:16", "throughput": 19946.03, "total_tokens": 203602304} +{"current_steps": 64695, "total_steps": 78105, "loss": 0.1297, "lr": 4.3576773273269533e-07, "epoch": 4.141540234299981, "percentage": 82.83, "elapsed_time": "2:50:08", "remaining_time": "0:35:15", "throughput": 19946.22, "total_tokens": 203617216} +{"current_steps": 64700, "total_steps": 78105, "loss": 0.1259, "lr": 4.3545263738583784e-07, "epoch": 4.141860316240957, "percentage": 82.84, "elapsed_time": "2:50:08", "remaining_time": "0:35:15", "throughput": 19946.42, "total_tokens": 203632832} +{"current_steps": 64705, "total_steps": 78105, "loss": 0.1186, "lr": 4.3513764513172183e-07, "epoch": 4.142180398181934, "percentage": 82.84, "elapsed_time": "2:50:09", "remaining_time": "0:35:14", "throughput": 19946.62, "total_tokens": 203648256} +{"current_steps": 64710, "total_steps": 78105, "loss": 0.1011, "lr": 4.3482275598607596e-07, "epoch": 4.142500480122911, "percentage": 82.85, "elapsed_time": "2:50:10", "remaining_time": "0:35:13", "throughput": 19946.87, "total_tokens": 203664448} +{"current_steps": 64715, "total_steps": 78105, "loss": 0.083, "lr": 4.3450796996462567e-07, "epoch": 4.142820562063888, "percentage": 82.86, "elapsed_time": "2:50:11", "remaining_time": "0:35:12", "throughput": 19947.13, "total_tokens": 203681472} +{"current_steps": 64720, "total_steps": 78105, "loss": 0.1433, "lr": 4.3419328708308727e-07, "epoch": 4.143140644004865, "percentage": 82.86, "elapsed_time": "2:50:11", "remaining_time": "0:35:11", "throughput": 19947.4, "total_tokens": 203698240} +{"current_steps": 64725, "total_steps": 78105, "loss": 0.1355, "lr": 4.338787073571768e-07, "epoch": 4.143460725945842, "percentage": 82.87, "elapsed_time": "2:50:12", "remaining_time": "0:35:11", "throughput": 19947.63, "total_tokens": 203714304} +{"current_steps": 64730, "total_steps": 78105, "loss": 0.1808, "lr": 4.3356423080260085e-07, "epoch": 4.143780807886819, "percentage": 82.88, "elapsed_time": "2:50:13", "remaining_time": "0:35:10", "throughput": 19947.85, "total_tokens": 203729728} +{"current_steps": 64735, "total_steps": 78105, "loss": 0.1245, "lr": 4.332498574350638e-07, "epoch": 4.144100889827796, "percentage": 82.88, "elapsed_time": "2:50:13", "remaining_time": "0:35:09", "throughput": 19948.1, "total_tokens": 203745792} +{"current_steps": 64740, "total_steps": 78105, "loss": 0.131, "lr": 4.329355872702637e-07, "epoch": 4.144420971768773, "percentage": 82.89, "elapsed_time": "2:50:14", "remaining_time": "0:35:08", "throughput": 19948.3, "total_tokens": 203761216} +{"current_steps": 64745, "total_steps": 78105, "loss": 0.0968, "lr": 4.3262142032389347e-07, "epoch": 4.144741053709749, "percentage": 82.89, "elapsed_time": "2:50:15", "remaining_time": "0:35:07", "throughput": 19948.52, "total_tokens": 203776640} +{"current_steps": 64750, "total_steps": 78105, "loss": 0.1456, "lr": 4.3230735661164093e-07, "epoch": 4.145061135650726, "percentage": 82.9, "elapsed_time": "2:50:15", "remaining_time": "0:35:07", "throughput": 19948.75, "total_tokens": 203792640} +{"current_steps": 64755, "total_steps": 78105, "loss": 0.1417, "lr": 4.3199339614918874e-07, "epoch": 4.145381217591703, "percentage": 82.91, "elapsed_time": "2:50:16", "remaining_time": "0:35:06", "throughput": 19948.97, "total_tokens": 203808128} +{"current_steps": 64760, "total_steps": 78105, "loss": 0.1232, "lr": 4.316795389522138e-07, "epoch": 4.14570129953268, "percentage": 82.91, "elapsed_time": "2:50:17", "remaining_time": "0:35:05", "throughput": 19949.22, "total_tokens": 203824704} +{"current_steps": 64765, "total_steps": 78105, "loss": 0.1134, "lr": 4.313657850363903e-07, "epoch": 4.146021381473657, "percentage": 82.92, "elapsed_time": "2:50:17", "remaining_time": "0:35:04", "throughput": 19949.37, "total_tokens": 203838784} +{"current_steps": 64770, "total_steps": 78105, "loss": 0.117, "lr": 4.310521344173835e-07, "epoch": 4.146341463414634, "percentage": 82.93, "elapsed_time": "2:50:18", "remaining_time": "0:35:03", "throughput": 19949.63, "total_tokens": 203855616} +{"current_steps": 64775, "total_steps": 78105, "loss": 0.1809, "lr": 4.3073858711085656e-07, "epoch": 4.146661545355611, "percentage": 82.93, "elapsed_time": "2:50:19", "remaining_time": "0:35:02", "throughput": 19949.82, "total_tokens": 203870912} +{"current_steps": 64780, "total_steps": 78105, "loss": 0.1523, "lr": 4.3042514313246656e-07, "epoch": 4.146981627296588, "percentage": 82.94, "elapsed_time": "2:50:19", "remaining_time": "0:35:02", "throughput": 19950.07, "total_tokens": 203887360} +{"current_steps": 64785, "total_steps": 78105, "loss": 0.153, "lr": 4.3011180249786485e-07, "epoch": 4.147301709237565, "percentage": 82.95, "elapsed_time": "2:50:20", "remaining_time": "0:35:01", "throughput": 19950.26, "total_tokens": 203902272} +{"current_steps": 64790, "total_steps": 78105, "loss": 0.1738, "lr": 4.2979856522269796e-07, "epoch": 4.147621791178541, "percentage": 82.95, "elapsed_time": "2:50:21", "remaining_time": "0:35:00", "throughput": 19950.45, "total_tokens": 203917632} +{"current_steps": 64795, "total_steps": 78105, "loss": 0.1056, "lr": 4.2948543132260686e-07, "epoch": 4.147941873119518, "percentage": 82.96, "elapsed_time": "2:50:21", "remaining_time": "0:34:59", "throughput": 19950.64, "total_tokens": 203932416} +{"current_steps": 64800, "total_steps": 78105, "loss": 0.1178, "lr": 4.291724008132295e-07, "epoch": 4.148261955060495, "percentage": 82.97, "elapsed_time": "2:50:22", "remaining_time": "0:34:58", "throughput": 19950.81, "total_tokens": 203946816} +{"current_steps": 64805, "total_steps": 78105, "loss": 0.1153, "lr": 4.2885947371019476e-07, "epoch": 4.148582037001472, "percentage": 82.97, "elapsed_time": "2:50:23", "remaining_time": "0:34:58", "throughput": 19951.05, "total_tokens": 203963136} +{"current_steps": 64810, "total_steps": 78105, "loss": 0.1212, "lr": 4.2854665002913103e-07, "epoch": 4.148902118942449, "percentage": 82.98, "elapsed_time": "2:50:23", "remaining_time": "0:34:57", "throughput": 19951.26, "total_tokens": 203978496} +{"current_steps": 64815, "total_steps": 78105, "loss": 0.1427, "lr": 4.282339297856564e-07, "epoch": 4.149222200883426, "percentage": 82.98, "elapsed_time": "2:50:24", "remaining_time": "0:34:56", "throughput": 19951.48, "total_tokens": 203994368} +{"current_steps": 64820, "total_steps": 78105, "loss": 0.1349, "lr": 4.279213129953885e-07, "epoch": 4.149542282824403, "percentage": 82.99, "elapsed_time": "2:50:25", "remaining_time": "0:34:55", "throughput": 19951.7, "total_tokens": 204010112} +{"current_steps": 64825, "total_steps": 78105, "loss": 0.1063, "lr": 4.276087996739375e-07, "epoch": 4.14986236476538, "percentage": 83.0, "elapsed_time": "2:50:25", "remaining_time": "0:34:54", "throughput": 19951.91, "total_tokens": 204026048} +{"current_steps": 64830, "total_steps": 78105, "loss": 0.1247, "lr": 4.272963898369073e-07, "epoch": 4.150182446706356, "percentage": 83.0, "elapsed_time": "2:50:26", "remaining_time": "0:34:54", "throughput": 19952.1, "total_tokens": 204040896} +{"current_steps": 64835, "total_steps": 78105, "loss": 0.1024, "lr": 4.269840834999006e-07, "epoch": 4.150502528647333, "percentage": 83.01, "elapsed_time": "2:50:27", "remaining_time": "0:34:53", "throughput": 19952.31, "total_tokens": 204056704} +{"current_steps": 64840, "total_steps": 78105, "loss": 0.1437, "lr": 4.2667188067850943e-07, "epoch": 4.15082261058831, "percentage": 83.02, "elapsed_time": "2:50:27", "remaining_time": "0:34:52", "throughput": 19952.54, "total_tokens": 204072832} +{"current_steps": 64845, "total_steps": 78105, "loss": 0.1359, "lr": 4.2635978138832576e-07, "epoch": 4.151142692529287, "percentage": 83.02, "elapsed_time": "2:50:28", "remaining_time": "0:34:51", "throughput": 19952.79, "total_tokens": 204089152} +{"current_steps": 64850, "total_steps": 78105, "loss": 0.0895, "lr": 4.2604778564493326e-07, "epoch": 4.151462774470264, "percentage": 83.03, "elapsed_time": "2:50:29", "remaining_time": "0:34:50", "throughput": 19953.04, "total_tokens": 204105600} +{"current_steps": 64855, "total_steps": 78105, "loss": 0.1332, "lr": 4.2573589346391166e-07, "epoch": 4.151782856411241, "percentage": 83.04, "elapsed_time": "2:50:30", "remaining_time": "0:34:50", "throughput": 19953.34, "total_tokens": 204123392} +{"current_steps": 64860, "total_steps": 78105, "loss": 0.133, "lr": 4.2542410486083484e-07, "epoch": 4.152102938352218, "percentage": 83.04, "elapsed_time": "2:50:30", "remaining_time": "0:34:49", "throughput": 19953.55, "total_tokens": 204139008} +{"current_steps": 64865, "total_steps": 78105, "loss": 0.187, "lr": 4.2511241985127236e-07, "epoch": 4.152423020293195, "percentage": 83.05, "elapsed_time": "2:50:31", "remaining_time": "0:34:48", "throughput": 19953.75, "total_tokens": 204154368} +{"current_steps": 64870, "total_steps": 78105, "loss": 0.0906, "lr": 4.248008384507879e-07, "epoch": 4.152743102234172, "percentage": 83.05, "elapsed_time": "2:50:32", "remaining_time": "0:34:47", "throughput": 19953.96, "total_tokens": 204169792} +{"current_steps": 64875, "total_steps": 78105, "loss": 0.1672, "lr": 4.2448936067494033e-07, "epoch": 4.153063184175148, "percentage": 83.06, "elapsed_time": "2:50:32", "remaining_time": "0:34:46", "throughput": 19954.14, "total_tokens": 204185088} +{"current_steps": 64880, "total_steps": 78105, "loss": 0.143, "lr": 4.241779865392823e-07, "epoch": 4.153383266116125, "percentage": 83.07, "elapsed_time": "2:50:33", "remaining_time": "0:34:45", "throughput": 19954.34, "total_tokens": 204200576} +{"current_steps": 64885, "total_steps": 78105, "loss": 0.1264, "lr": 4.2386671605936377e-07, "epoch": 4.153703348057102, "percentage": 83.07, "elapsed_time": "2:50:34", "remaining_time": "0:34:45", "throughput": 19954.59, "total_tokens": 204216768} +{"current_steps": 64890, "total_steps": 78105, "loss": 0.1559, "lr": 4.2355554925072733e-07, "epoch": 4.154023429998079, "percentage": 83.08, "elapsed_time": "2:50:34", "remaining_time": "0:34:44", "throughput": 19954.77, "total_tokens": 204231488} +{"current_steps": 64895, "total_steps": 78105, "loss": 0.1119, "lr": 4.232444861289109e-07, "epoch": 4.154343511939056, "percentage": 83.09, "elapsed_time": "2:50:35", "remaining_time": "0:34:43", "throughput": 19954.96, "total_tokens": 204246592} +{"current_steps": 64900, "total_steps": 78105, "loss": 0.1392, "lr": 4.2293352670944755e-07, "epoch": 4.154663593880033, "percentage": 83.09, "elapsed_time": "2:50:36", "remaining_time": "0:34:42", "throughput": 19955.2, "total_tokens": 204262656} +{"current_steps": 64905, "total_steps": 78105, "loss": 0.1221, "lr": 4.226226710078646e-07, "epoch": 4.15498367582101, "percentage": 83.1, "elapsed_time": "2:50:36", "remaining_time": "0:34:41", "throughput": 19955.43, "total_tokens": 204278592} +{"current_steps": 64910, "total_steps": 78105, "loss": 0.1017, "lr": 4.2231191903968487e-07, "epoch": 4.155303757761987, "percentage": 83.11, "elapsed_time": "2:50:37", "remaining_time": "0:34:41", "throughput": 19955.69, "total_tokens": 204295360} +{"current_steps": 64915, "total_steps": 78105, "loss": 0.0691, "lr": 4.220012708204252e-07, "epoch": 4.155623839702964, "percentage": 83.11, "elapsed_time": "2:50:38", "remaining_time": "0:34:40", "throughput": 19955.88, "total_tokens": 204310336} +{"current_steps": 64920, "total_steps": 78105, "loss": 0.0884, "lr": 4.216907263655992e-07, "epoch": 4.15594392164394, "percentage": 83.12, "elapsed_time": "2:50:38", "remaining_time": "0:34:39", "throughput": 19956.24, "total_tokens": 204330432} +{"current_steps": 64925, "total_steps": 78105, "loss": 0.093, "lr": 4.213802856907115e-07, "epoch": 4.156264003584917, "percentage": 83.13, "elapsed_time": "2:50:39", "remaining_time": "0:34:38", "throughput": 19956.51, "total_tokens": 204347264} +{"current_steps": 64930, "total_steps": 78105, "loss": 0.1339, "lr": 4.21069948811266e-07, "epoch": 4.156584085525894, "percentage": 83.13, "elapsed_time": "2:50:40", "remaining_time": "0:34:37", "throughput": 19956.68, "total_tokens": 204361920} +{"current_steps": 64935, "total_steps": 78105, "loss": 0.1068, "lr": 4.207597157427584e-07, "epoch": 4.156904167466871, "percentage": 83.14, "elapsed_time": "2:50:40", "remaining_time": "0:34:37", "throughput": 19956.85, "total_tokens": 204376576} +{"current_steps": 64940, "total_steps": 78105, "loss": 0.1804, "lr": 4.2044958650068024e-07, "epoch": 4.157224249407848, "percentage": 83.14, "elapsed_time": "2:50:41", "remaining_time": "0:34:36", "throughput": 19957.08, "total_tokens": 204392640} +{"current_steps": 64945, "total_steps": 78105, "loss": 0.1378, "lr": 4.2013956110051766e-07, "epoch": 4.157544331348825, "percentage": 83.15, "elapsed_time": "2:50:42", "remaining_time": "0:34:35", "throughput": 19957.33, "total_tokens": 204409216} +{"current_steps": 64950, "total_steps": 78105, "loss": 0.1157, "lr": 4.1982963955775143e-07, "epoch": 4.157864413289802, "percentage": 83.16, "elapsed_time": "2:50:42", "remaining_time": "0:34:34", "throughput": 19957.53, "total_tokens": 204424448} +{"current_steps": 64955, "total_steps": 78105, "loss": 0.0977, "lr": 4.195198218878588e-07, "epoch": 4.158184495230779, "percentage": 83.16, "elapsed_time": "2:50:43", "remaining_time": "0:34:33", "throughput": 19957.72, "total_tokens": 204439360} +{"current_steps": 64960, "total_steps": 78105, "loss": 0.1166, "lr": 4.192101081063082e-07, "epoch": 4.158504577171756, "percentage": 83.17, "elapsed_time": "2:50:44", "remaining_time": "0:34:32", "throughput": 19957.9, "total_tokens": 204454208} +{"current_steps": 64965, "total_steps": 78105, "loss": 0.1392, "lr": 4.1890049822856716e-07, "epoch": 4.1588246591127325, "percentage": 83.18, "elapsed_time": "2:50:44", "remaining_time": "0:34:32", "throughput": 19958.13, "total_tokens": 204470208} +{"current_steps": 64970, "total_steps": 78105, "loss": 0.1047, "lr": 4.185909922700951e-07, "epoch": 4.1591447410537095, "percentage": 83.18, "elapsed_time": "2:50:45", "remaining_time": "0:34:31", "throughput": 19958.33, "total_tokens": 204485312} +{"current_steps": 64975, "total_steps": 78105, "loss": 0.127, "lr": 4.182815902463472e-07, "epoch": 4.1594648229946865, "percentage": 83.19, "elapsed_time": "2:50:46", "remaining_time": "0:34:30", "throughput": 19958.57, "total_tokens": 204501440} +{"current_steps": 64980, "total_steps": 78105, "loss": 0.0809, "lr": 4.179722921727736e-07, "epoch": 4.159784904935663, "percentage": 83.2, "elapsed_time": "2:50:46", "remaining_time": "0:34:29", "throughput": 19958.74, "total_tokens": 204515904} +{"current_steps": 64985, "total_steps": 78105, "loss": 0.1538, "lr": 4.1766309806481857e-07, "epoch": 4.16010498687664, "percentage": 83.2, "elapsed_time": "2:50:47", "remaining_time": "0:34:28", "throughput": 19958.95, "total_tokens": 204531328} +{"current_steps": 64990, "total_steps": 78105, "loss": 0.1203, "lr": 4.1735400793792153e-07, "epoch": 4.160425068817617, "percentage": 83.21, "elapsed_time": "2:50:48", "remaining_time": "0:34:28", "throughput": 19959.14, "total_tokens": 204546560} +{"current_steps": 64995, "total_steps": 78105, "loss": 0.1002, "lr": 4.170450218075181e-07, "epoch": 4.160745150758594, "percentage": 83.21, "elapsed_time": "2:50:48", "remaining_time": "0:34:27", "throughput": 19959.36, "total_tokens": 204562432} +{"current_steps": 65000, "total_steps": 78105, "loss": 0.1078, "lr": 4.167361396890357e-07, "epoch": 4.161065232699571, "percentage": 83.22, "elapsed_time": "2:50:49", "remaining_time": "0:34:26", "throughput": 19959.55, "total_tokens": 204577856} +{"current_steps": 65005, "total_steps": 78105, "loss": 0.1839, "lr": 4.1642736159789974e-07, "epoch": 4.161385314640548, "percentage": 83.23, "elapsed_time": "2:50:50", "remaining_time": "0:34:25", "throughput": 19959.79, "total_tokens": 204593984} +{"current_steps": 65010, "total_steps": 78105, "loss": 0.2009, "lr": 4.1611868754952824e-07, "epoch": 4.1617053965815245, "percentage": 83.23, "elapsed_time": "2:50:50", "remaining_time": "0:34:24", "throughput": 19959.99, "total_tokens": 204609152} +{"current_steps": 65015, "total_steps": 78105, "loss": 0.0927, "lr": 4.158101175593349e-07, "epoch": 4.1620254785225015, "percentage": 83.24, "elapsed_time": "2:50:51", "remaining_time": "0:34:24", "throughput": 19960.2, "total_tokens": 204624896} +{"current_steps": 65020, "total_steps": 78105, "loss": 0.1504, "lr": 4.155016516427285e-07, "epoch": 4.1623455604634785, "percentage": 83.25, "elapsed_time": "2:50:52", "remaining_time": "0:34:23", "throughput": 19960.42, "total_tokens": 204640576} +{"current_steps": 65025, "total_steps": 78105, "loss": 0.1276, "lr": 4.1519328981511094e-07, "epoch": 4.1626656424044555, "percentage": 83.25, "elapsed_time": "2:50:53", "remaining_time": "0:34:22", "throughput": 19960.65, "total_tokens": 204657024} +{"current_steps": 65030, "total_steps": 78105, "loss": 0.0998, "lr": 4.1488503209188233e-07, "epoch": 4.1629857243454325, "percentage": 83.26, "elapsed_time": "2:50:53", "remaining_time": "0:34:21", "throughput": 19960.86, "total_tokens": 204672576} +{"current_steps": 65035, "total_steps": 78105, "loss": 0.146, "lr": 4.14576878488433e-07, "epoch": 4.1633058062864094, "percentage": 83.27, "elapsed_time": "2:50:54", "remaining_time": "0:34:20", "throughput": 19961.08, "total_tokens": 204688128} +{"current_steps": 65040, "total_steps": 78105, "loss": 0.1338, "lr": 4.1426882902015325e-07, "epoch": 4.163625888227386, "percentage": 83.27, "elapsed_time": "2:50:55", "remaining_time": "0:34:20", "throughput": 19961.36, "total_tokens": 204705472} +{"current_steps": 65045, "total_steps": 78105, "loss": 0.1259, "lr": 4.1396088370242257e-07, "epoch": 4.163945970168363, "percentage": 83.28, "elapsed_time": "2:50:55", "remaining_time": "0:34:19", "throughput": 19961.6, "total_tokens": 204721984} +{"current_steps": 65050, "total_steps": 78105, "loss": 0.146, "lr": 4.136530425506202e-07, "epoch": 4.16426605210934, "percentage": 83.29, "elapsed_time": "2:50:56", "remaining_time": "0:34:18", "throughput": 19961.87, "total_tokens": 204738816} +{"current_steps": 65055, "total_steps": 78105, "loss": 0.119, "lr": 4.133453055801176e-07, "epoch": 4.1645861340503165, "percentage": 83.29, "elapsed_time": "2:50:57", "remaining_time": "0:34:17", "throughput": 19962.07, "total_tokens": 204753856} +{"current_steps": 65060, "total_steps": 78105, "loss": 0.1108, "lr": 4.13037672806281e-07, "epoch": 4.1649062159912935, "percentage": 83.3, "elapsed_time": "2:50:57", "remaining_time": "0:34:16", "throughput": 19962.26, "total_tokens": 204768960} +{"current_steps": 65065, "total_steps": 78105, "loss": 0.0814, "lr": 4.1273014424447336e-07, "epoch": 4.1652262979322705, "percentage": 83.3, "elapsed_time": "2:50:58", "remaining_time": "0:34:15", "throughput": 19962.45, "total_tokens": 204784384} +{"current_steps": 65070, "total_steps": 78105, "loss": 0.0996, "lr": 4.124227199100489e-07, "epoch": 4.1655463798732475, "percentage": 83.31, "elapsed_time": "2:50:59", "remaining_time": "0:34:15", "throughput": 19962.66, "total_tokens": 204800576} +{"current_steps": 65075, "total_steps": 78105, "loss": 0.0984, "lr": 4.121153998183608e-07, "epoch": 4.1658664618142245, "percentage": 83.32, "elapsed_time": "2:50:59", "remaining_time": "0:34:14", "throughput": 19962.9, "total_tokens": 204816896} +{"current_steps": 65080, "total_steps": 78105, "loss": 0.1195, "lr": 4.1180818398475405e-07, "epoch": 4.1661865437552015, "percentage": 83.32, "elapsed_time": "2:51:00", "remaining_time": "0:34:13", "throughput": 19963.13, "total_tokens": 204832512} +{"current_steps": 65085, "total_steps": 78105, "loss": 0.1197, "lr": 4.1150107242456994e-07, "epoch": 4.1665066256961785, "percentage": 83.33, "elapsed_time": "2:51:01", "remaining_time": "0:34:12", "throughput": 19963.41, "total_tokens": 204849920} +{"current_steps": 65090, "total_steps": 78105, "loss": 0.1707, "lr": 4.111940651531432e-07, "epoch": 4.1668267076371555, "percentage": 83.34, "elapsed_time": "2:51:01", "remaining_time": "0:34:11", "throughput": 19963.61, "total_tokens": 204864960} +{"current_steps": 65095, "total_steps": 78105, "loss": 0.1008, "lr": 4.1088716218580517e-07, "epoch": 4.167146789578132, "percentage": 83.34, "elapsed_time": "2:51:02", "remaining_time": "0:34:11", "throughput": 19963.83, "total_tokens": 204880832} +{"current_steps": 65100, "total_steps": 78105, "loss": 0.1414, "lr": 4.1058036353787996e-07, "epoch": 4.1674668715191086, "percentage": 83.35, "elapsed_time": "2:51:03", "remaining_time": "0:34:10", "throughput": 19964.04, "total_tokens": 204896448} +{"current_steps": 65105, "total_steps": 78105, "loss": 0.1297, "lr": 4.1027366922468836e-07, "epoch": 4.1677869534600855, "percentage": 83.36, "elapsed_time": "2:51:03", "remaining_time": "0:34:09", "throughput": 19964.26, "total_tokens": 204911936} +{"current_steps": 65110, "total_steps": 78105, "loss": 0.1235, "lr": 4.09967079261544e-07, "epoch": 4.1681070354010625, "percentage": 83.36, "elapsed_time": "2:51:04", "remaining_time": "0:34:08", "throughput": 19964.5, "total_tokens": 204928128} +{"current_steps": 65115, "total_steps": 78105, "loss": 0.1563, "lr": 4.096605936637582e-07, "epoch": 4.1684271173420395, "percentage": 83.37, "elapsed_time": "2:51:05", "remaining_time": "0:34:07", "throughput": 19964.81, "total_tokens": 204945984} +{"current_steps": 65120, "total_steps": 78105, "loss": 0.1165, "lr": 4.093542124466332e-07, "epoch": 4.1687471992830165, "percentage": 83.37, "elapsed_time": "2:51:06", "remaining_time": "0:34:07", "throughput": 19965.01, "total_tokens": 204961664} +{"current_steps": 65125, "total_steps": 78105, "loss": 0.1322, "lr": 4.090479356254695e-07, "epoch": 4.1690672812239935, "percentage": 83.38, "elapsed_time": "2:51:06", "remaining_time": "0:34:06", "throughput": 19965.28, "total_tokens": 204978816} +{"current_steps": 65130, "total_steps": 78105, "loss": 0.1032, "lr": 4.0874176321556076e-07, "epoch": 4.1693873631649705, "percentage": 83.39, "elapsed_time": "2:51:07", "remaining_time": "0:34:05", "throughput": 19965.48, "total_tokens": 204994048} +{"current_steps": 65135, "total_steps": 78105, "loss": 0.0664, "lr": 4.084356952321952e-07, "epoch": 4.1697074451059475, "percentage": 83.39, "elapsed_time": "2:51:08", "remaining_time": "0:34:04", "throughput": 19965.72, "total_tokens": 205010240} +{"current_steps": 65140, "total_steps": 78105, "loss": 0.1436, "lr": 4.0812973169065655e-07, "epoch": 4.170027527046924, "percentage": 83.4, "elapsed_time": "2:51:08", "remaining_time": "0:34:03", "throughput": 19965.95, "total_tokens": 205026112} +{"current_steps": 65145, "total_steps": 78105, "loss": 0.1175, "lr": 4.0782387260622225e-07, "epoch": 4.170347608987901, "percentage": 83.41, "elapsed_time": "2:51:09", "remaining_time": "0:34:03", "throughput": 19966.14, "total_tokens": 205041408} +{"current_steps": 65150, "total_steps": 78105, "loss": 0.1286, "lr": 4.07518117994167e-07, "epoch": 4.170667690928878, "percentage": 83.41, "elapsed_time": "2:51:10", "remaining_time": "0:34:02", "throughput": 19966.36, "total_tokens": 205057408} +{"current_steps": 65155, "total_steps": 78105, "loss": 0.1448, "lr": 4.0721246786975673e-07, "epoch": 4.170987772869855, "percentage": 83.42, "elapsed_time": "2:51:10", "remaining_time": "0:34:01", "throughput": 19966.57, "total_tokens": 205073152} +{"current_steps": 65160, "total_steps": 78105, "loss": 0.1165, "lr": 4.069069222482555e-07, "epoch": 4.1713078548108316, "percentage": 83.43, "elapsed_time": "2:51:11", "remaining_time": "0:34:00", "throughput": 19966.77, "total_tokens": 205088384} +{"current_steps": 65165, "total_steps": 78105, "loss": 0.144, "lr": 4.066014811449198e-07, "epoch": 4.1716279367518085, "percentage": 83.43, "elapsed_time": "2:51:12", "remaining_time": "0:33:59", "throughput": 19967.02, "total_tokens": 205104832} +{"current_steps": 65170, "total_steps": 78105, "loss": 0.1505, "lr": 4.0629614457500214e-07, "epoch": 4.1719480186927855, "percentage": 83.44, "elapsed_time": "2:51:12", "remaining_time": "0:33:58", "throughput": 19967.23, "total_tokens": 205120576} +{"current_steps": 65175, "total_steps": 78105, "loss": 0.1021, "lr": 4.059909125537495e-07, "epoch": 4.1722681006337625, "percentage": 83.45, "elapsed_time": "2:51:13", "remaining_time": "0:33:58", "throughput": 19967.45, "total_tokens": 205136128} +{"current_steps": 65180, "total_steps": 78105, "loss": 0.1506, "lr": 4.0568578509640245e-07, "epoch": 4.1725881825747395, "percentage": 83.45, "elapsed_time": "2:51:14", "remaining_time": "0:33:57", "throughput": 19967.63, "total_tokens": 205150912} +{"current_steps": 65185, "total_steps": 78105, "loss": 0.1284, "lr": 4.053807622181999e-07, "epoch": 4.172908264515716, "percentage": 83.46, "elapsed_time": "2:51:14", "remaining_time": "0:33:56", "throughput": 19967.86, "total_tokens": 205167040} +{"current_steps": 65190, "total_steps": 78105, "loss": 0.1668, "lr": 4.0507584393437023e-07, "epoch": 4.173228346456693, "percentage": 83.46, "elapsed_time": "2:51:15", "remaining_time": "0:33:55", "throughput": 19968.14, "total_tokens": 205184320} +{"current_steps": 65195, "total_steps": 78105, "loss": 0.136, "lr": 4.047710302601415e-07, "epoch": 4.17354842839767, "percentage": 83.47, "elapsed_time": "2:51:16", "remaining_time": "0:33:54", "throughput": 19968.37, "total_tokens": 205200192} +{"current_steps": 65200, "total_steps": 78105, "loss": 0.1075, "lr": 4.0446632121073387e-07, "epoch": 4.173868510338647, "percentage": 83.48, "elapsed_time": "2:51:16", "remaining_time": "0:33:54", "throughput": 19968.59, "total_tokens": 205216128} +{"current_steps": 65205, "total_steps": 78105, "loss": 0.1893, "lr": 4.0416171680136285e-07, "epoch": 4.174188592279624, "percentage": 83.48, "elapsed_time": "2:51:17", "remaining_time": "0:33:53", "throughput": 19968.9, "total_tokens": 205234624} +{"current_steps": 65210, "total_steps": 78105, "loss": 0.117, "lr": 4.0385721704723906e-07, "epoch": 4.174508674220601, "percentage": 83.49, "elapsed_time": "2:51:18", "remaining_time": "0:33:52", "throughput": 19969.12, "total_tokens": 205249984} +{"current_steps": 65215, "total_steps": 78105, "loss": 0.1858, "lr": 4.035528219635673e-07, "epoch": 4.174828756161578, "percentage": 83.5, "elapsed_time": "2:51:19", "remaining_time": "0:33:51", "throughput": 19969.32, "total_tokens": 205265152} +{"current_steps": 65220, "total_steps": 78105, "loss": 0.1583, "lr": 4.032485315655471e-07, "epoch": 4.1751488381025545, "percentage": 83.5, "elapsed_time": "2:51:19", "remaining_time": "0:33:50", "throughput": 19969.61, "total_tokens": 205282368} +{"current_steps": 65225, "total_steps": 78105, "loss": 0.1332, "lr": 4.029443458683746e-07, "epoch": 4.1754689200435315, "percentage": 83.51, "elapsed_time": "2:51:20", "remaining_time": "0:33:50", "throughput": 19969.81, "total_tokens": 205297664} +{"current_steps": 65230, "total_steps": 78105, "loss": 0.0907, "lr": 4.026402648872374e-07, "epoch": 4.175789001984508, "percentage": 83.52, "elapsed_time": "2:51:21", "remaining_time": "0:33:49", "throughput": 19970.02, "total_tokens": 205312960} +{"current_steps": 65235, "total_steps": 78105, "loss": 0.1385, "lr": 4.023362886373211e-07, "epoch": 4.176109083925485, "percentage": 83.52, "elapsed_time": "2:51:21", "remaining_time": "0:33:48", "throughput": 19970.23, "total_tokens": 205328448} +{"current_steps": 65240, "total_steps": 78105, "loss": 0.1852, "lr": 4.0203241713380414e-07, "epoch": 4.176429165866462, "percentage": 83.53, "elapsed_time": "2:51:22", "remaining_time": "0:33:47", "throughput": 19970.53, "total_tokens": 205346112} +{"current_steps": 65245, "total_steps": 78105, "loss": 0.1236, "lr": 4.017286503918605e-07, "epoch": 4.176749247807439, "percentage": 83.53, "elapsed_time": "2:51:23", "remaining_time": "0:33:46", "throughput": 19970.74, "total_tokens": 205361664} +{"current_steps": 65250, "total_steps": 78105, "loss": 0.1978, "lr": 4.0142498842665853e-07, "epoch": 4.177069329748416, "percentage": 83.54, "elapsed_time": "2:51:23", "remaining_time": "0:33:46", "throughput": 19970.93, "total_tokens": 205377152} +{"current_steps": 65255, "total_steps": 78105, "loss": 0.1269, "lr": 4.0112143125336093e-07, "epoch": 4.177389411689393, "percentage": 83.55, "elapsed_time": "2:51:24", "remaining_time": "0:33:45", "throughput": 19971.13, "total_tokens": 205392320} +{"current_steps": 65260, "total_steps": 78105, "loss": 0.212, "lr": 4.0081797888712776e-07, "epoch": 4.17770949363037, "percentage": 83.55, "elapsed_time": "2:51:25", "remaining_time": "0:33:44", "throughput": 19971.3, "total_tokens": 205407296} +{"current_steps": 65265, "total_steps": 78105, "loss": 0.1213, "lr": 4.0051463134310906e-07, "epoch": 4.178029575571347, "percentage": 83.56, "elapsed_time": "2:51:25", "remaining_time": "0:33:43", "throughput": 19971.51, "total_tokens": 205422720} +{"current_steps": 65270, "total_steps": 78105, "loss": 0.1097, "lr": 4.00211388636455e-07, "epoch": 4.178349657512324, "percentage": 83.57, "elapsed_time": "2:51:26", "remaining_time": "0:33:42", "throughput": 19971.72, "total_tokens": 205438656} +{"current_steps": 65275, "total_steps": 78105, "loss": 0.1315, "lr": 3.999082507823057e-07, "epoch": 4.1786697394533, "percentage": 83.57, "elapsed_time": "2:51:27", "remaining_time": "0:33:41", "throughput": 19971.91, "total_tokens": 205453760} +{"current_steps": 65280, "total_steps": 78105, "loss": 0.1704, "lr": 3.9960521779580014e-07, "epoch": 4.178989821394277, "percentage": 83.58, "elapsed_time": "2:51:27", "remaining_time": "0:33:41", "throughput": 19972.12, "total_tokens": 205469056} +{"current_steps": 65285, "total_steps": 78105, "loss": 0.1219, "lr": 3.9930228969206925e-07, "epoch": 4.179309903335254, "percentage": 83.59, "elapsed_time": "2:51:28", "remaining_time": "0:33:40", "throughput": 19972.33, "total_tokens": 205484736} +{"current_steps": 65290, "total_steps": 78105, "loss": 0.1858, "lr": 3.9899946648623983e-07, "epoch": 4.179629985276231, "percentage": 83.59, "elapsed_time": "2:51:29", "remaining_time": "0:33:39", "throughput": 19972.5, "total_tokens": 205499520} +{"current_steps": 65295, "total_steps": 78105, "loss": 0.1224, "lr": 3.986967481934334e-07, "epoch": 4.179950067217208, "percentage": 83.6, "elapsed_time": "2:51:29", "remaining_time": "0:33:38", "throughput": 19972.68, "total_tokens": 205514304} +{"current_steps": 65300, "total_steps": 78105, "loss": 0.1172, "lr": 3.9839413482876565e-07, "epoch": 4.180270149158185, "percentage": 83.61, "elapsed_time": "2:51:30", "remaining_time": "0:33:37", "throughput": 19972.9, "total_tokens": 205529984} +{"current_steps": 65305, "total_steps": 78105, "loss": 0.1216, "lr": 3.9809162640734837e-07, "epoch": 4.180590231099162, "percentage": 83.61, "elapsed_time": "2:51:31", "remaining_time": "0:33:37", "throughput": 19973.11, "total_tokens": 205545664} +{"current_steps": 65310, "total_steps": 78105, "loss": 0.1885, "lr": 3.97789222944287e-07, "epoch": 4.180910313040139, "percentage": 83.62, "elapsed_time": "2:51:31", "remaining_time": "0:33:36", "throughput": 19973.32, "total_tokens": 205561152} +{"current_steps": 65315, "total_steps": 78105, "loss": 0.1337, "lr": 3.974869244546817e-07, "epoch": 4.181230394981116, "percentage": 83.62, "elapsed_time": "2:51:32", "remaining_time": "0:33:35", "throughput": 19973.52, "total_tokens": 205576512} +{"current_steps": 65320, "total_steps": 78105, "loss": 0.1512, "lr": 3.971847309536275e-07, "epoch": 4.181550476922092, "percentage": 83.63, "elapsed_time": "2:51:33", "remaining_time": "0:33:34", "throughput": 19973.7, "total_tokens": 205591104} +{"current_steps": 65325, "total_steps": 78105, "loss": 0.1086, "lr": 3.96882642456215e-07, "epoch": 4.181870558863069, "percentage": 83.64, "elapsed_time": "2:51:33", "remaining_time": "0:33:33", "throughput": 19973.9, "total_tokens": 205606464} +{"current_steps": 65330, "total_steps": 78105, "loss": 0.1306, "lr": 3.9658065897752867e-07, "epoch": 4.182190640804046, "percentage": 83.64, "elapsed_time": "2:51:34", "remaining_time": "0:33:33", "throughput": 19974.1, "total_tokens": 205621888} +{"current_steps": 65335, "total_steps": 78105, "loss": 0.1259, "lr": 3.962787805326479e-07, "epoch": 4.182510722745023, "percentage": 83.65, "elapsed_time": "2:51:35", "remaining_time": "0:33:32", "throughput": 19974.32, "total_tokens": 205637440} +{"current_steps": 65340, "total_steps": 78105, "loss": 0.1316, "lr": 3.959770071366462e-07, "epoch": 4.182830804686, "percentage": 83.66, "elapsed_time": "2:51:35", "remaining_time": "0:33:31", "throughput": 19974.55, "total_tokens": 205653440} +{"current_steps": 65345, "total_steps": 78105, "loss": 0.1136, "lr": 3.9567533880459446e-07, "epoch": 4.183150886626977, "percentage": 83.66, "elapsed_time": "2:51:36", "remaining_time": "0:33:30", "throughput": 19974.74, "total_tokens": 205668992} +{"current_steps": 65350, "total_steps": 78105, "loss": 0.1497, "lr": 3.953737755515541e-07, "epoch": 4.183470968567954, "percentage": 83.67, "elapsed_time": "2:51:37", "remaining_time": "0:33:29", "throughput": 19974.92, "total_tokens": 205683904} +{"current_steps": 65355, "total_steps": 78105, "loss": 0.1018, "lr": 3.950723173925855e-07, "epoch": 4.183791050508931, "percentage": 83.68, "elapsed_time": "2:51:37", "remaining_time": "0:33:28", "throughput": 19975.12, "total_tokens": 205699200} +{"current_steps": 65360, "total_steps": 78105, "loss": 0.098, "lr": 3.9477096434274107e-07, "epoch": 4.184111132449907, "percentage": 83.68, "elapsed_time": "2:51:38", "remaining_time": "0:33:28", "throughput": 19975.39, "total_tokens": 205716096} +{"current_steps": 65365, "total_steps": 78105, "loss": 0.1628, "lr": 3.944697164170691e-07, "epoch": 4.184431214390884, "percentage": 83.69, "elapsed_time": "2:51:39", "remaining_time": "0:33:27", "throughput": 19975.62, "total_tokens": 205732416} +{"current_steps": 65370, "total_steps": 78105, "loss": 0.1253, "lr": 3.941685736306122e-07, "epoch": 4.184751296331861, "percentage": 83.7, "elapsed_time": "2:51:39", "remaining_time": "0:33:26", "throughput": 19975.81, "total_tokens": 205747520} +{"current_steps": 65375, "total_steps": 78105, "loss": 0.1618, "lr": 3.93867535998407e-07, "epoch": 4.185071378272838, "percentage": 83.7, "elapsed_time": "2:51:40", "remaining_time": "0:33:25", "throughput": 19976.06, "total_tokens": 205764160} +{"current_steps": 65380, "total_steps": 78105, "loss": 0.1273, "lr": 3.935666035354882e-07, "epoch": 4.185391460213815, "percentage": 83.71, "elapsed_time": "2:51:41", "remaining_time": "0:33:24", "throughput": 19976.31, "total_tokens": 205780416} +{"current_steps": 65385, "total_steps": 78105, "loss": 0.1622, "lr": 3.932657762568798e-07, "epoch": 4.185711542154792, "percentage": 83.71, "elapsed_time": "2:51:41", "remaining_time": "0:33:24", "throughput": 19976.52, "total_tokens": 205796288} +{"current_steps": 65390, "total_steps": 78105, "loss": 0.1556, "lr": 3.9296505417760535e-07, "epoch": 4.186031624095769, "percentage": 83.72, "elapsed_time": "2:51:42", "remaining_time": "0:33:23", "throughput": 19976.75, "total_tokens": 205812672} +{"current_steps": 65395, "total_steps": 78105, "loss": 0.1712, "lr": 3.926644373126812e-07, "epoch": 4.186351706036746, "percentage": 83.73, "elapsed_time": "2:51:43", "remaining_time": "0:33:22", "throughput": 19977.03, "total_tokens": 205829952} +{"current_steps": 65400, "total_steps": 78105, "loss": 0.1052, "lr": 3.9236392567711835e-07, "epoch": 4.186671787977723, "percentage": 83.73, "elapsed_time": "2:51:44", "remaining_time": "0:33:21", "throughput": 19977.25, "total_tokens": 205845760} +{"current_steps": 65405, "total_steps": 78105, "loss": 0.1405, "lr": 3.9206351928592263e-07, "epoch": 4.186991869918699, "percentage": 83.74, "elapsed_time": "2:51:44", "remaining_time": "0:33:20", "throughput": 19977.5, "total_tokens": 205862784} +{"current_steps": 65410, "total_steps": 78105, "loss": 0.1094, "lr": 3.917632181540945e-07, "epoch": 4.187311951859676, "percentage": 83.75, "elapsed_time": "2:51:45", "remaining_time": "0:33:20", "throughput": 19977.72, "total_tokens": 205878976} +{"current_steps": 65415, "total_steps": 78105, "loss": 0.1794, "lr": 3.9146302229663093e-07, "epoch": 4.187632033800653, "percentage": 83.75, "elapsed_time": "2:51:46", "remaining_time": "0:33:19", "throughput": 19977.93, "total_tokens": 205894528} +{"current_steps": 65420, "total_steps": 78105, "loss": 0.1093, "lr": 3.911629317285198e-07, "epoch": 4.18795211574163, "percentage": 83.76, "elapsed_time": "2:51:46", "remaining_time": "0:33:18", "throughput": 19978.16, "total_tokens": 205910592} +{"current_steps": 65425, "total_steps": 78105, "loss": 0.1483, "lr": 3.9086294646474817e-07, "epoch": 4.188272197682607, "percentage": 83.77, "elapsed_time": "2:51:47", "remaining_time": "0:33:17", "throughput": 19978.38, "total_tokens": 205926656} +{"current_steps": 65430, "total_steps": 78105, "loss": 0.1685, "lr": 3.9056306652029473e-07, "epoch": 4.188592279623584, "percentage": 83.77, "elapsed_time": "2:51:48", "remaining_time": "0:33:16", "throughput": 19978.57, "total_tokens": 205941952} +{"current_steps": 65435, "total_steps": 78105, "loss": 0.1527, "lr": 3.90263291910134e-07, "epoch": 4.188912361564561, "percentage": 83.78, "elapsed_time": "2:51:48", "remaining_time": "0:33:16", "throughput": 19978.81, "total_tokens": 205958336} +{"current_steps": 65440, "total_steps": 78105, "loss": 0.1384, "lr": 3.899636226492354e-07, "epoch": 4.189232443505538, "percentage": 83.78, "elapsed_time": "2:51:49", "remaining_time": "0:33:15", "throughput": 19979.02, "total_tokens": 205973824} +{"current_steps": 65445, "total_steps": 78105, "loss": 0.111, "lr": 3.896640587525627e-07, "epoch": 4.189552525446515, "percentage": 83.79, "elapsed_time": "2:51:50", "remaining_time": "0:33:14", "throughput": 19979.21, "total_tokens": 205989056} +{"current_steps": 65450, "total_steps": 78105, "loss": 0.0883, "lr": 3.893646002350737e-07, "epoch": 4.189872607387491, "percentage": 83.8, "elapsed_time": "2:51:50", "remaining_time": "0:33:13", "throughput": 19979.48, "total_tokens": 206005824} +{"current_steps": 65455, "total_steps": 78105, "loss": 0.1226, "lr": 3.890652471117243e-07, "epoch": 4.190192689328468, "percentage": 83.8, "elapsed_time": "2:51:51", "remaining_time": "0:33:12", "throughput": 19979.68, "total_tokens": 206021376} +{"current_steps": 65460, "total_steps": 78105, "loss": 0.1542, "lr": 3.887659993974596e-07, "epoch": 4.190512771269445, "percentage": 83.81, "elapsed_time": "2:51:52", "remaining_time": "0:33:12", "throughput": 19979.86, "total_tokens": 206036416} +{"current_steps": 65465, "total_steps": 78105, "loss": 0.0965, "lr": 3.88466857107225e-07, "epoch": 4.190832853210422, "percentage": 83.82, "elapsed_time": "2:51:52", "remaining_time": "0:33:11", "throughput": 19980.19, "total_tokens": 206055040} +{"current_steps": 65470, "total_steps": 78105, "loss": 0.1405, "lr": 3.881678202559558e-07, "epoch": 4.191152935151399, "percentage": 83.82, "elapsed_time": "2:51:53", "remaining_time": "0:33:10", "throughput": 19980.42, "total_tokens": 206070848} +{"current_steps": 65475, "total_steps": 78105, "loss": 0.1526, "lr": 3.878688888585863e-07, "epoch": 4.191473017092376, "percentage": 83.83, "elapsed_time": "2:51:54", "remaining_time": "0:33:09", "throughput": 19980.61, "total_tokens": 206086272} +{"current_steps": 65480, "total_steps": 78105, "loss": 0.1044, "lr": 3.875700629300425e-07, "epoch": 4.191793099033353, "percentage": 83.84, "elapsed_time": "2:51:55", "remaining_time": "0:33:08", "throughput": 19980.89, "total_tokens": 206103744} +{"current_steps": 65485, "total_steps": 78105, "loss": 0.1167, "lr": 3.8727134248524617e-07, "epoch": 4.19211318097433, "percentage": 83.84, "elapsed_time": "2:51:55", "remaining_time": "0:33:08", "throughput": 19981.17, "total_tokens": 206121152} +{"current_steps": 65490, "total_steps": 78105, "loss": 0.1162, "lr": 3.8697272753911526e-07, "epoch": 4.192433262915307, "percentage": 83.85, "elapsed_time": "2:51:56", "remaining_time": "0:33:07", "throughput": 19981.4, "total_tokens": 206137472} +{"current_steps": 65495, "total_steps": 78105, "loss": 0.1418, "lr": 3.8667421810655887e-07, "epoch": 4.192753344856283, "percentage": 83.86, "elapsed_time": "2:51:57", "remaining_time": "0:33:06", "throughput": 19981.62, "total_tokens": 206153280} +{"current_steps": 65500, "total_steps": 78105, "loss": 0.1267, "lr": 3.863758142024854e-07, "epoch": 4.19307342679726, "percentage": 83.86, "elapsed_time": "2:51:57", "remaining_time": "0:33:05", "throughput": 19981.88, "total_tokens": 206169984} +{"current_steps": 65505, "total_steps": 78105, "loss": 0.1162, "lr": 3.860775158417929e-07, "epoch": 4.193393508738237, "percentage": 83.87, "elapsed_time": "2:51:58", "remaining_time": "0:33:04", "throughput": 19982.1, "total_tokens": 206185472} +{"current_steps": 65510, "total_steps": 78105, "loss": 0.1482, "lr": 3.8577932303937886e-07, "epoch": 4.193713590679214, "percentage": 83.87, "elapsed_time": "2:51:59", "remaining_time": "0:33:03", "throughput": 19982.3, "total_tokens": 206200704} +{"current_steps": 65515, "total_steps": 78105, "loss": 0.1442, "lr": 3.8548123581013287e-07, "epoch": 4.194033672620191, "percentage": 83.88, "elapsed_time": "2:51:59", "remaining_time": "0:33:03", "throughput": 19982.49, "total_tokens": 206215680} +{"current_steps": 65520, "total_steps": 78105, "loss": 0.1025, "lr": 3.851832541689399e-07, "epoch": 4.194353754561168, "percentage": 83.89, "elapsed_time": "2:52:00", "remaining_time": "0:33:02", "throughput": 19982.69, "total_tokens": 206230976} +{"current_steps": 65525, "total_steps": 78105, "loss": 0.1415, "lr": 3.848853781306794e-07, "epoch": 4.194673836502145, "percentage": 83.89, "elapsed_time": "2:52:01", "remaining_time": "0:33:01", "throughput": 19982.89, "total_tokens": 206246720} +{"current_steps": 65530, "total_steps": 78105, "loss": 0.1326, "lr": 3.845876077102256e-07, "epoch": 4.194993918443122, "percentage": 83.9, "elapsed_time": "2:52:01", "remaining_time": "0:33:00", "throughput": 19983.16, "total_tokens": 206263808} +{"current_steps": 65535, "total_steps": 78105, "loss": 0.1183, "lr": 3.842899429224481e-07, "epoch": 4.195314000384099, "percentage": 83.91, "elapsed_time": "2:52:02", "remaining_time": "0:32:59", "throughput": 19983.4, "total_tokens": 206280256} +{"current_steps": 65540, "total_steps": 78105, "loss": 0.1151, "lr": 3.8399238378221073e-07, "epoch": 4.195634082325075, "percentage": 83.91, "elapsed_time": "2:52:03", "remaining_time": "0:32:59", "throughput": 19983.61, "total_tokens": 206295744} +{"current_steps": 65545, "total_steps": 78105, "loss": 0.0828, "lr": 3.8369493030437187e-07, "epoch": 4.195954164266052, "percentage": 83.92, "elapsed_time": "2:52:03", "remaining_time": "0:32:58", "throughput": 19983.89, "total_tokens": 206313664} +{"current_steps": 65550, "total_steps": 78105, "loss": 0.1232, "lr": 3.833975825037847e-07, "epoch": 4.196274246207029, "percentage": 83.93, "elapsed_time": "2:52:04", "remaining_time": "0:32:57", "throughput": 19984.08, "total_tokens": 206329216} +{"current_steps": 65555, "total_steps": 78105, "loss": 0.1237, "lr": 3.8310034039529736e-07, "epoch": 4.196594328148006, "percentage": 83.93, "elapsed_time": "2:52:05", "remaining_time": "0:32:56", "throughput": 19984.28, "total_tokens": 206344576} +{"current_steps": 65560, "total_steps": 78105, "loss": 0.0961, "lr": 3.828032039937524e-07, "epoch": 4.196914410088983, "percentage": 83.94, "elapsed_time": "2:52:06", "remaining_time": "0:32:55", "throughput": 19984.49, "total_tokens": 206360192} +{"current_steps": 65565, "total_steps": 78105, "loss": 0.1549, "lr": 3.825061733139873e-07, "epoch": 4.19723449202996, "percentage": 83.94, "elapsed_time": "2:52:06", "remaining_time": "0:32:55", "throughput": 19984.7, "total_tokens": 206375808} +{"current_steps": 65570, "total_steps": 78105, "loss": 0.1523, "lr": 3.822092483708337e-07, "epoch": 4.197554573970937, "percentage": 83.95, "elapsed_time": "2:52:07", "remaining_time": "0:32:54", "throughput": 19984.88, "total_tokens": 206390720} +{"current_steps": 65575, "total_steps": 78105, "loss": 0.13, "lr": 3.819124291791204e-07, "epoch": 4.197874655911914, "percentage": 83.96, "elapsed_time": "2:52:08", "remaining_time": "0:32:53", "throughput": 19985.18, "total_tokens": 206408320} +{"current_steps": 65580, "total_steps": 78105, "loss": 0.1249, "lr": 3.8161571575366654e-07, "epoch": 4.198194737852891, "percentage": 83.96, "elapsed_time": "2:52:08", "remaining_time": "0:32:52", "throughput": 19985.42, "total_tokens": 206424000} +{"current_steps": 65585, "total_steps": 78105, "loss": 0.1413, "lr": 3.813191081092898e-07, "epoch": 4.198514819793867, "percentage": 83.97, "elapsed_time": "2:52:09", "remaining_time": "0:32:51", "throughput": 19985.63, "total_tokens": 206439936} +{"current_steps": 65590, "total_steps": 78105, "loss": 0.1154, "lr": 3.810226062608013e-07, "epoch": 4.198834901734844, "percentage": 83.98, "elapsed_time": "2:52:10", "remaining_time": "0:32:51", "throughput": 19985.86, "total_tokens": 206455872} +{"current_steps": 65595, "total_steps": 78105, "loss": 0.1157, "lr": 3.8072621022300584e-07, "epoch": 4.199154983675821, "percentage": 83.98, "elapsed_time": "2:52:10", "remaining_time": "0:32:50", "throughput": 19986.03, "total_tokens": 206470656} +{"current_steps": 65600, "total_steps": 78105, "loss": 0.1487, "lr": 3.804299200107056e-07, "epoch": 4.199475065616798, "percentage": 83.99, "elapsed_time": "2:52:11", "remaining_time": "0:32:49", "throughput": 19986.26, "total_tokens": 206486656} +{"current_steps": 65605, "total_steps": 78105, "loss": 0.1272, "lr": 3.801337356386936e-07, "epoch": 4.199795147557775, "percentage": 84.0, "elapsed_time": "2:52:12", "remaining_time": "0:32:48", "throughput": 19986.48, "total_tokens": 206502400} +{"current_steps": 65610, "total_steps": 78105, "loss": 0.1387, "lr": 3.798376571217621e-07, "epoch": 4.200115229498752, "percentage": 84.0, "elapsed_time": "2:52:12", "remaining_time": "0:32:47", "throughput": 19986.74, "total_tokens": 206519232} +{"current_steps": 65615, "total_steps": 78105, "loss": 0.0857, "lr": 3.795416844746935e-07, "epoch": 4.200435311439729, "percentage": 84.01, "elapsed_time": "2:52:13", "remaining_time": "0:32:47", "throughput": 19986.92, "total_tokens": 206534016} +{"current_steps": 65620, "total_steps": 78105, "loss": 0.093, "lr": 3.792458177122685e-07, "epoch": 4.200755393380706, "percentage": 84.02, "elapsed_time": "2:52:14", "remaining_time": "0:32:46", "throughput": 19987.08, "total_tokens": 206548352} +{"current_steps": 65625, "total_steps": 78105, "loss": 0.1267, "lr": 3.7895005684926073e-07, "epoch": 4.201075475321682, "percentage": 84.02, "elapsed_time": "2:52:14", "remaining_time": "0:32:45", "throughput": 19987.35, "total_tokens": 206565312} +{"current_steps": 65630, "total_steps": 78105, "loss": 0.162, "lr": 3.78654401900439e-07, "epoch": 4.201395557262659, "percentage": 84.03, "elapsed_time": "2:52:15", "remaining_time": "0:32:44", "throughput": 19987.52, "total_tokens": 206579584} +{"current_steps": 65635, "total_steps": 78105, "loss": 0.1325, "lr": 3.7835885288056653e-07, "epoch": 4.201715639203636, "percentage": 84.03, "elapsed_time": "2:52:16", "remaining_time": "0:32:43", "throughput": 19987.68, "total_tokens": 206594112} +{"current_steps": 65640, "total_steps": 78105, "loss": 0.107, "lr": 3.78063409804402e-07, "epoch": 4.202035721144613, "percentage": 84.04, "elapsed_time": "2:52:16", "remaining_time": "0:32:42", "throughput": 19987.88, "total_tokens": 206609728} +{"current_steps": 65645, "total_steps": 78105, "loss": 0.1051, "lr": 3.7776807268669805e-07, "epoch": 4.20235580308559, "percentage": 84.05, "elapsed_time": "2:52:17", "remaining_time": "0:32:42", "throughput": 19988.08, "total_tokens": 206624896} +{"current_steps": 65650, "total_steps": 78105, "loss": 0.1849, "lr": 3.774728415422016e-07, "epoch": 4.202675885026567, "percentage": 84.05, "elapsed_time": "2:52:18", "remaining_time": "0:32:41", "throughput": 19988.29, "total_tokens": 206640064} +{"current_steps": 65655, "total_steps": 78105, "loss": 0.1867, "lr": 3.77177716385656e-07, "epoch": 4.202995966967544, "percentage": 84.06, "elapsed_time": "2:52:18", "remaining_time": "0:32:40", "throughput": 19988.47, "total_tokens": 206655296} +{"current_steps": 65660, "total_steps": 78105, "loss": 0.12, "lr": 3.768826972317982e-07, "epoch": 4.203316048908521, "percentage": 84.07, "elapsed_time": "2:52:19", "remaining_time": "0:32:39", "throughput": 19988.73, "total_tokens": 206672192} +{"current_steps": 65665, "total_steps": 78105, "loss": 0.1247, "lr": 3.765877840953594e-07, "epoch": 4.203636130849498, "percentage": 84.07, "elapsed_time": "2:52:20", "remaining_time": "0:32:38", "throughput": 19988.99, "total_tokens": 206688832} +{"current_steps": 65670, "total_steps": 78105, "loss": 0.1091, "lr": 3.762929769910662e-07, "epoch": 4.203956212790474, "percentage": 84.08, "elapsed_time": "2:52:20", "remaining_time": "0:32:38", "throughput": 19989.36, "total_tokens": 206708096} +{"current_steps": 65675, "total_steps": 78105, "loss": 0.1137, "lr": 3.7599827593363976e-07, "epoch": 4.204276294731451, "percentage": 84.09, "elapsed_time": "2:52:21", "remaining_time": "0:32:37", "throughput": 19989.57, "total_tokens": 206724160} +{"current_steps": 65680, "total_steps": 78105, "loss": 0.1201, "lr": 3.7570368093779535e-07, "epoch": 4.204596376672428, "percentage": 84.09, "elapsed_time": "2:52:22", "remaining_time": "0:32:36", "throughput": 19989.78, "total_tokens": 206739648} +{"current_steps": 65685, "total_steps": 78105, "loss": 0.1037, "lr": 3.754091920182451e-07, "epoch": 4.204916458613405, "percentage": 84.1, "elapsed_time": "2:52:22", "remaining_time": "0:32:35", "throughput": 19990.03, "total_tokens": 206756480} +{"current_steps": 65690, "total_steps": 78105, "loss": 0.1038, "lr": 3.7511480918969224e-07, "epoch": 4.205236540554382, "percentage": 84.1, "elapsed_time": "2:52:23", "remaining_time": "0:32:34", "throughput": 19990.23, "total_tokens": 206772096} +{"current_steps": 65695, "total_steps": 78105, "loss": 0.1128, "lr": 3.748205324668386e-07, "epoch": 4.205556622495359, "percentage": 84.11, "elapsed_time": "2:52:24", "remaining_time": "0:32:34", "throughput": 19990.43, "total_tokens": 206787520} +{"current_steps": 65700, "total_steps": 78105, "loss": 0.1101, "lr": 3.7452636186437683e-07, "epoch": 4.205876704436336, "percentage": 84.12, "elapsed_time": "2:52:24", "remaining_time": "0:32:33", "throughput": 19990.59, "total_tokens": 206801856} +{"current_steps": 65705, "total_steps": 78105, "loss": 0.1327, "lr": 3.7423229739699805e-07, "epoch": 4.206196786377313, "percentage": 84.12, "elapsed_time": "2:52:25", "remaining_time": "0:32:32", "throughput": 19990.82, "total_tokens": 206817856} +{"current_steps": 65710, "total_steps": 78105, "loss": 0.0846, "lr": 3.7393833907938576e-07, "epoch": 4.20651686831829, "percentage": 84.13, "elapsed_time": "2:52:26", "remaining_time": "0:32:31", "throughput": 19991.06, "total_tokens": 206834432} +{"current_steps": 65715, "total_steps": 78105, "loss": 0.1401, "lr": 3.7364448692621763e-07, "epoch": 4.206836950259266, "percentage": 84.14, "elapsed_time": "2:52:27", "remaining_time": "0:32:30", "throughput": 19991.27, "total_tokens": 206850048} +{"current_steps": 65720, "total_steps": 78105, "loss": 0.096, "lr": 3.7335074095216946e-07, "epoch": 4.207157032200243, "percentage": 84.14, "elapsed_time": "2:52:27", "remaining_time": "0:32:30", "throughput": 19991.53, "total_tokens": 206866560} +{"current_steps": 65725, "total_steps": 78105, "loss": 0.1242, "lr": 3.730571011719067e-07, "epoch": 4.20747711414122, "percentage": 84.15, "elapsed_time": "2:52:28", "remaining_time": "0:32:29", "throughput": 19991.76, "total_tokens": 206882880} +{"current_steps": 65730, "total_steps": 78105, "loss": 0.1136, "lr": 3.7276356760009455e-07, "epoch": 4.207797196082197, "percentage": 84.16, "elapsed_time": "2:52:29", "remaining_time": "0:32:28", "throughput": 19991.95, "total_tokens": 206897920} +{"current_steps": 65735, "total_steps": 78105, "loss": 0.091, "lr": 3.724701402513886e-07, "epoch": 4.208117278023174, "percentage": 84.16, "elapsed_time": "2:52:29", "remaining_time": "0:32:27", "throughput": 19992.15, "total_tokens": 206913408} +{"current_steps": 65740, "total_steps": 78105, "loss": 0.143, "lr": 3.7217681914044234e-07, "epoch": 4.208437359964151, "percentage": 84.17, "elapsed_time": "2:52:30", "remaining_time": "0:32:26", "throughput": 19992.41, "total_tokens": 206930432} +{"current_steps": 65745, "total_steps": 78105, "loss": 0.1638, "lr": 3.718836042819021e-07, "epoch": 4.208757441905128, "percentage": 84.18, "elapsed_time": "2:52:31", "remaining_time": "0:32:26", "throughput": 19992.69, "total_tokens": 206947456} +{"current_steps": 65750, "total_steps": 78105, "loss": 0.1498, "lr": 3.715904956904101e-07, "epoch": 4.209077523846105, "percentage": 84.18, "elapsed_time": "2:52:31", "remaining_time": "0:32:25", "throughput": 19992.89, "total_tokens": 206963008} +{"current_steps": 65755, "total_steps": 78105, "loss": 0.1051, "lr": 3.7129749338060214e-07, "epoch": 4.209397605787082, "percentage": 84.19, "elapsed_time": "2:52:32", "remaining_time": "0:32:24", "throughput": 19993.13, "total_tokens": 206979008} +{"current_steps": 65760, "total_steps": 78105, "loss": 0.1227, "lr": 3.7100459736710866e-07, "epoch": 4.209717687728058, "percentage": 84.19, "elapsed_time": "2:52:33", "remaining_time": "0:32:23", "throughput": 19993.35, "total_tokens": 206995072} +{"current_steps": 65765, "total_steps": 78105, "loss": 0.1134, "lr": 3.707118076645566e-07, "epoch": 4.210037769669035, "percentage": 84.2, "elapsed_time": "2:52:33", "remaining_time": "0:32:22", "throughput": 19993.55, "total_tokens": 207010624} +{"current_steps": 65770, "total_steps": 78105, "loss": 0.1118, "lr": 3.70419124287566e-07, "epoch": 4.210357851610012, "percentage": 84.21, "elapsed_time": "2:52:34", "remaining_time": "0:32:21", "throughput": 19993.78, "total_tokens": 207026688} +{"current_steps": 65775, "total_steps": 78105, "loss": 0.1667, "lr": 3.7012654725075147e-07, "epoch": 4.210677933550989, "percentage": 84.21, "elapsed_time": "2:52:35", "remaining_time": "0:32:21", "throughput": 19994.03, "total_tokens": 207043200} +{"current_steps": 65780, "total_steps": 78105, "loss": 0.1023, "lr": 3.698340765687233e-07, "epoch": 4.210998015491966, "percentage": 84.22, "elapsed_time": "2:52:35", "remaining_time": "0:32:20", "throughput": 19994.27, "total_tokens": 207059328} +{"current_steps": 65785, "total_steps": 78105, "loss": 0.085, "lr": 3.695417122560857e-07, "epoch": 4.211318097432943, "percentage": 84.23, "elapsed_time": "2:52:36", "remaining_time": "0:32:19", "throughput": 19994.45, "total_tokens": 207074560} +{"current_steps": 65790, "total_steps": 78105, "loss": 0.1445, "lr": 3.6924945432743777e-07, "epoch": 4.21163817937392, "percentage": 84.23, "elapsed_time": "2:52:37", "remaining_time": "0:32:18", "throughput": 19994.63, "total_tokens": 207089408} +{"current_steps": 65795, "total_steps": 78105, "loss": 0.1427, "lr": 3.6895730279737336e-07, "epoch": 4.211958261314897, "percentage": 84.24, "elapsed_time": "2:52:38", "remaining_time": "0:32:18", "throughput": 19994.02, "total_tokens": 207105728} +{"current_steps": 65800, "total_steps": 78105, "loss": 0.1415, "lr": 3.6866525768048057e-07, "epoch": 4.212278343255874, "percentage": 84.25, "elapsed_time": "2:52:39", "remaining_time": "0:32:17", "throughput": 19993.58, "total_tokens": 207121408} +{"current_steps": 65805, "total_steps": 78105, "loss": 0.1295, "lr": 3.6837331899134436e-07, "epoch": 4.21259842519685, "percentage": 84.25, "elapsed_time": "2:52:40", "remaining_time": "0:32:16", "throughput": 19993.84, "total_tokens": 207138560} +{"current_steps": 65810, "total_steps": 78105, "loss": 0.1691, "lr": 3.680814867445401e-07, "epoch": 4.212918507137827, "percentage": 84.26, "elapsed_time": "2:52:40", "remaining_time": "0:32:15", "throughput": 19994.04, "total_tokens": 207154048} +{"current_steps": 65815, "total_steps": 78105, "loss": 0.1338, "lr": 3.6778976095464215e-07, "epoch": 4.213238589078804, "percentage": 84.26, "elapsed_time": "2:52:41", "remaining_time": "0:32:14", "throughput": 19994.21, "total_tokens": 207169088} +{"current_steps": 65820, "total_steps": 78105, "loss": 0.1106, "lr": 3.674981416362172e-07, "epoch": 4.213558671019781, "percentage": 84.27, "elapsed_time": "2:52:42", "remaining_time": "0:32:14", "throughput": 19994.41, "total_tokens": 207184384} +{"current_steps": 65825, "total_steps": 78105, "loss": 0.1202, "lr": 3.6720662880382703e-07, "epoch": 4.213878752960758, "percentage": 84.28, "elapsed_time": "2:52:42", "remaining_time": "0:32:13", "throughput": 19994.6, "total_tokens": 207199552} +{"current_steps": 65830, "total_steps": 78105, "loss": 0.1289, "lr": 3.669152224720293e-07, "epoch": 4.214198834901735, "percentage": 84.28, "elapsed_time": "2:52:43", "remaining_time": "0:32:12", "throughput": 19994.8, "total_tokens": 207215232} +{"current_steps": 65835, "total_steps": 78105, "loss": 0.1326, "lr": 3.6662392265537334e-07, "epoch": 4.214518916842712, "percentage": 84.29, "elapsed_time": "2:52:44", "remaining_time": "0:32:11", "throughput": 19994.99, "total_tokens": 207230464} +{"current_steps": 65840, "total_steps": 78105, "loss": 0.1068, "lr": 3.663327293684077e-07, "epoch": 4.214838998783689, "percentage": 84.3, "elapsed_time": "2:52:44", "remaining_time": "0:32:10", "throughput": 19995.19, "total_tokens": 207246208} +{"current_steps": 65845, "total_steps": 78105, "loss": 0.1396, "lr": 3.660416426256705e-07, "epoch": 4.215159080724666, "percentage": 84.3, "elapsed_time": "2:52:45", "remaining_time": "0:32:09", "throughput": 19995.4, "total_tokens": 207261824} +{"current_steps": 65850, "total_steps": 78105, "loss": 0.1417, "lr": 3.657506624416987e-07, "epoch": 4.215479162665642, "percentage": 84.31, "elapsed_time": "2:52:46", "remaining_time": "0:32:09", "throughput": 19995.62, "total_tokens": 207277952} +{"current_steps": 65855, "total_steps": 78105, "loss": 0.1551, "lr": 3.6545978883102214e-07, "epoch": 4.215799244606619, "percentage": 84.32, "elapsed_time": "2:52:46", "remaining_time": "0:32:08", "throughput": 19995.86, "total_tokens": 207294464} +{"current_steps": 65860, "total_steps": 78105, "loss": 0.126, "lr": 3.6516902180816523e-07, "epoch": 4.216119326547596, "percentage": 84.32, "elapsed_time": "2:52:47", "remaining_time": "0:32:07", "throughput": 19996.06, "total_tokens": 207309760} +{"current_steps": 65865, "total_steps": 78105, "loss": 0.1593, "lr": 3.648783613876475e-07, "epoch": 4.216439408488573, "percentage": 84.33, "elapsed_time": "2:52:48", "remaining_time": "0:32:06", "throughput": 19996.24, "total_tokens": 207324928} +{"current_steps": 65870, "total_steps": 78105, "loss": 0.1487, "lr": 3.645878075839829e-07, "epoch": 4.21675949042955, "percentage": 84.34, "elapsed_time": "2:52:48", "remaining_time": "0:32:05", "throughput": 19996.49, "total_tokens": 207341312} +{"current_steps": 65875, "total_steps": 78105, "loss": 0.1265, "lr": 3.642973604116801e-07, "epoch": 4.217079572370527, "percentage": 84.34, "elapsed_time": "2:52:49", "remaining_time": "0:32:05", "throughput": 19996.74, "total_tokens": 207357824} +{"current_steps": 65880, "total_steps": 78105, "loss": 0.1482, "lr": 3.6400701988524215e-07, "epoch": 4.217399654311504, "percentage": 84.35, "elapsed_time": "2:52:50", "remaining_time": "0:32:04", "throughput": 19997.06, "total_tokens": 207376448} +{"current_steps": 65885, "total_steps": 78105, "loss": 0.1313, "lr": 3.6371678601916843e-07, "epoch": 4.217719736252481, "percentage": 84.35, "elapsed_time": "2:52:51", "remaining_time": "0:32:03", "throughput": 19997.32, "total_tokens": 207393408} +{"current_steps": 65890, "total_steps": 78105, "loss": 0.1284, "lr": 3.634266588279506e-07, "epoch": 4.218039818193457, "percentage": 84.36, "elapsed_time": "2:52:51", "remaining_time": "0:32:02", "throughput": 19997.54, "total_tokens": 207409280} +{"current_steps": 65895, "total_steps": 78105, "loss": 0.1631, "lr": 3.6313663832607676e-07, "epoch": 4.218359900134434, "percentage": 84.37, "elapsed_time": "2:52:52", "remaining_time": "0:32:01", "throughput": 19997.71, "total_tokens": 207424192} +{"current_steps": 65900, "total_steps": 78105, "loss": 0.107, "lr": 3.6284672452802845e-07, "epoch": 4.218679982075411, "percentage": 84.37, "elapsed_time": "2:52:53", "remaining_time": "0:32:01", "throughput": 19997.94, "total_tokens": 207440512} +{"current_steps": 65905, "total_steps": 78105, "loss": 0.1197, "lr": 3.6255691744828263e-07, "epoch": 4.219000064016388, "percentage": 84.38, "elapsed_time": "2:52:53", "remaining_time": "0:32:00", "throughput": 19998.13, "total_tokens": 207455872} +{"current_steps": 65910, "total_steps": 78105, "loss": 0.093, "lr": 3.622672171013106e-07, "epoch": 4.219320145957365, "percentage": 84.39, "elapsed_time": "2:52:54", "remaining_time": "0:31:59", "throughput": 19998.41, "total_tokens": 207473152} +{"current_steps": 65915, "total_steps": 78105, "loss": 0.1283, "lr": 3.619776235015796e-07, "epoch": 4.219640227898342, "percentage": 84.39, "elapsed_time": "2:52:55", "remaining_time": "0:31:58", "throughput": 19998.6, "total_tokens": 207488256} +{"current_steps": 65920, "total_steps": 78105, "loss": 0.1359, "lr": 3.616881366635483e-07, "epoch": 4.219960309839319, "percentage": 84.4, "elapsed_time": "2:52:55", "remaining_time": "0:31:57", "throughput": 19998.76, "total_tokens": 207502912} +{"current_steps": 65925, "total_steps": 78105, "loss": 0.1323, "lr": 3.613987566016747e-07, "epoch": 4.220280391780296, "percentage": 84.41, "elapsed_time": "2:52:56", "remaining_time": "0:31:57", "throughput": 19998.96, "total_tokens": 207518400} +{"current_steps": 65930, "total_steps": 78105, "loss": 0.1187, "lr": 3.611094833304063e-07, "epoch": 4.220600473721273, "percentage": 84.41, "elapsed_time": "2:52:57", "remaining_time": "0:31:56", "throughput": 19999.15, "total_tokens": 207533376} +{"current_steps": 65935, "total_steps": 78105, "loss": 0.1428, "lr": 3.608203168641899e-07, "epoch": 4.220920555662249, "percentage": 84.42, "elapsed_time": "2:52:57", "remaining_time": "0:31:55", "throughput": 19999.36, "total_tokens": 207549184} +{"current_steps": 65940, "total_steps": 78105, "loss": 0.1186, "lr": 3.6053125721746435e-07, "epoch": 4.221240637603226, "percentage": 84.42, "elapsed_time": "2:52:58", "remaining_time": "0:31:54", "throughput": 19999.57, "total_tokens": 207565440} +{"current_steps": 65945, "total_steps": 78105, "loss": 0.1489, "lr": 3.602423044046632e-07, "epoch": 4.221560719544203, "percentage": 84.43, "elapsed_time": "2:52:59", "remaining_time": "0:31:53", "throughput": 19999.8, "total_tokens": 207581568} +{"current_steps": 65950, "total_steps": 78105, "loss": 0.1264, "lr": 3.599534584402167e-07, "epoch": 4.22188080148518, "percentage": 84.44, "elapsed_time": "2:52:59", "remaining_time": "0:31:53", "throughput": 19999.99, "total_tokens": 207596992} +{"current_steps": 65955, "total_steps": 78105, "loss": 0.0979, "lr": 3.596647193385466e-07, "epoch": 4.222200883426157, "percentage": 84.44, "elapsed_time": "2:53:00", "remaining_time": "0:31:52", "throughput": 20000.18, "total_tokens": 207612352} +{"current_steps": 65960, "total_steps": 78105, "loss": 0.1098, "lr": 3.5937608711407223e-07, "epoch": 4.222520965367134, "percentage": 84.45, "elapsed_time": "2:53:01", "remaining_time": "0:31:51", "throughput": 20000.37, "total_tokens": 207627840} +{"current_steps": 65965, "total_steps": 78105, "loss": 0.1227, "lr": 3.590875617812059e-07, "epoch": 4.222841047308111, "percentage": 84.46, "elapsed_time": "2:53:01", "remaining_time": "0:31:50", "throughput": 20000.52, "total_tokens": 207642368} +{"current_steps": 65970, "total_steps": 78105, "loss": 0.0972, "lr": 3.58799143354355e-07, "epoch": 4.223161129249088, "percentage": 84.46, "elapsed_time": "2:53:02", "remaining_time": "0:31:49", "throughput": 20000.8, "total_tokens": 207659776} +{"current_steps": 65975, "total_steps": 78105, "loss": 0.1464, "lr": 3.585108318479219e-07, "epoch": 4.223481211190065, "percentage": 84.47, "elapsed_time": "2:53:03", "remaining_time": "0:31:49", "throughput": 20000.95, "total_tokens": 207674432} +{"current_steps": 65980, "total_steps": 78105, "loss": 0.1076, "lr": 3.582226272763034e-07, "epoch": 4.223801293131041, "percentage": 84.48, "elapsed_time": "2:53:03", "remaining_time": "0:31:48", "throughput": 20001.13, "total_tokens": 207689792} +{"current_steps": 65985, "total_steps": 78105, "loss": 0.1273, "lr": 3.5793452965389053e-07, "epoch": 4.224121375072018, "percentage": 84.48, "elapsed_time": "2:53:04", "remaining_time": "0:31:47", "throughput": 20001.41, "total_tokens": 207707200} +{"current_steps": 65990, "total_steps": 78105, "loss": 0.1367, "lr": 3.576465389950698e-07, "epoch": 4.224441457012995, "percentage": 84.49, "elapsed_time": "2:53:05", "remaining_time": "0:31:46", "throughput": 20001.56, "total_tokens": 207721408} +{"current_steps": 65995, "total_steps": 78105, "loss": 0.1176, "lr": 3.5735865531422107e-07, "epoch": 4.224761538953972, "percentage": 84.5, "elapsed_time": "2:53:06", "remaining_time": "0:31:45", "throughput": 20001.88, "total_tokens": 207739584} +{"current_steps": 66000, "total_steps": 78105, "loss": 0.1428, "lr": 3.5707087862572125e-07, "epoch": 4.225081620894949, "percentage": 84.5, "elapsed_time": "2:53:06", "remaining_time": "0:31:45", "throughput": 20002.09, "total_tokens": 207754944} +{"current_steps": 66005, "total_steps": 78105, "loss": 0.1425, "lr": 3.5678320894393964e-07, "epoch": 4.225401702835926, "percentage": 84.51, "elapsed_time": "2:53:09", "remaining_time": "0:31:44", "throughput": 19998.26, "total_tokens": 207771968} +{"current_steps": 66010, "total_steps": 78105, "loss": 0.1372, "lr": 3.5649564628324086e-07, "epoch": 4.225721784776903, "percentage": 84.51, "elapsed_time": "2:53:10", "remaining_time": "0:31:43", "throughput": 19998.58, "total_tokens": 207790720} +{"current_steps": 66015, "total_steps": 78105, "loss": 0.1162, "lr": 3.562081906579845e-07, "epoch": 4.22604186671788, "percentage": 84.52, "elapsed_time": "2:53:10", "remaining_time": "0:31:42", "throughput": 19998.78, "total_tokens": 207806144} +{"current_steps": 66020, "total_steps": 78105, "loss": 0.1124, "lr": 3.559208420825244e-07, "epoch": 4.226361948658857, "percentage": 84.53, "elapsed_time": "2:53:11", "remaining_time": "0:31:42", "throughput": 19998.98, "total_tokens": 207821504} +{"current_steps": 66025, "total_steps": 78105, "loss": 0.0644, "lr": 3.5563360057120937e-07, "epoch": 4.226682030599833, "percentage": 84.53, "elapsed_time": "2:53:12", "remaining_time": "0:31:41", "throughput": 19999.2, "total_tokens": 207837568} +{"current_steps": 66030, "total_steps": 78105, "loss": 0.1241, "lr": 3.553464661383821e-07, "epoch": 4.22700211254081, "percentage": 84.54, "elapsed_time": "2:53:12", "remaining_time": "0:31:40", "throughput": 19999.38, "total_tokens": 207852352} +{"current_steps": 66035, "total_steps": 78105, "loss": 0.1247, "lr": 3.550594387983825e-07, "epoch": 4.227322194481787, "percentage": 84.55, "elapsed_time": "2:53:13", "remaining_time": "0:31:39", "throughput": 19999.57, "total_tokens": 207867200} +{"current_steps": 66040, "total_steps": 78105, "loss": 0.1117, "lr": 3.547725185655407e-07, "epoch": 4.227642276422764, "percentage": 84.55, "elapsed_time": "2:53:15", "remaining_time": "0:31:39", "throughput": 19997.65, "total_tokens": 207883008} +{"current_steps": 66045, "total_steps": 78105, "loss": 0.143, "lr": 3.5448570545418587e-07, "epoch": 4.227962358363741, "percentage": 84.56, "elapsed_time": "2:53:16", "remaining_time": "0:31:38", "throughput": 19997.85, "total_tokens": 207898560} +{"current_steps": 66050, "total_steps": 78105, "loss": 0.1124, "lr": 3.5419899947863905e-07, "epoch": 4.228282440304718, "percentage": 84.57, "elapsed_time": "2:53:16", "remaining_time": "0:31:37", "throughput": 19998.09, "total_tokens": 207915200} +{"current_steps": 66055, "total_steps": 78105, "loss": 0.1827, "lr": 3.539124006532169e-07, "epoch": 4.228602522245695, "percentage": 84.57, "elapsed_time": "2:53:19", "remaining_time": "0:31:37", "throughput": 19995.4, "total_tokens": 207932224} +{"current_steps": 66060, "total_steps": 78105, "loss": 0.1057, "lr": 3.53625908992232e-07, "epoch": 4.228922604186672, "percentage": 84.58, "elapsed_time": "2:53:19", "remaining_time": "0:31:36", "throughput": 19995.61, "total_tokens": 207947968} +{"current_steps": 66065, "total_steps": 78105, "loss": 0.1429, "lr": 3.53339524509988e-07, "epoch": 4.229242686127649, "percentage": 84.58, "elapsed_time": "2:53:20", "remaining_time": "0:31:35", "throughput": 19995.79, "total_tokens": 207963072} +{"current_steps": 66070, "total_steps": 78105, "loss": 0.1562, "lr": 3.530532472207876e-07, "epoch": 4.229562768068625, "percentage": 84.59, "elapsed_time": "2:53:21", "remaining_time": "0:31:34", "throughput": 19996.0, "total_tokens": 207978880} +{"current_steps": 66075, "total_steps": 78105, "loss": 0.1663, "lr": 3.527670771389238e-07, "epoch": 4.229882850009602, "percentage": 84.6, "elapsed_time": "2:53:21", "remaining_time": "0:31:33", "throughput": 19996.16, "total_tokens": 207993536} +{"current_steps": 66080, "total_steps": 78105, "loss": 0.139, "lr": 3.5248101427868857e-07, "epoch": 4.230202931950579, "percentage": 84.6, "elapsed_time": "2:53:22", "remaining_time": "0:31:32", "throughput": 19996.4, "total_tokens": 208010048} +{"current_steps": 66085, "total_steps": 78105, "loss": 0.1293, "lr": 3.5219505865436515e-07, "epoch": 4.230523013891556, "percentage": 84.61, "elapsed_time": "2:53:23", "remaining_time": "0:31:32", "throughput": 19996.64, "total_tokens": 208026368} +{"current_steps": 66090, "total_steps": 78105, "loss": 0.0796, "lr": 3.5190921028023325e-07, "epoch": 4.230843095832533, "percentage": 84.62, "elapsed_time": "2:53:23", "remaining_time": "0:31:31", "throughput": 19996.84, "total_tokens": 208041792} +{"current_steps": 66095, "total_steps": 78105, "loss": 0.1153, "lr": 3.5162346917056613e-07, "epoch": 4.23116317777351, "percentage": 84.62, "elapsed_time": "2:53:24", "remaining_time": "0:31:30", "throughput": 19997.04, "total_tokens": 208057152} +{"current_steps": 66100, "total_steps": 78105, "loss": 0.1223, "lr": 3.513378353396327e-07, "epoch": 4.231483259714487, "percentage": 84.63, "elapsed_time": "2:53:25", "remaining_time": "0:31:29", "throughput": 19997.23, "total_tokens": 208072512} +{"current_steps": 66105, "total_steps": 78105, "loss": 0.1408, "lr": 3.5105230880169537e-07, "epoch": 4.231803341655464, "percentage": 84.64, "elapsed_time": "2:53:25", "remaining_time": "0:31:28", "throughput": 19997.42, "total_tokens": 208087808} +{"current_steps": 66110, "total_steps": 78105, "loss": 0.1467, "lr": 3.507668895710123e-07, "epoch": 4.232123423596441, "percentage": 84.64, "elapsed_time": "2:53:26", "remaining_time": "0:31:28", "throughput": 19997.6, "total_tokens": 208102720} +{"current_steps": 66115, "total_steps": 78105, "loss": 0.1314, "lr": 3.5048157766183587e-07, "epoch": 4.232443505537417, "percentage": 84.65, "elapsed_time": "2:53:27", "remaining_time": "0:31:27", "throughput": 19997.8, "total_tokens": 208118272} +{"current_steps": 66120, "total_steps": 78105, "loss": 0.1444, "lr": 3.501963730884134e-07, "epoch": 4.232763587478394, "percentage": 84.66, "elapsed_time": "2:53:27", "remaining_time": "0:31:26", "throughput": 19997.98, "total_tokens": 208133184} +{"current_steps": 66125, "total_steps": 78105, "loss": 0.1083, "lr": 3.4991127586498587e-07, "epoch": 4.233083669419371, "percentage": 84.66, "elapsed_time": "2:53:28", "remaining_time": "0:31:25", "throughput": 19998.17, "total_tokens": 208148608} +{"current_steps": 66130, "total_steps": 78105, "loss": 0.1114, "lr": 3.4962628600579e-07, "epoch": 4.233403751360348, "percentage": 84.67, "elapsed_time": "2:53:29", "remaining_time": "0:31:24", "throughput": 19998.44, "total_tokens": 208165440} +{"current_steps": 66135, "total_steps": 78105, "loss": 0.1222, "lr": 3.493414035250564e-07, "epoch": 4.233723833301325, "percentage": 84.67, "elapsed_time": "2:53:29", "remaining_time": "0:31:24", "throughput": 19998.64, "total_tokens": 208181184} +{"current_steps": 66140, "total_steps": 78105, "loss": 0.1674, "lr": 3.490566284370103e-07, "epoch": 4.234043915242302, "percentage": 84.68, "elapsed_time": "2:53:30", "remaining_time": "0:31:23", "throughput": 19998.85, "total_tokens": 208196992} +{"current_steps": 66145, "total_steps": 78105, "loss": 0.1058, "lr": 3.4877196075587344e-07, "epoch": 4.234363997183279, "percentage": 84.69, "elapsed_time": "2:53:31", "remaining_time": "0:31:22", "throughput": 19999.11, "total_tokens": 208214272} +{"current_steps": 66150, "total_steps": 78105, "loss": 0.1147, "lr": 3.484874004958583e-07, "epoch": 4.234684079124256, "percentage": 84.69, "elapsed_time": "2:53:31", "remaining_time": "0:31:21", "throughput": 19999.34, "total_tokens": 208230656} +{"current_steps": 66155, "total_steps": 78105, "loss": 0.1606, "lr": 3.482029476711768e-07, "epoch": 4.235004161065232, "percentage": 84.7, "elapsed_time": "2:53:32", "remaining_time": "0:31:20", "throughput": 19999.6, "total_tokens": 208247936} +{"current_steps": 66160, "total_steps": 78105, "loss": 0.1125, "lr": 3.479186022960304e-07, "epoch": 4.235324243006209, "percentage": 84.71, "elapsed_time": "2:53:33", "remaining_time": "0:31:20", "throughput": 19999.85, "total_tokens": 208264576} +{"current_steps": 66165, "total_steps": 78105, "loss": 0.0993, "lr": 3.4763436438462003e-07, "epoch": 4.235644324947186, "percentage": 84.71, "elapsed_time": "2:53:34", "remaining_time": "0:31:19", "throughput": 20000.1, "total_tokens": 208281408} +{"current_steps": 66170, "total_steps": 78105, "loss": 0.0993, "lr": 3.473502339511381e-07, "epoch": 4.235964406888163, "percentage": 84.72, "elapsed_time": "2:53:34", "remaining_time": "0:31:18", "throughput": 20000.33, "total_tokens": 208297344} +{"current_steps": 66175, "total_steps": 78105, "loss": 0.1202, "lr": 3.470662110097725e-07, "epoch": 4.23628448882914, "percentage": 84.73, "elapsed_time": "2:53:43", "remaining_time": "0:31:19", "throughput": 19985.16, "total_tokens": 208314176} +{"current_steps": 66180, "total_steps": 78105, "loss": 0.075, "lr": 3.467822955747069e-07, "epoch": 4.236604570770117, "percentage": 84.73, "elapsed_time": "2:53:44", "remaining_time": "0:31:18", "throughput": 19985.35, "total_tokens": 208329408} +{"current_steps": 66185, "total_steps": 78105, "loss": 0.1311, "lr": 3.4649848766011655e-07, "epoch": 4.236924652711094, "percentage": 84.74, "elapsed_time": "2:53:44", "remaining_time": "0:31:17", "throughput": 19985.57, "total_tokens": 208345280} +{"current_steps": 66190, "total_steps": 78105, "loss": 0.1266, "lr": 3.462147872801755e-07, "epoch": 4.237244734652071, "percentage": 84.74, "elapsed_time": "2:53:45", "remaining_time": "0:31:16", "throughput": 19985.73, "total_tokens": 208359488} +{"current_steps": 66195, "total_steps": 78105, "loss": 0.1105, "lr": 3.45931194449049e-07, "epoch": 4.237564816593048, "percentage": 84.75, "elapsed_time": "2:53:46", "remaining_time": "0:31:15", "throughput": 19985.89, "total_tokens": 208373888} +{"current_steps": 66200, "total_steps": 78105, "loss": 0.1334, "lr": 3.456477091808985e-07, "epoch": 4.237884898534024, "percentage": 84.76, "elapsed_time": "2:53:46", "remaining_time": "0:31:15", "throughput": 19986.08, "total_tokens": 208388800} +{"current_steps": 66205, "total_steps": 78105, "loss": 0.139, "lr": 3.4536433148988e-07, "epoch": 4.238204980475001, "percentage": 84.76, "elapsed_time": "2:53:47", "remaining_time": "0:31:14", "throughput": 19986.33, "total_tokens": 208405568} +{"current_steps": 66210, "total_steps": 78105, "loss": 0.1348, "lr": 3.450810613901437e-07, "epoch": 4.238525062415978, "percentage": 84.77, "elapsed_time": "2:53:48", "remaining_time": "0:31:13", "throughput": 19986.53, "total_tokens": 208421312} +{"current_steps": 66215, "total_steps": 78105, "loss": 0.0886, "lr": 3.447978988958345e-07, "epoch": 4.238845144356955, "percentage": 84.78, "elapsed_time": "2:53:48", "remaining_time": "0:31:12", "throughput": 19986.72, "total_tokens": 208436736} +{"current_steps": 66220, "total_steps": 78105, "loss": 0.153, "lr": 3.4451484402109213e-07, "epoch": 4.239165226297932, "percentage": 84.78, "elapsed_time": "2:53:49", "remaining_time": "0:31:11", "throughput": 19986.94, "total_tokens": 208452672} +{"current_steps": 66225, "total_steps": 78105, "loss": 0.1039, "lr": 3.442318967800504e-07, "epoch": 4.239485308238909, "percentage": 84.79, "elapsed_time": "2:53:50", "remaining_time": "0:31:11", "throughput": 19987.15, "total_tokens": 208468672} +{"current_steps": 66230, "total_steps": 78105, "loss": 0.111, "lr": 3.439490571868395e-07, "epoch": 4.239805390179886, "percentage": 84.8, "elapsed_time": "2:53:50", "remaining_time": "0:31:10", "throughput": 19987.33, "total_tokens": 208483584} +{"current_steps": 66235, "total_steps": 78105, "loss": 0.1269, "lr": 3.436663252555822e-07, "epoch": 4.240125472120863, "percentage": 84.8, "elapsed_time": "2:53:51", "remaining_time": "0:31:09", "throughput": 19987.56, "total_tokens": 208499840} +{"current_steps": 66240, "total_steps": 78105, "loss": 0.1484, "lr": 3.4338370100039654e-07, "epoch": 4.24044555406184, "percentage": 84.81, "elapsed_time": "2:53:52", "remaining_time": "0:31:08", "throughput": 19987.78, "total_tokens": 208515776} +{"current_steps": 66245, "total_steps": 78105, "loss": 0.1339, "lr": 3.431011844353954e-07, "epoch": 4.240765636002816, "percentage": 84.82, "elapsed_time": "2:53:52", "remaining_time": "0:31:07", "throughput": 19987.97, "total_tokens": 208531264} +{"current_steps": 66250, "total_steps": 78105, "loss": 0.1122, "lr": 3.4281877557468644e-07, "epoch": 4.241085717943793, "percentage": 84.82, "elapsed_time": "2:53:53", "remaining_time": "0:31:07", "throughput": 19988.18, "total_tokens": 208547008} +{"current_steps": 66255, "total_steps": 78105, "loss": 0.0951, "lr": 3.425364744323714e-07, "epoch": 4.24140579988477, "percentage": 84.83, "elapsed_time": "2:53:54", "remaining_time": "0:31:06", "throughput": 19988.35, "total_tokens": 208562112} +{"current_steps": 66260, "total_steps": 78105, "loss": 0.1762, "lr": 3.4225428102254627e-07, "epoch": 4.241725881825747, "percentage": 84.83, "elapsed_time": "2:53:54", "remaining_time": "0:31:05", "throughput": 19988.55, "total_tokens": 208577344} +{"current_steps": 66265, "total_steps": 78105, "loss": 0.117, "lr": 3.4197219535930445e-07, "epoch": 4.242045963766724, "percentage": 84.84, "elapsed_time": "2:53:55", "remaining_time": "0:31:04", "throughput": 19988.8, "total_tokens": 208593920} +{"current_steps": 66270, "total_steps": 78105, "loss": 0.1237, "lr": 3.416902174567291e-07, "epoch": 4.242366045707701, "percentage": 84.85, "elapsed_time": "2:53:56", "remaining_time": "0:31:03", "throughput": 19989.0, "total_tokens": 208609088} +{"current_steps": 66275, "total_steps": 78105, "loss": 0.1098, "lr": 3.4140834732890286e-07, "epoch": 4.242686127648678, "percentage": 84.85, "elapsed_time": "2:53:56", "remaining_time": "0:31:02", "throughput": 19989.17, "total_tokens": 208624192} +{"current_steps": 66280, "total_steps": 78105, "loss": 0.1449, "lr": 3.411265849898998e-07, "epoch": 4.243006209589655, "percentage": 84.86, "elapsed_time": "2:53:57", "remaining_time": "0:31:02", "throughput": 19989.42, "total_tokens": 208640960} +{"current_steps": 66285, "total_steps": 78105, "loss": 0.1017, "lr": 3.4084493045379003e-07, "epoch": 4.243326291530632, "percentage": 84.87, "elapsed_time": "2:53:58", "remaining_time": "0:31:01", "throughput": 19989.63, "total_tokens": 208656768} +{"current_steps": 66290, "total_steps": 78105, "loss": 0.1363, "lr": 3.4056338373463783e-07, "epoch": 4.243646373471608, "percentage": 84.87, "elapsed_time": "2:53:58", "remaining_time": "0:31:00", "throughput": 19989.81, "total_tokens": 208672192} +{"current_steps": 66295, "total_steps": 78105, "loss": 0.1305, "lr": 3.402819448465017e-07, "epoch": 4.243966455412585, "percentage": 84.88, "elapsed_time": "2:53:59", "remaining_time": "0:30:59", "throughput": 19990.02, "total_tokens": 208687680} +{"current_steps": 66300, "total_steps": 78105, "loss": 0.1184, "lr": 3.4000061380343645e-07, "epoch": 4.244286537353562, "percentage": 84.89, "elapsed_time": "2:54:00", "remaining_time": "0:30:58", "throughput": 19990.22, "total_tokens": 208702656} +{"current_steps": 66305, "total_steps": 78105, "loss": 0.1418, "lr": 3.397193906194887e-07, "epoch": 4.244606619294539, "percentage": 84.89, "elapsed_time": "2:54:00", "remaining_time": "0:30:58", "throughput": 19990.41, "total_tokens": 208718272} +{"current_steps": 66310, "total_steps": 78105, "loss": 0.1569, "lr": 3.3943827530870276e-07, "epoch": 4.244926701235516, "percentage": 84.9, "elapsed_time": "2:54:01", "remaining_time": "0:30:57", "throughput": 19990.59, "total_tokens": 208733248} +{"current_steps": 66315, "total_steps": 78105, "loss": 0.1097, "lr": 3.3915726788511514e-07, "epoch": 4.245246783176493, "percentage": 84.9, "elapsed_time": "2:54:02", "remaining_time": "0:30:56", "throughput": 19990.81, "total_tokens": 208748928} +{"current_steps": 66320, "total_steps": 78105, "loss": 0.1422, "lr": 3.388763683627583e-07, "epoch": 4.24556686511747, "percentage": 84.91, "elapsed_time": "2:54:02", "remaining_time": "0:30:55", "throughput": 19991.0, "total_tokens": 208763968} +{"current_steps": 66325, "total_steps": 78105, "loss": 0.1638, "lr": 3.385955767556587e-07, "epoch": 4.245886947058447, "percentage": 84.92, "elapsed_time": "2:54:03", "remaining_time": "0:30:54", "throughput": 19991.17, "total_tokens": 208778816} +{"current_steps": 66330, "total_steps": 78105, "loss": 0.1093, "lr": 3.3831489307783765e-07, "epoch": 4.246207028999424, "percentage": 84.92, "elapsed_time": "2:54:04", "remaining_time": "0:30:54", "throughput": 19991.38, "total_tokens": 208794624} +{"current_steps": 66335, "total_steps": 78105, "loss": 0.1696, "lr": 3.3803431734331093e-07, "epoch": 4.2465271109404, "percentage": 84.93, "elapsed_time": "2:54:04", "remaining_time": "0:30:53", "throughput": 19991.56, "total_tokens": 208810176} +{"current_steps": 66340, "total_steps": 78105, "loss": 0.1384, "lr": 3.377538495660893e-07, "epoch": 4.246847192881377, "percentage": 84.94, "elapsed_time": "2:54:05", "remaining_time": "0:30:52", "throughput": 19991.72, "total_tokens": 208824704} +{"current_steps": 66345, "total_steps": 78105, "loss": 0.1278, "lr": 3.3747348976017694e-07, "epoch": 4.247167274822354, "percentage": 84.94, "elapsed_time": "2:54:06", "remaining_time": "0:30:51", "throughput": 19991.91, "total_tokens": 208840704} +{"current_steps": 66350, "total_steps": 78105, "loss": 0.0788, "lr": 3.371932379395748e-07, "epoch": 4.247487356763331, "percentage": 84.95, "elapsed_time": "2:54:06", "remaining_time": "0:30:50", "throughput": 19992.12, "total_tokens": 208856448} +{"current_steps": 66355, "total_steps": 78105, "loss": 0.1371, "lr": 3.3691309411827716e-07, "epoch": 4.247807438704308, "percentage": 84.96, "elapsed_time": "2:54:07", "remaining_time": "0:30:50", "throughput": 19992.31, "total_tokens": 208871616} +{"current_steps": 66360, "total_steps": 78105, "loss": 0.0801, "lr": 3.3663305831027197e-07, "epoch": 4.248127520645285, "percentage": 84.96, "elapsed_time": "2:54:08", "remaining_time": "0:30:49", "throughput": 19992.48, "total_tokens": 208886912} +{"current_steps": 66365, "total_steps": 78105, "loss": 0.1693, "lr": 3.3635313052954365e-07, "epoch": 4.248447602586262, "percentage": 84.97, "elapsed_time": "2:54:08", "remaining_time": "0:30:48", "throughput": 19992.72, "total_tokens": 208903232} +{"current_steps": 66370, "total_steps": 78105, "loss": 0.1533, "lr": 3.3607331079006935e-07, "epoch": 4.248767684527239, "percentage": 84.98, "elapsed_time": "2:54:09", "remaining_time": "0:30:47", "throughput": 19992.88, "total_tokens": 208917696} +{"current_steps": 66375, "total_steps": 78105, "loss": 0.2296, "lr": 3.3579359910582337e-07, "epoch": 4.249087766468216, "percentage": 84.98, "elapsed_time": "2:54:10", "remaining_time": "0:30:46", "throughput": 19993.08, "total_tokens": 208933376} +{"current_steps": 66380, "total_steps": 78105, "loss": 0.1136, "lr": 3.35513995490771e-07, "epoch": 4.249407848409192, "percentage": 84.99, "elapsed_time": "2:54:10", "remaining_time": "0:30:45", "throughput": 19993.26, "total_tokens": 208948736} +{"current_steps": 66385, "total_steps": 78105, "loss": 0.1421, "lr": 3.3523449995887664e-07, "epoch": 4.249727930350169, "percentage": 84.99, "elapsed_time": "2:54:11", "remaining_time": "0:30:45", "throughput": 19993.48, "total_tokens": 208964800} +{"current_steps": 66390, "total_steps": 78105, "loss": 0.1526, "lr": 3.3495511252409436e-07, "epoch": 4.250048012291146, "percentage": 85.0, "elapsed_time": "2:54:12", "remaining_time": "0:30:44", "throughput": 19993.64, "total_tokens": 208979520} +{"current_steps": 66395, "total_steps": 78105, "loss": 0.0669, "lr": 3.3467583320037687e-07, "epoch": 4.250368094232123, "percentage": 85.01, "elapsed_time": "2:54:12", "remaining_time": "0:30:43", "throughput": 19993.86, "total_tokens": 208995328} +{"current_steps": 66400, "total_steps": 78105, "loss": 0.0922, "lr": 3.343966620016695e-07, "epoch": 4.2506881761731, "percentage": 85.01, "elapsed_time": "2:54:13", "remaining_time": "0:30:42", "throughput": 19994.04, "total_tokens": 209010624} +{"current_steps": 66402, "total_steps": 78105, "eval_loss": 0.6279151439666748, "epoch": 4.250816208949491, "percentage": 85.02, "elapsed_time": "2:55:04", "remaining_time": "0:30:51", "throughput": 19896.96, "total_tokens": 209017024} +{"current_steps": 66405, "total_steps": 78105, "loss": 0.1591, "lr": 3.3411759894191207e-07, "epoch": 4.251008258114077, "percentage": 85.02, "elapsed_time": "2:55:38", "remaining_time": "0:30:56", "throughput": 19834.88, "total_tokens": 209026368} +{"current_steps": 66410, "total_steps": 78105, "loss": 0.1292, "lr": 3.3383864403504136e-07, "epoch": 4.251328340055054, "percentage": 85.03, "elapsed_time": "2:55:38", "remaining_time": "0:30:55", "throughput": 19835.07, "total_tokens": 209041344} +{"current_steps": 66415, "total_steps": 78105, "loss": 0.1414, "lr": 3.335597972949842e-07, "epoch": 4.251648421996031, "percentage": 85.03, "elapsed_time": "2:55:39", "remaining_time": "0:30:55", "throughput": 19835.26, "total_tokens": 209056320} +{"current_steps": 66420, "total_steps": 78105, "loss": 0.1266, "lr": 3.332810587356669e-07, "epoch": 4.251968503937007, "percentage": 85.04, "elapsed_time": "2:55:40", "remaining_time": "0:30:54", "throughput": 19835.45, "total_tokens": 209071488} +{"current_steps": 66425, "total_steps": 78105, "loss": 0.1375, "lr": 3.3300242837100763e-07, "epoch": 4.252288585877984, "percentage": 85.05, "elapsed_time": "2:55:40", "remaining_time": "0:30:53", "throughput": 19835.65, "total_tokens": 209086720} +{"current_steps": 66430, "total_steps": 78105, "loss": 0.1353, "lr": 3.327239062149196e-07, "epoch": 4.252608667818961, "percentage": 85.05, "elapsed_time": "2:55:41", "remaining_time": "0:30:52", "throughput": 19835.88, "total_tokens": 209102784} +{"current_steps": 66435, "total_steps": 78105, "loss": 0.1139, "lr": 3.3244549228131054e-07, "epoch": 4.252928749759938, "percentage": 85.06, "elapsed_time": "2:55:42", "remaining_time": "0:30:51", "throughput": 19836.1, "total_tokens": 209118592} +{"current_steps": 66440, "total_steps": 78105, "loss": 0.1902, "lr": 3.3216718658408337e-07, "epoch": 4.253248831700915, "percentage": 85.06, "elapsed_time": "2:55:43", "remaining_time": "0:30:51", "throughput": 19836.36, "total_tokens": 209134912} +{"current_steps": 66445, "total_steps": 78105, "loss": 0.1814, "lr": 3.318889891371352e-07, "epoch": 4.253568913641892, "percentage": 85.07, "elapsed_time": "2:55:43", "remaining_time": "0:30:50", "throughput": 19836.68, "total_tokens": 209153792} +{"current_steps": 66450, "total_steps": 78105, "loss": 0.0806, "lr": 3.316108999543574e-07, "epoch": 4.253888995582869, "percentage": 85.08, "elapsed_time": "2:55:44", "remaining_time": "0:30:49", "throughput": 19836.89, "total_tokens": 209169472} +{"current_steps": 66455, "total_steps": 78105, "loss": 0.1387, "lr": 3.313329190496362e-07, "epoch": 4.254209077523846, "percentage": 85.08, "elapsed_time": "2:55:45", "remaining_time": "0:30:48", "throughput": 19837.12, "total_tokens": 209185536} +{"current_steps": 66460, "total_steps": 78105, "loss": 0.1188, "lr": 3.3105504643685367e-07, "epoch": 4.254529159464823, "percentage": 85.09, "elapsed_time": "2:55:45", "remaining_time": "0:30:47", "throughput": 19837.34, "total_tokens": 209201280} +{"current_steps": 66465, "total_steps": 78105, "loss": 0.129, "lr": 3.3077728212988354e-07, "epoch": 4.2548492414058, "percentage": 85.1, "elapsed_time": "2:55:46", "remaining_time": "0:30:47", "throughput": 19837.53, "total_tokens": 209216640} +{"current_steps": 66470, "total_steps": 78105, "loss": 0.1634, "lr": 3.3049962614259744e-07, "epoch": 4.2551693233467764, "percentage": 85.1, "elapsed_time": "2:55:47", "remaining_time": "0:30:46", "throughput": 19837.71, "total_tokens": 209231232} +{"current_steps": 66475, "total_steps": 78105, "loss": 0.0657, "lr": 3.3022207848885963e-07, "epoch": 4.255489405287753, "percentage": 85.11, "elapsed_time": "2:55:47", "remaining_time": "0:30:45", "throughput": 19837.89, "total_tokens": 209246080} +{"current_steps": 66480, "total_steps": 78105, "loss": 0.1012, "lr": 3.2994463918252924e-07, "epoch": 4.25580948722873, "percentage": 85.12, "elapsed_time": "2:55:48", "remaining_time": "0:30:44", "throughput": 19838.16, "total_tokens": 209262976} +{"current_steps": 66485, "total_steps": 78105, "loss": 0.082, "lr": 3.296673082374599e-07, "epoch": 4.256129569169707, "percentage": 85.12, "elapsed_time": "2:55:49", "remaining_time": "0:30:43", "throughput": 19838.37, "total_tokens": 209278784} +{"current_steps": 66490, "total_steps": 78105, "loss": 0.0905, "lr": 3.2939008566750003e-07, "epoch": 4.256449651110684, "percentage": 85.13, "elapsed_time": "2:55:50", "remaining_time": "0:30:42", "throughput": 19838.77, "total_tokens": 209299648} +{"current_steps": 66495, "total_steps": 78105, "loss": 0.1748, "lr": 3.291129714864938e-07, "epoch": 4.256769733051661, "percentage": 85.14, "elapsed_time": "2:55:50", "remaining_time": "0:30:42", "throughput": 19838.98, "total_tokens": 209315328} +{"current_steps": 66500, "total_steps": 78105, "loss": 0.1073, "lr": 3.2883596570827736e-07, "epoch": 4.257089814992638, "percentage": 85.14, "elapsed_time": "2:55:51", "remaining_time": "0:30:41", "throughput": 19839.19, "total_tokens": 209330752} +{"current_steps": 66505, "total_steps": 78105, "loss": 0.1558, "lr": 3.2855906834668384e-07, "epoch": 4.257409896933615, "percentage": 85.15, "elapsed_time": "2:55:52", "remaining_time": "0:30:40", "throughput": 19839.39, "total_tokens": 209345856} +{"current_steps": 66510, "total_steps": 78105, "loss": 0.1235, "lr": 3.282822794155399e-07, "epoch": 4.2577299788745915, "percentage": 85.15, "elapsed_time": "2:55:52", "remaining_time": "0:30:39", "throughput": 19839.65, "total_tokens": 209362560} +{"current_steps": 66515, "total_steps": 78105, "loss": 0.1399, "lr": 3.280055989286668e-07, "epoch": 4.2580500608155685, "percentage": 85.16, "elapsed_time": "2:55:53", "remaining_time": "0:30:38", "throughput": 19839.83, "total_tokens": 209377152} +{"current_steps": 66520, "total_steps": 78105, "loss": 0.1494, "lr": 3.277290268998806e-07, "epoch": 4.2583701427565455, "percentage": 85.17, "elapsed_time": "2:55:54", "remaining_time": "0:30:38", "throughput": 19840.1, "total_tokens": 209394496} +{"current_steps": 66525, "total_steps": 78105, "loss": 0.164, "lr": 3.2745256334299145e-07, "epoch": 4.2586902246975225, "percentage": 85.17, "elapsed_time": "2:55:54", "remaining_time": "0:30:37", "throughput": 19840.28, "total_tokens": 209409152} +{"current_steps": 66530, "total_steps": 78105, "loss": 0.1273, "lr": 3.271762082718058e-07, "epoch": 4.259010306638499, "percentage": 85.18, "elapsed_time": "2:55:55", "remaining_time": "0:30:36", "throughput": 19840.5, "total_tokens": 209425280} +{"current_steps": 66535, "total_steps": 78105, "loss": 0.1372, "lr": 3.268999617001217e-07, "epoch": 4.259330388579476, "percentage": 85.19, "elapsed_time": "2:55:56", "remaining_time": "0:30:35", "throughput": 19840.73, "total_tokens": 209441600} +{"current_steps": 66540, "total_steps": 78105, "loss": 0.1437, "lr": 3.2662382364173434e-07, "epoch": 4.259650470520453, "percentage": 85.19, "elapsed_time": "2:55:56", "remaining_time": "0:30:34", "throughput": 19840.97, "total_tokens": 209457664} +{"current_steps": 66545, "total_steps": 78105, "loss": 0.1344, "lr": 3.2634779411043287e-07, "epoch": 4.25997055246143, "percentage": 85.2, "elapsed_time": "2:55:57", "remaining_time": "0:30:34", "throughput": 19841.12, "total_tokens": 209472000} +{"current_steps": 66550, "total_steps": 78105, "loss": 0.084, "lr": 3.260718731200005e-07, "epoch": 4.260290634402407, "percentage": 85.21, "elapsed_time": "2:55:58", "remaining_time": "0:30:33", "throughput": 19841.38, "total_tokens": 209488704} +{"current_steps": 66555, "total_steps": 78105, "loss": 0.1185, "lr": 3.2579606068421476e-07, "epoch": 4.2606107163433835, "percentage": 85.21, "elapsed_time": "2:55:58", "remaining_time": "0:30:32", "throughput": 19841.56, "total_tokens": 209503744} +{"current_steps": 66560, "total_steps": 78105, "loss": 0.1368, "lr": 3.2552035681684913e-07, "epoch": 4.2609307982843605, "percentage": 85.22, "elapsed_time": "2:55:59", "remaining_time": "0:30:31", "throughput": 19841.83, "total_tokens": 209520768} +{"current_steps": 66565, "total_steps": 78105, "loss": 0.1241, "lr": 3.2524476153167033e-07, "epoch": 4.2612508802253375, "percentage": 85.23, "elapsed_time": "2:56:00", "remaining_time": "0:30:30", "throughput": 19842.07, "total_tokens": 209537728} +{"current_steps": 66570, "total_steps": 78105, "loss": 0.1202, "lr": 3.2496927484244017e-07, "epoch": 4.2615709621663145, "percentage": 85.23, "elapsed_time": "2:56:00", "remaining_time": "0:30:29", "throughput": 19842.31, "total_tokens": 209553984} +{"current_steps": 66575, "total_steps": 78105, "loss": 0.0852, "lr": 3.2469389676291456e-07, "epoch": 4.2618910441072915, "percentage": 85.24, "elapsed_time": "2:56:01", "remaining_time": "0:30:29", "throughput": 19842.53, "total_tokens": 209569600} +{"current_steps": 66580, "total_steps": 78105, "loss": 0.1302, "lr": 3.244186273068456e-07, "epoch": 4.2622111260482685, "percentage": 85.24, "elapsed_time": "2:56:02", "remaining_time": "0:30:28", "throughput": 19842.73, "total_tokens": 209584512} +{"current_steps": 66585, "total_steps": 78105, "loss": 0.1274, "lr": 3.2414346648797834e-07, "epoch": 4.2625312079892455, "percentage": 85.25, "elapsed_time": "2:56:02", "remaining_time": "0:30:27", "throughput": 19842.91, "total_tokens": 209599808} +{"current_steps": 66590, "total_steps": 78105, "loss": 0.1476, "lr": 3.2386841432005237e-07, "epoch": 4.262851289930222, "percentage": 85.26, "elapsed_time": "2:56:03", "remaining_time": "0:30:26", "throughput": 19843.11, "total_tokens": 209615552} +{"current_steps": 66595, "total_steps": 78105, "loss": 0.1607, "lr": 3.235934708168031e-07, "epoch": 4.263171371871199, "percentage": 85.26, "elapsed_time": "2:56:04", "remaining_time": "0:30:25", "throughput": 19843.33, "total_tokens": 209631360} +{"current_steps": 66600, "total_steps": 78105, "loss": 0.1514, "lr": 3.233186359919588e-07, "epoch": 4.2634914538121755, "percentage": 85.27, "elapsed_time": "2:56:04", "remaining_time": "0:30:25", "throughput": 19843.51, "total_tokens": 209646272} +{"current_steps": 66605, "total_steps": 78105, "loss": 0.0887, "lr": 3.2304390985924473e-07, "epoch": 4.2638115357531525, "percentage": 85.28, "elapsed_time": "2:56:05", "remaining_time": "0:30:24", "throughput": 19843.69, "total_tokens": 209661376} +{"current_steps": 66610, "total_steps": 78105, "loss": 0.1717, "lr": 3.2276929243237776e-07, "epoch": 4.2641316176941295, "percentage": 85.28, "elapsed_time": "2:56:06", "remaining_time": "0:30:23", "throughput": 19843.98, "total_tokens": 209678720} +{"current_steps": 66615, "total_steps": 78105, "loss": 0.1424, "lr": 3.224947837250722e-07, "epoch": 4.2644516996351065, "percentage": 85.29, "elapsed_time": "2:56:07", "remaining_time": "0:30:22", "throughput": 19844.18, "total_tokens": 209694272} +{"current_steps": 66620, "total_steps": 78105, "loss": 0.1312, "lr": 3.2222038375103404e-07, "epoch": 4.2647717815760835, "percentage": 85.3, "elapsed_time": "2:56:07", "remaining_time": "0:30:21", "throughput": 19844.39, "total_tokens": 209710016} +{"current_steps": 66625, "total_steps": 78105, "loss": 0.1292, "lr": 3.21946092523967e-07, "epoch": 4.2650918635170605, "percentage": 85.3, "elapsed_time": "2:56:08", "remaining_time": "0:30:21", "throughput": 19844.59, "total_tokens": 209725376} +{"current_steps": 66630, "total_steps": 78105, "loss": 0.1617, "lr": 3.216719100575669e-07, "epoch": 4.2654119454580375, "percentage": 85.31, "elapsed_time": "2:56:09", "remaining_time": "0:30:20", "throughput": 19844.76, "total_tokens": 209740160} +{"current_steps": 66635, "total_steps": 78105, "loss": 0.2161, "lr": 3.2139783636552483e-07, "epoch": 4.2657320273990145, "percentage": 85.31, "elapsed_time": "2:56:09", "remaining_time": "0:30:19", "throughput": 19845.01, "total_tokens": 209757248} +{"current_steps": 66640, "total_steps": 78105, "loss": 0.1192, "lr": 3.2112387146152726e-07, "epoch": 4.2660521093399915, "percentage": 85.32, "elapsed_time": "2:56:10", "remaining_time": "0:30:18", "throughput": 19845.2, "total_tokens": 209772416} +{"current_steps": 66645, "total_steps": 78105, "loss": 0.1435, "lr": 3.2085001535925364e-07, "epoch": 4.266372191280968, "percentage": 85.33, "elapsed_time": "2:56:11", "remaining_time": "0:30:17", "throughput": 19845.42, "total_tokens": 209788352} +{"current_steps": 66650, "total_steps": 78105, "loss": 0.1802, "lr": 3.205762680723798e-07, "epoch": 4.2666922732219446, "percentage": 85.33, "elapsed_time": "2:56:11", "remaining_time": "0:30:16", "throughput": 19845.69, "total_tokens": 209805248} +{"current_steps": 66655, "total_steps": 78105, "loss": 0.1442, "lr": 3.20302629614575e-07, "epoch": 4.2670123551629215, "percentage": 85.34, "elapsed_time": "2:56:12", "remaining_time": "0:30:16", "throughput": 19845.92, "total_tokens": 209821440} +{"current_steps": 66660, "total_steps": 78105, "loss": 0.1423, "lr": 3.2002909999950343e-07, "epoch": 4.2673324371038985, "percentage": 85.35, "elapsed_time": "2:56:13", "remaining_time": "0:30:15", "throughput": 19846.14, "total_tokens": 209837312} +{"current_steps": 66665, "total_steps": 78105, "loss": 0.1184, "lr": 3.197556792408232e-07, "epoch": 4.2676525190448755, "percentage": 85.35, "elapsed_time": "2:56:13", "remaining_time": "0:30:14", "throughput": 19846.4, "total_tokens": 209854336} +{"current_steps": 66670, "total_steps": 78105, "loss": 0.117, "lr": 3.1948236735218826e-07, "epoch": 4.2679726009858525, "percentage": 85.36, "elapsed_time": "2:56:14", "remaining_time": "0:30:13", "throughput": 19846.57, "total_tokens": 209869120} +{"current_steps": 66675, "total_steps": 78105, "loss": 0.1468, "lr": 3.1920916434724586e-07, "epoch": 4.2682926829268295, "percentage": 85.37, "elapsed_time": "2:56:15", "remaining_time": "0:30:12", "throughput": 19846.82, "total_tokens": 209885952} +{"current_steps": 66680, "total_steps": 78105, "loss": 0.1449, "lr": 3.1893607023963823e-07, "epoch": 4.2686127648678065, "percentage": 85.37, "elapsed_time": "2:56:15", "remaining_time": "0:30:12", "throughput": 19846.99, "total_tokens": 209900736} +{"current_steps": 66685, "total_steps": 78105, "loss": 0.1056, "lr": 3.1866308504300226e-07, "epoch": 4.268932846808783, "percentage": 85.38, "elapsed_time": "2:56:16", "remaining_time": "0:30:11", "throughput": 19847.21, "total_tokens": 209916544} +{"current_steps": 66690, "total_steps": 78105, "loss": 0.108, "lr": 3.183902087709706e-07, "epoch": 4.26925292874976, "percentage": 85.39, "elapsed_time": "2:56:17", "remaining_time": "0:30:10", "throughput": 19847.41, "total_tokens": 209932160} +{"current_steps": 66695, "total_steps": 78105, "loss": 0.1569, "lr": 3.181174414371674e-07, "epoch": 4.269573010690737, "percentage": 85.39, "elapsed_time": "2:56:18", "remaining_time": "0:30:09", "throughput": 19847.66, "total_tokens": 209948928} +{"current_steps": 66700, "total_steps": 78105, "loss": 0.1881, "lr": 3.178447830552145e-07, "epoch": 4.269893092631714, "percentage": 85.4, "elapsed_time": "2:56:18", "remaining_time": "0:30:08", "throughput": 19847.83, "total_tokens": 209963776} +{"current_steps": 66705, "total_steps": 78105, "loss": 0.1396, "lr": 3.175722336387266e-07, "epoch": 4.270213174572691, "percentage": 85.4, "elapsed_time": "2:56:19", "remaining_time": "0:30:08", "throughput": 19848.05, "total_tokens": 209979904} +{"current_steps": 66710, "total_steps": 78105, "loss": 0.1545, "lr": 3.172997932013139e-07, "epoch": 4.2705332565136676, "percentage": 85.41, "elapsed_time": "2:56:20", "remaining_time": "0:30:07", "throughput": 19848.26, "total_tokens": 209995136} +{"current_steps": 66715, "total_steps": 78105, "loss": 0.1718, "lr": 3.1702746175657997e-07, "epoch": 4.2708533384546445, "percentage": 85.42, "elapsed_time": "2:56:20", "remaining_time": "0:30:06", "throughput": 19848.48, "total_tokens": 210011072} +{"current_steps": 66720, "total_steps": 78105, "loss": 0.1476, "lr": 3.1675523931812337e-07, "epoch": 4.2711734203956215, "percentage": 85.42, "elapsed_time": "2:56:21", "remaining_time": "0:30:05", "throughput": 19848.67, "total_tokens": 210025856} +{"current_steps": 66725, "total_steps": 78105, "loss": 0.0998, "lr": 3.164831258995391e-07, "epoch": 4.2714935023365985, "percentage": 85.43, "elapsed_time": "2:56:22", "remaining_time": "0:30:04", "throughput": 19848.92, "total_tokens": 210042240} +{"current_steps": 66730, "total_steps": 78105, "loss": 0.1297, "lr": 3.1621112151441315e-07, "epoch": 4.2718135842775755, "percentage": 85.44, "elapsed_time": "2:56:22", "remaining_time": "0:30:03", "throughput": 19849.13, "total_tokens": 210057664} +{"current_steps": 66735, "total_steps": 78105, "loss": 0.1096, "lr": 3.159392261763292e-07, "epoch": 4.272133666218552, "percentage": 85.44, "elapsed_time": "2:56:23", "remaining_time": "0:30:03", "throughput": 19849.3, "total_tokens": 210072384} +{"current_steps": 66740, "total_steps": 78105, "loss": 0.1216, "lr": 3.156674398988638e-07, "epoch": 4.272453748159529, "percentage": 85.45, "elapsed_time": "2:56:24", "remaining_time": "0:30:02", "throughput": 19849.5, "total_tokens": 210087488} +{"current_steps": 66745, "total_steps": 78105, "loss": 0.1317, "lr": 3.153957626955889e-07, "epoch": 4.272773830100506, "percentage": 85.46, "elapsed_time": "2:56:24", "remaining_time": "0:30:01", "throughput": 19849.77, "total_tokens": 210104512} +{"current_steps": 66750, "total_steps": 78105, "loss": 0.1093, "lr": 3.151241945800704e-07, "epoch": 4.273093912041483, "percentage": 85.46, "elapsed_time": "2:56:25", "remaining_time": "0:30:00", "throughput": 19850.11, "total_tokens": 210123136} +{"current_steps": 66755, "total_steps": 78105, "loss": 0.1306, "lr": 3.148527355658684e-07, "epoch": 4.27341399398246, "percentage": 85.47, "elapsed_time": "2:56:26", "remaining_time": "0:29:59", "throughput": 19850.32, "total_tokens": 210138624} +{"current_steps": 66760, "total_steps": 78105, "loss": 0.1056, "lr": 3.1458138566653975e-07, "epoch": 4.273734075923437, "percentage": 85.47, "elapsed_time": "2:56:26", "remaining_time": "0:29:59", "throughput": 19850.54, "total_tokens": 210154304} +{"current_steps": 66765, "total_steps": 78105, "loss": 0.1623, "lr": 3.143101448956323e-07, "epoch": 4.274054157864414, "percentage": 85.48, "elapsed_time": "2:56:27", "remaining_time": "0:29:58", "throughput": 19850.76, "total_tokens": 210169728} +{"current_steps": 66770, "total_steps": 78105, "loss": 0.1264, "lr": 3.140390132666921e-07, "epoch": 4.2743742398053906, "percentage": 85.49, "elapsed_time": "2:56:28", "remaining_time": "0:29:57", "throughput": 19850.94, "total_tokens": 210184704} +{"current_steps": 66775, "total_steps": 78105, "loss": 0.0836, "lr": 3.1376799079325726e-07, "epoch": 4.274694321746367, "percentage": 85.49, "elapsed_time": "2:56:28", "remaining_time": "0:29:56", "throughput": 19851.14, "total_tokens": 210200448} +{"current_steps": 66780, "total_steps": 78105, "loss": 0.1466, "lr": 3.1349707748886097e-07, "epoch": 4.275014403687344, "percentage": 85.5, "elapsed_time": "2:56:29", "remaining_time": "0:29:55", "throughput": 19851.33, "total_tokens": 210215360} +{"current_steps": 66785, "total_steps": 78105, "loss": 0.1295, "lr": 3.1322627336703186e-07, "epoch": 4.275334485628321, "percentage": 85.51, "elapsed_time": "2:56:30", "remaining_time": "0:29:55", "throughput": 19851.57, "total_tokens": 210231616} +{"current_steps": 66790, "total_steps": 78105, "loss": 0.103, "lr": 3.1295557844129217e-07, "epoch": 4.275654567569298, "percentage": 85.51, "elapsed_time": "2:56:30", "remaining_time": "0:29:54", "throughput": 19851.82, "total_tokens": 210248448} +{"current_steps": 66795, "total_steps": 78105, "loss": 0.138, "lr": 3.126849927251588e-07, "epoch": 4.275974649510275, "percentage": 85.52, "elapsed_time": "2:56:31", "remaining_time": "0:29:53", "throughput": 19852.0, "total_tokens": 210263040} +{"current_steps": 66800, "total_steps": 78105, "loss": 0.1231, "lr": 3.124145162321437e-07, "epoch": 4.276294731451252, "percentage": 85.53, "elapsed_time": "2:56:32", "remaining_time": "0:29:52", "throughput": 19852.21, "total_tokens": 210278464} +{"current_steps": 66805, "total_steps": 78105, "loss": 0.1155, "lr": 3.1214414897575236e-07, "epoch": 4.276614813392229, "percentage": 85.53, "elapsed_time": "2:56:32", "remaining_time": "0:29:51", "throughput": 19852.42, "total_tokens": 210294016} +{"current_steps": 66810, "total_steps": 78105, "loss": 0.1451, "lr": 3.118738909694871e-07, "epoch": 4.276934895333206, "percentage": 85.54, "elapsed_time": "2:56:33", "remaining_time": "0:29:50", "throughput": 19852.63, "total_tokens": 210310080} +{"current_steps": 66815, "total_steps": 78105, "loss": 0.1678, "lr": 3.1160374222684144e-07, "epoch": 4.277254977274183, "percentage": 85.55, "elapsed_time": "2:56:34", "remaining_time": "0:29:50", "throughput": 19852.86, "total_tokens": 210326144} +{"current_steps": 66820, "total_steps": 78105, "loss": 0.1058, "lr": 3.113337027613061e-07, "epoch": 4.277575059215159, "percentage": 85.55, "elapsed_time": "2:56:34", "remaining_time": "0:29:49", "throughput": 19853.01, "total_tokens": 210340544} +{"current_steps": 66825, "total_steps": 78105, "loss": 0.095, "lr": 3.110637725863655e-07, "epoch": 4.277895141156136, "percentage": 85.56, "elapsed_time": "2:56:35", "remaining_time": "0:29:48", "throughput": 19853.28, "total_tokens": 210357696} +{"current_steps": 66830, "total_steps": 78105, "loss": 0.107, "lr": 3.1079395171549787e-07, "epoch": 4.278215223097113, "percentage": 85.56, "elapsed_time": "2:56:36", "remaining_time": "0:29:47", "throughput": 19853.47, "total_tokens": 210372800} +{"current_steps": 66835, "total_steps": 78105, "loss": 0.1143, "lr": 3.1052424016217804e-07, "epoch": 4.27853530503809, "percentage": 85.57, "elapsed_time": "2:56:36", "remaining_time": "0:29:46", "throughput": 19853.69, "total_tokens": 210388672} +{"current_steps": 66840, "total_steps": 78105, "loss": 0.1163, "lr": 3.1025463793987215e-07, "epoch": 4.278855386979067, "percentage": 85.58, "elapsed_time": "2:56:37", "remaining_time": "0:29:46", "throughput": 19853.86, "total_tokens": 210403136} +{"current_steps": 66845, "total_steps": 78105, "loss": 0.1007, "lr": 3.0998514506204474e-07, "epoch": 4.279175468920044, "percentage": 85.58, "elapsed_time": "2:56:38", "remaining_time": "0:29:45", "throughput": 19854.1, "total_tokens": 210419136} +{"current_steps": 66850, "total_steps": 78105, "loss": 0.1155, "lr": 3.097157615421506e-07, "epoch": 4.279495550861021, "percentage": 85.59, "elapsed_time": "2:56:38", "remaining_time": "0:29:44", "throughput": 19854.33, "total_tokens": 210435392} +{"current_steps": 66855, "total_steps": 78105, "loss": 0.1402, "lr": 3.0944648739364313e-07, "epoch": 4.279815632801998, "percentage": 85.6, "elapsed_time": "2:56:39", "remaining_time": "0:29:43", "throughput": 19854.5, "total_tokens": 210450240} +{"current_steps": 66860, "total_steps": 78105, "loss": 0.1489, "lr": 3.09177322629968e-07, "epoch": 4.280135714742975, "percentage": 85.6, "elapsed_time": "2:56:40", "remaining_time": "0:29:42", "throughput": 19854.75, "total_tokens": 210467072} +{"current_steps": 66865, "total_steps": 78105, "loss": 0.1579, "lr": 3.0890826726456576e-07, "epoch": 4.280455796683951, "percentage": 85.61, "elapsed_time": "2:56:40", "remaining_time": "0:29:42", "throughput": 19854.96, "total_tokens": 210482368} +{"current_steps": 66870, "total_steps": 78105, "loss": 0.1348, "lr": 3.0863932131087154e-07, "epoch": 4.280775878624928, "percentage": 85.62, "elapsed_time": "2:56:41", "remaining_time": "0:29:41", "throughput": 19855.26, "total_tokens": 210500480} +{"current_steps": 66875, "total_steps": 78105, "loss": 0.1665, "lr": 3.0837048478231493e-07, "epoch": 4.281095960565905, "percentage": 85.62, "elapsed_time": "2:56:42", "remaining_time": "0:29:40", "throughput": 19855.49, "total_tokens": 210516672} +{"current_steps": 66880, "total_steps": 78105, "loss": 0.1429, "lr": 3.0810175769232093e-07, "epoch": 4.281416042506882, "percentage": 85.63, "elapsed_time": "2:56:43", "remaining_time": "0:29:39", "throughput": 19855.74, "total_tokens": 210532992} +{"current_steps": 66885, "total_steps": 78105, "loss": 0.1077, "lr": 3.07833140054308e-07, "epoch": 4.281736124447859, "percentage": 85.63, "elapsed_time": "2:56:43", "remaining_time": "0:29:38", "throughput": 19855.94, "total_tokens": 210548544} +{"current_steps": 66890, "total_steps": 78105, "loss": 0.1233, "lr": 3.075646318816894e-07, "epoch": 4.282056206388836, "percentage": 85.64, "elapsed_time": "2:56:44", "remaining_time": "0:29:37", "throughput": 19856.12, "total_tokens": 210563456} +{"current_steps": 66895, "total_steps": 78105, "loss": 0.1286, "lr": 3.0729623318787344e-07, "epoch": 4.282376288329813, "percentage": 85.65, "elapsed_time": "2:56:45", "remaining_time": "0:29:37", "throughput": 19856.32, "total_tokens": 210579136} +{"current_steps": 66900, "total_steps": 78105, "loss": 0.1611, "lr": 3.070279439862617e-07, "epoch": 4.28269637027079, "percentage": 85.65, "elapsed_time": "2:56:45", "remaining_time": "0:29:36", "throughput": 19856.48, "total_tokens": 210593664} +{"current_steps": 66905, "total_steps": 78105, "loss": 0.151, "lr": 3.06759764290252e-07, "epoch": 4.283016452211767, "percentage": 85.66, "elapsed_time": "2:56:46", "remaining_time": "0:29:35", "throughput": 19856.7, "total_tokens": 210609728} +{"current_steps": 66910, "total_steps": 78105, "loss": 0.1032, "lr": 3.064916941132354e-07, "epoch": 4.283336534152743, "percentage": 85.67, "elapsed_time": "2:56:47", "remaining_time": "0:29:34", "throughput": 19856.93, "total_tokens": 210625792} +{"current_steps": 66915, "total_steps": 78105, "loss": 0.0661, "lr": 3.0622373346859746e-07, "epoch": 4.28365661609372, "percentage": 85.67, "elapsed_time": "2:56:47", "remaining_time": "0:29:33", "throughput": 19857.14, "total_tokens": 210641216} +{"current_steps": 66920, "total_steps": 78105, "loss": 0.1357, "lr": 3.0595588236972063e-07, "epoch": 4.283976698034697, "percentage": 85.68, "elapsed_time": "2:56:48", "remaining_time": "0:29:33", "throughput": 19857.36, "total_tokens": 210657088} +{"current_steps": 66925, "total_steps": 78105, "loss": 0.1477, "lr": 3.056881408299772e-07, "epoch": 4.284296779975674, "percentage": 85.69, "elapsed_time": "2:56:49", "remaining_time": "0:29:32", "throughput": 19857.56, "total_tokens": 210672128} +{"current_steps": 66930, "total_steps": 78105, "loss": 0.1822, "lr": 3.0542050886273904e-07, "epoch": 4.284616861916651, "percentage": 85.69, "elapsed_time": "2:56:49", "remaining_time": "0:29:31", "throughput": 19857.78, "total_tokens": 210687808} +{"current_steps": 66935, "total_steps": 78105, "loss": 0.1298, "lr": 3.051529864813696e-07, "epoch": 4.284936943857628, "percentage": 85.7, "elapsed_time": "2:56:50", "remaining_time": "0:29:30", "throughput": 19857.97, "total_tokens": 210703040} +{"current_steps": 66940, "total_steps": 78105, "loss": 0.0893, "lr": 3.0488557369922735e-07, "epoch": 4.285257025798605, "percentage": 85.71, "elapsed_time": "2:56:51", "remaining_time": "0:29:29", "throughput": 19858.15, "total_tokens": 210717824} +{"current_steps": 66945, "total_steps": 78105, "loss": 0.1294, "lr": 3.046182705296655e-07, "epoch": 4.285577107739582, "percentage": 85.71, "elapsed_time": "2:56:51", "remaining_time": "0:29:29", "throughput": 19858.35, "total_tokens": 210732928} +{"current_steps": 66950, "total_steps": 78105, "loss": 0.0836, "lr": 3.0435107698603167e-07, "epoch": 4.285897189680558, "percentage": 85.72, "elapsed_time": "2:56:52", "remaining_time": "0:29:28", "throughput": 19858.58, "total_tokens": 210748864} +{"current_steps": 66955, "total_steps": 78105, "loss": 0.1691, "lr": 3.0408399308166916e-07, "epoch": 4.286217271621535, "percentage": 85.72, "elapsed_time": "2:56:53", "remaining_time": "0:29:27", "throughput": 19858.84, "total_tokens": 210765888} +{"current_steps": 66960, "total_steps": 78105, "loss": 0.1109, "lr": 3.038170188299128e-07, "epoch": 4.286537353562512, "percentage": 85.73, "elapsed_time": "2:56:53", "remaining_time": "0:29:26", "throughput": 19859.04, "total_tokens": 210781504} +{"current_steps": 66965, "total_steps": 78105, "loss": 0.1941, "lr": 3.035501542440958e-07, "epoch": 4.286857435503489, "percentage": 85.74, "elapsed_time": "2:56:54", "remaining_time": "0:29:25", "throughput": 19859.34, "total_tokens": 210799424} +{"current_steps": 66970, "total_steps": 78105, "loss": 0.1214, "lr": 3.032833993375431e-07, "epoch": 4.287177517444466, "percentage": 85.74, "elapsed_time": "2:56:55", "remaining_time": "0:29:24", "throughput": 19859.56, "total_tokens": 210815424} +{"current_steps": 66975, "total_steps": 78105, "loss": 0.1333, "lr": 3.030167541235751e-07, "epoch": 4.287497599385443, "percentage": 85.75, "elapsed_time": "2:56:55", "remaining_time": "0:29:24", "throughput": 19859.77, "total_tokens": 210830784} +{"current_steps": 66980, "total_steps": 78105, "loss": 0.1684, "lr": 3.027502186155068e-07, "epoch": 4.28781768132642, "percentage": 85.76, "elapsed_time": "2:56:56", "remaining_time": "0:29:23", "throughput": 19859.98, "total_tokens": 210846656} +{"current_steps": 66985, "total_steps": 78105, "loss": 0.1467, "lr": 3.024837928266475e-07, "epoch": 4.288137763267397, "percentage": 85.76, "elapsed_time": "2:56:57", "remaining_time": "0:29:22", "throughput": 19860.17, "total_tokens": 210861760} +{"current_steps": 66990, "total_steps": 78105, "loss": 0.1134, "lr": 3.0221747677030094e-07, "epoch": 4.288457845208374, "percentage": 85.77, "elapsed_time": "2:56:58", "remaining_time": "0:29:21", "throughput": 19860.44, "total_tokens": 210878784} +{"current_steps": 66995, "total_steps": 78105, "loss": 0.1247, "lr": 3.019512704597655e-07, "epoch": 4.288777927149351, "percentage": 85.78, "elapsed_time": "2:56:58", "remaining_time": "0:29:20", "throughput": 19860.65, "total_tokens": 210894336} +{"current_steps": 67000, "total_steps": 78105, "loss": 0.0949, "lr": 3.0168517390833485e-07, "epoch": 4.289098009090327, "percentage": 85.78, "elapsed_time": "2:56:59", "remaining_time": "0:29:20", "throughput": 19860.89, "total_tokens": 210910912} +{"current_steps": 67005, "total_steps": 78105, "loss": 0.1361, "lr": 3.0141918712929595e-07, "epoch": 4.289418091031304, "percentage": 85.79, "elapsed_time": "2:57:00", "remaining_time": "0:29:19", "throughput": 19861.08, "total_tokens": 210926144} +{"current_steps": 67010, "total_steps": 78105, "loss": 0.1263, "lr": 3.0115331013593096e-07, "epoch": 4.289738172972281, "percentage": 85.79, "elapsed_time": "2:57:00", "remaining_time": "0:29:18", "throughput": 19861.32, "total_tokens": 210942080} +{"current_steps": 67015, "total_steps": 78105, "loss": 0.1491, "lr": 3.0088754294151616e-07, "epoch": 4.290058254913258, "percentage": 85.8, "elapsed_time": "2:57:01", "remaining_time": "0:29:17", "throughput": 19861.58, "total_tokens": 210958848} +{"current_steps": 67020, "total_steps": 78105, "loss": 0.1034, "lr": 3.006218855593229e-07, "epoch": 4.290378336854235, "percentage": 85.81, "elapsed_time": "2:57:02", "remaining_time": "0:29:16", "throughput": 19861.78, "total_tokens": 210974336} +{"current_steps": 67025, "total_steps": 78105, "loss": 0.1211, "lr": 3.003563380026159e-07, "epoch": 4.290698418795212, "percentage": 85.81, "elapsed_time": "2:57:02", "remaining_time": "0:29:16", "throughput": 19862.08, "total_tokens": 210992256} +{"current_steps": 67030, "total_steps": 78105, "loss": 0.1435, "lr": 3.0009090028465693e-07, "epoch": 4.291018500736189, "percentage": 85.82, "elapsed_time": "2:57:03", "remaining_time": "0:29:15", "throughput": 19862.27, "total_tokens": 211007424} +{"current_steps": 67035, "total_steps": 78105, "loss": 0.117, "lr": 2.9982557241869827e-07, "epoch": 4.291338582677166, "percentage": 85.83, "elapsed_time": "2:57:04", "remaining_time": "0:29:14", "throughput": 19862.55, "total_tokens": 211024640} +{"current_steps": 67040, "total_steps": 78105, "loss": 0.099, "lr": 2.9956035441799147e-07, "epoch": 4.291658664618142, "percentage": 85.83, "elapsed_time": "2:57:04", "remaining_time": "0:29:13", "throughput": 19862.76, "total_tokens": 211040512} +{"current_steps": 67045, "total_steps": 78105, "loss": 0.124, "lr": 2.992952462957779e-07, "epoch": 4.291978746559119, "percentage": 85.84, "elapsed_time": "2:57:05", "remaining_time": "0:29:12", "throughput": 19862.99, "total_tokens": 211056704} +{"current_steps": 67050, "total_steps": 78105, "loss": 0.1007, "lr": 2.990302480652971e-07, "epoch": 4.292298828500096, "percentage": 85.85, "elapsed_time": "2:57:06", "remaining_time": "0:29:12", "throughput": 19863.21, "total_tokens": 211072896} +{"current_steps": 67055, "total_steps": 78105, "loss": 0.1816, "lr": 2.9876535973978134e-07, "epoch": 4.292618910441073, "percentage": 85.85, "elapsed_time": "2:57:06", "remaining_time": "0:29:11", "throughput": 19863.42, "total_tokens": 211088128} +{"current_steps": 67060, "total_steps": 78105, "loss": 0.128, "lr": 2.9850058133245707e-07, "epoch": 4.29293899238205, "percentage": 85.86, "elapsed_time": "2:57:07", "remaining_time": "0:29:10", "throughput": 19863.64, "total_tokens": 211104256} +{"current_steps": 67065, "total_steps": 78105, "loss": 0.1425, "lr": 2.982359128565476e-07, "epoch": 4.293259074323027, "percentage": 85.87, "elapsed_time": "2:57:08", "remaining_time": "0:29:09", "throughput": 19863.82, "total_tokens": 211118912} +{"current_steps": 67070, "total_steps": 78105, "loss": 0.1409, "lr": 2.979713543252671e-07, "epoch": 4.293579156264004, "percentage": 85.87, "elapsed_time": "2:57:08", "remaining_time": "0:29:08", "throughput": 19864.0, "total_tokens": 211133888} +{"current_steps": 67075, "total_steps": 78105, "loss": 0.1371, "lr": 2.977069057518284e-07, "epoch": 4.293899238204981, "percentage": 85.88, "elapsed_time": "2:57:09", "remaining_time": "0:29:07", "throughput": 19864.25, "total_tokens": 211150464} +{"current_steps": 67080, "total_steps": 78105, "loss": 0.1069, "lr": 2.9744256714943407e-07, "epoch": 4.294219320145958, "percentage": 85.88, "elapsed_time": "2:57:10", "remaining_time": "0:29:07", "throughput": 19864.43, "total_tokens": 211165120} +{"current_steps": 67085, "total_steps": 78105, "loss": 0.1297, "lr": 2.97178338531286e-07, "epoch": 4.294539402086934, "percentage": 85.89, "elapsed_time": "2:57:11", "remaining_time": "0:29:06", "throughput": 19864.64, "total_tokens": 211181056} +{"current_steps": 67090, "total_steps": 78105, "loss": 0.1655, "lr": 2.969142199105776e-07, "epoch": 4.294859484027911, "percentage": 85.9, "elapsed_time": "2:57:11", "remaining_time": "0:29:05", "throughput": 19864.86, "total_tokens": 211197120} +{"current_steps": 67095, "total_steps": 78105, "loss": 0.1848, "lr": 2.9665021130049767e-07, "epoch": 4.295179565968888, "percentage": 85.9, "elapsed_time": "2:57:12", "remaining_time": "0:29:04", "throughput": 19865.05, "total_tokens": 211212032} +{"current_steps": 67100, "total_steps": 78105, "loss": 0.139, "lr": 2.963863127142294e-07, "epoch": 4.295499647909865, "percentage": 85.91, "elapsed_time": "2:57:12", "remaining_time": "0:29:03", "throughput": 19865.23, "total_tokens": 211226624} +{"current_steps": 67105, "total_steps": 78105, "loss": 0.1133, "lr": 2.9612252416494985e-07, "epoch": 4.295819729850842, "percentage": 85.92, "elapsed_time": "2:57:13", "remaining_time": "0:29:03", "throughput": 19865.39, "total_tokens": 211240896} +{"current_steps": 67110, "total_steps": 78105, "loss": 0.1605, "lr": 2.958588456658323e-07, "epoch": 4.296139811791819, "percentage": 85.92, "elapsed_time": "2:57:14", "remaining_time": "0:29:02", "throughput": 19865.59, "total_tokens": 211256192} +{"current_steps": 67115, "total_steps": 78105, "loss": 0.1421, "lr": 2.955952772300433e-07, "epoch": 4.296459893732796, "percentage": 85.93, "elapsed_time": "2:57:14", "remaining_time": "0:29:01", "throughput": 19865.83, "total_tokens": 211272512} +{"current_steps": 67120, "total_steps": 78105, "loss": 0.1326, "lr": 2.9533181887074353e-07, "epoch": 4.296779975673773, "percentage": 85.94, "elapsed_time": "2:57:15", "remaining_time": "0:29:00", "throughput": 19866.02, "total_tokens": 211287744} +{"current_steps": 67125, "total_steps": 78105, "loss": 0.1269, "lr": 2.950684706010892e-07, "epoch": 4.29710005761475, "percentage": 85.94, "elapsed_time": "2:57:16", "remaining_time": "0:28:59", "throughput": 19866.22, "total_tokens": 211302528} +{"current_steps": 67130, "total_steps": 78105, "loss": 0.119, "lr": 2.9480523243423043e-07, "epoch": 4.297420139555726, "percentage": 85.95, "elapsed_time": "2:57:16", "remaining_time": "0:28:59", "throughput": 19866.41, "total_tokens": 211318208} +{"current_steps": 67135, "total_steps": 78105, "loss": 0.0983, "lr": 2.945421043833119e-07, "epoch": 4.297740221496703, "percentage": 85.95, "elapsed_time": "2:57:17", "remaining_time": "0:28:58", "throughput": 19866.59, "total_tokens": 211333120} +{"current_steps": 67140, "total_steps": 78105, "loss": 0.1105, "lr": 2.942790864614728e-07, "epoch": 4.29806030343768, "percentage": 85.96, "elapsed_time": "2:57:18", "remaining_time": "0:28:57", "throughput": 19866.81, "total_tokens": 211348992} +{"current_steps": 67145, "total_steps": 78105, "loss": 0.1588, "lr": 2.9401617868184626e-07, "epoch": 4.298380385378657, "percentage": 85.97, "elapsed_time": "2:57:18", "remaining_time": "0:28:56", "throughput": 19867.04, "total_tokens": 211364992} +{"current_steps": 67150, "total_steps": 78105, "loss": 0.103, "lr": 2.9375338105756243e-07, "epoch": 4.298700467319634, "percentage": 85.97, "elapsed_time": "2:57:19", "remaining_time": "0:28:55", "throughput": 19867.23, "total_tokens": 211380224} +{"current_steps": 67155, "total_steps": 78105, "loss": 0.1047, "lr": 2.934906936017418e-07, "epoch": 4.299020549260611, "percentage": 85.98, "elapsed_time": "2:57:20", "remaining_time": "0:28:54", "throughput": 19867.47, "total_tokens": 211396416} +{"current_steps": 67160, "total_steps": 78105, "loss": 0.075, "lr": 2.932281163275033e-07, "epoch": 4.299340631201588, "percentage": 85.99, "elapsed_time": "2:57:20", "remaining_time": "0:28:54", "throughput": 19867.67, "total_tokens": 211411520} +{"current_steps": 67165, "total_steps": 78105, "loss": 0.1115, "lr": 2.929656492479577e-07, "epoch": 4.299660713142565, "percentage": 85.99, "elapsed_time": "2:57:21", "remaining_time": "0:28:53", "throughput": 19867.88, "total_tokens": 211427136} +{"current_steps": 67170, "total_steps": 78105, "loss": 0.1317, "lr": 2.927032923762116e-07, "epoch": 4.299980795083542, "percentage": 86.0, "elapsed_time": "2:57:22", "remaining_time": "0:28:52", "throughput": 19868.07, "total_tokens": 211442304} +{"current_steps": 67175, "total_steps": 78105, "loss": 0.127, "lr": 2.9244104572536595e-07, "epoch": 4.300300877024518, "percentage": 86.01, "elapsed_time": "2:57:22", "remaining_time": "0:28:51", "throughput": 19868.27, "total_tokens": 211457344} +{"current_steps": 67180, "total_steps": 78105, "loss": 0.1585, "lr": 2.9217890930851505e-07, "epoch": 4.300620958965495, "percentage": 86.01, "elapsed_time": "2:57:23", "remaining_time": "0:28:50", "throughput": 19868.5, "total_tokens": 211473472} +{"current_steps": 67185, "total_steps": 78105, "loss": 0.1279, "lr": 2.9191688313875035e-07, "epoch": 4.300941040906472, "percentage": 86.02, "elapsed_time": "2:57:24", "remaining_time": "0:28:50", "throughput": 19868.73, "total_tokens": 211489536} +{"current_steps": 67190, "total_steps": 78105, "loss": 0.1289, "lr": 2.916549672291538e-07, "epoch": 4.301261122847449, "percentage": 86.03, "elapsed_time": "2:57:25", "remaining_time": "0:28:49", "throughput": 19868.95, "total_tokens": 211505216} +{"current_steps": 67195, "total_steps": 78105, "loss": 0.1253, "lr": 2.9139316159280617e-07, "epoch": 4.301581204788426, "percentage": 86.03, "elapsed_time": "2:57:25", "remaining_time": "0:28:48", "throughput": 19868.8, "total_tokens": 211522816} +{"current_steps": 67200, "total_steps": 78105, "loss": 0.119, "lr": 2.911314662427797e-07, "epoch": 4.301901286729403, "percentage": 86.04, "elapsed_time": "2:57:26", "remaining_time": "0:28:47", "throughput": 19868.99, "total_tokens": 211537856} +{"current_steps": 67205, "total_steps": 78105, "loss": 0.1219, "lr": 2.9086988119214207e-07, "epoch": 4.30222136867038, "percentage": 86.04, "elapsed_time": "2:57:27", "remaining_time": "0:28:46", "throughput": 19869.21, "total_tokens": 211553664} +{"current_steps": 67210, "total_steps": 78105, "loss": 0.1333, "lr": 2.906084064539558e-07, "epoch": 4.302541450611357, "percentage": 86.05, "elapsed_time": "2:57:28", "remaining_time": "0:28:46", "throughput": 19869.48, "total_tokens": 211570880} +{"current_steps": 67215, "total_steps": 78105, "loss": 0.1191, "lr": 2.903470420412774e-07, "epoch": 4.302861532552333, "percentage": 86.06, "elapsed_time": "2:57:28", "remaining_time": "0:28:45", "throughput": 19869.7, "total_tokens": 211586880} +{"current_steps": 67220, "total_steps": 78105, "loss": 0.1304, "lr": 2.9008578796715814e-07, "epoch": 4.30318161449331, "percentage": 86.06, "elapsed_time": "2:57:29", "remaining_time": "0:28:44", "throughput": 19869.91, "total_tokens": 211602752} +{"current_steps": 67225, "total_steps": 78105, "loss": 0.1196, "lr": 2.8982464424464286e-07, "epoch": 4.303501696434287, "percentage": 86.07, "elapsed_time": "2:57:30", "remaining_time": "0:28:43", "throughput": 19870.1, "total_tokens": 211617984} +{"current_steps": 67230, "total_steps": 78105, "loss": 0.1474, "lr": 2.895636108867733e-07, "epoch": 4.303821778375264, "percentage": 86.08, "elapsed_time": "2:57:30", "remaining_time": "0:28:42", "throughput": 19870.3, "total_tokens": 211632960} +{"current_steps": 67235, "total_steps": 78105, "loss": 0.1015, "lr": 2.89302687906583e-07, "epoch": 4.304141860316241, "percentage": 86.08, "elapsed_time": "2:57:31", "remaining_time": "0:28:42", "throughput": 19870.51, "total_tokens": 211648704} +{"current_steps": 67240, "total_steps": 78105, "loss": 0.1068, "lr": 2.8904187531710147e-07, "epoch": 4.304461942257218, "percentage": 86.09, "elapsed_time": "2:57:32", "remaining_time": "0:28:41", "throughput": 19870.67, "total_tokens": 211663296} +{"current_steps": 67245, "total_steps": 78105, "loss": 0.1566, "lr": 2.8878117313135225e-07, "epoch": 4.304782024198195, "percentage": 86.1, "elapsed_time": "2:57:32", "remaining_time": "0:28:40", "throughput": 19870.92, "total_tokens": 211679744} +{"current_steps": 67250, "total_steps": 78105, "loss": 0.1574, "lr": 2.885205813623534e-07, "epoch": 4.305102106139172, "percentage": 86.1, "elapsed_time": "2:57:33", "remaining_time": "0:28:39", "throughput": 19871.12, "total_tokens": 211695296} +{"current_steps": 67255, "total_steps": 78105, "loss": 0.1124, "lr": 2.8826010002311697e-07, "epoch": 4.305422188080149, "percentage": 86.11, "elapsed_time": "2:57:34", "remaining_time": "0:28:38", "throughput": 19871.36, "total_tokens": 211711616} +{"current_steps": 67260, "total_steps": 78105, "loss": 0.1695, "lr": 2.8799972912665176e-07, "epoch": 4.305742270021126, "percentage": 86.11, "elapsed_time": "2:57:34", "remaining_time": "0:28:37", "throughput": 19871.56, "total_tokens": 211727232} +{"current_steps": 67265, "total_steps": 78105, "loss": 0.1305, "lr": 2.877394686859569e-07, "epoch": 4.306062351962102, "percentage": 86.12, "elapsed_time": "2:57:35", "remaining_time": "0:28:37", "throughput": 19871.73, "total_tokens": 211742016} +{"current_steps": 67270, "total_steps": 78105, "loss": 0.1308, "lr": 2.87479318714031e-07, "epoch": 4.306382433903079, "percentage": 86.13, "elapsed_time": "2:57:36", "remaining_time": "0:28:36", "throughput": 19871.9, "total_tokens": 211756544} +{"current_steps": 67275, "total_steps": 78105, "loss": 0.1009, "lr": 2.872192792238623e-07, "epoch": 4.306702515844056, "percentage": 86.13, "elapsed_time": "2:57:36", "remaining_time": "0:28:35", "throughput": 19872.09, "total_tokens": 211771968} +{"current_steps": 67280, "total_steps": 78105, "loss": 0.1001, "lr": 2.8695935022843737e-07, "epoch": 4.307022597785033, "percentage": 86.14, "elapsed_time": "2:57:37", "remaining_time": "0:28:34", "throughput": 19872.3, "total_tokens": 211787712} +{"current_steps": 67285, "total_steps": 78105, "loss": 0.0968, "lr": 2.8669953174073506e-07, "epoch": 4.30734267972601, "percentage": 86.15, "elapsed_time": "2:57:38", "remaining_time": "0:28:33", "throughput": 19872.54, "total_tokens": 211803712} +{"current_steps": 67290, "total_steps": 78105, "loss": 0.1407, "lr": 2.86439823773729e-07, "epoch": 4.307662761666987, "percentage": 86.15, "elapsed_time": "2:57:38", "remaining_time": "0:28:33", "throughput": 19872.74, "total_tokens": 211819200} +{"current_steps": 67295, "total_steps": 78105, "loss": 0.0711, "lr": 2.86180226340389e-07, "epoch": 4.307982843607964, "percentage": 86.16, "elapsed_time": "2:57:39", "remaining_time": "0:28:32", "throughput": 19872.98, "total_tokens": 211835840} +{"current_steps": 67300, "total_steps": 78105, "loss": 0.1331, "lr": 2.8592073945367645e-07, "epoch": 4.308302925548941, "percentage": 86.17, "elapsed_time": "2:57:40", "remaining_time": "0:28:31", "throughput": 19873.18, "total_tokens": 211851264} +{"current_steps": 67305, "total_steps": 78105, "loss": 0.0881, "lr": 2.8566136312655006e-07, "epoch": 4.308623007489917, "percentage": 86.17, "elapsed_time": "2:57:40", "remaining_time": "0:28:30", "throughput": 19873.39, "total_tokens": 211866688} +{"current_steps": 67310, "total_steps": 78105, "loss": 0.0945, "lr": 2.854020973719604e-07, "epoch": 4.308943089430894, "percentage": 86.18, "elapsed_time": "2:57:41", "remaining_time": "0:28:29", "throughput": 19873.59, "total_tokens": 211881984} +{"current_steps": 67315, "total_steps": 78105, "loss": 0.1264, "lr": 2.851429422028551e-07, "epoch": 4.309263171371871, "percentage": 86.19, "elapsed_time": "2:57:42", "remaining_time": "0:28:29", "throughput": 19873.8, "total_tokens": 211897792} +{"current_steps": 67320, "total_steps": 78105, "loss": 0.1337, "lr": 2.848838976321744e-07, "epoch": 4.309583253312848, "percentage": 86.19, "elapsed_time": "2:57:42", "remaining_time": "0:28:28", "throughput": 19874.03, "total_tokens": 211913792} +{"current_steps": 67325, "total_steps": 78105, "loss": 0.1773, "lr": 2.8462496367285377e-07, "epoch": 4.309903335253825, "percentage": 86.2, "elapsed_time": "2:57:43", "remaining_time": "0:28:27", "throughput": 19874.29, "total_tokens": 211930880} +{"current_steps": 67330, "total_steps": 78105, "loss": 0.1356, "lr": 2.8436614033782286e-07, "epoch": 4.310223417194802, "percentage": 86.2, "elapsed_time": "2:57:44", "remaining_time": "0:28:26", "throughput": 19874.49, "total_tokens": 211946368} +{"current_steps": 67335, "total_steps": 78105, "loss": 0.1384, "lr": 2.841074276400063e-07, "epoch": 4.310543499135779, "percentage": 86.21, "elapsed_time": "2:57:44", "remaining_time": "0:28:25", "throughput": 19874.71, "total_tokens": 211962240} +{"current_steps": 67340, "total_steps": 78105, "loss": 0.1178, "lr": 2.838488255923219e-07, "epoch": 4.310863581076756, "percentage": 86.22, "elapsed_time": "2:57:45", "remaining_time": "0:28:25", "throughput": 19874.95, "total_tokens": 211978624} +{"current_steps": 67345, "total_steps": 78105, "loss": 0.0958, "lr": 2.835903342076843e-07, "epoch": 4.311183663017733, "percentage": 86.22, "elapsed_time": "2:57:46", "remaining_time": "0:28:24", "throughput": 19875.14, "total_tokens": 211993792} +{"current_steps": 67350, "total_steps": 78105, "loss": 0.1116, "lr": 2.8333195349900064e-07, "epoch": 4.311503744958709, "percentage": 86.23, "elapsed_time": "2:57:46", "remaining_time": "0:28:23", "throughput": 19875.37, "total_tokens": 212009856} +{"current_steps": 67355, "total_steps": 78105, "loss": 0.1179, "lr": 2.830736834791728e-07, "epoch": 4.311823826899686, "percentage": 86.24, "elapsed_time": "2:57:47", "remaining_time": "0:28:22", "throughput": 19875.56, "total_tokens": 212025216} +{"current_steps": 67360, "total_steps": 78105, "loss": 0.0726, "lr": 2.8281552416109773e-07, "epoch": 4.312143908840663, "percentage": 86.24, "elapsed_time": "2:57:48", "remaining_time": "0:28:21", "throughput": 19875.8, "total_tokens": 212041920} +{"current_steps": 67365, "total_steps": 78105, "loss": 0.1373, "lr": 2.8255747555766654e-07, "epoch": 4.31246399078164, "percentage": 86.25, "elapsed_time": "2:57:49", "remaining_time": "0:28:20", "throughput": 19876.05, "total_tokens": 212058432} +{"current_steps": 67370, "total_steps": 78105, "loss": 0.1017, "lr": 2.822995376817647e-07, "epoch": 4.312784072722617, "percentage": 86.26, "elapsed_time": "2:57:49", "remaining_time": "0:28:20", "throughput": 19876.23, "total_tokens": 212073216} +{"current_steps": 67375, "total_steps": 78105, "loss": 0.1448, "lr": 2.8204171054627194e-07, "epoch": 4.313104154663594, "percentage": 86.26, "elapsed_time": "2:57:50", "remaining_time": "0:28:19", "throughput": 19876.47, "total_tokens": 212089792} +{"current_steps": 67380, "total_steps": 78105, "loss": 0.1223, "lr": 2.817839941640643e-07, "epoch": 4.313424236604571, "percentage": 86.27, "elapsed_time": "2:57:51", "remaining_time": "0:28:18", "throughput": 19876.69, "total_tokens": 212105984} +{"current_steps": 67385, "total_steps": 78105, "loss": 0.1467, "lr": 2.815263885480088e-07, "epoch": 4.313744318545548, "percentage": 86.27, "elapsed_time": "2:57:51", "remaining_time": "0:28:17", "throughput": 19876.85, "total_tokens": 212120640} +{"current_steps": 67390, "total_steps": 78105, "loss": 0.1213, "lr": 2.8126889371097006e-07, "epoch": 4.314064400486525, "percentage": 86.28, "elapsed_time": "2:57:52", "remaining_time": "0:28:16", "throughput": 19877.05, "total_tokens": 212136064} +{"current_steps": 67395, "total_steps": 78105, "loss": 0.1132, "lr": 2.810115096658059e-07, "epoch": 4.314384482427501, "percentage": 86.29, "elapsed_time": "2:57:53", "remaining_time": "0:28:16", "throughput": 19877.25, "total_tokens": 212151488} +{"current_steps": 67400, "total_steps": 78105, "loss": 0.1291, "lr": 2.8075423642536876e-07, "epoch": 4.314704564368478, "percentage": 86.29, "elapsed_time": "2:57:53", "remaining_time": "0:28:15", "throughput": 19877.42, "total_tokens": 212166016} +{"current_steps": 67405, "total_steps": 78105, "loss": 0.1273, "lr": 2.8049707400250534e-07, "epoch": 4.315024646309455, "percentage": 86.3, "elapsed_time": "2:57:54", "remaining_time": "0:28:14", "throughput": 19877.6, "total_tokens": 212180928} +{"current_steps": 67410, "total_steps": 78105, "loss": 0.1066, "lr": 2.8024002241005654e-07, "epoch": 4.315344728250432, "percentage": 86.31, "elapsed_time": "2:57:55", "remaining_time": "0:28:13", "throughput": 19877.79, "total_tokens": 212196352} +{"current_steps": 67415, "total_steps": 78105, "loss": 0.213, "lr": 2.799830816608598e-07, "epoch": 4.315664810191409, "percentage": 86.31, "elapsed_time": "2:57:55", "remaining_time": "0:28:12", "throughput": 19878.02, "total_tokens": 212212480} +{"current_steps": 67420, "total_steps": 78105, "loss": 0.1763, "lr": 2.797262517677435e-07, "epoch": 4.315984892132386, "percentage": 86.32, "elapsed_time": "2:57:56", "remaining_time": "0:28:12", "throughput": 19878.19, "total_tokens": 212227072} +{"current_steps": 67425, "total_steps": 78105, "loss": 0.1169, "lr": 2.7946953274353346e-07, "epoch": 4.316304974073363, "percentage": 86.33, "elapsed_time": "2:57:57", "remaining_time": "0:28:11", "throughput": 19878.38, "total_tokens": 212242176} +{"current_steps": 67430, "total_steps": 78105, "loss": 0.1239, "lr": 2.792129246010486e-07, "epoch": 4.31662505601434, "percentage": 86.33, "elapsed_time": "2:57:57", "remaining_time": "0:28:10", "throughput": 19878.58, "total_tokens": 212257408} +{"current_steps": 67435, "total_steps": 78105, "loss": 0.1388, "lr": 2.7895642735310285e-07, "epoch": 4.316945137955317, "percentage": 86.34, "elapsed_time": "2:57:58", "remaining_time": "0:28:09", "throughput": 19878.82, "total_tokens": 212273856} +{"current_steps": 67440, "total_steps": 78105, "loss": 0.1176, "lr": 2.78700041012504e-07, "epoch": 4.317265219896293, "percentage": 86.35, "elapsed_time": "2:57:59", "remaining_time": "0:28:08", "throughput": 19879.1, "total_tokens": 212291456} +{"current_steps": 67445, "total_steps": 78105, "loss": 0.11, "lr": 2.7844376559205464e-07, "epoch": 4.31758530183727, "percentage": 86.35, "elapsed_time": "2:57:59", "remaining_time": "0:28:07", "throughput": 19879.3, "total_tokens": 212306688} +{"current_steps": 67450, "total_steps": 78105, "loss": 0.1237, "lr": 2.78187601104552e-07, "epoch": 4.317905383778247, "percentage": 86.36, "elapsed_time": "2:58:00", "remaining_time": "0:28:07", "throughput": 19879.52, "total_tokens": 212322944} +{"current_steps": 67455, "total_steps": 78105, "loss": 0.1884, "lr": 2.779315475627872e-07, "epoch": 4.318225465719224, "percentage": 86.36, "elapsed_time": "2:58:01", "remaining_time": "0:28:06", "throughput": 19879.72, "total_tokens": 212338688} +{"current_steps": 67460, "total_steps": 78105, "loss": 0.1658, "lr": 2.7767560497954704e-07, "epoch": 4.318545547660201, "percentage": 86.37, "elapsed_time": "2:58:01", "remaining_time": "0:28:05", "throughput": 19879.97, "total_tokens": 212355392} +{"current_steps": 67465, "total_steps": 78105, "loss": 0.1622, "lr": 2.774197733676115e-07, "epoch": 4.318865629601178, "percentage": 86.38, "elapsed_time": "2:58:02", "remaining_time": "0:28:04", "throughput": 19880.21, "total_tokens": 212372032} +{"current_steps": 67470, "total_steps": 78105, "loss": 0.1957, "lr": 2.771640527397554e-07, "epoch": 4.319185711542155, "percentage": 86.38, "elapsed_time": "2:58:03", "remaining_time": "0:28:03", "throughput": 19880.43, "total_tokens": 212388288} +{"current_steps": 67475, "total_steps": 78105, "loss": 0.1136, "lr": 2.7690844310874847e-07, "epoch": 4.319505793483132, "percentage": 86.39, "elapsed_time": "2:58:03", "remaining_time": "0:28:03", "throughput": 19880.61, "total_tokens": 212402944} +{"current_steps": 67480, "total_steps": 78105, "loss": 0.2043, "lr": 2.766529444873539e-07, "epoch": 4.319825875424108, "percentage": 86.4, "elapsed_time": "2:58:04", "remaining_time": "0:28:02", "throughput": 19880.87, "total_tokens": 212419776} +{"current_steps": 67485, "total_steps": 78105, "loss": 0.1526, "lr": 2.763975568883298e-07, "epoch": 4.320145957365085, "percentage": 86.4, "elapsed_time": "2:58:05", "remaining_time": "0:28:01", "throughput": 19881.04, "total_tokens": 212434688} +{"current_steps": 67490, "total_steps": 78105, "loss": 0.0931, "lr": 2.761422803244307e-07, "epoch": 4.320466039306062, "percentage": 86.41, "elapsed_time": "2:58:05", "remaining_time": "0:28:00", "throughput": 19881.27, "total_tokens": 212451072} +{"current_steps": 67495, "total_steps": 78105, "loss": 0.1119, "lr": 2.758871148084011e-07, "epoch": 4.320786121247039, "percentage": 86.42, "elapsed_time": "2:58:06", "remaining_time": "0:27:59", "throughput": 19881.46, "total_tokens": 212466432} +{"current_steps": 67500, "total_steps": 78105, "loss": 0.1451, "lr": 2.7563206035298525e-07, "epoch": 4.321106203188016, "percentage": 86.42, "elapsed_time": "2:58:07", "remaining_time": "0:27:59", "throughput": 19881.63, "total_tokens": 212481472} +{"current_steps": 67505, "total_steps": 78105, "loss": 0.0936, "lr": 2.7537711697091685e-07, "epoch": 4.321426285128993, "percentage": 86.43, "elapsed_time": "2:58:07", "remaining_time": "0:27:58", "throughput": 19881.82, "total_tokens": 212496576} +{"current_steps": 67510, "total_steps": 78105, "loss": 0.1878, "lr": 2.7512228467492826e-07, "epoch": 4.32174636706997, "percentage": 86.43, "elapsed_time": "2:58:08", "remaining_time": "0:27:57", "throughput": 19882.01, "total_tokens": 212511808} +{"current_steps": 67515, "total_steps": 78105, "loss": 0.113, "lr": 2.748675634777434e-07, "epoch": 4.322066449010947, "percentage": 86.44, "elapsed_time": "2:58:09", "remaining_time": "0:27:56", "throughput": 19882.19, "total_tokens": 212526848} +{"current_steps": 67520, "total_steps": 78105, "loss": 0.0943, "lr": 2.7461295339208214e-07, "epoch": 4.322386530951924, "percentage": 86.45, "elapsed_time": "2:58:09", "remaining_time": "0:27:55", "throughput": 19882.39, "total_tokens": 212542272} +{"current_steps": 67525, "total_steps": 78105, "loss": 0.1284, "lr": 2.7435845443065906e-07, "epoch": 4.322706612892901, "percentage": 86.45, "elapsed_time": "2:58:10", "remaining_time": "0:27:55", "throughput": 19882.62, "total_tokens": 212558400} +{"current_steps": 67530, "total_steps": 78105, "loss": 0.1714, "lr": 2.741040666061809e-07, "epoch": 4.323026694833877, "percentage": 86.46, "elapsed_time": "2:58:11", "remaining_time": "0:27:54", "throughput": 19882.83, "total_tokens": 212574144} +{"current_steps": 67535, "total_steps": 78105, "loss": 0.1202, "lr": 2.738497899313525e-07, "epoch": 4.323346776774854, "percentage": 86.47, "elapsed_time": "2:58:12", "remaining_time": "0:27:53", "throughput": 19883.03, "total_tokens": 212589888} +{"current_steps": 67540, "total_steps": 78105, "loss": 0.1282, "lr": 2.7359562441886867e-07, "epoch": 4.323666858715831, "percentage": 86.47, "elapsed_time": "2:58:12", "remaining_time": "0:27:52", "throughput": 19883.24, "total_tokens": 212605696} +{"current_steps": 67545, "total_steps": 78105, "loss": 0.0927, "lr": 2.7334157008142323e-07, "epoch": 4.323986940656808, "percentage": 86.48, "elapsed_time": "2:58:13", "remaining_time": "0:27:51", "throughput": 19883.44, "total_tokens": 212621248} +{"current_steps": 67550, "total_steps": 78105, "loss": 0.1703, "lr": 2.730876269317015e-07, "epoch": 4.324307022597785, "percentage": 86.49, "elapsed_time": "2:58:14", "remaining_time": "0:27:51", "throughput": 19883.73, "total_tokens": 212638784} +{"current_steps": 67555, "total_steps": 78105, "loss": 0.164, "lr": 2.728337949823842e-07, "epoch": 4.324627104538762, "percentage": 86.49, "elapsed_time": "2:58:14", "remaining_time": "0:27:50", "throughput": 19883.91, "total_tokens": 212653888} +{"current_steps": 67560, "total_steps": 78105, "loss": 0.0987, "lr": 2.7258007424614595e-07, "epoch": 4.324947186479739, "percentage": 86.5, "elapsed_time": "2:58:15", "remaining_time": "0:27:49", "throughput": 19884.12, "total_tokens": 212669504} +{"current_steps": 67565, "total_steps": 78105, "loss": 0.118, "lr": 2.723264647356569e-07, "epoch": 4.325267268420716, "percentage": 86.51, "elapsed_time": "2:58:16", "remaining_time": "0:27:48", "throughput": 19884.32, "total_tokens": 212684864} +{"current_steps": 67570, "total_steps": 78105, "loss": 0.117, "lr": 2.7207296646358017e-07, "epoch": 4.325587350361692, "percentage": 86.51, "elapsed_time": "2:58:16", "remaining_time": "0:27:47", "throughput": 19884.56, "total_tokens": 212701632} +{"current_steps": 67575, "total_steps": 78105, "loss": 0.1121, "lr": 2.7181957944257515e-07, "epoch": 4.325907432302669, "percentage": 86.52, "elapsed_time": "2:58:17", "remaining_time": "0:27:46", "throughput": 19884.83, "total_tokens": 212718784} +{"current_steps": 67580, "total_steps": 78105, "loss": 0.1139, "lr": 2.715663036852939e-07, "epoch": 4.326227514243646, "percentage": 86.52, "elapsed_time": "2:58:18", "remaining_time": "0:27:46", "throughput": 19885.06, "total_tokens": 212735296} +{"current_steps": 67585, "total_steps": 78105, "loss": 0.1374, "lr": 2.7131313920438414e-07, "epoch": 4.326547596184623, "percentage": 86.53, "elapsed_time": "2:58:18", "remaining_time": "0:27:45", "throughput": 19885.26, "total_tokens": 212750848} +{"current_steps": 67590, "total_steps": 78105, "loss": 0.1773, "lr": 2.7106008601248737e-07, "epoch": 4.3268676781256, "percentage": 86.54, "elapsed_time": "2:58:19", "remaining_time": "0:27:44", "throughput": 19885.46, "total_tokens": 212766400} +{"current_steps": 67595, "total_steps": 78105, "loss": 0.1624, "lr": 2.7080714412223987e-07, "epoch": 4.327187760066577, "percentage": 86.54, "elapsed_time": "2:58:20", "remaining_time": "0:27:43", "throughput": 19885.66, "total_tokens": 212781504} +{"current_steps": 67600, "total_steps": 78105, "loss": 0.1569, "lr": 2.705543135462721e-07, "epoch": 4.327507842007554, "percentage": 86.55, "elapsed_time": "2:58:20", "remaining_time": "0:27:42", "throughput": 19885.93, "total_tokens": 212798784} +{"current_steps": 67605, "total_steps": 78105, "loss": 0.1138, "lr": 2.703015942972087e-07, "epoch": 4.327827923948531, "percentage": 86.56, "elapsed_time": "2:58:21", "remaining_time": "0:27:42", "throughput": 19886.22, "total_tokens": 212816384} +{"current_steps": 67610, "total_steps": 78105, "loss": 0.1259, "lr": 2.700489863876704e-07, "epoch": 4.328148005889508, "percentage": 86.56, "elapsed_time": "2:58:22", "remaining_time": "0:27:41", "throughput": 19886.42, "total_tokens": 212831808} +{"current_steps": 67615, "total_steps": 78105, "loss": 0.1065, "lr": 2.697964898302696e-07, "epoch": 4.328468087830484, "percentage": 86.57, "elapsed_time": "2:58:23", "remaining_time": "0:27:40", "throughput": 19886.62, "total_tokens": 212847360} +{"current_steps": 67620, "total_steps": 78105, "loss": 0.1117, "lr": 2.695441046376157e-07, "epoch": 4.328788169771461, "percentage": 86.58, "elapsed_time": "2:58:23", "remaining_time": "0:27:39", "throughput": 19886.82, "total_tokens": 212862976} +{"current_steps": 67625, "total_steps": 78105, "loss": 0.1428, "lr": 2.6929183082231136e-07, "epoch": 4.329108251712438, "percentage": 86.58, "elapsed_time": "2:58:24", "remaining_time": "0:27:38", "throughput": 19886.98, "total_tokens": 212877504} +{"current_steps": 67630, "total_steps": 78105, "loss": 0.1325, "lr": 2.6903966839695346e-07, "epoch": 4.329428333653415, "percentage": 86.59, "elapsed_time": "2:58:25", "remaining_time": "0:27:38", "throughput": 19887.16, "total_tokens": 212892544} +{"current_steps": 67635, "total_steps": 78105, "loss": 0.119, "lr": 2.6878761737413413e-07, "epoch": 4.329748415594392, "percentage": 86.59, "elapsed_time": "2:58:25", "remaining_time": "0:27:37", "throughput": 19887.35, "total_tokens": 212907968} +{"current_steps": 67640, "total_steps": 78105, "loss": 0.1283, "lr": 2.685356777664386e-07, "epoch": 4.330068497535369, "percentage": 86.6, "elapsed_time": "2:58:26", "remaining_time": "0:27:36", "throughput": 19887.56, "total_tokens": 212923840} +{"current_steps": 67645, "total_steps": 78105, "loss": 0.1442, "lr": 2.68283849586449e-07, "epoch": 4.330388579476346, "percentage": 86.61, "elapsed_time": "2:58:27", "remaining_time": "0:27:35", "throughput": 19887.76, "total_tokens": 212939200} +{"current_steps": 67650, "total_steps": 78105, "loss": 0.1297, "lr": 2.6803213284673834e-07, "epoch": 4.330708661417323, "percentage": 86.61, "elapsed_time": "2:58:27", "remaining_time": "0:27:34", "throughput": 19887.97, "total_tokens": 212955008} +{"current_steps": 67655, "total_steps": 78105, "loss": 0.127, "lr": 2.6778052755987743e-07, "epoch": 4.3310287433583, "percentage": 86.62, "elapsed_time": "2:58:28", "remaining_time": "0:27:34", "throughput": 19888.17, "total_tokens": 212970752} +{"current_steps": 67660, "total_steps": 78105, "loss": 0.1288, "lr": 2.675290337384301e-07, "epoch": 4.331348825299276, "percentage": 86.63, "elapsed_time": "2:58:29", "remaining_time": "0:27:33", "throughput": 19888.35, "total_tokens": 212986048} +{"current_steps": 67665, "total_steps": 78105, "loss": 0.1173, "lr": 2.6727765139495404e-07, "epoch": 4.331668907240253, "percentage": 86.63, "elapsed_time": "2:58:29", "remaining_time": "0:27:32", "throughput": 19888.55, "total_tokens": 213001728} +{"current_steps": 67670, "total_steps": 78105, "loss": 0.1382, "lr": 2.670263805420026e-07, "epoch": 4.33198898918123, "percentage": 86.64, "elapsed_time": "2:58:30", "remaining_time": "0:27:31", "throughput": 19888.81, "total_tokens": 213018624} +{"current_steps": 67675, "total_steps": 78105, "loss": 0.1322, "lr": 2.667752211921221e-07, "epoch": 4.332309071122207, "percentage": 86.65, "elapsed_time": "2:58:31", "remaining_time": "0:27:30", "throughput": 19889.01, "total_tokens": 213034112} +{"current_steps": 67680, "total_steps": 78105, "loss": 0.1283, "lr": 2.66524173357855e-07, "epoch": 4.332629153063184, "percentage": 86.65, "elapsed_time": "2:58:31", "remaining_time": "0:27:29", "throughput": 19889.19, "total_tokens": 213049024} +{"current_steps": 67685, "total_steps": 78105, "loss": 0.1605, "lr": 2.662732370517368e-07, "epoch": 4.332949235004161, "percentage": 86.66, "elapsed_time": "2:58:32", "remaining_time": "0:27:29", "throughput": 19889.41, "total_tokens": 213064640} +{"current_steps": 67690, "total_steps": 78105, "loss": 0.1374, "lr": 2.6602241228629757e-07, "epoch": 4.333269316945138, "percentage": 86.67, "elapsed_time": "2:58:33", "remaining_time": "0:27:28", "throughput": 19889.57, "total_tokens": 213079040} +{"current_steps": 67695, "total_steps": 78105, "loss": 0.1308, "lr": 2.65771699074063e-07, "epoch": 4.333589398886115, "percentage": 86.67, "elapsed_time": "2:58:33", "remaining_time": "0:27:27", "throughput": 19889.76, "total_tokens": 213094336} +{"current_steps": 67700, "total_steps": 78105, "loss": 0.1318, "lr": 2.655210974275524e-07, "epoch": 4.333909480827092, "percentage": 86.68, "elapsed_time": "2:58:34", "remaining_time": "0:27:26", "throughput": 19889.98, "total_tokens": 213110592} +{"current_steps": 67705, "total_steps": 78105, "loss": 0.1614, "lr": 2.652706073592792e-07, "epoch": 4.334229562768068, "percentage": 86.68, "elapsed_time": "2:58:35", "remaining_time": "0:27:25", "throughput": 19890.19, "total_tokens": 213126336} +{"current_steps": 67710, "total_steps": 78105, "loss": 0.1108, "lr": 2.650202288817516e-07, "epoch": 4.334549644709045, "percentage": 86.69, "elapsed_time": "2:58:35", "remaining_time": "0:27:25", "throughput": 19890.39, "total_tokens": 213141760} +{"current_steps": 67715, "total_steps": 78105, "loss": 0.1333, "lr": 2.6476996200747144e-07, "epoch": 4.334869726650022, "percentage": 86.7, "elapsed_time": "2:58:36", "remaining_time": "0:27:24", "throughput": 19890.57, "total_tokens": 213156736} +{"current_steps": 67720, "total_steps": 78105, "loss": 0.1304, "lr": 2.6451980674893766e-07, "epoch": 4.335189808590999, "percentage": 86.7, "elapsed_time": "2:58:37", "remaining_time": "0:27:23", "throughput": 19890.75, "total_tokens": 213171712} +{"current_steps": 67725, "total_steps": 78105, "loss": 0.1565, "lr": 2.6426976311863947e-07, "epoch": 4.335509890531976, "percentage": 86.71, "elapsed_time": "2:58:37", "remaining_time": "0:27:22", "throughput": 19890.95, "total_tokens": 213187200} +{"current_steps": 67730, "total_steps": 78105, "loss": 0.1332, "lr": 2.640198311290648e-07, "epoch": 4.335829972472953, "percentage": 86.72, "elapsed_time": "2:58:38", "remaining_time": "0:27:21", "throughput": 19891.14, "total_tokens": 213202112} +{"current_steps": 67735, "total_steps": 78105, "loss": 0.1567, "lr": 2.6377001079269215e-07, "epoch": 4.33615005441393, "percentage": 86.72, "elapsed_time": "2:58:39", "remaining_time": "0:27:21", "throughput": 19891.35, "total_tokens": 213217856} +{"current_steps": 67740, "total_steps": 78105, "loss": 0.1348, "lr": 2.635203021219976e-07, "epoch": 4.336470136354907, "percentage": 86.73, "elapsed_time": "2:58:39", "remaining_time": "0:27:20", "throughput": 19891.51, "total_tokens": 213232256} +{"current_steps": 67745, "total_steps": 78105, "loss": 0.1511, "lr": 2.6327070512944947e-07, "epoch": 4.336790218295883, "percentage": 86.74, "elapsed_time": "2:58:40", "remaining_time": "0:27:19", "throughput": 19891.7, "total_tokens": 213247488} +{"current_steps": 67750, "total_steps": 78105, "loss": 0.1696, "lr": 2.6302121982751144e-07, "epoch": 4.33711030023686, "percentage": 86.74, "elapsed_time": "2:58:41", "remaining_time": "0:27:18", "throughput": 19891.87, "total_tokens": 213262272} +{"current_steps": 67755, "total_steps": 78105, "loss": 0.1443, "lr": 2.6277184622864245e-07, "epoch": 4.337430382177837, "percentage": 86.75, "elapsed_time": "2:58:41", "remaining_time": "0:27:17", "throughput": 19892.11, "total_tokens": 213278528} +{"current_steps": 67760, "total_steps": 78105, "loss": 0.1079, "lr": 2.625225843452933e-07, "epoch": 4.337750464118814, "percentage": 86.76, "elapsed_time": "2:58:42", "remaining_time": "0:27:17", "throughput": 19892.31, "total_tokens": 213294208} +{"current_steps": 67765, "total_steps": 78105, "loss": 0.1091, "lr": 2.6227343418991246e-07, "epoch": 4.338070546059791, "percentage": 86.76, "elapsed_time": "2:58:43", "remaining_time": "0:27:16", "throughput": 19892.5, "total_tokens": 213309440} +{"current_steps": 67770, "total_steps": 78105, "loss": 0.1213, "lr": 2.6202439577493966e-07, "epoch": 4.338390628000768, "percentage": 86.77, "elapsed_time": "2:58:43", "remaining_time": "0:27:15", "throughput": 19892.77, "total_tokens": 213326464} +{"current_steps": 67775, "total_steps": 78105, "loss": 0.1333, "lr": 2.6177546911281185e-07, "epoch": 4.338710709941745, "percentage": 86.77, "elapsed_time": "2:58:44", "remaining_time": "0:27:14", "throughput": 19892.99, "total_tokens": 213342336} +{"current_steps": 67780, "total_steps": 78105, "loss": 0.2306, "lr": 2.6152665421595865e-07, "epoch": 4.339030791882722, "percentage": 86.78, "elapsed_time": "2:58:45", "remaining_time": "0:27:13", "throughput": 19893.19, "total_tokens": 213358080} +{"current_steps": 67785, "total_steps": 78105, "loss": 0.128, "lr": 2.6127795109680443e-07, "epoch": 4.339350873823699, "percentage": 86.79, "elapsed_time": "2:58:45", "remaining_time": "0:27:12", "throughput": 19893.35, "total_tokens": 213372672} +{"current_steps": 67790, "total_steps": 78105, "loss": 0.1503, "lr": 2.6102935976776825e-07, "epoch": 4.339670955764676, "percentage": 86.79, "elapsed_time": "2:58:46", "remaining_time": "0:27:12", "throughput": 19893.56, "total_tokens": 213388160} +{"current_steps": 67795, "total_steps": 78105, "loss": 0.0951, "lr": 2.607808802412634e-07, "epoch": 4.339991037705652, "percentage": 86.8, "elapsed_time": "2:58:47", "remaining_time": "0:27:11", "throughput": 19893.74, "total_tokens": 213403456} +{"current_steps": 67800, "total_steps": 78105, "loss": 0.1334, "lr": 2.605325125296976e-07, "epoch": 4.340311119646629, "percentage": 86.81, "elapsed_time": "2:58:47", "remaining_time": "0:27:10", "throughput": 19894.01, "total_tokens": 213420608} +{"current_steps": 67805, "total_steps": 78105, "loss": 0.1515, "lr": 2.602842566454733e-07, "epoch": 4.340631201587606, "percentage": 86.81, "elapsed_time": "2:58:48", "remaining_time": "0:27:09", "throughput": 19894.2, "total_tokens": 213436096} +{"current_steps": 67810, "total_steps": 78105, "loss": 0.0939, "lr": 2.6003611260098705e-07, "epoch": 4.340951283528583, "percentage": 86.82, "elapsed_time": "2:58:49", "remaining_time": "0:27:08", "throughput": 19894.41, "total_tokens": 213452096} +{"current_steps": 67815, "total_steps": 78105, "loss": 0.1105, "lr": 2.5978808040862976e-07, "epoch": 4.34127136546956, "percentage": 86.83, "elapsed_time": "2:58:49", "remaining_time": "0:27:08", "throughput": 19894.6, "total_tokens": 213467200} +{"current_steps": 67820, "total_steps": 78105, "loss": 0.1204, "lr": 2.5954016008078704e-07, "epoch": 4.341591447410537, "percentage": 86.83, "elapsed_time": "2:58:50", "remaining_time": "0:27:07", "throughput": 19894.8, "total_tokens": 213482816} +{"current_steps": 67825, "total_steps": 78105, "loss": 0.1339, "lr": 2.5929235162983845e-07, "epoch": 4.341911529351514, "percentage": 86.84, "elapsed_time": "2:58:51", "remaining_time": "0:27:06", "throughput": 19894.99, "total_tokens": 213497664} +{"current_steps": 67830, "total_steps": 78105, "loss": 0.1269, "lr": 2.590446550681586e-07, "epoch": 4.342231611292491, "percentage": 86.84, "elapsed_time": "2:58:51", "remaining_time": "0:27:05", "throughput": 19895.18, "total_tokens": 213513024} +{"current_steps": 67835, "total_steps": 78105, "loss": 0.1238, "lr": 2.5879707040811527e-07, "epoch": 4.342551693233467, "percentage": 86.85, "elapsed_time": "2:58:52", "remaining_time": "0:27:04", "throughput": 19895.35, "total_tokens": 213527872} +{"current_steps": 67840, "total_steps": 78105, "loss": 0.1203, "lr": 2.585495976620733e-07, "epoch": 4.342871775174444, "percentage": 86.86, "elapsed_time": "2:58:53", "remaining_time": "0:27:04", "throughput": 19895.56, "total_tokens": 213543424} +{"current_steps": 67845, "total_steps": 78105, "loss": 0.1121, "lr": 2.583022368423882e-07, "epoch": 4.343191857115421, "percentage": 86.86, "elapsed_time": "2:58:53", "remaining_time": "0:27:03", "throughput": 19895.74, "total_tokens": 213558144} +{"current_steps": 67850, "total_steps": 78105, "loss": 0.1569, "lr": 2.5805498796141304e-07, "epoch": 4.343511939056398, "percentage": 86.87, "elapsed_time": "2:58:54", "remaining_time": "0:27:02", "throughput": 19895.92, "total_tokens": 213573184} +{"current_steps": 67855, "total_steps": 78105, "loss": 0.1484, "lr": 2.578078510314941e-07, "epoch": 4.343832020997375, "percentage": 86.88, "elapsed_time": "2:58:55", "remaining_time": "0:27:01", "throughput": 19896.08, "total_tokens": 213587712} +{"current_steps": 67860, "total_steps": 78105, "loss": 0.123, "lr": 2.57560826064972e-07, "epoch": 4.344152102938352, "percentage": 86.88, "elapsed_time": "2:58:55", "remaining_time": "0:27:00", "throughput": 19896.29, "total_tokens": 213603456} +{"current_steps": 67865, "total_steps": 78105, "loss": 0.1645, "lr": 2.573139130741817e-07, "epoch": 4.344472184879329, "percentage": 86.89, "elapsed_time": "2:58:56", "remaining_time": "0:27:00", "throughput": 19896.5, "total_tokens": 213619264} +{"current_steps": 67870, "total_steps": 78105, "loss": 0.1573, "lr": 2.570671120714524e-07, "epoch": 4.344792266820306, "percentage": 86.9, "elapsed_time": "2:58:57", "remaining_time": "0:26:59", "throughput": 19896.72, "total_tokens": 213635392} +{"current_steps": 67875, "total_steps": 78105, "loss": 0.1601, "lr": 2.5682042306910957e-07, "epoch": 4.345112348761283, "percentage": 86.9, "elapsed_time": "2:58:57", "remaining_time": "0:26:58", "throughput": 19896.98, "total_tokens": 213652224} +{"current_steps": 67880, "total_steps": 78105, "loss": 0.1461, "lr": 2.565738460794695e-07, "epoch": 4.345432430702259, "percentage": 86.91, "elapsed_time": "2:58:58", "remaining_time": "0:26:57", "throughput": 19897.17, "total_tokens": 213667712} +{"current_steps": 67885, "total_steps": 78105, "loss": 0.1313, "lr": 2.5632738111484644e-07, "epoch": 4.345752512643236, "percentage": 86.92, "elapsed_time": "2:58:59", "remaining_time": "0:26:56", "throughput": 19897.32, "total_tokens": 213681984} +{"current_steps": 67890, "total_steps": 78105, "loss": 0.1267, "lr": 2.5608102818754725e-07, "epoch": 4.346072594584213, "percentage": 86.92, "elapsed_time": "2:58:59", "remaining_time": "0:26:55", "throughput": 19897.51, "total_tokens": 213696768} +{"current_steps": 67895, "total_steps": 78105, "loss": 0.1883, "lr": 2.558347873098732e-07, "epoch": 4.34639267652519, "percentage": 86.93, "elapsed_time": "2:59:00", "remaining_time": "0:26:55", "throughput": 19897.7, "total_tokens": 213712256} +{"current_steps": 67900, "total_steps": 78105, "loss": 0.1275, "lr": 2.555886584941203e-07, "epoch": 4.346712758466167, "percentage": 86.93, "elapsed_time": "2:59:01", "remaining_time": "0:26:54", "throughput": 19897.89, "total_tokens": 213727552} +{"current_steps": 67905, "total_steps": 78105, "loss": 0.1666, "lr": 2.553426417525792e-07, "epoch": 4.347032840407144, "percentage": 86.94, "elapsed_time": "2:59:01", "remaining_time": "0:26:53", "throughput": 19898.11, "total_tokens": 213743488} +{"current_steps": 67910, "total_steps": 78105, "loss": 0.1325, "lr": 2.5509673709753465e-07, "epoch": 4.347352922348121, "percentage": 86.95, "elapsed_time": "2:59:02", "remaining_time": "0:26:52", "throughput": 19898.28, "total_tokens": 213758592} +{"current_steps": 67915, "total_steps": 78105, "loss": 0.125, "lr": 2.548509445412659e-07, "epoch": 4.347673004289098, "percentage": 86.95, "elapsed_time": "2:59:03", "remaining_time": "0:26:51", "throughput": 19898.46, "total_tokens": 213773824} +{"current_steps": 67920, "total_steps": 78105, "loss": 0.1177, "lr": 2.5460526409604586e-07, "epoch": 4.347993086230075, "percentage": 86.96, "elapsed_time": "2:59:03", "remaining_time": "0:26:51", "throughput": 19898.64, "total_tokens": 213788416} +{"current_steps": 67925, "total_steps": 78105, "loss": 0.1054, "lr": 2.5435969577414345e-07, "epoch": 4.348313168171051, "percentage": 86.97, "elapsed_time": "2:59:04", "remaining_time": "0:26:50", "throughput": 19898.8, "total_tokens": 213803136} +{"current_steps": 67930, "total_steps": 78105, "loss": 0.1535, "lr": 2.541142395878207e-07, "epoch": 4.348633250112028, "percentage": 86.97, "elapsed_time": "2:59:05", "remaining_time": "0:26:49", "throughput": 19899.05, "total_tokens": 213819648} +{"current_steps": 67935, "total_steps": 78105, "loss": 0.1334, "lr": 2.538688955493346e-07, "epoch": 4.348953332053005, "percentage": 86.98, "elapsed_time": "2:59:05", "remaining_time": "0:26:48", "throughput": 19899.27, "total_tokens": 213835584} +{"current_steps": 67940, "total_steps": 78105, "loss": 0.1374, "lr": 2.5362366367093603e-07, "epoch": 4.349273413993982, "percentage": 86.99, "elapsed_time": "2:59:06", "remaining_time": "0:26:47", "throughput": 19899.5, "total_tokens": 213851968} +{"current_steps": 67945, "total_steps": 78105, "loss": 0.1607, "lr": 2.5337854396487034e-07, "epoch": 4.349593495934959, "percentage": 86.99, "elapsed_time": "2:59:07", "remaining_time": "0:26:47", "throughput": 19899.72, "total_tokens": 213868480} +{"current_steps": 67950, "total_steps": 78105, "loss": 0.1363, "lr": 2.5313353644337903e-07, "epoch": 4.349913577875936, "percentage": 87.0, "elapsed_time": "2:59:08", "remaining_time": "0:26:46", "throughput": 19899.97, "total_tokens": 213885120} +{"current_steps": 67955, "total_steps": 78105, "loss": 0.1133, "lr": 2.528886411186943e-07, "epoch": 4.350233659816913, "percentage": 87.0, "elapsed_time": "2:59:08", "remaining_time": "0:26:45", "throughput": 19900.21, "total_tokens": 213902080} +{"current_steps": 67960, "total_steps": 78105, "loss": 0.1242, "lr": 2.5264385800304687e-07, "epoch": 4.35055374175789, "percentage": 87.01, "elapsed_time": "2:59:09", "remaining_time": "0:26:44", "throughput": 19900.48, "total_tokens": 213919232} +{"current_steps": 67965, "total_steps": 78105, "loss": 0.1588, "lr": 2.523991871086584e-07, "epoch": 4.350873823698867, "percentage": 87.02, "elapsed_time": "2:59:10", "remaining_time": "0:26:43", "throughput": 19900.69, "total_tokens": 213935360} +{"current_steps": 67970, "total_steps": 78105, "loss": 0.1353, "lr": 2.5215462844774744e-07, "epoch": 4.351193905639843, "percentage": 87.02, "elapsed_time": "2:59:10", "remaining_time": "0:26:43", "throughput": 19900.89, "total_tokens": 213950720} +{"current_steps": 67975, "total_steps": 78105, "loss": 0.1115, "lr": 2.519101820325259e-07, "epoch": 4.35151398758082, "percentage": 87.03, "elapsed_time": "2:59:11", "remaining_time": "0:26:42", "throughput": 19901.12, "total_tokens": 213967040} +{"current_steps": 67980, "total_steps": 78105, "loss": 0.143, "lr": 2.516658478751996e-07, "epoch": 4.351834069521797, "percentage": 87.04, "elapsed_time": "2:59:12", "remaining_time": "0:26:41", "throughput": 19901.39, "total_tokens": 213984384} +{"current_steps": 67985, "total_steps": 78105, "loss": 0.1467, "lr": 2.5142162598797063e-07, "epoch": 4.352154151462774, "percentage": 87.04, "elapsed_time": "2:59:12", "remaining_time": "0:26:40", "throughput": 19901.62, "total_tokens": 214000704} +{"current_steps": 67990, "total_steps": 78105, "loss": 0.1568, "lr": 2.511775163830321e-07, "epoch": 4.352474233403751, "percentage": 87.05, "elapsed_time": "2:59:13", "remaining_time": "0:26:39", "throughput": 19901.94, "total_tokens": 214019712} +{"current_steps": 67995, "total_steps": 78105, "loss": 0.0906, "lr": 2.509335190725759e-07, "epoch": 4.352794315344728, "percentage": 87.06, "elapsed_time": "2:59:14", "remaining_time": "0:26:39", "throughput": 19902.15, "total_tokens": 214035328} +{"current_steps": 68000, "total_steps": 78105, "loss": 0.1677, "lr": 2.506896340687839e-07, "epoch": 4.353114397285705, "percentage": 87.06, "elapsed_time": "2:59:15", "remaining_time": "0:26:38", "throughput": 19902.34, "total_tokens": 214050688} +{"current_steps": 68005, "total_steps": 78105, "loss": 0.1416, "lr": 2.504458613838356e-07, "epoch": 4.353434479226682, "percentage": 87.07, "elapsed_time": "2:59:15", "remaining_time": "0:26:37", "throughput": 19902.5, "total_tokens": 214065280} +{"current_steps": 68010, "total_steps": 78105, "loss": 0.152, "lr": 2.502022010299035e-07, "epoch": 4.3537545611676585, "percentage": 87.08, "elapsed_time": "2:59:16", "remaining_time": "0:26:36", "throughput": 19902.72, "total_tokens": 214081408} +{"current_steps": 68015, "total_steps": 78105, "loss": 0.2511, "lr": 2.4995865301915473e-07, "epoch": 4.3540746431086355, "percentage": 87.08, "elapsed_time": "2:59:17", "remaining_time": "0:26:35", "throughput": 19902.93, "total_tokens": 214097152} +{"current_steps": 68020, "total_steps": 78105, "loss": 0.1267, "lr": 2.49715217363751e-07, "epoch": 4.3543947250496124, "percentage": 87.09, "elapsed_time": "2:59:17", "remaining_time": "0:26:34", "throughput": 19903.14, "total_tokens": 214112832} +{"current_steps": 68025, "total_steps": 78105, "loss": 0.1704, "lr": 2.4947189407584784e-07, "epoch": 4.354714806990589, "percentage": 87.09, "elapsed_time": "2:59:18", "remaining_time": "0:26:34", "throughput": 19903.35, "total_tokens": 214128512} +{"current_steps": 68030, "total_steps": 78105, "loss": 0.1271, "lr": 2.4922868316759527e-07, "epoch": 4.355034888931566, "percentage": 87.1, "elapsed_time": "2:59:19", "remaining_time": "0:26:33", "throughput": 19903.56, "total_tokens": 214144512} +{"current_steps": 68035, "total_steps": 78105, "loss": 0.1152, "lr": 2.4898558465113935e-07, "epoch": 4.355354970872543, "percentage": 87.11, "elapsed_time": "2:59:19", "remaining_time": "0:26:32", "throughput": 19903.77, "total_tokens": 214160000} +{"current_steps": 68040, "total_steps": 78105, "loss": 0.1667, "lr": 2.487425985386172e-07, "epoch": 4.35567505281352, "percentage": 87.11, "elapsed_time": "2:59:20", "remaining_time": "0:26:31", "throughput": 19904.02, "total_tokens": 214176768} +{"current_steps": 68045, "total_steps": 78105, "loss": 0.1652, "lr": 2.4849972484216387e-07, "epoch": 4.355995134754497, "percentage": 87.12, "elapsed_time": "2:59:21", "remaining_time": "0:26:30", "throughput": 19904.19, "total_tokens": 214191552} +{"current_steps": 68050, "total_steps": 78105, "loss": 0.1493, "lr": 2.4825696357390656e-07, "epoch": 4.356315216695474, "percentage": 87.13, "elapsed_time": "2:59:21", "remaining_time": "0:26:30", "throughput": 19904.37, "total_tokens": 214206656} +{"current_steps": 68055, "total_steps": 78105, "loss": 0.1189, "lr": 2.4801431474596766e-07, "epoch": 4.356635298636451, "percentage": 87.13, "elapsed_time": "2:59:22", "remaining_time": "0:26:29", "throughput": 19904.56, "total_tokens": 214221952} +{"current_steps": 68060, "total_steps": 78105, "loss": 0.1283, "lr": 2.477717783704636e-07, "epoch": 4.3569553805774275, "percentage": 87.14, "elapsed_time": "2:59:23", "remaining_time": "0:26:28", "throughput": 19904.76, "total_tokens": 214237376} +{"current_steps": 68065, "total_steps": 78105, "loss": 0.1263, "lr": 2.4752935445950535e-07, "epoch": 4.3572754625184045, "percentage": 87.15, "elapsed_time": "2:59:23", "remaining_time": "0:26:27", "throughput": 19904.99, "total_tokens": 214253632} +{"current_steps": 68070, "total_steps": 78105, "loss": 0.1959, "lr": 2.472870430251992e-07, "epoch": 4.3575955444593815, "percentage": 87.15, "elapsed_time": "2:59:24", "remaining_time": "0:26:26", "throughput": 19905.2, "total_tokens": 214269376} +{"current_steps": 68075, "total_steps": 78105, "loss": 0.1753, "lr": 2.4704484407964303e-07, "epoch": 4.3579156264003585, "percentage": 87.16, "elapsed_time": "2:59:25", "remaining_time": "0:26:26", "throughput": 19905.39, "total_tokens": 214284864} +{"current_steps": 68080, "total_steps": 78105, "loss": 0.1114, "lr": 2.4680275763493273e-07, "epoch": 4.3582357083413354, "percentage": 87.16, "elapsed_time": "2:59:25", "remaining_time": "0:26:25", "throughput": 19905.63, "total_tokens": 214300992} +{"current_steps": 68085, "total_steps": 78105, "loss": 0.1027, "lr": 2.465607837031564e-07, "epoch": 4.358555790282312, "percentage": 87.17, "elapsed_time": "2:59:26", "remaining_time": "0:26:24", "throughput": 19905.82, "total_tokens": 214316352} +{"current_steps": 68090, "total_steps": 78105, "loss": 0.1452, "lr": 2.463189222963966e-07, "epoch": 4.358875872223289, "percentage": 87.18, "elapsed_time": "2:59:27", "remaining_time": "0:26:23", "throughput": 19906.08, "total_tokens": 214333632} +{"current_steps": 68095, "total_steps": 78105, "loss": 0.1413, "lr": 2.4607717342673086e-07, "epoch": 4.359195954164266, "percentage": 87.18, "elapsed_time": "2:59:27", "remaining_time": "0:26:22", "throughput": 19906.27, "total_tokens": 214348480} +{"current_steps": 68100, "total_steps": 78105, "loss": 0.2, "lr": 2.458355371062304e-07, "epoch": 4.3595160361052425, "percentage": 87.19, "elapsed_time": "2:59:28", "remaining_time": "0:26:22", "throughput": 19906.48, "total_tokens": 214364288} +{"current_steps": 68105, "total_steps": 78105, "loss": 0.1063, "lr": 2.455940133469628e-07, "epoch": 4.3598361180462195, "percentage": 87.2, "elapsed_time": "2:59:29", "remaining_time": "0:26:21", "throughput": 19906.67, "total_tokens": 214379648} +{"current_steps": 68110, "total_steps": 78105, "loss": 0.1157, "lr": 2.4535260216098615e-07, "epoch": 4.3601561999871965, "percentage": 87.2, "elapsed_time": "2:59:29", "remaining_time": "0:26:20", "throughput": 19906.85, "total_tokens": 214394496} +{"current_steps": 68115, "total_steps": 78105, "loss": 0.1066, "lr": 2.451113035603572e-07, "epoch": 4.3604762819281735, "percentage": 87.21, "elapsed_time": "2:59:30", "remaining_time": "0:26:19", "throughput": 19907.06, "total_tokens": 214409664} +{"current_steps": 68120, "total_steps": 78105, "loss": 0.1429, "lr": 2.4487011755712443e-07, "epoch": 4.3607963638691505, "percentage": 87.22, "elapsed_time": "2:59:31", "remaining_time": "0:26:18", "throughput": 19907.28, "total_tokens": 214425600} +{"current_steps": 68125, "total_steps": 78105, "loss": 0.1468, "lr": 2.446290441633314e-07, "epoch": 4.3611164458101275, "percentage": 87.22, "elapsed_time": "2:59:31", "remaining_time": "0:26:18", "throughput": 19907.43, "total_tokens": 214439872} +{"current_steps": 68130, "total_steps": 78105, "loss": 0.1316, "lr": 2.4438808339101615e-07, "epoch": 4.3614365277511045, "percentage": 87.23, "elapsed_time": "2:59:32", "remaining_time": "0:26:17", "throughput": 19907.61, "total_tokens": 214454912} +{"current_steps": 68135, "total_steps": 78105, "loss": 0.1236, "lr": 2.441472352522109e-07, "epoch": 4.3617566096920815, "percentage": 87.24, "elapsed_time": "2:59:33", "remaining_time": "0:26:16", "throughput": 19907.8, "total_tokens": 214470208} +{"current_steps": 68140, "total_steps": 78105, "loss": 0.1019, "lr": 2.4390649975894244e-07, "epoch": 4.3620766916330584, "percentage": 87.24, "elapsed_time": "2:59:33", "remaining_time": "0:26:15", "throughput": 19908.01, "total_tokens": 214485888} +{"current_steps": 68145, "total_steps": 78105, "loss": 0.1106, "lr": 2.4366587692323196e-07, "epoch": 4.3623967735740345, "percentage": 87.25, "elapsed_time": "2:59:34", "remaining_time": "0:26:14", "throughput": 19908.22, "total_tokens": 214501696} +{"current_steps": 68150, "total_steps": 78105, "loss": 0.1187, "lr": 2.4342536675709406e-07, "epoch": 4.3627168555150115, "percentage": 87.25, "elapsed_time": "2:59:35", "remaining_time": "0:26:13", "throughput": 19908.4, "total_tokens": 214517184} +{"current_steps": 68155, "total_steps": 78105, "loss": 0.1068, "lr": 2.431849692725399e-07, "epoch": 4.3630369374559885, "percentage": 87.26, "elapsed_time": "2:59:35", "remaining_time": "0:26:13", "throughput": 19908.62, "total_tokens": 214533120} +{"current_steps": 68160, "total_steps": 78105, "loss": 0.0825, "lr": 2.4294468448157304e-07, "epoch": 4.3633570193969655, "percentage": 87.27, "elapsed_time": "2:59:36", "remaining_time": "0:26:12", "throughput": 19908.79, "total_tokens": 214547904} +{"current_steps": 68165, "total_steps": 78105, "loss": 0.0626, "lr": 2.4270451239619186e-07, "epoch": 4.3636771013379425, "percentage": 87.27, "elapsed_time": "2:59:37", "remaining_time": "0:26:11", "throughput": 19909.02, "total_tokens": 214564160} +{"current_steps": 68170, "total_steps": 78105, "loss": 0.1123, "lr": 2.424644530283898e-07, "epoch": 4.3639971832789195, "percentage": 87.28, "elapsed_time": "2:59:37", "remaining_time": "0:26:10", "throughput": 19909.17, "total_tokens": 214578368} +{"current_steps": 68175, "total_steps": 78105, "loss": 0.1215, "lr": 2.422245063901532e-07, "epoch": 4.3643172652198965, "percentage": 87.29, "elapsed_time": "2:59:38", "remaining_time": "0:26:09", "throughput": 19909.4, "total_tokens": 214594496} +{"current_steps": 68180, "total_steps": 78105, "loss": 0.1454, "lr": 2.4198467249346547e-07, "epoch": 4.3646373471608735, "percentage": 87.29, "elapsed_time": "2:59:39", "remaining_time": "0:26:09", "throughput": 19909.63, "total_tokens": 214610880} +{"current_steps": 68185, "total_steps": 78105, "loss": 0.1073, "lr": 2.417449513503009e-07, "epoch": 4.3649574291018505, "percentage": 87.3, "elapsed_time": "2:59:39", "remaining_time": "0:26:08", "throughput": 19909.84, "total_tokens": 214626816} +{"current_steps": 68190, "total_steps": 78105, "loss": 0.1574, "lr": 2.4150534297263135e-07, "epoch": 4.365277511042827, "percentage": 87.31, "elapsed_time": "2:59:40", "remaining_time": "0:26:07", "throughput": 19910.02, "total_tokens": 214642048} +{"current_steps": 68195, "total_steps": 78105, "loss": 0.1334, "lr": 2.4126584737241996e-07, "epoch": 4.365597592983804, "percentage": 87.31, "elapsed_time": "2:59:41", "remaining_time": "0:26:06", "throughput": 19910.22, "total_tokens": 214657664} +{"current_steps": 68200, "total_steps": 78105, "loss": 0.1465, "lr": 2.410264645616273e-07, "epoch": 4.365917674924781, "percentage": 87.32, "elapsed_time": "2:59:41", "remaining_time": "0:26:05", "throughput": 19910.43, "total_tokens": 214673600} +{"current_steps": 68205, "total_steps": 78105, "loss": 0.1223, "lr": 2.407871945522064e-07, "epoch": 4.3662377568657575, "percentage": 87.32, "elapsed_time": "2:59:42", "remaining_time": "0:26:05", "throughput": 19910.6, "total_tokens": 214688576} +{"current_steps": 68210, "total_steps": 78105, "loss": 0.1337, "lr": 2.4054803735610535e-07, "epoch": 4.3665578388067345, "percentage": 87.33, "elapsed_time": "2:59:43", "remaining_time": "0:26:04", "throughput": 19910.78, "total_tokens": 214703232} +{"current_steps": 68215, "total_steps": 78105, "loss": 0.1444, "lr": 2.403089929852659e-07, "epoch": 4.3668779207477115, "percentage": 87.34, "elapsed_time": "2:59:43", "remaining_time": "0:26:03", "throughput": 19910.97, "total_tokens": 214718528} +{"current_steps": 68220, "total_steps": 78105, "loss": 0.1182, "lr": 2.400700614516249e-07, "epoch": 4.3671980026886885, "percentage": 87.34, "elapsed_time": "2:59:44", "remaining_time": "0:26:02", "throughput": 19911.19, "total_tokens": 214734400} +{"current_steps": 68225, "total_steps": 78105, "loss": 0.1145, "lr": 2.39831242767114e-07, "epoch": 4.3675180846296655, "percentage": 87.35, "elapsed_time": "2:59:45", "remaining_time": "0:26:01", "throughput": 19911.42, "total_tokens": 214750720} +{"current_steps": 68230, "total_steps": 78105, "loss": 0.1521, "lr": 2.3959253694365773e-07, "epoch": 4.3678381665706425, "percentage": 87.36, "elapsed_time": "2:59:45", "remaining_time": "0:26:01", "throughput": 19911.67, "total_tokens": 214767168} +{"current_steps": 68235, "total_steps": 78105, "loss": 0.1359, "lr": 2.3935394399317635e-07, "epoch": 4.368158248511619, "percentage": 87.36, "elapsed_time": "2:59:46", "remaining_time": "0:26:00", "throughput": 19911.87, "total_tokens": 214782592} +{"current_steps": 68240, "total_steps": 78105, "loss": 0.1161, "lr": 2.3911546392758387e-07, "epoch": 4.368478330452596, "percentage": 87.37, "elapsed_time": "2:59:47", "remaining_time": "0:25:59", "throughput": 19912.05, "total_tokens": 214797632} +{"current_steps": 68245, "total_steps": 78105, "loss": 0.1449, "lr": 2.3887709675878835e-07, "epoch": 4.368798412393573, "percentage": 87.38, "elapsed_time": "2:59:48", "remaining_time": "0:25:58", "throughput": 19912.28, "total_tokens": 214813888} +{"current_steps": 68250, "total_steps": 78105, "loss": 0.1307, "lr": 2.38638842498693e-07, "epoch": 4.36911849433455, "percentage": 87.38, "elapsed_time": "2:59:48", "remaining_time": "0:25:57", "throughput": 19912.44, "total_tokens": 214828224} +{"current_steps": 68255, "total_steps": 78105, "loss": 0.1223, "lr": 2.3840070115919462e-07, "epoch": 4.369438576275527, "percentage": 87.39, "elapsed_time": "2:59:49", "remaining_time": "0:25:57", "throughput": 19912.62, "total_tokens": 214843200} +{"current_steps": 68260, "total_steps": 78105, "loss": 0.1461, "lr": 2.381626727521849e-07, "epoch": 4.3697586582165036, "percentage": 87.4, "elapsed_time": "2:59:49", "remaining_time": "0:25:56", "throughput": 19912.85, "total_tokens": 214859392} +{"current_steps": 68265, "total_steps": 78105, "loss": 0.1309, "lr": 2.3792475728955046e-07, "epoch": 4.3700787401574805, "percentage": 87.4, "elapsed_time": "2:59:50", "remaining_time": "0:25:55", "throughput": 19913.06, "total_tokens": 214875584} +{"current_steps": 68270, "total_steps": 78105, "loss": 0.1253, "lr": 2.3768695478317005e-07, "epoch": 4.3703988220984575, "percentage": 87.41, "elapsed_time": "2:59:51", "remaining_time": "0:25:54", "throughput": 19913.26, "total_tokens": 214890816} +{"current_steps": 68275, "total_steps": 78105, "loss": 0.1477, "lr": 2.374492652449198e-07, "epoch": 4.370718904039434, "percentage": 87.41, "elapsed_time": "2:59:51", "remaining_time": "0:25:53", "throughput": 19913.44, "total_tokens": 214905728} +{"current_steps": 68280, "total_steps": 78105, "loss": 0.1718, "lr": 2.3721168868666793e-07, "epoch": 4.371038985980411, "percentage": 87.42, "elapsed_time": "2:59:52", "remaining_time": "0:25:52", "throughput": 19913.65, "total_tokens": 214921792} +{"current_steps": 68285, "total_steps": 78105, "loss": 0.1627, "lr": 2.369742251202778e-07, "epoch": 4.371359067921388, "percentage": 87.43, "elapsed_time": "2:59:53", "remaining_time": "0:25:52", "throughput": 19913.81, "total_tokens": 214936256} +{"current_steps": 68290, "total_steps": 78105, "loss": 0.0926, "lr": 2.3673687455760735e-07, "epoch": 4.371679149862365, "percentage": 87.43, "elapsed_time": "2:59:54", "remaining_time": "0:25:51", "throughput": 19914.0, "total_tokens": 214951936} +{"current_steps": 68295, "total_steps": 78105, "loss": 0.125, "lr": 2.3649963701050772e-07, "epoch": 4.371999231803342, "percentage": 87.44, "elapsed_time": "2:59:54", "remaining_time": "0:25:50", "throughput": 19914.16, "total_tokens": 214966464} +{"current_steps": 68300, "total_steps": 78105, "loss": 0.1055, "lr": 2.3626251249082744e-07, "epoch": 4.372319313744319, "percentage": 87.45, "elapsed_time": "2:59:55", "remaining_time": "0:25:49", "throughput": 19914.41, "total_tokens": 214983104} +{"current_steps": 68305, "total_steps": 78105, "loss": 0.1297, "lr": 2.3602550101040454e-07, "epoch": 4.372639395685296, "percentage": 87.45, "elapsed_time": "2:59:56", "remaining_time": "0:25:48", "throughput": 19914.64, "total_tokens": 214999360} +{"current_steps": 68310, "total_steps": 78105, "loss": 0.1403, "lr": 2.3578860258107628e-07, "epoch": 4.372959477626273, "percentage": 87.46, "elapsed_time": "2:59:56", "remaining_time": "0:25:48", "throughput": 19914.91, "total_tokens": 215016640} +{"current_steps": 68315, "total_steps": 78105, "loss": 0.1308, "lr": 2.355518172146712e-07, "epoch": 4.37327955956725, "percentage": 87.47, "elapsed_time": "2:59:57", "remaining_time": "0:25:47", "throughput": 19915.09, "total_tokens": 215031808} +{"current_steps": 68320, "total_steps": 78105, "loss": 0.1332, "lr": 2.3531514492301345e-07, "epoch": 4.3735996415082266, "percentage": 87.47, "elapsed_time": "2:59:58", "remaining_time": "0:25:46", "throughput": 19915.36, "total_tokens": 215049024} +{"current_steps": 68325, "total_steps": 78105, "loss": 0.1569, "lr": 2.3507858571792081e-07, "epoch": 4.373919723449203, "percentage": 87.48, "elapsed_time": "2:59:58", "remaining_time": "0:25:45", "throughput": 19915.55, "total_tokens": 215064512} +{"current_steps": 68330, "total_steps": 78105, "loss": 0.1687, "lr": 2.3484213961120577e-07, "epoch": 4.37423980539018, "percentage": 87.48, "elapsed_time": "2:59:59", "remaining_time": "0:25:44", "throughput": 19915.75, "total_tokens": 215079936} +{"current_steps": 68335, "total_steps": 78105, "loss": 0.1534, "lr": 2.3460580661467642e-07, "epoch": 4.374559887331157, "percentage": 87.49, "elapsed_time": "3:00:00", "remaining_time": "0:25:44", "throughput": 19915.94, "total_tokens": 215095552} +{"current_steps": 68340, "total_steps": 78105, "loss": 0.1338, "lr": 2.3436958674013216e-07, "epoch": 4.374879969272134, "percentage": 87.5, "elapsed_time": "3:00:00", "remaining_time": "0:25:43", "throughput": 19916.15, "total_tokens": 215111232} +{"current_steps": 68345, "total_steps": 78105, "loss": 0.186, "lr": 2.3413347999936998e-07, "epoch": 4.375200051213111, "percentage": 87.5, "elapsed_time": "3:00:01", "remaining_time": "0:25:42", "throughput": 19916.33, "total_tokens": 215126080} +{"current_steps": 68350, "total_steps": 78105, "loss": 0.0991, "lr": 2.338974864041793e-07, "epoch": 4.375520133154088, "percentage": 87.51, "elapsed_time": "3:00:02", "remaining_time": "0:25:41", "throughput": 19916.51, "total_tokens": 215141184} +{"current_steps": 68355, "total_steps": 78105, "loss": 0.1234, "lr": 2.3366160596634436e-07, "epoch": 4.375840215095065, "percentage": 87.52, "elapsed_time": "3:00:02", "remaining_time": "0:25:40", "throughput": 19916.68, "total_tokens": 215155968} +{"current_steps": 68360, "total_steps": 78105, "loss": 0.1445, "lr": 2.33425838697644e-07, "epoch": 4.376160297036042, "percentage": 87.52, "elapsed_time": "3:00:03", "remaining_time": "0:25:40", "throughput": 19916.9, "total_tokens": 215172160} +{"current_steps": 68365, "total_steps": 78105, "loss": 0.1426, "lr": 2.331901846098511e-07, "epoch": 4.376480378977018, "percentage": 87.53, "elapsed_time": "3:00:04", "remaining_time": "0:25:39", "throughput": 19917.13, "total_tokens": 215188160} +{"current_steps": 68370, "total_steps": 78105, "loss": 0.1611, "lr": 2.3295464371473287e-07, "epoch": 4.376800460917995, "percentage": 87.54, "elapsed_time": "3:00:04", "remaining_time": "0:25:38", "throughput": 19917.36, "total_tokens": 215204544} +{"current_steps": 68375, "total_steps": 78105, "loss": 0.1618, "lr": 2.3271921602405129e-07, "epoch": 4.377120542858972, "percentage": 87.54, "elapsed_time": "3:00:05", "remaining_time": "0:25:37", "throughput": 19917.58, "total_tokens": 215220864} +{"current_steps": 68380, "total_steps": 78105, "loss": 0.1636, "lr": 2.324839015495617e-07, "epoch": 4.377440624799949, "percentage": 87.55, "elapsed_time": "3:00:06", "remaining_time": "0:25:36", "throughput": 19917.77, "total_tokens": 215236352} +{"current_steps": 68385, "total_steps": 78105, "loss": 0.1369, "lr": 2.3224870030301577e-07, "epoch": 4.377760706740926, "percentage": 87.56, "elapsed_time": "3:00:06", "remaining_time": "0:25:36", "throughput": 19918.01, "total_tokens": 215252928} +{"current_steps": 68390, "total_steps": 78105, "loss": 0.1762, "lr": 2.3201361229615694e-07, "epoch": 4.378080788681903, "percentage": 87.56, "elapsed_time": "3:00:07", "remaining_time": "0:25:35", "throughput": 19918.28, "total_tokens": 215270080} +{"current_steps": 68395, "total_steps": 78105, "loss": 0.1265, "lr": 2.3177863754072493e-07, "epoch": 4.37840087062288, "percentage": 87.57, "elapsed_time": "3:00:08", "remaining_time": "0:25:34", "throughput": 19918.57, "total_tokens": 215287808} +{"current_steps": 68400, "total_steps": 78105, "loss": 0.0845, "lr": 2.3154377604845318e-07, "epoch": 4.378720952563857, "percentage": 87.57, "elapsed_time": "3:00:09", "remaining_time": "0:25:33", "throughput": 19918.71, "total_tokens": 215301824} +{"current_steps": 68405, "total_steps": 78105, "loss": 0.1127, "lr": 2.3130902783106867e-07, "epoch": 4.379041034504834, "percentage": 87.58, "elapsed_time": "3:00:09", "remaining_time": "0:25:32", "throughput": 19918.9, "total_tokens": 215317440} +{"current_steps": 68410, "total_steps": 78105, "loss": 0.1349, "lr": 2.310743929002951e-07, "epoch": 4.37936111644581, "percentage": 87.59, "elapsed_time": "3:00:10", "remaining_time": "0:25:32", "throughput": 19919.13, "total_tokens": 215333952} +{"current_steps": 68415, "total_steps": 78105, "loss": 0.126, "lr": 2.3083987126784723e-07, "epoch": 4.379681198386787, "percentage": 87.59, "elapsed_time": "3:00:11", "remaining_time": "0:25:31", "throughput": 19919.35, "total_tokens": 215350272} +{"current_steps": 68420, "total_steps": 78105, "loss": 0.1614, "lr": 2.306054629454374e-07, "epoch": 4.380001280327764, "percentage": 87.6, "elapsed_time": "3:00:11", "remaining_time": "0:25:30", "throughput": 19919.54, "total_tokens": 215365760} +{"current_steps": 68425, "total_steps": 78105, "loss": 0.1559, "lr": 2.303711679447687e-07, "epoch": 4.380321362268741, "percentage": 87.61, "elapsed_time": "3:00:12", "remaining_time": "0:25:29", "throughput": 19919.8, "total_tokens": 215382912} +{"current_steps": 68430, "total_steps": 78105, "loss": 0.1158, "lr": 2.3013698627754267e-07, "epoch": 4.380641444209718, "percentage": 87.61, "elapsed_time": "3:00:13", "remaining_time": "0:25:28", "throughput": 19920.05, "total_tokens": 215399744} +{"current_steps": 68435, "total_steps": 78105, "loss": 0.1577, "lr": 2.299029179554521e-07, "epoch": 4.380961526150695, "percentage": 87.62, "elapsed_time": "3:00:13", "remaining_time": "0:25:28", "throughput": 19920.24, "total_tokens": 215414912} +{"current_steps": 68440, "total_steps": 78105, "loss": 0.1296, "lr": 2.296689629901852e-07, "epoch": 4.381281608091672, "percentage": 87.63, "elapsed_time": "3:00:14", "remaining_time": "0:25:27", "throughput": 19920.43, "total_tokens": 215430464} +{"current_steps": 68445, "total_steps": 78105, "loss": 0.0872, "lr": 2.2943512139342455e-07, "epoch": 4.381601690032649, "percentage": 87.63, "elapsed_time": "3:00:15", "remaining_time": "0:25:26", "throughput": 19920.65, "total_tokens": 215446336} +{"current_steps": 68450, "total_steps": 78105, "loss": 0.1934, "lr": 2.2920139317684665e-07, "epoch": 4.381921771973626, "percentage": 87.64, "elapsed_time": "3:00:15", "remaining_time": "0:25:25", "throughput": 19920.83, "total_tokens": 215461312} +{"current_steps": 68455, "total_steps": 78105, "loss": 0.1609, "lr": 2.2896777835212354e-07, "epoch": 4.382241853914602, "percentage": 87.64, "elapsed_time": "3:00:16", "remaining_time": "0:25:24", "throughput": 19921.06, "total_tokens": 215477504} +{"current_steps": 68460, "total_steps": 78105, "loss": 0.1693, "lr": 2.2873427693092003e-07, "epoch": 4.382561935855579, "percentage": 87.65, "elapsed_time": "3:00:17", "remaining_time": "0:25:23", "throughput": 19921.23, "total_tokens": 215492416} +{"current_steps": 68465, "total_steps": 78105, "loss": 0.1326, "lr": 2.285008889248963e-07, "epoch": 4.382882017796556, "percentage": 87.66, "elapsed_time": "3:00:17", "remaining_time": "0:25:23", "throughput": 19921.46, "total_tokens": 215508800} +{"current_steps": 68470, "total_steps": 78105, "loss": 0.1533, "lr": 2.2826761434570633e-07, "epoch": 4.383202099737533, "percentage": 87.66, "elapsed_time": "3:00:18", "remaining_time": "0:25:22", "throughput": 19921.65, "total_tokens": 215524160} +{"current_steps": 68475, "total_steps": 78105, "loss": 0.0927, "lr": 2.2803445320499856e-07, "epoch": 4.38352218167851, "percentage": 87.67, "elapsed_time": "3:00:19", "remaining_time": "0:25:21", "throughput": 19921.87, "total_tokens": 215540416} +{"current_steps": 68480, "total_steps": 78105, "loss": 0.1327, "lr": 2.2780140551441621e-07, "epoch": 4.383842263619487, "percentage": 87.68, "elapsed_time": "3:00:19", "remaining_time": "0:25:20", "throughput": 19922.11, "total_tokens": 215557120} +{"current_steps": 68485, "total_steps": 78105, "loss": 0.1328, "lr": 2.2756847128559606e-07, "epoch": 4.384162345560464, "percentage": 87.68, "elapsed_time": "3:00:20", "remaining_time": "0:25:19", "throughput": 19922.33, "total_tokens": 215572992} +{"current_steps": 68490, "total_steps": 78105, "loss": 0.1588, "lr": 2.2733565053016938e-07, "epoch": 4.384482427501441, "percentage": 87.69, "elapsed_time": "3:00:21", "remaining_time": "0:25:19", "throughput": 19922.51, "total_tokens": 215587712} +{"current_steps": 68495, "total_steps": 78105, "loss": 0.2038, "lr": 2.2710294325976324e-07, "epoch": 4.384802509442418, "percentage": 87.7, "elapsed_time": "3:00:21", "remaining_time": "0:25:18", "throughput": 19922.71, "total_tokens": 215603456} +{"current_steps": 68500, "total_steps": 78105, "loss": 0.1214, "lr": 2.2687034948599613e-07, "epoch": 4.385122591383394, "percentage": 87.7, "elapsed_time": "3:00:22", "remaining_time": "0:25:17", "throughput": 19922.92, "total_tokens": 215619520} +{"current_steps": 68505, "total_steps": 78105, "loss": 0.1287, "lr": 2.2663786922048402e-07, "epoch": 4.385442673324371, "percentage": 87.71, "elapsed_time": "3:00:23", "remaining_time": "0:25:16", "throughput": 19923.08, "total_tokens": 215633856} +{"current_steps": 68510, "total_steps": 78105, "loss": 0.1602, "lr": 2.2640550247483517e-07, "epoch": 4.385762755265348, "percentage": 87.72, "elapsed_time": "3:00:23", "remaining_time": "0:25:15", "throughput": 19923.28, "total_tokens": 215649280} +{"current_steps": 68515, "total_steps": 78105, "loss": 0.1408, "lr": 2.2617324926065276e-07, "epoch": 4.386082837206325, "percentage": 87.72, "elapsed_time": "3:00:24", "remaining_time": "0:25:15", "throughput": 19923.46, "total_tokens": 215664192} +{"current_steps": 68520, "total_steps": 78105, "loss": 0.1775, "lr": 2.2594110958953448e-07, "epoch": 4.386402919147302, "percentage": 87.73, "elapsed_time": "3:00:25", "remaining_time": "0:25:14", "throughput": 19923.71, "total_tokens": 215681024} +{"current_steps": 68525, "total_steps": 78105, "loss": 0.1583, "lr": 2.257090834730713e-07, "epoch": 4.386723001088279, "percentage": 87.73, "elapsed_time": "3:00:26", "remaining_time": "0:25:13", "throughput": 19923.94, "total_tokens": 215697216} +{"current_steps": 68530, "total_steps": 78105, "loss": 0.131, "lr": 2.2547717092285148e-07, "epoch": 4.387043083029256, "percentage": 87.74, "elapsed_time": "3:00:26", "remaining_time": "0:25:12", "throughput": 19924.18, "total_tokens": 215713536} +{"current_steps": 68535, "total_steps": 78105, "loss": 0.1456, "lr": 2.25245371950453e-07, "epoch": 4.387363164970233, "percentage": 87.75, "elapsed_time": "3:00:27", "remaining_time": "0:25:11", "throughput": 19924.38, "total_tokens": 215729280} +{"current_steps": 68540, "total_steps": 78105, "loss": 0.1976, "lr": 2.2501368656745236e-07, "epoch": 4.387683246911209, "percentage": 87.75, "elapsed_time": "3:00:28", "remaining_time": "0:25:11", "throughput": 19924.61, "total_tokens": 215745664} +{"current_steps": 68545, "total_steps": 78105, "loss": 0.1521, "lr": 2.2478211478541818e-07, "epoch": 4.388003328852186, "percentage": 87.76, "elapsed_time": "3:00:28", "remaining_time": "0:25:10", "throughput": 19924.78, "total_tokens": 215760192} +{"current_steps": 68550, "total_steps": 78105, "loss": 0.1721, "lr": 2.245506566159142e-07, "epoch": 4.388323410793163, "percentage": 87.77, "elapsed_time": "3:00:29", "remaining_time": "0:25:09", "throughput": 19925.03, "total_tokens": 215776896} +{"current_steps": 68555, "total_steps": 78105, "loss": 0.1071, "lr": 2.2431931207049784e-07, "epoch": 4.38864349273414, "percentage": 87.77, "elapsed_time": "3:00:30", "remaining_time": "0:25:08", "throughput": 19925.29, "total_tokens": 215793920} +{"current_steps": 68560, "total_steps": 78105, "loss": 0.1205, "lr": 2.2408808116072155e-07, "epoch": 4.388963574675117, "percentage": 87.78, "elapsed_time": "3:00:30", "remaining_time": "0:25:07", "throughput": 19925.49, "total_tokens": 215809856} +{"current_steps": 68565, "total_steps": 78105, "loss": 0.1373, "lr": 2.2385696389813165e-07, "epoch": 4.389283656616094, "percentage": 87.79, "elapsed_time": "3:00:31", "remaining_time": "0:25:07", "throughput": 19925.68, "total_tokens": 215824704} +{"current_steps": 68570, "total_steps": 78105, "loss": 0.16, "lr": 2.2362596029426865e-07, "epoch": 4.389603738557071, "percentage": 87.79, "elapsed_time": "3:00:32", "remaining_time": "0:25:06", "throughput": 19925.87, "total_tokens": 215840064} +{"current_steps": 68575, "total_steps": 78105, "loss": 0.1306, "lr": 2.2339507036066826e-07, "epoch": 4.389923820498048, "percentage": 87.8, "elapsed_time": "3:00:32", "remaining_time": "0:25:05", "throughput": 19926.06, "total_tokens": 215855296} +{"current_steps": 68580, "total_steps": 78105, "loss": 0.0857, "lr": 2.2316429410885991e-07, "epoch": 4.390243902439025, "percentage": 87.8, "elapsed_time": "3:00:33", "remaining_time": "0:25:04", "throughput": 19926.26, "total_tokens": 215870976} +{"current_steps": 68585, "total_steps": 78105, "loss": 0.117, "lr": 2.229336315503669e-07, "epoch": 4.390563984380002, "percentage": 87.81, "elapsed_time": "3:00:34", "remaining_time": "0:25:03", "throughput": 19926.47, "total_tokens": 215886848} +{"current_steps": 68590, "total_steps": 78105, "loss": 0.1023, "lr": 2.2270308269670748e-07, "epoch": 4.390884066320978, "percentage": 87.82, "elapsed_time": "3:00:34", "remaining_time": "0:25:03", "throughput": 19926.65, "total_tokens": 215902144} +{"current_steps": 68595, "total_steps": 78105, "loss": 0.1323, "lr": 2.2247264755939414e-07, "epoch": 4.391204148261955, "percentage": 87.82, "elapsed_time": "3:00:35", "remaining_time": "0:25:02", "throughput": 19926.87, "total_tokens": 215918208} +{"current_steps": 68600, "total_steps": 78105, "loss": 0.1176, "lr": 2.2224232614993373e-07, "epoch": 4.391524230202932, "percentage": 87.83, "elapsed_time": "3:00:36", "remaining_time": "0:25:01", "throughput": 19927.11, "total_tokens": 215934848} +{"current_steps": 68605, "total_steps": 78105, "loss": 0.1315, "lr": 2.220121184798271e-07, "epoch": 4.391844312143909, "percentage": 87.84, "elapsed_time": "3:00:36", "remaining_time": "0:25:00", "throughput": 19927.29, "total_tokens": 215950016} +{"current_steps": 68610, "total_steps": 78105, "loss": 0.1965, "lr": 2.2178202456056924e-07, "epoch": 4.392164394084886, "percentage": 87.84, "elapsed_time": "3:00:37", "remaining_time": "0:24:59", "throughput": 19927.46, "total_tokens": 215964864} +{"current_steps": 68615, "total_steps": 78105, "loss": 0.1214, "lr": 2.215520444036509e-07, "epoch": 4.392484476025863, "percentage": 87.85, "elapsed_time": "3:00:38", "remaining_time": "0:24:59", "throughput": 19927.64, "total_tokens": 215980224} +{"current_steps": 68620, "total_steps": 78105, "loss": 0.1851, "lr": 2.2132217802055488e-07, "epoch": 4.39280455796684, "percentage": 87.86, "elapsed_time": "3:00:38", "remaining_time": "0:24:58", "throughput": 19927.84, "total_tokens": 215995456} +{"current_steps": 68625, "total_steps": 78105, "loss": 0.1647, "lr": 2.2109242542276032e-07, "epoch": 4.393124639907817, "percentage": 87.86, "elapsed_time": "3:00:39", "remaining_time": "0:24:57", "throughput": 19928.06, "total_tokens": 216011264} +{"current_steps": 68630, "total_steps": 78105, "loss": 0.1623, "lr": 2.208627866217397e-07, "epoch": 4.393444721848793, "percentage": 87.87, "elapsed_time": "3:00:40", "remaining_time": "0:24:56", "throughput": 19928.25, "total_tokens": 216026624} +{"current_steps": 68635, "total_steps": 78105, "loss": 0.176, "lr": 2.2063326162895972e-07, "epoch": 4.39376480378977, "percentage": 87.88, "elapsed_time": "3:00:40", "remaining_time": "0:24:55", "throughput": 19928.46, "total_tokens": 216041984} +{"current_steps": 68640, "total_steps": 78105, "loss": 0.1405, "lr": 2.2040385045588253e-07, "epoch": 4.394084885730747, "percentage": 87.88, "elapsed_time": "3:00:41", "remaining_time": "0:24:54", "throughput": 19928.64, "total_tokens": 216057024} +{"current_steps": 68645, "total_steps": 78105, "loss": 0.1438, "lr": 2.2017455311396208e-07, "epoch": 4.394404967671724, "percentage": 87.89, "elapsed_time": "3:00:42", "remaining_time": "0:24:54", "throughput": 19928.9, "total_tokens": 216074496} +{"current_steps": 68650, "total_steps": 78105, "loss": 0.1325, "lr": 2.199453696146503e-07, "epoch": 4.394725049612701, "percentage": 87.89, "elapsed_time": "3:00:42", "remaining_time": "0:24:53", "throughput": 19929.15, "total_tokens": 216090880} +{"current_steps": 68655, "total_steps": 78105, "loss": 0.1628, "lr": 2.1971629996938943e-07, "epoch": 4.395045131553678, "percentage": 87.9, "elapsed_time": "3:00:43", "remaining_time": "0:24:52", "throughput": 19929.36, "total_tokens": 216106624} +{"current_steps": 68660, "total_steps": 78105, "loss": 0.1105, "lr": 2.1948734418961948e-07, "epoch": 4.395365213494655, "percentage": 87.91, "elapsed_time": "3:00:44", "remaining_time": "0:24:51", "throughput": 19929.57, "total_tokens": 216122752} +{"current_steps": 68665, "total_steps": 78105, "loss": 0.1333, "lr": 2.192585022867727e-07, "epoch": 4.395685295435632, "percentage": 87.91, "elapsed_time": "3:00:44", "remaining_time": "0:24:50", "throughput": 19929.75, "total_tokens": 216137536} +{"current_steps": 68670, "total_steps": 78105, "loss": 0.1093, "lr": 2.1902977427227662e-07, "epoch": 4.396005377376609, "percentage": 87.92, "elapsed_time": "3:00:45", "remaining_time": "0:24:50", "throughput": 19929.93, "total_tokens": 216152832} +{"current_steps": 68675, "total_steps": 78105, "loss": 0.0979, "lr": 2.1880116015755236e-07, "epoch": 4.396325459317585, "percentage": 87.93, "elapsed_time": "3:00:46", "remaining_time": "0:24:49", "throughput": 19930.11, "total_tokens": 216168128} +{"current_steps": 68680, "total_steps": 78105, "loss": 0.139, "lr": 2.185726599540153e-07, "epoch": 4.396645541258562, "percentage": 87.93, "elapsed_time": "3:00:46", "remaining_time": "0:24:48", "throughput": 19930.28, "total_tokens": 216183040} +{"current_steps": 68685, "total_steps": 78105, "loss": 0.1028, "lr": 2.183442736730765e-07, "epoch": 4.396965623199539, "percentage": 87.94, "elapsed_time": "3:00:47", "remaining_time": "0:24:47", "throughput": 19930.47, "total_tokens": 216198272} +{"current_steps": 68690, "total_steps": 78105, "loss": 0.1174, "lr": 2.1811600132613992e-07, "epoch": 4.397285705140516, "percentage": 87.95, "elapsed_time": "3:00:48", "remaining_time": "0:24:46", "throughput": 19930.66, "total_tokens": 216213440} +{"current_steps": 68695, "total_steps": 78105, "loss": 0.1425, "lr": 2.1788784292460452e-07, "epoch": 4.397605787081493, "percentage": 87.95, "elapsed_time": "3:00:48", "remaining_time": "0:24:46", "throughput": 19930.92, "total_tokens": 216230528} +{"current_steps": 68700, "total_steps": 78105, "loss": 0.1405, "lr": 2.1765979847986313e-07, "epoch": 4.39792586902247, "percentage": 87.96, "elapsed_time": "3:00:49", "remaining_time": "0:24:45", "throughput": 19931.12, "total_tokens": 216246400} +{"current_steps": 68705, "total_steps": 78105, "loss": 0.16, "lr": 2.1743186800330301e-07, "epoch": 4.398245950963447, "percentage": 87.96, "elapsed_time": "3:00:50", "remaining_time": "0:24:44", "throughput": 19931.32, "total_tokens": 216262016} +{"current_steps": 68710, "total_steps": 78105, "loss": 0.172, "lr": 2.1720405150630591e-07, "epoch": 4.398566032904424, "percentage": 87.97, "elapsed_time": "3:00:51", "remaining_time": "0:24:43", "throughput": 19931.51, "total_tokens": 216277376} +{"current_steps": 68715, "total_steps": 78105, "loss": 0.1409, "lr": 2.16976349000248e-07, "epoch": 4.398886114845401, "percentage": 87.98, "elapsed_time": "3:00:51", "remaining_time": "0:24:42", "throughput": 19931.67, "total_tokens": 216291968} +{"current_steps": 68720, "total_steps": 78105, "loss": 0.1377, "lr": 2.167487604964988e-07, "epoch": 4.399206196786377, "percentage": 87.98, "elapsed_time": "3:00:52", "remaining_time": "0:24:42", "throughput": 19931.88, "total_tokens": 216307712} +{"current_steps": 68725, "total_steps": 78105, "loss": 0.1266, "lr": 2.1652128600642447e-07, "epoch": 4.399526278727354, "percentage": 87.99, "elapsed_time": "3:00:52", "remaining_time": "0:24:41", "throughput": 19932.04, "total_tokens": 216322368} +{"current_steps": 68730, "total_steps": 78105, "loss": 0.1413, "lr": 2.1629392554138207e-07, "epoch": 4.399846360668331, "percentage": 88.0, "elapsed_time": "3:00:53", "remaining_time": "0:24:40", "throughput": 19932.21, "total_tokens": 216337344} +{"current_steps": 68735, "total_steps": 78105, "loss": 0.0947, "lr": 2.1606667911272637e-07, "epoch": 4.400166442609308, "percentage": 88.0, "elapsed_time": "3:00:54", "remaining_time": "0:24:39", "throughput": 19932.37, "total_tokens": 216351680} +{"current_steps": 68740, "total_steps": 78105, "loss": 0.1737, "lr": 2.1583954673180335e-07, "epoch": 4.400486524550285, "percentage": 88.01, "elapsed_time": "3:00:54", "remaining_time": "0:24:38", "throughput": 19932.59, "total_tokens": 216367936} +{"current_steps": 68745, "total_steps": 78105, "loss": 0.111, "lr": 2.1561252840995584e-07, "epoch": 4.400806606491262, "percentage": 88.02, "elapsed_time": "3:00:55", "remaining_time": "0:24:38", "throughput": 19932.8, "total_tokens": 216383936} +{"current_steps": 68750, "total_steps": 78105, "loss": 0.1774, "lr": 2.1538562415852005e-07, "epoch": 4.401126688432239, "percentage": 88.02, "elapsed_time": "3:00:56", "remaining_time": "0:24:37", "throughput": 19932.97, "total_tokens": 216399040} +{"current_steps": 68755, "total_steps": 78105, "loss": 0.1669, "lr": 2.1515883398882553e-07, "epoch": 4.401446770373216, "percentage": 88.03, "elapsed_time": "3:00:57", "remaining_time": "0:24:36", "throughput": 19933.18, "total_tokens": 216414656} +{"current_steps": 68760, "total_steps": 78105, "loss": 0.1553, "lr": 2.1493215791219823e-07, "epoch": 4.401766852314193, "percentage": 88.04, "elapsed_time": "3:00:57", "remaining_time": "0:24:35", "throughput": 19933.39, "total_tokens": 216430336} +{"current_steps": 68765, "total_steps": 78105, "loss": 0.1275, "lr": 2.1470559593995577e-07, "epoch": 4.402086934255169, "percentage": 88.04, "elapsed_time": "3:00:58", "remaining_time": "0:24:34", "throughput": 19933.64, "total_tokens": 216447296} +{"current_steps": 68770, "total_steps": 78105, "loss": 0.1531, "lr": 2.1447914808341242e-07, "epoch": 4.402407016196146, "percentage": 88.05, "elapsed_time": "3:00:59", "remaining_time": "0:24:34", "throughput": 19933.8, "total_tokens": 216462016} +{"current_steps": 68775, "total_steps": 78105, "loss": 0.1497, "lr": 2.142528143538758e-07, "epoch": 4.402727098137123, "percentage": 88.05, "elapsed_time": "3:00:59", "remaining_time": "0:24:33", "throughput": 19934.02, "total_tokens": 216478336} +{"current_steps": 68780, "total_steps": 78105, "loss": 0.1606, "lr": 2.140265947626477e-07, "epoch": 4.4030471800781, "percentage": 88.06, "elapsed_time": "3:01:00", "remaining_time": "0:24:32", "throughput": 19934.23, "total_tokens": 216494336} +{"current_steps": 68785, "total_steps": 78105, "loss": 0.1079, "lr": 2.138004893210241e-07, "epoch": 4.403367262019077, "percentage": 88.07, "elapsed_time": "3:01:01", "remaining_time": "0:24:31", "throughput": 19934.41, "total_tokens": 216509312} +{"current_steps": 68790, "total_steps": 78105, "loss": 0.1321, "lr": 2.1357449804029567e-07, "epoch": 4.403687343960054, "percentage": 88.07, "elapsed_time": "3:01:01", "remaining_time": "0:24:30", "throughput": 19934.68, "total_tokens": 216526848} +{"current_steps": 68795, "total_steps": 78105, "loss": 0.1595, "lr": 2.133486209317473e-07, "epoch": 4.404007425901031, "percentage": 88.08, "elapsed_time": "3:01:02", "remaining_time": "0:24:30", "throughput": 19934.97, "total_tokens": 216544448} +{"current_steps": 68800, "total_steps": 78105, "loss": 0.1079, "lr": 2.1312285800665745e-07, "epoch": 4.404327507842008, "percentage": 88.09, "elapsed_time": "3:01:03", "remaining_time": "0:24:29", "throughput": 19935.16, "total_tokens": 216559872} +{"current_steps": 68805, "total_steps": 78105, "loss": 0.0746, "lr": 2.1289720927630069e-07, "epoch": 4.404647589782984, "percentage": 88.09, "elapsed_time": "3:01:03", "remaining_time": "0:24:28", "throughput": 19935.32, "total_tokens": 216574464} +{"current_steps": 68810, "total_steps": 78105, "loss": 0.1298, "lr": 2.1267167475194444e-07, "epoch": 4.404967671723961, "percentage": 88.1, "elapsed_time": "3:01:04", "remaining_time": "0:24:27", "throughput": 19935.53, "total_tokens": 216590080} +{"current_steps": 68815, "total_steps": 78105, "loss": 0.1356, "lr": 2.1244625444485023e-07, "epoch": 4.405287753664938, "percentage": 88.11, "elapsed_time": "3:01:05", "remaining_time": "0:24:26", "throughput": 19935.77, "total_tokens": 216606528} +{"current_steps": 68820, "total_steps": 78105, "loss": 0.1249, "lr": 2.122209483662749e-07, "epoch": 4.405607835605915, "percentage": 88.11, "elapsed_time": "3:01:05", "remaining_time": "0:24:25", "throughput": 19935.94, "total_tokens": 216621248} +{"current_steps": 68825, "total_steps": 78105, "loss": 0.1037, "lr": 2.1199575652746857e-07, "epoch": 4.405927917546892, "percentage": 88.12, "elapsed_time": "3:01:06", "remaining_time": "0:24:25", "throughput": 19936.12, "total_tokens": 216636416} +{"current_steps": 68830, "total_steps": 78105, "loss": 0.1393, "lr": 2.1177067893967646e-07, "epoch": 4.406247999487869, "percentage": 88.12, "elapsed_time": "3:01:07", "remaining_time": "0:24:24", "throughput": 19936.34, "total_tokens": 216652544} +{"current_steps": 68835, "total_steps": 78105, "loss": 0.1389, "lr": 2.1154571561413762e-07, "epoch": 4.406568081428846, "percentage": 88.13, "elapsed_time": "3:01:07", "remaining_time": "0:24:23", "throughput": 19936.53, "total_tokens": 216668032} +{"current_steps": 68840, "total_steps": 78105, "loss": 0.1304, "lr": 2.1132086656208527e-07, "epoch": 4.406888163369823, "percentage": 88.14, "elapsed_time": "3:01:08", "remaining_time": "0:24:22", "throughput": 19936.72, "total_tokens": 216683136} +{"current_steps": 68845, "total_steps": 78105, "loss": 0.1055, "lr": 2.1109613179474825e-07, "epoch": 4.4072082453108, "percentage": 88.14, "elapsed_time": "3:01:09", "remaining_time": "0:24:21", "throughput": 19936.93, "total_tokens": 216699008} +{"current_steps": 68850, "total_steps": 78105, "loss": 0.1204, "lr": 2.1087151132334727e-07, "epoch": 4.407528327251777, "percentage": 88.15, "elapsed_time": "3:01:09", "remaining_time": "0:24:21", "throughput": 19937.11, "total_tokens": 216714368} +{"current_steps": 68855, "total_steps": 78105, "loss": 0.1576, "lr": 2.1064700515909975e-07, "epoch": 4.407848409192753, "percentage": 88.16, "elapsed_time": "3:01:10", "remaining_time": "0:24:20", "throughput": 19937.33, "total_tokens": 216730432} +{"current_steps": 68860, "total_steps": 78105, "loss": 0.114, "lr": 2.1042261331321616e-07, "epoch": 4.40816849113373, "percentage": 88.16, "elapsed_time": "3:01:11", "remaining_time": "0:24:19", "throughput": 19937.6, "total_tokens": 216747904} +{"current_steps": 68865, "total_steps": 78105, "loss": 0.154, "lr": 2.1019833579690063e-07, "epoch": 4.408488573074707, "percentage": 88.17, "elapsed_time": "3:01:11", "remaining_time": "0:24:18", "throughput": 19937.82, "total_tokens": 216763648} +{"current_steps": 68870, "total_steps": 78105, "loss": 0.1425, "lr": 2.099741726213539e-07, "epoch": 4.408808655015684, "percentage": 88.18, "elapsed_time": "3:01:12", "remaining_time": "0:24:17", "throughput": 19938.03, "total_tokens": 216779136} +{"current_steps": 68875, "total_steps": 78105, "loss": 0.1038, "lr": 2.097501237977681e-07, "epoch": 4.409128736956661, "percentage": 88.18, "elapsed_time": "3:01:13", "remaining_time": "0:24:17", "throughput": 19938.23, "total_tokens": 216794816} +{"current_steps": 68880, "total_steps": 78105, "loss": 0.1107, "lr": 2.095261893373321e-07, "epoch": 4.409448818897638, "percentage": 88.19, "elapsed_time": "3:01:13", "remaining_time": "0:24:16", "throughput": 19938.38, "total_tokens": 216809280} +{"current_steps": 68885, "total_steps": 78105, "loss": 0.1353, "lr": 2.0930236925122692e-07, "epoch": 4.409768900838615, "percentage": 88.2, "elapsed_time": "3:01:14", "remaining_time": "0:24:15", "throughput": 19938.55, "total_tokens": 216824128} +{"current_steps": 68890, "total_steps": 78105, "loss": 0.1449, "lr": 2.0907866355063006e-07, "epoch": 4.410088982779592, "percentage": 88.2, "elapsed_time": "3:01:15", "remaining_time": "0:24:14", "throughput": 19938.79, "total_tokens": 216840256} +{"current_steps": 68895, "total_steps": 78105, "loss": 0.121, "lr": 2.0885507224671197e-07, "epoch": 4.410409064720568, "percentage": 88.21, "elapsed_time": "3:01:16", "remaining_time": "0:24:13", "throughput": 19939.04, "total_tokens": 216857600} +{"current_steps": 68900, "total_steps": 78105, "loss": 0.1349, "lr": 2.086315953506371e-07, "epoch": 4.410729146661545, "percentage": 88.21, "elapsed_time": "3:01:16", "remaining_time": "0:24:13", "throughput": 19939.24, "total_tokens": 216873152} +{"current_steps": 68905, "total_steps": 78105, "loss": 0.1164, "lr": 2.084082328735651e-07, "epoch": 4.411049228602522, "percentage": 88.22, "elapsed_time": "3:01:17", "remaining_time": "0:24:12", "throughput": 19939.45, "total_tokens": 216889152} +{"current_steps": 68910, "total_steps": 78105, "loss": 0.1164, "lr": 2.0818498482664955e-07, "epoch": 4.411369310543499, "percentage": 88.23, "elapsed_time": "3:01:18", "remaining_time": "0:24:11", "throughput": 19939.62, "total_tokens": 216904192} +{"current_steps": 68915, "total_steps": 78105, "loss": 0.1648, "lr": 2.0796185122103797e-07, "epoch": 4.411689392484476, "percentage": 88.23, "elapsed_time": "3:01:18", "remaining_time": "0:24:10", "throughput": 19939.82, "total_tokens": 216919552} +{"current_steps": 68920, "total_steps": 78105, "loss": 0.0874, "lr": 2.0773883206787275e-07, "epoch": 4.412009474425453, "percentage": 88.24, "elapsed_time": "3:01:19", "remaining_time": "0:24:09", "throughput": 19940.01, "total_tokens": 216934720} +{"current_steps": 68925, "total_steps": 78105, "loss": 0.1486, "lr": 2.0751592737829062e-07, "epoch": 4.41232955636643, "percentage": 88.25, "elapsed_time": "3:01:20", "remaining_time": "0:24:09", "throughput": 19940.2, "total_tokens": 216950336} +{"current_steps": 68930, "total_steps": 78105, "loss": 0.1377, "lr": 2.0729313716342209e-07, "epoch": 4.412649638307407, "percentage": 88.25, "elapsed_time": "3:01:20", "remaining_time": "0:24:08", "throughput": 19940.38, "total_tokens": 216965504} +{"current_steps": 68935, "total_steps": 78105, "loss": 0.151, "lr": 2.0707046143439187e-07, "epoch": 4.412969720248384, "percentage": 88.26, "elapsed_time": "3:01:21", "remaining_time": "0:24:07", "throughput": 19940.55, "total_tokens": 216980672} +{"current_steps": 68940, "total_steps": 78105, "loss": 0.1334, "lr": 2.068479002023191e-07, "epoch": 4.41328980218936, "percentage": 88.27, "elapsed_time": "3:01:22", "remaining_time": "0:24:06", "throughput": 19940.77, "total_tokens": 216996608} +{"current_steps": 68945, "total_steps": 78105, "loss": 0.1647, "lr": 2.0662545347831798e-07, "epoch": 4.413609884130337, "percentage": 88.27, "elapsed_time": "3:01:22", "remaining_time": "0:24:05", "throughput": 19941.0, "total_tokens": 217013184} +{"current_steps": 68950, "total_steps": 78105, "loss": 0.1388, "lr": 2.0640312127349544e-07, "epoch": 4.413929966071314, "percentage": 88.28, "elapsed_time": "3:01:23", "remaining_time": "0:24:05", "throughput": 19941.19, "total_tokens": 217028544} +{"current_steps": 68955, "total_steps": 78105, "loss": 0.121, "lr": 2.0618090359895482e-07, "epoch": 4.414250048012291, "percentage": 88.29, "elapsed_time": "3:01:24", "remaining_time": "0:24:04", "throughput": 19941.43, "total_tokens": 217045120} +{"current_steps": 68960, "total_steps": 78105, "loss": 0.1209, "lr": 2.0595880046579113e-07, "epoch": 4.414570129953268, "percentage": 88.29, "elapsed_time": "3:01:24", "remaining_time": "0:24:03", "throughput": 19941.6, "total_tokens": 217060224} +{"current_steps": 68965, "total_steps": 78105, "loss": 0.1759, "lr": 2.0573681188509637e-07, "epoch": 4.414890211894245, "percentage": 88.3, "elapsed_time": "3:01:25", "remaining_time": "0:24:02", "throughput": 19941.77, "total_tokens": 217074880} +{"current_steps": 68970, "total_steps": 78105, "loss": 0.1506, "lr": 2.055149378679541e-07, "epoch": 4.415210293835222, "percentage": 88.3, "elapsed_time": "3:01:26", "remaining_time": "0:24:01", "throughput": 19941.98, "total_tokens": 217090624} +{"current_steps": 68975, "total_steps": 78105, "loss": 0.0857, "lr": 2.052931784254447e-07, "epoch": 4.415530375776199, "percentage": 88.31, "elapsed_time": "3:01:26", "remaining_time": "0:24:01", "throughput": 19942.23, "total_tokens": 217107136} +{"current_steps": 68980, "total_steps": 78105, "loss": 0.1358, "lr": 2.050715335686415e-07, "epoch": 4.415850457717176, "percentage": 88.32, "elapsed_time": "3:01:27", "remaining_time": "0:24:00", "throughput": 19942.42, "total_tokens": 217122368} +{"current_steps": 68985, "total_steps": 78105, "loss": 0.1529, "lr": 2.048500033086115e-07, "epoch": 4.416170539658152, "percentage": 88.32, "elapsed_time": "3:01:28", "remaining_time": "0:23:59", "throughput": 19942.64, "total_tokens": 217138816} +{"current_steps": 68990, "total_steps": 78105, "loss": 0.0889, "lr": 2.04628587656418e-07, "epoch": 4.416490621599129, "percentage": 88.33, "elapsed_time": "3:01:28", "remaining_time": "0:23:58", "throughput": 19942.82, "total_tokens": 217153536} +{"current_steps": 68995, "total_steps": 78105, "loss": 0.1377, "lr": 2.044072866231159e-07, "epoch": 4.416810703540106, "percentage": 88.34, "elapsed_time": "3:01:29", "remaining_time": "0:23:57", "throughput": 19943.04, "total_tokens": 217169984} +{"current_steps": 69000, "total_steps": 78105, "loss": 0.1121, "lr": 2.0418610021975732e-07, "epoch": 4.417130785481083, "percentage": 88.34, "elapsed_time": "3:01:30", "remaining_time": "0:23:57", "throughput": 19943.28, "total_tokens": 217186560} +{"current_steps": 69005, "total_steps": 78105, "loss": 0.1765, "lr": 2.0396502845738602e-07, "epoch": 4.41745086742206, "percentage": 88.35, "elapsed_time": "3:01:30", "remaining_time": "0:23:56", "throughput": 19943.55, "total_tokens": 217203776} +{"current_steps": 69010, "total_steps": 78105, "loss": 0.0829, "lr": 2.0374407134704178e-07, "epoch": 4.417770949363037, "percentage": 88.36, "elapsed_time": "3:01:31", "remaining_time": "0:23:55", "throughput": 19943.74, "total_tokens": 217219264} +{"current_steps": 69015, "total_steps": 78105, "loss": 0.0862, "lr": 2.0352322889975796e-07, "epoch": 4.418091031304014, "percentage": 88.36, "elapsed_time": "3:01:32", "remaining_time": "0:23:54", "throughput": 19943.98, "total_tokens": 217235840} +{"current_steps": 69020, "total_steps": 78105, "loss": 0.1079, "lr": 2.0330250112656215e-07, "epoch": 4.418411113244991, "percentage": 88.37, "elapsed_time": "3:01:32", "remaining_time": "0:23:53", "throughput": 19944.18, "total_tokens": 217251456} +{"current_steps": 69025, "total_steps": 78105, "loss": 0.1418, "lr": 2.0308188803847635e-07, "epoch": 4.418731195185968, "percentage": 88.37, "elapsed_time": "3:01:33", "remaining_time": "0:23:53", "throughput": 19944.35, "total_tokens": 217266240} +{"current_steps": 69030, "total_steps": 78105, "loss": 0.1094, "lr": 2.0286138964651676e-07, "epoch": 4.419051277126944, "percentage": 88.38, "elapsed_time": "3:01:34", "remaining_time": "0:23:52", "throughput": 19944.6, "total_tokens": 217282944} +{"current_steps": 69035, "total_steps": 78105, "loss": 0.1358, "lr": 2.0264100596169345e-07, "epoch": 4.419371359067921, "percentage": 88.39, "elapsed_time": "3:01:34", "remaining_time": "0:23:51", "throughput": 19944.79, "total_tokens": 217298112} +{"current_steps": 69040, "total_steps": 78105, "loss": 0.1167, "lr": 2.0242073699501236e-07, "epoch": 4.419691441008898, "percentage": 88.39, "elapsed_time": "3:01:35", "remaining_time": "0:23:50", "throughput": 19944.98, "total_tokens": 217313664} +{"current_steps": 69045, "total_steps": 78105, "loss": 0.1408, "lr": 2.0220058275747217e-07, "epoch": 4.420011522949875, "percentage": 88.4, "elapsed_time": "3:01:36", "remaining_time": "0:23:49", "throughput": 19945.16, "total_tokens": 217328320} +{"current_steps": 69050, "total_steps": 78105, "loss": 0.2011, "lr": 2.0198054326006606e-07, "epoch": 4.420331604890852, "percentage": 88.41, "elapsed_time": "3:01:36", "remaining_time": "0:23:48", "throughput": 19945.33, "total_tokens": 217343552} +{"current_steps": 69055, "total_steps": 78105, "loss": 0.1643, "lr": 2.017606185137816e-07, "epoch": 4.420651686831829, "percentage": 88.41, "elapsed_time": "3:01:37", "remaining_time": "0:23:48", "throughput": 19945.56, "total_tokens": 217359616} +{"current_steps": 69060, "total_steps": 78105, "loss": 0.1961, "lr": 2.0154080852960056e-07, "epoch": 4.420971768772806, "percentage": 88.42, "elapsed_time": "3:01:38", "remaining_time": "0:23:47", "throughput": 19945.76, "total_tokens": 217375424} +{"current_steps": 69065, "total_steps": 78105, "loss": 0.1649, "lr": 2.0132111331849947e-07, "epoch": 4.421291850713783, "percentage": 88.43, "elapsed_time": "3:01:39", "remaining_time": "0:23:46", "throughput": 19945.94, "total_tokens": 217390848} +{"current_steps": 69070, "total_steps": 78105, "loss": 0.1364, "lr": 2.0110153289144812e-07, "epoch": 4.421611932654759, "percentage": 88.43, "elapsed_time": "3:01:39", "remaining_time": "0:23:45", "throughput": 19946.19, "total_tokens": 217407616} +{"current_steps": 69075, "total_steps": 78105, "loss": 0.1023, "lr": 2.0088206725941278e-07, "epoch": 4.421932014595736, "percentage": 88.44, "elapsed_time": "3:01:40", "remaining_time": "0:23:44", "throughput": 19946.4, "total_tokens": 217423296} +{"current_steps": 69080, "total_steps": 78105, "loss": 0.111, "lr": 2.006627164333502e-07, "epoch": 4.422252096536713, "percentage": 88.45, "elapsed_time": "3:01:41", "remaining_time": "0:23:44", "throughput": 19946.58, "total_tokens": 217438656} +{"current_steps": 69085, "total_steps": 78105, "loss": 0.1101, "lr": 2.004434804242153e-07, "epoch": 4.42257217847769, "percentage": 88.45, "elapsed_time": "3:01:41", "remaining_time": "0:23:43", "throughput": 19946.82, "total_tokens": 217455040} +{"current_steps": 69090, "total_steps": 78105, "loss": 0.1491, "lr": 2.0022435924295508e-07, "epoch": 4.422892260418667, "percentage": 88.46, "elapsed_time": "3:01:42", "remaining_time": "0:23:42", "throughput": 19947.01, "total_tokens": 217470848} +{"current_steps": 69095, "total_steps": 78105, "loss": 0.1008, "lr": 2.0000535290051083e-07, "epoch": 4.423212342359644, "percentage": 88.46, "elapsed_time": "3:01:43", "remaining_time": "0:23:41", "throughput": 19947.28, "total_tokens": 217488064} +{"current_steps": 69100, "total_steps": 78105, "loss": 0.1556, "lr": 1.997864614078196e-07, "epoch": 4.423532424300621, "percentage": 88.47, "elapsed_time": "3:01:43", "remaining_time": "0:23:40", "throughput": 19947.46, "total_tokens": 217503296} +{"current_steps": 69105, "total_steps": 78105, "loss": 0.1135, "lr": 1.9956768477581044e-07, "epoch": 4.423852506241598, "percentage": 88.48, "elapsed_time": "3:01:44", "remaining_time": "0:23:40", "throughput": 19947.65, "total_tokens": 217518464} +{"current_steps": 69110, "total_steps": 78105, "loss": 0.1434, "lr": 1.9934902301540964e-07, "epoch": 4.424172588182575, "percentage": 88.48, "elapsed_time": "3:01:45", "remaining_time": "0:23:39", "throughput": 19947.85, "total_tokens": 217533952} +{"current_steps": 69115, "total_steps": 78105, "loss": 0.1169, "lr": 1.99130476137534e-07, "epoch": 4.424492670123552, "percentage": 88.49, "elapsed_time": "3:01:45", "remaining_time": "0:23:38", "throughput": 19948.02, "total_tokens": 217548992} +{"current_steps": 69120, "total_steps": 78105, "loss": 0.1238, "lr": 1.9891204415309785e-07, "epoch": 4.424812752064528, "percentage": 88.5, "elapsed_time": "3:01:46", "remaining_time": "0:23:37", "throughput": 19948.21, "total_tokens": 217564160} +{"current_steps": 69125, "total_steps": 78105, "loss": 0.1162, "lr": 1.9869372707300856e-07, "epoch": 4.425132834005505, "percentage": 88.5, "elapsed_time": "3:01:47", "remaining_time": "0:23:36", "throughput": 19948.42, "total_tokens": 217580288} +{"current_steps": 69130, "total_steps": 78105, "loss": 0.1178, "lr": 1.984755249081674e-07, "epoch": 4.425452915946482, "percentage": 88.51, "elapsed_time": "3:01:47", "remaining_time": "0:23:36", "throughput": 19948.59, "total_tokens": 217595008} +{"current_steps": 69135, "total_steps": 78105, "loss": 0.0866, "lr": 1.9825743766947014e-07, "epoch": 4.425772997887459, "percentage": 88.52, "elapsed_time": "3:01:48", "remaining_time": "0:23:35", "throughput": 19948.77, "total_tokens": 217609984} +{"current_steps": 69140, "total_steps": 78105, "loss": 0.0968, "lr": 1.980394653678072e-07, "epoch": 4.426093079828436, "percentage": 88.52, "elapsed_time": "3:01:49", "remaining_time": "0:23:34", "throughput": 19948.94, "total_tokens": 217624640} +{"current_steps": 69145, "total_steps": 78105, "loss": 0.1389, "lr": 1.978216080140624e-07, "epoch": 4.426413161769413, "percentage": 88.53, "elapsed_time": "3:01:49", "remaining_time": "0:23:33", "throughput": 19949.15, "total_tokens": 217640896} +{"current_steps": 69150, "total_steps": 78105, "loss": 0.1078, "lr": 1.9760386561911537e-07, "epoch": 4.42673324371039, "percentage": 88.53, "elapsed_time": "3:01:50", "remaining_time": "0:23:32", "throughput": 19949.35, "total_tokens": 217656512} +{"current_steps": 69155, "total_steps": 78105, "loss": 0.0876, "lr": 1.9738623819383852e-07, "epoch": 4.427053325651367, "percentage": 88.54, "elapsed_time": "3:01:51", "remaining_time": "0:23:32", "throughput": 19949.55, "total_tokens": 217671616} +{"current_steps": 69160, "total_steps": 78105, "loss": 0.1675, "lr": 1.9716872574909923e-07, "epoch": 4.427373407592343, "percentage": 88.55, "elapsed_time": "3:01:51", "remaining_time": "0:23:31", "throughput": 19949.73, "total_tokens": 217686656} +{"current_steps": 69165, "total_steps": 78105, "loss": 0.1062, "lr": 1.9695132829575857e-07, "epoch": 4.42769348953332, "percentage": 88.55, "elapsed_time": "3:01:52", "remaining_time": "0:23:30", "throughput": 19949.97, "total_tokens": 217703296} +{"current_steps": 69170, "total_steps": 78105, "loss": 0.128, "lr": 1.9673404584467258e-07, "epoch": 4.428013571474297, "percentage": 88.56, "elapsed_time": "3:01:53", "remaining_time": "0:23:29", "throughput": 19950.14, "total_tokens": 217718016} +{"current_steps": 69175, "total_steps": 78105, "loss": 0.12, "lr": 1.9651687840669092e-07, "epoch": 4.428333653415274, "percentage": 88.57, "elapsed_time": "3:01:53", "remaining_time": "0:23:28", "throughput": 19950.31, "total_tokens": 217732992} +{"current_steps": 69180, "total_steps": 78105, "loss": 0.1107, "lr": 1.962998259926577e-07, "epoch": 4.428653735356251, "percentage": 88.57, "elapsed_time": "3:01:54", "remaining_time": "0:23:28", "throughput": 19950.49, "total_tokens": 217748096} +{"current_steps": 69185, "total_steps": 78105, "loss": 0.1051, "lr": 1.9608288861341225e-07, "epoch": 4.428973817297228, "percentage": 88.58, "elapsed_time": "3:01:55", "remaining_time": "0:23:27", "throughput": 19950.7, "total_tokens": 217764096} +{"current_steps": 69190, "total_steps": 78105, "loss": 0.1685, "lr": 1.9586606627978593e-07, "epoch": 4.429293899238205, "percentage": 88.59, "elapsed_time": "3:01:55", "remaining_time": "0:23:26", "throughput": 19950.94, "total_tokens": 217780608} +{"current_steps": 69195, "total_steps": 78105, "loss": 0.0954, "lr": 1.9564935900260734e-07, "epoch": 4.429613981179182, "percentage": 88.59, "elapsed_time": "3:01:56", "remaining_time": "0:23:25", "throughput": 19951.15, "total_tokens": 217796352} +{"current_steps": 69200, "total_steps": 78105, "loss": 0.0962, "lr": 1.9543276679269613e-07, "epoch": 4.429934063120159, "percentage": 88.6, "elapsed_time": "3:01:57", "remaining_time": "0:23:24", "throughput": 19951.31, "total_tokens": 217811136} +{"current_steps": 69205, "total_steps": 78105, "loss": 0.1146, "lr": 1.9521628966086864e-07, "epoch": 4.430254145061135, "percentage": 88.61, "elapsed_time": "3:01:57", "remaining_time": "0:23:24", "throughput": 19951.53, "total_tokens": 217827456} +{"current_steps": 69210, "total_steps": 78105, "loss": 0.1101, "lr": 1.9499992761793458e-07, "epoch": 4.430574227002112, "percentage": 88.61, "elapsed_time": "3:01:58", "remaining_time": "0:23:23", "throughput": 19951.74, "total_tokens": 217843264} +{"current_steps": 69215, "total_steps": 78105, "loss": 0.1223, "lr": 1.9478368067469749e-07, "epoch": 4.430894308943089, "percentage": 88.62, "elapsed_time": "3:01:59", "remaining_time": "0:23:22", "throughput": 19951.92, "total_tokens": 217858432} +{"current_steps": 69220, "total_steps": 78105, "loss": 0.126, "lr": 1.9456754884195655e-07, "epoch": 4.431214390884066, "percentage": 88.62, "elapsed_time": "3:01:59", "remaining_time": "0:23:21", "throughput": 19952.13, "total_tokens": 217874112} +{"current_steps": 69225, "total_steps": 78105, "loss": 0.1397, "lr": 1.9435153213050312e-07, "epoch": 4.431534472825043, "percentage": 88.63, "elapsed_time": "3:02:00", "remaining_time": "0:23:20", "throughput": 19952.31, "total_tokens": 217889344} +{"current_steps": 69230, "total_steps": 78105, "loss": 0.0912, "lr": 1.9413563055112465e-07, "epoch": 4.43185455476602, "percentage": 88.64, "elapsed_time": "3:02:01", "remaining_time": "0:23:20", "throughput": 19952.49, "total_tokens": 217904576} +{"current_steps": 69235, "total_steps": 78105, "loss": 0.1608, "lr": 1.939198441146023e-07, "epoch": 4.432174636706997, "percentage": 88.64, "elapsed_time": "3:02:01", "remaining_time": "0:23:19", "throughput": 19952.65, "total_tokens": 217919232} +{"current_steps": 69240, "total_steps": 78105, "loss": 0.1051, "lr": 1.9370417283171072e-07, "epoch": 4.432494718647974, "percentage": 88.65, "elapsed_time": "3:02:02", "remaining_time": "0:23:18", "throughput": 19952.83, "total_tokens": 217934144} +{"current_steps": 69245, "total_steps": 78105, "loss": 0.1495, "lr": 1.9348861671321966e-07, "epoch": 4.432814800588951, "percentage": 88.66, "elapsed_time": "3:02:03", "remaining_time": "0:23:17", "throughput": 19953.0, "total_tokens": 217949056} +{"current_steps": 69250, "total_steps": 78105, "loss": 0.1505, "lr": 1.932731757698933e-07, "epoch": 4.433134882529927, "percentage": 88.66, "elapsed_time": "3:02:03", "remaining_time": "0:23:16", "throughput": 19953.26, "total_tokens": 217966336} +{"current_steps": 69255, "total_steps": 78105, "loss": 0.1432, "lr": 1.9305785001248883e-07, "epoch": 4.433454964470904, "percentage": 88.67, "elapsed_time": "3:02:04", "remaining_time": "0:23:16", "throughput": 19953.46, "total_tokens": 217981824} +{"current_steps": 69260, "total_steps": 78105, "loss": 0.1343, "lr": 1.9284263945175908e-07, "epoch": 4.433775046411881, "percentage": 88.68, "elapsed_time": "3:02:05", "remaining_time": "0:23:15", "throughput": 19953.65, "total_tokens": 217997632} +{"current_steps": 69265, "total_steps": 78105, "loss": 0.1941, "lr": 1.9262754409844985e-07, "epoch": 4.434095128352858, "percentage": 88.68, "elapsed_time": "3:02:05", "remaining_time": "0:23:14", "throughput": 19953.83, "total_tokens": 218012416} +{"current_steps": 69270, "total_steps": 78105, "loss": 0.1536, "lr": 1.9241256396330288e-07, "epoch": 4.434415210293835, "percentage": 88.69, "elapsed_time": "3:02:06", "remaining_time": "0:23:13", "throughput": 19954.04, "total_tokens": 218027904} +{"current_steps": 69275, "total_steps": 78105, "loss": 0.1174, "lr": 1.921976990570529e-07, "epoch": 4.434735292234812, "percentage": 88.69, "elapsed_time": "3:02:07", "remaining_time": "0:23:12", "throughput": 19954.24, "total_tokens": 218043840} +{"current_steps": 69280, "total_steps": 78105, "loss": 0.1416, "lr": 1.919829493904285e-07, "epoch": 4.435055374175789, "percentage": 88.7, "elapsed_time": "3:02:07", "remaining_time": "0:23:12", "throughput": 19954.43, "total_tokens": 218059520} +{"current_steps": 69285, "total_steps": 78105, "loss": 0.1487, "lr": 1.9176831497415393e-07, "epoch": 4.435375456116766, "percentage": 88.71, "elapsed_time": "3:02:08", "remaining_time": "0:23:11", "throughput": 19954.61, "total_tokens": 218074880} +{"current_steps": 69290, "total_steps": 78105, "loss": 0.1598, "lr": 1.915537958189459e-07, "epoch": 4.435695538057743, "percentage": 88.71, "elapsed_time": "3:02:09", "remaining_time": "0:23:10", "throughput": 19954.85, "total_tokens": 218091648} +{"current_steps": 69295, "total_steps": 78105, "loss": 0.1524, "lr": 1.9133939193551804e-07, "epoch": 4.436015619998719, "percentage": 88.72, "elapsed_time": "3:02:09", "remaining_time": "0:23:09", "throughput": 19955.05, "total_tokens": 218107456} +{"current_steps": 69300, "total_steps": 78105, "loss": 0.1667, "lr": 1.9112510333457485e-07, "epoch": 4.436335701939696, "percentage": 88.73, "elapsed_time": "3:02:10", "remaining_time": "0:23:08", "throughput": 19955.3, "total_tokens": 218124416} +{"current_steps": 69305, "total_steps": 78105, "loss": 0.1479, "lr": 1.9091093002681803e-07, "epoch": 4.436655783880673, "percentage": 88.73, "elapsed_time": "3:02:11", "remaining_time": "0:23:08", "throughput": 19955.51, "total_tokens": 218140224} +{"current_steps": 69310, "total_steps": 78105, "loss": 0.1211, "lr": 1.906968720229413e-07, "epoch": 4.43697586582165, "percentage": 88.74, "elapsed_time": "3:02:12", "remaining_time": "0:23:07", "throughput": 19955.76, "total_tokens": 218157376} +{"current_steps": 69315, "total_steps": 78105, "loss": 0.1096, "lr": 1.904829293336341e-07, "epoch": 4.437295947762627, "percentage": 88.75, "elapsed_time": "3:02:12", "remaining_time": "0:23:06", "throughput": 19955.93, "total_tokens": 218172288} +{"current_steps": 69320, "total_steps": 78105, "loss": 0.1438, "lr": 1.9026910196957986e-07, "epoch": 4.437616029703604, "percentage": 88.75, "elapsed_time": "3:02:13", "remaining_time": "0:23:05", "throughput": 19956.13, "total_tokens": 218187904} +{"current_steps": 69325, "total_steps": 78105, "loss": 0.1146, "lr": 1.9005538994145507e-07, "epoch": 4.437936111644581, "percentage": 88.76, "elapsed_time": "3:02:14", "remaining_time": "0:23:04", "throughput": 19956.34, "total_tokens": 218203968} +{"current_steps": 69330, "total_steps": 78105, "loss": 0.1408, "lr": 1.8984179325993308e-07, "epoch": 4.438256193585558, "percentage": 88.77, "elapsed_time": "3:02:14", "remaining_time": "0:23:03", "throughput": 19956.53, "total_tokens": 218219776} +{"current_steps": 69335, "total_steps": 78105, "loss": 0.1296, "lr": 1.8962831193567788e-07, "epoch": 4.438576275526534, "percentage": 88.77, "elapsed_time": "3:02:15", "remaining_time": "0:23:03", "throughput": 19956.71, "total_tokens": 218234880} +{"current_steps": 69340, "total_steps": 78105, "loss": 0.1389, "lr": 1.8941494597935124e-07, "epoch": 4.438896357467511, "percentage": 88.78, "elapsed_time": "3:02:16", "remaining_time": "0:23:02", "throughput": 19956.96, "total_tokens": 218251968} +{"current_steps": 69345, "total_steps": 78105, "loss": 0.1744, "lr": 1.89201695401606e-07, "epoch": 4.439216439408488, "percentage": 88.78, "elapsed_time": "3:02:16", "remaining_time": "0:23:01", "throughput": 19957.14, "total_tokens": 218266816} +{"current_steps": 69350, "total_steps": 78105, "loss": 0.1143, "lr": 1.88988560213092e-07, "epoch": 4.439536521349465, "percentage": 88.79, "elapsed_time": "3:02:17", "remaining_time": "0:23:00", "throughput": 19957.41, "total_tokens": 218284160} +{"current_steps": 69355, "total_steps": 78105, "loss": 0.1526, "lr": 1.8877554042445178e-07, "epoch": 4.439856603290442, "percentage": 88.8, "elapsed_time": "3:02:18", "remaining_time": "0:22:59", "throughput": 19957.61, "total_tokens": 218299712} +{"current_steps": 69360, "total_steps": 78105, "loss": 0.1798, "lr": 1.885626360463222e-07, "epoch": 4.440176685231419, "percentage": 88.8, "elapsed_time": "3:02:18", "remaining_time": "0:22:59", "throughput": 19957.86, "total_tokens": 218316544} +{"current_steps": 69365, "total_steps": 78105, "loss": 0.1062, "lr": 1.8834984708933468e-07, "epoch": 4.440496767172396, "percentage": 88.81, "elapsed_time": "3:02:19", "remaining_time": "0:22:58", "throughput": 19958.03, "total_tokens": 218331200} +{"current_steps": 69370, "total_steps": 78105, "loss": 0.1513, "lr": 1.8813717356411492e-07, "epoch": 4.440816849113373, "percentage": 88.82, "elapsed_time": "3:02:20", "remaining_time": "0:22:57", "throughput": 19958.21, "total_tokens": 218346368} +{"current_steps": 69375, "total_steps": 78105, "loss": 0.1303, "lr": 1.879246154812825e-07, "epoch": 4.44113693105435, "percentage": 88.82, "elapsed_time": "3:02:20", "remaining_time": "0:22:56", "throughput": 19958.39, "total_tokens": 218361664} +{"current_steps": 69380, "total_steps": 78105, "loss": 0.1369, "lr": 1.877121728514522e-07, "epoch": 4.441457012995327, "percentage": 88.83, "elapsed_time": "3:02:21", "remaining_time": "0:22:55", "throughput": 19958.61, "total_tokens": 218377920} +{"current_steps": 69385, "total_steps": 78105, "loss": 0.1277, "lr": 1.8749984568523089e-07, "epoch": 4.441777094936303, "percentage": 88.84, "elapsed_time": "3:02:22", "remaining_time": "0:22:55", "throughput": 19958.8, "total_tokens": 218393280} +{"current_steps": 69390, "total_steps": 78105, "loss": 0.0977, "lr": 1.8728763399322252e-07, "epoch": 4.44209717687728, "percentage": 88.84, "elapsed_time": "3:02:22", "remaining_time": "0:22:54", "throughput": 19958.95, "total_tokens": 218407936} +{"current_steps": 69395, "total_steps": 78105, "loss": 0.1393, "lr": 1.8707553778602282e-07, "epoch": 4.442417258818257, "percentage": 88.85, "elapsed_time": "3:02:23", "remaining_time": "0:22:53", "throughput": 19959.12, "total_tokens": 218422976} +{"current_steps": 69400, "total_steps": 78105, "loss": 0.1009, "lr": 1.868635570742236e-07, "epoch": 4.442737340759234, "percentage": 88.85, "elapsed_time": "3:02:24", "remaining_time": "0:22:52", "throughput": 19959.32, "total_tokens": 218438656} +{"current_steps": 69405, "total_steps": 78105, "loss": 0.1584, "lr": 1.8665169186840943e-07, "epoch": 4.443057422700211, "percentage": 88.86, "elapsed_time": "3:02:24", "remaining_time": "0:22:51", "throughput": 19959.52, "total_tokens": 218454080} +{"current_steps": 69410, "total_steps": 78105, "loss": 0.1521, "lr": 1.8643994217915961e-07, "epoch": 4.443377504641188, "percentage": 88.87, "elapsed_time": "3:02:25", "remaining_time": "0:22:51", "throughput": 19959.68, "total_tokens": 218469056} +{"current_steps": 69415, "total_steps": 78105, "loss": 0.1504, "lr": 1.8622830801704905e-07, "epoch": 4.443697586582165, "percentage": 88.87, "elapsed_time": "3:02:26", "remaining_time": "0:22:50", "throughput": 19959.84, "total_tokens": 218483776} +{"current_steps": 69420, "total_steps": 78105, "loss": 0.1207, "lr": 1.86016789392644e-07, "epoch": 4.444017668523142, "percentage": 88.88, "elapsed_time": "3:02:26", "remaining_time": "0:22:49", "throughput": 19960.03, "total_tokens": 218499136} +{"current_steps": 69425, "total_steps": 78105, "loss": 0.128, "lr": 1.858053863165077e-07, "epoch": 4.444337750464118, "percentage": 88.89, "elapsed_time": "3:02:27", "remaining_time": "0:22:48", "throughput": 19960.17, "total_tokens": 218513088} +{"current_steps": 69430, "total_steps": 78105, "loss": 0.1121, "lr": 1.8559409879919636e-07, "epoch": 4.444657832405095, "percentage": 88.89, "elapsed_time": "3:02:28", "remaining_time": "0:22:47", "throughput": 19960.35, "total_tokens": 218528384} +{"current_steps": 69435, "total_steps": 78105, "loss": 0.1311, "lr": 1.8538292685126026e-07, "epoch": 4.444977914346072, "percentage": 88.9, "elapsed_time": "3:02:28", "remaining_time": "0:22:47", "throughput": 19960.51, "total_tokens": 218543040} +{"current_steps": 69440, "total_steps": 78105, "loss": 0.1148, "lr": 1.8517187048324446e-07, "epoch": 4.445297996287049, "percentage": 88.91, "elapsed_time": "3:02:29", "remaining_time": "0:22:46", "throughput": 19960.7, "total_tokens": 218558400} +{"current_steps": 69445, "total_steps": 78105, "loss": 0.0942, "lr": 1.8496092970568757e-07, "epoch": 4.445618078228026, "percentage": 88.91, "elapsed_time": "3:02:30", "remaining_time": "0:22:45", "throughput": 19960.94, "total_tokens": 218575232} +{"current_steps": 69450, "total_steps": 78105, "loss": 0.11, "lr": 1.847501045291239e-07, "epoch": 4.445938160169003, "percentage": 88.92, "elapsed_time": "3:02:30", "remaining_time": "0:22:44", "throughput": 19961.12, "total_tokens": 218590208} +{"current_steps": 69455, "total_steps": 78105, "loss": 0.1019, "lr": 1.8453939496407975e-07, "epoch": 4.44625824210998, "percentage": 88.93, "elapsed_time": "3:02:31", "remaining_time": "0:22:43", "throughput": 19961.34, "total_tokens": 218606272} +{"current_steps": 69460, "total_steps": 78105, "loss": 0.1144, "lr": 1.8432880102107754e-07, "epoch": 4.446578324050957, "percentage": 88.93, "elapsed_time": "3:02:32", "remaining_time": "0:22:43", "throughput": 19961.52, "total_tokens": 218621376} +{"current_steps": 69465, "total_steps": 78105, "loss": 0.141, "lr": 1.8411832271063328e-07, "epoch": 4.446898405991934, "percentage": 88.94, "elapsed_time": "3:02:32", "remaining_time": "0:22:42", "throughput": 19961.71, "total_tokens": 218636416} +{"current_steps": 69470, "total_steps": 78105, "loss": 0.1628, "lr": 1.8390796004325696e-07, "epoch": 4.44721848793291, "percentage": 88.94, "elapsed_time": "3:02:33", "remaining_time": "0:22:41", "throughput": 19961.96, "total_tokens": 218653440} +{"current_steps": 69475, "total_steps": 78105, "loss": 0.1397, "lr": 1.8369771302945316e-07, "epoch": 4.447538569873887, "percentage": 88.95, "elapsed_time": "3:02:34", "remaining_time": "0:22:40", "throughput": 19962.15, "total_tokens": 218668800} +{"current_steps": 69480, "total_steps": 78105, "loss": 0.1364, "lr": 1.834875816797202e-07, "epoch": 4.447858651814864, "percentage": 88.96, "elapsed_time": "3:02:34", "remaining_time": "0:22:39", "throughput": 19962.3, "total_tokens": 218683328} +{"current_steps": 69485, "total_steps": 78105, "loss": 0.1205, "lr": 1.8327756600455132e-07, "epoch": 4.448178733755841, "percentage": 88.96, "elapsed_time": "3:02:35", "remaining_time": "0:22:39", "throughput": 19962.5, "total_tokens": 218699328} +{"current_steps": 69490, "total_steps": 78105, "loss": 0.1402, "lr": 1.8306766601443343e-07, "epoch": 4.448498815696818, "percentage": 88.97, "elapsed_time": "3:02:36", "remaining_time": "0:22:38", "throughput": 19962.67, "total_tokens": 218714304} +{"current_steps": 69495, "total_steps": 78105, "loss": 0.1515, "lr": 1.828578817198473e-07, "epoch": 4.448818897637795, "percentage": 88.98, "elapsed_time": "3:02:36", "remaining_time": "0:22:37", "throughput": 19962.86, "total_tokens": 218729664} +{"current_steps": 69500, "total_steps": 78105, "loss": 0.1253, "lr": 1.8264821313126958e-07, "epoch": 4.449138979578772, "percentage": 88.98, "elapsed_time": "3:02:37", "remaining_time": "0:22:36", "throughput": 19963.06, "total_tokens": 218745600} +{"current_steps": 69505, "total_steps": 78105, "loss": 0.1192, "lr": 1.8243866025916907e-07, "epoch": 4.449459061519749, "percentage": 88.99, "elapsed_time": "3:02:38", "remaining_time": "0:22:35", "throughput": 19963.3, "total_tokens": 218762304} +{"current_steps": 69510, "total_steps": 78105, "loss": 0.153, "lr": 1.8222922311401047e-07, "epoch": 4.449779143460726, "percentage": 89.0, "elapsed_time": "3:02:38", "remaining_time": "0:22:35", "throughput": 19963.45, "total_tokens": 218776960} +{"current_steps": 69515, "total_steps": 78105, "loss": 0.1357, "lr": 1.8201990170625157e-07, "epoch": 4.450099225401702, "percentage": 89.0, "elapsed_time": "3:02:39", "remaining_time": "0:22:34", "throughput": 19963.68, "total_tokens": 218793280} +{"current_steps": 69520, "total_steps": 78105, "loss": 0.0949, "lr": 1.818106960463445e-07, "epoch": 4.450419307342679, "percentage": 89.01, "elapsed_time": "3:02:40", "remaining_time": "0:22:33", "throughput": 19963.92, "total_tokens": 218810240} +{"current_steps": 69525, "total_steps": 78105, "loss": 0.1111, "lr": 1.8160160614473703e-07, "epoch": 4.450739389283656, "percentage": 89.01, "elapsed_time": "3:02:40", "remaining_time": "0:22:32", "throughput": 19964.11, "total_tokens": 218825792} +{"current_steps": 69530, "total_steps": 78105, "loss": 0.1376, "lr": 1.8139263201186836e-07, "epoch": 4.451059471224633, "percentage": 89.02, "elapsed_time": "3:02:41", "remaining_time": "0:22:31", "throughput": 19964.33, "total_tokens": 218842176} +{"current_steps": 69535, "total_steps": 78105, "loss": 0.1484, "lr": 1.8118377365817534e-07, "epoch": 4.45137955316561, "percentage": 89.03, "elapsed_time": "3:02:42", "remaining_time": "0:22:31", "throughput": 19964.51, "total_tokens": 218857344} +{"current_steps": 69540, "total_steps": 78105, "loss": 0.1222, "lr": 1.8097503109408553e-07, "epoch": 4.451699635106587, "percentage": 89.03, "elapsed_time": "3:02:42", "remaining_time": "0:22:30", "throughput": 19964.71, "total_tokens": 218872832} +{"current_steps": 69545, "total_steps": 78105, "loss": 0.1965, "lr": 1.807664043300239e-07, "epoch": 4.452019717047564, "percentage": 89.04, "elapsed_time": "3:02:43", "remaining_time": "0:22:29", "throughput": 19964.95, "total_tokens": 218889536} +{"current_steps": 69550, "total_steps": 78105, "loss": 0.1365, "lr": 1.805578933764074e-07, "epoch": 4.452339798988541, "percentage": 89.05, "elapsed_time": "3:02:44", "remaining_time": "0:22:28", "throughput": 19965.13, "total_tokens": 218904640} +{"current_steps": 69555, "total_steps": 78105, "loss": 0.1715, "lr": 1.8034949824364824e-07, "epoch": 4.452659880929518, "percentage": 89.05, "elapsed_time": "3:02:45", "remaining_time": "0:22:27", "throughput": 19965.33, "total_tokens": 218919872} +{"current_steps": 69560, "total_steps": 78105, "loss": 0.1422, "lr": 1.8014121894215252e-07, "epoch": 4.4529799628704945, "percentage": 89.06, "elapsed_time": "3:02:45", "remaining_time": "0:22:27", "throughput": 19965.52, "total_tokens": 218935424} +{"current_steps": 69565, "total_steps": 78105, "loss": 0.0803, "lr": 1.7993305548232003e-07, "epoch": 4.4533000448114715, "percentage": 89.07, "elapsed_time": "3:02:46", "remaining_time": "0:22:26", "throughput": 19965.73, "total_tokens": 218951040} +{"current_steps": 69570, "total_steps": 78105, "loss": 0.2246, "lr": 1.7972500787454717e-07, "epoch": 4.4536201267524484, "percentage": 89.07, "elapsed_time": "3:02:47", "remaining_time": "0:22:25", "throughput": 19965.96, "total_tokens": 218967680} +{"current_steps": 69575, "total_steps": 78105, "loss": 0.12, "lr": 1.7951707612922032e-07, "epoch": 4.453940208693425, "percentage": 89.08, "elapsed_time": "3:02:47", "remaining_time": "0:22:24", "throughput": 19966.14, "total_tokens": 218983104} +{"current_steps": 69580, "total_steps": 78105, "loss": 0.1104, "lr": 1.7930926025672451e-07, "epoch": 4.454260290634402, "percentage": 89.09, "elapsed_time": "3:02:48", "remaining_time": "0:22:23", "throughput": 19966.27, "total_tokens": 218997248} +{"current_steps": 69585, "total_steps": 78105, "loss": 0.1288, "lr": 1.791015602674359e-07, "epoch": 4.454580372575379, "percentage": 89.09, "elapsed_time": "3:02:49", "remaining_time": "0:22:23", "throughput": 19966.46, "total_tokens": 219012608} +{"current_steps": 69590, "total_steps": 78105, "loss": 0.1238, "lr": 1.7889397617172649e-07, "epoch": 4.454900454516356, "percentage": 89.1, "elapsed_time": "3:02:49", "remaining_time": "0:22:22", "throughput": 19966.6, "total_tokens": 219027072} +{"current_steps": 69595, "total_steps": 78105, "loss": 0.1223, "lr": 1.7868650797996156e-07, "epoch": 4.455220536457333, "percentage": 89.1, "elapsed_time": "3:02:50", "remaining_time": "0:22:21", "throughput": 19966.84, "total_tokens": 219043968} +{"current_steps": 69600, "total_steps": 78105, "loss": 0.1511, "lr": 1.7847915570250117e-07, "epoch": 4.4555406183983095, "percentage": 89.11, "elapsed_time": "3:02:51", "remaining_time": "0:22:20", "throughput": 19967.02, "total_tokens": 219059200} +{"current_steps": 69605, "total_steps": 78105, "loss": 0.1452, "lr": 1.78271919349699e-07, "epoch": 4.4558607003392865, "percentage": 89.12, "elapsed_time": "3:02:51", "remaining_time": "0:22:19", "throughput": 19967.19, "total_tokens": 219074368} +{"current_steps": 69610, "total_steps": 78105, "loss": 0.1056, "lr": 1.7806479893190455e-07, "epoch": 4.4561807822802635, "percentage": 89.12, "elapsed_time": "3:02:52", "remaining_time": "0:22:19", "throughput": 19967.41, "total_tokens": 219090176} +{"current_steps": 69615, "total_steps": 78105, "loss": 0.0991, "lr": 1.7785779445945867e-07, "epoch": 4.4565008642212405, "percentage": 89.13, "elapsed_time": "3:02:53", "remaining_time": "0:22:18", "throughput": 19967.61, "total_tokens": 219106240} +{"current_steps": 69620, "total_steps": 78105, "loss": 0.1892, "lr": 1.7765090594269924e-07, "epoch": 4.4568209461622175, "percentage": 89.14, "elapsed_time": "3:02:53", "remaining_time": "0:22:17", "throughput": 19967.83, "total_tokens": 219122240} +{"current_steps": 69625, "total_steps": 78105, "loss": 0.1367, "lr": 1.7744413339195715e-07, "epoch": 4.4571410281031945, "percentage": 89.14, "elapsed_time": "3:02:54", "remaining_time": "0:22:16", "throughput": 19968.02, "total_tokens": 219137152} +{"current_steps": 69630, "total_steps": 78105, "loss": 0.1387, "lr": 1.772374768175572e-07, "epoch": 4.4574611100441714, "percentage": 89.15, "elapsed_time": "3:02:55", "remaining_time": "0:22:15", "throughput": 19968.18, "total_tokens": 219152128} +{"current_steps": 69635, "total_steps": 78105, "loss": 0.1311, "lr": 1.7703093622981865e-07, "epoch": 4.457781191985148, "percentage": 89.16, "elapsed_time": "3:02:55", "remaining_time": "0:22:15", "throughput": 19968.36, "total_tokens": 219167552} +{"current_steps": 69640, "total_steps": 78105, "loss": 0.1422, "lr": 1.7682451163905517e-07, "epoch": 4.458101273926125, "percentage": 89.16, "elapsed_time": "3:02:56", "remaining_time": "0:22:14", "throughput": 19968.6, "total_tokens": 219184576} +{"current_steps": 69645, "total_steps": 78105, "loss": 0.1922, "lr": 1.766182030555752e-07, "epoch": 4.458421355867102, "percentage": 89.17, "elapsed_time": "3:02:57", "remaining_time": "0:22:13", "throughput": 19968.83, "total_tokens": 219201024} +{"current_steps": 69650, "total_steps": 78105, "loss": 0.1521, "lr": 1.764120104896791e-07, "epoch": 4.4587414378080785, "percentage": 89.17, "elapsed_time": "3:02:57", "remaining_time": "0:22:12", "throughput": 19969.05, "total_tokens": 219217216} +{"current_steps": 69655, "total_steps": 78105, "loss": 0.0957, "lr": 1.7620593395166474e-07, "epoch": 4.4590615197490555, "percentage": 89.18, "elapsed_time": "3:02:58", "remaining_time": "0:22:11", "throughput": 19969.3, "total_tokens": 219234240} +{"current_steps": 69660, "total_steps": 78105, "loss": 0.1359, "lr": 1.7599997345182195e-07, "epoch": 4.4593816016900325, "percentage": 89.19, "elapsed_time": "3:02:59", "remaining_time": "0:22:11", "throughput": 19969.5, "total_tokens": 219250112} +{"current_steps": 69665, "total_steps": 78105, "loss": 0.129, "lr": 1.7579412900043501e-07, "epoch": 4.4597016836310095, "percentage": 89.19, "elapsed_time": "3:02:59", "remaining_time": "0:22:10", "throughput": 19969.66, "total_tokens": 219264896} +{"current_steps": 69670, "total_steps": 78105, "loss": 0.1221, "lr": 1.7558840060778292e-07, "epoch": 4.4600217655719865, "percentage": 89.2, "elapsed_time": "3:03:00", "remaining_time": "0:22:09", "throughput": 19969.86, "total_tokens": 219280384} +{"current_steps": 69675, "total_steps": 78105, "loss": 0.1147, "lr": 1.753827882841383e-07, "epoch": 4.4603418475129635, "percentage": 89.21, "elapsed_time": "3:03:01", "remaining_time": "0:22:08", "throughput": 19970.09, "total_tokens": 219296576} +{"current_steps": 69680, "total_steps": 78105, "loss": 0.1177, "lr": 1.7517729203976958e-07, "epoch": 4.4606619294539405, "percentage": 89.21, "elapsed_time": "3:03:01", "remaining_time": "0:22:07", "throughput": 19970.26, "total_tokens": 219311232} +{"current_steps": 69685, "total_steps": 78105, "loss": 0.1277, "lr": 1.7497191188493662e-07, "epoch": 4.4609820113949175, "percentage": 89.22, "elapsed_time": "3:03:02", "remaining_time": "0:22:07", "throughput": 19970.43, "total_tokens": 219326592} +{"current_steps": 69690, "total_steps": 78105, "loss": 0.124, "lr": 1.7476664782989594e-07, "epoch": 4.461302093335894, "percentage": 89.23, "elapsed_time": "3:03:03", "remaining_time": "0:22:06", "throughput": 19970.63, "total_tokens": 219342272} +{"current_steps": 69695, "total_steps": 78105, "loss": 0.1427, "lr": 1.745614998848974e-07, "epoch": 4.4616221752768705, "percentage": 89.23, "elapsed_time": "3:03:03", "remaining_time": "0:22:05", "throughput": 19970.84, "total_tokens": 219357952} +{"current_steps": 69700, "total_steps": 78105, "loss": 0.1702, "lr": 1.7435646806018448e-07, "epoch": 4.4619422572178475, "percentage": 89.24, "elapsed_time": "3:03:04", "remaining_time": "0:22:04", "throughput": 19971.07, "total_tokens": 219374720} +{"current_steps": 69705, "total_steps": 78105, "loss": 0.1301, "lr": 1.741515523659959e-07, "epoch": 4.4622623391588245, "percentage": 89.25, "elapsed_time": "3:03:05", "remaining_time": "0:22:03", "throughput": 19971.27, "total_tokens": 219390400} +{"current_steps": 69710, "total_steps": 78105, "loss": 0.1124, "lr": 1.7394675281256407e-07, "epoch": 4.4625824210998015, "percentage": 89.25, "elapsed_time": "3:03:05", "remaining_time": "0:22:03", "throughput": 19971.5, "total_tokens": 219406656} +{"current_steps": 69715, "total_steps": 78105, "loss": 0.1343, "lr": 1.7374206941011523e-07, "epoch": 4.4629025030407785, "percentage": 89.26, "elapsed_time": "3:03:06", "remaining_time": "0:22:02", "throughput": 19971.69, "total_tokens": 219421824} +{"current_steps": 69720, "total_steps": 78105, "loss": 0.119, "lr": 1.7353750216887039e-07, "epoch": 4.4632225849817555, "percentage": 89.26, "elapsed_time": "3:03:07", "remaining_time": "0:22:01", "throughput": 19971.89, "total_tokens": 219437440} +{"current_steps": 69725, "total_steps": 78105, "loss": 0.1894, "lr": 1.7333305109904413e-07, "epoch": 4.4635426669227325, "percentage": 89.27, "elapsed_time": "3:03:07", "remaining_time": "0:22:00", "throughput": 19972.06, "total_tokens": 219452224} +{"current_steps": 69730, "total_steps": 78105, "loss": 0.1614, "lr": 1.731287162108472e-07, "epoch": 4.4638627488637095, "percentage": 89.28, "elapsed_time": "3:03:08", "remaining_time": "0:21:59", "throughput": 19972.21, "total_tokens": 219466880} +{"current_steps": 69735, "total_steps": 78105, "loss": 0.1149, "lr": 1.7292449751448087e-07, "epoch": 4.464182830804686, "percentage": 89.28, "elapsed_time": "3:03:09", "remaining_time": "0:21:58", "throughput": 19972.4, "total_tokens": 219482496} +{"current_steps": 69740, "total_steps": 78105, "loss": 0.1285, "lr": 1.7272039502014449e-07, "epoch": 4.464502912745663, "percentage": 89.29, "elapsed_time": "3:03:09", "remaining_time": "0:21:58", "throughput": 19972.61, "total_tokens": 219498368} +{"current_steps": 69745, "total_steps": 78105, "loss": 0.1362, "lr": 1.7251640873802905e-07, "epoch": 4.46482299468664, "percentage": 89.3, "elapsed_time": "3:03:10", "remaining_time": "0:21:57", "throughput": 19972.78, "total_tokens": 219513408} +{"current_steps": 69750, "total_steps": 78105, "loss": 0.1121, "lr": 1.7231253867832032e-07, "epoch": 4.465143076627617, "percentage": 89.3, "elapsed_time": "3:03:11", "remaining_time": "0:21:56", "throughput": 19972.97, "total_tokens": 219529088} +{"current_steps": 69755, "total_steps": 78105, "loss": 0.1074, "lr": 1.7210878485119985e-07, "epoch": 4.4654631585685935, "percentage": 89.31, "elapsed_time": "3:03:11", "remaining_time": "0:21:55", "throughput": 19973.16, "total_tokens": 219544576} +{"current_steps": 69760, "total_steps": 78105, "loss": 0.1674, "lr": 1.7190514726684037e-07, "epoch": 4.4657832405095705, "percentage": 89.32, "elapsed_time": "3:03:12", "remaining_time": "0:21:54", "throughput": 19973.34, "total_tokens": 219560064} +{"current_steps": 69765, "total_steps": 78105, "loss": 0.1022, "lr": 1.7170162593541178e-07, "epoch": 4.4661033224505475, "percentage": 89.32, "elapsed_time": "3:03:13", "remaining_time": "0:21:54", "throughput": 19973.52, "total_tokens": 219575424} +{"current_steps": 69770, "total_steps": 78105, "loss": 0.1462, "lr": 1.7149822086707567e-07, "epoch": 4.4664234043915245, "percentage": 89.33, "elapsed_time": "3:03:14", "remaining_time": "0:21:53", "throughput": 19973.77, "total_tokens": 219592256} +{"current_steps": 69775, "total_steps": 78105, "loss": 0.097, "lr": 1.7129493207199006e-07, "epoch": 4.4667434863325015, "percentage": 89.33, "elapsed_time": "3:03:14", "remaining_time": "0:21:52", "throughput": 19973.98, "total_tokens": 219608320} +{"current_steps": 69780, "total_steps": 78105, "loss": 0.1632, "lr": 1.7109175956030595e-07, "epoch": 4.467063568273478, "percentage": 89.34, "elapsed_time": "3:03:15", "remaining_time": "0:21:51", "throughput": 19974.19, "total_tokens": 219624256} +{"current_steps": 69785, "total_steps": 78105, "loss": 0.1218, "lr": 1.7088870334216827e-07, "epoch": 4.467383650214455, "percentage": 89.35, "elapsed_time": "3:03:16", "remaining_time": "0:21:50", "throughput": 19974.35, "total_tokens": 219638656} +{"current_steps": 69790, "total_steps": 78105, "loss": 0.1249, "lr": 1.7068576342771703e-07, "epoch": 4.467703732155432, "percentage": 89.35, "elapsed_time": "3:03:16", "remaining_time": "0:21:50", "throughput": 19974.52, "total_tokens": 219653632} +{"current_steps": 69795, "total_steps": 78105, "loss": 0.1586, "lr": 1.7048293982708518e-07, "epoch": 4.468023814096409, "percentage": 89.36, "elapsed_time": "3:03:17", "remaining_time": "0:21:49", "throughput": 19974.73, "total_tokens": 219669760} +{"current_steps": 69800, "total_steps": 78105, "loss": 0.1466, "lr": 1.7028023255040244e-07, "epoch": 4.468343896037386, "percentage": 89.37, "elapsed_time": "3:03:18", "remaining_time": "0:21:48", "throughput": 19974.91, "total_tokens": 219685184} +{"current_steps": 69805, "total_steps": 78105, "loss": 0.1214, "lr": 1.7007764160778872e-07, "epoch": 4.468663977978363, "percentage": 89.37, "elapsed_time": "3:03:18", "remaining_time": "0:21:47", "throughput": 19975.18, "total_tokens": 219702720} +{"current_steps": 69810, "total_steps": 78105, "loss": 0.1596, "lr": 1.6987516700936207e-07, "epoch": 4.46898405991934, "percentage": 89.38, "elapsed_time": "3:03:19", "remaining_time": "0:21:46", "throughput": 19975.45, "total_tokens": 219720256} +{"current_steps": 69815, "total_steps": 78105, "loss": 0.1266, "lr": 1.6967280876523217e-07, "epoch": 4.4693041418603165, "percentage": 89.39, "elapsed_time": "3:03:20", "remaining_time": "0:21:46", "throughput": 19975.64, "total_tokens": 219735872} +{"current_steps": 69820, "total_steps": 78105, "loss": 0.1267, "lr": 1.694705668855043e-07, "epoch": 4.4696242238012935, "percentage": 89.39, "elapsed_time": "3:03:20", "remaining_time": "0:21:45", "throughput": 19975.84, "total_tokens": 219751680} +{"current_steps": 69825, "total_steps": 78105, "loss": 0.144, "lr": 1.692684413802767e-07, "epoch": 4.46994430574227, "percentage": 89.4, "elapsed_time": "3:03:21", "remaining_time": "0:21:44", "throughput": 19976.02, "total_tokens": 219767040} +{"current_steps": 69830, "total_steps": 78105, "loss": 0.1171, "lr": 1.6906643225964304e-07, "epoch": 4.470264387683247, "percentage": 89.41, "elapsed_time": "3:03:22", "remaining_time": "0:21:43", "throughput": 19976.28, "total_tokens": 219784384} +{"current_steps": 69835, "total_steps": 78105, "loss": 0.1216, "lr": 1.6886453953368964e-07, "epoch": 4.470584469624224, "percentage": 89.41, "elapsed_time": "3:03:22", "remaining_time": "0:21:42", "throughput": 19976.47, "total_tokens": 219800192} +{"current_steps": 69840, "total_steps": 78105, "loss": 0.114, "lr": 1.6866276321249986e-07, "epoch": 4.470904551565201, "percentage": 89.42, "elapsed_time": "3:03:23", "remaining_time": "0:21:42", "throughput": 19976.69, "total_tokens": 219816448} +{"current_steps": 69845, "total_steps": 78105, "loss": 0.156, "lr": 1.684611033061473e-07, "epoch": 4.471224633506178, "percentage": 89.42, "elapsed_time": "3:03:24", "remaining_time": "0:21:41", "throughput": 19976.88, "total_tokens": 219831808} +{"current_steps": 69850, "total_steps": 78105, "loss": 0.1274, "lr": 1.6825955982470306e-07, "epoch": 4.471544715447155, "percentage": 89.43, "elapsed_time": "3:03:24", "remaining_time": "0:21:40", "throughput": 19977.07, "total_tokens": 219847168} +{"current_steps": 69855, "total_steps": 78105, "loss": 0.1213, "lr": 1.6805813277823075e-07, "epoch": 4.471864797388132, "percentage": 89.44, "elapsed_time": "3:03:25", "remaining_time": "0:21:39", "throughput": 19977.25, "total_tokens": 219862272} +{"current_steps": 69860, "total_steps": 78105, "loss": 0.1077, "lr": 1.6785682217678844e-07, "epoch": 4.472184879329109, "percentage": 89.44, "elapsed_time": "3:03:26", "remaining_time": "0:21:38", "throughput": 19977.46, "total_tokens": 219878272} +{"current_steps": 69865, "total_steps": 78105, "loss": 0.1285, "lr": 1.6765562803042924e-07, "epoch": 4.472504961270085, "percentage": 89.45, "elapsed_time": "3:03:26", "remaining_time": "0:21:38", "throughput": 19977.61, "total_tokens": 219892992} +{"current_steps": 69870, "total_steps": 78105, "loss": 0.1122, "lr": 1.6745455034919838e-07, "epoch": 4.472825043211062, "percentage": 89.46, "elapsed_time": "3:03:27", "remaining_time": "0:21:37", "throughput": 19977.81, "total_tokens": 219908736} +{"current_steps": 69875, "total_steps": 78105, "loss": 0.1247, "lr": 1.6725358914313844e-07, "epoch": 4.473145125152039, "percentage": 89.46, "elapsed_time": "3:03:28", "remaining_time": "0:21:36", "throughput": 19977.99, "total_tokens": 219923840} +{"current_steps": 69880, "total_steps": 78105, "loss": 0.1047, "lr": 1.6705274442228275e-07, "epoch": 4.473465207093016, "percentage": 89.47, "elapsed_time": "3:03:28", "remaining_time": "0:21:35", "throughput": 19978.19, "total_tokens": 219939392} +{"current_steps": 69885, "total_steps": 78105, "loss": 0.1226, "lr": 1.6685201619666164e-07, "epoch": 4.473785289033993, "percentage": 89.48, "elapsed_time": "3:03:29", "remaining_time": "0:21:34", "throughput": 19978.37, "total_tokens": 219954496} +{"current_steps": 69890, "total_steps": 78105, "loss": 0.1456, "lr": 1.6665140447629762e-07, "epoch": 4.47410537097497, "percentage": 89.48, "elapsed_time": "3:03:30", "remaining_time": "0:21:34", "throughput": 19978.59, "total_tokens": 219970752} +{"current_steps": 69895, "total_steps": 78105, "loss": 0.1304, "lr": 1.6645090927120854e-07, "epoch": 4.474425452915947, "percentage": 89.49, "elapsed_time": "3:03:30", "remaining_time": "0:21:33", "throughput": 19978.75, "total_tokens": 219985472} +{"current_steps": 69900, "total_steps": 78105, "loss": 0.1338, "lr": 1.6625053059140612e-07, "epoch": 4.474745534856924, "percentage": 89.49, "elapsed_time": "3:03:31", "remaining_time": "0:21:32", "throughput": 19978.88, "total_tokens": 219999616} +{"current_steps": 69905, "total_steps": 78105, "loss": 0.0955, "lr": 1.660502684468962e-07, "epoch": 4.475065616797901, "percentage": 89.5, "elapsed_time": "3:03:32", "remaining_time": "0:21:31", "throughput": 19979.04, "total_tokens": 220014144} +{"current_steps": 69910, "total_steps": 78105, "loss": 0.1256, "lr": 1.6585012284767858e-07, "epoch": 4.475385698738878, "percentage": 89.51, "elapsed_time": "3:03:32", "remaining_time": "0:21:30", "throughput": 19979.22, "total_tokens": 220029696} +{"current_steps": 69915, "total_steps": 78105, "loss": 0.1082, "lr": 1.6565009380374752e-07, "epoch": 4.475705780679854, "percentage": 89.51, "elapsed_time": "3:03:33", "remaining_time": "0:21:30", "throughput": 19979.44, "total_tokens": 220045760} +{"current_steps": 69920, "total_steps": 78105, "loss": 0.1459, "lr": 1.6545018132509194e-07, "epoch": 4.476025862620831, "percentage": 89.52, "elapsed_time": "3:03:34", "remaining_time": "0:21:29", "throughput": 19979.64, "total_tokens": 220061696} +{"current_steps": 69925, "total_steps": 78105, "loss": 0.1237, "lr": 1.6525038542169386e-07, "epoch": 4.476345944561808, "percentage": 89.53, "elapsed_time": "3:03:34", "remaining_time": "0:21:28", "throughput": 19979.83, "total_tokens": 220077184} +{"current_steps": 69930, "total_steps": 78105, "loss": 0.1802, "lr": 1.6505070610353057e-07, "epoch": 4.476666026502785, "percentage": 89.53, "elapsed_time": "3:03:35", "remaining_time": "0:21:27", "throughput": 19980.05, "total_tokens": 220093440} +{"current_steps": 69935, "total_steps": 78105, "loss": 0.1719, "lr": 1.6485114338057245e-07, "epoch": 4.476986108443762, "percentage": 89.54, "elapsed_time": "3:03:36", "remaining_time": "0:21:26", "throughput": 19980.27, "total_tokens": 220110080} +{"current_steps": 69940, "total_steps": 78105, "loss": 0.1265, "lr": 1.6465169726278485e-07, "epoch": 4.477306190384739, "percentage": 89.55, "elapsed_time": "3:03:37", "remaining_time": "0:21:26", "throughput": 19980.49, "total_tokens": 220126080} +{"current_steps": 69945, "total_steps": 78105, "loss": 0.1821, "lr": 1.644523677601273e-07, "epoch": 4.477626272325716, "percentage": 89.55, "elapsed_time": "3:03:37", "remaining_time": "0:21:25", "throughput": 19980.65, "total_tokens": 220140864} +{"current_steps": 69950, "total_steps": 78105, "loss": 0.1111, "lr": 1.6425315488255294e-07, "epoch": 4.477946354266693, "percentage": 89.56, "elapsed_time": "3:03:38", "remaining_time": "0:21:24", "throughput": 19980.84, "total_tokens": 220156544} +{"current_steps": 69955, "total_steps": 78105, "loss": 0.1253, "lr": 1.640540586400094e-07, "epoch": 4.478266436207669, "percentage": 89.57, "elapsed_time": "3:03:39", "remaining_time": "0:21:23", "throughput": 19981.0, "total_tokens": 220171840} +{"current_steps": 69960, "total_steps": 78105, "loss": 0.0854, "lr": 1.6385507904243924e-07, "epoch": 4.478586518148646, "percentage": 89.57, "elapsed_time": "3:03:39", "remaining_time": "0:21:22", "throughput": 19981.17, "total_tokens": 220186752} +{"current_steps": 69965, "total_steps": 78105, "loss": 0.0735, "lr": 1.6365621609977734e-07, "epoch": 4.478906600089623, "percentage": 89.58, "elapsed_time": "3:03:40", "remaining_time": "0:21:22", "throughput": 19981.32, "total_tokens": 220201088} +{"current_steps": 69970, "total_steps": 78105, "loss": 0.124, "lr": 1.634574698219546e-07, "epoch": 4.4792266820306, "percentage": 89.58, "elapsed_time": "3:03:41", "remaining_time": "0:21:21", "throughput": 19981.52, "total_tokens": 220216768} +{"current_steps": 69975, "total_steps": 78105, "loss": 0.1255, "lr": 1.6325884021889533e-07, "epoch": 4.479546763971577, "percentage": 89.59, "elapsed_time": "3:03:41", "remaining_time": "0:21:20", "throughput": 19981.69, "total_tokens": 220231936} +{"current_steps": 69980, "total_steps": 78105, "loss": 0.1017, "lr": 1.6306032730051742e-07, "epoch": 4.479866845912554, "percentage": 89.6, "elapsed_time": "3:03:42", "remaining_time": "0:21:19", "throughput": 19981.86, "total_tokens": 220246592} +{"current_steps": 69985, "total_steps": 78105, "loss": 0.1462, "lr": 1.6286193107673487e-07, "epoch": 4.480186927853531, "percentage": 89.6, "elapsed_time": "3:03:42", "remaining_time": "0:21:18", "throughput": 19982.05, "total_tokens": 220261888} +{"current_steps": 69990, "total_steps": 78105, "loss": 0.1161, "lr": 1.6266365155745312e-07, "epoch": 4.480507009794508, "percentage": 89.61, "elapsed_time": "3:03:43", "remaining_time": "0:21:18", "throughput": 19982.21, "total_tokens": 220276800} +{"current_steps": 69995, "total_steps": 78105, "loss": 0.1647, "lr": 1.624654887525745e-07, "epoch": 4.480827091735485, "percentage": 89.62, "elapsed_time": "3:03:44", "remaining_time": "0:21:17", "throughput": 19982.4, "total_tokens": 220292160} +{"current_steps": 70000, "total_steps": 78105, "loss": 0.1887, "lr": 1.6226744267199308e-07, "epoch": 4.481147173676461, "percentage": 89.62, "elapsed_time": "3:03:44", "remaining_time": "0:21:16", "throughput": 19982.59, "total_tokens": 220307648} +{"current_steps": 70005, "total_steps": 78105, "loss": 0.1344, "lr": 1.6206951332559894e-07, "epoch": 4.481467255617438, "percentage": 89.63, "elapsed_time": "3:03:45", "remaining_time": "0:21:15", "throughput": 19982.79, "total_tokens": 220323648} +{"current_steps": 70010, "total_steps": 78105, "loss": 0.1439, "lr": 1.6187170072327562e-07, "epoch": 4.481787337558415, "percentage": 89.64, "elapsed_time": "3:03:46", "remaining_time": "0:21:14", "throughput": 19983.01, "total_tokens": 220339968} +{"current_steps": 70015, "total_steps": 78105, "loss": 0.1211, "lr": 1.6167400487490047e-07, "epoch": 4.482107419499392, "percentage": 89.64, "elapsed_time": "3:03:47", "remaining_time": "0:21:14", "throughput": 19983.18, "total_tokens": 220355136} +{"current_steps": 70020, "total_steps": 78105, "loss": 0.1041, "lr": 1.614764257903459e-07, "epoch": 4.482427501440369, "percentage": 89.65, "elapsed_time": "3:03:47", "remaining_time": "0:21:13", "throughput": 19983.36, "total_tokens": 220370304} +{"current_steps": 70025, "total_steps": 78105, "loss": 0.1214, "lr": 1.6127896347947702e-07, "epoch": 4.482747583381346, "percentage": 89.65, "elapsed_time": "3:03:48", "remaining_time": "0:21:12", "throughput": 19983.57, "total_tokens": 220386240} +{"current_steps": 70030, "total_steps": 78105, "loss": 0.157, "lr": 1.6108161795215572e-07, "epoch": 4.483067665322323, "percentage": 89.66, "elapsed_time": "3:03:49", "remaining_time": "0:21:11", "throughput": 19983.76, "total_tokens": 220401600} +{"current_steps": 70035, "total_steps": 78105, "loss": 0.1292, "lr": 1.6088438921823463e-07, "epoch": 4.4833877472633, "percentage": 89.67, "elapsed_time": "3:03:49", "remaining_time": "0:21:10", "throughput": 19983.97, "total_tokens": 220417728} +{"current_steps": 70040, "total_steps": 78105, "loss": 0.1869, "lr": 1.6068727728756368e-07, "epoch": 4.483707829204277, "percentage": 89.67, "elapsed_time": "3:03:50", "remaining_time": "0:21:10", "throughput": 19984.24, "total_tokens": 220435200} +{"current_steps": 70045, "total_steps": 78105, "loss": 0.1442, "lr": 1.6049028216998468e-07, "epoch": 4.484027911145253, "percentage": 89.68, "elapsed_time": "3:03:51", "remaining_time": "0:21:09", "throughput": 19984.39, "total_tokens": 220449600} +{"current_steps": 70050, "total_steps": 78105, "loss": 0.1654, "lr": 1.6029340387533539e-07, "epoch": 4.48434799308623, "percentage": 89.69, "elapsed_time": "3:03:51", "remaining_time": "0:21:08", "throughput": 19984.58, "total_tokens": 220465280} +{"current_steps": 70055, "total_steps": 78105, "loss": 0.1323, "lr": 1.6009664241344618e-07, "epoch": 4.484668075027207, "percentage": 89.69, "elapsed_time": "3:03:52", "remaining_time": "0:21:07", "throughput": 19984.79, "total_tokens": 220481472} +{"current_steps": 70060, "total_steps": 78105, "loss": 0.1493, "lr": 1.5989999779414234e-07, "epoch": 4.484988156968184, "percentage": 89.7, "elapsed_time": "3:03:53", "remaining_time": "0:21:06", "throughput": 19984.99, "total_tokens": 220497344} +{"current_steps": 70065, "total_steps": 78105, "loss": 0.1714, "lr": 1.5970347002724346e-07, "epoch": 4.485308238909161, "percentage": 89.71, "elapsed_time": "3:03:53", "remaining_time": "0:21:06", "throughput": 19985.22, "total_tokens": 220513984} +{"current_steps": 70070, "total_steps": 78105, "loss": 0.1036, "lr": 1.5950705912256366e-07, "epoch": 4.485628320850138, "percentage": 89.71, "elapsed_time": "3:03:54", "remaining_time": "0:21:05", "throughput": 19985.41, "total_tokens": 220529472} +{"current_steps": 70075, "total_steps": 78105, "loss": 0.173, "lr": 1.5931076508990951e-07, "epoch": 4.485948402791115, "percentage": 89.72, "elapsed_time": "3:03:55", "remaining_time": "0:21:04", "throughput": 19985.6, "total_tokens": 220545344} +{"current_steps": 70080, "total_steps": 78105, "loss": 0.1361, "lr": 1.591145879390843e-07, "epoch": 4.486268484732092, "percentage": 89.73, "elapsed_time": "3:03:55", "remaining_time": "0:21:03", "throughput": 19985.83, "total_tokens": 220561920} +{"current_steps": 70085, "total_steps": 78105, "loss": 0.1346, "lr": 1.5891852767988243e-07, "epoch": 4.486588566673069, "percentage": 89.73, "elapsed_time": "3:03:56", "remaining_time": "0:21:02", "throughput": 19986.01, "total_tokens": 220576960} +{"current_steps": 70090, "total_steps": 78105, "loss": 0.1342, "lr": 1.5872258432209549e-07, "epoch": 4.486908648614045, "percentage": 89.74, "elapsed_time": "3:03:57", "remaining_time": "0:21:02", "throughput": 19986.19, "total_tokens": 220592192} +{"current_steps": 70095, "total_steps": 78105, "loss": 0.1167, "lr": 1.585267578755073e-07, "epoch": 4.487228730555022, "percentage": 89.74, "elapsed_time": "3:03:57", "remaining_time": "0:21:01", "throughput": 19986.42, "total_tokens": 220608640} +{"current_steps": 70100, "total_steps": 78105, "loss": 0.1202, "lr": 1.5833104834989648e-07, "epoch": 4.487548812495999, "percentage": 89.75, "elapsed_time": "3:03:58", "remaining_time": "0:21:00", "throughput": 19986.64, "total_tokens": 220625024} +{"current_steps": 70105, "total_steps": 78105, "loss": 0.1104, "lr": 1.5813545575503632e-07, "epoch": 4.487868894436976, "percentage": 89.76, "elapsed_time": "3:03:59", "remaining_time": "0:20:59", "throughput": 19986.8, "total_tokens": 220639744} +{"current_steps": 70110, "total_steps": 78105, "loss": 0.0776, "lr": 1.5793998010069255e-07, "epoch": 4.488188976377953, "percentage": 89.76, "elapsed_time": "3:03:59", "remaining_time": "0:20:58", "throughput": 19986.98, "total_tokens": 220655040} +{"current_steps": 70115, "total_steps": 78105, "loss": 0.0842, "lr": 1.5774462139662716e-07, "epoch": 4.48850905831893, "percentage": 89.77, "elapsed_time": "3:04:00", "remaining_time": "0:20:58", "throughput": 19987.24, "total_tokens": 220672384} +{"current_steps": 70120, "total_steps": 78105, "loss": 0.1939, "lr": 1.5754937965259482e-07, "epoch": 4.488829140259907, "percentage": 89.78, "elapsed_time": "3:04:01", "remaining_time": "0:20:57", "throughput": 19987.43, "total_tokens": 220687744} +{"current_steps": 70125, "total_steps": 78105, "loss": 0.1449, "lr": 1.573542548783452e-07, "epoch": 4.489149222200884, "percentage": 89.78, "elapsed_time": "3:04:01", "remaining_time": "0:20:56", "throughput": 19987.61, "total_tokens": 220702848} +{"current_steps": 70130, "total_steps": 78105, "loss": 0.143, "lr": 1.5715924708362196e-07, "epoch": 4.48946930414186, "percentage": 89.79, "elapsed_time": "3:04:02", "remaining_time": "0:20:55", "throughput": 19987.82, "total_tokens": 220718720} +{"current_steps": 70135, "total_steps": 78105, "loss": 0.1038, "lr": 1.5696435627816225e-07, "epoch": 4.489789386082837, "percentage": 89.8, "elapsed_time": "3:04:03", "remaining_time": "0:20:54", "throughput": 19988.04, "total_tokens": 220735104} +{"current_steps": 70140, "total_steps": 78105, "loss": 0.1334, "lr": 1.5676958247169805e-07, "epoch": 4.490109468023814, "percentage": 89.8, "elapsed_time": "3:04:04", "remaining_time": "0:20:54", "throughput": 19988.2, "total_tokens": 220749888} +{"current_steps": 70145, "total_steps": 78105, "loss": 0.1412, "lr": 1.5657492567395516e-07, "epoch": 4.490429549964791, "percentage": 89.81, "elapsed_time": "3:04:04", "remaining_time": "0:20:53", "throughput": 19988.37, "total_tokens": 220764864} +{"current_steps": 70150, "total_steps": 78105, "loss": 0.1492, "lr": 1.5638038589465443e-07, "epoch": 4.490749631905768, "percentage": 89.81, "elapsed_time": "3:04:05", "remaining_time": "0:20:52", "throughput": 19988.57, "total_tokens": 220780672} +{"current_steps": 70155, "total_steps": 78105, "loss": 0.1121, "lr": 1.5618596314351003e-07, "epoch": 4.491069713846745, "percentage": 89.82, "elapsed_time": "3:04:06", "remaining_time": "0:20:51", "throughput": 19988.74, "total_tokens": 220795712} +{"current_steps": 70160, "total_steps": 78105, "loss": 0.1244, "lr": 1.5599165743022975e-07, "epoch": 4.491389795787722, "percentage": 89.83, "elapsed_time": "3:04:06", "remaining_time": "0:20:50", "throughput": 19988.9, "total_tokens": 220810624} +{"current_steps": 70165, "total_steps": 78105, "loss": 0.1525, "lr": 1.5579746876451696e-07, "epoch": 4.491709877728699, "percentage": 89.83, "elapsed_time": "3:04:07", "remaining_time": "0:20:50", "throughput": 19989.09, "total_tokens": 220826048} +{"current_steps": 70170, "total_steps": 78105, "loss": 0.1572, "lr": 1.5560339715606776e-07, "epoch": 4.492029959669676, "percentage": 89.84, "elapsed_time": "3:04:08", "remaining_time": "0:20:49", "throughput": 19989.38, "total_tokens": 220844352} +{"current_steps": 70175, "total_steps": 78105, "loss": 0.1244, "lr": 1.5540944261457357e-07, "epoch": 4.492350041610653, "percentage": 89.85, "elapsed_time": "3:04:08", "remaining_time": "0:20:48", "throughput": 19989.55, "total_tokens": 220859712} +{"current_steps": 70180, "total_steps": 78105, "loss": 0.0878, "lr": 1.5521560514971916e-07, "epoch": 4.492670123551629, "percentage": 89.85, "elapsed_time": "3:04:09", "remaining_time": "0:20:47", "throughput": 19989.73, "total_tokens": 220874880} +{"current_steps": 70185, "total_steps": 78105, "loss": 0.2133, "lr": 1.5502188477118345e-07, "epoch": 4.492990205492606, "percentage": 89.86, "elapsed_time": "3:04:10", "remaining_time": "0:20:46", "throughput": 19989.93, "total_tokens": 220891136} +{"current_steps": 70190, "total_steps": 78105, "loss": 0.1168, "lr": 1.548282814886412e-07, "epoch": 4.493310287433583, "percentage": 89.87, "elapsed_time": "3:04:10", "remaining_time": "0:20:46", "throughput": 19990.12, "total_tokens": 220906624} +{"current_steps": 70195, "total_steps": 78105, "loss": 0.1324, "lr": 1.546347953117583e-07, "epoch": 4.49363036937456, "percentage": 89.87, "elapsed_time": "3:04:11", "remaining_time": "0:20:45", "throughput": 19990.35, "total_tokens": 220923200} +{"current_steps": 70200, "total_steps": 78105, "loss": 0.1657, "lr": 1.5444142625019753e-07, "epoch": 4.493950451315537, "percentage": 89.88, "elapsed_time": "3:04:12", "remaining_time": "0:20:44", "throughput": 19990.59, "total_tokens": 220939776} +{"current_steps": 70205, "total_steps": 78105, "loss": 0.151, "lr": 1.5424817431361428e-07, "epoch": 4.494270533256514, "percentage": 89.89, "elapsed_time": "3:04:12", "remaining_time": "0:20:43", "throughput": 19990.74, "total_tokens": 220954240} +{"current_steps": 70210, "total_steps": 78105, "loss": 0.1397, "lr": 1.540550395116583e-07, "epoch": 4.494590615197491, "percentage": 89.89, "elapsed_time": "3:04:13", "remaining_time": "0:20:42", "throughput": 19990.89, "total_tokens": 220968640} +{"current_steps": 70215, "total_steps": 78105, "loss": 0.1244, "lr": 1.5386202185397465e-07, "epoch": 4.494910697138468, "percentage": 89.9, "elapsed_time": "3:04:14", "remaining_time": "0:20:42", "throughput": 19991.12, "total_tokens": 220985216} +{"current_steps": 70220, "total_steps": 78105, "loss": 0.1985, "lr": 1.5366912135020062e-07, "epoch": 4.495230779079444, "percentage": 89.9, "elapsed_time": "3:04:14", "remaining_time": "0:20:41", "throughput": 19991.33, "total_tokens": 221001408} +{"current_steps": 70225, "total_steps": 78105, "loss": 0.1547, "lr": 1.5347633800996963e-07, "epoch": 4.495550861020421, "percentage": 89.91, "elapsed_time": "3:04:15", "remaining_time": "0:20:40", "throughput": 19991.51, "total_tokens": 221016576} +{"current_steps": 70230, "total_steps": 78105, "loss": 0.1679, "lr": 1.5328367184290677e-07, "epoch": 4.495870942961398, "percentage": 89.92, "elapsed_time": "3:04:16", "remaining_time": "0:20:39", "throughput": 19991.69, "total_tokens": 221031680} +{"current_steps": 70235, "total_steps": 78105, "loss": 0.1326, "lr": 1.530911228586346e-07, "epoch": 4.496191024902375, "percentage": 89.92, "elapsed_time": "3:04:16", "remaining_time": "0:20:38", "throughput": 19991.89, "total_tokens": 221047424} +{"current_steps": 70240, "total_steps": 78105, "loss": 0.1601, "lr": 1.528986910667668e-07, "epoch": 4.496511106843352, "percentage": 89.93, "elapsed_time": "3:04:17", "remaining_time": "0:20:38", "throughput": 19992.07, "total_tokens": 221062784} +{"current_steps": 70245, "total_steps": 78105, "loss": 0.1514, "lr": 1.5270637647691267e-07, "epoch": 4.496831188784329, "percentage": 89.94, "elapsed_time": "3:04:18", "remaining_time": "0:20:37", "throughput": 19992.25, "total_tokens": 221077824} +{"current_steps": 70250, "total_steps": 78105, "loss": 0.1055, "lr": 1.525141790986759e-07, "epoch": 4.497151270725306, "percentage": 89.94, "elapsed_time": "3:04:18", "remaining_time": "0:20:36", "throughput": 19992.45, "total_tokens": 221093568} +{"current_steps": 70255, "total_steps": 78105, "loss": 0.1353, "lr": 1.5232209894165295e-07, "epoch": 4.497471352666283, "percentage": 89.95, "elapsed_time": "3:04:19", "remaining_time": "0:20:35", "throughput": 19992.69, "total_tokens": 221110144} +{"current_steps": 70260, "total_steps": 78105, "loss": 0.1503, "lr": 1.5213013601543619e-07, "epoch": 4.49779143460726, "percentage": 89.96, "elapsed_time": "3:04:20", "remaining_time": "0:20:34", "throughput": 19992.88, "total_tokens": 221125632} +{"current_steps": 70265, "total_steps": 78105, "loss": 0.0863, "lr": 1.5193829032961015e-07, "epoch": 4.498111516548236, "percentage": 89.96, "elapsed_time": "3:04:20", "remaining_time": "0:20:34", "throughput": 19993.1, "total_tokens": 221142016} +{"current_steps": 70270, "total_steps": 78105, "loss": 0.1553, "lr": 1.517465618937558e-07, "epoch": 4.498431598489213, "percentage": 89.97, "elapsed_time": "3:04:21", "remaining_time": "0:20:33", "throughput": 19993.32, "total_tokens": 221158592} +{"current_steps": 70275, "total_steps": 78105, "loss": 0.1032, "lr": 1.5155495071744658e-07, "epoch": 4.49875168043019, "percentage": 89.98, "elapsed_time": "3:04:22", "remaining_time": "0:20:32", "throughput": 19993.5, "total_tokens": 221174016} +{"current_steps": 70280, "total_steps": 78105, "loss": 0.1602, "lr": 1.513634568102504e-07, "epoch": 4.499071762371167, "percentage": 89.98, "elapsed_time": "3:04:22", "remaining_time": "0:20:31", "throughput": 19993.66, "total_tokens": 221188480} +{"current_steps": 70285, "total_steps": 78105, "loss": 0.1548, "lr": 1.5117208018172957e-07, "epoch": 4.499391844312144, "percentage": 89.99, "elapsed_time": "3:04:23", "remaining_time": "0:20:30", "throughput": 19993.86, "total_tokens": 221204480} +{"current_steps": 70290, "total_steps": 78105, "loss": 0.1047, "lr": 1.5098082084144068e-07, "epoch": 4.499711926253121, "percentage": 89.99, "elapsed_time": "3:04:24", "remaining_time": "0:20:30", "throughput": 19994.13, "total_tokens": 221222208} +{"current_steps": 70295, "total_steps": 78105, "loss": 0.1387, "lr": 1.5078967879893353e-07, "epoch": 4.500032008194098, "percentage": 90.0, "elapsed_time": "3:04:25", "remaining_time": "0:20:29", "throughput": 19994.3, "total_tokens": 221237504} +{"current_steps": 70300, "total_steps": 78105, "loss": 0.1378, "lr": 1.5059865406375384e-07, "epoch": 4.500352090135075, "percentage": 90.01, "elapsed_time": "3:04:25", "remaining_time": "0:20:28", "throughput": 19994.49, "total_tokens": 221253056} +{"current_steps": 70305, "total_steps": 78105, "loss": 0.1645, "lr": 1.50407746645439e-07, "epoch": 4.500672172076051, "percentage": 90.01, "elapsed_time": "3:04:26", "remaining_time": "0:20:27", "throughput": 19994.7, "total_tokens": 221269248} +{"current_steps": 70308, "total_steps": 78105, "eval_loss": 0.6086835265159607, "epoch": 4.500864221240637, "percentage": 90.02, "elapsed_time": "3:05:17", "remaining_time": "0:20:32", "throughput": 19903.16, "total_tokens": 221278272} +{"current_steps": 70310, "total_steps": 78105, "loss": 0.1307, "lr": 1.502169565535236e-07, "epoch": 4.500992254017028, "percentage": 90.02, "elapsed_time": "3:05:52", "remaining_time": "0:20:36", "throughput": 19841.96, "total_tokens": 221286720} +{"current_steps": 70315, "total_steps": 78105, "loss": 0.1483, "lr": 1.5002628379753305e-07, "epoch": 4.501312335958005, "percentage": 90.03, "elapsed_time": "3:05:53", "remaining_time": "0:20:35", "throughput": 19842.13, "total_tokens": 221301824} +{"current_steps": 70320, "total_steps": 78105, "loss": 0.169, "lr": 1.4983572838698972e-07, "epoch": 4.501632417898982, "percentage": 90.03, "elapsed_time": "3:05:53", "remaining_time": "0:20:34", "throughput": 19842.31, "total_tokens": 221316800} +{"current_steps": 70325, "total_steps": 78105, "loss": 0.121, "lr": 1.4964529033140856e-07, "epoch": 4.501952499839959, "percentage": 90.04, "elapsed_time": "3:05:54", "remaining_time": "0:20:34", "throughput": 19842.57, "total_tokens": 221334400} +{"current_steps": 70330, "total_steps": 78105, "loss": 0.1592, "lr": 1.494549696402986e-07, "epoch": 4.502272581780936, "percentage": 90.05, "elapsed_time": "3:05:55", "remaining_time": "0:20:33", "throughput": 19842.76, "total_tokens": 221349504} +{"current_steps": 70335, "total_steps": 78105, "loss": 0.1331, "lr": 1.4926476632316473e-07, "epoch": 4.502592663721913, "percentage": 90.05, "elapsed_time": "3:05:55", "remaining_time": "0:20:32", "throughput": 19842.95, "total_tokens": 221364800} +{"current_steps": 70340, "total_steps": 78105, "loss": 0.1543, "lr": 1.4907468038950324e-07, "epoch": 4.50291274566289, "percentage": 90.06, "elapsed_time": "3:05:56", "remaining_time": "0:20:31", "throughput": 19843.15, "total_tokens": 221380224} +{"current_steps": 70345, "total_steps": 78105, "loss": 0.1287, "lr": 1.4888471184880683e-07, "epoch": 4.503232827603867, "percentage": 90.06, "elapsed_time": "3:05:57", "remaining_time": "0:20:30", "throughput": 19843.36, "total_tokens": 221396224} +{"current_steps": 70350, "total_steps": 78105, "loss": 0.1297, "lr": 1.486948607105615e-07, "epoch": 4.503552909544844, "percentage": 90.07, "elapsed_time": "3:05:57", "remaining_time": "0:20:29", "throughput": 19843.54, "total_tokens": 221411328} +{"current_steps": 70355, "total_steps": 78105, "loss": 0.1418, "lr": 1.4850512698424747e-07, "epoch": 4.50387299148582, "percentage": 90.08, "elapsed_time": "3:05:58", "remaining_time": "0:20:29", "throughput": 19843.73, "total_tokens": 221426816} +{"current_steps": 70360, "total_steps": 78105, "loss": 0.1001, "lr": 1.483155106793388e-07, "epoch": 4.504193073426797, "percentage": 90.08, "elapsed_time": "3:05:59", "remaining_time": "0:20:28", "throughput": 19843.88, "total_tokens": 221441024} +{"current_steps": 70365, "total_steps": 78105, "loss": 0.0924, "lr": 1.4812601180530405e-07, "epoch": 4.504513155367774, "percentage": 90.09, "elapsed_time": "3:05:59", "remaining_time": "0:20:27", "throughput": 19844.08, "total_tokens": 221456960} +{"current_steps": 70370, "total_steps": 78105, "loss": 0.1594, "lr": 1.4793663037160565e-07, "epoch": 4.504833237308751, "percentage": 90.1, "elapsed_time": "3:06:00", "remaining_time": "0:20:26", "throughput": 19844.27, "total_tokens": 221472384} +{"current_steps": 70375, "total_steps": 78105, "loss": 0.1194, "lr": 1.477473663877002e-07, "epoch": 4.505153319249728, "percentage": 90.1, "elapsed_time": "3:06:01", "remaining_time": "0:20:25", "throughput": 19844.48, "total_tokens": 221488320} +{"current_steps": 70380, "total_steps": 78105, "loss": 0.1285, "lr": 1.475582198630393e-07, "epoch": 4.505473401190705, "percentage": 90.11, "elapsed_time": "3:06:01", "remaining_time": "0:20:25", "throughput": 19844.71, "total_tokens": 221504576} +{"current_steps": 70385, "total_steps": 78105, "loss": 0.1567, "lr": 1.4736919080706734e-07, "epoch": 4.505793483131682, "percentage": 90.12, "elapsed_time": "3:06:02", "remaining_time": "0:20:24", "throughput": 19844.9, "total_tokens": 221519744} +{"current_steps": 70390, "total_steps": 78105, "loss": 0.1245, "lr": 1.4718027922922374e-07, "epoch": 4.506113565072659, "percentage": 90.12, "elapsed_time": "3:06:03", "remaining_time": "0:20:23", "throughput": 19845.13, "total_tokens": 221535872} +{"current_steps": 70395, "total_steps": 78105, "loss": 0.1983, "lr": 1.4699148513894173e-07, "epoch": 4.506433647013635, "percentage": 90.13, "elapsed_time": "3:06:03", "remaining_time": "0:20:22", "throughput": 19845.33, "total_tokens": 221551744} +{"current_steps": 70400, "total_steps": 78105, "loss": 0.1324, "lr": 1.4680280854564826e-07, "epoch": 4.506753728954612, "percentage": 90.14, "elapsed_time": "3:06:04", "remaining_time": "0:20:21", "throughput": 19845.55, "total_tokens": 221567488} +{"current_steps": 70405, "total_steps": 78105, "loss": 0.0993, "lr": 1.4661424945876524e-07, "epoch": 4.507073810895589, "percentage": 90.14, "elapsed_time": "3:06:05", "remaining_time": "0:20:21", "throughput": 19845.73, "total_tokens": 221582528} +{"current_steps": 70410, "total_steps": 78105, "loss": 0.1916, "lr": 1.4642580788770843e-07, "epoch": 4.507393892836566, "percentage": 90.15, "elapsed_time": "3:06:05", "remaining_time": "0:20:20", "throughput": 19845.94, "total_tokens": 221598208} +{"current_steps": 70415, "total_steps": 78105, "loss": 0.1744, "lr": 1.4623748384188703e-07, "epoch": 4.507713974777543, "percentage": 90.15, "elapsed_time": "3:06:06", "remaining_time": "0:20:19", "throughput": 19846.16, "total_tokens": 221614848} +{"current_steps": 70420, "total_steps": 78105, "loss": 0.1177, "lr": 1.4604927733070595e-07, "epoch": 4.50803405671852, "percentage": 90.16, "elapsed_time": "3:06:07", "remaining_time": "0:20:18", "throughput": 19846.36, "total_tokens": 221630336} +{"current_steps": 70425, "total_steps": 78105, "loss": 0.0852, "lr": 1.4586118836356216e-07, "epoch": 4.508354138659497, "percentage": 90.17, "elapsed_time": "3:06:08", "remaining_time": "0:20:17", "throughput": 19846.61, "total_tokens": 221647808} +{"current_steps": 70430, "total_steps": 78105, "loss": 0.1256, "lr": 1.4567321694984843e-07, "epoch": 4.508674220600474, "percentage": 90.17, "elapsed_time": "3:06:08", "remaining_time": "0:20:17", "throughput": 19846.8, "total_tokens": 221663232} +{"current_steps": 70435, "total_steps": 78105, "loss": 0.1454, "lr": 1.454853630989514e-07, "epoch": 4.508994302541451, "percentage": 90.18, "elapsed_time": "3:06:09", "remaining_time": "0:20:16", "throughput": 19846.98, "total_tokens": 221678272} +{"current_steps": 70440, "total_steps": 78105, "loss": 0.1047, "lr": 1.452976268202505e-07, "epoch": 4.509314384482428, "percentage": 90.19, "elapsed_time": "3:06:10", "remaining_time": "0:20:15", "throughput": 19847.22, "total_tokens": 221695040} +{"current_steps": 70445, "total_steps": 78105, "loss": 0.1347, "lr": 1.451100081231216e-07, "epoch": 4.509634466423404, "percentage": 90.19, "elapsed_time": "3:06:10", "remaining_time": "0:20:14", "throughput": 19847.48, "total_tokens": 221712704} +{"current_steps": 70450, "total_steps": 78105, "loss": 0.1447, "lr": 1.4492250701693218e-07, "epoch": 4.509954548364381, "percentage": 90.2, "elapsed_time": "3:06:11", "remaining_time": "0:20:13", "throughput": 19847.69, "total_tokens": 221728448} +{"current_steps": 70455, "total_steps": 78105, "loss": 0.1322, "lr": 1.447351235110464e-07, "epoch": 4.510274630305358, "percentage": 90.21, "elapsed_time": "3:06:12", "remaining_time": "0:20:13", "throughput": 19847.88, "total_tokens": 221743936} +{"current_steps": 70460, "total_steps": 78105, "loss": 0.0946, "lr": 1.4454785761481933e-07, "epoch": 4.510594712246335, "percentage": 90.21, "elapsed_time": "3:06:12", "remaining_time": "0:20:12", "throughput": 19848.06, "total_tokens": 221758976} +{"current_steps": 70465, "total_steps": 78105, "loss": 0.0909, "lr": 1.443607093376037e-07, "epoch": 4.510914794187312, "percentage": 90.22, "elapsed_time": "3:06:13", "remaining_time": "0:20:11", "throughput": 19848.32, "total_tokens": 221776512} +{"current_steps": 70470, "total_steps": 78105, "loss": 0.1663, "lr": 1.4417367868874433e-07, "epoch": 4.511234876128289, "percentage": 90.22, "elapsed_time": "3:06:14", "remaining_time": "0:20:10", "throughput": 19848.52, "total_tokens": 221792512} +{"current_steps": 70475, "total_steps": 78105, "loss": 0.1203, "lr": 1.4398676567758035e-07, "epoch": 4.511554958069266, "percentage": 90.23, "elapsed_time": "3:06:14", "remaining_time": "0:20:09", "throughput": 19848.71, "total_tokens": 221807680} +{"current_steps": 70480, "total_steps": 78105, "loss": 0.1465, "lr": 1.4379997031344518e-07, "epoch": 4.511875040010243, "percentage": 90.24, "elapsed_time": "3:06:15", "remaining_time": "0:20:09", "throughput": 19848.92, "total_tokens": 221824000} +{"current_steps": 70485, "total_steps": 78105, "loss": 0.1207, "lr": 1.436132926056663e-07, "epoch": 4.512195121951219, "percentage": 90.24, "elapsed_time": "3:06:16", "remaining_time": "0:20:08", "throughput": 19849.09, "total_tokens": 221839168} +{"current_steps": 70490, "total_steps": 78105, "loss": 0.0961, "lr": 1.434267325635655e-07, "epoch": 4.512515203892196, "percentage": 90.25, "elapsed_time": "3:06:17", "remaining_time": "0:20:07", "throughput": 19849.37, "total_tokens": 221857024} +{"current_steps": 70495, "total_steps": 78105, "loss": 0.141, "lr": 1.4324029019645913e-07, "epoch": 4.512835285833173, "percentage": 90.26, "elapsed_time": "3:06:17", "remaining_time": "0:20:06", "throughput": 19849.67, "total_tokens": 221875392} +{"current_steps": 70500, "total_steps": 78105, "loss": 0.1099, "lr": 1.4305396551365675e-07, "epoch": 4.51315536777415, "percentage": 90.26, "elapsed_time": "3:06:18", "remaining_time": "0:20:05", "throughput": 19849.86, "total_tokens": 221891328} +{"current_steps": 70505, "total_steps": 78105, "loss": 0.1362, "lr": 1.4286775852446226e-07, "epoch": 4.513475449715127, "percentage": 90.27, "elapsed_time": "3:06:19", "remaining_time": "0:20:05", "throughput": 19850.05, "total_tokens": 221906560} +{"current_steps": 70510, "total_steps": 78105, "loss": 0.1105, "lr": 1.4268166923817412e-07, "epoch": 4.513795531656104, "percentage": 90.28, "elapsed_time": "3:06:19", "remaining_time": "0:20:04", "throughput": 19850.22, "total_tokens": 221921216} +{"current_steps": 70515, "total_steps": 78105, "loss": 0.0972, "lr": 1.4249569766408484e-07, "epoch": 4.514115613597081, "percentage": 90.28, "elapsed_time": "3:06:20", "remaining_time": "0:20:03", "throughput": 19850.44, "total_tokens": 221937472} +{"current_steps": 70520, "total_steps": 78105, "loss": 0.1015, "lr": 1.4230984381148032e-07, "epoch": 4.514435695538058, "percentage": 90.29, "elapsed_time": "3:06:21", "remaining_time": "0:20:02", "throughput": 19850.65, "total_tokens": 221953472} +{"current_steps": 70525, "total_steps": 78105, "loss": 0.1435, "lr": 1.4212410768964126e-07, "epoch": 4.514755777479035, "percentage": 90.3, "elapsed_time": "3:06:21", "remaining_time": "0:20:01", "throughput": 19850.83, "total_tokens": 221969024} +{"current_steps": 70530, "total_steps": 78105, "loss": 0.2193, "lr": 1.419384893078432e-07, "epoch": 4.515075859420012, "percentage": 90.3, "elapsed_time": "3:06:22", "remaining_time": "0:20:01", "throughput": 19851.01, "total_tokens": 221984832} +{"current_steps": 70535, "total_steps": 78105, "loss": 0.1949, "lr": 1.4175298867535353e-07, "epoch": 4.515395941360988, "percentage": 90.31, "elapsed_time": "3:06:23", "remaining_time": "0:20:00", "throughput": 19851.19, "total_tokens": 222000192} +{"current_steps": 70540, "total_steps": 78105, "loss": 0.153, "lr": 1.4156760580143675e-07, "epoch": 4.515716023301965, "percentage": 90.31, "elapsed_time": "3:06:23", "remaining_time": "0:19:59", "throughput": 19851.35, "total_tokens": 222015168} +{"current_steps": 70545, "total_steps": 78105, "loss": 0.1466, "lr": 1.4138234069534822e-07, "epoch": 4.516036105242942, "percentage": 90.32, "elapsed_time": "3:06:24", "remaining_time": "0:19:58", "throughput": 19851.53, "total_tokens": 222030272} +{"current_steps": 70550, "total_steps": 78105, "loss": 0.1251, "lr": 1.411971933663406e-07, "epoch": 4.516356187183919, "percentage": 90.33, "elapsed_time": "3:06:25", "remaining_time": "0:19:57", "throughput": 19851.69, "total_tokens": 222045120} +{"current_steps": 70555, "total_steps": 78105, "loss": 0.1185, "lr": 1.4101216382365833e-07, "epoch": 4.516676269124896, "percentage": 90.33, "elapsed_time": "3:06:25", "remaining_time": "0:19:56", "throughput": 19851.9, "total_tokens": 222061440} +{"current_steps": 70560, "total_steps": 78105, "loss": 0.0941, "lr": 1.408272520765408e-07, "epoch": 4.516996351065873, "percentage": 90.34, "elapsed_time": "3:06:26", "remaining_time": "0:19:56", "throughput": 19852.08, "total_tokens": 222076480} +{"current_steps": 70565, "total_steps": 78105, "loss": 0.1247, "lr": 1.406424581342228e-07, "epoch": 4.51731643300685, "percentage": 90.35, "elapsed_time": "3:06:27", "remaining_time": "0:19:55", "throughput": 19852.27, "total_tokens": 222091648} +{"current_steps": 70570, "total_steps": 78105, "loss": 0.1387, "lr": 1.4045778200592997e-07, "epoch": 4.517636514947827, "percentage": 90.35, "elapsed_time": "3:06:27", "remaining_time": "0:19:54", "throughput": 19852.48, "total_tokens": 222107712} +{"current_steps": 70575, "total_steps": 78105, "loss": 0.1016, "lr": 1.4027322370088553e-07, "epoch": 4.517956596888803, "percentage": 90.36, "elapsed_time": "3:06:28", "remaining_time": "0:19:53", "throughput": 19852.69, "total_tokens": 222124032} +{"current_steps": 70580, "total_steps": 78105, "loss": 0.1439, "lr": 1.4008878322830488e-07, "epoch": 4.51827667882978, "percentage": 90.37, "elapsed_time": "3:06:29", "remaining_time": "0:19:52", "throughput": 19852.93, "total_tokens": 222140672} +{"current_steps": 70585, "total_steps": 78105, "loss": 0.1312, "lr": 1.3990446059739816e-07, "epoch": 4.518596760770757, "percentage": 90.37, "elapsed_time": "3:06:29", "remaining_time": "0:19:52", "throughput": 19853.1, "total_tokens": 222155712} +{"current_steps": 70590, "total_steps": 78105, "loss": 0.1327, "lr": 1.3972025581736936e-07, "epoch": 4.518916842711734, "percentage": 90.38, "elapsed_time": "3:06:30", "remaining_time": "0:19:51", "throughput": 19853.3, "total_tokens": 222171968} +{"current_steps": 70595, "total_steps": 78105, "loss": 0.136, "lr": 1.3953616889741673e-07, "epoch": 4.519236924652711, "percentage": 90.38, "elapsed_time": "3:06:31", "remaining_time": "0:19:50", "throughput": 19853.47, "total_tokens": 222186816} +{"current_steps": 70600, "total_steps": 78105, "loss": 0.1054, "lr": 1.393521998467326e-07, "epoch": 4.519557006593688, "percentage": 90.39, "elapsed_time": "3:06:32", "remaining_time": "0:19:49", "throughput": 19853.67, "total_tokens": 222202944} +{"current_steps": 70605, "total_steps": 78105, "loss": 0.1645, "lr": 1.3916834867450352e-07, "epoch": 4.519877088534665, "percentage": 90.4, "elapsed_time": "3:06:32", "remaining_time": "0:19:48", "throughput": 19853.85, "total_tokens": 222218112} +{"current_steps": 70610, "total_steps": 78105, "loss": 0.1397, "lr": 1.3898461538990965e-07, "epoch": 4.520197170475642, "percentage": 90.4, "elapsed_time": "3:06:33", "remaining_time": "0:19:48", "throughput": 19854.03, "total_tokens": 222233344} +{"current_steps": 70615, "total_steps": 78105, "loss": 0.1085, "lr": 1.3880100000212642e-07, "epoch": 4.520517252416619, "percentage": 90.41, "elapsed_time": "3:06:34", "remaining_time": "0:19:47", "throughput": 19854.19, "total_tokens": 222247936} +{"current_steps": 70620, "total_steps": 78105, "loss": 0.0946, "lr": 1.3861750252032207e-07, "epoch": 4.520837334357595, "percentage": 90.42, "elapsed_time": "3:06:34", "remaining_time": "0:19:46", "throughput": 19854.37, "total_tokens": 222263168} +{"current_steps": 70625, "total_steps": 78105, "loss": 0.1761, "lr": 1.3843412295365955e-07, "epoch": 4.521157416298572, "percentage": 90.42, "elapsed_time": "3:06:35", "remaining_time": "0:19:45", "throughput": 19854.67, "total_tokens": 222281536} +{"current_steps": 70630, "total_steps": 78105, "loss": 0.1108, "lr": 1.382508613112962e-07, "epoch": 4.521477498239549, "percentage": 90.43, "elapsed_time": "3:06:36", "remaining_time": "0:19:44", "throughput": 19854.88, "total_tokens": 222297472} +{"current_steps": 70635, "total_steps": 78105, "loss": 0.1148, "lr": 1.3806771760238285e-07, "epoch": 4.521797580180526, "percentage": 90.44, "elapsed_time": "3:06:36", "remaining_time": "0:19:44", "throughput": 19855.08, "total_tokens": 222313088} +{"current_steps": 70640, "total_steps": 78105, "loss": 0.1408, "lr": 1.3788469183606462e-07, "epoch": 4.522117662121503, "percentage": 90.44, "elapsed_time": "3:06:37", "remaining_time": "0:19:43", "throughput": 19855.3, "total_tokens": 222329344} +{"current_steps": 70645, "total_steps": 78105, "loss": 0.0952, "lr": 1.3770178402148116e-07, "epoch": 4.52243774406248, "percentage": 90.45, "elapsed_time": "3:06:38", "remaining_time": "0:19:42", "throughput": 19855.48, "total_tokens": 222344128} +{"current_steps": 70650, "total_steps": 78105, "loss": 0.1312, "lr": 1.3751899416776626e-07, "epoch": 4.522757826003457, "percentage": 90.46, "elapsed_time": "3:06:38", "remaining_time": "0:19:41", "throughput": 19855.68, "total_tokens": 222359808} +{"current_steps": 70655, "total_steps": 78105, "loss": 0.0929, "lr": 1.3733632228404626e-07, "epoch": 4.523077907944434, "percentage": 90.46, "elapsed_time": "3:06:39", "remaining_time": "0:19:40", "throughput": 19855.87, "total_tokens": 222375488} +{"current_steps": 70660, "total_steps": 78105, "loss": 0.1294, "lr": 1.371537683794441e-07, "epoch": 4.52339798988541, "percentage": 90.47, "elapsed_time": "3:06:40", "remaining_time": "0:19:40", "throughput": 19856.1, "total_tokens": 222392000} +{"current_steps": 70665, "total_steps": 78105, "loss": 0.1487, "lr": 1.3697133246307532e-07, "epoch": 4.523718071826387, "percentage": 90.47, "elapsed_time": "3:06:40", "remaining_time": "0:19:39", "throughput": 19856.28, "total_tokens": 222407168} +{"current_steps": 70670, "total_steps": 78105, "loss": 0.1413, "lr": 1.3678901454404925e-07, "epoch": 4.524038153767364, "percentage": 90.48, "elapsed_time": "3:06:41", "remaining_time": "0:19:38", "throughput": 19856.5, "total_tokens": 222423808} +{"current_steps": 70675, "total_steps": 78105, "loss": 0.1716, "lr": 1.3660681463147086e-07, "epoch": 4.524358235708341, "percentage": 90.49, "elapsed_time": "3:06:42", "remaining_time": "0:19:37", "throughput": 19856.69, "total_tokens": 222438912} +{"current_steps": 70680, "total_steps": 78105, "loss": 0.1222, "lr": 1.36424732734437e-07, "epoch": 4.524678317649318, "percentage": 90.49, "elapsed_time": "3:06:42", "remaining_time": "0:19:36", "throughput": 19856.85, "total_tokens": 222453696} +{"current_steps": 70685, "total_steps": 78105, "loss": 0.112, "lr": 1.362427688620413e-07, "epoch": 4.524998399590295, "percentage": 90.5, "elapsed_time": "3:06:43", "remaining_time": "0:19:36", "throughput": 19857.05, "total_tokens": 222469120} +{"current_steps": 70690, "total_steps": 78105, "loss": 0.1283, "lr": 1.3606092302336866e-07, "epoch": 4.525318481531272, "percentage": 90.51, "elapsed_time": "3:06:44", "remaining_time": "0:19:35", "throughput": 19857.24, "total_tokens": 222484544} +{"current_steps": 70695, "total_steps": 78105, "loss": 0.0812, "lr": 1.3587919522750044e-07, "epoch": 4.525638563472249, "percentage": 90.51, "elapsed_time": "3:06:44", "remaining_time": "0:19:34", "throughput": 19857.48, "total_tokens": 222501184} +{"current_steps": 70700, "total_steps": 78105, "loss": 0.1355, "lr": 1.3569758548351103e-07, "epoch": 4.525958645413226, "percentage": 90.52, "elapsed_time": "3:06:45", "remaining_time": "0:19:33", "throughput": 19857.66, "total_tokens": 222516608} +{"current_steps": 70705, "total_steps": 78105, "loss": 0.1258, "lr": 1.3551609380046903e-07, "epoch": 4.526278727354203, "percentage": 90.53, "elapsed_time": "3:06:46", "remaining_time": "0:19:32", "throughput": 19857.86, "total_tokens": 222532352} +{"current_steps": 70710, "total_steps": 78105, "loss": 0.0945, "lr": 1.353347201874372e-07, "epoch": 4.526598809295179, "percentage": 90.53, "elapsed_time": "3:06:46", "remaining_time": "0:19:32", "throughput": 19858.08, "total_tokens": 222548736} +{"current_steps": 70715, "total_steps": 78105, "loss": 0.1458, "lr": 1.3515346465347245e-07, "epoch": 4.526918891236156, "percentage": 90.54, "elapsed_time": "3:06:47", "remaining_time": "0:19:31", "throughput": 19858.29, "total_tokens": 222564736} +{"current_steps": 70720, "total_steps": 78105, "loss": 0.1355, "lr": 1.3497232720762505e-07, "epoch": 4.527238973177133, "percentage": 90.54, "elapsed_time": "3:06:48", "remaining_time": "0:19:30", "throughput": 19858.46, "total_tokens": 222579520} +{"current_steps": 70725, "total_steps": 78105, "loss": 0.1466, "lr": 1.3479130785894107e-07, "epoch": 4.52755905511811, "percentage": 90.55, "elapsed_time": "3:06:48", "remaining_time": "0:19:29", "throughput": 19858.65, "total_tokens": 222594752} +{"current_steps": 70730, "total_steps": 78105, "loss": 0.2823, "lr": 1.3461040661645912e-07, "epoch": 4.527879137059087, "percentage": 90.56, "elapsed_time": "3:06:49", "remaining_time": "0:19:28", "throughput": 19858.93, "total_tokens": 222612672} +{"current_steps": 70735, "total_steps": 78105, "loss": 0.1378, "lr": 1.3442962348921285e-07, "epoch": 4.528199219000064, "percentage": 90.56, "elapsed_time": "3:06:50", "remaining_time": "0:19:28", "throughput": 19859.13, "total_tokens": 222627968} +{"current_steps": 70740, "total_steps": 78105, "loss": 0.1256, "lr": 1.3424895848622942e-07, "epoch": 4.528519300941041, "percentage": 90.57, "elapsed_time": "3:06:51", "remaining_time": "0:19:27", "throughput": 19859.37, "total_tokens": 222644480} +{"current_steps": 70745, "total_steps": 78105, "loss": 0.114, "lr": 1.3406841161653e-07, "epoch": 4.528839382882018, "percentage": 90.58, "elapsed_time": "3:06:51", "remaining_time": "0:19:26", "throughput": 19859.55, "total_tokens": 222659392} +{"current_steps": 70750, "total_steps": 78105, "loss": 0.1296, "lr": 1.338879828891307e-07, "epoch": 4.529159464822994, "percentage": 90.58, "elapsed_time": "3:06:52", "remaining_time": "0:19:25", "throughput": 19859.75, "total_tokens": 222675072} +{"current_steps": 70755, "total_steps": 78105, "loss": 0.228, "lr": 1.3370767231304037e-07, "epoch": 4.529479546763971, "percentage": 90.59, "elapsed_time": "3:06:53", "remaining_time": "0:19:24", "throughput": 19859.93, "total_tokens": 222690304} +{"current_steps": 70760, "total_steps": 78105, "loss": 0.1558, "lr": 1.3352747989726385e-07, "epoch": 4.529799628704948, "percentage": 90.6, "elapsed_time": "3:06:53", "remaining_time": "0:19:24", "throughput": 19860.09, "total_tokens": 222705344} +{"current_steps": 70765, "total_steps": 78105, "loss": 0.1268, "lr": 1.3334740565079801e-07, "epoch": 4.530119710645925, "percentage": 90.6, "elapsed_time": "3:06:54", "remaining_time": "0:19:23", "throughput": 19860.26, "total_tokens": 222720192} +{"current_steps": 70770, "total_steps": 78105, "loss": 0.1253, "lr": 1.33167449582636e-07, "epoch": 4.530439792586902, "percentage": 90.61, "elapsed_time": "3:06:55", "remaining_time": "0:19:22", "throughput": 19860.46, "total_tokens": 222735616} +{"current_steps": 70775, "total_steps": 78105, "loss": 0.0883, "lr": 1.3298761170176255e-07, "epoch": 4.530759874527879, "percentage": 90.62, "elapsed_time": "3:06:55", "remaining_time": "0:19:21", "throughput": 19860.68, "total_tokens": 222751936} +{"current_steps": 70780, "total_steps": 78105, "loss": 0.1138, "lr": 1.3280789201715854e-07, "epoch": 4.531079956468856, "percentage": 90.62, "elapsed_time": "3:06:56", "remaining_time": "0:19:20", "throughput": 19860.89, "total_tokens": 222767872} +{"current_steps": 70785, "total_steps": 78105, "loss": 0.1461, "lr": 1.326282905377982e-07, "epoch": 4.531400038409833, "percentage": 90.63, "elapsed_time": "3:06:57", "remaining_time": "0:19:19", "throughput": 19861.07, "total_tokens": 222783360} +{"current_steps": 70790, "total_steps": 78105, "loss": 0.1418, "lr": 1.324488072726496e-07, "epoch": 4.53172012035081, "percentage": 90.63, "elapsed_time": "3:06:57", "remaining_time": "0:19:19", "throughput": 19861.31, "total_tokens": 222799936} +{"current_steps": 70795, "total_steps": 78105, "loss": 0.135, "lr": 1.322694422306761e-07, "epoch": 4.532040202291787, "percentage": 90.64, "elapsed_time": "3:06:58", "remaining_time": "0:19:18", "throughput": 19861.59, "total_tokens": 222817664} +{"current_steps": 70800, "total_steps": 78105, "loss": 0.1051, "lr": 1.3209019542083313e-07, "epoch": 4.532360284232763, "percentage": 90.65, "elapsed_time": "3:06:59", "remaining_time": "0:19:17", "throughput": 19861.8, "total_tokens": 222833664} +{"current_steps": 70805, "total_steps": 78105, "loss": 0.2186, "lr": 1.3191106685207178e-07, "epoch": 4.53268036617374, "percentage": 90.65, "elapsed_time": "3:06:59", "remaining_time": "0:19:16", "throughput": 19862.03, "total_tokens": 222849856} +{"current_steps": 70810, "total_steps": 78105, "loss": 0.1267, "lr": 1.3173205653333715e-07, "epoch": 4.533000448114717, "percentage": 90.66, "elapsed_time": "3:07:00", "remaining_time": "0:19:15", "throughput": 19862.23, "total_tokens": 222865280} +{"current_steps": 70815, "total_steps": 78105, "loss": 0.1373, "lr": 1.3155316447356765e-07, "epoch": 4.533320530055694, "percentage": 90.67, "elapsed_time": "3:07:01", "remaining_time": "0:19:15", "throughput": 19862.42, "total_tokens": 222880832} +{"current_steps": 70820, "total_steps": 78105, "loss": 0.1698, "lr": 1.313743906816961e-07, "epoch": 4.533640611996671, "percentage": 90.67, "elapsed_time": "3:07:01", "remaining_time": "0:19:14", "throughput": 19862.61, "total_tokens": 222895936} +{"current_steps": 70825, "total_steps": 78105, "loss": 0.079, "lr": 1.3119573516664984e-07, "epoch": 4.533960693937648, "percentage": 90.68, "elapsed_time": "3:07:02", "remaining_time": "0:19:13", "throughput": 19862.8, "total_tokens": 222911232} +{"current_steps": 70830, "total_steps": 78105, "loss": 0.1053, "lr": 1.3101719793735007e-07, "epoch": 4.534280775878625, "percentage": 90.69, "elapsed_time": "3:07:03", "remaining_time": "0:19:12", "throughput": 19863.01, "total_tokens": 222927232} +{"current_steps": 70835, "total_steps": 78105, "loss": 0.1166, "lr": 1.3083877900271185e-07, "epoch": 4.534600857819602, "percentage": 90.69, "elapsed_time": "3:07:03", "remaining_time": "0:19:11", "throughput": 19863.21, "total_tokens": 222942848} +{"current_steps": 70840, "total_steps": 78105, "loss": 0.1313, "lr": 1.306604783716442e-07, "epoch": 4.534920939760578, "percentage": 90.7, "elapsed_time": "3:07:04", "remaining_time": "0:19:11", "throughput": 19863.44, "total_tokens": 222959296} +{"current_steps": 70845, "total_steps": 78105, "loss": 0.1726, "lr": 1.3048229605305108e-07, "epoch": 4.535241021701555, "percentage": 90.7, "elapsed_time": "3:07:05", "remaining_time": "0:19:10", "throughput": 19863.7, "total_tokens": 222976320} +{"current_steps": 70850, "total_steps": 78105, "loss": 0.1038, "lr": 1.3030423205582955e-07, "epoch": 4.535561103642532, "percentage": 90.71, "elapsed_time": "3:07:05", "remaining_time": "0:19:09", "throughput": 19863.88, "total_tokens": 222991360} +{"current_steps": 70855, "total_steps": 78105, "loss": 0.133, "lr": 1.3012628638887165e-07, "epoch": 4.535881185583509, "percentage": 90.72, "elapsed_time": "3:07:06", "remaining_time": "0:19:08", "throughput": 19864.03, "total_tokens": 223006080} +{"current_steps": 70860, "total_steps": 78105, "loss": 0.1226, "lr": 1.2994845906106252e-07, "epoch": 4.536201267524486, "percentage": 90.72, "elapsed_time": "3:07:07", "remaining_time": "0:19:07", "throughput": 19864.22, "total_tokens": 223021504} +{"current_steps": 70865, "total_steps": 78105, "loss": 0.1884, "lr": 1.2977075008128255e-07, "epoch": 4.536521349465463, "percentage": 90.73, "elapsed_time": "3:07:08", "remaining_time": "0:19:07", "throughput": 19864.46, "total_tokens": 223038336} +{"current_steps": 70870, "total_steps": 78105, "loss": 0.1449, "lr": 1.295931594584049e-07, "epoch": 4.53684143140644, "percentage": 90.74, "elapsed_time": "3:07:08", "remaining_time": "0:19:06", "throughput": 19864.73, "total_tokens": 223056256} +{"current_steps": 70875, "total_steps": 78105, "loss": 0.118, "lr": 1.2941568720129778e-07, "epoch": 4.537161513347417, "percentage": 90.74, "elapsed_time": "3:07:09", "remaining_time": "0:19:05", "throughput": 19864.96, "total_tokens": 223072768} +{"current_steps": 70880, "total_steps": 78105, "loss": 0.1638, "lr": 1.2923833331882378e-07, "epoch": 4.537481595288394, "percentage": 90.75, "elapsed_time": "3:07:10", "remaining_time": "0:19:04", "throughput": 19865.22, "total_tokens": 223090432} +{"current_steps": 70885, "total_steps": 78105, "loss": 0.1098, "lr": 1.290610978198381e-07, "epoch": 4.53780167722937, "percentage": 90.76, "elapsed_time": "3:07:10", "remaining_time": "0:19:03", "throughput": 19865.38, "total_tokens": 223105280} +{"current_steps": 70890, "total_steps": 78105, "loss": 0.0926, "lr": 1.288839807131917e-07, "epoch": 4.538121759170347, "percentage": 90.76, "elapsed_time": "3:07:11", "remaining_time": "0:19:03", "throughput": 19865.54, "total_tokens": 223120448} +{"current_steps": 70895, "total_steps": 78105, "loss": 0.137, "lr": 1.2870698200772857e-07, "epoch": 4.538441841111324, "percentage": 90.77, "elapsed_time": "3:07:12", "remaining_time": "0:19:02", "throughput": 19865.73, "total_tokens": 223135872} +{"current_steps": 70900, "total_steps": 78105, "loss": 0.0738, "lr": 1.285301017122867e-07, "epoch": 4.538761923052301, "percentage": 90.78, "elapsed_time": "3:07:12", "remaining_time": "0:19:01", "throughput": 19865.89, "total_tokens": 223150592} +{"current_steps": 70905, "total_steps": 78105, "loss": 0.1483, "lr": 1.283533398356998e-07, "epoch": 4.539082004993278, "percentage": 90.78, "elapsed_time": "3:07:13", "remaining_time": "0:19:00", "throughput": 19866.12, "total_tokens": 223167360} +{"current_steps": 70910, "total_steps": 78105, "loss": 0.0748, "lr": 1.2817669638679307e-07, "epoch": 4.539402086934255, "percentage": 90.79, "elapsed_time": "3:07:14", "remaining_time": "0:18:59", "throughput": 19866.32, "total_tokens": 223183040} +{"current_steps": 70915, "total_steps": 78105, "loss": 0.1027, "lr": 1.2800017137438837e-07, "epoch": 4.539722168875232, "percentage": 90.79, "elapsed_time": "3:07:14", "remaining_time": "0:18:59", "throughput": 19866.51, "total_tokens": 223198720} +{"current_steps": 70920, "total_steps": 78105, "loss": 0.1212, "lr": 1.2782376480729915e-07, "epoch": 4.540042250816209, "percentage": 90.8, "elapsed_time": "3:07:15", "remaining_time": "0:18:58", "throughput": 19866.72, "total_tokens": 223214656} +{"current_steps": 70925, "total_steps": 78105, "loss": 0.1335, "lr": 1.2764747669433502e-07, "epoch": 4.540362332757185, "percentage": 90.81, "elapsed_time": "3:07:16", "remaining_time": "0:18:57", "throughput": 19866.9, "total_tokens": 223229568} +{"current_steps": 70930, "total_steps": 78105, "loss": 0.1703, "lr": 1.27471307044299e-07, "epoch": 4.540682414698162, "percentage": 90.81, "elapsed_time": "3:07:16", "remaining_time": "0:18:56", "throughput": 19867.12, "total_tokens": 223245696} +{"current_steps": 70935, "total_steps": 78105, "loss": 0.0976, "lr": 1.2729525586598761e-07, "epoch": 4.541002496639139, "percentage": 90.82, "elapsed_time": "3:07:17", "remaining_time": "0:18:55", "throughput": 19867.4, "total_tokens": 223263488} +{"current_steps": 70940, "total_steps": 78105, "loss": 0.1428, "lr": 1.2711932316819247e-07, "epoch": 4.541322578580116, "percentage": 90.83, "elapsed_time": "3:07:18", "remaining_time": "0:18:55", "throughput": 19867.6, "total_tokens": 223278976} +{"current_steps": 70945, "total_steps": 78105, "loss": 0.1352, "lr": 1.2694350895969848e-07, "epoch": 4.541642660521093, "percentage": 90.83, "elapsed_time": "3:07:19", "remaining_time": "0:18:54", "throughput": 19867.79, "total_tokens": 223294208} +{"current_steps": 70950, "total_steps": 78105, "loss": 0.1224, "lr": 1.2676781324928418e-07, "epoch": 4.54196274246207, "percentage": 90.84, "elapsed_time": "3:07:19", "remaining_time": "0:18:53", "throughput": 19867.99, "total_tokens": 223310208} +{"current_steps": 70955, "total_steps": 78105, "loss": 0.1099, "lr": 1.2659223604572446e-07, "epoch": 4.542282824403047, "percentage": 90.85, "elapsed_time": "3:07:20", "remaining_time": "0:18:52", "throughput": 19868.26, "total_tokens": 223328000} +{"current_steps": 70960, "total_steps": 78105, "loss": 0.1044, "lr": 1.2641677735778513e-07, "epoch": 4.542602906344024, "percentage": 90.85, "elapsed_time": "3:07:21", "remaining_time": "0:18:51", "throughput": 19868.44, "total_tokens": 223343104} +{"current_steps": 70965, "total_steps": 78105, "loss": 0.1606, "lr": 1.2624143719422889e-07, "epoch": 4.542922988285001, "percentage": 90.86, "elapsed_time": "3:07:21", "remaining_time": "0:18:51", "throughput": 19868.6, "total_tokens": 223357632} +{"current_steps": 70970, "total_steps": 78105, "loss": 0.1414, "lr": 1.2606621556381064e-07, "epoch": 4.543243070225978, "percentage": 90.86, "elapsed_time": "3:07:22", "remaining_time": "0:18:50", "throughput": 19868.79, "total_tokens": 223373056} +{"current_steps": 70975, "total_steps": 78105, "loss": 0.1308, "lr": 1.2589111247528012e-07, "epoch": 4.543563152166954, "percentage": 90.87, "elapsed_time": "3:07:23", "remaining_time": "0:18:49", "throughput": 19869.04, "total_tokens": 223389952} +{"current_steps": 70980, "total_steps": 78105, "loss": 0.1504, "lr": 1.2571612793738142e-07, "epoch": 4.543883234107931, "percentage": 90.88, "elapsed_time": "3:07:23", "remaining_time": "0:18:48", "throughput": 19869.23, "total_tokens": 223405440} +{"current_steps": 70985, "total_steps": 78105, "loss": 0.1406, "lr": 1.255412619588517e-07, "epoch": 4.544203316048908, "percentage": 90.88, "elapsed_time": "3:07:24", "remaining_time": "0:18:47", "throughput": 19869.43, "total_tokens": 223420928} +{"current_steps": 70990, "total_steps": 78105, "loss": 0.1182, "lr": 1.2536651454842368e-07, "epoch": 4.544523397989885, "percentage": 90.89, "elapsed_time": "3:07:25", "remaining_time": "0:18:47", "throughput": 19869.62, "total_tokens": 223436288} +{"current_steps": 70995, "total_steps": 78105, "loss": 0.117, "lr": 1.251918857148224e-07, "epoch": 4.544843479930862, "percentage": 90.9, "elapsed_time": "3:07:25", "remaining_time": "0:18:46", "throughput": 19869.82, "total_tokens": 223451968} +{"current_steps": 71000, "total_steps": 78105, "loss": 0.1489, "lr": 1.2501737546676912e-07, "epoch": 4.545163561871839, "percentage": 90.9, "elapsed_time": "3:07:26", "remaining_time": "0:18:45", "throughput": 19870.03, "total_tokens": 223468096} +{"current_steps": 71005, "total_steps": 78105, "loss": 0.0986, "lr": 1.2484298381297666e-07, "epoch": 4.545483643812816, "percentage": 90.91, "elapsed_time": "3:07:27", "remaining_time": "0:18:44", "throughput": 19870.24, "total_tokens": 223484032} +{"current_steps": 71010, "total_steps": 78105, "loss": 0.1461, "lr": 1.2466871076215415e-07, "epoch": 4.545803725753793, "percentage": 90.92, "elapsed_time": "3:07:27", "remaining_time": "0:18:43", "throughput": 19870.44, "total_tokens": 223499136} +{"current_steps": 71015, "total_steps": 78105, "loss": 0.1309, "lr": 1.244945563230035e-07, "epoch": 4.546123807694769, "percentage": 90.92, "elapsed_time": "3:07:28", "remaining_time": "0:18:43", "throughput": 19870.65, "total_tokens": 223515328} +{"current_steps": 71020, "total_steps": 78105, "loss": 0.0979, "lr": 1.2432052050422084e-07, "epoch": 4.546443889635746, "percentage": 90.93, "elapsed_time": "3:07:29", "remaining_time": "0:18:42", "throughput": 19870.84, "total_tokens": 223530880} +{"current_steps": 71025, "total_steps": 78105, "loss": 0.1408, "lr": 1.241466033144975e-07, "epoch": 4.546763971576723, "percentage": 90.94, "elapsed_time": "3:07:29", "remaining_time": "0:18:41", "throughput": 19871.01, "total_tokens": 223545728} +{"current_steps": 71030, "total_steps": 78105, "loss": 0.2186, "lr": 1.239728047625169e-07, "epoch": 4.5470840535177, "percentage": 90.94, "elapsed_time": "3:07:30", "remaining_time": "0:18:40", "throughput": 19871.21, "total_tokens": 223561600} +{"current_steps": 71035, "total_steps": 78105, "loss": 0.1206, "lr": 1.2379912485695838e-07, "epoch": 4.547404135458677, "percentage": 90.95, "elapsed_time": "3:07:31", "remaining_time": "0:18:39", "throughput": 19871.37, "total_tokens": 223576000} +{"current_steps": 71040, "total_steps": 78105, "loss": 0.1115, "lr": 1.2362556360649452e-07, "epoch": 4.547724217399654, "percentage": 90.95, "elapsed_time": "3:07:31", "remaining_time": "0:18:39", "throughput": 19871.56, "total_tokens": 223591168} +{"current_steps": 71045, "total_steps": 78105, "loss": 0.1072, "lr": 1.2345212101979199e-07, "epoch": 4.548044299340631, "percentage": 90.96, "elapsed_time": "3:07:32", "remaining_time": "0:18:38", "throughput": 19871.78, "total_tokens": 223607168} +{"current_steps": 71050, "total_steps": 78105, "loss": 0.1287, "lr": 1.2327879710551133e-07, "epoch": 4.548364381281608, "percentage": 90.97, "elapsed_time": "3:07:33", "remaining_time": "0:18:37", "throughput": 19871.96, "total_tokens": 223622400} +{"current_steps": 71055, "total_steps": 78105, "loss": 0.1505, "lr": 1.2310559187230787e-07, "epoch": 4.548684463222585, "percentage": 90.97, "elapsed_time": "3:07:33", "remaining_time": "0:18:36", "throughput": 19872.15, "total_tokens": 223637696} +{"current_steps": 71060, "total_steps": 78105, "loss": 0.1425, "lr": 1.2293250532883022e-07, "epoch": 4.549004545163562, "percentage": 90.98, "elapsed_time": "3:07:34", "remaining_time": "0:18:35", "throughput": 19872.34, "total_tokens": 223653248} +{"current_steps": 71065, "total_steps": 78105, "loss": 0.1391, "lr": 1.227595374837215e-07, "epoch": 4.549324627104538, "percentage": 90.99, "elapsed_time": "3:07:35", "remaining_time": "0:18:34", "throughput": 19872.55, "total_tokens": 223669248} +{"current_steps": 71070, "total_steps": 78105, "loss": 0.12, "lr": 1.2258668834561866e-07, "epoch": 4.549644709045515, "percentage": 90.99, "elapsed_time": "3:07:35", "remaining_time": "0:18:34", "throughput": 19872.79, "total_tokens": 223685568} +{"current_steps": 71075, "total_steps": 78105, "loss": 0.1335, "lr": 1.2241395792315342e-07, "epoch": 4.549964790986492, "percentage": 91.0, "elapsed_time": "3:07:36", "remaining_time": "0:18:33", "throughput": 19872.98, "total_tokens": 223701184} +{"current_steps": 71080, "total_steps": 78105, "loss": 0.1077, "lr": 1.2224134622495083e-07, "epoch": 4.550284872927469, "percentage": 91.01, "elapsed_time": "3:07:37", "remaining_time": "0:18:32", "throughput": 19873.21, "total_tokens": 223717632} +{"current_steps": 71085, "total_steps": 78105, "loss": 0.1536, "lr": 1.2206885325963008e-07, "epoch": 4.550604954868446, "percentage": 91.01, "elapsed_time": "3:07:37", "remaining_time": "0:18:31", "throughput": 19873.39, "total_tokens": 223732928} +{"current_steps": 71090, "total_steps": 78105, "loss": 0.1241, "lr": 1.218964790358043e-07, "epoch": 4.550925036809423, "percentage": 91.02, "elapsed_time": "3:07:38", "remaining_time": "0:18:30", "throughput": 19873.64, "total_tokens": 223749760} +{"current_steps": 71095, "total_steps": 78105, "loss": 0.1209, "lr": 1.2172422356208164e-07, "epoch": 4.5512451187504, "percentage": 91.02, "elapsed_time": "3:07:39", "remaining_time": "0:18:30", "throughput": 19873.85, "total_tokens": 223765568} +{"current_steps": 71100, "total_steps": 78105, "loss": 0.1231, "lr": 1.215520868470632e-07, "epoch": 4.551565200691377, "percentage": 91.03, "elapsed_time": "3:07:39", "remaining_time": "0:18:29", "throughput": 19874.06, "total_tokens": 223781568} +{"current_steps": 71105, "total_steps": 78105, "loss": 0.1464, "lr": 1.2138006889934411e-07, "epoch": 4.5518852826323535, "percentage": 91.04, "elapsed_time": "3:07:40", "remaining_time": "0:18:28", "throughput": 19874.24, "total_tokens": 223797184} +{"current_steps": 71110, "total_steps": 78105, "loss": 0.1143, "lr": 1.2120816972751553e-07, "epoch": 4.5522053645733305, "percentage": 91.04, "elapsed_time": "3:07:41", "remaining_time": "0:18:27", "throughput": 19874.43, "total_tokens": 223812672} +{"current_steps": 71115, "total_steps": 78105, "loss": 0.1232, "lr": 1.210363893401595e-07, "epoch": 4.5525254465143075, "percentage": 91.05, "elapsed_time": "3:07:42", "remaining_time": "0:18:26", "throughput": 19874.64, "total_tokens": 223828608} +{"current_steps": 71120, "total_steps": 78105, "loss": 0.1691, "lr": 1.2086472774585495e-07, "epoch": 4.5528455284552845, "percentage": 91.06, "elapsed_time": "3:07:42", "remaining_time": "0:18:26", "throughput": 19874.81, "total_tokens": 223843776} +{"current_steps": 71125, "total_steps": 78105, "loss": 0.1783, "lr": 1.206931849531734e-07, "epoch": 4.553165610396261, "percentage": 91.06, "elapsed_time": "3:07:43", "remaining_time": "0:18:25", "throughput": 19875.02, "total_tokens": 223859776} +{"current_steps": 71130, "total_steps": 78105, "loss": 0.1281, "lr": 1.2052176097068075e-07, "epoch": 4.553485692337238, "percentage": 91.07, "elapsed_time": "3:07:44", "remaining_time": "0:18:24", "throughput": 19875.21, "total_tokens": 223875264} +{"current_steps": 71135, "total_steps": 78105, "loss": 0.1319, "lr": 1.2035045580693737e-07, "epoch": 4.553805774278215, "percentage": 91.08, "elapsed_time": "3:07:44", "remaining_time": "0:18:23", "throughput": 19875.43, "total_tokens": 223891264} +{"current_steps": 71140, "total_steps": 78105, "loss": 0.177, "lr": 1.2017926947049647e-07, "epoch": 4.554125856219192, "percentage": 91.08, "elapsed_time": "3:07:45", "remaining_time": "0:18:22", "throughput": 19875.62, "total_tokens": 223906944} +{"current_steps": 71145, "total_steps": 78105, "loss": 0.0987, "lr": 1.2000820196990782e-07, "epoch": 4.554445938160169, "percentage": 91.09, "elapsed_time": "3:07:46", "remaining_time": "0:18:22", "throughput": 19875.79, "total_tokens": 223921728} +{"current_steps": 71150, "total_steps": 78105, "loss": 0.1643, "lr": 1.1983725331371182e-07, "epoch": 4.5547660201011455, "percentage": 91.1, "elapsed_time": "3:07:46", "remaining_time": "0:18:21", "throughput": 19876.0, "total_tokens": 223937664} +{"current_steps": 71155, "total_steps": 78105, "loss": 0.1039, "lr": 1.1966642351044583e-07, "epoch": 4.5550861020421225, "percentage": 91.1, "elapsed_time": "3:07:47", "remaining_time": "0:18:20", "throughput": 19876.23, "total_tokens": 223954368} +{"current_steps": 71160, "total_steps": 78105, "loss": 0.1078, "lr": 1.1949571256863997e-07, "epoch": 4.5554061839830995, "percentage": 91.11, "elapsed_time": "3:07:48", "remaining_time": "0:18:19", "throughput": 19876.42, "total_tokens": 223969728} +{"current_steps": 71165, "total_steps": 78105, "loss": 0.1415, "lr": 1.193251204968185e-07, "epoch": 4.5557262659240765, "percentage": 91.11, "elapsed_time": "3:07:48", "remaining_time": "0:18:18", "throughput": 19876.61, "total_tokens": 223985216} +{"current_steps": 71170, "total_steps": 78105, "loss": 0.1583, "lr": 1.1915464730350018e-07, "epoch": 4.5560463478650535, "percentage": 91.12, "elapsed_time": "3:07:49", "remaining_time": "0:18:18", "throughput": 19876.78, "total_tokens": 224000192} +{"current_steps": 71175, "total_steps": 78105, "loss": 0.0869, "lr": 1.1898429299719739e-07, "epoch": 4.5563664298060305, "percentage": 91.13, "elapsed_time": "3:07:50", "remaining_time": "0:18:17", "throughput": 19877.02, "total_tokens": 224017088} +{"current_steps": 71180, "total_steps": 78105, "loss": 0.1612, "lr": 1.1881405758641662e-07, "epoch": 4.5566865117470075, "percentage": 91.13, "elapsed_time": "3:07:50", "remaining_time": "0:18:16", "throughput": 19877.2, "total_tokens": 224032320} +{"current_steps": 71185, "total_steps": 78105, "loss": 0.1646, "lr": 1.1864394107965916e-07, "epoch": 4.557006593687984, "percentage": 91.14, "elapsed_time": "3:07:51", "remaining_time": "0:18:15", "throughput": 19877.39, "total_tokens": 224047872} +{"current_steps": 71190, "total_steps": 78105, "loss": 0.1223, "lr": 1.1847394348541846e-07, "epoch": 4.5573266756289605, "percentage": 91.15, "elapsed_time": "3:07:52", "remaining_time": "0:18:14", "throughput": 19877.61, "total_tokens": 224064064} +{"current_steps": 71195, "total_steps": 78105, "loss": 0.1125, "lr": 1.1830406481218443e-07, "epoch": 4.5576467575699375, "percentage": 91.15, "elapsed_time": "3:07:52", "remaining_time": "0:18:14", "throughput": 19877.77, "total_tokens": 224078528} +{"current_steps": 71200, "total_steps": 78105, "loss": 0.1145, "lr": 1.181343050684397e-07, "epoch": 4.5579668395109145, "percentage": 91.16, "elapsed_time": "3:07:53", "remaining_time": "0:18:13", "throughput": 19877.95, "total_tokens": 224093696} +{"current_steps": 71205, "total_steps": 78105, "loss": 0.0946, "lr": 1.1796466426266112e-07, "epoch": 4.5582869214518915, "percentage": 91.17, "elapsed_time": "3:07:54", "remaining_time": "0:18:12", "throughput": 19878.16, "total_tokens": 224109184} +{"current_steps": 71210, "total_steps": 78105, "loss": 0.104, "lr": 1.1779514240331969e-07, "epoch": 4.5586070033928685, "percentage": 91.17, "elapsed_time": "3:07:54", "remaining_time": "0:18:11", "throughput": 19878.32, "total_tokens": 224124160} +{"current_steps": 71215, "total_steps": 78105, "loss": 0.1091, "lr": 1.1762573949888002e-07, "epoch": 4.5589270853338455, "percentage": 91.18, "elapsed_time": "3:07:55", "remaining_time": "0:18:10", "throughput": 19878.59, "total_tokens": 224141568} +{"current_steps": 71220, "total_steps": 78105, "loss": 0.0917, "lr": 1.1745645555780227e-07, "epoch": 4.5592471672748225, "percentage": 91.18, "elapsed_time": "3:07:56", "remaining_time": "0:18:10", "throughput": 19878.77, "total_tokens": 224156992} +{"current_steps": 71225, "total_steps": 78105, "loss": 0.1227, "lr": 1.1728729058853806e-07, "epoch": 4.5595672492157995, "percentage": 91.19, "elapsed_time": "3:07:56", "remaining_time": "0:18:09", "throughput": 19878.94, "total_tokens": 224171840} +{"current_steps": 71230, "total_steps": 78105, "loss": 0.1402, "lr": 1.1711824459953641e-07, "epoch": 4.5598873311567765, "percentage": 91.2, "elapsed_time": "3:07:57", "remaining_time": "0:18:08", "throughput": 19879.18, "total_tokens": 224188544} +{"current_steps": 71235, "total_steps": 78105, "loss": 0.14, "lr": 1.1694931759923672e-07, "epoch": 4.5602074130977535, "percentage": 91.2, "elapsed_time": "3:07:58", "remaining_time": "0:18:07", "throughput": 19879.37, "total_tokens": 224203648} +{"current_steps": 71240, "total_steps": 78105, "loss": 0.1254, "lr": 1.1678050959607584e-07, "epoch": 4.56052749503873, "percentage": 91.21, "elapsed_time": "3:07:58", "remaining_time": "0:18:06", "throughput": 19879.56, "total_tokens": 224219328} +{"current_steps": 71245, "total_steps": 78105, "loss": 0.1012, "lr": 1.1661182059848231e-07, "epoch": 4.5608475769797066, "percentage": 91.22, "elapsed_time": "3:07:59", "remaining_time": "0:18:06", "throughput": 19879.79, "total_tokens": 224235712} +{"current_steps": 71250, "total_steps": 78105, "loss": 0.113, "lr": 1.1644325061487965e-07, "epoch": 4.5611676589206835, "percentage": 91.22, "elapsed_time": "3:08:00", "remaining_time": "0:18:05", "throughput": 19880.01, "total_tokens": 224252480} +{"current_steps": 71255, "total_steps": 78105, "loss": 0.1115, "lr": 1.1627479965368615e-07, "epoch": 4.5614877408616605, "percentage": 91.23, "elapsed_time": "3:08:00", "remaining_time": "0:18:04", "throughput": 19880.24, "total_tokens": 224268800} +{"current_steps": 71260, "total_steps": 78105, "loss": 0.0969, "lr": 1.161064677233123e-07, "epoch": 4.5618078228026375, "percentage": 91.24, "elapsed_time": "3:08:01", "remaining_time": "0:18:03", "throughput": 19880.45, "total_tokens": 224285120} +{"current_steps": 71265, "total_steps": 78105, "loss": 0.1204, "lr": 1.1593825483216442e-07, "epoch": 4.5621279047436145, "percentage": 91.24, "elapsed_time": "3:08:02", "remaining_time": "0:18:02", "throughput": 19880.61, "total_tokens": 224299776} +{"current_steps": 71270, "total_steps": 78105, "loss": 0.1214, "lr": 1.1577016098864191e-07, "epoch": 4.5624479866845915, "percentage": 91.25, "elapsed_time": "3:08:03", "remaining_time": "0:18:02", "throughput": 19880.82, "total_tokens": 224315968} +{"current_steps": 71275, "total_steps": 78105, "loss": 0.1247, "lr": 1.156021862011386e-07, "epoch": 4.5627680686255685, "percentage": 91.26, "elapsed_time": "3:08:03", "remaining_time": "0:18:01", "throughput": 19881.07, "total_tokens": 224333120} +{"current_steps": 71280, "total_steps": 78105, "loss": 0.1869, "lr": 1.1543433047804225e-07, "epoch": 4.563088150566545, "percentage": 91.26, "elapsed_time": "3:08:04", "remaining_time": "0:18:00", "throughput": 19881.25, "total_tokens": 224348352} +{"current_steps": 71285, "total_steps": 78105, "loss": 0.1544, "lr": 1.1526659382773475e-07, "epoch": 4.563408232507522, "percentage": 91.27, "elapsed_time": "3:08:05", "remaining_time": "0:17:59", "throughput": 19881.46, "total_tokens": 224364416} +{"current_steps": 71290, "total_steps": 78105, "loss": 0.0978, "lr": 1.150989762585919e-07, "epoch": 4.563728314448499, "percentage": 91.27, "elapsed_time": "3:08:05", "remaining_time": "0:17:58", "throughput": 19881.64, "total_tokens": 224379520} +{"current_steps": 71295, "total_steps": 78105, "loss": 0.1321, "lr": 1.1493147777898367e-07, "epoch": 4.564048396389476, "percentage": 91.28, "elapsed_time": "3:08:06", "remaining_time": "0:17:58", "throughput": 19881.82, "total_tokens": 224394368} +{"current_steps": 71300, "total_steps": 78105, "loss": 0.1334, "lr": 1.1476409839727365e-07, "epoch": 4.564368478330453, "percentage": 91.29, "elapsed_time": "3:08:07", "remaining_time": "0:17:57", "throughput": 19882.03, "total_tokens": 224410304} +{"current_steps": 71305, "total_steps": 78105, "loss": 0.1488, "lr": 1.1459683812182099e-07, "epoch": 4.5646885602714296, "percentage": 91.29, "elapsed_time": "3:08:07", "remaining_time": "0:17:56", "throughput": 19882.29, "total_tokens": 224427328} +{"current_steps": 71310, "total_steps": 78105, "loss": 0.1515, "lr": 1.144296969609765e-07, "epoch": 4.5650086422124065, "percentage": 91.3, "elapsed_time": "3:08:08", "remaining_time": "0:17:55", "throughput": 19882.49, "total_tokens": 224443328} +{"current_steps": 71315, "total_steps": 78105, "loss": 0.0841, "lr": 1.142626749230874e-07, "epoch": 4.5653287241533835, "percentage": 91.31, "elapsed_time": "3:08:09", "remaining_time": "0:17:54", "throughput": 19882.71, "total_tokens": 224459776} +{"current_steps": 71320, "total_steps": 78105, "loss": 0.1807, "lr": 1.1409577201649341e-07, "epoch": 4.5656488060943605, "percentage": 91.31, "elapsed_time": "3:08:09", "remaining_time": "0:17:54", "throughput": 19882.91, "total_tokens": 224474944} +{"current_steps": 71325, "total_steps": 78105, "loss": 0.1202, "lr": 1.1392898824952869e-07, "epoch": 4.5659688880353375, "percentage": 91.32, "elapsed_time": "3:08:10", "remaining_time": "0:17:53", "throughput": 19883.09, "total_tokens": 224490560} +{"current_steps": 71330, "total_steps": 78105, "loss": 0.1203, "lr": 1.1376232363052187e-07, "epoch": 4.566288969976314, "percentage": 91.33, "elapsed_time": "3:08:11", "remaining_time": "0:17:52", "throughput": 19883.26, "total_tokens": 224505280} +{"current_steps": 71335, "total_steps": 78105, "loss": 0.1206, "lr": 1.1359577816779488e-07, "epoch": 4.566609051917291, "percentage": 91.33, "elapsed_time": "3:08:11", "remaining_time": "0:17:51", "throughput": 19883.45, "total_tokens": 224520768} +{"current_steps": 71340, "total_steps": 78105, "loss": 0.1084, "lr": 1.1342935186966497e-07, "epoch": 4.566929133858268, "percentage": 91.34, "elapsed_time": "3:08:12", "remaining_time": "0:17:50", "throughput": 19883.66, "total_tokens": 224536960} +{"current_steps": 71345, "total_steps": 78105, "loss": 0.1134, "lr": 1.1326304474444134e-07, "epoch": 4.567249215799245, "percentage": 91.34, "elapsed_time": "3:08:13", "remaining_time": "0:17:50", "throughput": 19883.89, "total_tokens": 224553792} +{"current_steps": 71350, "total_steps": 78105, "loss": 0.1283, "lr": 1.1309685680042953e-07, "epoch": 4.567569297740222, "percentage": 91.35, "elapsed_time": "3:08:13", "remaining_time": "0:17:49", "throughput": 19884.08, "total_tokens": 224569280} +{"current_steps": 71355, "total_steps": 78105, "loss": 0.1325, "lr": 1.1293078804592767e-07, "epoch": 4.567889379681199, "percentage": 91.36, "elapsed_time": "3:08:14", "remaining_time": "0:17:48", "throughput": 19884.24, "total_tokens": 224583936} +{"current_steps": 71360, "total_steps": 78105, "loss": 0.1016, "lr": 1.1276483848922853e-07, "epoch": 4.568209461622176, "percentage": 91.36, "elapsed_time": "3:08:15", "remaining_time": "0:17:47", "throughput": 19884.46, "total_tokens": 224600192} +{"current_steps": 71365, "total_steps": 78105, "loss": 0.1563, "lr": 1.1259900813861885e-07, "epoch": 4.5685295435631526, "percentage": 91.37, "elapsed_time": "3:08:15", "remaining_time": "0:17:46", "throughput": 19884.63, "total_tokens": 224615424} +{"current_steps": 71370, "total_steps": 78105, "loss": 0.1742, "lr": 1.1243329700237864e-07, "epoch": 4.568849625504129, "percentage": 91.38, "elapsed_time": "3:08:16", "remaining_time": "0:17:46", "throughput": 19884.86, "total_tokens": 224631616} +{"current_steps": 71375, "total_steps": 78105, "loss": 0.1483, "lr": 1.122677050887841e-07, "epoch": 4.569169707445106, "percentage": 91.38, "elapsed_time": "3:08:17", "remaining_time": "0:17:45", "throughput": 19885.08, "total_tokens": 224647936} +{"current_steps": 71380, "total_steps": 78105, "loss": 0.1854, "lr": 1.1210223240610218e-07, "epoch": 4.569489789386083, "percentage": 91.39, "elapsed_time": "3:08:17", "remaining_time": "0:17:44", "throughput": 19885.23, "total_tokens": 224662208} +{"current_steps": 71385, "total_steps": 78105, "loss": 0.1174, "lr": 1.1193687896259742e-07, "epoch": 4.56980987132706, "percentage": 91.4, "elapsed_time": "3:08:18", "remaining_time": "0:17:43", "throughput": 19885.46, "total_tokens": 224678720} +{"current_steps": 71390, "total_steps": 78105, "loss": 0.143, "lr": 1.1177164476652569e-07, "epoch": 4.570129953268037, "percentage": 91.4, "elapsed_time": "3:08:19", "remaining_time": "0:17:42", "throughput": 19885.63, "total_tokens": 224693824} +{"current_steps": 71395, "total_steps": 78105, "loss": 0.1392, "lr": 1.1160652982613818e-07, "epoch": 4.570450035209014, "percentage": 91.41, "elapsed_time": "3:08:19", "remaining_time": "0:17:42", "throughput": 19885.82, "total_tokens": 224709120} +{"current_steps": 71400, "total_steps": 78105, "loss": 0.1682, "lr": 1.1144153414967967e-07, "epoch": 4.570770117149991, "percentage": 91.42, "elapsed_time": "3:08:20", "remaining_time": "0:17:41", "throughput": 19885.99, "total_tokens": 224724288} +{"current_steps": 71405, "total_steps": 78105, "loss": 0.1275, "lr": 1.112766577453897e-07, "epoch": 4.571090199090968, "percentage": 91.42, "elapsed_time": "3:08:21", "remaining_time": "0:17:40", "throughput": 19886.17, "total_tokens": 224739584} +{"current_steps": 71410, "total_steps": 78105, "loss": 0.1433, "lr": 1.1111190062150057e-07, "epoch": 4.571410281031945, "percentage": 91.43, "elapsed_time": "3:08:21", "remaining_time": "0:17:39", "throughput": 19886.38, "total_tokens": 224755136} +{"current_steps": 71415, "total_steps": 78105, "loss": 0.1184, "lr": 1.1094726278624068e-07, "epoch": 4.571730362972921, "percentage": 91.43, "elapsed_time": "3:08:22", "remaining_time": "0:17:38", "throughput": 19886.59, "total_tokens": 224771008} +{"current_steps": 71420, "total_steps": 78105, "loss": 0.1282, "lr": 1.1078274424782959e-07, "epoch": 4.572050444913898, "percentage": 91.44, "elapsed_time": "3:08:23", "remaining_time": "0:17:38", "throughput": 19886.76, "total_tokens": 224786176} +{"current_steps": 71425, "total_steps": 78105, "loss": 0.1075, "lr": 1.1061834501448376e-07, "epoch": 4.572370526854875, "percentage": 91.45, "elapsed_time": "3:08:23", "remaining_time": "0:17:37", "throughput": 19886.95, "total_tokens": 224801600} +{"current_steps": 71430, "total_steps": 78105, "loss": 0.0999, "lr": 1.1045406509441164e-07, "epoch": 4.572690608795852, "percentage": 91.45, "elapsed_time": "3:08:24", "remaining_time": "0:17:36", "throughput": 19887.12, "total_tokens": 224816576} +{"current_steps": 71435, "total_steps": 78105, "loss": 0.1159, "lr": 1.1028990449581694e-07, "epoch": 4.573010690736829, "percentage": 91.46, "elapsed_time": "3:08:25", "remaining_time": "0:17:35", "throughput": 19887.29, "total_tokens": 224831104} +{"current_steps": 71440, "total_steps": 78105, "loss": 0.1193, "lr": 1.10125863226897e-07, "epoch": 4.573330772677806, "percentage": 91.47, "elapsed_time": "3:08:25", "remaining_time": "0:17:34", "throughput": 19887.43, "total_tokens": 224845376} +{"current_steps": 71445, "total_steps": 78105, "loss": 0.1172, "lr": 1.0996194129584276e-07, "epoch": 4.573650854618783, "percentage": 91.47, "elapsed_time": "3:08:26", "remaining_time": "0:17:33", "throughput": 19887.62, "total_tokens": 224860992} +{"current_steps": 71450, "total_steps": 78105, "loss": 0.1335, "lr": 1.0979813871084044e-07, "epoch": 4.57397093655976, "percentage": 91.48, "elapsed_time": "3:08:27", "remaining_time": "0:17:33", "throughput": 19887.8, "total_tokens": 224876672} +{"current_steps": 71455, "total_steps": 78105, "loss": 0.1455, "lr": 1.0963445548006824e-07, "epoch": 4.574291018500736, "percentage": 91.49, "elapsed_time": "3:08:27", "remaining_time": "0:17:32", "throughput": 19888.0, "total_tokens": 224892608} +{"current_steps": 71460, "total_steps": 78105, "loss": 0.1401, "lr": 1.0947089161170099e-07, "epoch": 4.574611100441713, "percentage": 91.49, "elapsed_time": "3:08:28", "remaining_time": "0:17:31", "throughput": 19888.19, "total_tokens": 224907968} +{"current_steps": 71465, "total_steps": 78105, "loss": 0.0782, "lr": 1.0930744711390523e-07, "epoch": 4.57493118238269, "percentage": 91.5, "elapsed_time": "3:08:29", "remaining_time": "0:17:30", "throughput": 19888.35, "total_tokens": 224922496} +{"current_steps": 71470, "total_steps": 78105, "loss": 0.1133, "lr": 1.0914412199484303e-07, "epoch": 4.575251264323667, "percentage": 91.51, "elapsed_time": "3:08:29", "remaining_time": "0:17:29", "throughput": 19888.53, "total_tokens": 224937856} +{"current_steps": 71475, "total_steps": 78105, "loss": 0.1167, "lr": 1.0898091626266983e-07, "epoch": 4.575571346264644, "percentage": 91.51, "elapsed_time": "3:08:30", "remaining_time": "0:17:29", "throughput": 19888.74, "total_tokens": 224953600} +{"current_steps": 71480, "total_steps": 78105, "loss": 0.1403, "lr": 1.0881782992553546e-07, "epoch": 4.575891428205621, "percentage": 91.52, "elapsed_time": "3:08:31", "remaining_time": "0:17:28", "throughput": 19888.98, "total_tokens": 224970432} +{"current_steps": 71485, "total_steps": 78105, "loss": 0.1316, "lr": 1.0865486299158317e-07, "epoch": 4.576211510146598, "percentage": 91.52, "elapsed_time": "3:08:31", "remaining_time": "0:17:27", "throughput": 19889.18, "total_tokens": 224986304} +{"current_steps": 71490, "total_steps": 78105, "loss": 0.1415, "lr": 1.0849201546895089e-07, "epoch": 4.576531592087575, "percentage": 91.53, "elapsed_time": "3:08:32", "remaining_time": "0:17:26", "throughput": 19889.37, "total_tokens": 225001536} +{"current_steps": 71495, "total_steps": 78105, "loss": 0.2262, "lr": 1.083292873657707e-07, "epoch": 4.576851674028552, "percentage": 91.54, "elapsed_time": "3:08:33", "remaining_time": "0:17:25", "throughput": 19889.54, "total_tokens": 225016384} +{"current_steps": 71500, "total_steps": 78105, "loss": 0.1604, "lr": 1.0816667869016806e-07, "epoch": 4.577171755969529, "percentage": 91.54, "elapsed_time": "3:08:33", "remaining_time": "0:17:25", "throughput": 19889.71, "total_tokens": 225031296} +{"current_steps": 71505, "total_steps": 78105, "loss": 0.1623, "lr": 1.0800418945026314e-07, "epoch": 4.577491837910505, "percentage": 91.55, "elapsed_time": "3:08:34", "remaining_time": "0:17:24", "throughput": 19889.91, "total_tokens": 225047168} +{"current_steps": 71510, "total_steps": 78105, "loss": 0.14, "lr": 1.0784181965416918e-07, "epoch": 4.577811919851482, "percentage": 91.56, "elapsed_time": "3:08:35", "remaining_time": "0:17:23", "throughput": 19890.1, "total_tokens": 225062720} +{"current_steps": 71515, "total_steps": 78105, "loss": 0.1263, "lr": 1.0767956930999468e-07, "epoch": 4.578132001792459, "percentage": 91.56, "elapsed_time": "3:08:35", "remaining_time": "0:17:22", "throughput": 19890.31, "total_tokens": 225078720} +{"current_steps": 71520, "total_steps": 78105, "loss": 0.1385, "lr": 1.0751743842584123e-07, "epoch": 4.578452083733436, "percentage": 91.57, "elapsed_time": "3:08:36", "remaining_time": "0:17:21", "throughput": 19890.52, "total_tokens": 225094848} +{"current_steps": 71525, "total_steps": 78105, "loss": 0.1383, "lr": 1.0735542700980512e-07, "epoch": 4.578772165674413, "percentage": 91.58, "elapsed_time": "3:08:37", "remaining_time": "0:17:21", "throughput": 19890.72, "total_tokens": 225110528} +{"current_steps": 71530, "total_steps": 78105, "loss": 0.1028, "lr": 1.071935350699757e-07, "epoch": 4.57909224761539, "percentage": 91.58, "elapsed_time": "3:08:38", "remaining_time": "0:17:20", "throughput": 19890.89, "total_tokens": 225125504} +{"current_steps": 71535, "total_steps": 78105, "loss": 0.1912, "lr": 1.070317626144382e-07, "epoch": 4.579412329556367, "percentage": 91.59, "elapsed_time": "3:08:38", "remaining_time": "0:17:19", "throughput": 19891.08, "total_tokens": 225141312} +{"current_steps": 71540, "total_steps": 78105, "loss": 0.1729, "lr": 1.0687010965126921e-07, "epoch": 4.579732411497344, "percentage": 91.59, "elapsed_time": "3:08:39", "remaining_time": "0:17:18", "throughput": 19891.27, "total_tokens": 225156544} +{"current_steps": 71545, "total_steps": 78105, "loss": 0.0871, "lr": 1.0670857618854197e-07, "epoch": 4.58005249343832, "percentage": 91.6, "elapsed_time": "3:08:40", "remaining_time": "0:17:17", "throughput": 19891.48, "total_tokens": 225172800} +{"current_steps": 71550, "total_steps": 78105, "loss": 0.1186, "lr": 1.0654716223432199e-07, "epoch": 4.580372575379297, "percentage": 91.61, "elapsed_time": "3:08:40", "remaining_time": "0:17:17", "throughput": 19891.65, "total_tokens": 225187648} +{"current_steps": 71555, "total_steps": 78105, "loss": 0.112, "lr": 1.0638586779666976e-07, "epoch": 4.580692657320274, "percentage": 91.61, "elapsed_time": "3:08:41", "remaining_time": "0:17:16", "throughput": 19891.83, "total_tokens": 225202752} +{"current_steps": 71560, "total_steps": 78105, "loss": 0.1416, "lr": 1.0622469288363968e-07, "epoch": 4.581012739261251, "percentage": 91.62, "elapsed_time": "3:08:42", "remaining_time": "0:17:15", "throughput": 19892.03, "total_tokens": 225218240} +{"current_steps": 71565, "total_steps": 78105, "loss": 0.1091, "lr": 1.0606363750327947e-07, "epoch": 4.581332821202228, "percentage": 91.63, "elapsed_time": "3:08:42", "remaining_time": "0:17:14", "throughput": 19892.34, "total_tokens": 225236992} +{"current_steps": 71570, "total_steps": 78105, "loss": 0.1602, "lr": 1.0590270166363187e-07, "epoch": 4.581652903143205, "percentage": 91.63, "elapsed_time": "3:08:43", "remaining_time": "0:17:13", "throughput": 19892.52, "total_tokens": 225252416} +{"current_steps": 71575, "total_steps": 78105, "loss": 0.1033, "lr": 1.0574188537273267e-07, "epoch": 4.581972985084182, "percentage": 91.64, "elapsed_time": "3:08:44", "remaining_time": "0:17:13", "throughput": 19892.71, "total_tokens": 225268160} +{"current_steps": 71580, "total_steps": 78105, "loss": 0.1463, "lr": 1.0558118863861266e-07, "epoch": 4.582293067025159, "percentage": 91.65, "elapsed_time": "3:08:44", "remaining_time": "0:17:12", "throughput": 19892.88, "total_tokens": 225282944} +{"current_steps": 71585, "total_steps": 78105, "loss": 0.1086, "lr": 1.0542061146929599e-07, "epoch": 4.582613148966136, "percentage": 91.65, "elapsed_time": "3:08:45", "remaining_time": "0:17:11", "throughput": 19893.07, "total_tokens": 225298432} +{"current_steps": 71590, "total_steps": 78105, "loss": 0.1555, "lr": 1.0526015387280098e-07, "epoch": 4.582933230907113, "percentage": 91.66, "elapsed_time": "3:08:46", "remaining_time": "0:17:10", "throughput": 19893.26, "total_tokens": 225313984} +{"current_steps": 71595, "total_steps": 78105, "loss": 0.0699, "lr": 1.0509981585714035e-07, "epoch": 4.583253312848089, "percentage": 91.67, "elapsed_time": "3:08:46", "remaining_time": "0:17:09", "throughput": 19893.42, "total_tokens": 225328768} +{"current_steps": 71600, "total_steps": 78105, "loss": 0.1617, "lr": 1.0493959743031995e-07, "epoch": 4.583573394789066, "percentage": 91.67, "elapsed_time": "3:08:47", "remaining_time": "0:17:09", "throughput": 19893.6, "total_tokens": 225343808} +{"current_steps": 71605, "total_steps": 78105, "loss": 0.1486, "lr": 1.0477949860034115e-07, "epoch": 4.583893476730043, "percentage": 91.68, "elapsed_time": "3:08:48", "remaining_time": "0:17:08", "throughput": 19893.79, "total_tokens": 225359360} +{"current_steps": 71610, "total_steps": 78105, "loss": 0.1467, "lr": 1.0461951937519726e-07, "epoch": 4.58421355867102, "percentage": 91.68, "elapsed_time": "3:08:48", "remaining_time": "0:17:07", "throughput": 19893.97, "total_tokens": 225374848} +{"current_steps": 71615, "total_steps": 78105, "loss": 0.1638, "lr": 1.0445965976287803e-07, "epoch": 4.584533640611997, "percentage": 91.69, "elapsed_time": "3:08:49", "remaining_time": "0:17:06", "throughput": 19894.15, "total_tokens": 225390144} +{"current_steps": 71620, "total_steps": 78105, "loss": 0.1237, "lr": 1.0429991977136539e-07, "epoch": 4.584853722552974, "percentage": 91.7, "elapsed_time": "3:08:50", "remaining_time": "0:17:05", "throughput": 19894.31, "total_tokens": 225405056} +{"current_steps": 71625, "total_steps": 78105, "loss": 0.1357, "lr": 1.0414029940863601e-07, "epoch": 4.585173804493951, "percentage": 91.7, "elapsed_time": "3:08:50", "remaining_time": "0:17:05", "throughput": 19894.5, "total_tokens": 225420864} +{"current_steps": 71630, "total_steps": 78105, "loss": 0.1428, "lr": 1.0398079868266048e-07, "epoch": 4.585493886434928, "percentage": 91.71, "elapsed_time": "3:08:51", "remaining_time": "0:17:04", "throughput": 19894.78, "total_tokens": 225439104} +{"current_steps": 71635, "total_steps": 78105, "loss": 0.1331, "lr": 1.0382141760140352e-07, "epoch": 4.585813968375904, "percentage": 91.72, "elapsed_time": "3:08:52", "remaining_time": "0:17:03", "throughput": 19895.0, "total_tokens": 225455104} +{"current_steps": 71640, "total_steps": 78105, "loss": 0.1329, "lr": 1.036621561728235e-07, "epoch": 4.586134050316881, "percentage": 91.72, "elapsed_time": "3:08:52", "remaining_time": "0:17:02", "throughput": 19895.15, "total_tokens": 225469440} +{"current_steps": 71645, "total_steps": 78105, "loss": 0.1366, "lr": 1.0350301440487376e-07, "epoch": 4.586454132257858, "percentage": 91.73, "elapsed_time": "3:08:53", "remaining_time": "0:17:01", "throughput": 19895.33, "total_tokens": 225484800} +{"current_steps": 71650, "total_steps": 78105, "loss": 0.1327, "lr": 1.033439923054999e-07, "epoch": 4.586774214198835, "percentage": 91.74, "elapsed_time": "3:08:54", "remaining_time": "0:17:01", "throughput": 19895.49, "total_tokens": 225499648} +{"current_steps": 71655, "total_steps": 78105, "loss": 0.111, "lr": 1.031850898826442e-07, "epoch": 4.587094296139812, "percentage": 91.74, "elapsed_time": "3:08:54", "remaining_time": "0:17:00", "throughput": 19895.68, "total_tokens": 225515008} +{"current_steps": 71660, "total_steps": 78105, "loss": 0.2199, "lr": 1.0302630714423972e-07, "epoch": 4.587414378080789, "percentage": 91.75, "elapsed_time": "3:08:55", "remaining_time": "0:16:59", "throughput": 19895.86, "total_tokens": 225530304} +{"current_steps": 71665, "total_steps": 78105, "loss": 0.0972, "lr": 1.0286764409821654e-07, "epoch": 4.587734460021766, "percentage": 91.75, "elapsed_time": "3:08:56", "remaining_time": "0:16:58", "throughput": 19896.04, "total_tokens": 225545664} +{"current_steps": 71670, "total_steps": 78105, "loss": 0.204, "lr": 1.027091007524969e-07, "epoch": 4.588054541962743, "percentage": 91.76, "elapsed_time": "3:08:56", "remaining_time": "0:16:57", "throughput": 19896.22, "total_tokens": 225561088} +{"current_steps": 71675, "total_steps": 78105, "loss": 0.1343, "lr": 1.0255067711499756e-07, "epoch": 4.58837462390372, "percentage": 91.77, "elapsed_time": "3:08:57", "remaining_time": "0:16:57", "throughput": 19896.44, "total_tokens": 225577216} +{"current_steps": 71680, "total_steps": 78105, "loss": 0.1733, "lr": 1.0239237319363021e-07, "epoch": 4.588694705844697, "percentage": 91.77, "elapsed_time": "3:08:58", "remaining_time": "0:16:56", "throughput": 19896.62, "total_tokens": 225592512} +{"current_steps": 71685, "total_steps": 78105, "loss": 0.1738, "lr": 1.0223418899629828e-07, "epoch": 4.589014787785673, "percentage": 91.78, "elapsed_time": "3:08:58", "remaining_time": "0:16:55", "throughput": 19896.82, "total_tokens": 225608320} +{"current_steps": 71690, "total_steps": 78105, "loss": 0.143, "lr": 1.0207612453090182e-07, "epoch": 4.58933486972665, "percentage": 91.79, "elapsed_time": "3:08:59", "remaining_time": "0:16:54", "throughput": 19897.03, "total_tokens": 225624128} +{"current_steps": 71695, "total_steps": 78105, "loss": 0.0987, "lr": 1.0191817980533315e-07, "epoch": 4.589654951667627, "percentage": 91.79, "elapsed_time": "3:09:00", "remaining_time": "0:16:53", "throughput": 19897.23, "total_tokens": 225640320} +{"current_steps": 71700, "total_steps": 78105, "loss": 0.1262, "lr": 1.0176035482747981e-07, "epoch": 4.589975033608604, "percentage": 91.8, "elapsed_time": "3:09:00", "remaining_time": "0:16:53", "throughput": 19897.42, "total_tokens": 225655808} +{"current_steps": 71705, "total_steps": 78105, "loss": 0.1595, "lr": 1.016026496052222e-07, "epoch": 4.590295115549581, "percentage": 91.81, "elapsed_time": "3:09:01", "remaining_time": "0:16:52", "throughput": 19897.59, "total_tokens": 225670912} +{"current_steps": 71710, "total_steps": 78105, "loss": 0.1142, "lr": 1.014450641464354e-07, "epoch": 4.590615197490558, "percentage": 91.81, "elapsed_time": "3:09:02", "remaining_time": "0:16:51", "throughput": 19897.8, "total_tokens": 225687104} +{"current_steps": 71715, "total_steps": 78105, "loss": 0.1299, "lr": 1.0128759845898838e-07, "epoch": 4.590935279431535, "percentage": 91.82, "elapsed_time": "3:09:02", "remaining_time": "0:16:50", "throughput": 19897.97, "total_tokens": 225702080} +{"current_steps": 71720, "total_steps": 78105, "loss": 0.0972, "lr": 1.0113025255074432e-07, "epoch": 4.591255361372511, "percentage": 91.83, "elapsed_time": "3:09:03", "remaining_time": "0:16:49", "throughput": 19898.14, "total_tokens": 225717056} +{"current_steps": 71725, "total_steps": 78105, "loss": 0.1374, "lr": 1.0097302642956025e-07, "epoch": 4.591575443313488, "percentage": 91.83, "elapsed_time": "3:09:04", "remaining_time": "0:16:49", "throughput": 19898.31, "total_tokens": 225732224} +{"current_steps": 71730, "total_steps": 78105, "loss": 0.1285, "lr": 1.0081592010328711e-07, "epoch": 4.591895525254465, "percentage": 91.84, "elapsed_time": "3:09:04", "remaining_time": "0:16:48", "throughput": 19898.52, "total_tokens": 225748352} +{"current_steps": 71735, "total_steps": 78105, "loss": 0.118, "lr": 1.006589335797703e-07, "epoch": 4.592215607195442, "percentage": 91.84, "elapsed_time": "3:09:05", "remaining_time": "0:16:47", "throughput": 19898.72, "total_tokens": 225763840} +{"current_steps": 71740, "total_steps": 78105, "loss": 0.1691, "lr": 1.0050206686684827e-07, "epoch": 4.592535689136419, "percentage": 91.85, "elapsed_time": "3:09:06", "remaining_time": "0:16:46", "throughput": 19898.91, "total_tokens": 225779200} +{"current_steps": 71745, "total_steps": 78105, "loss": 0.1638, "lr": 1.0034531997235475e-07, "epoch": 4.592855771077396, "percentage": 91.86, "elapsed_time": "3:09:06", "remaining_time": "0:16:45", "throughput": 19899.09, "total_tokens": 225794496} +{"current_steps": 71750, "total_steps": 78105, "loss": 0.1313, "lr": 1.0018869290411654e-07, "epoch": 4.593175853018373, "percentage": 91.86, "elapsed_time": "3:09:07", "remaining_time": "0:16:45", "throughput": 19899.35, "total_tokens": 225811904} +{"current_steps": 71755, "total_steps": 78105, "loss": 0.1251, "lr": 1.0003218566995487e-07, "epoch": 4.59349593495935, "percentage": 91.87, "elapsed_time": "3:09:08", "remaining_time": "0:16:44", "throughput": 19899.59, "total_tokens": 225828288} +{"current_steps": 71760, "total_steps": 78105, "loss": 0.1297, "lr": 9.987579827768462e-08, "epoch": 4.593816016900327, "percentage": 91.88, "elapsed_time": "3:09:09", "remaining_time": "0:16:43", "throughput": 19899.78, "total_tokens": 225843904} +{"current_steps": 71765, "total_steps": 78105, "loss": 0.1375, "lr": 9.971953073511565e-08, "epoch": 4.594136098841304, "percentage": 91.88, "elapsed_time": "3:09:09", "remaining_time": "0:16:42", "throughput": 19899.99, "total_tokens": 225860032} +{"current_steps": 71770, "total_steps": 78105, "loss": 0.0914, "lr": 9.95633830500503e-08, "epoch": 4.59445618078228, "percentage": 91.89, "elapsed_time": "3:09:10", "remaining_time": "0:16:41", "throughput": 19900.16, "total_tokens": 225875072} +{"current_steps": 71775, "total_steps": 78105, "loss": 0.1657, "lr": 9.940735523028628e-08, "epoch": 4.594776262723257, "percentage": 91.9, "elapsed_time": "3:09:11", "remaining_time": "0:16:41", "throughput": 19900.37, "total_tokens": 225891392} +{"current_steps": 71780, "total_steps": 78105, "loss": 0.1509, "lr": 9.92514472836148e-08, "epoch": 4.595096344664234, "percentage": 91.9, "elapsed_time": "3:09:11", "remaining_time": "0:16:40", "throughput": 19900.54, "total_tokens": 225906176} +{"current_steps": 71785, "total_steps": 78105, "loss": 0.1462, "lr": 9.909565921782077e-08, "epoch": 4.595416426605211, "percentage": 91.91, "elapsed_time": "3:09:12", "remaining_time": "0:16:39", "throughput": 19900.81, "total_tokens": 225923520} +{"current_steps": 71790, "total_steps": 78105, "loss": 0.1051, "lr": 9.893999104068408e-08, "epoch": 4.595736508546188, "percentage": 91.91, "elapsed_time": "3:09:13", "remaining_time": "0:16:38", "throughput": 19901.04, "total_tokens": 225939968} +{"current_steps": 71795, "total_steps": 78105, "loss": 0.1622, "lr": 9.878444275997712e-08, "epoch": 4.596056590487165, "percentage": 91.92, "elapsed_time": "3:09:13", "remaining_time": "0:16:37", "throughput": 19901.2, "total_tokens": 225955136} +{"current_steps": 71800, "total_steps": 78105, "loss": 0.1444, "lr": 9.862901438346839e-08, "epoch": 4.596376672428142, "percentage": 91.93, "elapsed_time": "3:09:14", "remaining_time": "0:16:37", "throughput": 19901.38, "total_tokens": 225970432} +{"current_steps": 71805, "total_steps": 78105, "loss": 0.0974, "lr": 9.847370591891752e-08, "epoch": 4.596696754369119, "percentage": 91.93, "elapsed_time": "3:09:15", "remaining_time": "0:16:36", "throughput": 19901.6, "total_tokens": 225986816} +{"current_steps": 71810, "total_steps": 78105, "loss": 0.1092, "lr": 9.83185173740811e-08, "epoch": 4.597016836310095, "percentage": 91.94, "elapsed_time": "3:09:15", "remaining_time": "0:16:35", "throughput": 19901.8, "total_tokens": 226002688} +{"current_steps": 71815, "total_steps": 78105, "loss": 0.1662, "lr": 9.816344875670791e-08, "epoch": 4.597336918251072, "percentage": 91.95, "elapsed_time": "3:09:16", "remaining_time": "0:16:34", "throughput": 19901.97, "total_tokens": 226017728} +{"current_steps": 71820, "total_steps": 78105, "loss": 0.0916, "lr": 9.80085000745415e-08, "epoch": 4.597657000192049, "percentage": 91.95, "elapsed_time": "3:09:17", "remaining_time": "0:16:33", "throughput": 19902.16, "total_tokens": 226033280} +{"current_steps": 71825, "total_steps": 78105, "loss": 0.1357, "lr": 9.785367133531898e-08, "epoch": 4.597977082133026, "percentage": 91.96, "elapsed_time": "3:09:17", "remaining_time": "0:16:33", "throughput": 19902.36, "total_tokens": 226049472} +{"current_steps": 71830, "total_steps": 78105, "loss": 0.1376, "lr": 9.76989625467717e-08, "epoch": 4.598297164074003, "percentage": 91.97, "elapsed_time": "3:09:18", "remaining_time": "0:16:32", "throughput": 19902.59, "total_tokens": 226065792} +{"current_steps": 71835, "total_steps": 78105, "loss": 0.0953, "lr": 9.75443737166254e-08, "epoch": 4.59861724601498, "percentage": 91.97, "elapsed_time": "3:09:19", "remaining_time": "0:16:31", "throughput": 19902.84, "total_tokens": 226082944} +{"current_steps": 71840, "total_steps": 78105, "loss": 0.1213, "lr": 9.738990485259864e-08, "epoch": 4.598937327955957, "percentage": 91.98, "elapsed_time": "3:09:19", "remaining_time": "0:16:30", "throughput": 19903.03, "total_tokens": 226098368} +{"current_steps": 71845, "total_steps": 78105, "loss": 0.1013, "lr": 9.723555596240553e-08, "epoch": 4.599257409896934, "percentage": 91.99, "elapsed_time": "3:09:20", "remaining_time": "0:16:29", "throughput": 19903.21, "total_tokens": 226113280} +{"current_steps": 71850, "total_steps": 78105, "loss": 0.1224, "lr": 9.708132705375351e-08, "epoch": 4.599577491837911, "percentage": 91.99, "elapsed_time": "3:09:21", "remaining_time": "0:16:29", "throughput": 19903.38, "total_tokens": 226128512} +{"current_steps": 71855, "total_steps": 78105, "loss": 0.1327, "lr": 9.692721813434336e-08, "epoch": 4.599897573778888, "percentage": 92.0, "elapsed_time": "3:09:22", "remaining_time": "0:16:28", "throughput": 19903.58, "total_tokens": 226144640} +{"current_steps": 71860, "total_steps": 78105, "loss": 0.1267, "lr": 9.677322921187088e-08, "epoch": 4.600217655719864, "percentage": 92.0, "elapsed_time": "3:09:22", "remaining_time": "0:16:27", "throughput": 19903.76, "total_tokens": 226159488} +{"current_steps": 71865, "total_steps": 78105, "loss": 0.1427, "lr": 9.661936029402546e-08, "epoch": 4.600537737660841, "percentage": 92.01, "elapsed_time": "3:09:23", "remaining_time": "0:16:26", "throughput": 19904.0, "total_tokens": 226176448} +{"current_steps": 71870, "total_steps": 78105, "loss": 0.1368, "lr": 9.646561138849014e-08, "epoch": 4.600857819601818, "percentage": 92.02, "elapsed_time": "3:09:24", "remaining_time": "0:16:25", "throughput": 19904.2, "total_tokens": 226192576} +{"current_steps": 71875, "total_steps": 78105, "loss": 0.1371, "lr": 9.631198250294294e-08, "epoch": 4.601177901542795, "percentage": 92.02, "elapsed_time": "3:09:24", "remaining_time": "0:16:25", "throughput": 19904.34, "total_tokens": 226206912} +{"current_steps": 71880, "total_steps": 78105, "loss": 0.1285, "lr": 9.615847364505465e-08, "epoch": 4.601497983483772, "percentage": 92.03, "elapsed_time": "3:09:25", "remaining_time": "0:16:24", "throughput": 19904.52, "total_tokens": 226222208} +{"current_steps": 71885, "total_steps": 78105, "loss": 0.129, "lr": 9.600508482249166e-08, "epoch": 4.601818065424749, "percentage": 92.04, "elapsed_time": "3:09:26", "remaining_time": "0:16:23", "throughput": 19904.77, "total_tokens": 226239168} +{"current_steps": 71890, "total_steps": 78105, "loss": 0.1246, "lr": 9.585181604291228e-08, "epoch": 4.602138147365726, "percentage": 92.04, "elapsed_time": "3:09:26", "remaining_time": "0:16:22", "throughput": 19904.95, "total_tokens": 226254656} +{"current_steps": 71895, "total_steps": 78105, "loss": 0.1405, "lr": 9.569866731397038e-08, "epoch": 4.602458229306703, "percentage": 92.05, "elapsed_time": "3:09:27", "remaining_time": "0:16:21", "throughput": 19905.11, "total_tokens": 226269376} +{"current_steps": 71900, "total_steps": 78105, "loss": 0.1312, "lr": 9.554563864331373e-08, "epoch": 4.602778311247679, "percentage": 92.06, "elapsed_time": "3:09:28", "remaining_time": "0:16:21", "throughput": 19905.3, "total_tokens": 226284416} +{"current_steps": 71905, "total_steps": 78105, "loss": 0.143, "lr": 9.539273003858318e-08, "epoch": 4.603098393188656, "percentage": 92.06, "elapsed_time": "3:09:28", "remaining_time": "0:16:20", "throughput": 19905.49, "total_tokens": 226300224} +{"current_steps": 71910, "total_steps": 78105, "loss": 0.1151, "lr": 9.523994150741511e-08, "epoch": 4.603418475129633, "percentage": 92.07, "elapsed_time": "3:09:29", "remaining_time": "0:16:19", "throughput": 19905.68, "total_tokens": 226315840} +{"current_steps": 71915, "total_steps": 78105, "loss": 0.0849, "lr": 9.508727305743815e-08, "epoch": 4.60373855707061, "percentage": 92.07, "elapsed_time": "3:09:30", "remaining_time": "0:16:18", "throughput": 19905.84, "total_tokens": 226330752} +{"current_steps": 71920, "total_steps": 78105, "loss": 0.1479, "lr": 9.49347246962759e-08, "epoch": 4.604058639011587, "percentage": 92.08, "elapsed_time": "3:09:30", "remaining_time": "0:16:17", "throughput": 19906.04, "total_tokens": 226346560} +{"current_steps": 71925, "total_steps": 78105, "loss": 0.1116, "lr": 9.478229643154618e-08, "epoch": 4.604378720952564, "percentage": 92.09, "elapsed_time": "3:09:31", "remaining_time": "0:16:17", "throughput": 19906.21, "total_tokens": 226361408} +{"current_steps": 71930, "total_steps": 78105, "loss": 0.1724, "lr": 9.462998827086012e-08, "epoch": 4.604698802893541, "percentage": 92.09, "elapsed_time": "3:09:32", "remaining_time": "0:16:16", "throughput": 19906.44, "total_tokens": 226378240} +{"current_steps": 71935, "total_steps": 78105, "loss": 0.109, "lr": 9.447780022182357e-08, "epoch": 4.605018884834518, "percentage": 92.1, "elapsed_time": "3:09:32", "remaining_time": "0:16:15", "throughput": 19906.64, "total_tokens": 226393856} +{"current_steps": 71940, "total_steps": 78105, "loss": 0.1899, "lr": 9.432573229203573e-08, "epoch": 4.605338966775495, "percentage": 92.11, "elapsed_time": "3:09:33", "remaining_time": "0:16:14", "throughput": 19906.84, "total_tokens": 226409600} +{"current_steps": 71945, "total_steps": 78105, "loss": 0.1598, "lr": 9.417378448908998e-08, "epoch": 4.605659048716472, "percentage": 92.11, "elapsed_time": "3:09:34", "remaining_time": "0:16:13", "throughput": 19907.05, "total_tokens": 226425792} +{"current_steps": 71950, "total_steps": 78105, "loss": 0.1339, "lr": 9.402195682057385e-08, "epoch": 4.605979130657448, "percentage": 92.12, "elapsed_time": "3:09:34", "remaining_time": "0:16:13", "throughput": 19907.23, "total_tokens": 226441216} +{"current_steps": 71955, "total_steps": 78105, "loss": 0.1129, "lr": 9.387024929406879e-08, "epoch": 4.606299212598425, "percentage": 92.13, "elapsed_time": "3:09:35", "remaining_time": "0:16:12", "throughput": 19907.41, "total_tokens": 226456512} +{"current_steps": 71960, "total_steps": 78105, "loss": 0.1145, "lr": 9.371866191715067e-08, "epoch": 4.606619294539402, "percentage": 92.13, "elapsed_time": "3:09:36", "remaining_time": "0:16:11", "throughput": 19907.68, "total_tokens": 226474112} +{"current_steps": 71965, "total_steps": 78105, "loss": 0.1759, "lr": 9.356719469738873e-08, "epoch": 4.606939376480379, "percentage": 92.14, "elapsed_time": "3:09:36", "remaining_time": "0:16:10", "throughput": 19907.89, "total_tokens": 226489920} +{"current_steps": 71970, "total_steps": 78105, "loss": 0.1302, "lr": 9.341584764234635e-08, "epoch": 4.607259458421356, "percentage": 92.15, "elapsed_time": "3:09:37", "remaining_time": "0:16:09", "throughput": 19908.03, "total_tokens": 226504576} +{"current_steps": 71975, "total_steps": 78105, "loss": 0.1262, "lr": 9.326462075958137e-08, "epoch": 4.607579540362333, "percentage": 92.15, "elapsed_time": "3:09:38", "remaining_time": "0:16:09", "throughput": 19908.2, "total_tokens": 226519488} +{"current_steps": 71980, "total_steps": 78105, "loss": 0.1337, "lr": 9.31135140566447e-08, "epoch": 4.60789962230331, "percentage": 92.16, "elapsed_time": "3:09:38", "remaining_time": "0:16:08", "throughput": 19908.37, "total_tokens": 226534592} +{"current_steps": 71985, "total_steps": 78105, "loss": 0.1339, "lr": 9.296252754108225e-08, "epoch": 4.608219704244286, "percentage": 92.16, "elapsed_time": "3:09:39", "remaining_time": "0:16:07", "throughput": 19908.56, "total_tokens": 226550272} +{"current_steps": 71990, "total_steps": 78105, "loss": 0.1727, "lr": 9.2811661220433e-08, "epoch": 4.608539786185263, "percentage": 92.17, "elapsed_time": "3:09:40", "remaining_time": "0:16:06", "throughput": 19908.79, "total_tokens": 226567104} +{"current_steps": 71995, "total_steps": 78105, "loss": 0.1343, "lr": 9.266091510223146e-08, "epoch": 4.60885986812624, "percentage": 92.18, "elapsed_time": "3:09:40", "remaining_time": "0:16:05", "throughput": 19908.99, "total_tokens": 226582784} +{"current_steps": 72000, "total_steps": 78105, "loss": 0.1683, "lr": 9.251028919400385e-08, "epoch": 4.609179950067217, "percentage": 92.18, "elapsed_time": "3:09:41", "remaining_time": "0:16:05", "throughput": 19909.22, "total_tokens": 226599296} +{"current_steps": 72005, "total_steps": 78105, "loss": 0.1576, "lr": 9.235978350327246e-08, "epoch": 4.609500032008194, "percentage": 92.19, "elapsed_time": "3:09:42", "remaining_time": "0:16:04", "throughput": 19909.44, "total_tokens": 226615744} +{"current_steps": 72010, "total_steps": 78105, "loss": 0.1205, "lr": 9.220939803755269e-08, "epoch": 4.609820113949171, "percentage": 92.2, "elapsed_time": "3:09:42", "remaining_time": "0:16:03", "throughput": 19909.61, "total_tokens": 226630720} +{"current_steps": 72015, "total_steps": 78105, "loss": 0.1782, "lr": 9.205913280435352e-08, "epoch": 4.610140195890148, "percentage": 92.2, "elapsed_time": "3:09:43", "remaining_time": "0:16:02", "throughput": 19909.87, "total_tokens": 226647936} +{"current_steps": 72020, "total_steps": 78105, "loss": 0.1098, "lr": 9.190898781117925e-08, "epoch": 4.610460277831125, "percentage": 92.21, "elapsed_time": "3:09:44", "remaining_time": "0:16:01", "throughput": 19910.06, "total_tokens": 226663680} +{"current_steps": 72025, "total_steps": 78105, "loss": 0.1331, "lr": 9.175896306552634e-08, "epoch": 4.610780359772102, "percentage": 92.22, "elapsed_time": "3:09:45", "remaining_time": "0:16:01", "throughput": 19910.24, "total_tokens": 226678976} +{"current_steps": 72030, "total_steps": 78105, "loss": 0.1393, "lr": 9.160905857488717e-08, "epoch": 4.611100441713079, "percentage": 92.22, "elapsed_time": "3:09:45", "remaining_time": "0:16:00", "throughput": 19910.41, "total_tokens": 226694144} +{"current_steps": 72035, "total_steps": 78105, "loss": 0.1737, "lr": 9.145927434674629e-08, "epoch": 4.611420523654055, "percentage": 92.23, "elapsed_time": "3:09:46", "remaining_time": "0:15:59", "throughput": 19910.56, "total_tokens": 226708800} +{"current_steps": 72040, "total_steps": 78105, "loss": 0.0947, "lr": 9.13096103885841e-08, "epoch": 4.611740605595032, "percentage": 92.23, "elapsed_time": "3:09:47", "remaining_time": "0:15:58", "throughput": 19910.75, "total_tokens": 226724480} +{"current_steps": 72045, "total_steps": 78105, "loss": 0.1318, "lr": 9.116006670787325e-08, "epoch": 4.612060687536009, "percentage": 92.24, "elapsed_time": "3:09:47", "remaining_time": "0:15:57", "throughput": 19910.92, "total_tokens": 226739456} +{"current_steps": 72050, "total_steps": 78105, "loss": 0.1285, "lr": 9.101064331208165e-08, "epoch": 4.612380769476986, "percentage": 92.25, "elapsed_time": "3:09:48", "remaining_time": "0:15:57", "throughput": 19911.1, "total_tokens": 226754944} +{"current_steps": 72055, "total_steps": 78105, "loss": 0.1325, "lr": 9.086134020867055e-08, "epoch": 4.612700851417963, "percentage": 92.25, "elapsed_time": "3:09:49", "remaining_time": "0:15:56", "throughput": 19911.28, "total_tokens": 226769920} +{"current_steps": 72060, "total_steps": 78105, "loss": 0.1794, "lr": 9.071215740509537e-08, "epoch": 4.61302093335894, "percentage": 92.26, "elapsed_time": "3:09:49", "remaining_time": "0:15:55", "throughput": 19911.47, "total_tokens": 226784960} +{"current_steps": 72065, "total_steps": 78105, "loss": 0.0837, "lr": 9.056309490880544e-08, "epoch": 4.613341015299917, "percentage": 92.27, "elapsed_time": "3:09:50", "remaining_time": "0:15:54", "throughput": 19911.66, "total_tokens": 226800768} +{"current_steps": 72070, "total_steps": 78105, "loss": 0.0679, "lr": 9.041415272724396e-08, "epoch": 4.613661097240894, "percentage": 92.27, "elapsed_time": "3:09:51", "remaining_time": "0:15:53", "throughput": 19911.86, "total_tokens": 226816896} +{"current_steps": 72075, "total_steps": 78105, "loss": 0.1183, "lr": 9.026533086784889e-08, "epoch": 4.61398117918187, "percentage": 92.28, "elapsed_time": "3:09:51", "remaining_time": "0:15:53", "throughput": 19912.05, "total_tokens": 226832896} +{"current_steps": 72080, "total_steps": 78105, "loss": 0.1624, "lr": 9.011662933805149e-08, "epoch": 4.614301261122847, "percentage": 92.29, "elapsed_time": "3:09:52", "remaining_time": "0:15:52", "throughput": 19912.22, "total_tokens": 226847936} +{"current_steps": 72085, "total_steps": 78105, "loss": 0.1095, "lr": 8.996804814527693e-08, "epoch": 4.614621343063824, "percentage": 92.29, "elapsed_time": "3:09:53", "remaining_time": "0:15:51", "throughput": 19912.41, "total_tokens": 226863360} +{"current_steps": 72090, "total_steps": 78105, "loss": 0.1501, "lr": 8.981958729694457e-08, "epoch": 4.614941425004801, "percentage": 92.3, "elapsed_time": "3:09:53", "remaining_time": "0:15:50", "throughput": 19912.6, "total_tokens": 226878784} +{"current_steps": 72095, "total_steps": 78105, "loss": 0.1179, "lr": 8.967124680046819e-08, "epoch": 4.615261506945778, "percentage": 92.31, "elapsed_time": "3:09:54", "remaining_time": "0:15:49", "throughput": 19912.83, "total_tokens": 226895616} +{"current_steps": 72100, "total_steps": 78105, "loss": 0.0891, "lr": 8.952302666325408e-08, "epoch": 4.615581588886755, "percentage": 92.31, "elapsed_time": "3:09:55", "remaining_time": "0:15:49", "throughput": 19913.05, "total_tokens": 226912000} +{"current_steps": 72105, "total_steps": 78105, "loss": 0.1361, "lr": 8.937492689270522e-08, "epoch": 4.615901670827732, "percentage": 92.32, "elapsed_time": "3:09:55", "remaining_time": "0:15:48", "throughput": 19913.25, "total_tokens": 226928192} +{"current_steps": 72110, "total_steps": 78105, "loss": 0.1639, "lr": 8.922694749621513e-08, "epoch": 4.616221752768709, "percentage": 92.32, "elapsed_time": "3:09:56", "remaining_time": "0:15:47", "throughput": 19913.44, "total_tokens": 226943936} +{"current_steps": 72115, "total_steps": 78105, "loss": 0.1455, "lr": 8.907908848117485e-08, "epoch": 4.616541834709686, "percentage": 92.33, "elapsed_time": "3:09:57", "remaining_time": "0:15:46", "throughput": 19913.64, "total_tokens": 226959872} +{"current_steps": 72120, "total_steps": 78105, "loss": 0.0895, "lr": 8.893134985496599e-08, "epoch": 4.616861916650663, "percentage": 92.34, "elapsed_time": "3:09:57", "remaining_time": "0:15:45", "throughput": 19913.8, "total_tokens": 226974912} +{"current_steps": 72125, "total_steps": 78105, "loss": 0.1536, "lr": 8.878373162496734e-08, "epoch": 4.617181998591639, "percentage": 92.34, "elapsed_time": "3:09:58", "remaining_time": "0:15:45", "throughput": 19914.0, "total_tokens": 226990656} +{"current_steps": 72130, "total_steps": 78105, "loss": 0.1079, "lr": 8.863623379854942e-08, "epoch": 4.617502080532616, "percentage": 92.35, "elapsed_time": "3:09:59", "remaining_time": "0:15:44", "throughput": 19914.18, "total_tokens": 227005632} +{"current_steps": 72135, "total_steps": 78105, "loss": 0.0973, "lr": 8.848885638307741e-08, "epoch": 4.617822162473593, "percentage": 92.36, "elapsed_time": "3:09:59", "remaining_time": "0:15:43", "throughput": 19914.37, "total_tokens": 227021440} +{"current_steps": 72140, "total_steps": 78105, "loss": 0.1035, "lr": 8.834159938591131e-08, "epoch": 4.61814224441457, "percentage": 92.36, "elapsed_time": "3:10:00", "remaining_time": "0:15:42", "throughput": 19914.55, "total_tokens": 227036672} +{"current_steps": 72145, "total_steps": 78105, "loss": 0.117, "lr": 8.819446281440325e-08, "epoch": 4.618462326355547, "percentage": 92.37, "elapsed_time": "3:10:01", "remaining_time": "0:15:41", "throughput": 19914.77, "total_tokens": 227052928} +{"current_steps": 72150, "total_steps": 78105, "loss": 0.1554, "lr": 8.804744667590153e-08, "epoch": 4.618782408296524, "percentage": 92.38, "elapsed_time": "3:10:01", "remaining_time": "0:15:41", "throughput": 19914.97, "total_tokens": 227068544} +{"current_steps": 72155, "total_steps": 78105, "loss": 0.117, "lr": 8.790055097774668e-08, "epoch": 4.619102490237501, "percentage": 92.38, "elapsed_time": "3:10:02", "remaining_time": "0:15:40", "throughput": 19915.14, "total_tokens": 227083520} +{"current_steps": 72160, "total_steps": 78105, "loss": 0.1018, "lr": 8.775377572727423e-08, "epoch": 4.619422572178478, "percentage": 92.39, "elapsed_time": "3:10:03", "remaining_time": "0:15:39", "throughput": 19915.34, "total_tokens": 227099264} +{"current_steps": 72165, "total_steps": 78105, "loss": 0.1249, "lr": 8.760712093181356e-08, "epoch": 4.619742654119454, "percentage": 92.39, "elapsed_time": "3:10:03", "remaining_time": "0:15:38", "throughput": 19915.52, "total_tokens": 227114496} +{"current_steps": 72170, "total_steps": 78105, "loss": 0.1567, "lr": 8.746058659868717e-08, "epoch": 4.620062736060431, "percentage": 92.4, "elapsed_time": "3:10:04", "remaining_time": "0:15:37", "throughput": 19915.68, "total_tokens": 227129536} +{"current_steps": 72175, "total_steps": 78105, "loss": 0.1458, "lr": 8.731417273521308e-08, "epoch": 4.620382818001408, "percentage": 92.41, "elapsed_time": "3:10:05", "remaining_time": "0:15:37", "throughput": 19915.84, "total_tokens": 227144256} +{"current_steps": 72180, "total_steps": 78105, "loss": 0.113, "lr": 8.716787934870158e-08, "epoch": 4.620702899942385, "percentage": 92.41, "elapsed_time": "3:10:05", "remaining_time": "0:15:36", "throughput": 19916.04, "total_tokens": 227159808} +{"current_steps": 72185, "total_steps": 78105, "loss": 0.1498, "lr": 8.702170644645819e-08, "epoch": 4.621022981883362, "percentage": 92.42, "elapsed_time": "3:10:06", "remaining_time": "0:15:35", "throughput": 19916.26, "total_tokens": 227176192} +{"current_steps": 72190, "total_steps": 78105, "loss": 0.2111, "lr": 8.687565403578208e-08, "epoch": 4.621343063824339, "percentage": 92.43, "elapsed_time": "3:10:07", "remaining_time": "0:15:34", "throughput": 19916.45, "total_tokens": 227191872} +{"current_steps": 72195, "total_steps": 78105, "loss": 0.1301, "lr": 8.67297221239663e-08, "epoch": 4.621663145765316, "percentage": 92.43, "elapsed_time": "3:10:07", "remaining_time": "0:15:33", "throughput": 19916.68, "total_tokens": 227208768} +{"current_steps": 72200, "total_steps": 78105, "loss": 0.1165, "lr": 8.658391071829808e-08, "epoch": 4.621983227706293, "percentage": 92.44, "elapsed_time": "3:10:08", "remaining_time": "0:15:33", "throughput": 19916.83, "total_tokens": 227223168} +{"current_steps": 72205, "total_steps": 78105, "loss": 0.1173, "lr": 8.643821982605826e-08, "epoch": 4.62230330964727, "percentage": 92.45, "elapsed_time": "3:10:09", "remaining_time": "0:15:32", "throughput": 19917.01, "total_tokens": 227238784} +{"current_steps": 72210, "total_steps": 78105, "loss": 0.1041, "lr": 8.629264945452214e-08, "epoch": 4.622623391588247, "percentage": 92.45, "elapsed_time": "3:10:09", "remaining_time": "0:15:31", "throughput": 19917.23, "total_tokens": 227255424} +{"current_steps": 72215, "total_steps": 78105, "loss": 0.1124, "lr": 8.614719961095836e-08, "epoch": 4.622943473529223, "percentage": 92.46, "elapsed_time": "3:10:10", "remaining_time": "0:15:30", "throughput": 19917.4, "total_tokens": 227270144} +{"current_steps": 72220, "total_steps": 78105, "loss": 0.1159, "lr": 8.600187030262997e-08, "epoch": 4.6232635554702, "percentage": 92.47, "elapsed_time": "3:10:11", "remaining_time": "0:15:29", "throughput": 19917.63, "total_tokens": 227286656} +{"current_steps": 72225, "total_steps": 78105, "loss": 0.0744, "lr": 8.58566615367945e-08, "epoch": 4.623583637411177, "percentage": 92.47, "elapsed_time": "3:10:12", "remaining_time": "0:15:29", "throughput": 19917.81, "total_tokens": 227302336} +{"current_steps": 72230, "total_steps": 78105, "loss": 0.1673, "lr": 8.571157332070228e-08, "epoch": 4.623903719352154, "percentage": 92.48, "elapsed_time": "3:10:12", "remaining_time": "0:15:28", "throughput": 19918.02, "total_tokens": 227318208} +{"current_steps": 72235, "total_steps": 78105, "loss": 0.1104, "lr": 8.556660566159864e-08, "epoch": 4.624223801293131, "percentage": 92.48, "elapsed_time": "3:10:13", "remaining_time": "0:15:27", "throughput": 19918.21, "total_tokens": 227334016} +{"current_steps": 72240, "total_steps": 78105, "loss": 0.1376, "lr": 8.54217585667222e-08, "epoch": 4.624543883234108, "percentage": 92.49, "elapsed_time": "3:10:14", "remaining_time": "0:15:26", "throughput": 19918.44, "total_tokens": 227350720} +{"current_steps": 72245, "total_steps": 78105, "loss": 0.1202, "lr": 8.527703204330607e-08, "epoch": 4.624863965175085, "percentage": 92.5, "elapsed_time": "3:10:14", "remaining_time": "0:15:25", "throughput": 19918.65, "total_tokens": 227367040} +{"current_steps": 72250, "total_steps": 78105, "loss": 0.1063, "lr": 8.513242609857753e-08, "epoch": 4.625184047116061, "percentage": 92.5, "elapsed_time": "3:10:15", "remaining_time": "0:15:25", "throughput": 19918.82, "total_tokens": 227382144} +{"current_steps": 72255, "total_steps": 78105, "loss": 0.153, "lr": 8.498794073975636e-08, "epoch": 4.625504129057038, "percentage": 92.51, "elapsed_time": "3:10:16", "remaining_time": "0:15:24", "throughput": 19919.01, "total_tokens": 227397824} +{"current_steps": 72260, "total_steps": 78105, "loss": 0.1076, "lr": 8.4843575974059e-08, "epoch": 4.625824210998015, "percentage": 92.52, "elapsed_time": "3:10:16", "remaining_time": "0:15:23", "throughput": 19919.17, "total_tokens": 227412544} +{"current_steps": 72265, "total_steps": 78105, "loss": 0.1476, "lr": 8.46993318086925e-08, "epoch": 4.626144292938992, "percentage": 92.52, "elapsed_time": "3:10:17", "remaining_time": "0:15:22", "throughput": 19919.38, "total_tokens": 227428864} +{"current_steps": 72270, "total_steps": 78105, "loss": 0.1022, "lr": 8.455520825086105e-08, "epoch": 4.626464374879969, "percentage": 92.53, "elapsed_time": "3:10:18", "remaining_time": "0:15:21", "throughput": 19919.55, "total_tokens": 227444032} +{"current_steps": 72275, "total_steps": 78105, "loss": 0.1015, "lr": 8.44112053077606e-08, "epoch": 4.626784456820946, "percentage": 92.54, "elapsed_time": "3:10:18", "remaining_time": "0:15:21", "throughput": 19919.76, "total_tokens": 227460032} +{"current_steps": 72280, "total_steps": 78105, "loss": 0.1472, "lr": 8.426732298658258e-08, "epoch": 4.627104538761923, "percentage": 92.54, "elapsed_time": "3:10:19", "remaining_time": "0:15:20", "throughput": 19919.97, "total_tokens": 227476672} +{"current_steps": 72285, "total_steps": 78105, "loss": 0.1316, "lr": 8.412356129451099e-08, "epoch": 4.6274246207029, "percentage": 92.55, "elapsed_time": "3:10:20", "remaining_time": "0:15:19", "throughput": 19920.15, "total_tokens": 227491904} +{"current_steps": 72290, "total_steps": 78105, "loss": 0.1001, "lr": 8.397992023872508e-08, "epoch": 4.627744702643877, "percentage": 92.55, "elapsed_time": "3:10:20", "remaining_time": "0:15:18", "throughput": 19920.35, "total_tokens": 227507840} +{"current_steps": 72295, "total_steps": 78105, "loss": 0.1326, "lr": 8.383639982639747e-08, "epoch": 4.628064784584854, "percentage": 92.56, "elapsed_time": "3:10:21", "remaining_time": "0:15:17", "throughput": 19920.55, "total_tokens": 227523520} +{"current_steps": 72300, "total_steps": 78105, "loss": 0.1299, "lr": 8.369300006469461e-08, "epoch": 4.62838486652583, "percentage": 92.57, "elapsed_time": "3:10:22", "remaining_time": "0:15:17", "throughput": 19920.76, "total_tokens": 227539648} +{"current_steps": 72305, "total_steps": 78105, "loss": 0.1256, "lr": 8.35497209607769e-08, "epoch": 4.628704948466807, "percentage": 92.57, "elapsed_time": "3:10:22", "remaining_time": "0:15:16", "throughput": 19920.95, "total_tokens": 227555264} +{"current_steps": 72310, "total_steps": 78105, "loss": 0.1384, "lr": 8.340656252179974e-08, "epoch": 4.629025030407784, "percentage": 92.58, "elapsed_time": "3:10:23", "remaining_time": "0:15:15", "throughput": 19921.18, "total_tokens": 227571648} +{"current_steps": 72315, "total_steps": 78105, "loss": 0.1439, "lr": 8.326352475491101e-08, "epoch": 4.629345112348761, "percentage": 92.59, "elapsed_time": "3:10:24", "remaining_time": "0:15:14", "throughput": 19921.34, "total_tokens": 227586496} +{"current_steps": 72320, "total_steps": 78105, "loss": 0.1583, "lr": 8.312060766725388e-08, "epoch": 4.629665194289738, "percentage": 92.59, "elapsed_time": "3:10:24", "remaining_time": "0:15:13", "throughput": 19921.51, "total_tokens": 227601472} +{"current_steps": 72325, "total_steps": 78105, "loss": 0.1593, "lr": 8.297781126596433e-08, "epoch": 4.629985276230715, "percentage": 92.6, "elapsed_time": "3:10:25", "remaining_time": "0:15:13", "throughput": 19921.71, "total_tokens": 227617536} +{"current_steps": 72330, "total_steps": 78105, "loss": 0.1117, "lr": 8.283513555817274e-08, "epoch": 4.630305358171692, "percentage": 92.61, "elapsed_time": "3:10:26", "remaining_time": "0:15:12", "throughput": 19921.89, "total_tokens": 227632704} +{"current_steps": 72335, "total_steps": 78105, "loss": 0.156, "lr": 8.269258055100454e-08, "epoch": 4.630625440112669, "percentage": 92.61, "elapsed_time": "3:10:26", "remaining_time": "0:15:11", "throughput": 19922.1, "total_tokens": 227648640} +{"current_steps": 72340, "total_steps": 78105, "loss": 0.1008, "lr": 8.255014625157709e-08, "epoch": 4.630945522053645, "percentage": 92.62, "elapsed_time": "3:10:27", "remaining_time": "0:15:10", "throughput": 19922.31, "total_tokens": 227664960} +{"current_steps": 72345, "total_steps": 78105, "loss": 0.1675, "lr": 8.240783266700387e-08, "epoch": 4.631265603994622, "percentage": 92.63, "elapsed_time": "3:10:28", "remaining_time": "0:15:09", "throughput": 19922.51, "total_tokens": 227680576} +{"current_steps": 72350, "total_steps": 78105, "loss": 0.0895, "lr": 8.226563980439e-08, "epoch": 4.631585685935599, "percentage": 92.63, "elapsed_time": "3:10:29", "remaining_time": "0:15:09", "throughput": 19922.74, "total_tokens": 227697280} +{"current_steps": 72355, "total_steps": 78105, "loss": 0.1265, "lr": 8.212356767083706e-08, "epoch": 4.631905767876576, "percentage": 92.64, "elapsed_time": "3:10:29", "remaining_time": "0:15:08", "throughput": 19922.94, "total_tokens": 227713152} +{"current_steps": 72360, "total_steps": 78105, "loss": 0.1322, "lr": 8.198161627343881e-08, "epoch": 4.632225849817553, "percentage": 92.64, "elapsed_time": "3:10:30", "remaining_time": "0:15:07", "throughput": 19923.13, "total_tokens": 227728576} +{"current_steps": 72365, "total_steps": 78105, "loss": 0.1192, "lr": 8.183978561928319e-08, "epoch": 4.63254593175853, "percentage": 92.65, "elapsed_time": "3:10:31", "remaining_time": "0:15:06", "throughput": 19923.34, "total_tokens": 227744448} +{"current_steps": 72370, "total_steps": 78105, "loss": 0.1155, "lr": 8.169807571545369e-08, "epoch": 4.632866013699507, "percentage": 92.66, "elapsed_time": "3:10:31", "remaining_time": "0:15:05", "throughput": 19923.53, "total_tokens": 227760000} +{"current_steps": 72375, "total_steps": 78105, "loss": 0.1282, "lr": 8.15564865690252e-08, "epoch": 4.633186095640484, "percentage": 92.66, "elapsed_time": "3:10:32", "remaining_time": "0:15:05", "throughput": 19923.7, "total_tokens": 227775488} +{"current_steps": 72380, "total_steps": 78105, "loss": 0.1357, "lr": 8.141501818706877e-08, "epoch": 4.633506177581461, "percentage": 92.67, "elapsed_time": "3:10:33", "remaining_time": "0:15:04", "throughput": 19923.86, "total_tokens": 227790272} +{"current_steps": 72385, "total_steps": 78105, "loss": 0.1583, "lr": 8.127367057664842e-08, "epoch": 4.633826259522438, "percentage": 92.68, "elapsed_time": "3:10:33", "remaining_time": "0:15:03", "throughput": 19924.06, "total_tokens": 227805824} +{"current_steps": 72390, "total_steps": 78105, "loss": 0.0813, "lr": 8.113244374482243e-08, "epoch": 4.634146341463414, "percentage": 92.68, "elapsed_time": "3:10:34", "remaining_time": "0:15:02", "throughput": 19924.31, "total_tokens": 227822912} +{"current_steps": 72395, "total_steps": 78105, "loss": 0.1038, "lr": 8.099133769864265e-08, "epoch": 4.634466423404391, "percentage": 92.69, "elapsed_time": "3:10:35", "remaining_time": "0:15:01", "throughput": 19924.46, "total_tokens": 227837504} +{"current_steps": 72400, "total_steps": 78105, "loss": 0.1134, "lr": 8.08503524451551e-08, "epoch": 4.634786505345368, "percentage": 92.7, "elapsed_time": "3:10:35", "remaining_time": "0:15:01", "throughput": 19924.65, "total_tokens": 227852992} +{"current_steps": 72405, "total_steps": 78105, "loss": 0.1408, "lr": 8.070948799140027e-08, "epoch": 4.635106587286345, "percentage": 92.7, "elapsed_time": "3:10:36", "remaining_time": "0:15:00", "throughput": 19924.84, "total_tokens": 227868480} +{"current_steps": 72410, "total_steps": 78105, "loss": 0.1485, "lr": 8.056874434441197e-08, "epoch": 4.635426669227322, "percentage": 92.71, "elapsed_time": "3:10:37", "remaining_time": "0:14:59", "throughput": 19924.99, "total_tokens": 227882880} +{"current_steps": 72415, "total_steps": 78105, "loss": 0.156, "lr": 8.042812151121793e-08, "epoch": 4.635746751168299, "percentage": 92.71, "elapsed_time": "3:10:37", "remaining_time": "0:14:58", "throughput": 19925.2, "total_tokens": 227899264} +{"current_steps": 72420, "total_steps": 78105, "loss": 0.1853, "lr": 8.028761949884084e-08, "epoch": 4.636066833109276, "percentage": 92.72, "elapsed_time": "3:10:38", "remaining_time": "0:14:57", "throughput": 19925.37, "total_tokens": 227914432} +{"current_steps": 72425, "total_steps": 78105, "loss": 0.1024, "lr": 8.014723831429594e-08, "epoch": 4.636386915050253, "percentage": 92.73, "elapsed_time": "3:10:39", "remaining_time": "0:14:57", "throughput": 19925.58, "total_tokens": 227930432} +{"current_steps": 72430, "total_steps": 78105, "loss": 0.2161, "lr": 8.000697796459373e-08, "epoch": 4.636706996991229, "percentage": 92.73, "elapsed_time": "3:10:39", "remaining_time": "0:14:56", "throughput": 19925.79, "total_tokens": 227946432} +{"current_steps": 72435, "total_steps": 78105, "loss": 0.1531, "lr": 7.986683845673748e-08, "epoch": 4.637027078932206, "percentage": 92.74, "elapsed_time": "3:10:40", "remaining_time": "0:14:55", "throughput": 19925.94, "total_tokens": 227960832} +{"current_steps": 72440, "total_steps": 78105, "loss": 0.1102, "lr": 7.972681979772551e-08, "epoch": 4.637347160873183, "percentage": 92.75, "elapsed_time": "3:10:41", "remaining_time": "0:14:54", "throughput": 19926.12, "total_tokens": 227976192} +{"current_steps": 72445, "total_steps": 78105, "loss": 0.1153, "lr": 7.958692199454915e-08, "epoch": 4.63766724281416, "percentage": 92.75, "elapsed_time": "3:10:41", "remaining_time": "0:14:53", "throughput": 19926.31, "total_tokens": 227991680} +{"current_steps": 72450, "total_steps": 78105, "loss": 0.1571, "lr": 7.944714505419448e-08, "epoch": 4.637987324755137, "percentage": 92.76, "elapsed_time": "3:10:42", "remaining_time": "0:14:53", "throughput": 19926.49, "total_tokens": 228007232} +{"current_steps": 72455, "total_steps": 78105, "loss": 0.2076, "lr": 7.930748898364149e-08, "epoch": 4.638307406696114, "percentage": 92.77, "elapsed_time": "3:10:43", "remaining_time": "0:14:52", "throughput": 19926.67, "total_tokens": 228022720} +{"current_steps": 72460, "total_steps": 78105, "loss": 0.1597, "lr": 7.91679537898632e-08, "epoch": 4.638627488637091, "percentage": 92.77, "elapsed_time": "3:10:43", "remaining_time": "0:14:51", "throughput": 19926.87, "total_tokens": 228038016} +{"current_steps": 72465, "total_steps": 78105, "loss": 0.1065, "lr": 7.902853947982764e-08, "epoch": 4.638947570578068, "percentage": 92.78, "elapsed_time": "3:10:44", "remaining_time": "0:14:50", "throughput": 19927.04, "total_tokens": 228052864} +{"current_steps": 72470, "total_steps": 78105, "loss": 0.1465, "lr": 7.888924606049675e-08, "epoch": 4.639267652519045, "percentage": 92.79, "elapsed_time": "3:10:45", "remaining_time": "0:14:49", "throughput": 19927.23, "total_tokens": 228068352} +{"current_steps": 72475, "total_steps": 78105, "loss": 0.1201, "lr": 7.875007353882552e-08, "epoch": 4.639587734460022, "percentage": 92.79, "elapsed_time": "3:10:45", "remaining_time": "0:14:49", "throughput": 19927.46, "total_tokens": 228084672} +{"current_steps": 72480, "total_steps": 78105, "loss": 0.1088, "lr": 7.861102192176395e-08, "epoch": 4.639907816400998, "percentage": 92.8, "elapsed_time": "3:10:46", "remaining_time": "0:14:48", "throughput": 19927.68, "total_tokens": 228100864} +{"current_steps": 72485, "total_steps": 78105, "loss": 0.1338, "lr": 7.84720912162551e-08, "epoch": 4.640227898341975, "percentage": 92.8, "elapsed_time": "3:10:47", "remaining_time": "0:14:47", "throughput": 19927.85, "total_tokens": 228115968} +{"current_steps": 72490, "total_steps": 78105, "loss": 0.1203, "lr": 7.83332814292373e-08, "epoch": 4.640547980282952, "percentage": 92.81, "elapsed_time": "3:10:47", "remaining_time": "0:14:46", "throughput": 19928.09, "total_tokens": 228132736} +{"current_steps": 72495, "total_steps": 78105, "loss": 0.1164, "lr": 7.819459256764083e-08, "epoch": 4.640868062223929, "percentage": 92.82, "elapsed_time": "3:10:48", "remaining_time": "0:14:45", "throughput": 19928.29, "total_tokens": 228148800} +{"current_steps": 72500, "total_steps": 78105, "loss": 0.1113, "lr": 7.805602463839212e-08, "epoch": 4.641188144164906, "percentage": 92.82, "elapsed_time": "3:10:49", "remaining_time": "0:14:45", "throughput": 19928.44, "total_tokens": 228163712} +{"current_steps": 72505, "total_steps": 78105, "loss": 0.1175, "lr": 7.79175776484098e-08, "epoch": 4.641508226105883, "percentage": 92.83, "elapsed_time": "3:10:49", "remaining_time": "0:14:44", "throughput": 19928.6, "total_tokens": 228178432} +{"current_steps": 72510, "total_steps": 78105, "loss": 0.1329, "lr": 7.777925160460776e-08, "epoch": 4.64182830804686, "percentage": 92.84, "elapsed_time": "3:10:50", "remaining_time": "0:14:43", "throughput": 19928.78, "total_tokens": 228193856} +{"current_steps": 72515, "total_steps": 78105, "loss": 0.1489, "lr": 7.764104651389298e-08, "epoch": 4.642148389987836, "percentage": 92.84, "elapsed_time": "3:10:51", "remaining_time": "0:14:42", "throughput": 19928.94, "total_tokens": 228208768} +{"current_steps": 72520, "total_steps": 78105, "loss": 0.1213, "lr": 7.750296238316663e-08, "epoch": 4.642468471928813, "percentage": 92.85, "elapsed_time": "3:10:51", "remaining_time": "0:14:41", "throughput": 19929.16, "total_tokens": 228225344} +{"current_steps": 72525, "total_steps": 78105, "loss": 0.1263, "lr": 7.7364999219324e-08, "epoch": 4.64278855386979, "percentage": 92.86, "elapsed_time": "3:10:52", "remaining_time": "0:14:41", "throughput": 19929.32, "total_tokens": 228239872} +{"current_steps": 72530, "total_steps": 78105, "loss": 0.1204, "lr": 7.722715702925432e-08, "epoch": 4.643108635810767, "percentage": 92.86, "elapsed_time": "3:10:53", "remaining_time": "0:14:40", "throughput": 19929.49, "total_tokens": 228255040} +{"current_steps": 72535, "total_steps": 78105, "loss": 0.109, "lr": 7.708943581984041e-08, "epoch": 4.643428717751744, "percentage": 92.87, "elapsed_time": "3:10:53", "remaining_time": "0:14:39", "throughput": 19929.68, "total_tokens": 228270592} +{"current_steps": 72540, "total_steps": 78105, "loss": 0.139, "lr": 7.695183559795982e-08, "epoch": 4.643748799692721, "percentage": 92.87, "elapsed_time": "3:10:54", "remaining_time": "0:14:38", "throughput": 19929.84, "total_tokens": 228285632} +{"current_steps": 72545, "total_steps": 78105, "loss": 0.1182, "lr": 7.681435637048318e-08, "epoch": 4.644068881633698, "percentage": 92.88, "elapsed_time": "3:10:55", "remaining_time": "0:14:37", "throughput": 19930.1, "total_tokens": 228303552} +{"current_steps": 72550, "total_steps": 78105, "loss": 0.127, "lr": 7.667699814427582e-08, "epoch": 4.644388963574675, "percentage": 92.89, "elapsed_time": "3:10:55", "remaining_time": "0:14:37", "throughput": 19930.32, "total_tokens": 228319936} +{"current_steps": 72555, "total_steps": 78105, "loss": 0.1141, "lr": 7.65397609261967e-08, "epoch": 4.644709045515652, "percentage": 92.89, "elapsed_time": "3:10:56", "remaining_time": "0:14:36", "throughput": 19930.56, "total_tokens": 228336896} +{"current_steps": 72560, "total_steps": 78105, "loss": 0.1452, "lr": 7.640264472309811e-08, "epoch": 4.645029127456629, "percentage": 92.9, "elapsed_time": "3:10:57", "remaining_time": "0:14:35", "throughput": 19930.73, "total_tokens": 228352128} +{"current_steps": 72565, "total_steps": 78105, "loss": 0.1243, "lr": 7.626564954182792e-08, "epoch": 4.645349209397605, "percentage": 92.91, "elapsed_time": "3:10:57", "remaining_time": "0:14:34", "throughput": 19930.94, "total_tokens": 228368448} +{"current_steps": 72570, "total_steps": 78105, "loss": 0.1149, "lr": 7.61287753892262e-08, "epoch": 4.645669291338582, "percentage": 92.91, "elapsed_time": "3:10:58", "remaining_time": "0:14:33", "throughput": 19931.15, "total_tokens": 228384512} +{"current_steps": 72575, "total_steps": 78105, "loss": 0.157, "lr": 7.599202227212805e-08, "epoch": 4.645989373279559, "percentage": 92.92, "elapsed_time": "3:10:59", "remaining_time": "0:14:33", "throughput": 19931.3, "total_tokens": 228398912} +{"current_steps": 72580, "total_steps": 78105, "loss": 0.1111, "lr": 7.585539019736188e-08, "epoch": 4.646309455220536, "percentage": 92.93, "elapsed_time": "3:10:59", "remaining_time": "0:14:32", "throughput": 19931.46, "total_tokens": 228413888} +{"current_steps": 72585, "total_steps": 78105, "loss": 0.1115, "lr": 7.571887917175086e-08, "epoch": 4.646629537161513, "percentage": 92.93, "elapsed_time": "3:11:00", "remaining_time": "0:14:31", "throughput": 19931.64, "total_tokens": 228429056} +{"current_steps": 72590, "total_steps": 78105, "loss": 0.1235, "lr": 7.55824892021112e-08, "epoch": 4.64694961910249, "percentage": 92.94, "elapsed_time": "3:11:01", "remaining_time": "0:14:30", "throughput": 19931.84, "total_tokens": 228444736} +{"current_steps": 72595, "total_steps": 78105, "loss": 0.139, "lr": 7.544622029525356e-08, "epoch": 4.647269701043467, "percentage": 92.95, "elapsed_time": "3:11:01", "remaining_time": "0:14:29", "throughput": 19932.0, "total_tokens": 228459840} +{"current_steps": 72600, "total_steps": 78105, "loss": 0.12, "lr": 7.531007245798305e-08, "epoch": 4.647589782984444, "percentage": 92.95, "elapsed_time": "3:11:02", "remaining_time": "0:14:29", "throughput": 19932.21, "total_tokens": 228475712} +{"current_steps": 72605, "total_steps": 78105, "loss": 0.1487, "lr": 7.51740456970973e-08, "epoch": 4.6479098649254205, "percentage": 92.96, "elapsed_time": "3:11:03", "remaining_time": "0:14:28", "throughput": 19932.36, "total_tokens": 228490176} +{"current_steps": 72610, "total_steps": 78105, "loss": 0.1478, "lr": 7.503814001938975e-08, "epoch": 4.6482299468663975, "percentage": 92.96, "elapsed_time": "3:11:03", "remaining_time": "0:14:27", "throughput": 19932.56, "total_tokens": 228506176} +{"current_steps": 72615, "total_steps": 78105, "loss": 0.1512, "lr": 7.490235543164609e-08, "epoch": 4.6485500288073744, "percentage": 92.97, "elapsed_time": "3:11:04", "remaining_time": "0:14:26", "throughput": 19932.78, "total_tokens": 228522240} +{"current_steps": 72620, "total_steps": 78105, "loss": 0.1258, "lr": 7.4766691940647e-08, "epoch": 4.648870110748351, "percentage": 92.98, "elapsed_time": "3:11:05", "remaining_time": "0:14:25", "throughput": 19932.96, "total_tokens": 228537792} +{"current_steps": 72625, "total_steps": 78105, "loss": 0.179, "lr": 7.463114955316653e-08, "epoch": 4.649190192689328, "percentage": 92.98, "elapsed_time": "3:11:06", "remaining_time": "0:14:25", "throughput": 19933.14, "total_tokens": 228553600} +{"current_steps": 72630, "total_steps": 78105, "loss": 0.112, "lr": 7.44957282759734e-08, "epoch": 4.649510274630305, "percentage": 92.99, "elapsed_time": "3:11:06", "remaining_time": "0:14:24", "throughput": 19933.34, "total_tokens": 228569728} +{"current_steps": 72635, "total_steps": 78105, "loss": 0.2158, "lr": 7.436042811582944e-08, "epoch": 4.649830356571282, "percentage": 93.0, "elapsed_time": "3:11:07", "remaining_time": "0:14:23", "throughput": 19933.53, "total_tokens": 228585280} +{"current_steps": 72640, "total_steps": 78105, "loss": 0.094, "lr": 7.422524907949119e-08, "epoch": 4.650150438512259, "percentage": 93.0, "elapsed_time": "3:11:08", "remaining_time": "0:14:22", "throughput": 19933.7, "total_tokens": 228600512} +{"current_steps": 72645, "total_steps": 78105, "loss": 0.1175, "lr": 7.409019117370825e-08, "epoch": 4.650470520453236, "percentage": 93.01, "elapsed_time": "3:11:08", "remaining_time": "0:14:21", "throughput": 19933.88, "total_tokens": 228615744} +{"current_steps": 72650, "total_steps": 78105, "loss": 0.1566, "lr": 7.39552544052255e-08, "epoch": 4.650790602394213, "percentage": 93.02, "elapsed_time": "3:11:09", "remaining_time": "0:14:21", "throughput": 19934.12, "total_tokens": 228632960} +{"current_steps": 72655, "total_steps": 78105, "loss": 0.1128, "lr": 7.382043878078033e-08, "epoch": 4.6511106843351895, "percentage": 93.02, "elapsed_time": "3:11:10", "remaining_time": "0:14:20", "throughput": 19934.32, "total_tokens": 228648512} +{"current_steps": 72660, "total_steps": 78105, "loss": 0.1539, "lr": 7.368574430710484e-08, "epoch": 4.6514307662761665, "percentage": 93.03, "elapsed_time": "3:11:10", "remaining_time": "0:14:19", "throughput": 19934.5, "total_tokens": 228664064} +{"current_steps": 72665, "total_steps": 78105, "loss": 0.1434, "lr": 7.355117099092535e-08, "epoch": 4.6517508482171435, "percentage": 93.04, "elapsed_time": "3:11:11", "remaining_time": "0:14:18", "throughput": 19934.69, "total_tokens": 228679552} +{"current_steps": 72670, "total_steps": 78105, "loss": 0.1441, "lr": 7.341671883896117e-08, "epoch": 4.6520709301581205, "percentage": 93.04, "elapsed_time": "3:11:12", "remaining_time": "0:14:18", "throughput": 19934.93, "total_tokens": 228696576} +{"current_steps": 72675, "total_steps": 78105, "loss": 0.0874, "lr": 7.328238785792669e-08, "epoch": 4.6523910120990974, "percentage": 93.05, "elapsed_time": "3:11:12", "remaining_time": "0:14:17", "throughput": 19935.09, "total_tokens": 228711872} +{"current_steps": 72680, "total_steps": 78105, "loss": 0.1149, "lr": 7.3148178054529e-08, "epoch": 4.652711094040074, "percentage": 93.05, "elapsed_time": "3:11:13", "remaining_time": "0:14:16", "throughput": 19935.25, "total_tokens": 228726656} +{"current_steps": 72685, "total_steps": 78105, "loss": 0.1334, "lr": 7.301408943547111e-08, "epoch": 4.653031175981051, "percentage": 93.06, "elapsed_time": "3:11:14", "remaining_time": "0:14:15", "throughput": 19935.41, "total_tokens": 228741696} +{"current_steps": 72690, "total_steps": 78105, "loss": 0.1242, "lr": 7.28801220074471e-08, "epoch": 4.653351257922028, "percentage": 93.07, "elapsed_time": "3:11:14", "remaining_time": "0:14:14", "throughput": 19935.61, "total_tokens": 228757440} +{"current_steps": 72695, "total_steps": 78105, "loss": 0.1562, "lr": 7.274627577714771e-08, "epoch": 4.6536713398630045, "percentage": 93.07, "elapsed_time": "3:11:15", "remaining_time": "0:14:14", "throughput": 19935.83, "total_tokens": 228774080} +{"current_steps": 72700, "total_steps": 78105, "loss": 0.1858, "lr": 7.261255075125623e-08, "epoch": 4.6539914218039815, "percentage": 93.08, "elapsed_time": "3:11:16", "remaining_time": "0:14:13", "throughput": 19935.99, "total_tokens": 228788672} +{"current_steps": 72705, "total_steps": 78105, "loss": 0.1244, "lr": 7.247894693645007e-08, "epoch": 4.6543115037449585, "percentage": 93.09, "elapsed_time": "3:11:16", "remaining_time": "0:14:12", "throughput": 19936.17, "total_tokens": 228804160} +{"current_steps": 72710, "total_steps": 78105, "loss": 0.1227, "lr": 7.234546433940087e-08, "epoch": 4.6546315856859355, "percentage": 93.09, "elapsed_time": "3:11:17", "remaining_time": "0:14:11", "throughput": 19936.36, "total_tokens": 228819968} +{"current_steps": 72715, "total_steps": 78105, "loss": 0.1291, "lr": 7.221210296677384e-08, "epoch": 4.6549516676269125, "percentage": 93.1, "elapsed_time": "3:11:18", "remaining_time": "0:14:10", "throughput": 19936.56, "total_tokens": 228835584} +{"current_steps": 72720, "total_steps": 78105, "loss": 0.1333, "lr": 7.207886282522891e-08, "epoch": 4.6552717495678895, "percentage": 93.11, "elapsed_time": "3:11:18", "remaining_time": "0:14:10", "throughput": 19936.75, "total_tokens": 228851392} +{"current_steps": 72725, "total_steps": 78105, "loss": 0.0877, "lr": 7.194574392141829e-08, "epoch": 4.6555918315088665, "percentage": 93.11, "elapsed_time": "3:11:19", "remaining_time": "0:14:09", "throughput": 19936.97, "total_tokens": 228867712} +{"current_steps": 72730, "total_steps": 78105, "loss": 0.0871, "lr": 7.181274626199053e-08, "epoch": 4.6559119134498435, "percentage": 93.12, "elapsed_time": "3:11:20", "remaining_time": "0:14:08", "throughput": 19937.13, "total_tokens": 228882880} +{"current_steps": 72735, "total_steps": 78105, "loss": 0.1208, "lr": 7.167986985358616e-08, "epoch": 4.65623199539082, "percentage": 93.12, "elapsed_time": "3:11:20", "remaining_time": "0:14:07", "throughput": 19937.35, "total_tokens": 228899456} +{"current_steps": 72740, "total_steps": 78105, "loss": 0.1594, "lr": 7.154711470284043e-08, "epoch": 4.656552077331797, "percentage": 93.13, "elapsed_time": "3:11:21", "remaining_time": "0:14:06", "throughput": 19937.56, "total_tokens": 228915328} +{"current_steps": 72745, "total_steps": 78105, "loss": 0.1059, "lr": 7.141448081638219e-08, "epoch": 4.6568721592727735, "percentage": 93.14, "elapsed_time": "3:11:22", "remaining_time": "0:14:06", "throughput": 19937.77, "total_tokens": 228931328} +{"current_steps": 72750, "total_steps": 78105, "loss": 0.1116, "lr": 7.128196820083505e-08, "epoch": 4.6571922412137505, "percentage": 93.14, "elapsed_time": "3:11:23", "remaining_time": "0:14:05", "throughput": 19938.02, "total_tokens": 228948928} +{"current_steps": 72755, "total_steps": 78105, "loss": 0.204, "lr": 7.114957686281538e-08, "epoch": 4.6575123231547275, "percentage": 93.15, "elapsed_time": "3:11:23", "remaining_time": "0:14:04", "throughput": 19938.22, "total_tokens": 228964928} +{"current_steps": 72760, "total_steps": 78105, "loss": 0.1163, "lr": 7.101730680893481e-08, "epoch": 4.6578324050957045, "percentage": 93.16, "elapsed_time": "3:11:24", "remaining_time": "0:14:03", "throughput": 19938.47, "total_tokens": 228982080} +{"current_steps": 72765, "total_steps": 78105, "loss": 0.1596, "lr": 7.088515804579754e-08, "epoch": 4.6581524870366815, "percentage": 93.16, "elapsed_time": "3:11:25", "remaining_time": "0:14:02", "throughput": 19938.69, "total_tokens": 228998528} +{"current_steps": 72770, "total_steps": 78105, "loss": 0.1141, "lr": 7.075313058000272e-08, "epoch": 4.6584725689776585, "percentage": 93.17, "elapsed_time": "3:11:25", "remaining_time": "0:14:02", "throughput": 19938.87, "total_tokens": 229014272} +{"current_steps": 72775, "total_steps": 78105, "loss": 0.133, "lr": 7.062122441814312e-08, "epoch": 4.6587926509186355, "percentage": 93.18, "elapsed_time": "3:11:26", "remaining_time": "0:14:01", "throughput": 19939.11, "total_tokens": 229031168} +{"current_steps": 72780, "total_steps": 78105, "loss": 0.1359, "lr": 7.048943956680543e-08, "epoch": 4.659112732859612, "percentage": 93.18, "elapsed_time": "3:11:27", "remaining_time": "0:14:00", "throughput": 19939.29, "total_tokens": 229046976} +{"current_steps": 72785, "total_steps": 78105, "loss": 0.1714, "lr": 7.035777603257021e-08, "epoch": 4.659432814800589, "percentage": 93.19, "elapsed_time": "3:11:27", "remaining_time": "0:13:59", "throughput": 19939.5, "total_tokens": 229062912} +{"current_steps": 72790, "total_steps": 78105, "loss": 0.1059, "lr": 7.022623382201193e-08, "epoch": 4.659752896741566, "percentage": 93.2, "elapsed_time": "3:11:28", "remaining_time": "0:13:58", "throughput": 19939.68, "total_tokens": 229078464} +{"current_steps": 72795, "total_steps": 78105, "loss": 0.1921, "lr": 7.009481294169978e-08, "epoch": 4.6600729786825426, "percentage": 93.2, "elapsed_time": "3:11:29", "remaining_time": "0:13:58", "throughput": 19939.85, "total_tokens": 229093696} +{"current_steps": 72800, "total_steps": 78105, "loss": 0.1094, "lr": 6.996351339819518e-08, "epoch": 4.6603930606235195, "percentage": 93.21, "elapsed_time": "3:11:29", "remaining_time": "0:13:57", "throughput": 19940.12, "total_tokens": 229111296} +{"current_steps": 72805, "total_steps": 78105, "loss": 0.1231, "lr": 6.983233519805593e-08, "epoch": 4.6607131425644965, "percentage": 93.21, "elapsed_time": "3:11:30", "remaining_time": "0:13:56", "throughput": 19940.3, "total_tokens": 229126720} +{"current_steps": 72810, "total_steps": 78105, "loss": 0.1054, "lr": 6.970127834783069e-08, "epoch": 4.6610332245054735, "percentage": 93.22, "elapsed_time": "3:11:31", "remaining_time": "0:13:55", "throughput": 19940.54, "total_tokens": 229144064} +{"current_steps": 72815, "total_steps": 78105, "loss": 0.1294, "lr": 6.957034285406506e-08, "epoch": 4.6613533064464505, "percentage": 93.23, "elapsed_time": "3:11:32", "remaining_time": "0:13:54", "throughput": 19940.72, "total_tokens": 229159360} +{"current_steps": 72820, "total_steps": 78105, "loss": 0.1443, "lr": 6.943952872329684e-08, "epoch": 4.6616733883874275, "percentage": 93.23, "elapsed_time": "3:11:32", "remaining_time": "0:13:54", "throughput": 19940.93, "total_tokens": 229176000} +{"current_steps": 72825, "total_steps": 78105, "loss": 0.1475, "lr": 6.930883596205834e-08, "epoch": 4.6619934703284045, "percentage": 93.24, "elapsed_time": "3:11:33", "remaining_time": "0:13:53", "throughput": 19941.11, "total_tokens": 229191552} +{"current_steps": 72830, "total_steps": 78105, "loss": 0.1755, "lr": 6.917826457687543e-08, "epoch": 4.662313552269381, "percentage": 93.25, "elapsed_time": "3:11:34", "remaining_time": "0:13:52", "throughput": 19941.31, "total_tokens": 229207296} +{"current_steps": 72835, "total_steps": 78105, "loss": 0.1581, "lr": 6.904781457426818e-08, "epoch": 4.662633634210358, "percentage": 93.25, "elapsed_time": "3:11:34", "remaining_time": "0:13:51", "throughput": 19941.47, "total_tokens": 229222656} +{"current_steps": 72840, "total_steps": 78105, "loss": 0.1397, "lr": 6.891748596075082e-08, "epoch": 4.662953716151335, "percentage": 93.26, "elapsed_time": "3:11:35", "remaining_time": "0:13:50", "throughput": 19941.72, "total_tokens": 229239744} +{"current_steps": 72845, "total_steps": 78105, "loss": 0.1136, "lr": 6.87872787428312e-08, "epoch": 4.663273798092312, "percentage": 93.27, "elapsed_time": "3:11:36", "remaining_time": "0:13:50", "throughput": 19941.88, "total_tokens": 229254528} +{"current_steps": 72850, "total_steps": 78105, "loss": 0.1586, "lr": 6.865719292701107e-08, "epoch": 4.663593880033289, "percentage": 93.27, "elapsed_time": "3:11:36", "remaining_time": "0:13:49", "throughput": 19942.03, "total_tokens": 229269184} +{"current_steps": 72855, "total_steps": 78105, "loss": 0.1257, "lr": 6.852722851978661e-08, "epoch": 4.6639139619742656, "percentage": 93.28, "elapsed_time": "3:11:37", "remaining_time": "0:13:48", "throughput": 19942.19, "total_tokens": 229283776} +{"current_steps": 72860, "total_steps": 78105, "loss": 0.1441, "lr": 6.839738552764707e-08, "epoch": 4.6642340439152425, "percentage": 93.28, "elapsed_time": "3:11:38", "remaining_time": "0:13:47", "throughput": 19942.34, "total_tokens": 229298240} +{"current_steps": 72865, "total_steps": 78105, "loss": 0.0929, "lr": 6.826766395707618e-08, "epoch": 4.6645541258562195, "percentage": 93.29, "elapsed_time": "3:11:38", "remaining_time": "0:13:46", "throughput": 19942.59, "total_tokens": 229315584} +{"current_steps": 72870, "total_steps": 78105, "loss": 0.1165, "lr": 6.813806381455207e-08, "epoch": 4.664874207797196, "percentage": 93.3, "elapsed_time": "3:11:39", "remaining_time": "0:13:46", "throughput": 19942.83, "total_tokens": 229332480} +{"current_steps": 72875, "total_steps": 78105, "loss": 0.11, "lr": 6.80085851065454e-08, "epoch": 4.665194289738173, "percentage": 93.3, "elapsed_time": "3:11:40", "remaining_time": "0:13:45", "throughput": 19943.02, "total_tokens": 229348288} +{"current_steps": 72880, "total_steps": 78105, "loss": 0.1362, "lr": 6.787922783952294e-08, "epoch": 4.66551437167915, "percentage": 93.31, "elapsed_time": "3:11:40", "remaining_time": "0:13:44", "throughput": 19943.26, "total_tokens": 229365248} +{"current_steps": 72885, "total_steps": 78105, "loss": 0.129, "lr": 6.774999201994259e-08, "epoch": 4.665834453620127, "percentage": 93.32, "elapsed_time": "3:11:41", "remaining_time": "0:13:43", "throughput": 19943.48, "total_tokens": 229381760} +{"current_steps": 72890, "total_steps": 78105, "loss": 0.1462, "lr": 6.762087765425918e-08, "epoch": 4.666154535561104, "percentage": 93.32, "elapsed_time": "3:11:42", "remaining_time": "0:13:42", "throughput": 19943.62, "total_tokens": 229396160} +{"current_steps": 72895, "total_steps": 78105, "loss": 0.0713, "lr": 6.749188474891893e-08, "epoch": 4.666474617502081, "percentage": 93.33, "elapsed_time": "3:11:42", "remaining_time": "0:13:42", "throughput": 19943.8, "total_tokens": 229411712} +{"current_steps": 72900, "total_steps": 78105, "loss": 0.1271, "lr": 6.736301331036366e-08, "epoch": 4.666794699443058, "percentage": 93.34, "elapsed_time": "3:11:43", "remaining_time": "0:13:41", "throughput": 19944.01, "total_tokens": 229427968} +{"current_steps": 72905, "total_steps": 78105, "loss": 0.0927, "lr": 6.723426334502847e-08, "epoch": 4.667114781384035, "percentage": 93.34, "elapsed_time": "3:11:44", "remaining_time": "0:13:40", "throughput": 19944.18, "total_tokens": 229443200} +{"current_steps": 72910, "total_steps": 78105, "loss": 0.1143, "lr": 6.710563485934185e-08, "epoch": 4.667434863325012, "percentage": 93.35, "elapsed_time": "3:11:44", "remaining_time": "0:13:39", "throughput": 19944.38, "total_tokens": 229459200} +{"current_steps": 72915, "total_steps": 78105, "loss": 0.1747, "lr": 6.69771278597281e-08, "epoch": 4.6677549452659886, "percentage": 93.36, "elapsed_time": "3:11:45", "remaining_time": "0:13:38", "throughput": 19944.61, "total_tokens": 229475712} +{"current_steps": 72920, "total_steps": 78105, "loss": 0.1402, "lr": 6.684874235260291e-08, "epoch": 4.668075027206965, "percentage": 93.36, "elapsed_time": "3:11:46", "remaining_time": "0:13:38", "throughput": 19944.81, "total_tokens": 229491520} +{"current_steps": 72925, "total_steps": 78105, "loss": 0.17, "lr": 6.672047834437783e-08, "epoch": 4.668395109147942, "percentage": 93.37, "elapsed_time": "3:11:47", "remaining_time": "0:13:37", "throughput": 19945.02, "total_tokens": 229507392} +{"current_steps": 72930, "total_steps": 78105, "loss": 0.1274, "lr": 6.659233584145746e-08, "epoch": 4.668715191088919, "percentage": 93.37, "elapsed_time": "3:11:47", "remaining_time": "0:13:36", "throughput": 19945.25, "total_tokens": 229524352} +{"current_steps": 72935, "total_steps": 78105, "loss": 0.1653, "lr": 6.646431485024085e-08, "epoch": 4.669035273029896, "percentage": 93.38, "elapsed_time": "3:11:48", "remaining_time": "0:13:35", "throughput": 19945.43, "total_tokens": 229539968} +{"current_steps": 72940, "total_steps": 78105, "loss": 0.1243, "lr": 6.633641537712066e-08, "epoch": 4.669355354970873, "percentage": 93.39, "elapsed_time": "3:11:49", "remaining_time": "0:13:34", "throughput": 19945.6, "total_tokens": 229554944} +{"current_steps": 72945, "total_steps": 78105, "loss": 0.1262, "lr": 6.620863742848288e-08, "epoch": 4.66967543691185, "percentage": 93.39, "elapsed_time": "3:11:49", "remaining_time": "0:13:34", "throughput": 19945.81, "total_tokens": 229571200} +{"current_steps": 72950, "total_steps": 78105, "loss": 0.1765, "lr": 6.608098101070908e-08, "epoch": 4.669995518852827, "percentage": 93.4, "elapsed_time": "3:11:50", "remaining_time": "0:13:33", "throughput": 19946.0, "total_tokens": 229586880} +{"current_steps": 72955, "total_steps": 78105, "loss": 0.1208, "lr": 6.595344613017307e-08, "epoch": 4.670315600793804, "percentage": 93.41, "elapsed_time": "3:11:51", "remaining_time": "0:13:32", "throughput": 19946.17, "total_tokens": 229601792} +{"current_steps": 72960, "total_steps": 78105, "loss": 0.1383, "lr": 6.582603279324362e-08, "epoch": 4.67063568273478, "percentage": 93.41, "elapsed_time": "3:11:51", "remaining_time": "0:13:31", "throughput": 19946.35, "total_tokens": 229617024} +{"current_steps": 72965, "total_steps": 78105, "loss": 0.1161, "lr": 6.569874100628288e-08, "epoch": 4.670955764675757, "percentage": 93.42, "elapsed_time": "3:11:52", "remaining_time": "0:13:30", "throughput": 19946.58, "total_tokens": 229633728} +{"current_steps": 72970, "total_steps": 78105, "loss": 0.1409, "lr": 6.557157077564713e-08, "epoch": 4.671275846616734, "percentage": 93.43, "elapsed_time": "3:11:53", "remaining_time": "0:13:30", "throughput": 19946.76, "total_tokens": 229648960} +{"current_steps": 72975, "total_steps": 78105, "loss": 0.1149, "lr": 6.54445221076866e-08, "epoch": 4.671595928557711, "percentage": 93.43, "elapsed_time": "3:11:53", "remaining_time": "0:13:29", "throughput": 19946.95, "total_tokens": 229664704} +{"current_steps": 72980, "total_steps": 78105, "loss": 0.1062, "lr": 6.531759500874563e-08, "epoch": 4.671916010498688, "percentage": 93.44, "elapsed_time": "3:11:54", "remaining_time": "0:13:28", "throughput": 19947.1, "total_tokens": 229679232} +{"current_steps": 72985, "total_steps": 78105, "loss": 0.1484, "lr": 6.519078948516194e-08, "epoch": 4.672236092439665, "percentage": 93.44, "elapsed_time": "3:11:55", "remaining_time": "0:13:27", "throughput": 19947.34, "total_tokens": 229696128} +{"current_steps": 72990, "total_steps": 78105, "loss": 0.1004, "lr": 6.506410554326825e-08, "epoch": 4.672556174380642, "percentage": 93.45, "elapsed_time": "3:11:55", "remaining_time": "0:13:27", "throughput": 19947.5, "total_tokens": 229710656} +{"current_steps": 72995, "total_steps": 78105, "loss": 0.1867, "lr": 6.493754318938949e-08, "epoch": 4.672876256321619, "percentage": 93.46, "elapsed_time": "3:11:56", "remaining_time": "0:13:26", "throughput": 19947.69, "total_tokens": 229726592} +{"current_steps": 73000, "total_steps": 78105, "loss": 0.1095, "lr": 6.481110242984645e-08, "epoch": 4.673196338262596, "percentage": 93.46, "elapsed_time": "3:11:57", "remaining_time": "0:13:25", "throughput": 19947.88, "total_tokens": 229742336} +{"current_steps": 73005, "total_steps": 78105, "loss": 0.0999, "lr": 6.468478327095212e-08, "epoch": 4.673516420203573, "percentage": 93.47, "elapsed_time": "3:11:57", "remaining_time": "0:13:24", "throughput": 19948.04, "total_tokens": 229757312} +{"current_steps": 73010, "total_steps": 78105, "loss": 0.1799, "lr": 6.455858571901508e-08, "epoch": 4.673836502144549, "percentage": 93.48, "elapsed_time": "3:11:58", "remaining_time": "0:13:23", "throughput": 19948.18, "total_tokens": 229771520} +{"current_steps": 73015, "total_steps": 78105, "loss": 0.0701, "lr": 6.44325097803361e-08, "epoch": 4.674156584085526, "percentage": 93.48, "elapsed_time": "3:11:59", "remaining_time": "0:13:23", "throughput": 19948.38, "total_tokens": 229787648} +{"current_steps": 73020, "total_steps": 78105, "loss": 0.0831, "lr": 6.430655546121129e-08, "epoch": 4.674476666026503, "percentage": 93.49, "elapsed_time": "3:11:59", "remaining_time": "0:13:22", "throughput": 19948.55, "total_tokens": 229803072} +{"current_steps": 73025, "total_steps": 78105, "loss": 0.1144, "lr": 6.418072276793031e-08, "epoch": 4.67479674796748, "percentage": 93.5, "elapsed_time": "3:12:00", "remaining_time": "0:13:21", "throughput": 19948.74, "total_tokens": 229818880} +{"current_steps": 73030, "total_steps": 78105, "loss": 0.1126, "lr": 6.405501170677591e-08, "epoch": 4.675116829908457, "percentage": 93.5, "elapsed_time": "3:12:01", "remaining_time": "0:13:20", "throughput": 19948.94, "total_tokens": 229834944} +{"current_steps": 73035, "total_steps": 78105, "loss": 0.1023, "lr": 6.392942228402616e-08, "epoch": 4.675436911849434, "percentage": 93.51, "elapsed_time": "3:12:01", "remaining_time": "0:13:19", "throughput": 19949.12, "total_tokens": 229850624} +{"current_steps": 73040, "total_steps": 78105, "loss": 0.1349, "lr": 6.380395450595184e-08, "epoch": 4.675756993790411, "percentage": 93.52, "elapsed_time": "3:12:02", "remaining_time": "0:13:19", "throughput": 19949.28, "total_tokens": 229865920} +{"current_steps": 73045, "total_steps": 78105, "loss": 0.109, "lr": 6.367860837881851e-08, "epoch": 4.676077075731387, "percentage": 93.52, "elapsed_time": "3:12:03", "remaining_time": "0:13:18", "throughput": 19949.43, "total_tokens": 229880320} +{"current_steps": 73050, "total_steps": 78105, "loss": 0.149, "lr": 6.355338390888505e-08, "epoch": 4.676397157672364, "percentage": 93.53, "elapsed_time": "3:12:03", "remaining_time": "0:13:17", "throughput": 19949.65, "total_tokens": 229896640} +{"current_steps": 73055, "total_steps": 78105, "loss": 0.1463, "lr": 6.342828110240451e-08, "epoch": 4.676717239613341, "percentage": 93.53, "elapsed_time": "3:12:04", "remaining_time": "0:13:16", "throughput": 19949.83, "total_tokens": 229911936} +{"current_steps": 73060, "total_steps": 78105, "loss": 0.1401, "lr": 6.330329996562412e-08, "epoch": 4.677037321554318, "percentage": 93.54, "elapsed_time": "3:12:05", "remaining_time": "0:13:15", "throughput": 19950.06, "total_tokens": 229929088} +{"current_steps": 73065, "total_steps": 78105, "loss": 0.1218, "lr": 6.317844050478445e-08, "epoch": 4.677357403495295, "percentage": 93.55, "elapsed_time": "3:12:05", "remaining_time": "0:13:15", "throughput": 19950.24, "total_tokens": 229944512} +{"current_steps": 73070, "total_steps": 78105, "loss": 0.1105, "lr": 6.305370272612078e-08, "epoch": 4.677677485436272, "percentage": 93.55, "elapsed_time": "3:12:06", "remaining_time": "0:13:14", "throughput": 19950.42, "total_tokens": 229960064} +{"current_steps": 73075, "total_steps": 78105, "loss": 0.2026, "lr": 6.292908663586144e-08, "epoch": 4.677997567377249, "percentage": 93.56, "elapsed_time": "3:12:07", "remaining_time": "0:13:13", "throughput": 19950.6, "total_tokens": 229975232} +{"current_steps": 73080, "total_steps": 78105, "loss": 0.1484, "lr": 6.280459224022928e-08, "epoch": 4.678317649318226, "percentage": 93.57, "elapsed_time": "3:12:07", "remaining_time": "0:13:12", "throughput": 19950.84, "total_tokens": 229992256} +{"current_steps": 73085, "total_steps": 78105, "loss": 0.1312, "lr": 6.268021954544095e-08, "epoch": 4.678637731259203, "percentage": 93.57, "elapsed_time": "3:12:08", "remaining_time": "0:13:11", "throughput": 19950.99, "total_tokens": 230006976} +{"current_steps": 73090, "total_steps": 78105, "loss": 0.136, "lr": 6.255596855770707e-08, "epoch": 4.67895781320018, "percentage": 93.58, "elapsed_time": "3:12:09", "remaining_time": "0:13:11", "throughput": 19951.14, "total_tokens": 230021568} +{"current_steps": 73095, "total_steps": 78105, "loss": 0.1165, "lr": 6.243183928323181e-08, "epoch": 4.679277895141156, "percentage": 93.59, "elapsed_time": "3:12:09", "remaining_time": "0:13:10", "throughput": 19951.35, "total_tokens": 230037632} +{"current_steps": 73100, "total_steps": 78105, "loss": 0.1218, "lr": 6.230783172821359e-08, "epoch": 4.679597977082133, "percentage": 93.59, "elapsed_time": "3:12:10", "remaining_time": "0:13:09", "throughput": 19951.53, "total_tokens": 230053184} +{"current_steps": 73105, "total_steps": 78105, "loss": 0.1192, "lr": 6.218394589884464e-08, "epoch": 4.67991805902311, "percentage": 93.6, "elapsed_time": "3:12:11", "remaining_time": "0:13:08", "throughput": 19951.7, "total_tokens": 230068416} +{"current_steps": 73110, "total_steps": 78105, "loss": 0.1385, "lr": 6.206018180131168e-08, "epoch": 4.680238140964087, "percentage": 93.6, "elapsed_time": "3:12:11", "remaining_time": "0:13:07", "throughput": 19951.9, "total_tokens": 230084672} +{"current_steps": 73115, "total_steps": 78105, "loss": 0.0944, "lr": 6.193653944179423e-08, "epoch": 4.680558222905064, "percentage": 93.61, "elapsed_time": "3:12:12", "remaining_time": "0:13:07", "throughput": 19952.13, "total_tokens": 230101376} +{"current_steps": 73120, "total_steps": 78105, "loss": 0.1419, "lr": 6.181301882646679e-08, "epoch": 4.680878304846041, "percentage": 93.62, "elapsed_time": "3:12:13", "remaining_time": "0:13:06", "throughput": 19952.31, "total_tokens": 230116864} +{"current_steps": 73125, "total_steps": 78105, "loss": 0.1384, "lr": 6.168961996149692e-08, "epoch": 4.681198386787018, "percentage": 93.62, "elapsed_time": "3:12:14", "remaining_time": "0:13:05", "throughput": 19952.52, "total_tokens": 230133248} +{"current_steps": 73130, "total_steps": 78105, "loss": 0.1422, "lr": 6.156634285304691e-08, "epoch": 4.681518468727995, "percentage": 93.63, "elapsed_time": "3:12:14", "remaining_time": "0:13:04", "throughput": 19952.71, "total_tokens": 230148800} +{"current_steps": 73135, "total_steps": 78105, "loss": 0.1046, "lr": 6.144318750727213e-08, "epoch": 4.681838550668971, "percentage": 93.64, "elapsed_time": "3:12:15", "remaining_time": "0:13:03", "throughput": 19952.88, "total_tokens": 230164032} +{"current_steps": 73140, "total_steps": 78105, "loss": 0.1448, "lr": 6.132015393032264e-08, "epoch": 4.682158632609948, "percentage": 93.64, "elapsed_time": "3:12:16", "remaining_time": "0:13:03", "throughput": 19953.08, "total_tokens": 230180288} +{"current_steps": 73145, "total_steps": 78105, "loss": 0.1762, "lr": 6.119724212834244e-08, "epoch": 4.682478714550925, "percentage": 93.65, "elapsed_time": "3:12:16", "remaining_time": "0:13:02", "throughput": 19953.27, "total_tokens": 230195904} +{"current_steps": 73150, "total_steps": 78105, "loss": 0.1421, "lr": 6.107445210746826e-08, "epoch": 4.682798796491902, "percentage": 93.66, "elapsed_time": "3:12:17", "remaining_time": "0:13:01", "throughput": 19953.45, "total_tokens": 230211264} +{"current_steps": 73155, "total_steps": 78105, "loss": 0.121, "lr": 6.095178387383217e-08, "epoch": 4.683118878432879, "percentage": 93.66, "elapsed_time": "3:12:18", "remaining_time": "0:13:00", "throughput": 19953.64, "total_tokens": 230226816} +{"current_steps": 73160, "total_steps": 78105, "loss": 0.1636, "lr": 6.082923743355951e-08, "epoch": 4.683438960373856, "percentage": 93.67, "elapsed_time": "3:12:18", "remaining_time": "0:12:59", "throughput": 19953.84, "total_tokens": 230242816} +{"current_steps": 73165, "total_steps": 78105, "loss": 0.1264, "lr": 6.070681279276958e-08, "epoch": 4.683759042314833, "percentage": 93.68, "elapsed_time": "3:12:19", "remaining_time": "0:12:59", "throughput": 19954.01, "total_tokens": 230257792} +{"current_steps": 73170, "total_steps": 78105, "loss": 0.1512, "lr": 6.05845099575758e-08, "epoch": 4.68407912425581, "percentage": 93.68, "elapsed_time": "3:12:20", "remaining_time": "0:12:58", "throughput": 19954.17, "total_tokens": 230272448} +{"current_steps": 73175, "total_steps": 78105, "loss": 0.187, "lr": 6.046232893408499e-08, "epoch": 4.684399206196787, "percentage": 93.69, "elapsed_time": "3:12:20", "remaining_time": "0:12:57", "throughput": 19954.35, "total_tokens": 230287936} +{"current_steps": 73180, "total_steps": 78105, "loss": 0.1061, "lr": 6.034026972839807e-08, "epoch": 4.684719288137764, "percentage": 93.69, "elapsed_time": "3:12:21", "remaining_time": "0:12:56", "throughput": 19954.53, "total_tokens": 230303424} +{"current_steps": 73185, "total_steps": 78105, "loss": 0.0877, "lr": 6.021833234661045e-08, "epoch": 4.68503937007874, "percentage": 93.7, "elapsed_time": "3:12:22", "remaining_time": "0:12:55", "throughput": 19954.73, "total_tokens": 230319552} +{"current_steps": 73190, "total_steps": 78105, "loss": 0.1239, "lr": 6.009651679481116e-08, "epoch": 4.685359452019717, "percentage": 93.71, "elapsed_time": "3:12:22", "remaining_time": "0:12:55", "throughput": 19954.9, "total_tokens": 230334528} +{"current_steps": 73195, "total_steps": 78105, "loss": 0.1369, "lr": 5.997482307908281e-08, "epoch": 4.685679533960694, "percentage": 93.71, "elapsed_time": "3:12:23", "remaining_time": "0:12:54", "throughput": 19955.07, "total_tokens": 230349568} +{"current_steps": 73200, "total_steps": 78105, "loss": 0.1013, "lr": 5.985325120550223e-08, "epoch": 4.685999615901671, "percentage": 93.72, "elapsed_time": "3:12:24", "remaining_time": "0:12:53", "throughput": 19955.27, "total_tokens": 230365696} +{"current_steps": 73205, "total_steps": 78105, "loss": 0.1414, "lr": 5.973180118013982e-08, "epoch": 4.686319697842648, "percentage": 93.73, "elapsed_time": "3:12:24", "remaining_time": "0:12:52", "throughput": 19955.48, "total_tokens": 230382016} +{"current_steps": 73210, "total_steps": 78105, "loss": 0.1835, "lr": 5.961047300906047e-08, "epoch": 4.686639779783625, "percentage": 93.73, "elapsed_time": "3:12:25", "remaining_time": "0:12:51", "throughput": 19955.66, "total_tokens": 230397696} +{"current_steps": 73215, "total_steps": 78105, "loss": 0.1048, "lr": 5.9489266698322387e-08, "epoch": 4.686959861724602, "percentage": 93.74, "elapsed_time": "3:12:26", "remaining_time": "0:12:51", "throughput": 19955.83, "total_tokens": 230412864} +{"current_steps": 73220, "total_steps": 78105, "loss": 0.0997, "lr": 5.936818225397878e-08, "epoch": 4.687279943665579, "percentage": 93.75, "elapsed_time": "3:12:26", "remaining_time": "0:12:50", "throughput": 19956.01, "total_tokens": 230427968} +{"current_steps": 73225, "total_steps": 78105, "loss": 0.1102, "lr": 5.924721968207453e-08, "epoch": 4.687600025606555, "percentage": 93.75, "elapsed_time": "3:12:27", "remaining_time": "0:12:49", "throughput": 19956.21, "total_tokens": 230443776} +{"current_steps": 73230, "total_steps": 78105, "loss": 0.1537, "lr": 5.912637898865148e-08, "epoch": 4.687920107547532, "percentage": 93.76, "elapsed_time": "3:12:28", "remaining_time": "0:12:48", "throughput": 19956.38, "total_tokens": 230458880} +{"current_steps": 73235, "total_steps": 78105, "loss": 0.1252, "lr": 5.900566017974257e-08, "epoch": 4.688240189488509, "percentage": 93.76, "elapsed_time": "3:12:28", "remaining_time": "0:12:47", "throughput": 19956.55, "total_tokens": 230473920} +{"current_steps": 73240, "total_steps": 78105, "loss": 0.153, "lr": 5.888506326137633e-08, "epoch": 4.688560271429486, "percentage": 93.77, "elapsed_time": "3:12:29", "remaining_time": "0:12:47", "throughput": 19956.76, "total_tokens": 230489792} +{"current_steps": 73245, "total_steps": 78105, "loss": 0.1257, "lr": 5.876458823957487e-08, "epoch": 4.688880353370463, "percentage": 93.78, "elapsed_time": "3:12:30", "remaining_time": "0:12:46", "throughput": 19956.93, "total_tokens": 230504832} +{"current_steps": 73250, "total_steps": 78105, "loss": 0.1214, "lr": 5.8644235120353665e-08, "epoch": 4.68920043531144, "percentage": 93.78, "elapsed_time": "3:12:30", "remaining_time": "0:12:45", "throughput": 19957.12, "total_tokens": 230520192} +{"current_steps": 73255, "total_steps": 78105, "loss": 0.1276, "lr": 5.852400390972318e-08, "epoch": 4.689520517252417, "percentage": 93.79, "elapsed_time": "3:12:31", "remaining_time": "0:12:44", "throughput": 19957.3, "total_tokens": 230535680} +{"current_steps": 73260, "total_steps": 78105, "loss": 0.1105, "lr": 5.84038946136864e-08, "epoch": 4.689840599193394, "percentage": 93.8, "elapsed_time": "3:12:32", "remaining_time": "0:12:43", "throughput": 19957.47, "total_tokens": 230550528} +{"current_steps": 73265, "total_steps": 78105, "loss": 0.1535, "lr": 5.828390723824184e-08, "epoch": 4.690160681134371, "percentage": 93.8, "elapsed_time": "3:12:32", "remaining_time": "0:12:43", "throughput": 19957.64, "total_tokens": 230565440} +{"current_steps": 73270, "total_steps": 78105, "loss": 0.1366, "lr": 5.816404178938001e-08, "epoch": 4.690480763075348, "percentage": 93.81, "elapsed_time": "3:12:33", "remaining_time": "0:12:42", "throughput": 19957.87, "total_tokens": 230582208} +{"current_steps": 73275, "total_steps": 78105, "loss": 0.1885, "lr": 5.804429827308694e-08, "epoch": 4.690800845016324, "percentage": 93.82, "elapsed_time": "3:12:34", "remaining_time": "0:12:41", "throughput": 19958.06, "total_tokens": 230597696} +{"current_steps": 73280, "total_steps": 78105, "loss": 0.0957, "lr": 5.792467669534202e-08, "epoch": 4.691120926957301, "percentage": 93.82, "elapsed_time": "3:12:34", "remaining_time": "0:12:40", "throughput": 19958.27, "total_tokens": 230613888} +{"current_steps": 73285, "total_steps": 78105, "loss": 0.2248, "lr": 5.7805177062118525e-08, "epoch": 4.691441008898278, "percentage": 93.83, "elapsed_time": "3:12:35", "remaining_time": "0:12:40", "throughput": 19958.46, "total_tokens": 230629632} +{"current_steps": 73290, "total_steps": 78105, "loss": 0.163, "lr": 5.768579937938335e-08, "epoch": 4.691761090839255, "percentage": 93.84, "elapsed_time": "3:12:36", "remaining_time": "0:12:39", "throughput": 19958.66, "total_tokens": 230645440} +{"current_steps": 73295, "total_steps": 78105, "loss": 0.1181, "lr": 5.756654365309783e-08, "epoch": 4.692081172780232, "percentage": 93.84, "elapsed_time": "3:12:36", "remaining_time": "0:12:38", "throughput": 19958.82, "total_tokens": 230660544} +{"current_steps": 73300, "total_steps": 78105, "loss": 0.16, "lr": 5.744740988921721e-08, "epoch": 4.692401254721209, "percentage": 93.85, "elapsed_time": "3:12:37", "remaining_time": "0:12:37", "throughput": 19959.05, "total_tokens": 230677312} +{"current_steps": 73305, "total_steps": 78105, "loss": 0.1196, "lr": 5.732839809368979e-08, "epoch": 4.692721336662186, "percentage": 93.85, "elapsed_time": "3:12:38", "remaining_time": "0:12:36", "throughput": 19959.25, "total_tokens": 230693312} +{"current_steps": 73310, "total_steps": 78105, "loss": 0.157, "lr": 5.7209508272459135e-08, "epoch": 4.693041418603162, "percentage": 93.86, "elapsed_time": "3:12:38", "remaining_time": "0:12:36", "throughput": 19959.45, "total_tokens": 230709632} +{"current_steps": 73315, "total_steps": 78105, "loss": 0.15, "lr": 5.7090740431461346e-08, "epoch": 4.693361500544139, "percentage": 93.87, "elapsed_time": "3:12:39", "remaining_time": "0:12:35", "throughput": 19959.65, "total_tokens": 230725632} +{"current_steps": 73320, "total_steps": 78105, "loss": 0.1249, "lr": 5.69720945766275e-08, "epoch": 4.693681582485116, "percentage": 93.87, "elapsed_time": "3:12:40", "remaining_time": "0:12:34", "throughput": 19959.85, "total_tokens": 230741632} +{"current_steps": 73325, "total_steps": 78105, "loss": 0.1326, "lr": 5.685357071388203e-08, "epoch": 4.694001664426093, "percentage": 93.88, "elapsed_time": "3:12:40", "remaining_time": "0:12:33", "throughput": 19960.02, "total_tokens": 230757184} +{"current_steps": 73330, "total_steps": 78105, "loss": 0.1176, "lr": 5.673516884914326e-08, "epoch": 4.69432174636707, "percentage": 93.89, "elapsed_time": "3:12:41", "remaining_time": "0:12:32", "throughput": 19960.16, "total_tokens": 230771520} +{"current_steps": 73335, "total_steps": 78105, "loss": 0.142, "lr": 5.6616888988323404e-08, "epoch": 4.694641828308047, "percentage": 93.89, "elapsed_time": "3:12:42", "remaining_time": "0:12:32", "throughput": 19960.34, "total_tokens": 230787136} +{"current_steps": 73340, "total_steps": 78105, "loss": 0.2031, "lr": 5.64987311373294e-08, "epoch": 4.694961910249024, "percentage": 93.9, "elapsed_time": "3:12:43", "remaining_time": "0:12:31", "throughput": 19960.61, "total_tokens": 230805184} +{"current_steps": 73345, "total_steps": 78105, "loss": 0.0862, "lr": 5.6380695302060696e-08, "epoch": 4.695281992190001, "percentage": 93.91, "elapsed_time": "3:12:43", "remaining_time": "0:12:30", "throughput": 19960.78, "total_tokens": 230820288} +{"current_steps": 73350, "total_steps": 78105, "loss": 0.1266, "lr": 5.626278148841174e-08, "epoch": 4.695602074130978, "percentage": 93.91, "elapsed_time": "3:12:44", "remaining_time": "0:12:29", "throughput": 19960.95, "total_tokens": 230835456} +{"current_steps": 73355, "total_steps": 78105, "loss": 0.1365, "lr": 5.614498970227061e-08, "epoch": 4.695922156071955, "percentage": 93.92, "elapsed_time": "3:12:45", "remaining_time": "0:12:28", "throughput": 19961.18, "total_tokens": 230852480} +{"current_steps": 73360, "total_steps": 78105, "loss": 0.1164, "lr": 5.6027319949519264e-08, "epoch": 4.696242238012931, "percentage": 93.92, "elapsed_time": "3:12:45", "remaining_time": "0:12:28", "throughput": 19961.4, "total_tokens": 230868992} +{"current_steps": 73365, "total_steps": 78105, "loss": 0.1405, "lr": 5.590977223603328e-08, "epoch": 4.696562319953908, "percentage": 93.93, "elapsed_time": "3:12:46", "remaining_time": "0:12:27", "throughput": 19961.63, "total_tokens": 230885760} +{"current_steps": 73370, "total_steps": 78105, "loss": 0.0959, "lr": 5.579234656768212e-08, "epoch": 4.696882401894885, "percentage": 93.94, "elapsed_time": "3:12:47", "remaining_time": "0:12:26", "throughput": 19961.88, "total_tokens": 230903232} +{"current_steps": 73375, "total_steps": 78105, "loss": 0.106, "lr": 5.567504295033027e-08, "epoch": 4.697202483835862, "percentage": 93.94, "elapsed_time": "3:12:47", "remaining_time": "0:12:25", "throughput": 19962.05, "total_tokens": 230918272} +{"current_steps": 73380, "total_steps": 78105, "loss": 0.1122, "lr": 5.555786138983443e-08, "epoch": 4.697522565776839, "percentage": 93.95, "elapsed_time": "3:12:48", "remaining_time": "0:12:24", "throughput": 19962.24, "total_tokens": 230934272} +{"current_steps": 73385, "total_steps": 78105, "loss": 0.1117, "lr": 5.5440801892046594e-08, "epoch": 4.697842647717816, "percentage": 93.96, "elapsed_time": "3:12:49", "remaining_time": "0:12:24", "throughput": 19962.44, "total_tokens": 230950144} +{"current_steps": 73390, "total_steps": 78105, "loss": 0.1378, "lr": 5.53238644628118e-08, "epoch": 4.698162729658793, "percentage": 93.96, "elapsed_time": "3:12:49", "remaining_time": "0:12:23", "throughput": 19962.61, "total_tokens": 230965184} +{"current_steps": 73395, "total_steps": 78105, "loss": 0.1683, "lr": 5.5207049107969257e-08, "epoch": 4.69848281159977, "percentage": 93.97, "elapsed_time": "3:12:50", "remaining_time": "0:12:22", "throughput": 19962.81, "total_tokens": 230981312} +{"current_steps": 73400, "total_steps": 78105, "loss": 0.1005, "lr": 5.509035583335237e-08, "epoch": 4.698802893540746, "percentage": 93.98, "elapsed_time": "3:12:51", "remaining_time": "0:12:21", "throughput": 19962.97, "total_tokens": 230996288} +{"current_steps": 73405, "total_steps": 78105, "loss": 0.1176, "lr": 5.497378464478787e-08, "epoch": 4.699122975481723, "percentage": 93.98, "elapsed_time": "3:12:51", "remaining_time": "0:12:20", "throughput": 19963.13, "total_tokens": 231011520} +{"current_steps": 73410, "total_steps": 78105, "loss": 0.1081, "lr": 5.485733554809719e-08, "epoch": 4.6994430574227, "percentage": 93.99, "elapsed_time": "3:12:52", "remaining_time": "0:12:20", "throughput": 19963.31, "total_tokens": 231026880} +{"current_steps": 73415, "total_steps": 78105, "loss": 0.147, "lr": 5.474100854909431e-08, "epoch": 4.699763139363677, "percentage": 94.0, "elapsed_time": "3:12:53", "remaining_time": "0:12:19", "throughput": 19963.52, "total_tokens": 231043072} +{"current_steps": 73420, "total_steps": 78105, "loss": 0.1295, "lr": 5.462480365358902e-08, "epoch": 4.700083221304654, "percentage": 94.0, "elapsed_time": "3:12:53", "remaining_time": "0:12:18", "throughput": 19963.68, "total_tokens": 231058048} +{"current_steps": 73425, "total_steps": 78105, "loss": 0.1301, "lr": 5.450872086738335e-08, "epoch": 4.700403303245631, "percentage": 94.01, "elapsed_time": "3:12:54", "remaining_time": "0:12:17", "throughput": 19963.85, "total_tokens": 231073344} +{"current_steps": 73430, "total_steps": 78105, "loss": 0.1716, "lr": 5.439276019627432e-08, "epoch": 4.700723385186608, "percentage": 94.01, "elapsed_time": "3:12:55", "remaining_time": "0:12:16", "throughput": 19964.03, "total_tokens": 231088704} +{"current_steps": 73435, "total_steps": 78105, "loss": 0.1269, "lr": 5.427692164605203e-08, "epoch": 4.701043467127585, "percentage": 94.02, "elapsed_time": "3:12:55", "remaining_time": "0:12:16", "throughput": 19964.2, "total_tokens": 231103744} +{"current_steps": 73440, "total_steps": 78105, "loss": 0.1007, "lr": 5.416120522250101e-08, "epoch": 4.701363549068562, "percentage": 94.03, "elapsed_time": "3:12:56", "remaining_time": "0:12:15", "throughput": 19964.39, "total_tokens": 231119360} +{"current_steps": 73445, "total_steps": 78105, "loss": 0.1522, "lr": 5.404561093139915e-08, "epoch": 4.701683631009539, "percentage": 94.03, "elapsed_time": "3:12:57", "remaining_time": "0:12:14", "throughput": 19964.55, "total_tokens": 231134848} +{"current_steps": 73450, "total_steps": 78105, "loss": 0.1559, "lr": 5.393013877851932e-08, "epoch": 4.702003712950515, "percentage": 94.04, "elapsed_time": "3:12:57", "remaining_time": "0:12:13", "throughput": 19964.76, "total_tokens": 231151040} +{"current_steps": 73455, "total_steps": 78105, "loss": 0.1347, "lr": 5.381478876962692e-08, "epoch": 4.702323794891492, "percentage": 94.05, "elapsed_time": "3:12:58", "remaining_time": "0:12:12", "throughput": 19964.92, "total_tokens": 231165824} +{"current_steps": 73460, "total_steps": 78105, "loss": 0.1306, "lr": 5.36995609104829e-08, "epoch": 4.702643876832469, "percentage": 94.05, "elapsed_time": "3:12:59", "remaining_time": "0:12:12", "throughput": 19965.06, "total_tokens": 231180608} +{"current_steps": 73465, "total_steps": 78105, "loss": 0.1666, "lr": 5.3584455206839855e-08, "epoch": 4.702963958773446, "percentage": 94.06, "elapsed_time": "3:12:59", "remaining_time": "0:12:11", "throughput": 19965.28, "total_tokens": 231196992} +{"current_steps": 73470, "total_steps": 78105, "loss": 0.1021, "lr": 5.346947166444655e-08, "epoch": 4.703284040714423, "percentage": 94.07, "elapsed_time": "3:13:00", "remaining_time": "0:12:10", "throughput": 19965.43, "total_tokens": 231211712} +{"current_steps": 73475, "total_steps": 78105, "loss": 0.1717, "lr": 5.335461028904421e-08, "epoch": 4.7036041226554, "percentage": 94.07, "elapsed_time": "3:13:01", "remaining_time": "0:12:09", "throughput": 19965.6, "total_tokens": 231226688} +{"current_steps": 73480, "total_steps": 78105, "loss": 0.1499, "lr": 5.3239871086368245e-08, "epoch": 4.703924204596377, "percentage": 94.08, "elapsed_time": "3:13:01", "remaining_time": "0:12:08", "throughput": 19965.78, "total_tokens": 231241856} +{"current_steps": 73485, "total_steps": 78105, "loss": 0.0853, "lr": 5.31252540621488e-08, "epoch": 4.704244286537354, "percentage": 94.08, "elapsed_time": "3:13:02", "remaining_time": "0:12:08", "throughput": 19966.0, "total_tokens": 231258560} +{"current_steps": 73490, "total_steps": 78105, "loss": 0.1042, "lr": 5.301075922210852e-08, "epoch": 4.70456436847833, "percentage": 94.09, "elapsed_time": "3:13:03", "remaining_time": "0:12:07", "throughput": 19966.18, "total_tokens": 231274240} +{"current_steps": 73495, "total_steps": 78105, "loss": 0.1418, "lr": 5.2896386571965605e-08, "epoch": 4.704884450419307, "percentage": 94.1, "elapsed_time": "3:13:03", "remaining_time": "0:12:06", "throughput": 19966.36, "total_tokens": 231290048} +{"current_steps": 73500, "total_steps": 78105, "loss": 0.1013, "lr": 5.278213611742994e-08, "epoch": 4.705204532360284, "percentage": 94.1, "elapsed_time": "3:13:04", "remaining_time": "0:12:05", "throughput": 19966.55, "total_tokens": 231306048} +{"current_steps": 73505, "total_steps": 78105, "loss": 0.1174, "lr": 5.266800786420778e-08, "epoch": 4.705524614301261, "percentage": 94.11, "elapsed_time": "3:13:05", "remaining_time": "0:12:05", "throughput": 19966.78, "total_tokens": 231322816} +{"current_steps": 73510, "total_steps": 78105, "loss": 0.137, "lr": 5.2554001817997636e-08, "epoch": 4.705844696242238, "percentage": 94.12, "elapsed_time": "3:13:06", "remaining_time": "0:12:04", "throughput": 19966.92, "total_tokens": 231337280} +{"current_steps": 73515, "total_steps": 78105, "loss": 0.1297, "lr": 5.2440117984492166e-08, "epoch": 4.706164778183215, "percentage": 94.12, "elapsed_time": "3:13:06", "remaining_time": "0:12:03", "throughput": 19967.1, "total_tokens": 231352960} +{"current_steps": 73520, "total_steps": 78105, "loss": 0.1608, "lr": 5.23263563693785e-08, "epoch": 4.706484860124192, "percentage": 94.13, "elapsed_time": "3:13:07", "remaining_time": "0:12:02", "throughput": 19967.27, "total_tokens": 231368064} +{"current_steps": 73525, "total_steps": 78105, "loss": 0.1151, "lr": 5.22127169783368e-08, "epoch": 4.706804942065169, "percentage": 94.14, "elapsed_time": "3:13:08", "remaining_time": "0:12:01", "throughput": 19967.49, "total_tokens": 231384832} +{"current_steps": 73530, "total_steps": 78105, "loss": 0.1378, "lr": 5.209919981704198e-08, "epoch": 4.707125024006146, "percentage": 94.14, "elapsed_time": "3:13:08", "remaining_time": "0:12:01", "throughput": 19967.7, "total_tokens": 231401088} +{"current_steps": 73535, "total_steps": 78105, "loss": 0.105, "lr": 5.198580489116284e-08, "epoch": 4.707445105947123, "percentage": 94.15, "elapsed_time": "3:13:09", "remaining_time": "0:12:00", "throughput": 19967.88, "total_tokens": 231416896} +{"current_steps": 73540, "total_steps": 78105, "loss": 0.0956, "lr": 5.1872532206361234e-08, "epoch": 4.707765187888099, "percentage": 94.16, "elapsed_time": "3:13:10", "remaining_time": "0:11:59", "throughput": 19968.07, "total_tokens": 231432320} +{"current_steps": 73545, "total_steps": 78105, "loss": 0.1142, "lr": 5.175938176829348e-08, "epoch": 4.708085269829076, "percentage": 94.16, "elapsed_time": "3:13:10", "remaining_time": "0:11:58", "throughput": 19968.32, "total_tokens": 231449600} +{"current_steps": 73550, "total_steps": 78105, "loss": 0.1118, "lr": 5.1646353582609777e-08, "epoch": 4.708405351770053, "percentage": 94.17, "elapsed_time": "3:13:11", "remaining_time": "0:11:57", "throughput": 19968.51, "total_tokens": 231465536} +{"current_steps": 73555, "total_steps": 78105, "loss": 0.1166, "lr": 5.153344765495421e-08, "epoch": 4.70872543371103, "percentage": 94.17, "elapsed_time": "3:13:12", "remaining_time": "0:11:57", "throughput": 19968.69, "total_tokens": 231480704} +{"current_steps": 73560, "total_steps": 78105, "loss": 0.1093, "lr": 5.1420663990964514e-08, "epoch": 4.709045515652007, "percentage": 94.18, "elapsed_time": "3:13:12", "remaining_time": "0:11:56", "throughput": 19968.86, "total_tokens": 231496256} +{"current_steps": 73565, "total_steps": 78105, "loss": 0.1134, "lr": 5.1308002596272555e-08, "epoch": 4.709365597592984, "percentage": 94.19, "elapsed_time": "3:13:13", "remaining_time": "0:11:55", "throughput": 19969.05, "total_tokens": 231512128} +{"current_steps": 73570, "total_steps": 78105, "loss": 0.147, "lr": 5.119546347650467e-08, "epoch": 4.709685679533961, "percentage": 94.19, "elapsed_time": "3:13:14", "remaining_time": "0:11:54", "throughput": 19969.24, "total_tokens": 231527872} +{"current_steps": 73575, "total_steps": 78105, "loss": 0.1331, "lr": 5.10830466372797e-08, "epoch": 4.710005761474937, "percentage": 94.2, "elapsed_time": "3:13:14", "remaining_time": "0:11:53", "throughput": 19969.43, "total_tokens": 231543424} +{"current_steps": 73580, "total_steps": 78105, "loss": 0.1523, "lr": 5.097075208421148e-08, "epoch": 4.710325843415914, "percentage": 94.21, "elapsed_time": "3:13:15", "remaining_time": "0:11:53", "throughput": 19969.61, "total_tokens": 231559040} +{"current_steps": 73585, "total_steps": 78105, "loss": 0.1265, "lr": 5.0858579822907206e-08, "epoch": 4.710645925356891, "percentage": 94.21, "elapsed_time": "3:13:16", "remaining_time": "0:11:52", "throughput": 19969.86, "total_tokens": 231576704} +{"current_steps": 73590, "total_steps": 78105, "loss": 0.1183, "lr": 5.07465298589685e-08, "epoch": 4.710966007297868, "percentage": 94.22, "elapsed_time": "3:13:16", "remaining_time": "0:11:51", "throughput": 19970.05, "total_tokens": 231592000} +{"current_steps": 73595, "total_steps": 78105, "loss": 0.1462, "lr": 5.06346021979906e-08, "epoch": 4.711286089238845, "percentage": 94.23, "elapsed_time": "3:13:17", "remaining_time": "0:11:50", "throughput": 19970.21, "total_tokens": 231607168} +{"current_steps": 73600, "total_steps": 78105, "loss": 0.1004, "lr": 5.0522796845561826e-08, "epoch": 4.711606171179822, "percentage": 94.23, "elapsed_time": "3:13:18", "remaining_time": "0:11:49", "throughput": 19970.42, "total_tokens": 231623424} +{"current_steps": 73605, "total_steps": 78105, "loss": 0.1133, "lr": 5.041111380726632e-08, "epoch": 4.711926253120799, "percentage": 94.24, "elapsed_time": "3:13:18", "remaining_time": "0:11:49", "throughput": 19970.59, "total_tokens": 231638592} +{"current_steps": 73610, "total_steps": 78105, "loss": 0.1748, "lr": 5.0299553088679896e-08, "epoch": 4.712246335061776, "percentage": 94.24, "elapsed_time": "3:13:19", "remaining_time": "0:11:48", "throughput": 19970.76, "total_tokens": 231653504} +{"current_steps": 73615, "total_steps": 78105, "loss": 0.1306, "lr": 5.018811469537394e-08, "epoch": 4.712566417002753, "percentage": 94.25, "elapsed_time": "3:13:20", "remaining_time": "0:11:47", "throughput": 19970.91, "total_tokens": 231668032} +{"current_steps": 73620, "total_steps": 78105, "loss": 0.1682, "lr": 5.007679863291315e-08, "epoch": 4.71288649894373, "percentage": 94.26, "elapsed_time": "3:13:20", "remaining_time": "0:11:46", "throughput": 19971.07, "total_tokens": 231683072} +{"current_steps": 73625, "total_steps": 78105, "loss": 0.1183, "lr": 4.9965604906855594e-08, "epoch": 4.713206580884706, "percentage": 94.26, "elapsed_time": "3:13:21", "remaining_time": "0:11:45", "throughput": 19971.23, "total_tokens": 231697792} +{"current_steps": 73630, "total_steps": 78105, "loss": 0.1674, "lr": 4.9854533522754045e-08, "epoch": 4.713526662825683, "percentage": 94.27, "elapsed_time": "3:13:22", "remaining_time": "0:11:45", "throughput": 19971.39, "total_tokens": 231713024} +{"current_steps": 73635, "total_steps": 78105, "loss": 0.1751, "lr": 4.974358448615463e-08, "epoch": 4.71384674476666, "percentage": 94.28, "elapsed_time": "3:13:22", "remaining_time": "0:11:44", "throughput": 19971.57, "total_tokens": 231728704} +{"current_steps": 73640, "total_steps": 78105, "loss": 0.1221, "lr": 4.963275780259791e-08, "epoch": 4.714166826707637, "percentage": 94.28, "elapsed_time": "3:13:23", "remaining_time": "0:11:43", "throughput": 19971.72, "total_tokens": 231743360} +{"current_steps": 73645, "total_steps": 78105, "loss": 0.1398, "lr": 4.952205347761751e-08, "epoch": 4.714486908648614, "percentage": 94.29, "elapsed_time": "3:13:24", "remaining_time": "0:11:42", "throughput": 19971.9, "total_tokens": 231758784} +{"current_steps": 73650, "total_steps": 78105, "loss": 0.1405, "lr": 4.9411471516741806e-08, "epoch": 4.714806990589591, "percentage": 94.3, "elapsed_time": "3:13:24", "remaining_time": "0:11:41", "throughput": 19972.07, "total_tokens": 231773888} +{"current_steps": 73655, "total_steps": 78105, "loss": 0.1416, "lr": 4.930101192549275e-08, "epoch": 4.715127072530568, "percentage": 94.3, "elapsed_time": "3:13:25", "remaining_time": "0:11:41", "throughput": 19972.23, "total_tokens": 231788800} +{"current_steps": 73660, "total_steps": 78105, "loss": 0.1014, "lr": 4.9190674709385655e-08, "epoch": 4.715447154471545, "percentage": 94.31, "elapsed_time": "3:13:26", "remaining_time": "0:11:40", "throughput": 19972.4, "total_tokens": 231804032} +{"current_steps": 73665, "total_steps": 78105, "loss": 0.1118, "lr": 4.908045987393056e-08, "epoch": 4.715767236412521, "percentage": 94.32, "elapsed_time": "3:13:26", "remaining_time": "0:11:39", "throughput": 19972.55, "total_tokens": 231818752} +{"current_steps": 73670, "total_steps": 78105, "loss": 0.1301, "lr": 4.8970367424631125e-08, "epoch": 4.716087318353498, "percentage": 94.32, "elapsed_time": "3:13:27", "remaining_time": "0:11:38", "throughput": 19972.7, "total_tokens": 231833664} +{"current_steps": 73675, "total_steps": 78105, "loss": 0.0802, "lr": 4.886039736698406e-08, "epoch": 4.716407400294475, "percentage": 94.33, "elapsed_time": "3:13:28", "remaining_time": "0:11:37", "throughput": 19972.93, "total_tokens": 231850496} +{"current_steps": 73680, "total_steps": 78105, "loss": 0.1247, "lr": 4.875054970648191e-08, "epoch": 4.716727482235452, "percentage": 94.33, "elapsed_time": "3:13:28", "remaining_time": "0:11:37", "throughput": 19973.11, "total_tokens": 231866176} +{"current_steps": 73685, "total_steps": 78105, "loss": 0.1106, "lr": 4.864082444860862e-08, "epoch": 4.717047564176429, "percentage": 94.34, "elapsed_time": "3:13:29", "remaining_time": "0:11:36", "throughput": 19973.31, "total_tokens": 231882176} +{"current_steps": 73690, "total_steps": 78105, "loss": 0.127, "lr": 4.8531221598844545e-08, "epoch": 4.717367646117406, "percentage": 94.35, "elapsed_time": "3:13:30", "remaining_time": "0:11:35", "throughput": 19973.5, "total_tokens": 231897984} +{"current_steps": 73695, "total_steps": 78105, "loss": 0.1445, "lr": 4.84217411626614e-08, "epoch": 4.717687728058383, "percentage": 94.35, "elapsed_time": "3:13:30", "remaining_time": "0:11:34", "throughput": 19973.72, "total_tokens": 231914304} +{"current_steps": 73700, "total_steps": 78105, "loss": 0.1687, "lr": 4.8312383145526774e-08, "epoch": 4.71800780999936, "percentage": 94.36, "elapsed_time": "3:13:31", "remaining_time": "0:11:34", "throughput": 19973.89, "total_tokens": 231929600} +{"current_steps": 73705, "total_steps": 78105, "loss": 0.1082, "lr": 4.8203147552901566e-08, "epoch": 4.718327891940337, "percentage": 94.37, "elapsed_time": "3:13:32", "remaining_time": "0:11:33", "throughput": 19974.05, "total_tokens": 231944576} +{"current_steps": 73710, "total_steps": 78105, "loss": 0.1516, "lr": 4.809403439024002e-08, "epoch": 4.718647973881314, "percentage": 94.37, "elapsed_time": "3:13:33", "remaining_time": "0:11:32", "throughput": 19974.27, "total_tokens": 231961408} +{"current_steps": 73715, "total_steps": 78105, "loss": 0.1671, "lr": 4.7985043662991395e-08, "epoch": 4.71896805582229, "percentage": 94.38, "elapsed_time": "3:13:33", "remaining_time": "0:11:31", "throughput": 19974.44, "total_tokens": 231976448} +{"current_steps": 73720, "total_steps": 78105, "loss": 0.1444, "lr": 4.787617537659689e-08, "epoch": 4.719288137763267, "percentage": 94.39, "elapsed_time": "3:13:34", "remaining_time": "0:11:30", "throughput": 19974.6, "total_tokens": 231990912} +{"current_steps": 73725, "total_steps": 78105, "loss": 0.1042, "lr": 4.776742953649438e-08, "epoch": 4.719608219704244, "percentage": 94.39, "elapsed_time": "3:13:35", "remaining_time": "0:11:30", "throughput": 19974.89, "total_tokens": 232009024} +{"current_steps": 73730, "total_steps": 78105, "loss": 0.1367, "lr": 4.765880614811258e-08, "epoch": 4.719928301645221, "percentage": 94.4, "elapsed_time": "3:13:35", "remaining_time": "0:11:29", "throughput": 19975.05, "total_tokens": 232024192} +{"current_steps": 73735, "total_steps": 78105, "loss": 0.1457, "lr": 4.7550305216876315e-08, "epoch": 4.720248383586198, "percentage": 94.4, "elapsed_time": "3:13:36", "remaining_time": "0:11:28", "throughput": 19975.23, "total_tokens": 232039424} +{"current_steps": 73740, "total_steps": 78105, "loss": 0.1621, "lr": 4.744192674820347e-08, "epoch": 4.720568465527175, "percentage": 94.41, "elapsed_time": "3:13:37", "remaining_time": "0:11:27", "throughput": 19975.41, "total_tokens": 232054656} +{"current_steps": 73745, "total_steps": 78105, "loss": 0.1297, "lr": 4.733367074750611e-08, "epoch": 4.720888547468152, "percentage": 94.42, "elapsed_time": "3:13:37", "remaining_time": "0:11:26", "throughput": 19975.64, "total_tokens": 232071680} +{"current_steps": 73750, "total_steps": 78105, "loss": 0.1854, "lr": 4.722553722018963e-08, "epoch": 4.721208629409129, "percentage": 94.42, "elapsed_time": "3:13:38", "remaining_time": "0:11:26", "throughput": 19975.81, "total_tokens": 232087168} +{"current_steps": 73755, "total_steps": 78105, "loss": 0.1434, "lr": 4.711752617165388e-08, "epoch": 4.721528711350105, "percentage": 94.43, "elapsed_time": "3:13:39", "remaining_time": "0:11:25", "throughput": 19976.02, "total_tokens": 232103040} +{"current_steps": 73760, "total_steps": 78105, "loss": 0.1494, "lr": 4.700963760729177e-08, "epoch": 4.721848793291082, "percentage": 94.44, "elapsed_time": "3:13:39", "remaining_time": "0:11:24", "throughput": 19976.18, "total_tokens": 232117888} +{"current_steps": 73765, "total_steps": 78105, "loss": 0.1116, "lr": 4.690187153249176e-08, "epoch": 4.722168875232059, "percentage": 94.44, "elapsed_time": "3:13:40", "remaining_time": "0:11:23", "throughput": 19976.37, "total_tokens": 232133248} +{"current_steps": 73770, "total_steps": 78105, "loss": 0.1319, "lr": 4.679422795263428e-08, "epoch": 4.722488957173036, "percentage": 94.45, "elapsed_time": "3:13:41", "remaining_time": "0:11:22", "throughput": 19976.51, "total_tokens": 232147584} +{"current_steps": 73775, "total_steps": 78105, "loss": 0.1414, "lr": 4.668670687309501e-08, "epoch": 4.722809039114013, "percentage": 94.46, "elapsed_time": "3:13:41", "remaining_time": "0:11:22", "throughput": 19976.69, "total_tokens": 232163008} +{"current_steps": 73780, "total_steps": 78105, "loss": 0.113, "lr": 4.657930829924273e-08, "epoch": 4.72312912105499, "percentage": 94.46, "elapsed_time": "3:13:42", "remaining_time": "0:11:21", "throughput": 19976.91, "total_tokens": 232179328} +{"current_steps": 73785, "total_steps": 78105, "loss": 0.1288, "lr": 4.6472032236440364e-08, "epoch": 4.723449202995967, "percentage": 94.47, "elapsed_time": "3:13:43", "remaining_time": "0:11:20", "throughput": 19977.06, "total_tokens": 232194496} +{"current_steps": 73790, "total_steps": 78105, "loss": 0.109, "lr": 4.636487869004475e-08, "epoch": 4.723769284936944, "percentage": 94.48, "elapsed_time": "3:13:43", "remaining_time": "0:11:19", "throughput": 19977.23, "total_tokens": 232209280} +{"current_steps": 73795, "total_steps": 78105, "loss": 0.1194, "lr": 4.625784766540631e-08, "epoch": 4.724089366877921, "percentage": 94.48, "elapsed_time": "3:13:44", "remaining_time": "0:11:18", "throughput": 19977.43, "total_tokens": 232225536} +{"current_steps": 73800, "total_steps": 78105, "loss": 0.1079, "lr": 4.615093916787022e-08, "epoch": 4.724409448818898, "percentage": 94.49, "elapsed_time": "3:13:45", "remaining_time": "0:11:18", "throughput": 19977.65, "total_tokens": 232242176} +{"current_steps": 73805, "total_steps": 78105, "loss": 0.1189, "lr": 4.6044153202774166e-08, "epoch": 4.724729530759874, "percentage": 94.49, "elapsed_time": "3:13:45", "remaining_time": "0:11:17", "throughput": 19977.9, "total_tokens": 232259712} +{"current_steps": 73810, "total_steps": 78105, "loss": 0.1489, "lr": 4.5937489775451374e-08, "epoch": 4.725049612700851, "percentage": 94.5, "elapsed_time": "3:13:46", "remaining_time": "0:11:16", "throughput": 19978.12, "total_tokens": 232276480} +{"current_steps": 73815, "total_steps": 78105, "loss": 0.1006, "lr": 4.58309488912273e-08, "epoch": 4.725369694641828, "percentage": 94.51, "elapsed_time": "3:13:47", "remaining_time": "0:11:15", "throughput": 19978.31, "total_tokens": 232292224} +{"current_steps": 73820, "total_steps": 78105, "loss": 0.1326, "lr": 4.5724530555422416e-08, "epoch": 4.725689776582805, "percentage": 94.51, "elapsed_time": "3:13:47", "remaining_time": "0:11:14", "throughput": 19978.48, "total_tokens": 232307392} +{"current_steps": 73825, "total_steps": 78105, "loss": 0.1721, "lr": 4.561823477335081e-08, "epoch": 4.726009858523782, "percentage": 94.52, "elapsed_time": "3:13:48", "remaining_time": "0:11:14", "throughput": 19978.65, "total_tokens": 232322752} +{"current_steps": 73830, "total_steps": 78105, "loss": 0.0902, "lr": 4.55120615503199e-08, "epoch": 4.726329940464759, "percentage": 94.53, "elapsed_time": "3:13:49", "remaining_time": "0:11:13", "throughput": 19978.82, "total_tokens": 232338304} +{"current_steps": 73835, "total_steps": 78105, "loss": 0.1177, "lr": 4.5406010891632104e-08, "epoch": 4.726650022405736, "percentage": 94.53, "elapsed_time": "3:13:49", "remaining_time": "0:11:12", "throughput": 19979.02, "total_tokens": 232354432} +{"current_steps": 73840, "total_steps": 78105, "loss": 0.141, "lr": 4.530008280258208e-08, "epoch": 4.726970104346712, "percentage": 94.54, "elapsed_time": "3:13:50", "remaining_time": "0:11:11", "throughput": 19979.19, "total_tokens": 232369920} +{"current_steps": 73845, "total_steps": 78105, "loss": 0.1157, "lr": 4.519427728846004e-08, "epoch": 4.727290186287689, "percentage": 94.55, "elapsed_time": "3:13:51", "remaining_time": "0:11:10", "throughput": 19979.36, "total_tokens": 232384896} +{"current_steps": 73850, "total_steps": 78105, "loss": 0.1335, "lr": 4.508859435454926e-08, "epoch": 4.727610268228666, "percentage": 94.55, "elapsed_time": "3:13:51", "remaining_time": "0:11:10", "throughput": 19979.52, "total_tokens": 232400064} +{"current_steps": 73855, "total_steps": 78105, "loss": 0.1583, "lr": 4.498303400612691e-08, "epoch": 4.727930350169643, "percentage": 94.56, "elapsed_time": "3:13:52", "remaining_time": "0:11:09", "throughput": 19979.71, "total_tokens": 232415808} +{"current_steps": 73860, "total_steps": 78105, "loss": 0.1177, "lr": 4.487759624846405e-08, "epoch": 4.72825043211062, "percentage": 94.57, "elapsed_time": "3:13:53", "remaining_time": "0:11:08", "throughput": 19979.88, "total_tokens": 232431168} +{"current_steps": 73865, "total_steps": 78105, "loss": 0.1467, "lr": 4.4772281086825905e-08, "epoch": 4.728570514051597, "percentage": 94.57, "elapsed_time": "3:13:53", "remaining_time": "0:11:07", "throughput": 19980.07, "total_tokens": 232447232} +{"current_steps": 73870, "total_steps": 78105, "loss": 0.1056, "lr": 4.466708852647106e-08, "epoch": 4.728890595992574, "percentage": 94.58, "elapsed_time": "3:13:54", "remaining_time": "0:11:07", "throughput": 19980.24, "total_tokens": 232462464} +{"current_steps": 73875, "total_steps": 78105, "loss": 0.2092, "lr": 4.4562018572652524e-08, "epoch": 4.729210677933551, "percentage": 94.58, "elapsed_time": "3:13:55", "remaining_time": "0:11:06", "throughput": 19980.53, "total_tokens": 232481600} +{"current_steps": 73880, "total_steps": 78105, "loss": 0.1094, "lr": 4.4457071230616656e-08, "epoch": 4.729530759874528, "percentage": 94.59, "elapsed_time": "3:13:56", "remaining_time": "0:11:05", "throughput": 19980.78, "total_tokens": 232498752} +{"current_steps": 73885, "total_steps": 78105, "loss": 0.1342, "lr": 4.435224650560427e-08, "epoch": 4.729850841815505, "percentage": 94.6, "elapsed_time": "3:13:56", "remaining_time": "0:11:04", "throughput": 19980.97, "total_tokens": 232514496} +{"current_steps": 73890, "total_steps": 78105, "loss": 0.122, "lr": 4.424754440285006e-08, "epoch": 4.730170923756481, "percentage": 94.6, "elapsed_time": "3:13:57", "remaining_time": "0:11:03", "throughput": 19981.14, "total_tokens": 232529664} +{"current_steps": 73895, "total_steps": 78105, "loss": 0.1481, "lr": 4.414296492758152e-08, "epoch": 4.730491005697458, "percentage": 94.61, "elapsed_time": "3:13:58", "remaining_time": "0:11:03", "throughput": 19981.34, "total_tokens": 232546176} +{"current_steps": 73900, "total_steps": 78105, "loss": 0.1273, "lr": 4.403850808502141e-08, "epoch": 4.730811087638435, "percentage": 94.62, "elapsed_time": "3:13:58", "remaining_time": "0:11:02", "throughput": 19981.52, "total_tokens": 232561280} +{"current_steps": 73905, "total_steps": 78105, "loss": 0.1615, "lr": 4.393417388038529e-08, "epoch": 4.731131169579412, "percentage": 94.62, "elapsed_time": "3:13:59", "remaining_time": "0:11:01", "throughput": 19981.74, "total_tokens": 232577792} +{"current_steps": 73910, "total_steps": 78105, "loss": 0.0995, "lr": 4.382996231888398e-08, "epoch": 4.731451251520389, "percentage": 94.63, "elapsed_time": "3:14:00", "remaining_time": "0:11:00", "throughput": 19981.94, "total_tokens": 232593728} +{"current_steps": 73915, "total_steps": 78105, "loss": 0.1221, "lr": 4.372587340571999e-08, "epoch": 4.731771333461366, "percentage": 94.64, "elapsed_time": "3:14:00", "remaining_time": "0:10:59", "throughput": 19982.11, "total_tokens": 232609024} +{"current_steps": 73920, "total_steps": 78105, "loss": 0.1173, "lr": 4.362190714609221e-08, "epoch": 4.732091415402343, "percentage": 94.64, "elapsed_time": "3:14:01", "remaining_time": "0:10:59", "throughput": 19982.27, "total_tokens": 232624064} +{"current_steps": 73925, "total_steps": 78105, "loss": 0.109, "lr": 4.3518063545190947e-08, "epoch": 4.73241149734332, "percentage": 94.65, "elapsed_time": "3:14:02", "remaining_time": "0:10:58", "throughput": 19982.5, "total_tokens": 232641024} +{"current_steps": 73930, "total_steps": 78105, "loss": 0.1378, "lr": 4.3414342608202595e-08, "epoch": 4.732731579284296, "percentage": 94.65, "elapsed_time": "3:14:02", "remaining_time": "0:10:57", "throughput": 19982.69, "total_tokens": 232656768} +{"current_steps": 73935, "total_steps": 78105, "loss": 0.1154, "lr": 4.3310744340306075e-08, "epoch": 4.733051661225273, "percentage": 94.66, "elapsed_time": "3:14:03", "remaining_time": "0:10:56", "throughput": 19982.87, "total_tokens": 232672576} +{"current_steps": 73940, "total_steps": 78105, "loss": 0.1172, "lr": 4.320726874667447e-08, "epoch": 4.73337174316625, "percentage": 94.67, "elapsed_time": "3:14:04", "remaining_time": "0:10:55", "throughput": 19983.05, "total_tokens": 232687872} +{"current_steps": 73945, "total_steps": 78105, "loss": 0.1152, "lr": 4.310391583247503e-08, "epoch": 4.733691825107227, "percentage": 94.67, "elapsed_time": "3:14:04", "remaining_time": "0:10:55", "throughput": 19983.25, "total_tokens": 232704000} +{"current_steps": 73950, "total_steps": 78105, "loss": 0.1181, "lr": 4.300068560286835e-08, "epoch": 4.734011907048204, "percentage": 94.68, "elapsed_time": "3:14:05", "remaining_time": "0:10:54", "throughput": 19983.43, "total_tokens": 232719552} +{"current_steps": 73955, "total_steps": 78105, "loss": 0.0953, "lr": 4.289757806300948e-08, "epoch": 4.734331988989181, "percentage": 94.69, "elapsed_time": "3:14:06", "remaining_time": "0:10:53", "throughput": 19983.6, "total_tokens": 232735040} +{"current_steps": 73960, "total_steps": 78105, "loss": 0.1417, "lr": 4.27945932180468e-08, "epoch": 4.734652070930158, "percentage": 94.69, "elapsed_time": "3:14:07", "remaining_time": "0:10:52", "throughput": 19983.92, "total_tokens": 232754560} +{"current_steps": 73965, "total_steps": 78105, "loss": 0.1613, "lr": 4.269173107312341e-08, "epoch": 4.734972152871135, "percentage": 94.7, "elapsed_time": "3:14:07", "remaining_time": "0:10:51", "throughput": 19984.12, "total_tokens": 232770688} +{"current_steps": 73970, "total_steps": 78105, "loss": 0.138, "lr": 4.258899163337521e-08, "epoch": 4.735292234812112, "percentage": 94.71, "elapsed_time": "3:14:08", "remaining_time": "0:10:51", "throughput": 19984.29, "total_tokens": 232786048} +{"current_steps": 73975, "total_steps": 78105, "loss": 0.1359, "lr": 4.248637490393226e-08, "epoch": 4.735612316753089, "percentage": 94.71, "elapsed_time": "3:14:09", "remaining_time": "0:10:50", "throughput": 19984.45, "total_tokens": 232801408} +{"current_steps": 73980, "total_steps": 78105, "loss": 0.1005, "lr": 4.238388088991935e-08, "epoch": 4.735932398694065, "percentage": 94.72, "elapsed_time": "3:14:09", "remaining_time": "0:10:49", "throughput": 19984.76, "total_tokens": 232821056} +{"current_steps": 73985, "total_steps": 78105, "loss": 0.1143, "lr": 4.228150959645405e-08, "epoch": 4.736252480635042, "percentage": 94.73, "elapsed_time": "3:14:10", "remaining_time": "0:10:48", "throughput": 19984.98, "total_tokens": 232837504} +{"current_steps": 73990, "total_steps": 78105, "loss": 0.1471, "lr": 4.2179261028648385e-08, "epoch": 4.736572562576019, "percentage": 94.73, "elapsed_time": "3:14:11", "remaining_time": "0:10:47", "throughput": 19985.12, "total_tokens": 232852032} +{"current_steps": 73995, "total_steps": 78105, "loss": 0.0952, "lr": 4.2077135191608e-08, "epoch": 4.736892644516996, "percentage": 94.74, "elapsed_time": "3:14:11", "remaining_time": "0:10:47", "throughput": 19985.31, "total_tokens": 232867904} +{"current_steps": 74000, "total_steps": 78105, "loss": 0.1332, "lr": 4.197513209043269e-08, "epoch": 4.737212726457973, "percentage": 94.74, "elapsed_time": "3:14:12", "remaining_time": "0:10:46", "throughput": 19985.53, "total_tokens": 232884288} +{"current_steps": 74005, "total_steps": 78105, "loss": 0.1478, "lr": 4.187325173021617e-08, "epoch": 4.73753280839895, "percentage": 94.75, "elapsed_time": "3:14:13", "remaining_time": "0:10:45", "throughput": 19985.72, "total_tokens": 232900416} +{"current_steps": 74010, "total_steps": 78105, "loss": 0.1663, "lr": 4.17714941160452e-08, "epoch": 4.737852890339927, "percentage": 94.76, "elapsed_time": "3:14:14", "remaining_time": "0:10:44", "throughput": 19985.94, "total_tokens": 232916928} +{"current_steps": 74015, "total_steps": 78105, "loss": 0.1142, "lr": 4.166985925300154e-08, "epoch": 4.738172972280904, "percentage": 94.76, "elapsed_time": "3:14:14", "remaining_time": "0:10:44", "throughput": 19986.23, "total_tokens": 232935744} +{"current_steps": 74020, "total_steps": 78105, "loss": 0.1247, "lr": 4.1568347146160035e-08, "epoch": 4.73849305422188, "percentage": 94.77, "elapsed_time": "3:14:15", "remaining_time": "0:10:43", "throughput": 19986.39, "total_tokens": 232950528} +{"current_steps": 74025, "total_steps": 78105, "loss": 0.1167, "lr": 4.146695780058968e-08, "epoch": 4.738813136162857, "percentage": 94.78, "elapsed_time": "3:14:16", "remaining_time": "0:10:42", "throughput": 19986.61, "total_tokens": 232966848} +{"current_steps": 74030, "total_steps": 78105, "loss": 0.0911, "lr": 4.136569122135392e-08, "epoch": 4.739133218103834, "percentage": 94.78, "elapsed_time": "3:14:16", "remaining_time": "0:10:41", "throughput": 19986.81, "total_tokens": 232983040} +{"current_steps": 74035, "total_steps": 78105, "loss": 0.1299, "lr": 4.126454741350844e-08, "epoch": 4.739453300044811, "percentage": 94.79, "elapsed_time": "3:14:17", "remaining_time": "0:10:40", "throughput": 19987.04, "total_tokens": 232999808} +{"current_steps": 74040, "total_steps": 78105, "loss": 0.1338, "lr": 4.116352638210447e-08, "epoch": 4.739773381985788, "percentage": 94.8, "elapsed_time": "3:14:18", "remaining_time": "0:10:40", "throughput": 19987.24, "total_tokens": 233016320} +{"current_steps": 74045, "total_steps": 78105, "loss": 0.1548, "lr": 4.106262813218659e-08, "epoch": 4.740093463926765, "percentage": 94.8, "elapsed_time": "3:14:18", "remaining_time": "0:10:39", "throughput": 19987.47, "total_tokens": 233033216} +{"current_steps": 74050, "total_steps": 78105, "loss": 0.1068, "lr": 4.096185266879271e-08, "epoch": 4.740413545867742, "percentage": 94.81, "elapsed_time": "3:14:19", "remaining_time": "0:10:38", "throughput": 19987.67, "total_tokens": 233049280} +{"current_steps": 74055, "total_steps": 78105, "loss": 0.1205, "lr": 4.086119999695548e-08, "epoch": 4.740733627808719, "percentage": 94.81, "elapsed_time": "3:14:20", "remaining_time": "0:10:37", "throughput": 19987.84, "total_tokens": 233064384} +{"current_steps": 74060, "total_steps": 78105, "loss": 0.1341, "lr": 4.076067012170032e-08, "epoch": 4.741053709749696, "percentage": 94.82, "elapsed_time": "3:14:20", "remaining_time": "0:10:36", "throughput": 19987.99, "total_tokens": 233079168} +{"current_steps": 74065, "total_steps": 78105, "loss": 0.1492, "lr": 4.066026304804821e-08, "epoch": 4.741373791690673, "percentage": 94.83, "elapsed_time": "3:14:21", "remaining_time": "0:10:36", "throughput": 19988.2, "total_tokens": 233095296} +{"current_steps": 74070, "total_steps": 78105, "loss": 0.0897, "lr": 4.055997878101181e-08, "epoch": 4.741693873631649, "percentage": 94.83, "elapsed_time": "3:14:22", "remaining_time": "0:10:35", "throughput": 19987.99, "total_tokens": 233111552} +{"current_steps": 74075, "total_steps": 78105, "loss": 0.1233, "lr": 4.045981732559934e-08, "epoch": 4.742013955572626, "percentage": 94.84, "elapsed_time": "3:14:23", "remaining_time": "0:10:34", "throughput": 19988.19, "total_tokens": 233127488} +{"current_steps": 74080, "total_steps": 78105, "loss": 0.1122, "lr": 4.035977868681262e-08, "epoch": 4.742334037513603, "percentage": 94.85, "elapsed_time": "3:14:23", "remaining_time": "0:10:33", "throughput": 19988.41, "total_tokens": 233143872} +{"current_steps": 74085, "total_steps": 78105, "loss": 0.1337, "lr": 4.0259862869646557e-08, "epoch": 4.74265411945458, "percentage": 94.85, "elapsed_time": "3:14:24", "remaining_time": "0:10:32", "throughput": 19988.58, "total_tokens": 233159360} +{"current_steps": 74090, "total_steps": 78105, "loss": 0.0919, "lr": 4.016006987909077e-08, "epoch": 4.742974201395557, "percentage": 94.86, "elapsed_time": "3:14:25", "remaining_time": "0:10:32", "throughput": 19988.76, "total_tokens": 233174848} +{"current_steps": 74095, "total_steps": 78105, "loss": 0.1226, "lr": 4.006039972012793e-08, "epoch": 4.743294283336534, "percentage": 94.87, "elapsed_time": "3:14:25", "remaining_time": "0:10:31", "throughput": 19988.97, "total_tokens": 233191296} +{"current_steps": 74100, "total_steps": 78105, "loss": 0.1095, "lr": 3.996085239773573e-08, "epoch": 4.743614365277511, "percentage": 94.87, "elapsed_time": "3:14:26", "remaining_time": "0:10:30", "throughput": 19989.17, "total_tokens": 233207360} +{"current_steps": 74105, "total_steps": 78105, "loss": 0.1206, "lr": 3.9861427916884633e-08, "epoch": 4.7439344472184874, "percentage": 94.88, "elapsed_time": "3:14:27", "remaining_time": "0:10:29", "throughput": 19989.32, "total_tokens": 233221888} +{"current_steps": 74110, "total_steps": 78105, "loss": 0.0942, "lr": 3.9762126282539006e-08, "epoch": 4.744254529159464, "percentage": 94.89, "elapsed_time": "3:14:27", "remaining_time": "0:10:28", "throughput": 19989.49, "total_tokens": 233236992} +{"current_steps": 74115, "total_steps": 78105, "loss": 0.1296, "lr": 3.966294749965821e-08, "epoch": 4.744574611100441, "percentage": 94.89, "elapsed_time": "3:14:28", "remaining_time": "0:10:28", "throughput": 19989.67, "total_tokens": 233252928} +{"current_steps": 74120, "total_steps": 78105, "loss": 0.0977, "lr": 3.95638915731944e-08, "epoch": 4.744894693041418, "percentage": 94.9, "elapsed_time": "3:14:29", "remaining_time": "0:10:27", "throughput": 19989.83, "total_tokens": 233267712} +{"current_steps": 74125, "total_steps": 78105, "loss": 0.0949, "lr": 3.946495850809418e-08, "epoch": 4.745214774982395, "percentage": 94.9, "elapsed_time": "3:14:29", "remaining_time": "0:10:26", "throughput": 19989.96, "total_tokens": 233281984} +{"current_steps": 74130, "total_steps": 78105, "loss": 0.0871, "lr": 3.9366148309297195e-08, "epoch": 4.745534856923372, "percentage": 94.91, "elapsed_time": "3:14:30", "remaining_time": "0:10:25", "throughput": 19990.14, "total_tokens": 233297472} +{"current_steps": 74135, "total_steps": 78105, "loss": 0.1606, "lr": 3.926746098173756e-08, "epoch": 4.745854938864349, "percentage": 94.92, "elapsed_time": "3:14:31", "remaining_time": "0:10:25", "throughput": 19990.33, "total_tokens": 233313280} +{"current_steps": 74140, "total_steps": 78105, "loss": 0.1081, "lr": 3.916889653034384e-08, "epoch": 4.746175020805326, "percentage": 94.92, "elapsed_time": "3:14:31", "remaining_time": "0:10:24", "throughput": 19990.52, "total_tokens": 233328896} +{"current_steps": 74145, "total_steps": 78105, "loss": 0.1058, "lr": 3.9070454960037086e-08, "epoch": 4.746495102746303, "percentage": 94.93, "elapsed_time": "3:14:32", "remaining_time": "0:10:23", "throughput": 19990.72, "total_tokens": 233345088} +{"current_steps": 74150, "total_steps": 78105, "loss": 0.0974, "lr": 3.897213627573365e-08, "epoch": 4.74681518468728, "percentage": 94.94, "elapsed_time": "3:14:33", "remaining_time": "0:10:22", "throughput": 19990.92, "total_tokens": 233361216} +{"current_steps": 74155, "total_steps": 78105, "loss": 0.1023, "lr": 3.8873940482342384e-08, "epoch": 4.7471352666282565, "percentage": 94.94, "elapsed_time": "3:14:34", "remaining_time": "0:10:21", "throughput": 19991.13, "total_tokens": 233377792} +{"current_steps": 74160, "total_steps": 78105, "loss": 0.1428, "lr": 3.8775867584767145e-08, "epoch": 4.7474553485692335, "percentage": 94.95, "elapsed_time": "3:14:34", "remaining_time": "0:10:21", "throughput": 19991.33, "total_tokens": 233393792} +{"current_steps": 74165, "total_steps": 78105, "loss": 0.1314, "lr": 3.8677917587904844e-08, "epoch": 4.7477754305102104, "percentage": 94.96, "elapsed_time": "3:14:35", "remaining_time": "0:10:20", "throughput": 19991.52, "total_tokens": 233409408} +{"current_steps": 74170, "total_steps": 78105, "loss": 0.2069, "lr": 3.858009049664685e-08, "epoch": 4.748095512451187, "percentage": 94.96, "elapsed_time": "3:14:36", "remaining_time": "0:10:19", "throughput": 19991.69, "total_tokens": 233424832} +{"current_steps": 74175, "total_steps": 78105, "loss": 0.1239, "lr": 3.8482386315878416e-08, "epoch": 4.748415594392164, "percentage": 94.97, "elapsed_time": "3:14:36", "remaining_time": "0:10:18", "throughput": 19991.86, "total_tokens": 233440064} +{"current_steps": 74180, "total_steps": 78105, "loss": 0.1283, "lr": 3.838480505047759e-08, "epoch": 4.748735676333141, "percentage": 94.97, "elapsed_time": "3:14:37", "remaining_time": "0:10:17", "throughput": 19992.04, "total_tokens": 233455872} +{"current_steps": 74185, "total_steps": 78105, "loss": 0.1478, "lr": 3.828734670531769e-08, "epoch": 4.749055758274118, "percentage": 94.98, "elapsed_time": "3:14:38", "remaining_time": "0:10:17", "throughput": 19992.22, "total_tokens": 233471168} +{"current_steps": 74190, "total_steps": 78105, "loss": 0.0953, "lr": 3.819001128526512e-08, "epoch": 4.749375840215095, "percentage": 94.99, "elapsed_time": "3:14:38", "remaining_time": "0:10:16", "throughput": 19992.38, "total_tokens": 233485824} +{"current_steps": 74195, "total_steps": 78105, "loss": 0.0894, "lr": 3.809279879518013e-08, "epoch": 4.7496959221560715, "percentage": 94.99, "elapsed_time": "3:14:39", "remaining_time": "0:10:15", "throughput": 19992.62, "total_tokens": 233502784} +{"current_steps": 74200, "total_steps": 78105, "loss": 0.1182, "lr": 3.7995709239917465e-08, "epoch": 4.7500160040970485, "percentage": 95.0, "elapsed_time": "3:14:40", "remaining_time": "0:10:14", "throughput": 19992.88, "total_tokens": 233520512} +{"current_steps": 74205, "total_steps": 78105, "loss": 0.1649, "lr": 3.7898742624324915e-08, "epoch": 4.7503360860380255, "percentage": 95.01, "elapsed_time": "3:14:40", "remaining_time": "0:10:13", "throughput": 19993.04, "total_tokens": 233535744} +{"current_steps": 74210, "total_steps": 78105, "loss": 0.1252, "lr": 3.780189895324443e-08, "epoch": 4.7506561679790025, "percentage": 95.01, "elapsed_time": "3:14:41", "remaining_time": "0:10:13", "throughput": 19993.26, "total_tokens": 233552064} +{"current_steps": 74214, "total_steps": 78105, "eval_loss": 0.6057931184768677, "epoch": 4.750912233531784, "percentage": 95.02, "elapsed_time": "3:15:33", "remaining_time": "0:10:15", "throughput": 19906.52, "total_tokens": 233564288} +{"current_steps": 74215, "total_steps": 78105, "loss": 0.1157, "lr": 3.7705178231511875e-08, "epoch": 4.7509762499199795, "percentage": 95.02, "elapsed_time": "3:16:07", "remaining_time": "0:10:16", "throughput": 19848.89, "total_tokens": 233567168} +{"current_steps": 74220, "total_steps": 78105, "loss": 0.1425, "lr": 3.760858046395699e-08, "epoch": 4.7512963318609565, "percentage": 95.03, "elapsed_time": "3:16:07", "remaining_time": "0:10:15", "throughput": 19849.1, "total_tokens": 233583552} +{"current_steps": 74225, "total_steps": 78105, "loss": 0.1248, "lr": 3.7512105655403974e-08, "epoch": 4.7516164138019334, "percentage": 95.03, "elapsed_time": "3:16:08", "remaining_time": "0:10:15", "throughput": 19849.29, "total_tokens": 233599168} +{"current_steps": 74230, "total_steps": 78105, "loss": 0.1434, "lr": 3.7415753810669253e-08, "epoch": 4.75193649574291, "percentage": 95.04, "elapsed_time": "3:16:09", "remaining_time": "0:10:14", "throughput": 19849.48, "total_tokens": 233614464} +{"current_steps": 74235, "total_steps": 78105, "loss": 0.1861, "lr": 3.731952493456509e-08, "epoch": 4.752256577683887, "percentage": 95.05, "elapsed_time": "3:16:10", "remaining_time": "0:10:13", "throughput": 19849.75, "total_tokens": 233632128} +{"current_steps": 74240, "total_steps": 78105, "loss": 0.1573, "lr": 3.722341903189597e-08, "epoch": 4.752576659624864, "percentage": 95.05, "elapsed_time": "3:16:10", "remaining_time": "0:10:12", "throughput": 19849.92, "total_tokens": 233647232} +{"current_steps": 74245, "total_steps": 78105, "loss": 0.1575, "lr": 3.712743610746111e-08, "epoch": 4.7528967415658405, "percentage": 95.06, "elapsed_time": "3:16:11", "remaining_time": "0:10:11", "throughput": 19850.11, "total_tokens": 233662720} +{"current_steps": 74250, "total_steps": 78105, "loss": 0.1193, "lr": 3.703157616605363e-08, "epoch": 4.7532168235068175, "percentage": 95.06, "elapsed_time": "3:16:12", "remaining_time": "0:10:11", "throughput": 19850.28, "total_tokens": 233677824} +{"current_steps": 74255, "total_steps": 78105, "loss": 0.1434, "lr": 3.693583921245997e-08, "epoch": 4.7535369054477945, "percentage": 95.07, "elapsed_time": "3:16:12", "remaining_time": "0:10:10", "throughput": 19850.48, "total_tokens": 233693568} +{"current_steps": 74260, "total_steps": 78105, "loss": 0.1683, "lr": 3.684022525146103e-08, "epoch": 4.7538569873887715, "percentage": 95.08, "elapsed_time": "3:16:13", "remaining_time": "0:10:09", "throughput": 19850.66, "total_tokens": 233708864} +{"current_steps": 74265, "total_steps": 78105, "loss": 0.1419, "lr": 3.6744734287830766e-08, "epoch": 4.7541770693297485, "percentage": 95.08, "elapsed_time": "3:16:14", "remaining_time": "0:10:08", "throughput": 19850.89, "total_tokens": 233725504} +{"current_steps": 74270, "total_steps": 78105, "loss": 0.1156, "lr": 3.664936632633814e-08, "epoch": 4.7544971512707255, "percentage": 95.09, "elapsed_time": "3:16:14", "remaining_time": "0:10:07", "throughput": 19851.05, "total_tokens": 233740480} +{"current_steps": 74275, "total_steps": 78105, "loss": 0.1669, "lr": 3.655412137174519e-08, "epoch": 4.7548172332117025, "percentage": 95.1, "elapsed_time": "3:16:15", "remaining_time": "0:10:07", "throughput": 19851.21, "total_tokens": 233754944} +{"current_steps": 74280, "total_steps": 78105, "loss": 0.1299, "lr": 3.645899942880754e-08, "epoch": 4.7551373151526795, "percentage": 95.1, "elapsed_time": "3:16:16", "remaining_time": "0:10:06", "throughput": 19851.38, "total_tokens": 233769984} +{"current_steps": 74285, "total_steps": 78105, "loss": 0.1703, "lr": 3.6364000502275285e-08, "epoch": 4.755457397093656, "percentage": 95.11, "elapsed_time": "3:16:16", "remaining_time": "0:10:05", "throughput": 19851.55, "total_tokens": 233784640} +{"current_steps": 74290, "total_steps": 78105, "loss": 0.1472, "lr": 3.626912459689214e-08, "epoch": 4.7557774790346325, "percentage": 95.12, "elapsed_time": "3:16:17", "remaining_time": "0:10:04", "throughput": 19851.82, "total_tokens": 233802240} +{"current_steps": 74295, "total_steps": 78105, "loss": 0.1296, "lr": 3.617437171739624e-08, "epoch": 4.7560975609756095, "percentage": 95.12, "elapsed_time": "3:16:18", "remaining_time": "0:10:04", "throughput": 19852.03, "total_tokens": 233818304} +{"current_steps": 74300, "total_steps": 78105, "loss": 0.1498, "lr": 3.607974186851826e-08, "epoch": 4.7564176429165865, "percentage": 95.13, "elapsed_time": "3:16:18", "remaining_time": "0:10:03", "throughput": 19852.23, "total_tokens": 233834368} +{"current_steps": 74305, "total_steps": 78105, "loss": 0.1469, "lr": 3.598523505498414e-08, "epoch": 4.7567377248575635, "percentage": 95.13, "elapsed_time": "3:16:19", "remaining_time": "0:10:02", "throughput": 19852.43, "total_tokens": 233850240} +{"current_steps": 74310, "total_steps": 78105, "loss": 0.125, "lr": 3.5890851281512594e-08, "epoch": 4.7570578067985405, "percentage": 95.14, "elapsed_time": "3:16:20", "remaining_time": "0:10:01", "throughput": 19852.58, "total_tokens": 233864832} +{"current_steps": 74315, "total_steps": 78105, "loss": 0.1653, "lr": 3.5796590552816815e-08, "epoch": 4.7573778887395175, "percentage": 95.15, "elapsed_time": "3:16:20", "remaining_time": "0:10:00", "throughput": 19852.73, "total_tokens": 233879488} +{"current_steps": 74320, "total_steps": 78105, "loss": 0.1048, "lr": 3.570245287360386e-08, "epoch": 4.7576979706804945, "percentage": 95.15, "elapsed_time": "3:16:21", "remaining_time": "0:10:00", "throughput": 19852.92, "total_tokens": 233895168} +{"current_steps": 74325, "total_steps": 78105, "loss": 0.1108, "lr": 3.560843824857441e-08, "epoch": 4.7580180526214715, "percentage": 95.16, "elapsed_time": "3:16:22", "remaining_time": "0:09:59", "throughput": 19853.12, "total_tokens": 233910784} +{"current_steps": 74330, "total_steps": 78105, "loss": 0.163, "lr": 3.5514546682423044e-08, "epoch": 4.7583381345624485, "percentage": 95.17, "elapsed_time": "3:16:22", "remaining_time": "0:09:58", "throughput": 19853.32, "total_tokens": 233926656} +{"current_steps": 74335, "total_steps": 78105, "loss": 0.192, "lr": 3.5420778179837964e-08, "epoch": 4.758658216503425, "percentage": 95.17, "elapsed_time": "3:16:23", "remaining_time": "0:09:57", "throughput": 19853.5, "total_tokens": 233941952} +{"current_steps": 74340, "total_steps": 78105, "loss": 0.1245, "lr": 3.5327132745501535e-08, "epoch": 4.758978298444402, "percentage": 95.18, "elapsed_time": "3:16:24", "remaining_time": "0:09:56", "throughput": 19853.7, "total_tokens": 233957952} +{"current_steps": 74345, "total_steps": 78105, "loss": 0.1383, "lr": 3.523361038409057e-08, "epoch": 4.759298380385379, "percentage": 95.19, "elapsed_time": "3:16:24", "remaining_time": "0:09:56", "throughput": 19853.91, "total_tokens": 233974400} +{"current_steps": 74350, "total_steps": 78105, "loss": 0.077, "lr": 3.514021110027438e-08, "epoch": 4.7596184623263555, "percentage": 95.19, "elapsed_time": "3:16:25", "remaining_time": "0:09:55", "throughput": 19854.08, "total_tokens": 233989760} +{"current_steps": 74355, "total_steps": 78105, "loss": 0.1296, "lr": 3.504693489871702e-08, "epoch": 4.7599385442673325, "percentage": 95.2, "elapsed_time": "3:16:26", "remaining_time": "0:09:54", "throughput": 19854.28, "total_tokens": 234005760} +{"current_steps": 74360, "total_steps": 78105, "loss": 0.1496, "lr": 3.495378178407643e-08, "epoch": 4.7602586262083095, "percentage": 95.21, "elapsed_time": "3:16:26", "remaining_time": "0:09:53", "throughput": 19854.51, "total_tokens": 234022400} +{"current_steps": 74365, "total_steps": 78105, "loss": 0.1553, "lr": 3.486075176100362e-08, "epoch": 4.7605787081492865, "percentage": 95.21, "elapsed_time": "3:16:27", "remaining_time": "0:09:52", "throughput": 19854.71, "total_tokens": 234038720} +{"current_steps": 74370, "total_steps": 78105, "loss": 0.1218, "lr": 3.476784483414486e-08, "epoch": 4.760898790090263, "percentage": 95.22, "elapsed_time": "3:16:28", "remaining_time": "0:09:52", "throughput": 19854.92, "total_tokens": 234054912} +{"current_steps": 74375, "total_steps": 78105, "loss": 0.1305, "lr": 3.4675061008138664e-08, "epoch": 4.76121887203124, "percentage": 95.22, "elapsed_time": "3:16:28", "remaining_time": "0:09:51", "throughput": 19855.15, "total_tokens": 234071616} +{"current_steps": 74380, "total_steps": 78105, "loss": 0.1552, "lr": 3.458240028761883e-08, "epoch": 4.761538953972217, "percentage": 95.23, "elapsed_time": "3:16:29", "remaining_time": "0:09:50", "throughput": 19855.35, "total_tokens": 234087808} +{"current_steps": 74385, "total_steps": 78105, "loss": 0.1138, "lr": 3.4489862677211936e-08, "epoch": 4.761859035913194, "percentage": 95.24, "elapsed_time": "3:16:30", "remaining_time": "0:09:49", "throughput": 19855.52, "total_tokens": 234102912} +{"current_steps": 74390, "total_steps": 78105, "loss": 0.0945, "lr": 3.4397448181538726e-08, "epoch": 4.762179117854171, "percentage": 95.24, "elapsed_time": "3:16:30", "remaining_time": "0:09:48", "throughput": 19855.7, "total_tokens": 234118400} +{"current_steps": 74395, "total_steps": 78105, "loss": 0.1406, "lr": 3.43051568052144e-08, "epoch": 4.762499199795148, "percentage": 95.25, "elapsed_time": "3:16:31", "remaining_time": "0:09:48", "throughput": 19855.85, "total_tokens": 234132864} +{"current_steps": 74400, "total_steps": 78105, "loss": 0.094, "lr": 3.4212988552846936e-08, "epoch": 4.762819281736125, "percentage": 95.26, "elapsed_time": "3:16:32", "remaining_time": "0:09:47", "throughput": 19856.06, "total_tokens": 234149248} +{"current_steps": 74405, "total_steps": 78105, "loss": 0.118, "lr": 3.4120943429039324e-08, "epoch": 4.763139363677102, "percentage": 95.26, "elapsed_time": "3:16:32", "remaining_time": "0:09:46", "throughput": 19856.21, "total_tokens": 234164224} +{"current_steps": 74410, "total_steps": 78105, "loss": 0.1164, "lr": 3.402902143838705e-08, "epoch": 4.7634594456180785, "percentage": 95.27, "elapsed_time": "3:16:33", "remaining_time": "0:09:45", "throughput": 19856.41, "total_tokens": 234180288} +{"current_steps": 74415, "total_steps": 78105, "loss": 0.1564, "lr": 3.393722258548088e-08, "epoch": 4.7637795275590555, "percentage": 95.28, "elapsed_time": "3:16:34", "remaining_time": "0:09:44", "throughput": 19856.59, "total_tokens": 234195840} +{"current_steps": 74420, "total_steps": 78105, "loss": 0.1233, "lr": 3.384554687490466e-08, "epoch": 4.764099609500032, "percentage": 95.28, "elapsed_time": "3:16:35", "remaining_time": "0:09:44", "throughput": 19856.77, "total_tokens": 234211456} +{"current_steps": 74425, "total_steps": 78105, "loss": 0.1477, "lr": 3.3753994311236106e-08, "epoch": 4.764419691441009, "percentage": 95.29, "elapsed_time": "3:16:35", "remaining_time": "0:09:43", "throughput": 19856.93, "total_tokens": 234226624} +{"current_steps": 74430, "total_steps": 78105, "loss": 0.0965, "lr": 3.366256489904685e-08, "epoch": 4.764739773381986, "percentage": 95.29, "elapsed_time": "3:16:36", "remaining_time": "0:09:42", "throughput": 19857.1, "total_tokens": 234242432} +{"current_steps": 74435, "total_steps": 78105, "loss": 0.1509, "lr": 3.3571258642902394e-08, "epoch": 4.765059855322963, "percentage": 95.3, "elapsed_time": "3:16:37", "remaining_time": "0:09:41", "throughput": 19857.28, "total_tokens": 234257920} +{"current_steps": 74440, "total_steps": 78105, "loss": 0.1437, "lr": 3.3480075547362154e-08, "epoch": 4.76537993726394, "percentage": 95.31, "elapsed_time": "3:16:37", "remaining_time": "0:09:40", "throughput": 19857.51, "total_tokens": 234274944} +{"current_steps": 74445, "total_steps": 78105, "loss": 0.0895, "lr": 3.338901561697944e-08, "epoch": 4.765700019204917, "percentage": 95.31, "elapsed_time": "3:16:38", "remaining_time": "0:09:40", "throughput": 19857.67, "total_tokens": 234289792} +{"current_steps": 74450, "total_steps": 78105, "loss": 0.0701, "lr": 3.3298078856300874e-08, "epoch": 4.766020101145894, "percentage": 95.32, "elapsed_time": "3:16:39", "remaining_time": "0:09:39", "throughput": 19857.84, "total_tokens": 234304768} +{"current_steps": 74455, "total_steps": 78105, "loss": 0.1504, "lr": 3.3207265269867836e-08, "epoch": 4.766340183086871, "percentage": 95.33, "elapsed_time": "3:16:39", "remaining_time": "0:09:38", "throughput": 19857.99, "total_tokens": 234319616} +{"current_steps": 74460, "total_steps": 78105, "loss": 0.135, "lr": 3.311657486221476e-08, "epoch": 4.766660265027847, "percentage": 95.33, "elapsed_time": "3:16:40", "remaining_time": "0:09:37", "throughput": 19858.15, "total_tokens": 234334272} +{"current_steps": 74465, "total_steps": 78105, "loss": 0.1336, "lr": 3.30260076378705e-08, "epoch": 4.766980346968824, "percentage": 95.34, "elapsed_time": "3:16:41", "remaining_time": "0:09:36", "throughput": 19858.33, "total_tokens": 234349696} +{"current_steps": 74470, "total_steps": 78105, "loss": 0.1244, "lr": 3.293556360135758e-08, "epoch": 4.767300428909801, "percentage": 95.35, "elapsed_time": "3:16:41", "remaining_time": "0:09:36", "throughput": 19858.49, "total_tokens": 234364864} +{"current_steps": 74475, "total_steps": 78105, "loss": 0.1555, "lr": 3.2845242757192085e-08, "epoch": 4.767620510850778, "percentage": 95.35, "elapsed_time": "3:16:42", "remaining_time": "0:09:35", "throughput": 19858.67, "total_tokens": 234380288} +{"current_steps": 74480, "total_steps": 78105, "loss": 0.1255, "lr": 3.275504510988403e-08, "epoch": 4.767940592791755, "percentage": 95.36, "elapsed_time": "3:16:43", "remaining_time": "0:09:34", "throughput": 19858.82, "total_tokens": 234395008} +{"current_steps": 74485, "total_steps": 78105, "loss": 0.1423, "lr": 3.266497066393759e-08, "epoch": 4.768260674732732, "percentage": 95.37, "elapsed_time": "3:16:43", "remaining_time": "0:09:33", "throughput": 19858.98, "total_tokens": 234410176} +{"current_steps": 74490, "total_steps": 78105, "loss": 0.1873, "lr": 3.2575019423851106e-08, "epoch": 4.768580756673709, "percentage": 95.37, "elapsed_time": "3:16:44", "remaining_time": "0:09:32", "throughput": 19859.15, "total_tokens": 234425856} +{"current_steps": 74495, "total_steps": 78105, "loss": 0.1291, "lr": 3.2485191394115153e-08, "epoch": 4.768900838614686, "percentage": 95.38, "elapsed_time": "3:16:45", "remaining_time": "0:09:32", "throughput": 19859.32, "total_tokens": 234441472} +{"current_steps": 74500, "total_steps": 78105, "loss": 0.0958, "lr": 3.239548657921643e-08, "epoch": 4.769220920555663, "percentage": 95.38, "elapsed_time": "3:16:45", "remaining_time": "0:09:31", "throughput": 19859.53, "total_tokens": 234457408} +{"current_steps": 74505, "total_steps": 78105, "loss": 0.128, "lr": 3.230590498363356e-08, "epoch": 4.76954100249664, "percentage": 95.39, "elapsed_time": "3:16:46", "remaining_time": "0:09:30", "throughput": 19859.7, "total_tokens": 234472832} +{"current_steps": 74510, "total_steps": 78105, "loss": 0.1635, "lr": 3.22164466118402e-08, "epoch": 4.769861084437616, "percentage": 95.4, "elapsed_time": "3:16:47", "remaining_time": "0:09:29", "throughput": 19859.89, "total_tokens": 234488320} +{"current_steps": 74515, "total_steps": 78105, "loss": 0.1556, "lr": 3.212711146830333e-08, "epoch": 4.770181166378593, "percentage": 95.4, "elapsed_time": "3:16:47", "remaining_time": "0:09:28", "throughput": 19860.15, "total_tokens": 234506112} +{"current_steps": 74520, "total_steps": 78105, "loss": 0.1281, "lr": 3.2037899557483545e-08, "epoch": 4.77050124831957, "percentage": 95.41, "elapsed_time": "3:16:48", "remaining_time": "0:09:28", "throughput": 19860.41, "total_tokens": 234523648} +{"current_steps": 74525, "total_steps": 78105, "loss": 0.1576, "lr": 3.194881088383617e-08, "epoch": 4.770821330260547, "percentage": 95.42, "elapsed_time": "3:16:49", "remaining_time": "0:09:27", "throughput": 19860.56, "total_tokens": 234538560} +{"current_steps": 74530, "total_steps": 78105, "loss": 0.1453, "lr": 3.1859845451809315e-08, "epoch": 4.771141412201524, "percentage": 95.42, "elapsed_time": "3:16:49", "remaining_time": "0:09:26", "throughput": 19860.76, "total_tokens": 234554496} +{"current_steps": 74535, "total_steps": 78105, "loss": 0.1317, "lr": 3.1771003265845815e-08, "epoch": 4.771461494142501, "percentage": 95.43, "elapsed_time": "3:16:50", "remaining_time": "0:09:25", "throughput": 19860.93, "total_tokens": 234569600} +{"current_steps": 74540, "total_steps": 78105, "loss": 0.1402, "lr": 3.168228433038212e-08, "epoch": 4.771781576083478, "percentage": 95.44, "elapsed_time": "3:16:51", "remaining_time": "0:09:24", "throughput": 19861.12, "total_tokens": 234585152} +{"current_steps": 74545, "total_steps": 78105, "loss": 0.1452, "lr": 3.159368864984802e-08, "epoch": 4.772101658024455, "percentage": 95.44, "elapsed_time": "3:16:51", "remaining_time": "0:09:24", "throughput": 19861.27, "total_tokens": 234599744} +{"current_steps": 74550, "total_steps": 78105, "loss": 0.1187, "lr": 3.150521622866748e-08, "epoch": 4.772421739965431, "percentage": 95.45, "elapsed_time": "3:16:52", "remaining_time": "0:09:23", "throughput": 19861.45, "total_tokens": 234615040} +{"current_steps": 74555, "total_steps": 78105, "loss": 0.097, "lr": 3.141686707125863e-08, "epoch": 4.772741821906408, "percentage": 95.45, "elapsed_time": "3:16:53", "remaining_time": "0:09:22", "throughput": 19861.72, "total_tokens": 234632768} +{"current_steps": 74560, "total_steps": 78105, "loss": 0.1359, "lr": 3.1328641182033214e-08, "epoch": 4.773061903847385, "percentage": 95.46, "elapsed_time": "3:16:54", "remaining_time": "0:09:21", "throughput": 19861.92, "total_tokens": 234648832} +{"current_steps": 74565, "total_steps": 78105, "loss": 0.1411, "lr": 3.1240538565396325e-08, "epoch": 4.773381985788362, "percentage": 95.47, "elapsed_time": "3:16:54", "remaining_time": "0:09:20", "throughput": 19862.08, "total_tokens": 234663872} +{"current_steps": 74570, "total_steps": 78105, "loss": 0.1816, "lr": 3.115255922574778e-08, "epoch": 4.773702067729339, "percentage": 95.47, "elapsed_time": "3:16:55", "remaining_time": "0:09:20", "throughput": 19862.27, "total_tokens": 234679296} +{"current_steps": 74575, "total_steps": 78105, "loss": 0.1338, "lr": 3.106470316748072e-08, "epoch": 4.774022149670316, "percentage": 95.48, "elapsed_time": "3:16:56", "remaining_time": "0:09:19", "throughput": 19862.48, "total_tokens": 234695360} +{"current_steps": 74580, "total_steps": 78105, "loss": 0.1087, "lr": 3.097697039498221e-08, "epoch": 4.774342231611293, "percentage": 95.49, "elapsed_time": "3:16:56", "remaining_time": "0:09:18", "throughput": 19862.7, "total_tokens": 234711616} +{"current_steps": 74585, "total_steps": 78105, "loss": 0.1395, "lr": 3.088936091263289e-08, "epoch": 4.77466231355227, "percentage": 95.49, "elapsed_time": "3:16:57", "remaining_time": "0:09:17", "throughput": 19862.87, "total_tokens": 234726784} +{"current_steps": 74590, "total_steps": 78105, "loss": 0.0694, "lr": 3.080187472480789e-08, "epoch": 4.774982395493247, "percentage": 95.5, "elapsed_time": "3:16:58", "remaining_time": "0:09:16", "throughput": 19863.07, "total_tokens": 234742400} +{"current_steps": 74595, "total_steps": 78105, "loss": 0.0987, "lr": 3.071451183587565e-08, "epoch": 4.775302477434224, "percentage": 95.51, "elapsed_time": "3:16:58", "remaining_time": "0:09:16", "throughput": 19863.28, "total_tokens": 234758528} +{"current_steps": 74600, "total_steps": 78105, "loss": 0.1424, "lr": 3.062727225019879e-08, "epoch": 4.7756225593752, "percentage": 95.51, "elapsed_time": "3:16:59", "remaining_time": "0:09:15", "throughput": 19863.48, "total_tokens": 234774656} +{"current_steps": 74605, "total_steps": 78105, "loss": 0.1352, "lr": 3.0540155972133275e-08, "epoch": 4.775942641316177, "percentage": 95.52, "elapsed_time": "3:17:00", "remaining_time": "0:09:14", "throughput": 19863.67, "total_tokens": 234790528} +{"current_steps": 74610, "total_steps": 78105, "loss": 0.105, "lr": 3.045316300602979e-08, "epoch": 4.776262723257154, "percentage": 95.53, "elapsed_time": "3:17:00", "remaining_time": "0:09:13", "throughput": 19863.87, "total_tokens": 234806464} +{"current_steps": 74615, "total_steps": 78105, "loss": 0.1414, "lr": 3.036629335623154e-08, "epoch": 4.776582805198131, "percentage": 95.53, "elapsed_time": "3:17:01", "remaining_time": "0:09:12", "throughput": 19864.03, "total_tokens": 234821632} +{"current_steps": 74620, "total_steps": 78105, "loss": 0.0939, "lr": 3.027954702707697e-08, "epoch": 4.776902887139108, "percentage": 95.54, "elapsed_time": "3:17:02", "remaining_time": "0:09:12", "throughput": 19864.23, "total_tokens": 234837376} +{"current_steps": 74625, "total_steps": 78105, "loss": 0.1217, "lr": 3.0192924022897365e-08, "epoch": 4.777222969080085, "percentage": 95.54, "elapsed_time": "3:17:02", "remaining_time": "0:09:11", "throughput": 19864.43, "total_tokens": 234853376} +{"current_steps": 74630, "total_steps": 78105, "loss": 0.1146, "lr": 3.0106424348018417e-08, "epoch": 4.777543051021062, "percentage": 95.55, "elapsed_time": "3:17:03", "remaining_time": "0:09:10", "throughput": 19864.62, "total_tokens": 234868608} +{"current_steps": 74635, "total_steps": 78105, "loss": 0.1605, "lr": 3.002004800675945e-08, "epoch": 4.777863132962038, "percentage": 95.56, "elapsed_time": "3:17:04", "remaining_time": "0:09:09", "throughput": 19864.83, "total_tokens": 234885312} +{"current_steps": 74640, "total_steps": 78105, "loss": 0.1277, "lr": 2.993379500343341e-08, "epoch": 4.778183214903015, "percentage": 95.56, "elapsed_time": "3:17:04", "remaining_time": "0:09:08", "throughput": 19865.14, "total_tokens": 234904192} +{"current_steps": 74645, "total_steps": 78105, "loss": 0.1633, "lr": 2.984766534234795e-08, "epoch": 4.778503296843992, "percentage": 95.57, "elapsed_time": "3:17:05", "remaining_time": "0:09:08", "throughput": 19865.31, "total_tokens": 234919360} +{"current_steps": 74650, "total_steps": 78105, "loss": 0.1372, "lr": 2.9761659027803257e-08, "epoch": 4.778823378784969, "percentage": 95.58, "elapsed_time": "3:17:06", "remaining_time": "0:09:07", "throughput": 19865.48, "total_tokens": 234934464} +{"current_steps": 74655, "total_steps": 78105, "loss": 0.1541, "lr": 2.9675776064094495e-08, "epoch": 4.779143460725946, "percentage": 95.58, "elapsed_time": "3:17:06", "remaining_time": "0:09:06", "throughput": 19865.74, "total_tokens": 234951744} +{"current_steps": 74660, "total_steps": 78105, "loss": 0.0929, "lr": 2.9590016455509908e-08, "epoch": 4.779463542666923, "percentage": 95.59, "elapsed_time": "3:17:07", "remaining_time": "0:09:05", "throughput": 19865.92, "total_tokens": 234966848} +{"current_steps": 74665, "total_steps": 78105, "loss": 0.1831, "lr": 2.9504380206332173e-08, "epoch": 4.7797836246079, "percentage": 95.6, "elapsed_time": "3:17:08", "remaining_time": "0:09:04", "throughput": 19866.1, "total_tokens": 234982016} +{"current_steps": 74670, "total_steps": 78105, "loss": 0.1148, "lr": 2.941886732083704e-08, "epoch": 4.780103706548877, "percentage": 95.6, "elapsed_time": "3:17:08", "remaining_time": "0:09:04", "throughput": 19866.27, "total_tokens": 234997568} +{"current_steps": 74675, "total_steps": 78105, "loss": 0.1312, "lr": 2.933347780329526e-08, "epoch": 4.780423788489854, "percentage": 95.61, "elapsed_time": "3:17:09", "remaining_time": "0:09:03", "throughput": 19866.47, "total_tokens": 235013568} +{"current_steps": 74680, "total_steps": 78105, "loss": 0.092, "lr": 2.9248211657969816e-08, "epoch": 4.780743870430831, "percentage": 95.61, "elapsed_time": "3:17:10", "remaining_time": "0:09:02", "throughput": 19866.66, "total_tokens": 235029504} +{"current_steps": 74685, "total_steps": 78105, "loss": 0.0907, "lr": 2.9163068889119516e-08, "epoch": 4.781063952371807, "percentage": 95.62, "elapsed_time": "3:17:11", "remaining_time": "0:09:01", "throughput": 19866.86, "total_tokens": 235045312} +{"current_steps": 74690, "total_steps": 78105, "loss": 0.1454, "lr": 2.9078049500995133e-08, "epoch": 4.781384034312784, "percentage": 95.63, "elapsed_time": "3:17:11", "remaining_time": "0:09:00", "throughput": 19867.08, "total_tokens": 235061824} +{"current_steps": 74695, "total_steps": 78105, "loss": 0.1331, "lr": 2.8993153497842708e-08, "epoch": 4.781704116253761, "percentage": 95.63, "elapsed_time": "3:17:12", "remaining_time": "0:09:00", "throughput": 19867.27, "total_tokens": 235077568} +{"current_steps": 74700, "total_steps": 78105, "loss": 0.1477, "lr": 2.8908380883901077e-08, "epoch": 4.782024198194738, "percentage": 95.64, "elapsed_time": "3:17:13", "remaining_time": "0:08:59", "throughput": 19867.46, "total_tokens": 235092864} +{"current_steps": 74705, "total_steps": 78105, "loss": 0.0905, "lr": 2.8823731663403243e-08, "epoch": 4.782344280135715, "percentage": 95.65, "elapsed_time": "3:17:13", "remaining_time": "0:08:58", "throughput": 19867.65, "total_tokens": 235108608} +{"current_steps": 74710, "total_steps": 78105, "loss": 0.1283, "lr": 2.8739205840576656e-08, "epoch": 4.782664362076692, "percentage": 95.65, "elapsed_time": "3:17:14", "remaining_time": "0:08:57", "throughput": 19867.84, "total_tokens": 235124608} +{"current_steps": 74715, "total_steps": 78105, "loss": 0.1257, "lr": 2.8654803419641554e-08, "epoch": 4.782984444017669, "percentage": 95.66, "elapsed_time": "3:17:15", "remaining_time": "0:08:56", "throughput": 19868.03, "total_tokens": 235140160} +{"current_steps": 74720, "total_steps": 78105, "loss": 0.1582, "lr": 2.8570524404812893e-08, "epoch": 4.783304525958646, "percentage": 95.67, "elapsed_time": "3:17:15", "remaining_time": "0:08:56", "throughput": 19868.21, "total_tokens": 235155584} +{"current_steps": 74725, "total_steps": 78105, "loss": 0.1286, "lr": 2.848636880029898e-08, "epoch": 4.783624607899622, "percentage": 95.67, "elapsed_time": "3:17:16", "remaining_time": "0:08:55", "throughput": 19868.38, "total_tokens": 235170304} +{"current_steps": 74730, "total_steps": 78105, "loss": 0.1374, "lr": 2.8402336610302283e-08, "epoch": 4.783944689840599, "percentage": 95.68, "elapsed_time": "3:17:17", "remaining_time": "0:08:54", "throughput": 19868.57, "total_tokens": 235186176} +{"current_steps": 74735, "total_steps": 78105, "loss": 0.1392, "lr": 2.831842783901889e-08, "epoch": 4.784264771781576, "percentage": 95.69, "elapsed_time": "3:17:17", "remaining_time": "0:08:53", "throughput": 19868.74, "total_tokens": 235201408} +{"current_steps": 74740, "total_steps": 78105, "loss": 0.1631, "lr": 2.8234642490638507e-08, "epoch": 4.784584853722553, "percentage": 95.69, "elapsed_time": "3:17:18", "remaining_time": "0:08:52", "throughput": 19868.92, "total_tokens": 235216896} +{"current_steps": 74745, "total_steps": 78105, "loss": 0.1293, "lr": 2.815098056934501e-08, "epoch": 4.78490493566353, "percentage": 95.7, "elapsed_time": "3:17:19", "remaining_time": "0:08:52", "throughput": 19869.14, "total_tokens": 235233408} +{"current_steps": 74750, "total_steps": 78105, "loss": 0.1907, "lr": 2.8067442079316444e-08, "epoch": 4.785225017604507, "percentage": 95.7, "elapsed_time": "3:17:19", "remaining_time": "0:08:51", "throughput": 19869.38, "total_tokens": 235250432} +{"current_steps": 74755, "total_steps": 78105, "loss": 0.0923, "lr": 2.7984027024723915e-08, "epoch": 4.785545099545484, "percentage": 95.71, "elapsed_time": "3:17:20", "remaining_time": "0:08:50", "throughput": 19869.56, "total_tokens": 235265920} +{"current_steps": 74760, "total_steps": 78105, "loss": 0.1538, "lr": 2.7900735409732704e-08, "epoch": 4.785865181486461, "percentage": 95.72, "elapsed_time": "3:17:21", "remaining_time": "0:08:49", "throughput": 19869.78, "total_tokens": 235282496} +{"current_steps": 74765, "total_steps": 78105, "loss": 0.0995, "lr": 2.7817567238501984e-08, "epoch": 4.786185263427438, "percentage": 95.72, "elapsed_time": "3:17:21", "remaining_time": "0:08:49", "throughput": 19869.96, "total_tokens": 235297664} +{"current_steps": 74770, "total_steps": 78105, "loss": 0.1395, "lr": 2.7734522515185102e-08, "epoch": 4.786505345368415, "percentage": 95.73, "elapsed_time": "3:17:22", "remaining_time": "0:08:48", "throughput": 19870.16, "total_tokens": 235313536} +{"current_steps": 74775, "total_steps": 78105, "loss": 0.1431, "lr": 2.7651601243928462e-08, "epoch": 4.786825427309391, "percentage": 95.74, "elapsed_time": "3:17:23", "remaining_time": "0:08:47", "throughput": 19870.33, "total_tokens": 235328768} +{"current_steps": 74780, "total_steps": 78105, "loss": 0.1117, "lr": 2.7568803428872915e-08, "epoch": 4.787145509250368, "percentage": 95.74, "elapsed_time": "3:17:23", "remaining_time": "0:08:46", "throughput": 19870.52, "total_tokens": 235344384} +{"current_steps": 74785, "total_steps": 78105, "loss": 0.1219, "lr": 2.7486129074152934e-08, "epoch": 4.787465591191345, "percentage": 95.75, "elapsed_time": "3:17:24", "remaining_time": "0:08:45", "throughput": 19870.78, "total_tokens": 235361792} +{"current_steps": 74790, "total_steps": 78105, "loss": 0.1024, "lr": 2.7403578183896884e-08, "epoch": 4.787785673132322, "percentage": 95.76, "elapsed_time": "3:17:25", "remaining_time": "0:08:45", "throughput": 19871.01, "total_tokens": 235378752} +{"current_steps": 74795, "total_steps": 78105, "loss": 0.125, "lr": 2.7321150762226743e-08, "epoch": 4.788105755073299, "percentage": 95.76, "elapsed_time": "3:17:26", "remaining_time": "0:08:44", "throughput": 19871.21, "total_tokens": 235394944} +{"current_steps": 74800, "total_steps": 78105, "loss": 0.1228, "lr": 2.723884681325839e-08, "epoch": 4.788425837014276, "percentage": 95.77, "elapsed_time": "3:17:26", "remaining_time": "0:08:43", "throughput": 19871.4, "total_tokens": 235410560} +{"current_steps": 74805, "total_steps": 78105, "loss": 0.1286, "lr": 2.7156666341102145e-08, "epoch": 4.788745918955253, "percentage": 95.77, "elapsed_time": "3:17:27", "remaining_time": "0:08:42", "throughput": 19871.61, "total_tokens": 235426688} +{"current_steps": 74810, "total_steps": 78105, "loss": 0.0951, "lr": 2.707460934986139e-08, "epoch": 4.78906600089623, "percentage": 95.78, "elapsed_time": "3:17:28", "remaining_time": "0:08:41", "throughput": 19871.79, "total_tokens": 235442048} +{"current_steps": 74815, "total_steps": 78105, "loss": 0.1333, "lr": 2.6992675843633408e-08, "epoch": 4.789386082837206, "percentage": 95.79, "elapsed_time": "3:17:28", "remaining_time": "0:08:41", "throughput": 19872.0, "total_tokens": 235458240} +{"current_steps": 74820, "total_steps": 78105, "loss": 0.1369, "lr": 2.69108658265102e-08, "epoch": 4.789706164778183, "percentage": 95.79, "elapsed_time": "3:17:29", "remaining_time": "0:08:40", "throughput": 19872.19, "total_tokens": 235473856} +{"current_steps": 74825, "total_steps": 78105, "loss": 0.1185, "lr": 2.6829179302576003e-08, "epoch": 4.79002624671916, "percentage": 95.8, "elapsed_time": "3:17:30", "remaining_time": "0:08:39", "throughput": 19872.37, "total_tokens": 235489536} +{"current_steps": 74830, "total_steps": 78105, "loss": 0.1613, "lr": 2.674761627591088e-08, "epoch": 4.790346328660137, "percentage": 95.81, "elapsed_time": "3:17:30", "remaining_time": "0:08:38", "throughput": 19872.58, "total_tokens": 235505920} +{"current_steps": 74835, "total_steps": 78105, "loss": 0.1412, "lr": 2.666617675058658e-08, "epoch": 4.790666410601114, "percentage": 95.81, "elapsed_time": "3:17:31", "remaining_time": "0:08:37", "throughput": 19872.77, "total_tokens": 235521216} +{"current_steps": 74840, "total_steps": 78105, "loss": 0.1291, "lr": 2.6584860730670682e-08, "epoch": 4.790986492542091, "percentage": 95.82, "elapsed_time": "3:17:32", "remaining_time": "0:08:37", "throughput": 19872.97, "total_tokens": 235536704} +{"current_steps": 74845, "total_steps": 78105, "loss": 0.1207, "lr": 2.6503668220222988e-08, "epoch": 4.791306574483068, "percentage": 95.83, "elapsed_time": "3:17:32", "remaining_time": "0:08:36", "throughput": 19873.14, "total_tokens": 235551936} +{"current_steps": 74850, "total_steps": 78105, "loss": 0.151, "lr": 2.642259922329832e-08, "epoch": 4.791626656424045, "percentage": 95.83, "elapsed_time": "3:17:33", "remaining_time": "0:08:35", "throughput": 19873.33, "total_tokens": 235567232} +{"current_steps": 74855, "total_steps": 78105, "loss": 0.0967, "lr": 2.6341653743944538e-08, "epoch": 4.791946738365022, "percentage": 95.84, "elapsed_time": "3:17:34", "remaining_time": "0:08:34", "throughput": 19873.53, "total_tokens": 235583040} +{"current_steps": 74860, "total_steps": 78105, "loss": 0.1503, "lr": 2.6260831786203976e-08, "epoch": 4.792266820305999, "percentage": 95.85, "elapsed_time": "3:17:34", "remaining_time": "0:08:33", "throughput": 19873.68, "total_tokens": 235597696} +{"current_steps": 74865, "total_steps": 78105, "loss": 0.1339, "lr": 2.6180133354112013e-08, "epoch": 4.792586902246975, "percentage": 95.85, "elapsed_time": "3:17:35", "remaining_time": "0:08:33", "throughput": 19873.89, "total_tokens": 235613888} +{"current_steps": 74870, "total_steps": 78105, "loss": 0.1132, "lr": 2.609955845169876e-08, "epoch": 4.792906984187952, "percentage": 95.86, "elapsed_time": "3:17:36", "remaining_time": "0:08:32", "throughput": 19874.08, "total_tokens": 235629632} +{"current_steps": 74875, "total_steps": 78105, "loss": 0.1654, "lr": 2.6019107082987104e-08, "epoch": 4.793227066128929, "percentage": 95.86, "elapsed_time": "3:17:36", "remaining_time": "0:08:31", "throughput": 19874.26, "total_tokens": 235644672} +{"current_steps": 74880, "total_steps": 78105, "loss": 0.106, "lr": 2.593877925199495e-08, "epoch": 4.793547148069906, "percentage": 95.87, "elapsed_time": "3:17:37", "remaining_time": "0:08:30", "throughput": 19874.45, "total_tokens": 235660160} +{"current_steps": 74885, "total_steps": 78105, "loss": 0.1481, "lr": 2.585857496273325e-08, "epoch": 4.793867230010883, "percentage": 95.88, "elapsed_time": "3:17:38", "remaining_time": "0:08:29", "throughput": 19874.62, "total_tokens": 235675520} +{"current_steps": 74890, "total_steps": 78105, "loss": 0.2174, "lr": 2.5778494219206862e-08, "epoch": 4.79418731195186, "percentage": 95.88, "elapsed_time": "3:17:38", "remaining_time": "0:08:29", "throughput": 19874.83, "total_tokens": 235691328} +{"current_steps": 74895, "total_steps": 78105, "loss": 0.1311, "lr": 2.5698537025415083e-08, "epoch": 4.794507393892837, "percentage": 95.89, "elapsed_time": "3:17:39", "remaining_time": "0:08:28", "throughput": 19875.01, "total_tokens": 235706816} +{"current_steps": 74900, "total_steps": 78105, "loss": 0.1343, "lr": 2.5618703385349997e-08, "epoch": 4.794827475833813, "percentage": 95.9, "elapsed_time": "3:17:40", "remaining_time": "0:08:27", "throughput": 19875.18, "total_tokens": 235721920} +{"current_steps": 74905, "total_steps": 78105, "loss": 0.1618, "lr": 2.553899330299814e-08, "epoch": 4.79514755777479, "percentage": 95.9, "elapsed_time": "3:17:40", "remaining_time": "0:08:26", "throughput": 19875.34, "total_tokens": 235736768} +{"current_steps": 74910, "total_steps": 78105, "loss": 0.1474, "lr": 2.5459406782339936e-08, "epoch": 4.795467639715767, "percentage": 95.91, "elapsed_time": "3:17:41", "remaining_time": "0:08:25", "throughput": 19875.53, "total_tokens": 235752512} +{"current_steps": 74915, "total_steps": 78105, "loss": 0.1266, "lr": 2.5379943827349984e-08, "epoch": 4.795787721656744, "percentage": 95.92, "elapsed_time": "3:17:42", "remaining_time": "0:08:25", "throughput": 19875.73, "total_tokens": 235768256} +{"current_steps": 74920, "total_steps": 78105, "loss": 0.1488, "lr": 2.5300604441995115e-08, "epoch": 4.796107803597721, "percentage": 95.92, "elapsed_time": "3:17:42", "remaining_time": "0:08:24", "throughput": 19875.87, "total_tokens": 235782784} +{"current_steps": 74925, "total_steps": 78105, "loss": 0.1571, "lr": 2.5221388630238543e-08, "epoch": 4.796427885538698, "percentage": 95.93, "elapsed_time": "3:17:43", "remaining_time": "0:08:23", "throughput": 19876.13, "total_tokens": 235800256} +{"current_steps": 74930, "total_steps": 78105, "loss": 0.1359, "lr": 2.5142296396034605e-08, "epoch": 4.796747967479675, "percentage": 95.93, "elapsed_time": "3:17:44", "remaining_time": "0:08:22", "throughput": 19876.31, "total_tokens": 235815936} +{"current_steps": 74935, "total_steps": 78105, "loss": 0.1299, "lr": 2.5063327743333753e-08, "epoch": 4.797068049420652, "percentage": 95.94, "elapsed_time": "3:17:44", "remaining_time": "0:08:21", "throughput": 19876.53, "total_tokens": 235832384} +{"current_steps": 74940, "total_steps": 78105, "loss": 0.1401, "lr": 2.498448267607867e-08, "epoch": 4.797388131361629, "percentage": 95.95, "elapsed_time": "3:17:45", "remaining_time": "0:08:21", "throughput": 19876.72, "total_tokens": 235848128} +{"current_steps": 74945, "total_steps": 78105, "loss": 0.1738, "lr": 2.490576119820648e-08, "epoch": 4.797708213302606, "percentage": 95.95, "elapsed_time": "3:17:46", "remaining_time": "0:08:20", "throughput": 19876.88, "total_tokens": 235862976} +{"current_steps": 74950, "total_steps": 78105, "loss": 0.1179, "lr": 2.4827163313648482e-08, "epoch": 4.798028295243582, "percentage": 95.96, "elapsed_time": "3:17:46", "remaining_time": "0:08:19", "throughput": 19877.04, "total_tokens": 235878080} +{"current_steps": 74955, "total_steps": 78105, "loss": 0.1961, "lr": 2.4748689026329042e-08, "epoch": 4.798348377184559, "percentage": 95.97, "elapsed_time": "3:17:47", "remaining_time": "0:08:18", "throughput": 19877.23, "total_tokens": 235893952} +{"current_steps": 74960, "total_steps": 78105, "loss": 0.1436, "lr": 2.4670338340166966e-08, "epoch": 4.798668459125536, "percentage": 95.97, "elapsed_time": "3:17:48", "remaining_time": "0:08:17", "throughput": 19877.48, "total_tokens": 235911296} +{"current_steps": 74965, "total_steps": 78105, "loss": 0.116, "lr": 2.4592111259074958e-08, "epoch": 4.798988541066513, "percentage": 95.98, "elapsed_time": "3:17:48", "remaining_time": "0:08:17", "throughput": 19877.68, "total_tokens": 235927296} +{"current_steps": 74970, "total_steps": 78105, "loss": 0.1287, "lr": 2.4514007786958782e-08, "epoch": 4.79930862300749, "percentage": 95.99, "elapsed_time": "3:17:49", "remaining_time": "0:08:16", "throughput": 19877.86, "total_tokens": 235942656} +{"current_steps": 74975, "total_steps": 78105, "loss": 0.1126, "lr": 2.4436027927718652e-08, "epoch": 4.799628704948467, "percentage": 95.99, "elapsed_time": "3:17:50", "remaining_time": "0:08:15", "throughput": 19878.03, "total_tokens": 235957888} +{"current_steps": 74980, "total_steps": 78105, "loss": 0.1124, "lr": 2.435817168524868e-08, "epoch": 4.799948786889444, "percentage": 96.0, "elapsed_time": "3:17:50", "remaining_time": "0:08:14", "throughput": 19878.21, "total_tokens": 235973568} +{"current_steps": 74985, "total_steps": 78105, "loss": 0.1234, "lr": 2.4280439063436578e-08, "epoch": 4.800268868830421, "percentage": 96.01, "elapsed_time": "3:17:51", "remaining_time": "0:08:13", "throughput": 19878.36, "total_tokens": 235987904} +{"current_steps": 74990, "total_steps": 78105, "loss": 0.1344, "lr": 2.4202830066163418e-08, "epoch": 4.800588950771397, "percentage": 96.01, "elapsed_time": "3:17:52", "remaining_time": "0:08:13", "throughput": 19878.57, "total_tokens": 236004352} +{"current_steps": 74995, "total_steps": 78105, "loss": 0.133, "lr": 2.4125344697305266e-08, "epoch": 4.800909032712374, "percentage": 96.02, "elapsed_time": "3:17:53", "remaining_time": "0:08:12", "throughput": 19878.81, "total_tokens": 236021696} +{"current_steps": 75000, "total_steps": 78105, "loss": 0.1096, "lr": 2.4047982960730966e-08, "epoch": 4.801229114653351, "percentage": 96.02, "elapsed_time": "3:17:53", "remaining_time": "0:08:11", "throughput": 19879.01, "total_tokens": 236037504} +{"current_steps": 75005, "total_steps": 78105, "loss": 0.1214, "lr": 2.3970744860303817e-08, "epoch": 4.801549196594328, "percentage": 96.03, "elapsed_time": "3:17:54", "remaining_time": "0:08:10", "throughput": 19879.17, "total_tokens": 236052608} +{"current_steps": 75010, "total_steps": 78105, "loss": 0.1436, "lr": 2.3893630399880185e-08, "epoch": 4.801869278535305, "percentage": 96.04, "elapsed_time": "3:17:55", "remaining_time": "0:08:09", "throughput": 19879.37, "total_tokens": 236068416} +{"current_steps": 75015, "total_steps": 78105, "loss": 0.1614, "lr": 2.381663958331143e-08, "epoch": 4.802189360476282, "percentage": 96.04, "elapsed_time": "3:17:55", "remaining_time": "0:08:09", "throughput": 19879.59, "total_tokens": 236084672} +{"current_steps": 75020, "total_steps": 78105, "loss": 0.1321, "lr": 2.373977241444142e-08, "epoch": 4.802509442417259, "percentage": 96.05, "elapsed_time": "3:17:56", "remaining_time": "0:08:08", "throughput": 19879.75, "total_tokens": 236099520} +{"current_steps": 75025, "total_steps": 78105, "loss": 0.1787, "lr": 2.366302889710903e-08, "epoch": 4.802829524358236, "percentage": 96.06, "elapsed_time": "3:17:57", "remaining_time": "0:08:07", "throughput": 19879.95, "total_tokens": 236115648} +{"current_steps": 75030, "total_steps": 78105, "loss": 0.0954, "lr": 2.358640903514592e-08, "epoch": 4.803149606299213, "percentage": 96.06, "elapsed_time": "3:17:57", "remaining_time": "0:08:06", "throughput": 19880.14, "total_tokens": 236131200} +{"current_steps": 75035, "total_steps": 78105, "loss": 0.1687, "lr": 2.3509912832378746e-08, "epoch": 4.80346968824019, "percentage": 96.07, "elapsed_time": "3:17:58", "remaining_time": "0:08:05", "throughput": 19880.32, "total_tokens": 236146368} +{"current_steps": 75040, "total_steps": 78105, "loss": 0.1407, "lr": 2.3433540292626678e-08, "epoch": 4.803789770181166, "percentage": 96.08, "elapsed_time": "3:17:59", "remaining_time": "0:08:05", "throughput": 19880.51, "total_tokens": 236162048} +{"current_steps": 75045, "total_steps": 78105, "loss": 0.1161, "lr": 2.335729141970361e-08, "epoch": 4.804109852122143, "percentage": 96.08, "elapsed_time": "3:17:59", "remaining_time": "0:08:04", "throughput": 19880.71, "total_tokens": 236177728} +{"current_steps": 75050, "total_steps": 78105, "loss": 0.0982, "lr": 2.328116621741733e-08, "epoch": 4.80442993406312, "percentage": 96.09, "elapsed_time": "3:18:00", "remaining_time": "0:08:03", "throughput": 19880.89, "total_tokens": 236193024} +{"current_steps": 75055, "total_steps": 78105, "loss": 0.1279, "lr": 2.3205164689568405e-08, "epoch": 4.804750016004097, "percentage": 96.1, "elapsed_time": "3:18:01", "remaining_time": "0:08:02", "throughput": 19881.12, "total_tokens": 236209408} +{"current_steps": 75060, "total_steps": 78105, "loss": 0.1167, "lr": 2.312928683995297e-08, "epoch": 4.805070097945074, "percentage": 96.1, "elapsed_time": "3:18:01", "remaining_time": "0:08:02", "throughput": 19881.3, "total_tokens": 236225216} +{"current_steps": 75065, "total_steps": 78105, "loss": 0.1225, "lr": 2.3053532672358824e-08, "epoch": 4.805390179886051, "percentage": 96.11, "elapsed_time": "3:18:02", "remaining_time": "0:08:01", "throughput": 19881.46, "total_tokens": 236239808} +{"current_steps": 75070, "total_steps": 78105, "loss": 0.1458, "lr": 2.297790219056989e-08, "epoch": 4.805710261827028, "percentage": 96.11, "elapsed_time": "3:18:03", "remaining_time": "0:08:00", "throughput": 19881.63, "total_tokens": 236254720} +{"current_steps": 75075, "total_steps": 78105, "loss": 0.1154, "lr": 2.2902395398361755e-08, "epoch": 4.806030343768005, "percentage": 96.12, "elapsed_time": "3:18:03", "remaining_time": "0:07:59", "throughput": 19881.8, "total_tokens": 236269632} +{"current_steps": 75080, "total_steps": 78105, "loss": 0.0826, "lr": 2.282701229950557e-08, "epoch": 4.806350425708981, "percentage": 96.13, "elapsed_time": "3:18:04", "remaining_time": "0:07:58", "throughput": 19881.97, "total_tokens": 236284864} +{"current_steps": 75085, "total_steps": 78105, "loss": 0.124, "lr": 2.275175289776499e-08, "epoch": 4.806670507649958, "percentage": 96.13, "elapsed_time": "3:18:05", "remaining_time": "0:07:58", "throughput": 19882.15, "total_tokens": 236300288} +{"current_steps": 75090, "total_steps": 78105, "loss": 0.1499, "lr": 2.26766171968984e-08, "epoch": 4.806990589590935, "percentage": 96.14, "elapsed_time": "3:18:05", "remaining_time": "0:07:57", "throughput": 19882.33, "total_tokens": 236315840} +{"current_steps": 75095, "total_steps": 78105, "loss": 0.1085, "lr": 2.2601605200657793e-08, "epoch": 4.807310671531912, "percentage": 96.15, "elapsed_time": "3:18:06", "remaining_time": "0:07:56", "throughput": 19882.51, "total_tokens": 236331264} +{"current_steps": 75100, "total_steps": 78105, "loss": 0.146, "lr": 2.2526716912788793e-08, "epoch": 4.807630753472889, "percentage": 96.15, "elapsed_time": "3:18:07", "remaining_time": "0:07:55", "throughput": 19882.7, "total_tokens": 236347264} +{"current_steps": 75105, "total_steps": 78105, "loss": 0.1494, "lr": 2.2451952337030346e-08, "epoch": 4.807950835413866, "percentage": 96.16, "elapsed_time": "3:18:07", "remaining_time": "0:07:54", "throughput": 19882.87, "total_tokens": 236362496} +{"current_steps": 75110, "total_steps": 78105, "loss": 0.089, "lr": 2.237731147711669e-08, "epoch": 4.808270917354843, "percentage": 96.17, "elapsed_time": "3:18:08", "remaining_time": "0:07:54", "throughput": 19883.11, "total_tokens": 236379776} +{"current_steps": 75115, "total_steps": 78105, "loss": 0.1351, "lr": 2.2302794336774846e-08, "epoch": 4.80859099929582, "percentage": 96.17, "elapsed_time": "3:18:09", "remaining_time": "0:07:53", "throughput": 19883.28, "total_tokens": 236395072} +{"current_steps": 75120, "total_steps": 78105, "loss": 0.1269, "lr": 2.2228400919725445e-08, "epoch": 4.808911081236797, "percentage": 96.18, "elapsed_time": "3:18:09", "remaining_time": "0:07:52", "throughput": 19883.49, "total_tokens": 236411200} +{"current_steps": 75125, "total_steps": 78105, "loss": 0.0987, "lr": 2.2154131229683297e-08, "epoch": 4.809231163177774, "percentage": 96.18, "elapsed_time": "3:18:10", "remaining_time": "0:07:51", "throughput": 19883.64, "total_tokens": 236425856} +{"current_steps": 75130, "total_steps": 78105, "loss": 0.1418, "lr": 2.2079985270357374e-08, "epoch": 4.80955124511875, "percentage": 96.19, "elapsed_time": "3:18:11", "remaining_time": "0:07:50", "throughput": 19883.82, "total_tokens": 236441408} +{"current_steps": 75135, "total_steps": 78105, "loss": 0.1102, "lr": 2.2005963045449995e-08, "epoch": 4.809871327059727, "percentage": 96.2, "elapsed_time": "3:18:11", "remaining_time": "0:07:50", "throughput": 19884.01, "total_tokens": 236457024} +{"current_steps": 75140, "total_steps": 78105, "loss": 0.1223, "lr": 2.1932064558657094e-08, "epoch": 4.810191409000704, "percentage": 96.2, "elapsed_time": "3:18:12", "remaining_time": "0:07:49", "throughput": 19884.18, "total_tokens": 236472448} +{"current_steps": 75145, "total_steps": 78105, "loss": 0.1348, "lr": 2.185828981366961e-08, "epoch": 4.810511490941681, "percentage": 96.21, "elapsed_time": "3:18:13", "remaining_time": "0:07:48", "throughput": 19884.4, "total_tokens": 236489472} +{"current_steps": 75150, "total_steps": 78105, "loss": 0.1235, "lr": 2.178463881417042e-08, "epoch": 4.810831572882658, "percentage": 96.22, "elapsed_time": "3:18:13", "remaining_time": "0:07:47", "throughput": 19884.58, "total_tokens": 236504576} +{"current_steps": 75155, "total_steps": 78105, "loss": 0.155, "lr": 2.1711111563838262e-08, "epoch": 4.811151654823635, "percentage": 96.22, "elapsed_time": "3:18:14", "remaining_time": "0:07:46", "throughput": 19884.77, "total_tokens": 236520192} +{"current_steps": 75160, "total_steps": 78105, "loss": 0.1342, "lr": 2.1637708066344075e-08, "epoch": 4.811471736764612, "percentage": 96.23, "elapsed_time": "3:18:15", "remaining_time": "0:07:46", "throughput": 19885.0, "total_tokens": 236537152} +{"current_steps": 75165, "total_steps": 78105, "loss": 0.106, "lr": 2.156442832535327e-08, "epoch": 4.811791818705588, "percentage": 96.24, "elapsed_time": "3:18:15", "remaining_time": "0:07:45", "throughput": 19885.19, "total_tokens": 236552640} +{"current_steps": 75170, "total_steps": 78105, "loss": 0.147, "lr": 2.1491272344525415e-08, "epoch": 4.812111900646565, "percentage": 96.24, "elapsed_time": "3:18:16", "remaining_time": "0:07:44", "throughput": 19885.36, "total_tokens": 236568128} +{"current_steps": 75175, "total_steps": 78105, "loss": 0.1444, "lr": 2.1418240127513423e-08, "epoch": 4.812431982587542, "percentage": 96.25, "elapsed_time": "3:18:17", "remaining_time": "0:07:43", "throughput": 19885.54, "total_tokens": 236583168} +{"current_steps": 75180, "total_steps": 78105, "loss": 0.1053, "lr": 2.13453316779641e-08, "epoch": 4.812752064528519, "percentage": 96.26, "elapsed_time": "3:18:17", "remaining_time": "0:07:42", "throughput": 19885.73, "total_tokens": 236598720} +{"current_steps": 75185, "total_steps": 78105, "loss": 0.153, "lr": 2.1272546999517584e-08, "epoch": 4.813072146469496, "percentage": 96.26, "elapsed_time": "3:18:18", "remaining_time": "0:07:42", "throughput": 19885.94, "total_tokens": 236614976} +{"current_steps": 75190, "total_steps": 78105, "loss": 0.1202, "lr": 2.1199886095809307e-08, "epoch": 4.813392228410473, "percentage": 96.27, "elapsed_time": "3:18:19", "remaining_time": "0:07:41", "throughput": 19886.11, "total_tokens": 236630528} +{"current_steps": 75195, "total_steps": 78105, "loss": 0.0844, "lr": 2.1127348970466922e-08, "epoch": 4.81371231035145, "percentage": 96.27, "elapsed_time": "3:18:19", "remaining_time": "0:07:40", "throughput": 19886.35, "total_tokens": 236647552} +{"current_steps": 75200, "total_steps": 78105, "loss": 0.1569, "lr": 2.1054935627112805e-08, "epoch": 4.814032392292427, "percentage": 96.28, "elapsed_time": "3:18:20", "remaining_time": "0:07:39", "throughput": 19886.54, "total_tokens": 236663232} +{"current_steps": 75205, "total_steps": 78105, "loss": 0.1148, "lr": 2.0982646069362955e-08, "epoch": 4.814352474233404, "percentage": 96.29, "elapsed_time": "3:18:21", "remaining_time": "0:07:38", "throughput": 19886.74, "total_tokens": 236679168} +{"current_steps": 75210, "total_steps": 78105, "loss": 0.163, "lr": 2.0910480300826985e-08, "epoch": 4.814672556174381, "percentage": 96.29, "elapsed_time": "3:18:22", "remaining_time": "0:07:38", "throughput": 19886.98, "total_tokens": 236696128} +{"current_steps": 75215, "total_steps": 78105, "loss": 0.1698, "lr": 2.0838438325108403e-08, "epoch": 4.814992638115357, "percentage": 96.3, "elapsed_time": "3:18:22", "remaining_time": "0:07:37", "throughput": 19887.19, "total_tokens": 236712448} +{"current_steps": 75220, "total_steps": 78105, "loss": 0.1178, "lr": 2.076652014580488e-08, "epoch": 4.815312720056334, "percentage": 96.31, "elapsed_time": "3:18:23", "remaining_time": "0:07:36", "throughput": 19887.4, "total_tokens": 236728768} +{"current_steps": 75225, "total_steps": 78105, "loss": 0.1358, "lr": 2.069472576650744e-08, "epoch": 4.815632801997311, "percentage": 96.31, "elapsed_time": "3:18:24", "remaining_time": "0:07:35", "throughput": 19887.63, "total_tokens": 236745728} +{"current_steps": 75230, "total_steps": 78105, "loss": 0.146, "lr": 2.062305519080099e-08, "epoch": 4.815952883938288, "percentage": 96.32, "elapsed_time": "3:18:24", "remaining_time": "0:07:34", "throughput": 19887.86, "total_tokens": 236762432} +{"current_steps": 75235, "total_steps": 78105, "loss": 0.1146, "lr": 2.0551508422264886e-08, "epoch": 4.816272965879265, "percentage": 96.33, "elapsed_time": "3:18:25", "remaining_time": "0:07:34", "throughput": 19888.09, "total_tokens": 236779520} +{"current_steps": 75240, "total_steps": 78105, "loss": 0.1183, "lr": 2.0480085464471277e-08, "epoch": 4.816593047820242, "percentage": 96.33, "elapsed_time": "3:18:26", "remaining_time": "0:07:33", "throughput": 19888.29, "total_tokens": 236795200} +{"current_steps": 75245, "total_steps": 78105, "loss": 0.1313, "lr": 2.040878632098675e-08, "epoch": 4.816913129761219, "percentage": 96.34, "elapsed_time": "3:18:26", "remaining_time": "0:07:32", "throughput": 19888.48, "total_tokens": 236811072} +{"current_steps": 75250, "total_steps": 78105, "loss": 0.1623, "lr": 2.0337610995371794e-08, "epoch": 4.817233211702196, "percentage": 96.34, "elapsed_time": "3:18:27", "remaining_time": "0:07:31", "throughput": 19888.66, "total_tokens": 236826752} +{"current_steps": 75255, "total_steps": 78105, "loss": 0.1166, "lr": 2.026655949118078e-08, "epoch": 4.817553293643172, "percentage": 96.35, "elapsed_time": "3:18:28", "remaining_time": "0:07:30", "throughput": 19888.86, "total_tokens": 236842752} +{"current_steps": 75260, "total_steps": 78105, "loss": 0.1135, "lr": 2.0195631811960882e-08, "epoch": 4.817873375584149, "percentage": 96.36, "elapsed_time": "3:18:28", "remaining_time": "0:07:30", "throughput": 19889.03, "total_tokens": 236857920} +{"current_steps": 75265, "total_steps": 78105, "loss": 0.1134, "lr": 2.0124827961254534e-08, "epoch": 4.818193457525126, "percentage": 96.36, "elapsed_time": "3:18:29", "remaining_time": "0:07:29", "throughput": 19889.19, "total_tokens": 236872960} +{"current_steps": 75270, "total_steps": 78105, "loss": 0.1411, "lr": 2.0054147942596967e-08, "epoch": 4.818513539466103, "percentage": 96.37, "elapsed_time": "3:18:30", "remaining_time": "0:07:28", "throughput": 19889.38, "total_tokens": 236888704} +{"current_steps": 75275, "total_steps": 78105, "loss": 0.1475, "lr": 1.998359175951786e-08, "epoch": 4.81883362140708, "percentage": 96.38, "elapsed_time": "3:18:30", "remaining_time": "0:07:27", "throughput": 19889.57, "total_tokens": 236904128} +{"current_steps": 75280, "total_steps": 78105, "loss": 0.1007, "lr": 1.991315941553995e-08, "epoch": 4.819153703348057, "percentage": 96.38, "elapsed_time": "3:18:31", "remaining_time": "0:07:27", "throughput": 19889.78, "total_tokens": 236920256} +{"current_steps": 75285, "total_steps": 78105, "loss": 0.0977, "lr": 1.98428509141807e-08, "epoch": 4.819473785289034, "percentage": 96.39, "elapsed_time": "3:18:32", "remaining_time": "0:07:26", "throughput": 19889.94, "total_tokens": 236935040} +{"current_steps": 75290, "total_steps": 78105, "loss": 0.1245, "lr": 1.9772666258950913e-08, "epoch": 4.819793867230011, "percentage": 96.4, "elapsed_time": "3:18:32", "remaining_time": "0:07:25", "throughput": 19890.11, "total_tokens": 236950400} +{"current_steps": 75295, "total_steps": 78105, "loss": 0.1496, "lr": 1.9702605453355007e-08, "epoch": 4.820113949170988, "percentage": 96.4, "elapsed_time": "3:18:33", "remaining_time": "0:07:24", "throughput": 19890.3, "total_tokens": 236966144} +{"current_steps": 75300, "total_steps": 78105, "loss": 0.1386, "lr": 1.963266850089185e-08, "epoch": 4.820434031111965, "percentage": 96.41, "elapsed_time": "3:18:34", "remaining_time": "0:07:23", "throughput": 19890.49, "total_tokens": 236981824} +{"current_steps": 75305, "total_steps": 78105, "loss": 0.1928, "lr": 1.9562855405053092e-08, "epoch": 4.820754113052941, "percentage": 96.42, "elapsed_time": "3:18:35", "remaining_time": "0:07:23", "throughput": 19890.74, "total_tokens": 236999232} +{"current_steps": 75310, "total_steps": 78105, "loss": 0.1102, "lr": 1.9493166169325395e-08, "epoch": 4.821074194993918, "percentage": 96.42, "elapsed_time": "3:18:35", "remaining_time": "0:07:22", "throughput": 19890.93, "total_tokens": 237014784} +{"current_steps": 75315, "total_steps": 78105, "loss": 0.0877, "lr": 1.942360079718847e-08, "epoch": 4.821394276934895, "percentage": 96.43, "elapsed_time": "3:18:36", "remaining_time": "0:07:21", "throughput": 19891.13, "total_tokens": 237031104} +{"current_steps": 75320, "total_steps": 78105, "loss": 0.1016, "lr": 1.9354159292116203e-08, "epoch": 4.821714358875872, "percentage": 96.43, "elapsed_time": "3:18:37", "remaining_time": "0:07:20", "throughput": 19891.3, "total_tokens": 237046080} +{"current_steps": 75325, "total_steps": 78105, "loss": 0.1461, "lr": 1.9284841657575827e-08, "epoch": 4.822034440816849, "percentage": 96.44, "elapsed_time": "3:18:37", "remaining_time": "0:07:19", "throughput": 19891.46, "total_tokens": 237061248} +{"current_steps": 75330, "total_steps": 78105, "loss": 0.1599, "lr": 1.9215647897029012e-08, "epoch": 4.822354522757826, "percentage": 96.45, "elapsed_time": "3:18:38", "remaining_time": "0:07:19", "throughput": 19891.66, "total_tokens": 237076928} +{"current_steps": 75335, "total_steps": 78105, "loss": 0.1229, "lr": 1.9146578013930496e-08, "epoch": 4.822674604698803, "percentage": 96.45, "elapsed_time": "3:18:39", "remaining_time": "0:07:18", "throughput": 19891.89, "total_tokens": 237093632} +{"current_steps": 75340, "total_steps": 78105, "loss": 0.1142, "lr": 1.907763201172974e-08, "epoch": 4.82299468663978, "percentage": 96.46, "elapsed_time": "3:18:39", "remaining_time": "0:07:17", "throughput": 19892.12, "total_tokens": 237110464} +{"current_steps": 75345, "total_steps": 78105, "loss": 0.1102, "lr": 1.9008809893869552e-08, "epoch": 4.823314768580756, "percentage": 96.47, "elapsed_time": "3:18:40", "remaining_time": "0:07:16", "throughput": 19892.28, "total_tokens": 237125312} +{"current_steps": 75350, "total_steps": 78105, "loss": 0.1336, "lr": 1.894011166378634e-08, "epoch": 4.823634850521733, "percentage": 96.47, "elapsed_time": "3:18:41", "remaining_time": "0:07:15", "throughput": 19892.51, "total_tokens": 237142464} +{"current_steps": 75355, "total_steps": 78105, "loss": 0.1491, "lr": 1.8871537324910427e-08, "epoch": 4.82395493246271, "percentage": 96.48, "elapsed_time": "3:18:41", "remaining_time": "0:07:15", "throughput": 19892.67, "total_tokens": 237157504} +{"current_steps": 75360, "total_steps": 78105, "loss": 0.1177, "lr": 1.8803086880666565e-08, "epoch": 4.824275014403687, "percentage": 96.49, "elapsed_time": "3:18:42", "remaining_time": "0:07:14", "throughput": 19892.86, "total_tokens": 237173120} +{"current_steps": 75365, "total_steps": 78105, "loss": 0.1385, "lr": 1.8734760334472024e-08, "epoch": 4.824595096344664, "percentage": 96.49, "elapsed_time": "3:18:43", "remaining_time": "0:07:13", "throughput": 19893.01, "total_tokens": 237187968} +{"current_steps": 75370, "total_steps": 78105, "loss": 0.1245, "lr": 1.866655768973935e-08, "epoch": 4.824915178285641, "percentage": 96.5, "elapsed_time": "3:18:43", "remaining_time": "0:07:12", "throughput": 19893.33, "total_tokens": 237207360} +{"current_steps": 75375, "total_steps": 78105, "loss": 0.0867, "lr": 1.8598478949874155e-08, "epoch": 4.825235260226618, "percentage": 96.5, "elapsed_time": "3:18:44", "remaining_time": "0:07:11", "throughput": 19893.54, "total_tokens": 237223744} +{"current_steps": 75380, "total_steps": 78105, "loss": 0.1123, "lr": 1.8530524118275382e-08, "epoch": 4.825555342167595, "percentage": 96.51, "elapsed_time": "3:18:45", "remaining_time": "0:07:11", "throughput": 19893.76, "total_tokens": 237240448} +{"current_steps": 75385, "total_steps": 78105, "loss": 0.1312, "lr": 1.846269319833699e-08, "epoch": 4.825875424108572, "percentage": 96.52, "elapsed_time": "3:18:46", "remaining_time": "0:07:10", "throughput": 19893.96, "total_tokens": 237256192} +{"current_steps": 75390, "total_steps": 78105, "loss": 0.1082, "lr": 1.8394986193445708e-08, "epoch": 4.826195506049549, "percentage": 96.52, "elapsed_time": "3:18:46", "remaining_time": "0:07:09", "throughput": 19894.11, "total_tokens": 237270912} +{"current_steps": 75395, "total_steps": 78105, "loss": 0.1427, "lr": 1.832740310698272e-08, "epoch": 4.826515587990525, "percentage": 96.53, "elapsed_time": "3:18:47", "remaining_time": "0:07:08", "throughput": 19894.36, "total_tokens": 237288128} +{"current_steps": 75400, "total_steps": 78105, "loss": 0.109, "lr": 1.8259943942322557e-08, "epoch": 4.826835669931502, "percentage": 96.54, "elapsed_time": "3:18:48", "remaining_time": "0:07:07", "throughput": 19894.54, "total_tokens": 237304064} +{"current_steps": 75405, "total_steps": 78105, "loss": 0.1363, "lr": 1.8192608702833903e-08, "epoch": 4.827155751872479, "percentage": 96.54, "elapsed_time": "3:18:48", "remaining_time": "0:07:07", "throughput": 19894.76, "total_tokens": 237320640} +{"current_steps": 75410, "total_steps": 78105, "loss": 0.1808, "lr": 1.8125397391879352e-08, "epoch": 4.827475833813456, "percentage": 96.55, "elapsed_time": "3:18:49", "remaining_time": "0:07:06", "throughput": 19895.02, "total_tokens": 237338304} +{"current_steps": 75415, "total_steps": 78105, "loss": 0.1513, "lr": 1.8058310012814273e-08, "epoch": 4.827795915754433, "percentage": 96.56, "elapsed_time": "3:18:50", "remaining_time": "0:07:05", "throughput": 19895.2, "total_tokens": 237353408} +{"current_steps": 75420, "total_steps": 78105, "loss": 0.1071, "lr": 1.7991346568989598e-08, "epoch": 4.82811599769541, "percentage": 96.56, "elapsed_time": "3:18:50", "remaining_time": "0:07:04", "throughput": 19895.37, "total_tokens": 237368960} +{"current_steps": 75425, "total_steps": 78105, "loss": 0.1679, "lr": 1.7924507063748763e-08, "epoch": 4.828436079636387, "percentage": 96.57, "elapsed_time": "3:18:51", "remaining_time": "0:07:03", "throughput": 19895.57, "total_tokens": 237384832} +{"current_steps": 75430, "total_steps": 78105, "loss": 0.1145, "lr": 1.7857791500429378e-08, "epoch": 4.828756161577363, "percentage": 96.58, "elapsed_time": "3:18:52", "remaining_time": "0:07:03", "throughput": 19895.76, "total_tokens": 237400512} +{"current_steps": 75435, "total_steps": 78105, "loss": 0.1578, "lr": 1.779119988236294e-08, "epoch": 4.82907624351834, "percentage": 96.58, "elapsed_time": "3:18:52", "remaining_time": "0:07:02", "throughput": 19895.95, "total_tokens": 237416256} +{"current_steps": 75440, "total_steps": 78105, "loss": 0.1284, "lr": 1.7724732212874572e-08, "epoch": 4.829396325459317, "percentage": 96.59, "elapsed_time": "3:18:53", "remaining_time": "0:07:01", "throughput": 19896.12, "total_tokens": 237431296} +{"current_steps": 75445, "total_steps": 78105, "loss": 0.0989, "lr": 1.7658388495283563e-08, "epoch": 4.829716407400294, "percentage": 96.59, "elapsed_time": "3:18:54", "remaining_time": "0:07:00", "throughput": 19896.31, "total_tokens": 237447040} +{"current_steps": 75450, "total_steps": 78105, "loss": 0.1114, "lr": 1.759216873290226e-08, "epoch": 4.830036489341271, "percentage": 96.6, "elapsed_time": "3:18:54", "remaining_time": "0:06:59", "throughput": 19896.51, "total_tokens": 237463168} +{"current_steps": 75455, "total_steps": 78105, "loss": 0.1209, "lr": 1.752607292903774e-08, "epoch": 4.830356571282248, "percentage": 96.61, "elapsed_time": "3:18:55", "remaining_time": "0:06:59", "throughput": 19896.72, "total_tokens": 237479232} +{"current_steps": 75460, "total_steps": 78105, "loss": 0.1228, "lr": 1.746010108699042e-08, "epoch": 4.830676653223225, "percentage": 96.61, "elapsed_time": "3:18:56", "remaining_time": "0:06:58", "throughput": 19896.9, "total_tokens": 237494976} +{"current_steps": 75465, "total_steps": 78105, "loss": 0.137, "lr": 1.7394253210054613e-08, "epoch": 4.830996735164202, "percentage": 96.62, "elapsed_time": "3:18:56", "remaining_time": "0:06:57", "throughput": 19897.1, "total_tokens": 237511232} +{"current_steps": 75470, "total_steps": 78105, "loss": 0.1509, "lr": 1.7328529301518515e-08, "epoch": 4.831316817105179, "percentage": 96.63, "elapsed_time": "3:18:57", "remaining_time": "0:06:56", "throughput": 19897.3, "total_tokens": 237526720} +{"current_steps": 75475, "total_steps": 78105, "loss": 0.1434, "lr": 1.7262929364663673e-08, "epoch": 4.831636899046156, "percentage": 96.63, "elapsed_time": "3:18:58", "remaining_time": "0:06:56", "throughput": 19897.49, "total_tokens": 237542528} +{"current_steps": 75480, "total_steps": 78105, "loss": 0.1276, "lr": 1.7197453402766073e-08, "epoch": 4.831956980987132, "percentage": 96.64, "elapsed_time": "3:18:58", "remaining_time": "0:06:55", "throughput": 19897.67, "total_tokens": 237558080} +{"current_steps": 75485, "total_steps": 78105, "loss": 0.0911, "lr": 1.7132101419095326e-08, "epoch": 4.832277062928109, "percentage": 96.65, "elapsed_time": "3:18:59", "remaining_time": "0:06:54", "throughput": 19897.89, "total_tokens": 237574592} +{"current_steps": 75490, "total_steps": 78105, "loss": 0.1761, "lr": 1.7066873416914377e-08, "epoch": 4.832597144869086, "percentage": 96.65, "elapsed_time": "3:19:00", "remaining_time": "0:06:53", "throughput": 19898.14, "total_tokens": 237592192} +{"current_steps": 75495, "total_steps": 78105, "loss": 0.1351, "lr": 1.700176939948117e-08, "epoch": 4.832917226810063, "percentage": 96.66, "elapsed_time": "3:19:01", "remaining_time": "0:06:52", "throughput": 19898.34, "total_tokens": 237608064} +{"current_steps": 75500, "total_steps": 78105, "loss": 0.1362, "lr": 1.6936789370045613e-08, "epoch": 4.83323730875104, "percentage": 96.66, "elapsed_time": "3:19:01", "remaining_time": "0:06:52", "throughput": 19898.54, "total_tokens": 237623872} +{"current_steps": 75505, "total_steps": 78105, "loss": 0.1269, "lr": 1.6871933331853164e-08, "epoch": 4.833557390692017, "percentage": 96.67, "elapsed_time": "3:19:02", "remaining_time": "0:06:51", "throughput": 19898.74, "total_tokens": 237639744} +{"current_steps": 75510, "total_steps": 78105, "loss": 0.1469, "lr": 1.680720128814234e-08, "epoch": 4.833877472632994, "percentage": 96.68, "elapsed_time": "3:19:03", "remaining_time": "0:06:50", "throughput": 19898.92, "total_tokens": 237655488} +{"current_steps": 75515, "total_steps": 78105, "loss": 0.1226, "lr": 1.6742593242145e-08, "epoch": 4.834197554573971, "percentage": 96.68, "elapsed_time": "3:19:03", "remaining_time": "0:06:49", "throughput": 19899.11, "total_tokens": 237671488} +{"current_steps": 75520, "total_steps": 78105, "loss": 0.1412, "lr": 1.667810919708829e-08, "epoch": 4.834517636514947, "percentage": 96.69, "elapsed_time": "3:19:04", "remaining_time": "0:06:48", "throughput": 19899.28, "total_tokens": 237687040} +{"current_steps": 75525, "total_steps": 78105, "loss": 0.1122, "lr": 1.6613749156191296e-08, "epoch": 4.834837718455924, "percentage": 96.7, "elapsed_time": "3:19:05", "remaining_time": "0:06:48", "throughput": 19899.48, "total_tokens": 237702912} +{"current_steps": 75530, "total_steps": 78105, "loss": 0.134, "lr": 1.6549513122668393e-08, "epoch": 4.835157800396901, "percentage": 96.7, "elapsed_time": "3:19:05", "remaining_time": "0:06:47", "throughput": 19899.66, "total_tokens": 237717888} +{"current_steps": 75535, "total_steps": 78105, "loss": 0.0941, "lr": 1.6485401099726738e-08, "epoch": 4.835477882337878, "percentage": 96.71, "elapsed_time": "3:19:06", "remaining_time": "0:06:46", "throughput": 19899.85, "total_tokens": 237733696} +{"current_steps": 75540, "total_steps": 78105, "loss": 0.1398, "lr": 1.6421413090568218e-08, "epoch": 4.835797964278855, "percentage": 96.72, "elapsed_time": "3:19:07", "remaining_time": "0:06:45", "throughput": 19900.01, "total_tokens": 237748608} +{"current_steps": 75545, "total_steps": 78105, "loss": 0.1138, "lr": 1.635754909838777e-08, "epoch": 4.836118046219832, "percentage": 96.72, "elapsed_time": "3:19:07", "remaining_time": "0:06:44", "throughput": 19900.22, "total_tokens": 237764736} +{"current_steps": 75550, "total_steps": 78105, "loss": 0.1803, "lr": 1.6293809126374515e-08, "epoch": 4.836438128160809, "percentage": 96.73, "elapsed_time": "3:19:08", "remaining_time": "0:06:44", "throughput": 19900.39, "total_tokens": 237780160} +{"current_steps": 75555, "total_steps": 78105, "loss": 0.1223, "lr": 1.6230193177711184e-08, "epoch": 4.836758210101786, "percentage": 96.74, "elapsed_time": "3:19:09", "remaining_time": "0:06:43", "throughput": 19900.56, "total_tokens": 237795200} +{"current_steps": 75560, "total_steps": 78105, "loss": 0.1103, "lr": 1.6166701255574678e-08, "epoch": 4.837078292042763, "percentage": 96.74, "elapsed_time": "3:19:09", "remaining_time": "0:06:42", "throughput": 19900.72, "total_tokens": 237809920} +{"current_steps": 75565, "total_steps": 78105, "loss": 0.1294, "lr": 1.6103333363135243e-08, "epoch": 4.83739837398374, "percentage": 96.75, "elapsed_time": "3:19:10", "remaining_time": "0:06:41", "throughput": 19900.93, "total_tokens": 237826368} +{"current_steps": 75570, "total_steps": 78105, "loss": 0.1121, "lr": 1.6040089503557287e-08, "epoch": 4.837718455924716, "percentage": 96.75, "elapsed_time": "3:19:11", "remaining_time": "0:06:40", "throughput": 19901.11, "total_tokens": 237841920} +{"current_steps": 75575, "total_steps": 78105, "loss": 0.1326, "lr": 1.5976969679998568e-08, "epoch": 4.838038537865693, "percentage": 96.76, "elapsed_time": "3:19:11", "remaining_time": "0:06:40", "throughput": 19901.28, "total_tokens": 237857408} +{"current_steps": 75580, "total_steps": 78105, "loss": 0.1293, "lr": 1.591397389561128e-08, "epoch": 4.83835861980667, "percentage": 96.77, "elapsed_time": "3:19:12", "remaining_time": "0:06:39", "throughput": 19901.46, "total_tokens": 237872704} +{"current_steps": 75585, "total_steps": 78105, "loss": 0.1315, "lr": 1.5851102153540964e-08, "epoch": 4.838678701747647, "percentage": 96.77, "elapsed_time": "3:19:13", "remaining_time": "0:06:38", "throughput": 19901.65, "total_tokens": 237888448} +{"current_steps": 75590, "total_steps": 78105, "loss": 0.1358, "lr": 1.578835445692706e-08, "epoch": 4.838998783688624, "percentage": 96.78, "elapsed_time": "3:19:13", "remaining_time": "0:06:37", "throughput": 19901.82, "total_tokens": 237903680} +{"current_steps": 75595, "total_steps": 78105, "loss": 0.1391, "lr": 1.572573080890316e-08, "epoch": 4.839318865629601, "percentage": 96.79, "elapsed_time": "3:19:14", "remaining_time": "0:06:36", "throughput": 19902.01, "total_tokens": 237919360} +{"current_steps": 75600, "total_steps": 78105, "loss": 0.1116, "lr": 1.5663231212595943e-08, "epoch": 4.839638947570578, "percentage": 96.79, "elapsed_time": "3:19:15", "remaining_time": "0:06:36", "throughput": 19902.21, "total_tokens": 237935360} +{"current_steps": 75605, "total_steps": 78105, "loss": 0.1546, "lr": 1.560085567112679e-08, "epoch": 4.839959029511555, "percentage": 96.8, "elapsed_time": "3:19:15", "remaining_time": "0:06:35", "throughput": 19902.37, "total_tokens": 237950336} +{"current_steps": 75610, "total_steps": 78105, "loss": 0.1033, "lr": 1.5538604187609884e-08, "epoch": 4.840279111452531, "percentage": 96.81, "elapsed_time": "3:19:16", "remaining_time": "0:06:34", "throughput": 19902.59, "total_tokens": 237966848} +{"current_steps": 75615, "total_steps": 78105, "loss": 0.1219, "lr": 1.5476476765154126e-08, "epoch": 4.840599193393508, "percentage": 96.81, "elapsed_time": "3:19:17", "remaining_time": "0:06:33", "throughput": 19902.75, "total_tokens": 237982080} +{"current_steps": 75620, "total_steps": 78105, "loss": 0.1846, "lr": 1.541447340686175e-08, "epoch": 4.840919275334485, "percentage": 96.82, "elapsed_time": "3:19:17", "remaining_time": "0:06:32", "throughput": 19902.94, "total_tokens": 237997888} +{"current_steps": 75625, "total_steps": 78105, "loss": 0.182, "lr": 1.5352594115828624e-08, "epoch": 4.841239357275462, "percentage": 96.82, "elapsed_time": "3:19:18", "remaining_time": "0:06:32", "throughput": 19903.11, "total_tokens": 238013376} +{"current_steps": 75630, "total_steps": 78105, "loss": 0.1719, "lr": 1.5290838895145043e-08, "epoch": 4.841559439216439, "percentage": 96.83, "elapsed_time": "3:19:19", "remaining_time": "0:06:31", "throughput": 19903.33, "total_tokens": 238030208} +{"current_steps": 75635, "total_steps": 78105, "loss": 0.121, "lr": 1.522920774789466e-08, "epoch": 4.841879521157416, "percentage": 96.84, "elapsed_time": "3:19:20", "remaining_time": "0:06:30", "throughput": 19903.53, "total_tokens": 238046272} +{"current_steps": 75640, "total_steps": 78105, "loss": 0.1466, "lr": 1.516770067715473e-08, "epoch": 4.842199603098393, "percentage": 96.84, "elapsed_time": "3:19:20", "remaining_time": "0:06:29", "throughput": 19903.71, "total_tokens": 238061568} +{"current_steps": 75645, "total_steps": 78105, "loss": 0.1399, "lr": 1.5106317685996964e-08, "epoch": 4.84251968503937, "percentage": 96.85, "elapsed_time": "3:19:21", "remaining_time": "0:06:28", "throughput": 19903.87, "total_tokens": 238076352} +{"current_steps": 75650, "total_steps": 78105, "loss": 0.1294, "lr": 1.504505877748641e-08, "epoch": 4.842839766980347, "percentage": 96.86, "elapsed_time": "3:19:22", "remaining_time": "0:06:28", "throughput": 19904.12, "total_tokens": 238094080} +{"current_steps": 75655, "total_steps": 78105, "loss": 0.0934, "lr": 1.4983923954681735e-08, "epoch": 4.843159848921324, "percentage": 96.86, "elapsed_time": "3:19:22", "remaining_time": "0:06:27", "throughput": 19904.31, "total_tokens": 238110080} +{"current_steps": 75660, "total_steps": 78105, "loss": 0.1358, "lr": 1.4922913220636326e-08, "epoch": 4.8434799308623, "percentage": 96.87, "elapsed_time": "3:19:23", "remaining_time": "0:06:26", "throughput": 19904.51, "total_tokens": 238126144} +{"current_steps": 75665, "total_steps": 78105, "loss": 0.1138, "lr": 1.4862026578396083e-08, "epoch": 4.843800012803277, "percentage": 96.88, "elapsed_time": "3:19:24", "remaining_time": "0:06:25", "throughput": 19904.73, "total_tokens": 238142592} +{"current_steps": 75670, "total_steps": 78105, "loss": 0.1079, "lr": 1.4801264031001627e-08, "epoch": 4.844120094744254, "percentage": 96.88, "elapsed_time": "3:19:24", "remaining_time": "0:06:25", "throughput": 19904.9, "total_tokens": 238157760} +{"current_steps": 75675, "total_steps": 78105, "loss": 0.1317, "lr": 1.4740625581486923e-08, "epoch": 4.844440176685231, "percentage": 96.89, "elapsed_time": "3:19:25", "remaining_time": "0:06:24", "throughput": 19905.05, "total_tokens": 238172608} +{"current_steps": 75680, "total_steps": 78105, "loss": 0.1381, "lr": 1.4680111232880379e-08, "epoch": 4.844760258626208, "percentage": 96.9, "elapsed_time": "3:19:26", "remaining_time": "0:06:23", "throughput": 19905.2, "total_tokens": 238187328} +{"current_steps": 75685, "total_steps": 78105, "loss": 0.1558, "lr": 1.4619720988203468e-08, "epoch": 4.845080340567185, "percentage": 96.9, "elapsed_time": "3:19:26", "remaining_time": "0:06:22", "throughput": 19905.4, "total_tokens": 238203584} +{"current_steps": 75690, "total_steps": 78105, "loss": 0.1319, "lr": 1.4559454850471832e-08, "epoch": 4.845400422508162, "percentage": 96.91, "elapsed_time": "3:19:27", "remaining_time": "0:06:21", "throughput": 19905.59, "total_tokens": 238219072} +{"current_steps": 75695, "total_steps": 78105, "loss": 0.1137, "lr": 1.4499312822694733e-08, "epoch": 4.8457205044491385, "percentage": 96.91, "elapsed_time": "3:19:28", "remaining_time": "0:06:21", "throughput": 19905.76, "total_tokens": 238234432} +{"current_steps": 75700, "total_steps": 78105, "loss": 0.0912, "lr": 1.44392949078756e-08, "epoch": 4.8460405863901155, "percentage": 96.92, "elapsed_time": "3:19:28", "remaining_time": "0:06:20", "throughput": 19905.91, "total_tokens": 238249024} +{"current_steps": 75705, "total_steps": 78105, "loss": 0.2068, "lr": 1.4379401109011204e-08, "epoch": 4.8463606683310925, "percentage": 96.93, "elapsed_time": "3:19:29", "remaining_time": "0:06:19", "throughput": 19906.07, "total_tokens": 238263552} +{"current_steps": 75710, "total_steps": 78105, "loss": 0.1415, "lr": 1.4319631429092207e-08, "epoch": 4.8466807502720695, "percentage": 96.93, "elapsed_time": "3:19:30", "remaining_time": "0:06:18", "throughput": 19906.24, "total_tokens": 238278848} +{"current_steps": 75715, "total_steps": 78105, "loss": 0.1501, "lr": 1.4259985871103721e-08, "epoch": 4.8470008322130465, "percentage": 96.94, "elapsed_time": "3:19:30", "remaining_time": "0:06:17", "throughput": 19906.39, "total_tokens": 238293632} +{"current_steps": 75720, "total_steps": 78105, "loss": 0.1726, "lr": 1.4200464438023642e-08, "epoch": 4.847320914154023, "percentage": 96.95, "elapsed_time": "3:19:31", "remaining_time": "0:06:17", "throughput": 19906.55, "total_tokens": 238308608} +{"current_steps": 75725, "total_steps": 78105, "loss": 0.1247, "lr": 1.4141067132824316e-08, "epoch": 4.847640996095, "percentage": 96.95, "elapsed_time": "3:19:32", "remaining_time": "0:06:16", "throughput": 19906.72, "total_tokens": 238323840} +{"current_steps": 75730, "total_steps": 78105, "loss": 0.1215, "lr": 1.40817939584717e-08, "epoch": 4.847961078035977, "percentage": 96.96, "elapsed_time": "3:19:32", "remaining_time": "0:06:15", "throughput": 19906.91, "total_tokens": 238339648} +{"current_steps": 75735, "total_steps": 78105, "loss": 0.146, "lr": 1.4022644917925654e-08, "epoch": 4.848281159976954, "percentage": 96.97, "elapsed_time": "3:19:33", "remaining_time": "0:06:14", "throughput": 19907.07, "total_tokens": 238354496} +{"current_steps": 75740, "total_steps": 78105, "loss": 0.1378, "lr": 1.3963620014139645e-08, "epoch": 4.848601241917931, "percentage": 96.97, "elapsed_time": "3:19:34", "remaining_time": "0:06:13", "throughput": 19907.24, "total_tokens": 238369664} +{"current_steps": 75745, "total_steps": 78105, "loss": 0.1618, "lr": 1.3904719250061316e-08, "epoch": 4.8489213238589075, "percentage": 96.98, "elapsed_time": "3:19:34", "remaining_time": "0:06:13", "throughput": 19907.47, "total_tokens": 238386496} +{"current_steps": 75750, "total_steps": 78105, "loss": 0.1654, "lr": 1.3845942628631648e-08, "epoch": 4.8492414057998845, "percentage": 96.98, "elapsed_time": "3:19:35", "remaining_time": "0:06:12", "throughput": 19907.69, "total_tokens": 238403264} +{"current_steps": 75755, "total_steps": 78105, "loss": 0.1614, "lr": 1.3787290152785514e-08, "epoch": 4.8495614877408615, "percentage": 96.99, "elapsed_time": "3:19:36", "remaining_time": "0:06:11", "throughput": 19907.86, "total_tokens": 238418368} +{"current_steps": 75760, "total_steps": 78105, "loss": 0.0952, "lr": 1.3728761825452242e-08, "epoch": 4.8498815696818385, "percentage": 97.0, "elapsed_time": "3:19:36", "remaining_time": "0:06:10", "throughput": 19908.06, "total_tokens": 238434368} +{"current_steps": 75765, "total_steps": 78105, "loss": 0.0997, "lr": 1.3670357649553933e-08, "epoch": 4.8502016516228155, "percentage": 97.0, "elapsed_time": "3:19:37", "remaining_time": "0:06:09", "throughput": 19908.21, "total_tokens": 238448960} +{"current_steps": 75770, "total_steps": 78105, "loss": 0.1256, "lr": 1.3612077628007147e-08, "epoch": 4.8505217335637925, "percentage": 97.01, "elapsed_time": "3:19:38", "remaining_time": "0:06:09", "throughput": 19908.36, "total_tokens": 238463552} +{"current_steps": 75775, "total_steps": 78105, "loss": 0.1224, "lr": 1.3553921763722055e-08, "epoch": 4.8508418155047694, "percentage": 97.02, "elapsed_time": "3:19:38", "remaining_time": "0:06:08", "throughput": 19908.52, "total_tokens": 238478528} +{"current_steps": 75780, "total_steps": 78105, "loss": 0.15, "lr": 1.3495890059602723e-08, "epoch": 4.851161897445746, "percentage": 97.02, "elapsed_time": "3:19:39", "remaining_time": "0:06:07", "throughput": 19908.79, "total_tokens": 238496128} +{"current_steps": 75785, "total_steps": 78105, "loss": 0.1264, "lr": 1.3437982518546832e-08, "epoch": 4.8514819793867225, "percentage": 97.03, "elapsed_time": "3:19:40", "remaining_time": "0:06:06", "throughput": 19908.99, "total_tokens": 238511936} +{"current_steps": 75790, "total_steps": 78105, "loss": 0.1236, "lr": 1.3380199143446238e-08, "epoch": 4.8518020613276995, "percentage": 97.04, "elapsed_time": "3:19:40", "remaining_time": "0:06:05", "throughput": 19909.16, "total_tokens": 238527168} +{"current_steps": 75795, "total_steps": 78105, "loss": 0.1523, "lr": 1.3322539937185852e-08, "epoch": 4.8521221432686765, "percentage": 97.04, "elapsed_time": "3:19:41", "remaining_time": "0:06:05", "throughput": 19909.35, "total_tokens": 238542976} +{"current_steps": 75800, "total_steps": 78105, "loss": 0.1294, "lr": 1.3265004902645595e-08, "epoch": 4.8524422252096535, "percentage": 97.05, "elapsed_time": "3:19:42", "remaining_time": "0:06:04", "throughput": 19909.5, "total_tokens": 238557760} +{"current_steps": 75805, "total_steps": 78105, "loss": 0.1267, "lr": 1.3207594042697614e-08, "epoch": 4.8527623071506305, "percentage": 97.06, "elapsed_time": "3:19:42", "remaining_time": "0:06:03", "throughput": 19909.7, "total_tokens": 238573888} +{"current_steps": 75810, "total_steps": 78105, "loss": 0.1533, "lr": 1.3150307360209614e-08, "epoch": 4.8530823890916075, "percentage": 97.06, "elapsed_time": "3:19:43", "remaining_time": "0:06:02", "throughput": 19909.93, "total_tokens": 238590784} +{"current_steps": 75815, "total_steps": 78105, "loss": 0.11, "lr": 1.3093144858041528e-08, "epoch": 4.8534024710325845, "percentage": 97.07, "elapsed_time": "3:19:44", "remaining_time": "0:06:01", "throughput": 19910.1, "total_tokens": 238605952} +{"current_steps": 75820, "total_steps": 78105, "loss": 0.1783, "lr": 1.3036106539048021e-08, "epoch": 4.8537225529735615, "percentage": 97.07, "elapsed_time": "3:19:44", "remaining_time": "0:06:01", "throughput": 19910.26, "total_tokens": 238621120} +{"current_steps": 75825, "total_steps": 78105, "loss": 0.1111, "lr": 1.2979192406077091e-08, "epoch": 4.8540426349145385, "percentage": 97.08, "elapsed_time": "3:19:45", "remaining_time": "0:06:00", "throughput": 19910.44, "total_tokens": 238636928} +{"current_steps": 75830, "total_steps": 78105, "loss": 0.0834, "lr": 1.2922402461970906e-08, "epoch": 4.8543627168555155, "percentage": 97.09, "elapsed_time": "3:19:46", "remaining_time": "0:05:59", "throughput": 19910.6, "total_tokens": 238652480} +{"current_steps": 75835, "total_steps": 78105, "loss": 0.1284, "lr": 1.2865736709565258e-08, "epoch": 4.854682798796492, "percentage": 97.09, "elapsed_time": "3:19:46", "remaining_time": "0:05:58", "throughput": 19910.85, "total_tokens": 238670016} +{"current_steps": 75840, "total_steps": 78105, "loss": 0.1419, "lr": 1.2809195151689823e-08, "epoch": 4.8550028807374686, "percentage": 97.1, "elapsed_time": "3:19:47", "remaining_time": "0:05:58", "throughput": 19911.05, "total_tokens": 238686272} +{"current_steps": 75845, "total_steps": 78105, "loss": 0.2286, "lr": 1.2752777791167626e-08, "epoch": 4.8553229626784455, "percentage": 97.11, "elapsed_time": "3:19:48", "remaining_time": "0:05:57", "throughput": 19911.23, "total_tokens": 238701376} +{"current_steps": 75850, "total_steps": 78105, "loss": 0.1259, "lr": 1.2696484630816408e-08, "epoch": 4.8556430446194225, "percentage": 97.11, "elapsed_time": "3:19:49", "remaining_time": "0:05:56", "throughput": 19911.46, "total_tokens": 238718656} +{"current_steps": 75855, "total_steps": 78105, "loss": 0.1536, "lr": 1.26403156734467e-08, "epoch": 4.8559631265603995, "percentage": 97.12, "elapsed_time": "3:19:49", "remaining_time": "0:05:55", "throughput": 19911.66, "total_tokens": 238734528} +{"current_steps": 75860, "total_steps": 78105, "loss": 0.1468, "lr": 1.2584270921863484e-08, "epoch": 4.8562832085013765, "percentage": 97.13, "elapsed_time": "3:19:50", "remaining_time": "0:05:54", "throughput": 19911.85, "total_tokens": 238750400} +{"current_steps": 75865, "total_steps": 78105, "loss": 0.1326, "lr": 1.2528350378865073e-08, "epoch": 4.8566032904423535, "percentage": 97.13, "elapsed_time": "3:19:51", "remaining_time": "0:05:54", "throughput": 19911.99, "total_tokens": 238764800} +{"current_steps": 75870, "total_steps": 78105, "loss": 0.1932, "lr": 1.2472554047244512e-08, "epoch": 4.8569233723833305, "percentage": 97.14, "elapsed_time": "3:19:51", "remaining_time": "0:05:53", "throughput": 19912.15, "total_tokens": 238779712} +{"current_steps": 75875, "total_steps": 78105, "loss": 0.1359, "lr": 1.2416881929787072e-08, "epoch": 4.857243454324307, "percentage": 97.14, "elapsed_time": "3:19:52", "remaining_time": "0:05:52", "throughput": 19912.35, "total_tokens": 238795840} +{"current_steps": 75880, "total_steps": 78105, "loss": 0.1416, "lr": 1.2361334029273586e-08, "epoch": 4.857563536265284, "percentage": 97.15, "elapsed_time": "3:19:53", "remaining_time": "0:05:51", "throughput": 19912.58, "total_tokens": 238812736} +{"current_steps": 75885, "total_steps": 78105, "loss": 0.1357, "lr": 1.2305910348477112e-08, "epoch": 4.857883618206261, "percentage": 97.16, "elapsed_time": "3:19:53", "remaining_time": "0:05:50", "throughput": 19912.78, "total_tokens": 238828928} +{"current_steps": 75890, "total_steps": 78105, "loss": 0.1251, "lr": 1.2250610890165992e-08, "epoch": 4.858203700147238, "percentage": 97.16, "elapsed_time": "3:19:54", "remaining_time": "0:05:50", "throughput": 19912.96, "total_tokens": 238844480} +{"current_steps": 75895, "total_steps": 78105, "loss": 0.1038, "lr": 1.2195435657100796e-08, "epoch": 4.858523782088215, "percentage": 97.17, "elapsed_time": "3:19:55", "remaining_time": "0:05:49", "throughput": 19913.16, "total_tokens": 238860736} +{"current_steps": 75900, "total_steps": 78105, "loss": 0.1103, "lr": 1.2140384652037095e-08, "epoch": 4.8588438640291916, "percentage": 97.18, "elapsed_time": "3:19:55", "remaining_time": "0:05:48", "throughput": 19913.32, "total_tokens": 238875776} +{"current_steps": 75905, "total_steps": 78105, "loss": 0.1327, "lr": 1.2085457877723805e-08, "epoch": 4.8591639459701685, "percentage": 97.18, "elapsed_time": "3:19:56", "remaining_time": "0:05:47", "throughput": 19913.52, "total_tokens": 238892224} +{"current_steps": 75910, "total_steps": 78105, "loss": 0.1306, "lr": 1.203065533690373e-08, "epoch": 4.8594840279111455, "percentage": 97.19, "elapsed_time": "3:19:57", "remaining_time": "0:05:46", "throughput": 19913.74, "total_tokens": 238909056} +{"current_steps": 75915, "total_steps": 78105, "loss": 0.1577, "lr": 1.1975977032313291e-08, "epoch": 4.8598041098521225, "percentage": 97.2, "elapsed_time": "3:19:57", "remaining_time": "0:05:46", "throughput": 19913.94, "total_tokens": 238924800} +{"current_steps": 75920, "total_steps": 78105, "loss": 0.1231, "lr": 1.1921422966683083e-08, "epoch": 4.8601241917930995, "percentage": 97.2, "elapsed_time": "3:19:58", "remaining_time": "0:05:45", "throughput": 19914.1, "total_tokens": 238939712} +{"current_steps": 75925, "total_steps": 78105, "loss": 0.0955, "lr": 1.1866993142737038e-08, "epoch": 4.860444273734076, "percentage": 97.21, "elapsed_time": "3:19:59", "remaining_time": "0:05:44", "throughput": 19914.26, "total_tokens": 238954944} +{"current_steps": 75930, "total_steps": 78105, "loss": 0.1146, "lr": 1.1812687563192982e-08, "epoch": 4.860764355675053, "percentage": 97.22, "elapsed_time": "3:19:59", "remaining_time": "0:05:43", "throughput": 19914.46, "total_tokens": 238971328} +{"current_steps": 75935, "total_steps": 78105, "loss": 0.1, "lr": 1.1758506230762911e-08, "epoch": 4.86108443761603, "percentage": 97.22, "elapsed_time": "3:20:00", "remaining_time": "0:05:42", "throughput": 19914.62, "total_tokens": 238986304} +{"current_steps": 75940, "total_steps": 78105, "loss": 0.0854, "lr": 1.1704449148152163e-08, "epoch": 4.861404519557007, "percentage": 97.23, "elapsed_time": "3:20:01", "remaining_time": "0:05:42", "throughput": 19914.8, "total_tokens": 239002048} +{"current_steps": 75945, "total_steps": 78105, "loss": 0.1655, "lr": 1.1650516318060523e-08, "epoch": 4.861724601497984, "percentage": 97.23, "elapsed_time": "3:20:01", "remaining_time": "0:05:41", "throughput": 19915.01, "total_tokens": 239018624} +{"current_steps": 75950, "total_steps": 78105, "loss": 0.1123, "lr": 1.159670774318028e-08, "epoch": 4.862044683438961, "percentage": 97.24, "elapsed_time": "3:20:02", "remaining_time": "0:05:40", "throughput": 19915.19, "total_tokens": 239034496} +{"current_steps": 75955, "total_steps": 78105, "loss": 0.1426, "lr": 1.1543023426199285e-08, "epoch": 4.862364765379938, "percentage": 97.25, "elapsed_time": "3:20:03", "remaining_time": "0:05:39", "throughput": 19915.36, "total_tokens": 239050112} +{"current_steps": 75960, "total_steps": 78105, "loss": 0.0867, "lr": 1.1489463369797338e-08, "epoch": 4.862684847320914, "percentage": 97.25, "elapsed_time": "3:20:03", "remaining_time": "0:05:38", "throughput": 19915.51, "total_tokens": 239064832} +{"current_steps": 75965, "total_steps": 78105, "loss": 0.0697, "lr": 1.14360275766498e-08, "epoch": 4.863004929261891, "percentage": 97.26, "elapsed_time": "3:20:04", "remaining_time": "0:05:38", "throughput": 19915.72, "total_tokens": 239081280} +{"current_steps": 75970, "total_steps": 78105, "loss": 0.1372, "lr": 1.1382716049424259e-08, "epoch": 4.863325011202868, "percentage": 97.27, "elapsed_time": "3:20:05", "remaining_time": "0:05:37", "throughput": 19915.89, "total_tokens": 239096640} +{"current_steps": 75975, "total_steps": 78105, "loss": 0.1582, "lr": 1.1329528790783307e-08, "epoch": 4.863645093143845, "percentage": 97.27, "elapsed_time": "3:20:05", "remaining_time": "0:05:36", "throughput": 19916.05, "total_tokens": 239111552} +{"current_steps": 75980, "total_steps": 78105, "loss": 0.1885, "lr": 1.1276465803382875e-08, "epoch": 4.863965175084822, "percentage": 97.28, "elapsed_time": "3:20:06", "remaining_time": "0:05:35", "throughput": 19916.24, "total_tokens": 239127104} +{"current_steps": 75985, "total_steps": 78105, "loss": 0.1205, "lr": 1.1223527089872232e-08, "epoch": 4.864285257025799, "percentage": 97.29, "elapsed_time": "3:20:07", "remaining_time": "0:05:35", "throughput": 19916.44, "total_tokens": 239143168} +{"current_steps": 75990, "total_steps": 78105, "loss": 0.1061, "lr": 1.1170712652895099e-08, "epoch": 4.864605338966776, "percentage": 97.29, "elapsed_time": "3:20:08", "remaining_time": "0:05:34", "throughput": 19916.65, "total_tokens": 239159424} +{"current_steps": 75995, "total_steps": 78105, "loss": 0.1172, "lr": 1.1118022495088532e-08, "epoch": 4.864925420907753, "percentage": 97.3, "elapsed_time": "3:20:08", "remaining_time": "0:05:33", "throughput": 19916.88, "total_tokens": 239176384} +{"current_steps": 76000, "total_steps": 78105, "loss": 0.1468, "lr": 1.1065456619084036e-08, "epoch": 4.86524550284873, "percentage": 97.3, "elapsed_time": "3:20:09", "remaining_time": "0:05:32", "throughput": 19917.06, "total_tokens": 239192064} +{"current_steps": 76005, "total_steps": 78105, "loss": 0.0856, "lr": 1.1013015027506458e-08, "epoch": 4.865565584789707, "percentage": 97.31, "elapsed_time": "3:20:10", "remaining_time": "0:05:31", "throughput": 19917.22, "total_tokens": 239206720} +{"current_steps": 76010, "total_steps": 78105, "loss": 0.1264, "lr": 1.0960697722973978e-08, "epoch": 4.865885666730683, "percentage": 97.32, "elapsed_time": "3:20:10", "remaining_time": "0:05:31", "throughput": 19917.44, "total_tokens": 239223488} +{"current_steps": 76015, "total_steps": 78105, "loss": 0.132, "lr": 1.090850470809951e-08, "epoch": 4.86620574867166, "percentage": 97.32, "elapsed_time": "3:20:11", "remaining_time": "0:05:30", "throughput": 19917.59, "total_tokens": 239238720} +{"current_steps": 76020, "total_steps": 78105, "loss": 0.1585, "lr": 1.0856435985488745e-08, "epoch": 4.866525830612637, "percentage": 97.33, "elapsed_time": "3:20:12", "remaining_time": "0:05:29", "throughput": 19917.72, "total_tokens": 239253120} +{"current_steps": 76025, "total_steps": 78105, "loss": 0.0961, "lr": 1.080449155774238e-08, "epoch": 4.866845912553614, "percentage": 97.34, "elapsed_time": "3:20:12", "remaining_time": "0:05:28", "throughput": 19917.88, "total_tokens": 239268096} +{"current_steps": 76030, "total_steps": 78105, "loss": 0.1236, "lr": 1.0752671427453898e-08, "epoch": 4.867165994494591, "percentage": 97.34, "elapsed_time": "3:20:13", "remaining_time": "0:05:27", "throughput": 19918.04, "total_tokens": 239282752} +{"current_steps": 76035, "total_steps": 78105, "loss": 0.1387, "lr": 1.070097559721095e-08, "epoch": 4.867486076435568, "percentage": 97.35, "elapsed_time": "3:20:14", "remaining_time": "0:05:27", "throughput": 19918.25, "total_tokens": 239299072} +{"current_steps": 76040, "total_steps": 78105, "loss": 0.1356, "lr": 1.0649404069595082e-08, "epoch": 4.867806158376545, "percentage": 97.36, "elapsed_time": "3:20:14", "remaining_time": "0:05:26", "throughput": 19918.43, "total_tokens": 239314624} +{"current_steps": 76045, "total_steps": 78105, "loss": 0.1301, "lr": 1.0597956847181457e-08, "epoch": 4.868126240317522, "percentage": 97.36, "elapsed_time": "3:20:15", "remaining_time": "0:05:25", "throughput": 19918.58, "total_tokens": 239329536} +{"current_steps": 76050, "total_steps": 78105, "loss": 0.1584, "lr": 1.0546633932538853e-08, "epoch": 4.868446322258498, "percentage": 97.37, "elapsed_time": "3:20:16", "remaining_time": "0:05:24", "throughput": 19918.75, "total_tokens": 239344896} +{"current_steps": 76055, "total_steps": 78105, "loss": 0.2035, "lr": 1.0495435328230497e-08, "epoch": 4.868766404199475, "percentage": 97.38, "elapsed_time": "3:20:16", "remaining_time": "0:05:23", "throughput": 19918.95, "total_tokens": 239360768} +{"current_steps": 76060, "total_steps": 78105, "loss": 0.135, "lr": 1.0444361036812401e-08, "epoch": 4.869086486140452, "percentage": 97.38, "elapsed_time": "3:20:17", "remaining_time": "0:05:23", "throughput": 19919.13, "total_tokens": 239376000} +{"current_steps": 76065, "total_steps": 78105, "loss": 0.1407, "lr": 1.0393411060835856e-08, "epoch": 4.869406568081429, "percentage": 97.39, "elapsed_time": "3:20:18", "remaining_time": "0:05:22", "throughput": 19919.3, "total_tokens": 239391232} +{"current_steps": 76070, "total_steps": 78105, "loss": 0.1369, "lr": 1.034258540284383e-08, "epoch": 4.869726650022406, "percentage": 97.39, "elapsed_time": "3:20:18", "remaining_time": "0:05:21", "throughput": 19919.5, "total_tokens": 239407424} +{"current_steps": 76075, "total_steps": 78105, "loss": 0.1577, "lr": 1.0291884065375402e-08, "epoch": 4.870046731963383, "percentage": 97.4, "elapsed_time": "3:20:19", "remaining_time": "0:05:20", "throughput": 19919.7, "total_tokens": 239423360} +{"current_steps": 76080, "total_steps": 78105, "loss": 0.1566, "lr": 1.0241307050961602e-08, "epoch": 4.87036681390436, "percentage": 97.41, "elapsed_time": "3:20:20", "remaining_time": "0:05:19", "throughput": 19919.88, "total_tokens": 239439104} +{"current_steps": 76085, "total_steps": 78105, "loss": 0.1454, "lr": 1.0190854362128465e-08, "epoch": 4.870686895845337, "percentage": 97.41, "elapsed_time": "3:20:20", "remaining_time": "0:05:19", "throughput": 19920.06, "total_tokens": 239454592} +{"current_steps": 76090, "total_steps": 78105, "loss": 0.1571, "lr": 1.014052600139509e-08, "epoch": 4.871006977786314, "percentage": 97.42, "elapsed_time": "3:20:21", "remaining_time": "0:05:18", "throughput": 19920.25, "total_tokens": 239470272} +{"current_steps": 76095, "total_steps": 78105, "loss": 0.1414, "lr": 1.0090321971274464e-08, "epoch": 4.871327059727291, "percentage": 97.43, "elapsed_time": "3:20:22", "remaining_time": "0:05:17", "throughput": 19920.43, "total_tokens": 239485696} +{"current_steps": 76100, "total_steps": 78105, "loss": 0.1537, "lr": 1.0040242274273749e-08, "epoch": 4.871647141668267, "percentage": 97.43, "elapsed_time": "3:20:22", "remaining_time": "0:05:16", "throughput": 19920.64, "total_tokens": 239501824} +{"current_steps": 76105, "total_steps": 78105, "loss": 0.1263, "lr": 9.99028691289372e-09, "epoch": 4.871967223609244, "percentage": 97.44, "elapsed_time": "3:20:23", "remaining_time": "0:05:15", "throughput": 19920.82, "total_tokens": 239517568} +{"current_steps": 76110, "total_steps": 78105, "loss": 0.1416, "lr": 9.940455889628498e-09, "epoch": 4.872287305550221, "percentage": 97.45, "elapsed_time": "3:20:24", "remaining_time": "0:05:15", "throughput": 19920.99, "total_tokens": 239532800} +{"current_steps": 76115, "total_steps": 78105, "loss": 0.1386, "lr": 9.89074920696692e-09, "epoch": 4.872607387491198, "percentage": 97.45, "elapsed_time": "3:20:24", "remaining_time": "0:05:14", "throughput": 19921.3, "total_tokens": 239552192} +{"current_steps": 76120, "total_steps": 78105, "loss": 0.0614, "lr": 9.841166867390895e-09, "epoch": 4.872927469432175, "percentage": 97.46, "elapsed_time": "3:20:25", "remaining_time": "0:05:13", "throughput": 19921.45, "total_tokens": 239566912} +{"current_steps": 76125, "total_steps": 78105, "loss": 0.125, "lr": 9.791708873375939e-09, "epoch": 4.873247551373152, "percentage": 97.46, "elapsed_time": "3:20:26", "remaining_time": "0:05:12", "throughput": 19921.65, "total_tokens": 239583040} +{"current_steps": 76130, "total_steps": 78105, "loss": 0.1232, "lr": 9.742375227392297e-09, "epoch": 4.873567633314129, "percentage": 97.47, "elapsed_time": "3:20:26", "remaining_time": "0:05:12", "throughput": 19921.79, "total_tokens": 239597440} +{"current_steps": 76135, "total_steps": 78105, "loss": 0.1181, "lr": 9.693165931903004e-09, "epoch": 4.873887715255106, "percentage": 97.48, "elapsed_time": "3:20:27", "remaining_time": "0:05:11", "throughput": 19921.96, "total_tokens": 239613056} +{"current_steps": 76140, "total_steps": 78105, "loss": 0.091, "lr": 9.644080989365534e-09, "epoch": 4.874207797196082, "percentage": 97.48, "elapsed_time": "3:20:28", "remaining_time": "0:05:10", "throughput": 19922.13, "total_tokens": 239628224} +{"current_steps": 76145, "total_steps": 78105, "loss": 0.1661, "lr": 9.595120402230707e-09, "epoch": 4.874527879137059, "percentage": 97.49, "elapsed_time": "3:20:28", "remaining_time": "0:05:09", "throughput": 19922.33, "total_tokens": 239644160} +{"current_steps": 76150, "total_steps": 78105, "loss": 0.1528, "lr": 9.546284172943787e-09, "epoch": 4.874847961078036, "percentage": 97.5, "elapsed_time": "3:20:29", "remaining_time": "0:05:08", "throughput": 19922.52, "total_tokens": 239660352} +{"current_steps": 76155, "total_steps": 78105, "loss": 0.1373, "lr": 9.497572303943103e-09, "epoch": 4.875168043019013, "percentage": 97.5, "elapsed_time": "3:20:30", "remaining_time": "0:05:08", "throughput": 19922.7, "total_tokens": 239675840} +{"current_steps": 76160, "total_steps": 78105, "loss": 0.1207, "lr": 9.448984797660876e-09, "epoch": 4.87548812495999, "percentage": 97.51, "elapsed_time": "3:20:31", "remaining_time": "0:05:07", "throughput": 19922.93, "total_tokens": 239692928} +{"current_steps": 76165, "total_steps": 78105, "loss": 0.1428, "lr": 9.400521656523775e-09, "epoch": 4.875808206900967, "percentage": 97.52, "elapsed_time": "3:20:31", "remaining_time": "0:05:06", "throughput": 19923.11, "total_tokens": 239708288} +{"current_steps": 76170, "total_steps": 78105, "loss": 0.1252, "lr": 9.352182882951255e-09, "epoch": 4.876128288841944, "percentage": 97.52, "elapsed_time": "3:20:32", "remaining_time": "0:05:05", "throughput": 19923.29, "total_tokens": 239724416} +{"current_steps": 76175, "total_steps": 78105, "loss": 0.1028, "lr": 9.303968479357772e-09, "epoch": 4.876448370782921, "percentage": 97.53, "elapsed_time": "3:20:33", "remaining_time": "0:05:04", "throughput": 19923.5, "total_tokens": 239740800} +{"current_steps": 76180, "total_steps": 78105, "loss": 0.1897, "lr": 9.25587844815029e-09, "epoch": 4.876768452723898, "percentage": 97.54, "elapsed_time": "3:20:33", "remaining_time": "0:05:04", "throughput": 19923.69, "total_tokens": 239756096} +{"current_steps": 76185, "total_steps": 78105, "loss": 0.1047, "lr": 9.207912791730777e-09, "epoch": 4.877088534664875, "percentage": 97.54, "elapsed_time": "3:20:34", "remaining_time": "0:05:03", "throughput": 19923.84, "total_tokens": 239770752} +{"current_steps": 76190, "total_steps": 78105, "loss": 0.1175, "lr": 9.160071512493706e-09, "epoch": 4.877408616605851, "percentage": 97.55, "elapsed_time": "3:20:35", "remaining_time": "0:05:02", "throughput": 19924.03, "total_tokens": 239787072} +{"current_steps": 76195, "total_steps": 78105, "loss": 0.1365, "lr": 9.112354612828277e-09, "epoch": 4.877728698546828, "percentage": 97.55, "elapsed_time": "3:20:35", "remaining_time": "0:05:01", "throughput": 19924.21, "total_tokens": 239802752} +{"current_steps": 76200, "total_steps": 78105, "loss": 0.1338, "lr": 9.064762095117585e-09, "epoch": 4.878048780487805, "percentage": 97.56, "elapsed_time": "3:20:36", "remaining_time": "0:05:00", "throughput": 19924.37, "total_tokens": 239818048} +{"current_steps": 76205, "total_steps": 78105, "loss": 0.1664, "lr": 9.017293961737783e-09, "epoch": 4.878368862428782, "percentage": 97.57, "elapsed_time": "3:20:37", "remaining_time": "0:05:00", "throughput": 19924.56, "total_tokens": 239833984} +{"current_steps": 76210, "total_steps": 78105, "loss": 0.1539, "lr": 8.969950215059198e-09, "epoch": 4.878688944369759, "percentage": 97.57, "elapsed_time": "3:20:37", "remaining_time": "0:04:59", "throughput": 19924.74, "total_tokens": 239849344} +{"current_steps": 76215, "total_steps": 78105, "loss": 0.1103, "lr": 8.922730857445771e-09, "epoch": 4.879009026310736, "percentage": 97.58, "elapsed_time": "3:20:38", "remaining_time": "0:04:58", "throughput": 19924.89, "total_tokens": 239863936} +{"current_steps": 76220, "total_steps": 78105, "loss": 0.1669, "lr": 8.875635891255896e-09, "epoch": 4.879329108251713, "percentage": 97.59, "elapsed_time": "3:20:39", "remaining_time": "0:04:57", "throughput": 19925.1, "total_tokens": 239880192} +{"current_steps": 76225, "total_steps": 78105, "loss": 0.1698, "lr": 8.828665318841024e-09, "epoch": 4.879649190192689, "percentage": 97.59, "elapsed_time": "3:20:39", "remaining_time": "0:04:56", "throughput": 19925.31, "total_tokens": 239896768} +{"current_steps": 76230, "total_steps": 78105, "loss": 0.1383, "lr": 8.781819142546499e-09, "epoch": 4.879969272133666, "percentage": 97.6, "elapsed_time": "3:20:40", "remaining_time": "0:04:56", "throughput": 19925.5, "total_tokens": 239912576} +{"current_steps": 76235, "total_steps": 78105, "loss": 0.1702, "lr": 8.735097364711565e-09, "epoch": 4.880289354074643, "percentage": 97.61, "elapsed_time": "3:20:41", "remaining_time": "0:04:55", "throughput": 19925.75, "total_tokens": 239930176} +{"current_steps": 76240, "total_steps": 78105, "loss": 0.1631, "lr": 8.688499987669351e-09, "epoch": 4.88060943601562, "percentage": 97.61, "elapsed_time": "3:20:41", "remaining_time": "0:04:54", "throughput": 19925.96, "total_tokens": 239946752} +{"current_steps": 76245, "total_steps": 78105, "loss": 0.1363, "lr": 8.642027013746889e-09, "epoch": 4.880929517956597, "percentage": 97.62, "elapsed_time": "3:20:42", "remaining_time": "0:04:53", "throughput": 19926.15, "total_tokens": 239962496} +{"current_steps": 76250, "total_steps": 78105, "loss": 0.1232, "lr": 8.595678445264544e-09, "epoch": 4.881249599897574, "percentage": 97.62, "elapsed_time": "3:20:43", "remaining_time": "0:04:52", "throughput": 19926.34, "total_tokens": 239978496} +{"current_steps": 76255, "total_steps": 78105, "loss": 0.1478, "lr": 8.549454284536573e-09, "epoch": 4.881569681838551, "percentage": 97.63, "elapsed_time": "3:20:43", "remaining_time": "0:04:52", "throughput": 19926.51, "total_tokens": 239993536} +{"current_steps": 76260, "total_steps": 78105, "loss": 0.166, "lr": 8.50335453387141e-09, "epoch": 4.881889763779528, "percentage": 97.64, "elapsed_time": "3:20:44", "remaining_time": "0:04:51", "throughput": 19926.69, "total_tokens": 240009152} +{"current_steps": 76265, "total_steps": 78105, "loss": 0.1519, "lr": 8.457379195571102e-09, "epoch": 4.882209845720505, "percentage": 97.64, "elapsed_time": "3:20:45", "remaining_time": "0:04:50", "throughput": 19926.87, "total_tokens": 240024832} +{"current_steps": 76270, "total_steps": 78105, "loss": 0.1361, "lr": 8.411528271931313e-09, "epoch": 4.882529927661482, "percentage": 97.65, "elapsed_time": "3:20:45", "remaining_time": "0:04:49", "throughput": 19927.06, "total_tokens": 240040320} +{"current_steps": 76275, "total_steps": 78105, "loss": 0.1749, "lr": 8.365801765241321e-09, "epoch": 4.882850009602458, "percentage": 97.66, "elapsed_time": "3:20:46", "remaining_time": "0:04:49", "throughput": 19927.23, "total_tokens": 240055424} +{"current_steps": 76280, "total_steps": 78105, "loss": 0.2047, "lr": 8.320199677784857e-09, "epoch": 4.883170091543435, "percentage": 97.66, "elapsed_time": "3:20:47", "remaining_time": "0:04:48", "throughput": 19927.43, "total_tokens": 240071872} +{"current_steps": 76285, "total_steps": 78105, "loss": 0.0991, "lr": 8.274722011838986e-09, "epoch": 4.883490173484412, "percentage": 97.67, "elapsed_time": "3:20:47", "remaining_time": "0:04:47", "throughput": 19927.62, "total_tokens": 240087872} +{"current_steps": 76290, "total_steps": 78105, "loss": 0.0973, "lr": 8.229368769674673e-09, "epoch": 4.883810255425389, "percentage": 97.68, "elapsed_time": "3:20:48", "remaining_time": "0:04:46", "throughput": 19927.79, "total_tokens": 240102912} +{"current_steps": 76295, "total_steps": 78105, "loss": 0.1305, "lr": 8.184139953556491e-09, "epoch": 4.884130337366366, "percentage": 97.68, "elapsed_time": "3:20:49", "remaining_time": "0:04:45", "throughput": 19927.97, "total_tokens": 240118592} +{"current_steps": 76300, "total_steps": 78105, "loss": 0.1303, "lr": 8.139035565742915e-09, "epoch": 4.884450419307343, "percentage": 97.69, "elapsed_time": "3:20:50", "remaining_time": "0:04:45", "throughput": 19928.17, "total_tokens": 240134464} +{"current_steps": 76305, "total_steps": 78105, "loss": 0.1544, "lr": 8.094055608486028e-09, "epoch": 4.88477050124832, "percentage": 97.7, "elapsed_time": "3:20:50", "remaining_time": "0:04:44", "throughput": 19928.36, "total_tokens": 240150720} +{"current_steps": 76310, "total_steps": 78105, "loss": 0.1457, "lr": 8.049200084032649e-09, "epoch": 4.885090583189297, "percentage": 97.7, "elapsed_time": "3:20:51", "remaining_time": "0:04:43", "throughput": 19928.53, "total_tokens": 240166272} +{"current_steps": 76315, "total_steps": 78105, "loss": 0.1214, "lr": 8.004468994621816e-09, "epoch": 4.885410665130273, "percentage": 97.71, "elapsed_time": "3:20:52", "remaining_time": "0:04:42", "throughput": 19928.69, "total_tokens": 240181056} +{"current_steps": 76320, "total_steps": 78105, "loss": 0.1339, "lr": 7.959862342487579e-09, "epoch": 4.88573074707125, "percentage": 97.71, "elapsed_time": "3:20:52", "remaining_time": "0:04:41", "throughput": 19928.86, "total_tokens": 240196160} +{"current_steps": 76325, "total_steps": 78105, "loss": 0.1292, "lr": 7.915380129857042e-09, "epoch": 4.886050829012227, "percentage": 97.72, "elapsed_time": "3:20:53", "remaining_time": "0:04:41", "throughput": 19929.03, "total_tokens": 240211520} +{"current_steps": 76330, "total_steps": 78105, "loss": 0.1346, "lr": 7.871022358951763e-09, "epoch": 4.886370910953204, "percentage": 97.73, "elapsed_time": "3:20:54", "remaining_time": "0:04:40", "throughput": 19929.24, "total_tokens": 240227776} +{"current_steps": 76335, "total_steps": 78105, "loss": 0.1496, "lr": 7.826789031986638e-09, "epoch": 4.886690992894181, "percentage": 97.73, "elapsed_time": "3:20:54", "remaining_time": "0:04:39", "throughput": 19929.41, "total_tokens": 240243072} +{"current_steps": 76340, "total_steps": 78105, "loss": 0.1494, "lr": 7.782680151170175e-09, "epoch": 4.887011074835158, "percentage": 97.74, "elapsed_time": "3:20:55", "remaining_time": "0:04:38", "throughput": 19929.59, "total_tokens": 240258624} +{"current_steps": 76345, "total_steps": 78105, "loss": 0.1253, "lr": 7.738695718705614e-09, "epoch": 4.887331156776135, "percentage": 97.75, "elapsed_time": "3:20:56", "remaining_time": "0:04:37", "throughput": 19929.78, "total_tokens": 240274496} +{"current_steps": 76350, "total_steps": 78105, "loss": 0.137, "lr": 7.694835736788697e-09, "epoch": 4.887651238717112, "percentage": 97.75, "elapsed_time": "3:20:56", "remaining_time": "0:04:37", "throughput": 19929.96, "total_tokens": 240289536} +{"current_steps": 76355, "total_steps": 78105, "loss": 0.1308, "lr": 7.651100207609619e-09, "epoch": 4.887971320658089, "percentage": 97.76, "elapsed_time": "3:20:57", "remaining_time": "0:04:36", "throughput": 19930.13, "total_tokens": 240304704} +{"current_steps": 76360, "total_steps": 78105, "loss": 0.1412, "lr": 7.607489133352464e-09, "epoch": 4.888291402599066, "percentage": 97.77, "elapsed_time": "3:20:58", "remaining_time": "0:04:35", "throughput": 19930.31, "total_tokens": 240320256} +{"current_steps": 76365, "total_steps": 78105, "loss": 0.1534, "lr": 7.564002516194936e-09, "epoch": 4.888611484540042, "percentage": 97.77, "elapsed_time": "3:20:58", "remaining_time": "0:04:34", "throughput": 19930.56, "total_tokens": 240337536} +{"current_steps": 76370, "total_steps": 78105, "loss": 0.1367, "lr": 7.520640358308351e-09, "epoch": 4.888931566481019, "percentage": 97.78, "elapsed_time": "3:20:59", "remaining_time": "0:04:33", "throughput": 19930.77, "total_tokens": 240353664} +{"current_steps": 76375, "total_steps": 78105, "loss": 0.1344, "lr": 7.477402661858201e-09, "epoch": 4.889251648421996, "percentage": 97.79, "elapsed_time": "3:21:00", "remaining_time": "0:04:33", "throughput": 19930.93, "total_tokens": 240368896} +{"current_steps": 76380, "total_steps": 78105, "loss": 0.1102, "lr": 7.434289429003872e-09, "epoch": 4.889571730362973, "percentage": 97.79, "elapsed_time": "3:21:00", "remaining_time": "0:04:32", "throughput": 19931.12, "total_tokens": 240384704} +{"current_steps": 76385, "total_steps": 78105, "loss": 0.1189, "lr": 7.391300661897249e-09, "epoch": 4.88989181230395, "percentage": 97.8, "elapsed_time": "3:21:01", "remaining_time": "0:04:31", "throughput": 19931.3, "total_tokens": 240400128} +{"current_steps": 76390, "total_steps": 78105, "loss": 0.1221, "lr": 7.348436362685785e-09, "epoch": 4.890211894244927, "percentage": 97.8, "elapsed_time": "3:21:02", "remaining_time": "0:04:30", "throughput": 19931.47, "total_tokens": 240415552} +{"current_steps": 76395, "total_steps": 78105, "loss": 0.1167, "lr": 7.30569653350971e-09, "epoch": 4.890531976185904, "percentage": 97.81, "elapsed_time": "3:21:02", "remaining_time": "0:04:30", "throughput": 19931.64, "total_tokens": 240430848} +{"current_steps": 76400, "total_steps": 78105, "loss": 0.132, "lr": 7.26308117650315e-09, "epoch": 4.890852058126881, "percentage": 97.82, "elapsed_time": "3:21:03", "remaining_time": "0:04:29", "throughput": 19931.85, "total_tokens": 240447424} +{"current_steps": 76405, "total_steps": 78105, "loss": 0.1441, "lr": 7.220590293794405e-09, "epoch": 4.891172140067857, "percentage": 97.82, "elapsed_time": "3:21:04", "remaining_time": "0:04:28", "throughput": 19932.03, "total_tokens": 240463360} +{"current_steps": 76410, "total_steps": 78105, "loss": 0.1452, "lr": 7.178223887504554e-09, "epoch": 4.891492222008834, "percentage": 97.83, "elapsed_time": "3:21:04", "remaining_time": "0:04:27", "throughput": 19932.19, "total_tokens": 240478144} +{"current_steps": 76415, "total_steps": 78105, "loss": 0.1284, "lr": 7.135981959749683e-09, "epoch": 4.891812303949811, "percentage": 97.84, "elapsed_time": "3:21:05", "remaining_time": "0:04:26", "throughput": 19932.38, "total_tokens": 240494016} +{"current_steps": 76420, "total_steps": 78105, "loss": 0.1206, "lr": 7.093864512638937e-09, "epoch": 4.892132385890788, "percentage": 97.84, "elapsed_time": "3:21:06", "remaining_time": "0:04:26", "throughput": 19932.54, "total_tokens": 240509184} +{"current_steps": 76425, "total_steps": 78105, "loss": 0.1024, "lr": 7.051871548275635e-09, "epoch": 4.892452467831765, "percentage": 97.85, "elapsed_time": "3:21:06", "remaining_time": "0:04:25", "throughput": 19932.7, "total_tokens": 240524224} +{"current_steps": 76430, "total_steps": 78105, "loss": 0.152, "lr": 7.01000306875671e-09, "epoch": 4.892772549772742, "percentage": 97.86, "elapsed_time": "3:21:07", "remaining_time": "0:04:24", "throughput": 19932.87, "total_tokens": 240539328} +{"current_steps": 76435, "total_steps": 78105, "loss": 0.1137, "lr": 6.9682590761724344e-09, "epoch": 4.893092631713719, "percentage": 97.86, "elapsed_time": "3:21:08", "remaining_time": "0:04:23", "throughput": 19933.01, "total_tokens": 240553920} +{"current_steps": 76440, "total_steps": 78105, "loss": 0.1477, "lr": 6.926639572607807e-09, "epoch": 4.893412713654696, "percentage": 97.87, "elapsed_time": "3:21:08", "remaining_time": "0:04:22", "throughput": 19933.22, "total_tokens": 240570688} +{"current_steps": 76445, "total_steps": 78105, "loss": 0.1921, "lr": 6.8851445601408884e-09, "epoch": 4.893732795595673, "percentage": 97.87, "elapsed_time": "3:21:09", "remaining_time": "0:04:22", "throughput": 19933.45, "total_tokens": 240587648} +{"current_steps": 76450, "total_steps": 78105, "loss": 0.1596, "lr": 6.843774040843354e-09, "epoch": 4.89405287753665, "percentage": 97.88, "elapsed_time": "3:21:10", "remaining_time": "0:04:21", "throughput": 19933.66, "total_tokens": 240604224} +{"current_steps": 76455, "total_steps": 78105, "loss": 0.1189, "lr": 6.802528016781607e-09, "epoch": 4.894372959477626, "percentage": 97.89, "elapsed_time": "3:21:10", "remaining_time": "0:04:20", "throughput": 19933.89, "total_tokens": 240621120} +{"current_steps": 76460, "total_steps": 78105, "loss": 0.1566, "lr": 6.76140649001511e-09, "epoch": 4.894693041418603, "percentage": 97.89, "elapsed_time": "3:21:11", "remaining_time": "0:04:19", "throughput": 19934.11, "total_tokens": 240637888} +{"current_steps": 76465, "total_steps": 78105, "loss": 0.1291, "lr": 6.720409462596944e-09, "epoch": 4.89501312335958, "percentage": 97.9, "elapsed_time": "3:21:12", "remaining_time": "0:04:18", "throughput": 19934.27, "total_tokens": 240653248} +{"current_steps": 76470, "total_steps": 78105, "loss": 0.1721, "lr": 6.6795369365746375e-09, "epoch": 4.895333205300557, "percentage": 97.91, "elapsed_time": "3:21:13", "remaining_time": "0:04:18", "throughput": 19934.45, "total_tokens": 240669056} +{"current_steps": 76475, "total_steps": 78105, "loss": 0.1247, "lr": 6.638788913989058e-09, "epoch": 4.895653287241534, "percentage": 97.91, "elapsed_time": "3:21:13", "remaining_time": "0:04:17", "throughput": 19934.67, "total_tokens": 240685376} +{"current_steps": 76480, "total_steps": 78105, "loss": 0.1372, "lr": 6.5981653968749674e-09, "epoch": 4.895973369182511, "percentage": 97.92, "elapsed_time": "3:21:14", "remaining_time": "0:04:16", "throughput": 19934.82, "total_tokens": 240700224} +{"current_steps": 76485, "total_steps": 78105, "loss": 0.1509, "lr": 6.557666387260464e-09, "epoch": 4.896293451123488, "percentage": 97.93, "elapsed_time": "3:21:15", "remaining_time": "0:04:15", "throughput": 19935.0, "total_tokens": 240715712} +{"current_steps": 76490, "total_steps": 78105, "loss": 0.1406, "lr": 6.517291887168653e-09, "epoch": 4.896613533064464, "percentage": 97.93, "elapsed_time": "3:21:15", "remaining_time": "0:04:14", "throughput": 19935.23, "total_tokens": 240732800} +{"current_steps": 76495, "total_steps": 78105, "loss": 0.1357, "lr": 6.477041898614866e-09, "epoch": 4.896933615005441, "percentage": 97.94, "elapsed_time": "3:21:16", "remaining_time": "0:04:14", "throughput": 19935.42, "total_tokens": 240748544} +{"current_steps": 76500, "total_steps": 78105, "loss": 0.0972, "lr": 6.436916423609718e-09, "epoch": 4.897253696946418, "percentage": 97.95, "elapsed_time": "3:21:17", "remaining_time": "0:04:13", "throughput": 19935.57, "total_tokens": 240763392} +{"current_steps": 76505, "total_steps": 78105, "loss": 0.1498, "lr": 6.39691546415605e-09, "epoch": 4.897573778887395, "percentage": 97.95, "elapsed_time": "3:21:17", "remaining_time": "0:04:12", "throughput": 19935.83, "total_tokens": 240781120} +{"current_steps": 76510, "total_steps": 78105, "loss": 0.1136, "lr": 6.357039022251988e-09, "epoch": 4.897893860828372, "percentage": 97.96, "elapsed_time": "3:21:18", "remaining_time": "0:04:11", "throughput": 19936.0, "total_tokens": 240796480} +{"current_steps": 76515, "total_steps": 78105, "loss": 0.1309, "lr": 6.317287099888436e-09, "epoch": 4.898213942769349, "percentage": 97.96, "elapsed_time": "3:21:19", "remaining_time": "0:04:11", "throughput": 19936.15, "total_tokens": 240811328} +{"current_steps": 76520, "total_steps": 78105, "loss": 0.1471, "lr": 6.277659699050476e-09, "epoch": 4.898534024710326, "percentage": 97.97, "elapsed_time": "3:21:19", "remaining_time": "0:04:10", "throughput": 19936.33, "total_tokens": 240826944} +{"current_steps": 76525, "total_steps": 78105, "loss": 0.1394, "lr": 6.238156821716801e-09, "epoch": 4.898854106651303, "percentage": 97.98, "elapsed_time": "3:21:20", "remaining_time": "0:04:09", "throughput": 19936.49, "total_tokens": 240841856} +{"current_steps": 76530, "total_steps": 78105, "loss": 0.1103, "lr": 6.1987784698600005e-09, "epoch": 4.89917418859228, "percentage": 97.98, "elapsed_time": "3:21:21", "remaining_time": "0:04:08", "throughput": 19936.68, "total_tokens": 240857472} +{"current_steps": 76535, "total_steps": 78105, "loss": 0.1552, "lr": 6.159524645446558e-09, "epoch": 4.899494270533257, "percentage": 97.99, "elapsed_time": "3:21:21", "remaining_time": "0:04:07", "throughput": 19936.85, "total_tokens": 240872768} +{"current_steps": 76540, "total_steps": 78105, "loss": 0.1359, "lr": 6.12039535043657e-09, "epoch": 4.899814352474233, "percentage": 98.0, "elapsed_time": "3:21:22", "remaining_time": "0:04:07", "throughput": 19936.99, "total_tokens": 240887424} +{"current_steps": 76545, "total_steps": 78105, "loss": 0.1248, "lr": 6.081390586783753e-09, "epoch": 4.90013443441521, "percentage": 98.0, "elapsed_time": "3:21:23", "remaining_time": "0:04:06", "throughput": 19937.2, "total_tokens": 240903936} +{"current_steps": 76550, "total_steps": 78105, "loss": 0.1086, "lr": 6.042510356435993e-09, "epoch": 4.900454516356187, "percentage": 98.01, "elapsed_time": "3:21:23", "remaining_time": "0:04:05", "throughput": 19937.38, "total_tokens": 240919680} +{"current_steps": 76555, "total_steps": 78105, "loss": 0.0986, "lr": 6.003754661334793e-09, "epoch": 4.900774598297164, "percentage": 98.02, "elapsed_time": "3:21:24", "remaining_time": "0:04:04", "throughput": 19937.55, "total_tokens": 240934784} +{"current_steps": 76560, "total_steps": 78105, "loss": 0.1642, "lr": 5.965123503415271e-09, "epoch": 4.901094680238141, "percentage": 98.02, "elapsed_time": "3:21:25", "remaining_time": "0:04:03", "throughput": 19937.73, "total_tokens": 240950656} +{"current_steps": 76565, "total_steps": 78105, "loss": 0.1192, "lr": 5.9266168846064395e-09, "epoch": 4.901414762179118, "percentage": 98.03, "elapsed_time": "3:21:25", "remaining_time": "0:04:03", "throughput": 19937.9, "total_tokens": 240966016} +{"current_steps": 76570, "total_steps": 78105, "loss": 0.1047, "lr": 5.888234806831206e-09, "epoch": 4.901734844120095, "percentage": 98.03, "elapsed_time": "3:21:26", "remaining_time": "0:04:02", "throughput": 19938.09, "total_tokens": 240981888} +{"current_steps": 76575, "total_steps": 78105, "loss": 0.097, "lr": 5.849977272006369e-09, "epoch": 4.902054926061072, "percentage": 98.04, "elapsed_time": "3:21:27", "remaining_time": "0:04:01", "throughput": 19938.27, "total_tokens": 240997248} +{"current_steps": 76580, "total_steps": 78105, "loss": 0.1327, "lr": 5.811844282042067e-09, "epoch": 4.902375008002048, "percentage": 98.05, "elapsed_time": "3:21:27", "remaining_time": "0:04:00", "throughput": 19938.42, "total_tokens": 241011904} +{"current_steps": 76585, "total_steps": 78105, "loss": 0.1364, "lr": 5.773835838842334e-09, "epoch": 4.902695089943025, "percentage": 98.05, "elapsed_time": "3:21:28", "remaining_time": "0:03:59", "throughput": 19938.58, "total_tokens": 241026944} +{"current_steps": 76590, "total_steps": 78105, "loss": 0.1266, "lr": 5.735951944305373e-09, "epoch": 4.903015171884002, "percentage": 98.06, "elapsed_time": "3:21:29", "remaining_time": "0:03:59", "throughput": 19938.78, "total_tokens": 241043200} +{"current_steps": 76595, "total_steps": 78105, "loss": 0.0909, "lr": 5.698192600323005e-09, "epoch": 4.903335253824979, "percentage": 98.07, "elapsed_time": "3:21:30", "remaining_time": "0:03:58", "throughput": 19939.22, "total_tokens": 241071744} +{"current_steps": 76600, "total_steps": 78105, "loss": 0.1238, "lr": 5.660557808780387e-09, "epoch": 4.903655335765956, "percentage": 98.07, "elapsed_time": "3:21:31", "remaining_time": "0:03:57", "throughput": 19939.38, "total_tokens": 241087296} +{"current_steps": 76605, "total_steps": 78105, "loss": 0.0983, "lr": 5.62304757155685e-09, "epoch": 4.903975417706933, "percentage": 98.08, "elapsed_time": "3:21:31", "remaining_time": "0:03:56", "throughput": 19939.56, "total_tokens": 241103104} +{"current_steps": 76610, "total_steps": 78105, "loss": 0.1001, "lr": 5.585661890525895e-09, "epoch": 4.90429549964791, "percentage": 98.09, "elapsed_time": "3:21:32", "remaining_time": "0:03:55", "throughput": 19939.69, "total_tokens": 241117376} +{"current_steps": 76615, "total_steps": 78105, "loss": 0.1411, "lr": 5.548400767553808e-09, "epoch": 4.904615581588887, "percentage": 98.09, "elapsed_time": "3:21:32", "remaining_time": "0:03:55", "throughput": 19939.87, "total_tokens": 241132480} +{"current_steps": 76620, "total_steps": 78105, "loss": 0.1447, "lr": 5.511264204501321e-09, "epoch": 4.904935663529864, "percentage": 98.1, "elapsed_time": "3:21:33", "remaining_time": "0:03:54", "throughput": 19940.06, "total_tokens": 241148416} +{"current_steps": 76625, "total_steps": 78105, "loss": 0.1086, "lr": 5.47425220322334e-09, "epoch": 4.905255745470841, "percentage": 98.11, "elapsed_time": "3:21:34", "remaining_time": "0:03:53", "throughput": 19940.3, "total_tokens": 241165824} +{"current_steps": 76630, "total_steps": 78105, "loss": 0.1246, "lr": 5.437364765567555e-09, "epoch": 4.905575827411817, "percentage": 98.11, "elapsed_time": "3:21:35", "remaining_time": "0:03:52", "throughput": 19940.45, "total_tokens": 241180416} +{"current_steps": 76635, "total_steps": 78105, "loss": 0.1738, "lr": 5.400601893376101e-09, "epoch": 4.905895909352794, "percentage": 98.12, "elapsed_time": "3:21:35", "remaining_time": "0:03:52", "throughput": 19940.66, "total_tokens": 241197248} +{"current_steps": 76640, "total_steps": 78105, "loss": 0.1298, "lr": 5.363963588484456e-09, "epoch": 4.906215991293771, "percentage": 98.12, "elapsed_time": "3:21:36", "remaining_time": "0:03:51", "throughput": 19940.85, "total_tokens": 241213056} +{"current_steps": 76645, "total_steps": 78105, "loss": 0.1165, "lr": 5.327449852722821e-09, "epoch": 4.906536073234748, "percentage": 98.13, "elapsed_time": "3:21:37", "remaining_time": "0:03:50", "throughput": 19941.04, "total_tokens": 241228672} +{"current_steps": 76650, "total_steps": 78105, "loss": 0.1659, "lr": 5.291060687913629e-09, "epoch": 4.906856155175725, "percentage": 98.14, "elapsed_time": "3:21:37", "remaining_time": "0:03:49", "throughput": 19941.21, "total_tokens": 241243712} +{"current_steps": 76655, "total_steps": 78105, "loss": 0.1274, "lr": 5.254796095874592e-09, "epoch": 4.907176237116702, "percentage": 98.14, "elapsed_time": "3:21:38", "remaining_time": "0:03:48", "throughput": 19941.39, "total_tokens": 241259456} +{"current_steps": 76660, "total_steps": 78105, "loss": 0.2301, "lr": 5.2186560784164845e-09, "epoch": 4.907496319057679, "percentage": 98.15, "elapsed_time": "3:21:39", "remaining_time": "0:03:48", "throughput": 19941.59, "total_tokens": 241275264} +{"current_steps": 76665, "total_steps": 78105, "loss": 0.1328, "lr": 5.1826406373436945e-09, "epoch": 4.907816400998656, "percentage": 98.16, "elapsed_time": "3:21:39", "remaining_time": "0:03:47", "throughput": 19941.81, "total_tokens": 241291840} +{"current_steps": 76670, "total_steps": 78105, "loss": 0.2093, "lr": 5.146749774455062e-09, "epoch": 4.908136482939632, "percentage": 98.16, "elapsed_time": "3:21:40", "remaining_time": "0:03:46", "throughput": 19941.99, "total_tokens": 241307520} +{"current_steps": 76675, "total_steps": 78105, "loss": 0.1466, "lr": 5.11098349154221e-09, "epoch": 4.908456564880609, "percentage": 98.17, "elapsed_time": "3:21:41", "remaining_time": "0:03:45", "throughput": 19942.18, "total_tokens": 241323264} +{"current_steps": 76680, "total_steps": 78105, "loss": 0.1059, "lr": 5.075341790391486e-09, "epoch": 4.908776646821586, "percentage": 98.18, "elapsed_time": "3:21:41", "remaining_time": "0:03:44", "throughput": 19942.36, "total_tokens": 241338880} +{"current_steps": 76685, "total_steps": 78105, "loss": 0.141, "lr": 5.0398246727825785e-09, "epoch": 4.909096728762563, "percentage": 98.18, "elapsed_time": "3:21:42", "remaining_time": "0:03:44", "throughput": 19942.55, "total_tokens": 241355200} +{"current_steps": 76690, "total_steps": 78105, "loss": 0.1335, "lr": 5.004432140489346e-09, "epoch": 4.90941681070354, "percentage": 98.19, "elapsed_time": "3:21:43", "remaining_time": "0:03:43", "throughput": 19942.73, "total_tokens": 241370240} +{"current_steps": 76695, "total_steps": 78105, "loss": 0.1502, "lr": 4.969164195278431e-09, "epoch": 4.909736892644517, "percentage": 98.19, "elapsed_time": "3:21:43", "remaining_time": "0:03:42", "throughput": 19942.9, "total_tokens": 241385728} +{"current_steps": 76700, "total_steps": 78105, "loss": 0.1209, "lr": 4.934020838911757e-09, "epoch": 4.910056974585494, "percentage": 98.2, "elapsed_time": "3:21:44", "remaining_time": "0:03:41", "throughput": 19943.09, "total_tokens": 241401472} +{"current_steps": 76705, "total_steps": 78105, "loss": 0.1353, "lr": 4.8990020731434775e-09, "epoch": 4.910377056526471, "percentage": 98.21, "elapsed_time": "3:21:45", "remaining_time": "0:03:40", "throughput": 19943.29, "total_tokens": 241417792} +{"current_steps": 76710, "total_steps": 78105, "loss": 0.1508, "lr": 4.86410789972247e-09, "epoch": 4.910697138467448, "percentage": 98.21, "elapsed_time": "3:21:45", "remaining_time": "0:03:40", "throughput": 19943.56, "total_tokens": 241435840} +{"current_steps": 76715, "total_steps": 78105, "loss": 0.1295, "lr": 4.8293383203912305e-09, "epoch": 4.911017220408425, "percentage": 98.22, "elapsed_time": "3:21:46", "remaining_time": "0:03:39", "throughput": 19943.76, "total_tokens": 241451904} +{"current_steps": 76720, "total_steps": 78105, "loss": 0.1311, "lr": 4.794693336886147e-09, "epoch": 4.911337302349401, "percentage": 98.23, "elapsed_time": "3:21:47", "remaining_time": "0:03:38", "throughput": 19943.92, "total_tokens": 241467136} +{"current_steps": 76725, "total_steps": 78105, "loss": 0.1043, "lr": 4.760172950936947e-09, "epoch": 4.911657384290378, "percentage": 98.23, "elapsed_time": "3:21:47", "remaining_time": "0:03:37", "throughput": 19944.09, "total_tokens": 241482432} +{"current_steps": 76730, "total_steps": 78105, "loss": 0.189, "lr": 4.725777164267531e-09, "epoch": 4.911977466231355, "percentage": 98.24, "elapsed_time": "3:21:48", "remaining_time": "0:03:36", "throughput": 19944.26, "total_tokens": 241497728} +{"current_steps": 76735, "total_steps": 78105, "loss": 0.125, "lr": 4.691505978595135e-09, "epoch": 4.912297548172332, "percentage": 98.25, "elapsed_time": "3:21:49", "remaining_time": "0:03:36", "throughput": 19944.42, "total_tokens": 241513024} +{"current_steps": 76740, "total_steps": 78105, "loss": 0.1549, "lr": 4.657359395631722e-09, "epoch": 4.912617630113309, "percentage": 98.25, "elapsed_time": "3:21:49", "remaining_time": "0:03:35", "throughput": 19944.61, "total_tokens": 241528384} +{"current_steps": 76745, "total_steps": 78105, "loss": 0.1177, "lr": 4.623337417081764e-09, "epoch": 4.912937712054286, "percentage": 98.26, "elapsed_time": "3:21:50", "remaining_time": "0:03:34", "throughput": 19944.78, "total_tokens": 241543360} +{"current_steps": 76750, "total_steps": 78105, "loss": 0.1099, "lr": 4.589440044644456e-09, "epoch": 4.913257793995263, "percentage": 98.27, "elapsed_time": "3:21:51", "remaining_time": "0:03:33", "throughput": 19944.98, "total_tokens": 241559744} +{"current_steps": 76755, "total_steps": 78105, "loss": 0.1189, "lr": 4.555667280012332e-09, "epoch": 4.913577875936239, "percentage": 98.27, "elapsed_time": "3:21:52", "remaining_time": "0:03:33", "throughput": 19945.19, "total_tokens": 241576448} +{"current_steps": 76760, "total_steps": 78105, "loss": 0.1318, "lr": 4.522019124871824e-09, "epoch": 4.913897957877216, "percentage": 98.28, "elapsed_time": "3:21:52", "remaining_time": "0:03:32", "throughput": 19945.35, "total_tokens": 241591744} +{"current_steps": 76765, "total_steps": 78105, "loss": 0.1418, "lr": 4.488495580903529e-09, "epoch": 4.914218039818193, "percentage": 98.28, "elapsed_time": "3:21:53", "remaining_time": "0:03:31", "throughput": 19945.52, "total_tokens": 241606976} +{"current_steps": 76770, "total_steps": 78105, "loss": 0.1476, "lr": 4.455096649780832e-09, "epoch": 4.91453812175917, "percentage": 98.29, "elapsed_time": "3:21:54", "remaining_time": "0:03:30", "throughput": 19945.75, "total_tokens": 241624064} +{"current_steps": 76775, "total_steps": 78105, "loss": 0.1563, "lr": 4.421822333171844e-09, "epoch": 4.914858203700147, "percentage": 98.3, "elapsed_time": "3:21:54", "remaining_time": "0:03:29", "throughput": 19945.93, "total_tokens": 241639552} +{"current_steps": 76780, "total_steps": 78105, "loss": 0.1066, "lr": 4.388672632738289e-09, "epoch": 4.915178285641124, "percentage": 98.3, "elapsed_time": "3:21:55", "remaining_time": "0:03:29", "throughput": 19946.1, "total_tokens": 241654848} +{"current_steps": 76785, "total_steps": 78105, "loss": 0.1624, "lr": 4.355647550134956e-09, "epoch": 4.915498367582101, "percentage": 98.31, "elapsed_time": "3:21:56", "remaining_time": "0:03:28", "throughput": 19946.27, "total_tokens": 241670080} +{"current_steps": 76790, "total_steps": 78105, "loss": 0.1416, "lr": 4.322747087011358e-09, "epoch": 4.915818449523078, "percentage": 98.32, "elapsed_time": "3:21:56", "remaining_time": "0:03:27", "throughput": 19946.44, "total_tokens": 241685312} +{"current_steps": 76795, "total_steps": 78105, "loss": 0.1207, "lr": 4.2899712450103496e-09, "epoch": 4.916138531464055, "percentage": 98.32, "elapsed_time": "3:21:57", "remaining_time": "0:03:26", "throughput": 19946.59, "total_tokens": 241699968} +{"current_steps": 76800, "total_steps": 78105, "loss": 0.1647, "lr": 4.2573200257683966e-09, "epoch": 4.916458613405032, "percentage": 98.33, "elapsed_time": "3:21:58", "remaining_time": "0:03:25", "throughput": 19946.77, "total_tokens": 241715776} +{"current_steps": 76805, "total_steps": 78105, "loss": 0.1806, "lr": 4.224793430915863e-09, "epoch": 4.916778695346008, "percentage": 98.34, "elapsed_time": "3:21:58", "remaining_time": "0:03:25", "throughput": 19946.89, "total_tokens": 241729792} +{"current_steps": 76810, "total_steps": 78105, "loss": 0.1019, "lr": 4.192391462077283e-09, "epoch": 4.917098777286985, "percentage": 98.34, "elapsed_time": "3:21:59", "remaining_time": "0:03:24", "throughput": 19947.05, "total_tokens": 241744704} +{"current_steps": 76815, "total_steps": 78105, "loss": 0.125, "lr": 4.160114120870528e-09, "epoch": 4.917418859227962, "percentage": 98.35, "elapsed_time": "3:22:00", "remaining_time": "0:03:23", "throughput": 19947.23, "total_tokens": 241760512} +{"current_steps": 76820, "total_steps": 78105, "loss": 0.1156, "lr": 4.127961408907366e-09, "epoch": 4.917738941168939, "percentage": 98.35, "elapsed_time": "3:22:00", "remaining_time": "0:03:22", "throughput": 19947.43, "total_tokens": 241776512} +{"current_steps": 76825, "total_steps": 78105, "loss": 0.1726, "lr": 4.095933327793178e-09, "epoch": 4.918059023109916, "percentage": 98.36, "elapsed_time": "3:22:01", "remaining_time": "0:03:21", "throughput": 19947.58, "total_tokens": 241791360} +{"current_steps": 76830, "total_steps": 78105, "loss": 0.1392, "lr": 4.064029879127518e-09, "epoch": 4.918379105050893, "percentage": 98.37, "elapsed_time": "3:22:01", "remaining_time": "0:03:21", "throughput": 19947.74, "total_tokens": 241806272} +{"current_steps": 76835, "total_steps": 78105, "loss": 0.1218, "lr": 4.0322510645032786e-09, "epoch": 4.91869918699187, "percentage": 98.37, "elapsed_time": "3:22:02", "remaining_time": "0:03:20", "throughput": 19947.96, "total_tokens": 241823040} +{"current_steps": 76840, "total_steps": 78105, "loss": 0.098, "lr": 4.000596885507524e-09, "epoch": 4.919019268932847, "percentage": 98.38, "elapsed_time": "3:22:03", "remaining_time": "0:03:19", "throughput": 19948.12, "total_tokens": 241837824} +{"current_steps": 76845, "total_steps": 78105, "loss": 0.108, "lr": 3.969067343720379e-09, "epoch": 4.919339350873823, "percentage": 98.39, "elapsed_time": "3:22:04", "remaining_time": "0:03:18", "throughput": 19948.29, "total_tokens": 241853120} +{"current_steps": 76850, "total_steps": 78105, "loss": 0.0886, "lr": 3.9376624407169716e-09, "epoch": 4.9196594328148, "percentage": 98.39, "elapsed_time": "3:22:04", "remaining_time": "0:03:18", "throughput": 19948.48, "total_tokens": 241868928} +{"current_steps": 76855, "total_steps": 78105, "loss": 0.1296, "lr": 3.9063821780652156e-09, "epoch": 4.919979514755777, "percentage": 98.4, "elapsed_time": "3:22:05", "remaining_time": "0:03:17", "throughput": 19948.66, "total_tokens": 241884544} +{"current_steps": 76860, "total_steps": 78105, "loss": 0.1034, "lr": 3.875226557326916e-09, "epoch": 4.920299596696754, "percentage": 98.41, "elapsed_time": "3:22:06", "remaining_time": "0:03:16", "throughput": 19948.81, "total_tokens": 241899392} +{"current_steps": 76865, "total_steps": 78105, "loss": 0.1351, "lr": 3.8441955800580525e-09, "epoch": 4.920619678637731, "percentage": 98.41, "elapsed_time": "3:22:06", "remaining_time": "0:03:15", "throughput": 19948.99, "total_tokens": 241915072} +{"current_steps": 76870, "total_steps": 78105, "loss": 0.1709, "lr": 3.813289247807939e-09, "epoch": 4.920939760578708, "percentage": 98.42, "elapsed_time": "3:22:07", "remaining_time": "0:03:14", "throughput": 19949.2, "total_tokens": 241931840} +{"current_steps": 76875, "total_steps": 78105, "loss": 0.1152, "lr": 3.782507562120341e-09, "epoch": 4.921259842519685, "percentage": 98.43, "elapsed_time": "3:22:08", "remaining_time": "0:03:14", "throughput": 19949.41, "total_tokens": 241948224} +{"current_steps": 76880, "total_steps": 78105, "loss": 0.114, "lr": 3.751850524531531e-09, "epoch": 4.921579924460662, "percentage": 98.43, "elapsed_time": "3:22:08", "remaining_time": "0:03:13", "throughput": 19949.57, "total_tokens": 241963264} +{"current_steps": 76885, "total_steps": 78105, "loss": 0.1146, "lr": 3.72131813657306e-09, "epoch": 4.921900006401639, "percentage": 98.44, "elapsed_time": "3:22:09", "remaining_time": "0:03:12", "throughput": 19949.76, "total_tokens": 241979264} +{"current_steps": 76890, "total_steps": 78105, "loss": 0.1478, "lr": 3.690910399769265e-09, "epoch": 4.922220088342616, "percentage": 98.44, "elapsed_time": "3:22:10", "remaining_time": "0:03:11", "throughput": 19949.93, "total_tokens": 241994624} +{"current_steps": 76895, "total_steps": 78105, "loss": 0.1463, "lr": 3.660627315638654e-09, "epoch": 4.922540170283592, "percentage": 98.45, "elapsed_time": "3:22:10", "remaining_time": "0:03:10", "throughput": 19950.12, "total_tokens": 242010368} +{"current_steps": 76900, "total_steps": 78105, "loss": 0.1097, "lr": 3.63046888569335e-09, "epoch": 4.922860252224569, "percentage": 98.46, "elapsed_time": "3:22:11", "remaining_time": "0:03:10", "throughput": 19950.26, "total_tokens": 242024960} +{"current_steps": 76905, "total_steps": 78105, "loss": 0.1899, "lr": 3.600435111439371e-09, "epoch": 4.923180334165546, "percentage": 98.46, "elapsed_time": "3:22:12", "remaining_time": "0:03:09", "throughput": 19950.46, "total_tokens": 242041152} +{"current_steps": 76910, "total_steps": 78105, "loss": 0.1325, "lr": 3.57052599437635e-09, "epoch": 4.923500416106523, "percentage": 98.47, "elapsed_time": "3:22:12", "remaining_time": "0:03:08", "throughput": 19950.65, "total_tokens": 242057024} +{"current_steps": 76915, "total_steps": 78105, "loss": 0.1525, "lr": 3.540741535997816e-09, "epoch": 4.9238204980475, "percentage": 98.48, "elapsed_time": "3:22:13", "remaining_time": "0:03:07", "throughput": 19950.79, "total_tokens": 242071680} +{"current_steps": 76920, "total_steps": 78105, "loss": 0.1029, "lr": 3.5110817377909113e-09, "epoch": 4.924140579988477, "percentage": 98.48, "elapsed_time": "3:22:14", "remaining_time": "0:03:06", "throughput": 19950.95, "total_tokens": 242086400} +{"current_steps": 76925, "total_steps": 78105, "loss": 0.1437, "lr": 3.481546601237229e-09, "epoch": 4.924460661929454, "percentage": 98.49, "elapsed_time": "3:22:14", "remaining_time": "0:03:06", "throughput": 19951.18, "total_tokens": 242103360} +{"current_steps": 76930, "total_steps": 78105, "loss": 0.1526, "lr": 3.4521361278108677e-09, "epoch": 4.924780743870431, "percentage": 98.5, "elapsed_time": "3:22:15", "remaining_time": "0:03:05", "throughput": 19951.34, "total_tokens": 242118720} +{"current_steps": 76935, "total_steps": 78105, "loss": 0.1564, "lr": 3.4228503189809302e-09, "epoch": 4.925100825811407, "percentage": 98.5, "elapsed_time": "3:22:16", "remaining_time": "0:03:04", "throughput": 19951.6, "total_tokens": 242136320} +{"current_steps": 76940, "total_steps": 78105, "loss": 0.1275, "lr": 3.3936891762095803e-09, "epoch": 4.925420907752384, "percentage": 98.51, "elapsed_time": "3:22:16", "remaining_time": "0:03:03", "throughput": 19951.86, "total_tokens": 242154048} +{"current_steps": 76945, "total_steps": 78105, "loss": 0.1692, "lr": 3.3646527009531528e-09, "epoch": 4.925740989693361, "percentage": 98.51, "elapsed_time": "3:22:17", "remaining_time": "0:03:02", "throughput": 19952.07, "total_tokens": 242170560} +{"current_steps": 76950, "total_steps": 78105, "loss": 0.1088, "lr": 3.335740894661321e-09, "epoch": 4.926061071634338, "percentage": 98.52, "elapsed_time": "3:22:18", "remaining_time": "0:03:02", "throughput": 19952.23, "total_tokens": 242185472} +{"current_steps": 76955, "total_steps": 78105, "loss": 0.1553, "lr": 3.3069537587782086e-09, "epoch": 4.926381153575315, "percentage": 98.53, "elapsed_time": "3:22:18", "remaining_time": "0:03:01", "throughput": 19952.43, "total_tokens": 242201792} +{"current_steps": 76960, "total_steps": 78105, "loss": 0.125, "lr": 3.278291294740721e-09, "epoch": 4.926701235516292, "percentage": 98.53, "elapsed_time": "3:22:19", "remaining_time": "0:03:00", "throughput": 19952.56, "total_tokens": 242215872} +{"current_steps": 76965, "total_steps": 78105, "loss": 0.1315, "lr": 3.2497535039804905e-09, "epoch": 4.927021317457269, "percentage": 98.54, "elapsed_time": "3:22:20", "remaining_time": "0:02:59", "throughput": 19952.74, "total_tokens": 242231232} +{"current_steps": 76970, "total_steps": 78105, "loss": 0.0974, "lr": 3.221340387922489e-09, "epoch": 4.927341399398246, "percentage": 98.55, "elapsed_time": "3:22:20", "remaining_time": "0:02:59", "throughput": 19952.91, "total_tokens": 242246528} +{"current_steps": 76975, "total_steps": 78105, "loss": 0.1575, "lr": 3.1930519479855816e-09, "epoch": 4.927661481339223, "percentage": 98.55, "elapsed_time": "3:22:21", "remaining_time": "0:02:58", "throughput": 19953.08, "total_tokens": 242261824} +{"current_steps": 76980, "total_steps": 78105, "loss": 0.1637, "lr": 3.164888185582249e-09, "epoch": 4.9279815632802, "percentage": 98.56, "elapsed_time": "3:22:22", "remaining_time": "0:02:57", "throughput": 19953.31, "total_tokens": 242278592} +{"current_steps": 76985, "total_steps": 78105, "loss": 0.1099, "lr": 3.1368491021188663e-09, "epoch": 4.928301645221176, "percentage": 98.57, "elapsed_time": "3:22:22", "remaining_time": "0:02:56", "throughput": 19953.48, "total_tokens": 242294272} +{"current_steps": 76990, "total_steps": 78105, "loss": 0.1196, "lr": 3.108934698995425e-09, "epoch": 4.928621727162153, "percentage": 98.57, "elapsed_time": "3:22:23", "remaining_time": "0:02:55", "throughput": 19953.65, "total_tokens": 242309248} +{"current_steps": 76995, "total_steps": 78105, "loss": 0.1361, "lr": 3.0811449776060874e-09, "epoch": 4.92894180910313, "percentage": 98.58, "elapsed_time": "3:22:24", "remaining_time": "0:02:55", "throughput": 19953.81, "total_tokens": 242324608} +{"current_steps": 77000, "total_steps": 78105, "loss": 0.0993, "lr": 3.053479939338355e-09, "epoch": 4.929261891044107, "percentage": 98.59, "elapsed_time": "3:22:24", "remaining_time": "0:02:54", "throughput": 19954.01, "total_tokens": 242340288} +{"current_steps": 77005, "total_steps": 78105, "loss": 0.1388, "lr": 3.0259395855739005e-09, "epoch": 4.929581972985084, "percentage": 98.59, "elapsed_time": "3:22:25", "remaining_time": "0:02:53", "throughput": 19954.19, "total_tokens": 242356288} +{"current_steps": 77010, "total_steps": 78105, "loss": 0.1588, "lr": 2.998523917687457e-09, "epoch": 4.929902054926061, "percentage": 98.6, "elapsed_time": "3:22:26", "remaining_time": "0:02:52", "throughput": 19954.39, "total_tokens": 242372160} +{"current_steps": 77015, "total_steps": 78105, "loss": 0.1585, "lr": 2.971232937048485e-09, "epoch": 4.930222136867038, "percentage": 98.6, "elapsed_time": "3:22:26", "remaining_time": "0:02:51", "throughput": 19954.56, "total_tokens": 242387712} +{"current_steps": 77020, "total_steps": 78105, "loss": 0.128, "lr": 2.9440666450195053e-09, "epoch": 4.930542218808014, "percentage": 98.61, "elapsed_time": "3:22:27", "remaining_time": "0:02:51", "throughput": 19954.72, "total_tokens": 242402624} +{"current_steps": 77025, "total_steps": 78105, "loss": 0.1348, "lr": 2.9170250429572113e-09, "epoch": 4.930862300748991, "percentage": 98.62, "elapsed_time": "3:22:28", "remaining_time": "0:02:50", "throughput": 19954.96, "total_tokens": 242419776} +{"current_steps": 77030, "total_steps": 78105, "loss": 0.1029, "lr": 2.8901081322119105e-09, "epoch": 4.931182382689968, "percentage": 98.62, "elapsed_time": "3:22:29", "remaining_time": "0:02:49", "throughput": 19955.16, "total_tokens": 242435840} +{"current_steps": 77035, "total_steps": 78105, "loss": 0.1312, "lr": 2.8633159141275293e-09, "epoch": 4.931502464630945, "percentage": 98.63, "elapsed_time": "3:22:29", "remaining_time": "0:02:48", "throughput": 19955.33, "total_tokens": 242451520} +{"current_steps": 77040, "total_steps": 78105, "loss": 0.1471, "lr": 2.836648390042163e-09, "epoch": 4.931822546571922, "percentage": 98.64, "elapsed_time": "3:22:30", "remaining_time": "0:02:47", "throughput": 19955.54, "total_tokens": 242467840} +{"current_steps": 77045, "total_steps": 78105, "loss": 0.1564, "lr": 2.8101055612872465e-09, "epoch": 4.932142628512899, "percentage": 98.64, "elapsed_time": "3:22:31", "remaining_time": "0:02:47", "throughput": 19955.73, "total_tokens": 242483840} +{"current_steps": 77050, "total_steps": 78105, "loss": 0.1553, "lr": 2.783687429188109e-09, "epoch": 4.932462710453876, "percentage": 98.65, "elapsed_time": "3:22:31", "remaining_time": "0:02:46", "throughput": 19955.97, "total_tokens": 242501184} +{"current_steps": 77055, "total_steps": 78105, "loss": 0.1446, "lr": 2.7573939950642503e-09, "epoch": 4.932782792394853, "percentage": 98.66, "elapsed_time": "3:22:32", "remaining_time": "0:02:45", "throughput": 19956.15, "total_tokens": 242516800} +{"current_steps": 77060, "total_steps": 78105, "loss": 0.1238, "lr": 2.7312252602285093e-09, "epoch": 4.93310287433583, "percentage": 98.66, "elapsed_time": "3:22:33", "remaining_time": "0:02:44", "throughput": 19956.31, "total_tokens": 242532096} +{"current_steps": 77065, "total_steps": 78105, "loss": 0.1247, "lr": 2.705181225987619e-09, "epoch": 4.933422956276807, "percentage": 98.67, "elapsed_time": "3:22:33", "remaining_time": "0:02:44", "throughput": 19956.51, "total_tokens": 242548416} +{"current_steps": 77070, "total_steps": 78105, "loss": 0.1417, "lr": 2.679261893641927e-09, "epoch": 4.933743038217783, "percentage": 98.67, "elapsed_time": "3:22:34", "remaining_time": "0:02:43", "throughput": 19956.68, "total_tokens": 242563776} +{"current_steps": 77075, "total_steps": 78105, "loss": 0.1207, "lr": 2.6534672644859536e-09, "epoch": 4.93406312015876, "percentage": 98.68, "elapsed_time": "3:22:35", "remaining_time": "0:02:42", "throughput": 19956.84, "total_tokens": 242579008} +{"current_steps": 77080, "total_steps": 78105, "loss": 0.1284, "lr": 2.6277973398075586e-09, "epoch": 4.934383202099737, "percentage": 98.69, "elapsed_time": "3:22:35", "remaining_time": "0:02:41", "throughput": 19957.0, "total_tokens": 242593856} +{"current_steps": 77085, "total_steps": 78105, "loss": 0.1056, "lr": 2.602252120888493e-09, "epoch": 4.934703284040714, "percentage": 98.69, "elapsed_time": "3:22:36", "remaining_time": "0:02:40", "throughput": 19957.19, "total_tokens": 242609664} +{"current_steps": 77090, "total_steps": 78105, "loss": 0.1059, "lr": 2.5768316090046817e-09, "epoch": 4.935023365981691, "percentage": 98.7, "elapsed_time": "3:22:37", "remaining_time": "0:02:40", "throughput": 19957.36, "total_tokens": 242625152} +{"current_steps": 77095, "total_steps": 78105, "loss": 0.1323, "lr": 2.5515358054251095e-09, "epoch": 4.935343447922668, "percentage": 98.71, "elapsed_time": "3:22:37", "remaining_time": "0:02:39", "throughput": 19957.51, "total_tokens": 242639872} +{"current_steps": 77100, "total_steps": 78105, "loss": 0.097, "lr": 2.5263647114132096e-09, "epoch": 4.935663529863645, "percentage": 98.71, "elapsed_time": "3:22:38", "remaining_time": "0:02:38", "throughput": 19957.7, "total_tokens": 242655936} +{"current_steps": 77105, "total_steps": 78105, "loss": 0.1782, "lr": 2.501318328226032e-09, "epoch": 4.935983611804622, "percentage": 98.72, "elapsed_time": "3:22:39", "remaining_time": "0:02:37", "throughput": 19957.89, "total_tokens": 242671424} +{"current_steps": 77110, "total_steps": 78105, "loss": 0.1143, "lr": 2.4763966571136888e-09, "epoch": 4.936303693745598, "percentage": 98.73, "elapsed_time": "3:22:39", "remaining_time": "0:02:36", "throughput": 19958.06, "total_tokens": 242686784} +{"current_steps": 77115, "total_steps": 78105, "loss": 0.1169, "lr": 2.4515996993210167e-09, "epoch": 4.936623775686575, "percentage": 98.73, "elapsed_time": "3:22:40", "remaining_time": "0:02:36", "throughput": 19958.23, "total_tokens": 242702208} +{"current_steps": 77120, "total_steps": 78105, "loss": 0.1495, "lr": 2.4269274560861923e-09, "epoch": 4.936943857627552, "percentage": 98.74, "elapsed_time": "3:22:41", "remaining_time": "0:02:35", "throughput": 19958.39, "total_tokens": 242717056} +{"current_steps": 77125, "total_steps": 78105, "loss": 0.0898, "lr": 2.402379928641285e-09, "epoch": 4.937263939568529, "percentage": 98.75, "elapsed_time": "3:22:41", "remaining_time": "0:02:34", "throughput": 19958.58, "total_tokens": 242732992} +{"current_steps": 77130, "total_steps": 78105, "loss": 0.1133, "lr": 2.377957118211982e-09, "epoch": 4.937584021509506, "percentage": 98.75, "elapsed_time": "3:22:42", "remaining_time": "0:02:33", "throughput": 19958.75, "total_tokens": 242748288} +{"current_steps": 77135, "total_steps": 78105, "loss": 0.1387, "lr": 2.3536590260181402e-09, "epoch": 4.937904103450483, "percentage": 98.76, "elapsed_time": "3:22:43", "remaining_time": "0:02:32", "throughput": 19958.91, "total_tokens": 242763264} +{"current_steps": 77140, "total_steps": 78105, "loss": 0.13, "lr": 2.3294856532724008e-09, "epoch": 4.93822418539146, "percentage": 98.76, "elapsed_time": "3:22:43", "remaining_time": "0:02:32", "throughput": 19959.09, "total_tokens": 242778560} +{"current_steps": 77145, "total_steps": 78105, "loss": 0.1385, "lr": 2.305437001182409e-09, "epoch": 4.938544267332437, "percentage": 98.77, "elapsed_time": "3:22:44", "remaining_time": "0:02:31", "throughput": 19959.26, "total_tokens": 242793920} +{"current_steps": 77150, "total_steps": 78105, "loss": 0.1293, "lr": 2.281513070948871e-09, "epoch": 4.938864349273414, "percentage": 98.78, "elapsed_time": "3:22:45", "remaining_time": "0:02:30", "throughput": 19959.46, "total_tokens": 242810176} +{"current_steps": 77155, "total_steps": 78105, "loss": 0.0911, "lr": 2.2577138637663865e-09, "epoch": 4.939184431214391, "percentage": 98.78, "elapsed_time": "3:22:45", "remaining_time": "0:02:29", "throughput": 19959.67, "total_tokens": 242826432} +{"current_steps": 77160, "total_steps": 78105, "loss": 0.1056, "lr": 2.2340393808234496e-09, "epoch": 4.939504513155367, "percentage": 98.79, "elapsed_time": "3:22:46", "remaining_time": "0:02:29", "throughput": 19959.82, "total_tokens": 242841216} +{"current_steps": 77165, "total_steps": 78105, "loss": 0.1069, "lr": 2.2104896233021698e-09, "epoch": 4.939824595096344, "percentage": 98.8, "elapsed_time": "3:22:47", "remaining_time": "0:02:28", "throughput": 19959.96, "total_tokens": 242856000} +{"current_steps": 77170, "total_steps": 78105, "loss": 0.1748, "lr": 2.1870645923788292e-09, "epoch": 4.940144677037321, "percentage": 98.8, "elapsed_time": "3:22:47", "remaining_time": "0:02:27", "throughput": 19960.15, "total_tokens": 242871872} +{"current_steps": 77175, "total_steps": 78105, "loss": 0.1392, "lr": 2.163764289222492e-09, "epoch": 4.940464758978298, "percentage": 98.81, "elapsed_time": "3:22:48", "remaining_time": "0:02:26", "throughput": 19960.42, "total_tokens": 242890048} +{"current_steps": 77180, "total_steps": 78105, "loss": 0.1054, "lr": 2.140588714997227e-09, "epoch": 4.940784840919275, "percentage": 98.82, "elapsed_time": "3:22:49", "remaining_time": "0:02:25", "throughput": 19960.59, "total_tokens": 242905600} +{"current_steps": 77185, "total_steps": 78105, "loss": 0.2571, "lr": 2.117537870859887e-09, "epoch": 4.941104922860252, "percentage": 98.82, "elapsed_time": "3:22:49", "remaining_time": "0:02:25", "throughput": 19960.78, "total_tokens": 242921472} +{"current_steps": 77190, "total_steps": 78105, "loss": 0.1923, "lr": 2.0946117579617733e-09, "epoch": 4.941425004801229, "percentage": 98.83, "elapsed_time": "3:22:50", "remaining_time": "0:02:24", "throughput": 19960.97, "total_tokens": 242937088} +{"current_steps": 77195, "total_steps": 78105, "loss": 0.1124, "lr": 2.071810377447525e-09, "epoch": 4.941745086742206, "percentage": 98.83, "elapsed_time": "3:22:51", "remaining_time": "0:02:23", "throughput": 19961.19, "total_tokens": 242953728} +{"current_steps": 77200, "total_steps": 78105, "loss": 0.0859, "lr": 2.0491337304559546e-09, "epoch": 4.9420651686831825, "percentage": 98.84, "elapsed_time": "3:22:51", "remaining_time": "0:02:22", "throughput": 19961.4, "total_tokens": 242970048} +{"current_steps": 77205, "total_steps": 78105, "loss": 0.173, "lr": 2.0265818181194885e-09, "epoch": 4.9423852506241595, "percentage": 98.85, "elapsed_time": "3:22:52", "remaining_time": "0:02:21", "throughput": 19961.6, "total_tokens": 242986624} +{"current_steps": 77210, "total_steps": 78105, "loss": 0.1756, "lr": 2.004154641563616e-09, "epoch": 4.9427053325651364, "percentage": 98.85, "elapsed_time": "3:22:53", "remaining_time": "0:02:21", "throughput": 19961.76, "total_tokens": 243001536} +{"current_steps": 77215, "total_steps": 78105, "loss": 0.1873, "lr": 1.981852201909107e-09, "epoch": 4.943025414506113, "percentage": 98.86, "elapsed_time": "3:22:54", "remaining_time": "0:02:20", "throughput": 19961.99, "total_tokens": 243018432} +{"current_steps": 77220, "total_steps": 78105, "loss": 0.1458, "lr": 1.9596745002689597e-09, "epoch": 4.94334549644709, "percentage": 98.87, "elapsed_time": "3:22:54", "remaining_time": "0:02:19", "throughput": 19962.18, "total_tokens": 243033920} +{"current_steps": 77225, "total_steps": 78105, "loss": 0.1194, "lr": 1.9376215377509e-09, "epoch": 4.943665578388067, "percentage": 98.87, "elapsed_time": "3:22:55", "remaining_time": "0:02:18", "throughput": 19962.34, "total_tokens": 243048896} +{"current_steps": 77230, "total_steps": 78105, "loss": 0.1452, "lr": 1.9156933154559908e-09, "epoch": 4.943985660329044, "percentage": 98.88, "elapsed_time": "3:22:56", "remaining_time": "0:02:17", "throughput": 19962.51, "total_tokens": 243064576} +{"current_steps": 77235, "total_steps": 78105, "loss": 0.1342, "lr": 1.893889834479468e-09, "epoch": 4.944305742270021, "percentage": 98.89, "elapsed_time": "3:22:56", "remaining_time": "0:02:17", "throughput": 19962.69, "total_tokens": 243080320} +{"current_steps": 77240, "total_steps": 78105, "loss": 0.1674, "lr": 1.8722110959099037e-09, "epoch": 4.944625824210998, "percentage": 98.89, "elapsed_time": "3:22:57", "remaining_time": "0:02:16", "throughput": 19962.86, "total_tokens": 243095744} +{"current_steps": 77245, "total_steps": 78105, "loss": 0.1205, "lr": 1.850657100829767e-09, "epoch": 4.944945906151975, "percentage": 98.9, "elapsed_time": "3:22:58", "remaining_time": "0:02:15", "throughput": 19963.08, "total_tokens": 243112320} +{"current_steps": 77250, "total_steps": 78105, "loss": 0.1471, "lr": 1.8292278503156958e-09, "epoch": 4.9452659880929515, "percentage": 98.91, "elapsed_time": "3:22:58", "remaining_time": "0:02:14", "throughput": 19963.23, "total_tokens": 243127168} +{"current_steps": 77255, "total_steps": 78105, "loss": 0.1377, "lr": 1.8079233454371126e-09, "epoch": 4.9455860700339285, "percentage": 98.91, "elapsed_time": "3:22:59", "remaining_time": "0:02:14", "throughput": 19963.42, "total_tokens": 243142848} +{"current_steps": 77260, "total_steps": 78105, "loss": 0.0792, "lr": 1.7867435872584437e-09, "epoch": 4.9459061519749055, "percentage": 98.92, "elapsed_time": "3:23:00", "remaining_time": "0:02:13", "throughput": 19963.63, "total_tokens": 243159552} +{"current_steps": 77265, "total_steps": 78105, "loss": 0.1807, "lr": 1.7656885768368992e-09, "epoch": 4.9462262339158825, "percentage": 98.92, "elapsed_time": "3:23:00", "remaining_time": "0:02:12", "throughput": 19963.81, "total_tokens": 243175040} +{"current_steps": 77270, "total_steps": 78105, "loss": 0.1333, "lr": 1.7447583152244153e-09, "epoch": 4.946546315856859, "percentage": 98.93, "elapsed_time": "3:23:01", "remaining_time": "0:02:11", "throughput": 19963.99, "total_tokens": 243190912} +{"current_steps": 77275, "total_steps": 78105, "loss": 0.1126, "lr": 1.723952803465434e-09, "epoch": 4.946866397797836, "percentage": 98.94, "elapsed_time": "3:23:02", "remaining_time": "0:02:10", "throughput": 19964.15, "total_tokens": 243205952} +{"current_steps": 77280, "total_steps": 78105, "loss": 0.1722, "lr": 1.703272042599402e-09, "epoch": 4.947186479738813, "percentage": 98.94, "elapsed_time": "3:23:02", "remaining_time": "0:02:10", "throughput": 19964.31, "total_tokens": 243220928} +{"current_steps": 77285, "total_steps": 78105, "loss": 0.1612, "lr": 1.682716033658549e-09, "epoch": 4.9475065616797895, "percentage": 98.95, "elapsed_time": "3:23:03", "remaining_time": "0:02:09", "throughput": 19964.52, "total_tokens": 243237824} +{"current_steps": 77290, "total_steps": 78105, "loss": 0.1464, "lr": 1.6622847776698314e-09, "epoch": 4.9478266436207665, "percentage": 98.96, "elapsed_time": "3:23:04", "remaining_time": "0:02:08", "throughput": 19964.68, "total_tokens": 243252928} +{"current_steps": 77295, "total_steps": 78105, "loss": 0.1613, "lr": 1.641978275652989e-09, "epoch": 4.9481467255617435, "percentage": 98.96, "elapsed_time": "3:23:04", "remaining_time": "0:02:07", "throughput": 19964.91, "total_tokens": 243270144} +{"current_steps": 77300, "total_steps": 78105, "loss": 0.1337, "lr": 1.621796528622488e-09, "epoch": 4.9484668075027205, "percentage": 98.97, "elapsed_time": "3:23:05", "remaining_time": "0:02:06", "throughput": 19965.1, "total_tokens": 243286016} +{"current_steps": 77305, "total_steps": 78105, "loss": 0.1124, "lr": 1.6017395375858557e-09, "epoch": 4.9487868894436975, "percentage": 98.98, "elapsed_time": "3:23:06", "remaining_time": "0:02:06", "throughput": 19965.27, "total_tokens": 243301248} +{"current_steps": 77310, "total_steps": 78105, "loss": 0.1515, "lr": 1.5818073035447912e-09, "epoch": 4.9491069713846745, "percentage": 98.98, "elapsed_time": "3:23:06", "remaining_time": "0:02:05", "throughput": 19965.44, "total_tokens": 243316800} +{"current_steps": 77315, "total_steps": 78105, "loss": 0.1255, "lr": 1.5619998274943316e-09, "epoch": 4.9494270533256515, "percentage": 98.99, "elapsed_time": "3:23:07", "remaining_time": "0:02:04", "throughput": 19965.6, "total_tokens": 243332224} +{"current_steps": 77320, "total_steps": 78105, "loss": 0.1724, "lr": 1.5423171104236857e-09, "epoch": 4.9497471352666285, "percentage": 98.99, "elapsed_time": "3:23:08", "remaining_time": "0:02:03", "throughput": 19965.79, "total_tokens": 243348096} +{"current_steps": 77325, "total_steps": 78105, "loss": 0.1574, "lr": 1.522759153315956e-09, "epoch": 4.9500672172076055, "percentage": 99.0, "elapsed_time": "3:23:08", "remaining_time": "0:02:02", "throughput": 19965.97, "total_tokens": 243363392} +{"current_steps": 77330, "total_steps": 78105, "loss": 0.1445, "lr": 1.5033259571473058e-09, "epoch": 4.950387299148582, "percentage": 99.01, "elapsed_time": "3:23:09", "remaining_time": "0:02:02", "throughput": 19966.16, "total_tokens": 243379520} +{"current_steps": 77335, "total_steps": 78105, "loss": 0.1646, "lr": 1.484017522888348e-09, "epoch": 4.9507073810895585, "percentage": 99.01, "elapsed_time": "3:23:10", "remaining_time": "0:02:01", "throughput": 19966.32, "total_tokens": 243394368} +{"current_steps": 77340, "total_steps": 78105, "loss": 0.1214, "lr": 1.4648338515033112e-09, "epoch": 4.9510274630305355, "percentage": 99.02, "elapsed_time": "3:23:10", "remaining_time": "0:02:00", "throughput": 19966.47, "total_tokens": 243409216} +{"current_steps": 77345, "total_steps": 78105, "loss": 0.1642, "lr": 1.4457749439503178e-09, "epoch": 4.9513475449715125, "percentage": 99.03, "elapsed_time": "3:23:11", "remaining_time": "0:01:59", "throughput": 19966.62, "total_tokens": 243424256} +{"current_steps": 77350, "total_steps": 78105, "loss": 0.1421, "lr": 1.4268408011805512e-09, "epoch": 4.9516676269124895, "percentage": 99.03, "elapsed_time": "3:23:12", "remaining_time": "0:01:59", "throughput": 19966.79, "total_tokens": 243439936} +{"current_steps": 77355, "total_steps": 78105, "loss": 0.1381, "lr": 1.4080314241399218e-09, "epoch": 4.9519877088534665, "percentage": 99.04, "elapsed_time": "3:23:12", "remaining_time": "0:01:58", "throughput": 19966.95, "total_tokens": 243454656} +{"current_steps": 77360, "total_steps": 78105, "loss": 0.1524, "lr": 1.3893468137674005e-09, "epoch": 4.9523077907944435, "percentage": 99.05, "elapsed_time": "3:23:13", "remaining_time": "0:01:57", "throughput": 19967.09, "total_tokens": 243469632} +{"current_steps": 77365, "total_steps": 78105, "loss": 0.1276, "lr": 1.3707869709961297e-09, "epoch": 4.9526278727354205, "percentage": 99.05, "elapsed_time": "3:23:14", "remaining_time": "0:01:56", "throughput": 19967.27, "total_tokens": 243485056} +{"current_steps": 77370, "total_steps": 78105, "loss": 0.152, "lr": 1.3523518967528682e-09, "epoch": 4.9529479546763975, "percentage": 99.06, "elapsed_time": "3:23:14", "remaining_time": "0:01:55", "throughput": 19967.44, "total_tokens": 243500480} +{"current_steps": 77375, "total_steps": 78105, "loss": 0.1355, "lr": 1.3340415919579908e-09, "epoch": 4.953268036617374, "percentage": 99.07, "elapsed_time": "3:23:15", "remaining_time": "0:01:55", "throughput": 19967.62, "total_tokens": 243515968} +{"current_steps": 77380, "total_steps": 78105, "loss": 0.1437, "lr": 1.3158560575263212e-09, "epoch": 4.953588118558351, "percentage": 99.07, "elapsed_time": "3:23:16", "remaining_time": "0:01:54", "throughput": 19967.79, "total_tokens": 243531136} +{"current_steps": 77385, "total_steps": 78105, "loss": 0.1236, "lr": 1.297795294365467e-09, "epoch": 4.953908200499328, "percentage": 99.08, "elapsed_time": "3:23:16", "remaining_time": "0:01:53", "throughput": 19967.95, "total_tokens": 243546176} +{"current_steps": 77390, "total_steps": 78105, "loss": 0.157, "lr": 1.2798593033774843e-09, "epoch": 4.9542282824403046, "percentage": 99.08, "elapsed_time": "3:23:17", "remaining_time": "0:01:52", "throughput": 19968.11, "total_tokens": 243561536} +{"current_steps": 77395, "total_steps": 78105, "loss": 0.0718, "lr": 1.2620480854580453e-09, "epoch": 4.9545483643812815, "percentage": 99.09, "elapsed_time": "3:23:18", "remaining_time": "0:01:51", "throughput": 19968.26, "total_tokens": 243576320} +{"current_steps": 77400, "total_steps": 78105, "loss": 0.0955, "lr": 1.2443616414964387e-09, "epoch": 4.9548684463222585, "percentage": 99.1, "elapsed_time": "3:23:18", "remaining_time": "0:01:51", "throughput": 19968.43, "total_tokens": 243591488} +{"current_steps": 77405, "total_steps": 78105, "loss": 0.1424, "lr": 1.2267999723758472e-09, "epoch": 4.9551885282632355, "percentage": 99.1, "elapsed_time": "3:23:19", "remaining_time": "0:01:50", "throughput": 19968.66, "total_tokens": 243608832} +{"current_steps": 77410, "total_steps": 78105, "loss": 0.175, "lr": 1.2093630789730692e-09, "epoch": 4.9555086102042125, "percentage": 99.11, "elapsed_time": "3:23:20", "remaining_time": "0:01:49", "throughput": 19968.86, "total_tokens": 243625536} +{"current_steps": 77415, "total_steps": 78105, "loss": 0.1116, "lr": 1.1920509621590748e-09, "epoch": 4.9558286921451895, "percentage": 99.12, "elapsed_time": "3:23:20", "remaining_time": "0:01:48", "throughput": 19968.98, "total_tokens": 243639808} +{"current_steps": 77420, "total_steps": 78105, "loss": 0.1386, "lr": 1.17486362279845e-09, "epoch": 4.9561487740861665, "percentage": 99.12, "elapsed_time": "3:23:21", "remaining_time": "0:01:47", "throughput": 19969.18, "total_tokens": 243655424} +{"current_steps": 77425, "total_steps": 78105, "loss": 0.1339, "lr": 1.1578010617488422e-09, "epoch": 4.956468856027143, "percentage": 99.13, "elapsed_time": "3:23:22", "remaining_time": "0:01:47", "throughput": 19969.34, "total_tokens": 243670848} +{"current_steps": 77430, "total_steps": 78105, "loss": 0.0594, "lr": 1.1408632798629027e-09, "epoch": 4.95678893796812, "percentage": 99.14, "elapsed_time": "3:23:22", "remaining_time": "0:01:46", "throughput": 19969.47, "total_tokens": 243685120} +{"current_steps": 77435, "total_steps": 78105, "loss": 0.1308, "lr": 1.1240502779863438e-09, "epoch": 4.957109019909097, "percentage": 99.14, "elapsed_time": "3:23:23", "remaining_time": "0:01:45", "throughput": 19969.62, "total_tokens": 243699776} +{"current_steps": 77440, "total_steps": 78105, "loss": 0.124, "lr": 1.1073620569582167e-09, "epoch": 4.957429101850074, "percentage": 99.15, "elapsed_time": "3:23:24", "remaining_time": "0:01:44", "throughput": 19969.81, "total_tokens": 243715904} +{"current_steps": 77445, "total_steps": 78105, "loss": 0.1289, "lr": 1.0907986176122988e-09, "epoch": 4.957749183791051, "percentage": 99.15, "elapsed_time": "3:23:24", "remaining_time": "0:01:44", "throughput": 19969.99, "total_tokens": 243731264} +{"current_steps": 77450, "total_steps": 78105, "loss": 0.127, "lr": 1.0743599607754285e-09, "epoch": 4.9580692657320276, "percentage": 99.16, "elapsed_time": "3:23:25", "remaining_time": "0:01:43", "throughput": 19970.17, "total_tokens": 243747392} +{"current_steps": 77455, "total_steps": 78105, "loss": 0.1257, "lr": 1.0580460872688937e-09, "epoch": 4.9583893476730045, "percentage": 99.17, "elapsed_time": "3:23:26", "remaining_time": "0:01:42", "throughput": 19970.36, "total_tokens": 243763456} +{"current_steps": 77460, "total_steps": 78105, "loss": 0.1227, "lr": 1.041856997906765e-09, "epoch": 4.9587094296139815, "percentage": 99.17, "elapsed_time": "3:23:26", "remaining_time": "0:01:41", "throughput": 19970.57, "total_tokens": 243779904} +{"current_steps": 77465, "total_steps": 78105, "loss": 0.1338, "lr": 1.0257926934978402e-09, "epoch": 4.959029511554958, "percentage": 99.18, "elapsed_time": "3:23:27", "remaining_time": "0:01:40", "throughput": 19970.74, "total_tokens": 243795648} +{"current_steps": 77470, "total_steps": 78105, "loss": 0.0907, "lr": 1.0098531748439778e-09, "epoch": 4.959349593495935, "percentage": 99.19, "elapsed_time": "3:23:28", "remaining_time": "0:01:40", "throughput": 19970.89, "total_tokens": 243810624} +{"current_steps": 77475, "total_steps": 78105, "loss": 0.1494, "lr": 9.94038442741485e-10, "epoch": 4.959669675436912, "percentage": 99.19, "elapsed_time": "3:23:28", "remaining_time": "0:01:39", "throughput": 19971.04, "total_tokens": 243825408} +{"current_steps": 77480, "total_steps": 78105, "loss": 0.1262, "lr": 9.783484979797309e-10, "epoch": 4.959989757377889, "percentage": 99.2, "elapsed_time": "3:23:29", "remaining_time": "0:01:38", "throughput": 19971.22, "total_tokens": 243841472} +{"current_steps": 77485, "total_steps": 78105, "loss": 0.0931, "lr": 9.627833413425324e-10, "epoch": 4.960309839318866, "percentage": 99.21, "elapsed_time": "3:23:30", "remaining_time": "0:01:37", "throughput": 19971.37, "total_tokens": 243856448} +{"current_steps": 77490, "total_steps": 78105, "loss": 0.1191, "lr": 9.473429736067686e-10, "epoch": 4.960629921259843, "percentage": 99.21, "elapsed_time": "3:23:30", "remaining_time": "0:01:36", "throughput": 19971.55, "total_tokens": 243872128} +{"current_steps": 77495, "total_steps": 78105, "loss": 0.1775, "lr": 9.32027395543489e-10, "epoch": 4.96095000320082, "percentage": 99.22, "elapsed_time": "3:23:31", "remaining_time": "0:01:36", "throughput": 19971.79, "total_tokens": 243889600} +{"current_steps": 77500, "total_steps": 78105, "loss": 0.1342, "lr": 9.168366079176372e-10, "epoch": 4.961270085141797, "percentage": 99.23, "elapsed_time": "3:23:32", "remaining_time": "0:01:35", "throughput": 19971.97, "total_tokens": 243905536} +{"current_steps": 77505, "total_steps": 78105, "loss": 0.1372, "lr": 9.017706114877734e-10, "epoch": 4.961590167082774, "percentage": 99.23, "elapsed_time": "3:23:33", "remaining_time": "0:01:34", "throughput": 19972.11, "total_tokens": 243920000} +{"current_steps": 77510, "total_steps": 78105, "loss": 0.1475, "lr": 8.86829407006351e-10, "epoch": 4.9619102490237506, "percentage": 99.24, "elapsed_time": "3:23:33", "remaining_time": "0:01:33", "throughput": 19972.34, "total_tokens": 243936960} +{"current_steps": 77515, "total_steps": 78105, "loss": 0.1515, "lr": 8.720129952188849e-10, "epoch": 4.962230330964727, "percentage": 99.24, "elapsed_time": "3:23:34", "remaining_time": "0:01:32", "throughput": 19972.52, "total_tokens": 243952832} +{"current_steps": 77520, "total_steps": 78105, "loss": 0.1125, "lr": 8.573213768658939e-10, "epoch": 4.962550412905704, "percentage": 99.25, "elapsed_time": "3:23:35", "remaining_time": "0:01:32", "throughput": 19972.7, "total_tokens": 243968896} +{"current_steps": 77525, "total_steps": 78105, "loss": 0.1148, "lr": 8.427545526806802e-10, "epoch": 4.962870494846681, "percentage": 99.26, "elapsed_time": "3:23:35", "remaining_time": "0:01:31", "throughput": 19972.84, "total_tokens": 243983488} +{"current_steps": 77530, "total_steps": 78105, "loss": 0.1502, "lr": 8.283125233904399e-10, "epoch": 4.963190576787658, "percentage": 99.26, "elapsed_time": "3:23:36", "remaining_time": "0:01:30", "throughput": 19973.04, "total_tokens": 243999296} +{"current_steps": 77535, "total_steps": 78105, "loss": 0.118, "lr": 8.139952897168179e-10, "epoch": 4.963510658728635, "percentage": 99.27, "elapsed_time": "3:23:37", "remaining_time": "0:01:29", "throughput": 19973.21, "total_tokens": 244014528} +{"current_steps": 77540, "total_steps": 78105, "loss": 0.1087, "lr": 7.998028523745205e-10, "epoch": 4.963830740669612, "percentage": 99.28, "elapsed_time": "3:23:37", "remaining_time": "0:01:29", "throughput": 19973.39, "total_tokens": 244029952} +{"current_steps": 77545, "total_steps": 78105, "loss": 0.1206, "lr": 7.857352120718698e-10, "epoch": 4.964150822610589, "percentage": 99.28, "elapsed_time": "3:23:38", "remaining_time": "0:01:28", "throughput": 19973.58, "total_tokens": 244045824} +{"current_steps": 77550, "total_steps": 78105, "loss": 0.1345, "lr": 7.717923695119145e-10, "epoch": 4.964470904551566, "percentage": 99.29, "elapsed_time": "3:23:39", "remaining_time": "0:01:27", "throughput": 19973.74, "total_tokens": 244060928} +{"current_steps": 77555, "total_steps": 78105, "loss": 0.1289, "lr": 7.579743253907645e-10, "epoch": 4.964790986492542, "percentage": 99.3, "elapsed_time": "3:23:39", "remaining_time": "0:01:26", "throughput": 19973.9, "total_tokens": 244075776} +{"current_steps": 77560, "total_steps": 78105, "loss": 0.181, "lr": 7.442810803981459e-10, "epoch": 4.965111068433519, "percentage": 99.3, "elapsed_time": "3:23:40", "remaining_time": "0:01:25", "throughput": 19974.13, "total_tokens": 244092928} +{"current_steps": 77565, "total_steps": 78105, "loss": 0.1588, "lr": 7.307126352179561e-10, "epoch": 4.965431150374496, "percentage": 99.31, "elapsed_time": "3:23:41", "remaining_time": "0:01:25", "throughput": 19974.29, "total_tokens": 244108288} +{"current_steps": 77570, "total_steps": 78105, "loss": 0.1545, "lr": 7.172689905277086e-10, "epoch": 4.965751232315473, "percentage": 99.32, "elapsed_time": "3:23:41", "remaining_time": "0:01:24", "throughput": 19974.44, "total_tokens": 244122944} +{"current_steps": 77575, "total_steps": 78105, "loss": 0.1665, "lr": 7.039501469988108e-10, "epoch": 4.96607131425645, "percentage": 99.32, "elapsed_time": "3:23:42", "remaining_time": "0:01:23", "throughput": 19974.63, "total_tokens": 244139008} +{"current_steps": 77580, "total_steps": 78105, "loss": 0.127, "lr": 6.907561052962863e-10, "epoch": 4.966391396197427, "percentage": 99.33, "elapsed_time": "3:23:43", "remaining_time": "0:01:22", "throughput": 19974.86, "total_tokens": 244156096} +{"current_steps": 77585, "total_steps": 78105, "loss": 0.1055, "lr": 6.776868660790525e-10, "epoch": 4.966711478138404, "percentage": 99.33, "elapsed_time": "3:23:43", "remaining_time": "0:01:21", "throughput": 19975.08, "total_tokens": 244173376} +{"current_steps": 77590, "total_steps": 78105, "loss": 0.123, "lr": 6.64742429999643e-10, "epoch": 4.967031560079381, "percentage": 99.34, "elapsed_time": "3:23:44", "remaining_time": "0:01:21", "throughput": 19975.23, "total_tokens": 244188352} +{"current_steps": 77595, "total_steps": 78105, "loss": 0.1075, "lr": 6.519227977044851e-10, "epoch": 4.967351642020358, "percentage": 99.35, "elapsed_time": "3:23:45", "remaining_time": "0:01:20", "throughput": 19975.45, "total_tokens": 244205120} +{"current_steps": 77600, "total_steps": 78105, "loss": 0.1182, "lr": 6.392279698333447e-10, "epoch": 4.967671723961334, "percentage": 99.35, "elapsed_time": "3:23:45", "remaining_time": "0:01:19", "throughput": 19975.62, "total_tokens": 244220672} +{"current_steps": 77605, "total_steps": 78105, "loss": 0.167, "lr": 6.266579470207146e-10, "epoch": 4.967991805902311, "percentage": 99.36, "elapsed_time": "3:23:46", "remaining_time": "0:01:18", "throughput": 19975.8, "total_tokens": 244236032} +{"current_steps": 77610, "total_steps": 78105, "loss": 0.1858, "lr": 6.14212729894148e-10, "epoch": 4.968311887843288, "percentage": 99.37, "elapsed_time": "3:23:47", "remaining_time": "0:01:17", "throughput": 19976.01, "total_tokens": 244252800} +{"current_steps": 77615, "total_steps": 78105, "loss": 0.1277, "lr": 6.018923190748149e-10, "epoch": 4.968631969784265, "percentage": 99.37, "elapsed_time": "3:23:47", "remaining_time": "0:01:17", "throughput": 19976.2, "total_tokens": 244268544} +{"current_steps": 77620, "total_steps": 78105, "loss": 0.121, "lr": 5.896967151780564e-10, "epoch": 4.968952051725242, "percentage": 99.38, "elapsed_time": "3:23:48", "remaining_time": "0:01:16", "throughput": 19976.35, "total_tokens": 244283264} +{"current_steps": 77625, "total_steps": 78105, "loss": 0.1355, "lr": 5.776259188131073e-10, "epoch": 4.969272133666219, "percentage": 99.39, "elapsed_time": "3:23:49", "remaining_time": "0:01:15", "throughput": 19976.55, "total_tokens": 244299712} +{"current_steps": 77630, "total_steps": 78105, "loss": 0.1311, "lr": 5.656799305822636e-10, "epoch": 4.969592215607196, "percentage": 99.39, "elapsed_time": "3:23:49", "remaining_time": "0:01:14", "throughput": 19976.71, "total_tokens": 244314816} +{"current_steps": 77635, "total_steps": 78105, "loss": 0.1564, "lr": 5.538587510822702e-10, "epoch": 4.969912297548173, "percentage": 99.4, "elapsed_time": "3:23:50", "remaining_time": "0:01:14", "throughput": 19976.89, "total_tokens": 244330496} +{"current_steps": 77640, "total_steps": 78105, "loss": 0.1397, "lr": 5.421623809034882e-10, "epoch": 4.970232379489149, "percentage": 99.4, "elapsed_time": "3:23:51", "remaining_time": "0:01:13", "throughput": 19977.04, "total_tokens": 244345728} +{"current_steps": 77645, "total_steps": 78105, "loss": 0.0867, "lr": 5.305908206298949e-10, "epoch": 4.970552461430126, "percentage": 99.41, "elapsed_time": "3:23:52", "remaining_time": "0:01:12", "throughput": 19977.22, "total_tokens": 244361728} +{"current_steps": 77650, "total_steps": 78105, "loss": 0.1088, "lr": 5.191440708393613e-10, "epoch": 4.970872543371103, "percentage": 99.42, "elapsed_time": "3:23:52", "remaining_time": "0:01:11", "throughput": 19977.41, "total_tokens": 244377728} +{"current_steps": 77655, "total_steps": 78105, "loss": 0.1322, "lr": 5.078221321036525e-10, "epoch": 4.97119262531208, "percentage": 99.42, "elapsed_time": "3:23:53", "remaining_time": "0:01:10", "throughput": 19977.55, "total_tokens": 244392640} +{"current_steps": 77660, "total_steps": 78105, "loss": 0.1037, "lr": 4.966250049875942e-10, "epoch": 4.971512707253057, "percentage": 99.43, "elapsed_time": "3:23:54", "remaining_time": "0:01:10", "throughput": 19977.71, "total_tokens": 244407616} +{"current_steps": 77665, "total_steps": 78105, "loss": 0.0899, "lr": 4.855526900510166e-10, "epoch": 4.971832789194034, "percentage": 99.44, "elapsed_time": "3:23:54", "remaining_time": "0:01:09", "throughput": 19977.94, "total_tokens": 244425216} +{"current_steps": 77670, "total_steps": 78105, "loss": 0.1292, "lr": 4.746051878462554e-10, "epoch": 4.972152871135011, "percentage": 99.44, "elapsed_time": "3:23:55", "remaining_time": "0:01:08", "throughput": 19978.19, "total_tokens": 244443136} +{"current_steps": 77675, "total_steps": 78105, "loss": 0.1404, "lr": 4.6378249892009565e-10, "epoch": 4.972472953075988, "percentage": 99.45, "elapsed_time": "3:23:56", "remaining_time": "0:01:07", "throughput": 19978.39, "total_tokens": 244459648} +{"current_steps": 77680, "total_steps": 78105, "loss": 0.1061, "lr": 4.5308462381293826e-10, "epoch": 4.972793035016965, "percentage": 99.46, "elapsed_time": "3:23:56", "remaining_time": "0:01:06", "throughput": 19978.58, "total_tokens": 244475840} +{"current_steps": 77685, "total_steps": 78105, "loss": 0.1305, "lr": 4.425115630593557e-10, "epoch": 4.973113116957942, "percentage": 99.46, "elapsed_time": "3:23:57", "remaining_time": "0:01:06", "throughput": 19978.79, "total_tokens": 244492224} +{"current_steps": 77690, "total_steps": 78105, "loss": 0.1381, "lr": 4.320633171867039e-10, "epoch": 4.973433198898918, "percentage": 99.47, "elapsed_time": "3:23:58", "remaining_time": "0:01:05", "throughput": 19978.99, "total_tokens": 244508608} +{"current_steps": 77695, "total_steps": 78105, "loss": 0.1032, "lr": 4.217398867173428e-10, "epoch": 4.973753280839895, "percentage": 99.48, "elapsed_time": "3:23:58", "remaining_time": "0:01:04", "throughput": 19979.14, "total_tokens": 244523328} +{"current_steps": 77700, "total_steps": 78105, "loss": 0.1129, "lr": 4.1154127216613826e-10, "epoch": 4.974073362780872, "percentage": 99.48, "elapsed_time": "3:23:59", "remaining_time": "0:01:03", "throughput": 19979.37, "total_tokens": 244540352} +{"current_steps": 77705, "total_steps": 78105, "loss": 0.1084, "lr": 4.014674740429603e-10, "epoch": 4.974393444721849, "percentage": 99.49, "elapsed_time": "3:24:00", "remaining_time": "0:01:03", "throughput": 19979.54, "total_tokens": 244555840} +{"current_steps": 77710, "total_steps": 78105, "loss": 0.1037, "lr": 3.915184928504623e-10, "epoch": 4.974713526662826, "percentage": 99.49, "elapsed_time": "3:24:00", "remaining_time": "0:01:02", "throughput": 19979.7, "total_tokens": 244570752} +{"current_steps": 77715, "total_steps": 78105, "loss": 0.132, "lr": 3.816943290854691e-10, "epoch": 4.975033608603803, "percentage": 99.5, "elapsed_time": "3:24:01", "remaining_time": "0:01:01", "throughput": 19979.84, "total_tokens": 244585472} +{"current_steps": 77720, "total_steps": 78105, "loss": 0.1101, "lr": 3.719949832386993e-10, "epoch": 4.97535369054478, "percentage": 99.51, "elapsed_time": "3:24:02", "remaining_time": "0:01:00", "throughput": 19980.01, "total_tokens": 244600640} +{"current_steps": 77725, "total_steps": 78105, "loss": 0.0917, "lr": 3.624204557944877e-10, "epoch": 4.975673772485757, "percentage": 99.51, "elapsed_time": "3:24:02", "remaining_time": "0:00:59", "throughput": 19980.19, "total_tokens": 244616128} +{"current_steps": 77730, "total_steps": 78105, "loss": 0.1886, "lr": 3.5297074723078527e-10, "epoch": 4.975993854426733, "percentage": 99.52, "elapsed_time": "3:24:03", "remaining_time": "0:00:59", "throughput": 19980.38, "total_tokens": 244632128} +{"current_steps": 77735, "total_steps": 78105, "loss": 0.1892, "lr": 3.4364585801943685e-10, "epoch": 4.97631393636771, "percentage": 99.53, "elapsed_time": "3:24:04", "remaining_time": "0:00:58", "throughput": 19980.56, "total_tokens": 244647808} +{"current_steps": 77740, "total_steps": 78105, "loss": 0.1363, "lr": 3.344457886264585e-10, "epoch": 4.976634018308687, "percentage": 99.53, "elapsed_time": "3:24:04", "remaining_time": "0:00:57", "throughput": 19980.75, "total_tokens": 244663808} +{"current_steps": 77745, "total_steps": 78105, "loss": 0.1687, "lr": 3.253705395106499e-10, "epoch": 4.976954100249664, "percentage": 99.54, "elapsed_time": "3:24:05", "remaining_time": "0:00:56", "throughput": 19980.94, "total_tokens": 244679936} +{"current_steps": 77750, "total_steps": 78105, "loss": 0.1167, "lr": 3.1642011112581474e-10, "epoch": 4.977274182190641, "percentage": 99.55, "elapsed_time": "3:24:06", "remaining_time": "0:00:55", "throughput": 19981.12, "total_tokens": 244695680} +{"current_steps": 77755, "total_steps": 78105, "loss": 0.1144, "lr": 3.075945039182626e-10, "epoch": 4.977594264131618, "percentage": 99.55, "elapsed_time": "3:24:06", "remaining_time": "0:00:55", "throughput": 19981.29, "total_tokens": 244710784} +{"current_steps": 77760, "total_steps": 78105, "loss": 0.1137, "lr": 2.9889371832930725e-10, "epoch": 4.977914346072595, "percentage": 99.56, "elapsed_time": "3:24:07", "remaining_time": "0:00:54", "throughput": 19981.51, "total_tokens": 244727744} +{"current_steps": 77765, "total_steps": 78105, "loss": 0.1229, "lr": 2.903177547930458e-10, "epoch": 4.978234428013572, "percentage": 99.56, "elapsed_time": "3:24:08", "remaining_time": "0:00:53", "throughput": 19981.69, "total_tokens": 244743360} +{"current_steps": 77770, "total_steps": 78105, "loss": 0.0964, "lr": 2.8186661373774683e-10, "epoch": 4.978554509954549, "percentage": 99.57, "elapsed_time": "3:24:09", "remaining_time": "0:00:52", "throughput": 19981.89, "total_tokens": 244759232} +{"current_steps": 77775, "total_steps": 78105, "loss": 0.1706, "lr": 2.735402955855726e-10, "epoch": 4.978874591895526, "percentage": 99.58, "elapsed_time": "3:24:09", "remaining_time": "0:00:51", "throughput": 19982.04, "total_tokens": 244774016} +{"current_steps": 77780, "total_steps": 78105, "loss": 0.1908, "lr": 2.6533880075202414e-10, "epoch": 4.979194673836502, "percentage": 99.58, "elapsed_time": "3:24:10", "remaining_time": "0:00:51", "throughput": 19982.22, "total_tokens": 244789440} +{"current_steps": 77785, "total_steps": 78105, "loss": 0.1037, "lr": 2.5726212964677364e-10, "epoch": 4.979514755777479, "percentage": 99.59, "elapsed_time": "3:24:11", "remaining_time": "0:00:50", "throughput": 19982.36, "total_tokens": 244804096} +{"current_steps": 77790, "total_steps": 78105, "loss": 0.1278, "lr": 2.493102826733873e-10, "epoch": 4.979834837718456, "percentage": 99.6, "elapsed_time": "3:24:11", "remaining_time": "0:00:49", "throughput": 19982.54, "total_tokens": 244819520} +{"current_steps": 77795, "total_steps": 78105, "loss": 0.1482, "lr": 2.414832602287698e-10, "epoch": 4.980154919659433, "percentage": 99.6, "elapsed_time": "3:24:12", "remaining_time": "0:00:48", "throughput": 19982.72, "total_tokens": 244835712} +{"current_steps": 77800, "total_steps": 78105, "loss": 0.1263, "lr": 2.3378106270344204e-10, "epoch": 4.98047500160041, "percentage": 99.61, "elapsed_time": "3:24:13", "remaining_time": "0:00:48", "throughput": 19982.91, "total_tokens": 244851520} +{"current_steps": 77805, "total_steps": 78105, "loss": 0.1208, "lr": 2.262036904823739e-10, "epoch": 4.980795083541387, "percentage": 99.62, "elapsed_time": "3:24:13", "remaining_time": "0:00:47", "throughput": 19983.1, "total_tokens": 244867712} +{"current_steps": 77810, "total_steps": 78105, "loss": 0.1306, "lr": 2.1875114394387387e-10, "epoch": 4.981115165482364, "percentage": 99.62, "elapsed_time": "3:24:14", "remaining_time": "0:00:46", "throughput": 19983.26, "total_tokens": 244883136} +{"current_steps": 77815, "total_steps": 78105, "loss": 0.1069, "lr": 2.114234234601442e-10, "epoch": 4.981435247423341, "percentage": 99.63, "elapsed_time": "3:24:15", "remaining_time": "0:00:45", "throughput": 19983.42, "total_tokens": 244898112} +{"current_steps": 77820, "total_steps": 78105, "loss": 0.1617, "lr": 2.0422052939700343e-10, "epoch": 4.981755329364317, "percentage": 99.64, "elapsed_time": "3:24:15", "remaining_time": "0:00:44", "throughput": 19983.59, "total_tokens": 244913600} +{"current_steps": 77825, "total_steps": 78105, "loss": 0.1076, "lr": 1.9714246211388622e-10, "epoch": 4.982075411305294, "percentage": 99.64, "elapsed_time": "3:24:16", "remaining_time": "0:00:44", "throughput": 19983.75, "total_tokens": 244929088} +{"current_steps": 77830, "total_steps": 78105, "loss": 0.1483, "lr": 1.9018922196467615e-10, "epoch": 4.982395493246271, "percentage": 99.65, "elapsed_time": "3:24:17", "remaining_time": "0:00:43", "throughput": 19983.94, "total_tokens": 244944832} +{"current_steps": 77835, "total_steps": 78105, "loss": 0.1109, "lr": 1.8336080929631793e-10, "epoch": 4.982715575187248, "percentage": 99.65, "elapsed_time": "3:24:17", "remaining_time": "0:00:42", "throughput": 19984.14, "total_tokens": 244960960} +{"current_steps": 77840, "total_steps": 78105, "loss": 0.1747, "lr": 1.7665722444992761e-10, "epoch": 4.983035657128225, "percentage": 99.66, "elapsed_time": "3:24:18", "remaining_time": "0:00:41", "throughput": 19984.31, "total_tokens": 244977408} +{"current_steps": 77845, "total_steps": 78105, "loss": 0.1474, "lr": 1.700784677602374e-10, "epoch": 4.983355739069202, "percentage": 99.67, "elapsed_time": "3:24:19", "remaining_time": "0:00:40", "throughput": 19984.49, "total_tokens": 244993664} +{"current_steps": 77850, "total_steps": 78105, "loss": 0.0894, "lr": 1.6362453955559575e-10, "epoch": 4.983675821010179, "percentage": 99.67, "elapsed_time": "3:24:19", "remaining_time": "0:00:40", "throughput": 19984.67, "total_tokens": 245009152} +{"current_steps": 77855, "total_steps": 78105, "loss": 0.1345, "lr": 1.5729544015824494e-10, "epoch": 4.983995902951156, "percentage": 99.68, "elapsed_time": "3:24:20", "remaining_time": "0:00:39", "throughput": 19984.87, "total_tokens": 245025216} +{"current_steps": 77860, "total_steps": 78105, "loss": 0.1322, "lr": 1.5109116988432092e-10, "epoch": 4.984315984892133, "percentage": 99.69, "elapsed_time": "3:24:21", "remaining_time": "0:00:38", "throughput": 19985.05, "total_tokens": 245040896} +{"current_steps": 77865, "total_steps": 78105, "loss": 0.1358, "lr": 1.4501172904385353e-10, "epoch": 4.984636066833109, "percentage": 99.69, "elapsed_time": "3:24:21", "remaining_time": "0:00:37", "throughput": 19985.2, "total_tokens": 245055808} +{"current_steps": 77870, "total_steps": 78105, "loss": 0.1047, "lr": 1.3905711794021114e-10, "epoch": 4.984956148774086, "percentage": 99.7, "elapsed_time": "3:24:22", "remaining_time": "0:00:37", "throughput": 19985.4, "total_tokens": 245072448} +{"current_steps": 77875, "total_steps": 78105, "loss": 0.1337, "lr": 1.3322733687065604e-10, "epoch": 4.985276230715063, "percentage": 99.71, "elapsed_time": "3:24:23", "remaining_time": "0:00:36", "throughput": 19985.6, "total_tokens": 245088320} +{"current_steps": 77880, "total_steps": 78105, "loss": 0.1103, "lr": 1.2752238612662172e-10, "epoch": 4.98559631265604, "percentage": 99.71, "elapsed_time": "3:24:23", "remaining_time": "0:00:35", "throughput": 19985.79, "total_tokens": 245104128} +{"current_steps": 77885, "total_steps": 78105, "loss": 0.1379, "lr": 1.2194226599260283e-10, "epoch": 4.985916394597017, "percentage": 99.72, "elapsed_time": "3:24:24", "remaining_time": "0:00:34", "throughput": 19986.0, "total_tokens": 245120512} +{"current_steps": 77890, "total_steps": 78105, "loss": 0.1084, "lr": 1.164869767472654e-10, "epoch": 4.986236476537994, "percentage": 99.72, "elapsed_time": "3:24:25", "remaining_time": "0:00:33", "throughput": 19986.2, "total_tokens": 245136896} +{"current_steps": 77895, "total_steps": 78105, "loss": 0.1533, "lr": 1.1115651866344667e-10, "epoch": 4.986556558478971, "percentage": 99.73, "elapsed_time": "3:24:26", "remaining_time": "0:00:33", "throughput": 19986.45, "total_tokens": 245154432} +{"current_steps": 77900, "total_steps": 78105, "loss": 0.154, "lr": 1.0595089200676756e-10, "epoch": 4.986876640419948, "percentage": 99.74, "elapsed_time": "3:24:26", "remaining_time": "0:00:32", "throughput": 19986.64, "total_tokens": 245170624} +{"current_steps": 77905, "total_steps": 78105, "loss": 0.1123, "lr": 1.0087009703757533e-10, "epoch": 4.987196722360924, "percentage": 99.74, "elapsed_time": "3:24:27", "remaining_time": "0:00:31", "throughput": 19986.81, "total_tokens": 245186112} +{"current_steps": 77910, "total_steps": 78105, "loss": 0.0976, "lr": 9.59141340092784e-11, "epoch": 4.987516804301901, "percentage": 99.75, "elapsed_time": "3:24:28", "remaining_time": "0:00:30", "throughput": 19986.99, "total_tokens": 245201472} +{"current_steps": 77915, "total_steps": 78105, "loss": 0.1485, "lr": 9.108300316973407e-11, "epoch": 4.987836886242878, "percentage": 99.76, "elapsed_time": "3:24:28", "remaining_time": "0:00:29", "throughput": 19987.16, "total_tokens": 245216960} +{"current_steps": 77920, "total_steps": 78105, "loss": 0.1192, "lr": 8.637670475986071e-11, "epoch": 4.988156968183855, "percentage": 99.76, "elapsed_time": "3:24:29", "remaining_time": "0:00:29", "throughput": 19987.36, "total_tokens": 245233280} +{"current_steps": 77925, "total_steps": 78105, "loss": 0.1237, "lr": 8.179523901474806e-11, "epoch": 4.988477050124832, "percentage": 99.77, "elapsed_time": "3:24:30", "remaining_time": "0:00:28", "throughput": 19987.54, "total_tokens": 245248896} +{"current_steps": 77930, "total_steps": 78105, "loss": 0.1457, "lr": 7.733860616310208e-11, "epoch": 4.988797132065809, "percentage": 99.78, "elapsed_time": "3:24:30", "remaining_time": "0:00:27", "throughput": 19987.7, "total_tokens": 245263744} +{"current_steps": 77935, "total_steps": 78105, "loss": 0.0994, "lr": 7.300680642752245e-11, "epoch": 4.989117214006786, "percentage": 99.78, "elapsed_time": "3:24:31", "remaining_time": "0:00:26", "throughput": 19987.85, "total_tokens": 245278848} +{"current_steps": 77940, "total_steps": 78105, "loss": 0.1573, "lr": 6.879984002450269e-11, "epoch": 4.989437295947763, "percentage": 99.79, "elapsed_time": "3:24:32", "remaining_time": "0:00:25", "throughput": 19988.0, "total_tokens": 245293632} +{"current_steps": 77945, "total_steps": 78105, "loss": 0.1218, "lr": 6.471770716387493e-11, "epoch": 4.98975737788874, "percentage": 99.8, "elapsed_time": "3:24:32", "remaining_time": "0:00:25", "throughput": 19988.2, "total_tokens": 245309952} +{"current_steps": 77950, "total_steps": 78105, "loss": 0.1241, "lr": 6.076040804964267e-11, "epoch": 4.990077459829717, "percentage": 99.8, "elapsed_time": "3:24:33", "remaining_time": "0:00:24", "throughput": 19988.37, "total_tokens": 245325888} +{"current_steps": 77955, "total_steps": 78105, "loss": 0.1366, "lr": 5.6927942879148046e-11, "epoch": 4.990397541770693, "percentage": 99.81, "elapsed_time": "3:24:34", "remaining_time": "0:00:23", "throughput": 19988.54, "total_tokens": 245341312} +{"current_steps": 77960, "total_steps": 78105, "loss": 0.1117, "lr": 5.322031184390453e-11, "epoch": 4.99071762371167, "percentage": 99.81, "elapsed_time": "3:24:34", "remaining_time": "0:00:22", "throughput": 19988.74, "total_tokens": 245357760} +{"current_steps": 77965, "total_steps": 78105, "loss": 0.1299, "lr": 4.963751512931936e-11, "epoch": 4.991037705652647, "percentage": 99.82, "elapsed_time": "3:24:35", "remaining_time": "0:00:22", "throughput": 19988.87, "total_tokens": 245371904} +{"current_steps": 77970, "total_steps": 78105, "loss": 0.1135, "lr": 4.617955291386089e-11, "epoch": 4.991357787593624, "percentage": 99.83, "elapsed_time": "3:24:36", "remaining_time": "0:00:21", "throughput": 19989.04, "total_tokens": 245387712} +{"current_steps": 77975, "total_steps": 78105, "loss": 0.1689, "lr": 4.2846425370723924e-11, "epoch": 4.991677869534601, "percentage": 99.83, "elapsed_time": "3:24:36", "remaining_time": "0:00:20", "throughput": 19989.2, "total_tokens": 245402624} +{"current_steps": 77980, "total_steps": 78105, "loss": 0.1612, "lr": 3.9638132665886784e-11, "epoch": 4.991997951475578, "percentage": 99.84, "elapsed_time": "3:24:37", "remaining_time": "0:00:19", "throughput": 19989.39, "total_tokens": 245418624} +{"current_steps": 77985, "total_steps": 78105, "loss": 0.0896, "lr": 3.655467495949916e-11, "epoch": 4.992318033416555, "percentage": 99.85, "elapsed_time": "3:24:38", "remaining_time": "0:00:18", "throughput": 19989.56, "total_tokens": 245433984} +{"current_steps": 77990, "total_steps": 78105, "loss": 0.129, "lr": 3.359605240588204e-11, "epoch": 4.992638115357532, "percentage": 99.85, "elapsed_time": "3:24:38", "remaining_time": "0:00:18", "throughput": 19989.73, "total_tokens": 245449472} +{"current_steps": 77995, "total_steps": 78105, "loss": 0.099, "lr": 3.076226515269509e-11, "epoch": 4.992958197298508, "percentage": 99.86, "elapsed_time": "3:24:39", "remaining_time": "0:00:17", "throughput": 19989.88, "total_tokens": 245464512} +{"current_steps": 78000, "total_steps": 78105, "loss": 0.1504, "lr": 2.8053313341214196e-11, "epoch": 4.993278279239485, "percentage": 99.87, "elapsed_time": "3:24:40", "remaining_time": "0:00:16", "throughput": 19990.08, "total_tokens": 245480768} +{"current_steps": 78005, "total_steps": 78105, "loss": 0.149, "lr": 2.5469197107164113e-11, "epoch": 4.993598361180462, "percentage": 99.87, "elapsed_time": "3:24:40", "remaining_time": "0:00:15", "throughput": 19990.26, "total_tokens": 245496640} +{"current_steps": 78010, "total_steps": 78105, "loss": 0.0999, "lr": 2.300991657905316e-11, "epoch": 4.993918443121439, "percentage": 99.88, "elapsed_time": "3:24:41", "remaining_time": "0:00:14", "throughput": 19990.43, "total_tokens": 245512320} +{"current_steps": 78015, "total_steps": 78105, "loss": 0.1716, "lr": 2.067547187983854e-11, "epoch": 4.994238525062416, "percentage": 99.88, "elapsed_time": "3:24:42", "remaining_time": "0:00:14", "throughput": 19990.58, "total_tokens": 245527168} +{"current_steps": 78020, "total_steps": 78105, "loss": 0.1145, "lr": 1.846586312637122e-11, "epoch": 4.994558607003393, "percentage": 99.89, "elapsed_time": "3:24:42", "remaining_time": "0:00:13", "throughput": 19990.75, "total_tokens": 245542592} +{"current_steps": 78025, "total_steps": 78105, "loss": 0.1178, "lr": 1.638109042884084e-11, "epoch": 4.99487868894437, "percentage": 99.9, "elapsed_time": "3:24:43", "remaining_time": "0:00:12", "throughput": 19990.92, "total_tokens": 245558208} +{"current_steps": 78030, "total_steps": 78105, "loss": 0.0969, "lr": 1.4421153891053252e-11, "epoch": 4.995198770885347, "percentage": 99.9, "elapsed_time": "3:24:44", "remaining_time": "0:00:11", "throughput": 19991.14, "total_tokens": 245574976} +{"current_steps": 78035, "total_steps": 78105, "loss": 0.1036, "lr": 1.2586053611263193e-11, "epoch": 4.995518852826324, "percentage": 99.91, "elapsed_time": "3:24:44", "remaining_time": "0:00:11", "throughput": 19991.29, "total_tokens": 245589888} +{"current_steps": 78040, "total_steps": 78105, "loss": 0.1257, "lr": 1.0875789681064063e-11, "epoch": 4.995838934767301, "percentage": 99.92, "elapsed_time": "3:24:45", "remaining_time": "0:00:10", "throughput": 19991.46, "total_tokens": 245605376} +{"current_steps": 78045, "total_steps": 78105, "loss": 0.1021, "lr": 9.290362185387924e-12, "epoch": 4.996159016708277, "percentage": 99.92, "elapsed_time": "3:24:46", "remaining_time": "0:00:09", "throughput": 19991.63, "total_tokens": 245621056} +{"current_steps": 78050, "total_steps": 78105, "loss": 0.149, "lr": 7.829771204170833e-12, "epoch": 4.996479098649254, "percentage": 99.93, "elapsed_time": "3:24:46", "remaining_time": "0:00:08", "throughput": 19991.81, "total_tokens": 245636928} +{"current_steps": 78055, "total_steps": 78105, "loss": 0.1508, "lr": 6.494016809854842e-12, "epoch": 4.996799180590231, "percentage": 99.94, "elapsed_time": "3:24:47", "remaining_time": "0:00:07", "throughput": 19991.96, "total_tokens": 245651968} +{"current_steps": 78060, "total_steps": 78105, "loss": 0.1208, "lr": 5.283099069053332e-12, "epoch": 4.997119262531208, "percentage": 99.94, "elapsed_time": "3:24:48", "remaining_time": "0:00:07", "throughput": 19992.11, "total_tokens": 245666752} +{"current_steps": 78065, "total_steps": 78105, "loss": 0.1275, "lr": 4.1970180425510155e-12, "epoch": 4.997439344472185, "percentage": 99.95, "elapsed_time": "3:24:48", "remaining_time": "0:00:06", "throughput": 19992.33, "total_tokens": 245683776} +{"current_steps": 78070, "total_steps": 78105, "loss": 0.1582, "lr": 3.2357737844712635e-12, "epoch": 4.997759426413162, "percentage": 99.96, "elapsed_time": "3:24:49", "remaining_time": "0:00:05", "throughput": 19992.5, "total_tokens": 245699008} +{"current_steps": 78075, "total_steps": 78105, "loss": 0.1184, "lr": 2.399366342831222e-12, "epoch": 4.998079508354139, "percentage": 99.96, "elapsed_time": "3:24:50", "remaining_time": "0:00:04", "throughput": 19992.69, "total_tokens": 245715008} +{"current_steps": 78080, "total_steps": 78105, "loss": 0.1467, "lr": 1.6877957592642547e-12, "epoch": 4.998399590295116, "percentage": 99.97, "elapsed_time": "3:24:50", "remaining_time": "0:00:03", "throughput": 19992.86, "total_tokens": 245730240} +{"current_steps": 78085, "total_steps": 78105, "loss": 0.1206, "lr": 1.101062069575054e-12, "epoch": 4.998719672236092, "percentage": 99.97, "elapsed_time": "3:24:51", "remaining_time": "0:00:03", "throughput": 19993.01, "total_tokens": 245744896} +{"current_steps": 78090, "total_steps": 78105, "loss": 0.1843, "lr": 6.391653026294187e-13, "epoch": 4.999039754177069, "percentage": 99.98, "elapsed_time": "3:24:52", "remaining_time": "0:00:02", "throughput": 19993.22, "total_tokens": 245761728} +{"current_steps": 78095, "total_steps": 78105, "loss": 0.1793, "lr": 3.0210548174203214e-13, "epoch": 4.999359836118046, "percentage": 99.99, "elapsed_time": "3:24:52", "remaining_time": "0:00:01", "throughput": 19993.4, "total_tokens": 245777472} +{"current_steps": 78100, "total_steps": 78105, "loss": 0.1155, "lr": 8.988262384379553e-14, "epoch": 4.999679918059023, "percentage": 99.99, "elapsed_time": "3:24:53", "remaining_time": "0:00:00", "throughput": 19993.54, "total_tokens": 245792448} +{"current_steps": 78105, "total_steps": 78105, "loss": 0.0874, "lr": 2.4967394818276035e-15, "epoch": 5.0, "percentage": 100.0, "elapsed_time": "3:24:54", "remaining_time": "0:00:00", "throughput": 19993.63, "total_tokens": 245808128} +{"current_steps": 78105, "total_steps": 78105, "epoch": 5.0, "percentage": 100.0, "elapsed_time": "3:25:44", "remaining_time": "0:00:00", "throughput": 19912.92, "total_tokens": 245808128} diff --git a/trainer_state.json b/trainer_state.json new file mode 100644 index 0000000..0aaecb0 --- /dev/null +++ b/trainer_state.json @@ -0,0 +1,125183 @@ +{ + "best_global_step": 31248, + "best_metric": 0.4481422007083893, + "best_model_checkpoint": "saves_bts_preliminary/base/llama-3.2-1b-instruct/train_record_42_1776331412/checkpoint-31248", + "epoch": 5.0, + "eval_steps": 3906, + "global_step": 78105, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0003200819409768901, + "grad_norm": 665.1940307617188, + "learning_rate": 2.560491614389963e-09, + "loss": 2.1551, + "num_input_tokens_seen": 15360, + "step": 5 + }, + { + "epoch": 0.0006401638819537802, + "grad_norm": 461.3468017578125, + "learning_rate": 5.761106132377417e-09, + "loss": 2.3621, + "num_input_tokens_seen": 31104, + "step": 10 + }, + { + "epoch": 0.0009602458229306702, + "grad_norm": 550.9429931640625, + "learning_rate": 8.961720650364872e-09, + "loss": 2.1414, + "num_input_tokens_seen": 46208, + "step": 15 + }, + { + "epoch": 0.0012803277639075604, + "grad_norm": 378.13751220703125, + "learning_rate": 1.2162335168352324e-08, + "loss": 2.7749, + "num_input_tokens_seen": 62464, + "step": 20 + }, + { + "epoch": 0.0016004097048844504, + "grad_norm": 433.9951477050781, + "learning_rate": 1.536294968633978e-08, + "loss": 2.146, + "num_input_tokens_seen": 79104, + "step": 25 + }, + { + "epoch": 0.0019204916458613404, + "grad_norm": 381.77020263671875, + "learning_rate": 1.8563564204327233e-08, + "loss": 2.2474, + "num_input_tokens_seen": 94912, + "step": 30 + }, + { + "epoch": 0.0022405735868382304, + "grad_norm": 551.3102416992188, + "learning_rate": 2.1764178722314687e-08, + "loss": 2.3948, + "num_input_tokens_seen": 110784, + "step": 35 + }, + { + "epoch": 0.002560655527815121, + "grad_norm": 321.86431884765625, + "learning_rate": 2.496479324030214e-08, + "loss": 2.2191, + "num_input_tokens_seen": 125696, + "step": 40 + }, + { + "epoch": 0.002880737468792011, + "grad_norm": 439.31109619140625, + "learning_rate": 2.8165407758289592e-08, + "loss": 2.2197, + "num_input_tokens_seen": 140672, + "step": 45 + }, + { + "epoch": 0.003200819409768901, + "grad_norm": 411.2679138183594, + "learning_rate": 3.1366022276277046e-08, + "loss": 2.1808, + "num_input_tokens_seen": 155456, + "step": 50 + }, + { + "epoch": 0.003520901350745791, + "grad_norm": 412.19287109375, + "learning_rate": 3.4566636794264506e-08, + "loss": 2.078, + "num_input_tokens_seen": 170816, + "step": 55 + }, + { + "epoch": 0.003840983291722681, + "grad_norm": 465.98797607421875, + "learning_rate": 3.7767251312251953e-08, + "loss": 2.3648, + "num_input_tokens_seen": 185088, + "step": 60 + }, + { + "epoch": 0.004161065232699571, + "grad_norm": 541.8258666992188, + "learning_rate": 4.096786583023941e-08, + "loss": 2.0772, + "num_input_tokens_seen": 200384, + "step": 65 + }, + { + "epoch": 0.004481147173676461, + "grad_norm": 432.58892822265625, + "learning_rate": 4.416848034822686e-08, + "loss": 2.047, + "num_input_tokens_seen": 215744, + "step": 70 + }, + { + "epoch": 0.004801229114653352, + "grad_norm": 288.8199157714844, + "learning_rate": 4.736909486621432e-08, + "loss": 2.0567, + "num_input_tokens_seen": 230400, + "step": 75 + }, + { + "epoch": 0.005121311055630242, + "grad_norm": 828.94580078125, + "learning_rate": 5.056970938420177e-08, + "loss": 1.8879, + "num_input_tokens_seen": 246592, + "step": 80 + }, + { + "epoch": 0.005441392996607132, + "grad_norm": 214.80044555664062, + "learning_rate": 5.377032390218922e-08, + "loss": 1.6707, + "num_input_tokens_seen": 262272, + "step": 85 + }, + { + "epoch": 0.005761474937584022, + "grad_norm": 291.09893798828125, + "learning_rate": 5.6970938420176683e-08, + "loss": 1.5695, + "num_input_tokens_seen": 277760, + "step": 90 + }, + { + "epoch": 0.006081556878560912, + "grad_norm": 362.93035888671875, + "learning_rate": 6.017155293816413e-08, + "loss": 1.8715, + "num_input_tokens_seen": 292992, + "step": 95 + }, + { + "epoch": 0.006401638819537802, + "grad_norm": 216.91578674316406, + "learning_rate": 6.337216745615158e-08, + "loss": 1.6241, + "num_input_tokens_seen": 307840, + "step": 100 + }, + { + "epoch": 0.006721720760514692, + "grad_norm": 165.20880126953125, + "learning_rate": 6.657278197413904e-08, + "loss": 1.6729, + "num_input_tokens_seen": 323008, + "step": 105 + }, + { + "epoch": 0.007041802701491582, + "grad_norm": 220.99749755859375, + "learning_rate": 6.977339649212649e-08, + "loss": 1.6035, + "num_input_tokens_seen": 339456, + "step": 110 + }, + { + "epoch": 0.007361884642468472, + "grad_norm": 118.33271026611328, + "learning_rate": 7.297401101011395e-08, + "loss": 1.4905, + "num_input_tokens_seen": 354816, + "step": 115 + }, + { + "epoch": 0.007681966583445362, + "grad_norm": 121.52469635009766, + "learning_rate": 7.61746255281014e-08, + "loss": 1.1193, + "num_input_tokens_seen": 369472, + "step": 120 + }, + { + "epoch": 0.008002048524422252, + "grad_norm": 90.63349151611328, + "learning_rate": 7.937524004608884e-08, + "loss": 1.2304, + "num_input_tokens_seen": 384768, + "step": 125 + }, + { + "epoch": 0.008322130465399142, + "grad_norm": 144.30996704101562, + "learning_rate": 8.257585456407631e-08, + "loss": 1.2864, + "num_input_tokens_seen": 400192, + "step": 130 + }, + { + "epoch": 0.008642212406376032, + "grad_norm": 106.96931457519531, + "learning_rate": 8.577646908206376e-08, + "loss": 1.1549, + "num_input_tokens_seen": 416640, + "step": 135 + }, + { + "epoch": 0.008962294347352922, + "grad_norm": 96.35757446289062, + "learning_rate": 8.89770836000512e-08, + "loss": 1.0669, + "num_input_tokens_seen": 432640, + "step": 140 + }, + { + "epoch": 0.009282376288329812, + "grad_norm": 150.35189819335938, + "learning_rate": 9.217769811803867e-08, + "loss": 1.1356, + "num_input_tokens_seen": 448640, + "step": 145 + }, + { + "epoch": 0.009602458229306703, + "grad_norm": 110.15003967285156, + "learning_rate": 9.537831263602612e-08, + "loss": 1.0674, + "num_input_tokens_seen": 464448, + "step": 150 + }, + { + "epoch": 0.009922540170283593, + "grad_norm": 106.2641372680664, + "learning_rate": 9.857892715401356e-08, + "loss": 1.0259, + "num_input_tokens_seen": 479488, + "step": 155 + }, + { + "epoch": 0.010242622111260483, + "grad_norm": 79.93838500976562, + "learning_rate": 1.0177954167200103e-07, + "loss": 0.8523, + "num_input_tokens_seen": 495296, + "step": 160 + }, + { + "epoch": 0.010562704052237373, + "grad_norm": 127.67366790771484, + "learning_rate": 1.0498015618998849e-07, + "loss": 0.8782, + "num_input_tokens_seen": 510144, + "step": 165 + }, + { + "epoch": 0.010882785993214263, + "grad_norm": 92.34154510498047, + "learning_rate": 1.0818077070797593e-07, + "loss": 0.8971, + "num_input_tokens_seen": 524928, + "step": 170 + }, + { + "epoch": 0.011202867934191153, + "grad_norm": 55.52916717529297, + "learning_rate": 1.1138138522596339e-07, + "loss": 0.8068, + "num_input_tokens_seen": 541504, + "step": 175 + }, + { + "epoch": 0.011522949875168043, + "grad_norm": 99.5811767578125, + "learning_rate": 1.1458199974395085e-07, + "loss": 0.7725, + "num_input_tokens_seen": 556096, + "step": 180 + }, + { + "epoch": 0.011843031816144933, + "grad_norm": 93.44267272949219, + "learning_rate": 1.1778261426193829e-07, + "loss": 0.7868, + "num_input_tokens_seen": 572736, + "step": 185 + }, + { + "epoch": 0.012163113757121823, + "grad_norm": 84.76941680908203, + "learning_rate": 1.2098322877992575e-07, + "loss": 0.8952, + "num_input_tokens_seen": 588352, + "step": 190 + }, + { + "epoch": 0.012483195698098713, + "grad_norm": 65.54842376708984, + "learning_rate": 1.2418384329791322e-07, + "loss": 1.0207, + "num_input_tokens_seen": 603520, + "step": 195 + }, + { + "epoch": 0.012803277639075603, + "grad_norm": 81.05231475830078, + "learning_rate": 1.2738445781590066e-07, + "loss": 0.837, + "num_input_tokens_seen": 619392, + "step": 200 + }, + { + "epoch": 0.013123359580052493, + "grad_norm": 72.9712905883789, + "learning_rate": 1.305850723338881e-07, + "loss": 0.8286, + "num_input_tokens_seen": 635456, + "step": 205 + }, + { + "epoch": 0.013443441521029383, + "grad_norm": 161.39279174804688, + "learning_rate": 1.3378568685187557e-07, + "loss": 0.9468, + "num_input_tokens_seen": 650880, + "step": 210 + }, + { + "epoch": 0.013763523462006273, + "grad_norm": 64.44905090332031, + "learning_rate": 1.36986301369863e-07, + "loss": 0.8357, + "num_input_tokens_seen": 666688, + "step": 215 + }, + { + "epoch": 0.014083605402983163, + "grad_norm": 80.04804992675781, + "learning_rate": 1.4018691588785048e-07, + "loss": 0.8286, + "num_input_tokens_seen": 682112, + "step": 220 + }, + { + "epoch": 0.014403687343960053, + "grad_norm": 74.0257797241211, + "learning_rate": 1.4338753040583795e-07, + "loss": 0.8606, + "num_input_tokens_seen": 697728, + "step": 225 + }, + { + "epoch": 0.014723769284936943, + "grad_norm": 60.82829666137695, + "learning_rate": 1.4658814492382539e-07, + "loss": 0.6876, + "num_input_tokens_seen": 712704, + "step": 230 + }, + { + "epoch": 0.015043851225913833, + "grad_norm": 102.77518463134766, + "learning_rate": 1.4978875944181283e-07, + "loss": 1.0125, + "num_input_tokens_seen": 729408, + "step": 235 + }, + { + "epoch": 0.015363933166890723, + "grad_norm": 110.79398345947266, + "learning_rate": 1.529893739598003e-07, + "loss": 0.8374, + "num_input_tokens_seen": 745344, + "step": 240 + }, + { + "epoch": 0.015684015107867613, + "grad_norm": 72.13504791259766, + "learning_rate": 1.5618998847778773e-07, + "loss": 0.7396, + "num_input_tokens_seen": 762688, + "step": 245 + }, + { + "epoch": 0.016004097048844503, + "grad_norm": 65.22174835205078, + "learning_rate": 1.5939060299577523e-07, + "loss": 0.7435, + "num_input_tokens_seen": 779392, + "step": 250 + }, + { + "epoch": 0.016324178989821393, + "grad_norm": 88.56317901611328, + "learning_rate": 1.6259121751376267e-07, + "loss": 0.8331, + "num_input_tokens_seen": 794112, + "step": 255 + }, + { + "epoch": 0.016644260930798283, + "grad_norm": 55.04991149902344, + "learning_rate": 1.657918320317501e-07, + "loss": 0.8581, + "num_input_tokens_seen": 810112, + "step": 260 + }, + { + "epoch": 0.016964342871775173, + "grad_norm": 86.12551879882812, + "learning_rate": 1.6899244654973758e-07, + "loss": 0.8835, + "num_input_tokens_seen": 825472, + "step": 265 + }, + { + "epoch": 0.017284424812752063, + "grad_norm": 79.96257019042969, + "learning_rate": 1.7219306106772502e-07, + "loss": 1.0456, + "num_input_tokens_seen": 840128, + "step": 270 + }, + { + "epoch": 0.017604506753728953, + "grad_norm": 71.04536437988281, + "learning_rate": 1.7539367558571246e-07, + "loss": 0.8342, + "num_input_tokens_seen": 855104, + "step": 275 + }, + { + "epoch": 0.017924588694705843, + "grad_norm": 76.29857635498047, + "learning_rate": 1.7859429010369995e-07, + "loss": 0.8543, + "num_input_tokens_seen": 870848, + "step": 280 + }, + { + "epoch": 0.018244670635682733, + "grad_norm": 55.14130783081055, + "learning_rate": 1.817949046216874e-07, + "loss": 0.7704, + "num_input_tokens_seen": 885760, + "step": 285 + }, + { + "epoch": 0.018564752576659623, + "grad_norm": 53.0601806640625, + "learning_rate": 1.8499551913967483e-07, + "loss": 0.7688, + "num_input_tokens_seen": 900928, + "step": 290 + }, + { + "epoch": 0.018884834517636517, + "grad_norm": 59.8973274230957, + "learning_rate": 1.881961336576623e-07, + "loss": 0.8229, + "num_input_tokens_seen": 915968, + "step": 295 + }, + { + "epoch": 0.019204916458613407, + "grad_norm": 134.5538787841797, + "learning_rate": 1.9139674817564974e-07, + "loss": 1.0619, + "num_input_tokens_seen": 933056, + "step": 300 + }, + { + "epoch": 0.019524998399590297, + "grad_norm": 94.20873260498047, + "learning_rate": 1.9459736269363718e-07, + "loss": 0.7684, + "num_input_tokens_seen": 948416, + "step": 305 + }, + { + "epoch": 0.019845080340567187, + "grad_norm": 78.61224365234375, + "learning_rate": 1.9779797721162467e-07, + "loss": 0.8022, + "num_input_tokens_seen": 962880, + "step": 310 + }, + { + "epoch": 0.020165162281544077, + "grad_norm": 69.258544921875, + "learning_rate": 2.0099859172961212e-07, + "loss": 0.8321, + "num_input_tokens_seen": 979904, + "step": 315 + }, + { + "epoch": 0.020485244222520967, + "grad_norm": 74.51676177978516, + "learning_rate": 2.0419920624759956e-07, + "loss": 0.8538, + "num_input_tokens_seen": 995136, + "step": 320 + }, + { + "epoch": 0.020805326163497857, + "grad_norm": 69.00685119628906, + "learning_rate": 2.0739982076558702e-07, + "loss": 0.8307, + "num_input_tokens_seen": 1011008, + "step": 325 + }, + { + "epoch": 0.021125408104474747, + "grad_norm": 66.43866729736328, + "learning_rate": 2.1060043528357446e-07, + "loss": 0.8699, + "num_input_tokens_seen": 1025792, + "step": 330 + }, + { + "epoch": 0.021445490045451637, + "grad_norm": 65.58399963378906, + "learning_rate": 2.138010498015619e-07, + "loss": 0.7236, + "num_input_tokens_seen": 1042944, + "step": 335 + }, + { + "epoch": 0.021765571986428527, + "grad_norm": 85.79719543457031, + "learning_rate": 2.170016643195494e-07, + "loss": 0.8477, + "num_input_tokens_seen": 1058688, + "step": 340 + }, + { + "epoch": 0.022085653927405417, + "grad_norm": 60.7198600769043, + "learning_rate": 2.2020227883753684e-07, + "loss": 0.7446, + "num_input_tokens_seen": 1074560, + "step": 345 + }, + { + "epoch": 0.022405735868382307, + "grad_norm": 83.92898559570312, + "learning_rate": 2.2340289335552428e-07, + "loss": 0.731, + "num_input_tokens_seen": 1089728, + "step": 350 + }, + { + "epoch": 0.022725817809359197, + "grad_norm": 106.46416473388672, + "learning_rate": 2.2660350787351175e-07, + "loss": 0.9404, + "num_input_tokens_seen": 1105024, + "step": 355 + }, + { + "epoch": 0.023045899750336087, + "grad_norm": 56.536590576171875, + "learning_rate": 2.298041223914992e-07, + "loss": 0.8047, + "num_input_tokens_seen": 1121088, + "step": 360 + }, + { + "epoch": 0.023365981691312977, + "grad_norm": 91.30480194091797, + "learning_rate": 2.3300473690948663e-07, + "loss": 0.7779, + "num_input_tokens_seen": 1136896, + "step": 365 + }, + { + "epoch": 0.023686063632289867, + "grad_norm": 63.47901916503906, + "learning_rate": 2.3620535142747412e-07, + "loss": 0.7289, + "num_input_tokens_seen": 1153280, + "step": 370 + }, + { + "epoch": 0.024006145573266757, + "grad_norm": 73.70288848876953, + "learning_rate": 2.3940596594546154e-07, + "loss": 0.9225, + "num_input_tokens_seen": 1169536, + "step": 375 + }, + { + "epoch": 0.024326227514243647, + "grad_norm": 94.86209869384766, + "learning_rate": 2.42606580463449e-07, + "loss": 0.8308, + "num_input_tokens_seen": 1185088, + "step": 380 + }, + { + "epoch": 0.024646309455220537, + "grad_norm": 43.36772537231445, + "learning_rate": 2.4580719498143647e-07, + "loss": 0.6761, + "num_input_tokens_seen": 1200832, + "step": 385 + }, + { + "epoch": 0.024966391396197427, + "grad_norm": 59.62614440917969, + "learning_rate": 2.4900780949942394e-07, + "loss": 0.6987, + "num_input_tokens_seen": 1216320, + "step": 390 + }, + { + "epoch": 0.025286473337174317, + "grad_norm": 53.64003372192383, + "learning_rate": 2.5220842401741135e-07, + "loss": 0.793, + "num_input_tokens_seen": 1232832, + "step": 395 + }, + { + "epoch": 0.025606555278151207, + "grad_norm": 57.19229507446289, + "learning_rate": 2.554090385353988e-07, + "loss": 0.7453, + "num_input_tokens_seen": 1248384, + "step": 400 + }, + { + "epoch": 0.025926637219128097, + "grad_norm": 60.54597854614258, + "learning_rate": 2.586096530533863e-07, + "loss": 0.7423, + "num_input_tokens_seen": 1263936, + "step": 405 + }, + { + "epoch": 0.026246719160104987, + "grad_norm": 73.44427490234375, + "learning_rate": 2.618102675713737e-07, + "loss": 1.2019, + "num_input_tokens_seen": 1294208, + "step": 410 + }, + { + "epoch": 0.026566801101081877, + "grad_norm": 81.38436889648438, + "learning_rate": 2.650108820893612e-07, + "loss": 0.869, + "num_input_tokens_seen": 1309120, + "step": 415 + }, + { + "epoch": 0.026886883042058767, + "grad_norm": 62.996360778808594, + "learning_rate": 2.6821149660734863e-07, + "loss": 0.8688, + "num_input_tokens_seen": 1324224, + "step": 420 + }, + { + "epoch": 0.027206964983035656, + "grad_norm": 74.5821533203125, + "learning_rate": 2.714121111253361e-07, + "loss": 0.6996, + "num_input_tokens_seen": 1341056, + "step": 425 + }, + { + "epoch": 0.027527046924012546, + "grad_norm": 56.93548583984375, + "learning_rate": 2.7461272564332357e-07, + "loss": 0.811, + "num_input_tokens_seen": 1356544, + "step": 430 + }, + { + "epoch": 0.027847128864989436, + "grad_norm": 77.51539611816406, + "learning_rate": 2.77813340161311e-07, + "loss": 0.7142, + "num_input_tokens_seen": 1371840, + "step": 435 + }, + { + "epoch": 0.028167210805966326, + "grad_norm": 78.96206665039062, + "learning_rate": 2.8101395467929845e-07, + "loss": 0.7129, + "num_input_tokens_seen": 1386816, + "step": 440 + }, + { + "epoch": 0.028487292746943216, + "grad_norm": 73.54304504394531, + "learning_rate": 2.842145691972859e-07, + "loss": 0.7842, + "num_input_tokens_seen": 1401792, + "step": 445 + }, + { + "epoch": 0.028807374687920106, + "grad_norm": 83.50739288330078, + "learning_rate": 2.874151837152734e-07, + "loss": 0.6599, + "num_input_tokens_seen": 1416896, + "step": 450 + }, + { + "epoch": 0.029127456628896996, + "grad_norm": 101.03546905517578, + "learning_rate": 2.906157982332608e-07, + "loss": 0.8144, + "num_input_tokens_seen": 1432704, + "step": 455 + }, + { + "epoch": 0.029447538569873886, + "grad_norm": 88.28244018554688, + "learning_rate": 2.9381641275124827e-07, + "loss": 0.7013, + "num_input_tokens_seen": 1448384, + "step": 460 + }, + { + "epoch": 0.029767620510850776, + "grad_norm": 70.85072326660156, + "learning_rate": 2.9701702726923573e-07, + "loss": 0.7257, + "num_input_tokens_seen": 1464832, + "step": 465 + }, + { + "epoch": 0.030087702451827666, + "grad_norm": 77.64027404785156, + "learning_rate": 3.0021764178722315e-07, + "loss": 0.6784, + "num_input_tokens_seen": 1479424, + "step": 470 + }, + { + "epoch": 0.030407784392804556, + "grad_norm": 57.142372131347656, + "learning_rate": 3.034182563052106e-07, + "loss": 0.775, + "num_input_tokens_seen": 1494336, + "step": 475 + }, + { + "epoch": 0.030727866333781446, + "grad_norm": 73.69835662841797, + "learning_rate": 3.066188708231981e-07, + "loss": 0.6793, + "num_input_tokens_seen": 1509184, + "step": 480 + }, + { + "epoch": 0.031047948274758336, + "grad_norm": 41.85919189453125, + "learning_rate": 3.0981948534118555e-07, + "loss": 0.8161, + "num_input_tokens_seen": 1525504, + "step": 485 + }, + { + "epoch": 0.031368030215735226, + "grad_norm": 54.704063415527344, + "learning_rate": 3.13020099859173e-07, + "loss": 0.6406, + "num_input_tokens_seen": 1541504, + "step": 490 + }, + { + "epoch": 0.03168811215671212, + "grad_norm": 43.00974655151367, + "learning_rate": 3.1622071437716043e-07, + "loss": 0.6751, + "num_input_tokens_seen": 1557184, + "step": 495 + }, + { + "epoch": 0.032008194097689006, + "grad_norm": 66.06085205078125, + "learning_rate": 3.194213288951479e-07, + "loss": 0.7808, + "num_input_tokens_seen": 1573440, + "step": 500 + }, + { + "epoch": 0.0323282760386659, + "grad_norm": 42.740814208984375, + "learning_rate": 3.2262194341313536e-07, + "loss": 0.8709, + "num_input_tokens_seen": 1588736, + "step": 505 + }, + { + "epoch": 0.032648357979642786, + "grad_norm": 50.957767486572266, + "learning_rate": 3.258225579311228e-07, + "loss": 0.7501, + "num_input_tokens_seen": 1604352, + "step": 510 + }, + { + "epoch": 0.03296843992061968, + "grad_norm": 49.58732223510742, + "learning_rate": 3.2902317244911025e-07, + "loss": 0.7055, + "num_input_tokens_seen": 1618816, + "step": 515 + }, + { + "epoch": 0.033288521861596566, + "grad_norm": 66.24559020996094, + "learning_rate": 3.322237869670977e-07, + "loss": 0.7891, + "num_input_tokens_seen": 1635648, + "step": 520 + }, + { + "epoch": 0.03360860380257346, + "grad_norm": 66.4916000366211, + "learning_rate": 3.3542440148508513e-07, + "loss": 0.7411, + "num_input_tokens_seen": 1651328, + "step": 525 + }, + { + "epoch": 0.033928685743550346, + "grad_norm": 43.57494354248047, + "learning_rate": 3.386250160030726e-07, + "loss": 0.7586, + "num_input_tokens_seen": 1668928, + "step": 530 + }, + { + "epoch": 0.03424876768452724, + "grad_norm": 51.38517761230469, + "learning_rate": 3.418256305210601e-07, + "loss": 0.6686, + "num_input_tokens_seen": 1685504, + "step": 535 + }, + { + "epoch": 0.034568849625504126, + "grad_norm": 66.16326141357422, + "learning_rate": 3.450262450390475e-07, + "loss": 0.7246, + "num_input_tokens_seen": 1701952, + "step": 540 + }, + { + "epoch": 0.03488893156648102, + "grad_norm": 61.60828399658203, + "learning_rate": 3.48226859557035e-07, + "loss": 0.7636, + "num_input_tokens_seen": 1716992, + "step": 545 + }, + { + "epoch": 0.035209013507457906, + "grad_norm": 39.76877212524414, + "learning_rate": 3.5142747407502246e-07, + "loss": 0.642, + "num_input_tokens_seen": 1732160, + "step": 550 + }, + { + "epoch": 0.0355290954484348, + "grad_norm": 86.3873519897461, + "learning_rate": 3.546280885930099e-07, + "loss": 0.6954, + "num_input_tokens_seen": 1748416, + "step": 555 + }, + { + "epoch": 0.035849177389411686, + "grad_norm": 51.928836822509766, + "learning_rate": 3.5782870311099734e-07, + "loss": 0.7845, + "num_input_tokens_seen": 1763776, + "step": 560 + }, + { + "epoch": 0.03616925933038858, + "grad_norm": 70.09086608886719, + "learning_rate": 3.610293176289848e-07, + "loss": 0.7543, + "num_input_tokens_seen": 1780160, + "step": 565 + }, + { + "epoch": 0.036489341271365466, + "grad_norm": 57.85261535644531, + "learning_rate": 3.642299321469722e-07, + "loss": 0.6669, + "num_input_tokens_seen": 1795968, + "step": 570 + }, + { + "epoch": 0.03680942321234236, + "grad_norm": 54.24429702758789, + "learning_rate": 3.674305466649597e-07, + "loss": 0.667, + "num_input_tokens_seen": 1815424, + "step": 575 + }, + { + "epoch": 0.037129505153319246, + "grad_norm": 83.34840393066406, + "learning_rate": 3.7063116118294716e-07, + "loss": 0.8525, + "num_input_tokens_seen": 1831936, + "step": 580 + }, + { + "epoch": 0.03744958709429614, + "grad_norm": 36.585693359375, + "learning_rate": 3.7383177570093457e-07, + "loss": 0.7031, + "num_input_tokens_seen": 1847424, + "step": 585 + }, + { + "epoch": 0.03776966903527303, + "grad_norm": 54.43415451049805, + "learning_rate": 3.7703239021892204e-07, + "loss": 0.7892, + "num_input_tokens_seen": 1862400, + "step": 590 + }, + { + "epoch": 0.03808975097624992, + "grad_norm": 85.02204132080078, + "learning_rate": 3.8023300473690956e-07, + "loss": 0.7662, + "num_input_tokens_seen": 1876928, + "step": 595 + }, + { + "epoch": 0.03840983291722681, + "grad_norm": 65.5552978515625, + "learning_rate": 3.834336192548969e-07, + "loss": 0.6976, + "num_input_tokens_seen": 1892608, + "step": 600 + }, + { + "epoch": 0.0387299148582037, + "grad_norm": 34.3391227722168, + "learning_rate": 3.8663423377288444e-07, + "loss": 0.7447, + "num_input_tokens_seen": 1909696, + "step": 605 + }, + { + "epoch": 0.03904999679918059, + "grad_norm": 60.28556442260742, + "learning_rate": 3.898348482908719e-07, + "loss": 0.8291, + "num_input_tokens_seen": 1924864, + "step": 610 + }, + { + "epoch": 0.03937007874015748, + "grad_norm": 56.60572052001953, + "learning_rate": 3.930354628088593e-07, + "loss": 0.5963, + "num_input_tokens_seen": 1939968, + "step": 615 + }, + { + "epoch": 0.03969016068113437, + "grad_norm": 102.64276123046875, + "learning_rate": 3.962360773268468e-07, + "loss": 0.7966, + "num_input_tokens_seen": 1955136, + "step": 620 + }, + { + "epoch": 0.04001024262211126, + "grad_norm": 43.5626106262207, + "learning_rate": 3.9943669184483426e-07, + "loss": 0.7001, + "num_input_tokens_seen": 1970880, + "step": 625 + }, + { + "epoch": 0.04033032456308815, + "grad_norm": 60.98816680908203, + "learning_rate": 4.0263730636282167e-07, + "loss": 0.6206, + "num_input_tokens_seen": 1986752, + "step": 630 + }, + { + "epoch": 0.04065040650406504, + "grad_norm": 55.84475326538086, + "learning_rate": 4.0583792088080914e-07, + "loss": 0.6489, + "num_input_tokens_seen": 2001856, + "step": 635 + }, + { + "epoch": 0.04097048844504193, + "grad_norm": 60.64847946166992, + "learning_rate": 4.090385353987966e-07, + "loss": 0.686, + "num_input_tokens_seen": 2019968, + "step": 640 + }, + { + "epoch": 0.04129057038601882, + "grad_norm": 72.6482162475586, + "learning_rate": 4.12239149916784e-07, + "loss": 0.7926, + "num_input_tokens_seen": 2035328, + "step": 645 + }, + { + "epoch": 0.04161065232699571, + "grad_norm": 57.449119567871094, + "learning_rate": 4.154397644347715e-07, + "loss": 0.6748, + "num_input_tokens_seen": 2055168, + "step": 650 + }, + { + "epoch": 0.0419307342679726, + "grad_norm": 56.275997161865234, + "learning_rate": 4.18640378952759e-07, + "loss": 0.7494, + "num_input_tokens_seen": 2071808, + "step": 655 + }, + { + "epoch": 0.04225081620894949, + "grad_norm": 54.97762680053711, + "learning_rate": 4.2184099347074637e-07, + "loss": 0.6756, + "num_input_tokens_seen": 2087424, + "step": 660 + }, + { + "epoch": 0.04257089814992638, + "grad_norm": 39.03304672241211, + "learning_rate": 4.250416079887339e-07, + "loss": 0.8031, + "num_input_tokens_seen": 2102592, + "step": 665 + }, + { + "epoch": 0.04289098009090327, + "grad_norm": 69.05823516845703, + "learning_rate": 4.2824222250672136e-07, + "loss": 0.7141, + "num_input_tokens_seen": 2119488, + "step": 670 + }, + { + "epoch": 0.04321106203188016, + "grad_norm": 43.34394836425781, + "learning_rate": 4.3144283702470877e-07, + "loss": 0.6826, + "num_input_tokens_seen": 2136000, + "step": 675 + }, + { + "epoch": 0.04353114397285705, + "grad_norm": 56.64551544189453, + "learning_rate": 4.3464345154269624e-07, + "loss": 0.8482, + "num_input_tokens_seen": 2152448, + "step": 680 + }, + { + "epoch": 0.04385122591383394, + "grad_norm": 52.42671203613281, + "learning_rate": 4.378440660606837e-07, + "loss": 0.6599, + "num_input_tokens_seen": 2168000, + "step": 685 + }, + { + "epoch": 0.04417130785481083, + "grad_norm": 47.8173828125, + "learning_rate": 4.410446805786711e-07, + "loss": 0.6298, + "num_input_tokens_seen": 2183552, + "step": 690 + }, + { + "epoch": 0.04449138979578772, + "grad_norm": 61.49156188964844, + "learning_rate": 4.442452950966586e-07, + "loss": 0.7726, + "num_input_tokens_seen": 2199488, + "step": 695 + }, + { + "epoch": 0.04481147173676461, + "grad_norm": 44.134681701660156, + "learning_rate": 4.4744590961464605e-07, + "loss": 0.7076, + "num_input_tokens_seen": 2215296, + "step": 700 + }, + { + "epoch": 0.0451315536777415, + "grad_norm": 56.93507766723633, + "learning_rate": 4.5064652413263347e-07, + "loss": 0.7781, + "num_input_tokens_seen": 2230016, + "step": 705 + }, + { + "epoch": 0.04545163561871839, + "grad_norm": 53.70526123046875, + "learning_rate": 4.5384713865062093e-07, + "loss": 0.7446, + "num_input_tokens_seen": 2245056, + "step": 710 + }, + { + "epoch": 0.04577171755969528, + "grad_norm": 43.36274337768555, + "learning_rate": 4.5704775316860845e-07, + "loss": 0.6779, + "num_input_tokens_seen": 2261248, + "step": 715 + }, + { + "epoch": 0.04609179950067217, + "grad_norm": 54.705894470214844, + "learning_rate": 4.602483676865958e-07, + "loss": 0.6933, + "num_input_tokens_seen": 2278016, + "step": 720 + }, + { + "epoch": 0.04641188144164906, + "grad_norm": 51.452781677246094, + "learning_rate": 4.6344898220458334e-07, + "loss": 0.5873, + "num_input_tokens_seen": 2292800, + "step": 725 + }, + { + "epoch": 0.04673196338262595, + "grad_norm": 41.64985656738281, + "learning_rate": 4.666495967225708e-07, + "loss": 0.6548, + "num_input_tokens_seen": 2308224, + "step": 730 + }, + { + "epoch": 0.04705204532360284, + "grad_norm": 51.33768081665039, + "learning_rate": 4.698502112405582e-07, + "loss": 0.7381, + "num_input_tokens_seen": 2325760, + "step": 735 + }, + { + "epoch": 0.04737212726457973, + "grad_norm": 70.91117858886719, + "learning_rate": 4.730508257585457e-07, + "loss": 0.748, + "num_input_tokens_seen": 2341632, + "step": 740 + }, + { + "epoch": 0.04769220920555662, + "grad_norm": 43.42658233642578, + "learning_rate": 4.7625144027653315e-07, + "loss": 0.7587, + "num_input_tokens_seen": 2357504, + "step": 745 + }, + { + "epoch": 0.04801229114653351, + "grad_norm": 45.304691314697266, + "learning_rate": 4.794520547945206e-07, + "loss": 0.7956, + "num_input_tokens_seen": 2372608, + "step": 750 + }, + { + "epoch": 0.0483323730875104, + "grad_norm": 46.09050750732422, + "learning_rate": 4.82652669312508e-07, + "loss": 0.6417, + "num_input_tokens_seen": 2388352, + "step": 755 + }, + { + "epoch": 0.04865245502848729, + "grad_norm": 44.028072357177734, + "learning_rate": 4.858532838304955e-07, + "loss": 0.7503, + "num_input_tokens_seen": 2404480, + "step": 760 + }, + { + "epoch": 0.04897253696946418, + "grad_norm": 33.77079391479492, + "learning_rate": 4.89053898348483e-07, + "loss": 0.5662, + "num_input_tokens_seen": 2419648, + "step": 765 + }, + { + "epoch": 0.04929261891044107, + "grad_norm": 47.61936950683594, + "learning_rate": 4.922545128664704e-07, + "loss": 0.747, + "num_input_tokens_seen": 2435584, + "step": 770 + }, + { + "epoch": 0.04961270085141796, + "grad_norm": 46.914703369140625, + "learning_rate": 4.954551273844579e-07, + "loss": 0.6679, + "num_input_tokens_seen": 2451072, + "step": 775 + }, + { + "epoch": 0.04993278279239485, + "grad_norm": 65.14242553710938, + "learning_rate": 4.986557419024453e-07, + "loss": 0.7975, + "num_input_tokens_seen": 2467968, + "step": 780 + }, + { + "epoch": 0.05025286473337175, + "grad_norm": 62.60467529296875, + "learning_rate": 5.018563564204327e-07, + "loss": 0.6755, + "num_input_tokens_seen": 2484928, + "step": 785 + }, + { + "epoch": 0.05057294667434863, + "grad_norm": 50.371490478515625, + "learning_rate": 5.050569709384202e-07, + "loss": 0.6544, + "num_input_tokens_seen": 2501504, + "step": 790 + }, + { + "epoch": 0.050893028615325527, + "grad_norm": 40.66035461425781, + "learning_rate": 5.082575854564077e-07, + "loss": 0.5379, + "num_input_tokens_seen": 2518848, + "step": 795 + }, + { + "epoch": 0.05121311055630241, + "grad_norm": 40.596134185791016, + "learning_rate": 5.114581999743951e-07, + "loss": 0.6548, + "num_input_tokens_seen": 2535680, + "step": 800 + }, + { + "epoch": 0.051533192497279307, + "grad_norm": 61.10208511352539, + "learning_rate": 5.146588144923826e-07, + "loss": 0.7033, + "num_input_tokens_seen": 2550976, + "step": 805 + }, + { + "epoch": 0.05185327443825619, + "grad_norm": 50.96018600463867, + "learning_rate": 5.1785942901037e-07, + "loss": 0.6126, + "num_input_tokens_seen": 2566656, + "step": 810 + }, + { + "epoch": 0.052173356379233086, + "grad_norm": 52.809627532958984, + "learning_rate": 5.210600435283575e-07, + "loss": 0.6814, + "num_input_tokens_seen": 2581568, + "step": 815 + }, + { + "epoch": 0.05249343832020997, + "grad_norm": 51.88179016113281, + "learning_rate": 5.24260658046345e-07, + "loss": 0.6776, + "num_input_tokens_seen": 2596608, + "step": 820 + }, + { + "epoch": 0.052813520261186866, + "grad_norm": 49.88214111328125, + "learning_rate": 5.274612725643324e-07, + "loss": 0.6859, + "num_input_tokens_seen": 2612032, + "step": 825 + }, + { + "epoch": 0.05313360220216375, + "grad_norm": 70.92676544189453, + "learning_rate": 5.306618870823198e-07, + "loss": 0.8132, + "num_input_tokens_seen": 2627264, + "step": 830 + }, + { + "epoch": 0.053453684143140646, + "grad_norm": 54.18450927734375, + "learning_rate": 5.338625016003073e-07, + "loss": 0.6381, + "num_input_tokens_seen": 2643264, + "step": 835 + }, + { + "epoch": 0.05377376608411753, + "grad_norm": 52.33003616333008, + "learning_rate": 5.370631161182948e-07, + "loss": 0.643, + "num_input_tokens_seen": 2659264, + "step": 840 + }, + { + "epoch": 0.054093848025094426, + "grad_norm": 50.06191635131836, + "learning_rate": 5.402637306362822e-07, + "loss": 0.5987, + "num_input_tokens_seen": 2673856, + "step": 845 + }, + { + "epoch": 0.05441392996607131, + "grad_norm": 41.82462692260742, + "learning_rate": 5.434643451542697e-07, + "loss": 0.5327, + "num_input_tokens_seen": 2688448, + "step": 850 + }, + { + "epoch": 0.054734011907048206, + "grad_norm": 74.65840148925781, + "learning_rate": 5.466649596722571e-07, + "loss": 0.6485, + "num_input_tokens_seen": 2703872, + "step": 855 + }, + { + "epoch": 0.05505409384802509, + "grad_norm": 70.52397155761719, + "learning_rate": 5.498655741902445e-07, + "loss": 0.6576, + "num_input_tokens_seen": 2719040, + "step": 860 + }, + { + "epoch": 0.055374175789001986, + "grad_norm": 45.35000991821289, + "learning_rate": 5.530661887082321e-07, + "loss": 0.6156, + "num_input_tokens_seen": 2735168, + "step": 865 + }, + { + "epoch": 0.05569425772997887, + "grad_norm": 42.28901672363281, + "learning_rate": 5.562668032262195e-07, + "loss": 0.5268, + "num_input_tokens_seen": 2750592, + "step": 870 + }, + { + "epoch": 0.056014339670955766, + "grad_norm": 35.01417922973633, + "learning_rate": 5.594674177442069e-07, + "loss": 0.6933, + "num_input_tokens_seen": 2767232, + "step": 875 + }, + { + "epoch": 0.05633442161193265, + "grad_norm": 62.38637924194336, + "learning_rate": 5.626680322621944e-07, + "loss": 0.7287, + "num_input_tokens_seen": 2784768, + "step": 880 + }, + { + "epoch": 0.056654503552909546, + "grad_norm": 45.86594772338867, + "learning_rate": 5.658686467801819e-07, + "loss": 0.6941, + "num_input_tokens_seen": 2799872, + "step": 885 + }, + { + "epoch": 0.05697458549388643, + "grad_norm": 56.592227935791016, + "learning_rate": 5.690692612981693e-07, + "loss": 0.6689, + "num_input_tokens_seen": 2816000, + "step": 890 + }, + { + "epoch": 0.057294667434863326, + "grad_norm": 53.07893753051758, + "learning_rate": 5.722698758161568e-07, + "loss": 0.6694, + "num_input_tokens_seen": 2831744, + "step": 895 + }, + { + "epoch": 0.05761474937584021, + "grad_norm": 48.84443283081055, + "learning_rate": 5.754704903341442e-07, + "loss": 0.7011, + "num_input_tokens_seen": 2847424, + "step": 900 + }, + { + "epoch": 0.057934831316817106, + "grad_norm": 60.7545166015625, + "learning_rate": 5.786711048521316e-07, + "loss": 0.738, + "num_input_tokens_seen": 2862272, + "step": 905 + }, + { + "epoch": 0.05825491325779399, + "grad_norm": 43.08628845214844, + "learning_rate": 5.818717193701191e-07, + "loss": 0.524, + "num_input_tokens_seen": 2877120, + "step": 910 + }, + { + "epoch": 0.058574995198770886, + "grad_norm": 43.929237365722656, + "learning_rate": 5.850723338881066e-07, + "loss": 0.6219, + "num_input_tokens_seen": 2894592, + "step": 915 + }, + { + "epoch": 0.05889507713974777, + "grad_norm": 43.51637268066406, + "learning_rate": 5.88272948406094e-07, + "loss": 0.6786, + "num_input_tokens_seen": 2909888, + "step": 920 + }, + { + "epoch": 0.059215159080724666, + "grad_norm": 49.569942474365234, + "learning_rate": 5.914735629240815e-07, + "loss": 0.7083, + "num_input_tokens_seen": 2925632, + "step": 925 + }, + { + "epoch": 0.05953524102170155, + "grad_norm": 45.110687255859375, + "learning_rate": 5.946741774420689e-07, + "loss": 0.658, + "num_input_tokens_seen": 2941760, + "step": 930 + }, + { + "epoch": 0.059855322962678446, + "grad_norm": 46.60281753540039, + "learning_rate": 5.978747919600564e-07, + "loss": 0.814, + "num_input_tokens_seen": 2957376, + "step": 935 + }, + { + "epoch": 0.06017540490365533, + "grad_norm": 63.01597213745117, + "learning_rate": 6.010754064780439e-07, + "loss": 0.6456, + "num_input_tokens_seen": 2972800, + "step": 940 + }, + { + "epoch": 0.060495486844632226, + "grad_norm": 54.9277458190918, + "learning_rate": 6.042760209960313e-07, + "loss": 0.8423, + "num_input_tokens_seen": 2988480, + "step": 945 + }, + { + "epoch": 0.06081556878560911, + "grad_norm": 41.451438903808594, + "learning_rate": 6.074766355140187e-07, + "loss": 0.7039, + "num_input_tokens_seen": 3004480, + "step": 950 + }, + { + "epoch": 0.061135650726586006, + "grad_norm": 51.21691131591797, + "learning_rate": 6.106772500320062e-07, + "loss": 0.5306, + "num_input_tokens_seen": 3020288, + "step": 955 + }, + { + "epoch": 0.06145573266756289, + "grad_norm": 51.52146911621094, + "learning_rate": 6.138778645499937e-07, + "loss": 0.6458, + "num_input_tokens_seen": 3035968, + "step": 960 + }, + { + "epoch": 0.061775814608539786, + "grad_norm": 71.30914306640625, + "learning_rate": 6.170784790679811e-07, + "loss": 0.8471, + "num_input_tokens_seen": 3051776, + "step": 965 + }, + { + "epoch": 0.06209589654951667, + "grad_norm": 73.83409881591797, + "learning_rate": 6.202790935859686e-07, + "loss": 0.6216, + "num_input_tokens_seen": 3066560, + "step": 970 + }, + { + "epoch": 0.062415978490493566, + "grad_norm": 56.74808883666992, + "learning_rate": 6.23479708103956e-07, + "loss": 0.5871, + "num_input_tokens_seen": 3082496, + "step": 975 + }, + { + "epoch": 0.06273606043147045, + "grad_norm": 48.36574172973633, + "learning_rate": 6.266803226219435e-07, + "loss": 0.66, + "num_input_tokens_seen": 3097856, + "step": 980 + }, + { + "epoch": 0.06305614237244735, + "grad_norm": 40.88313674926758, + "learning_rate": 6.298809371399309e-07, + "loss": 0.537, + "num_input_tokens_seen": 3113664, + "step": 985 + }, + { + "epoch": 0.06337622431342424, + "grad_norm": 40.82008361816406, + "learning_rate": 6.330815516579184e-07, + "loss": 0.6638, + "num_input_tokens_seen": 3129792, + "step": 990 + }, + { + "epoch": 0.06369630625440113, + "grad_norm": 33.187496185302734, + "learning_rate": 6.362821661759058e-07, + "loss": 0.7097, + "num_input_tokens_seen": 3145024, + "step": 995 + }, + { + "epoch": 0.06401638819537801, + "grad_norm": 49.209102630615234, + "learning_rate": 6.394827806938933e-07, + "loss": 0.7786, + "num_input_tokens_seen": 3161216, + "step": 1000 + }, + { + "epoch": 0.0643364701363549, + "grad_norm": 35.893821716308594, + "learning_rate": 6.426833952118808e-07, + "loss": 0.4931, + "num_input_tokens_seen": 3176960, + "step": 1005 + }, + { + "epoch": 0.0646565520773318, + "grad_norm": 43.26754379272461, + "learning_rate": 6.458840097298682e-07, + "loss": 0.6856, + "num_input_tokens_seen": 3193088, + "step": 1010 + }, + { + "epoch": 0.0649766340183087, + "grad_norm": 59.718223571777344, + "learning_rate": 6.490846242478556e-07, + "loss": 0.7215, + "num_input_tokens_seen": 3210112, + "step": 1015 + }, + { + "epoch": 0.06529671595928557, + "grad_norm": 41.272186279296875, + "learning_rate": 6.52285238765843e-07, + "loss": 0.5505, + "num_input_tokens_seen": 3224768, + "step": 1020 + }, + { + "epoch": 0.06561679790026247, + "grad_norm": 40.542118072509766, + "learning_rate": 6.554858532838305e-07, + "loss": 0.6565, + "num_input_tokens_seen": 3240128, + "step": 1025 + }, + { + "epoch": 0.06593687984123936, + "grad_norm": 47.87497329711914, + "learning_rate": 6.58686467801818e-07, + "loss": 0.6695, + "num_input_tokens_seen": 3256576, + "step": 1030 + }, + { + "epoch": 0.06625696178221625, + "grad_norm": 45.24762725830078, + "learning_rate": 6.618870823198055e-07, + "loss": 0.6371, + "num_input_tokens_seen": 3272384, + "step": 1035 + }, + { + "epoch": 0.06657704372319313, + "grad_norm": 35.92161178588867, + "learning_rate": 6.65087696837793e-07, + "loss": 0.4953, + "num_input_tokens_seen": 3288512, + "step": 1040 + }, + { + "epoch": 0.06689712566417003, + "grad_norm": 57.04534149169922, + "learning_rate": 6.682883113557803e-07, + "loss": 0.5671, + "num_input_tokens_seen": 3306304, + "step": 1045 + }, + { + "epoch": 0.06721720760514692, + "grad_norm": 46.77021026611328, + "learning_rate": 6.714889258737677e-07, + "loss": 0.5265, + "num_input_tokens_seen": 3321344, + "step": 1050 + }, + { + "epoch": 0.06753728954612381, + "grad_norm": 42.27693176269531, + "learning_rate": 6.746895403917553e-07, + "loss": 0.6922, + "num_input_tokens_seen": 3338560, + "step": 1055 + }, + { + "epoch": 0.06785737148710069, + "grad_norm": 42.63730239868164, + "learning_rate": 6.778901549097428e-07, + "loss": 0.5338, + "num_input_tokens_seen": 3353152, + "step": 1060 + }, + { + "epoch": 0.06817745342807759, + "grad_norm": 49.433223724365234, + "learning_rate": 6.810907694277303e-07, + "loss": 0.517, + "num_input_tokens_seen": 3369536, + "step": 1065 + }, + { + "epoch": 0.06849753536905448, + "grad_norm": 50.5761604309082, + "learning_rate": 6.842913839457177e-07, + "loss": 0.6104, + "num_input_tokens_seen": 3384832, + "step": 1070 + }, + { + "epoch": 0.06881761731003137, + "grad_norm": 36.79216766357422, + "learning_rate": 6.874919984637051e-07, + "loss": 0.5292, + "num_input_tokens_seen": 3399424, + "step": 1075 + }, + { + "epoch": 0.06913769925100825, + "grad_norm": 36.427913665771484, + "learning_rate": 6.906926129816925e-07, + "loss": 0.697, + "num_input_tokens_seen": 3416704, + "step": 1080 + }, + { + "epoch": 0.06945778119198515, + "grad_norm": 61.8076057434082, + "learning_rate": 6.9389322749968e-07, + "loss": 0.649, + "num_input_tokens_seen": 3431552, + "step": 1085 + }, + { + "epoch": 0.06977786313296204, + "grad_norm": 42.833839416503906, + "learning_rate": 6.970938420176675e-07, + "loss": 0.72, + "num_input_tokens_seen": 3447488, + "step": 1090 + }, + { + "epoch": 0.07009794507393893, + "grad_norm": 46.1790657043457, + "learning_rate": 7.00294456535655e-07, + "loss": 0.5332, + "num_input_tokens_seen": 3463424, + "step": 1095 + }, + { + "epoch": 0.07041802701491581, + "grad_norm": 55.056427001953125, + "learning_rate": 7.034950710536424e-07, + "loss": 0.6978, + "num_input_tokens_seen": 3479680, + "step": 1100 + }, + { + "epoch": 0.0707381089558927, + "grad_norm": 52.99421691894531, + "learning_rate": 7.066956855716298e-07, + "loss": 0.5592, + "num_input_tokens_seen": 3495552, + "step": 1105 + }, + { + "epoch": 0.0710581908968696, + "grad_norm": 57.095001220703125, + "learning_rate": 7.098963000896172e-07, + "loss": 0.5827, + "num_input_tokens_seen": 3510976, + "step": 1110 + }, + { + "epoch": 0.0713782728378465, + "grad_norm": 55.853031158447266, + "learning_rate": 7.130969146076047e-07, + "loss": 0.6853, + "num_input_tokens_seen": 3526016, + "step": 1115 + }, + { + "epoch": 0.07169835477882337, + "grad_norm": 25.43338966369629, + "learning_rate": 7.162975291255922e-07, + "loss": 0.591, + "num_input_tokens_seen": 3540544, + "step": 1120 + }, + { + "epoch": 0.07201843671980027, + "grad_norm": 58.295326232910156, + "learning_rate": 7.194981436435796e-07, + "loss": 0.618, + "num_input_tokens_seen": 3556416, + "step": 1125 + }, + { + "epoch": 0.07233851866077716, + "grad_norm": 56.83702850341797, + "learning_rate": 7.226987581615671e-07, + "loss": 0.5904, + "num_input_tokens_seen": 3572096, + "step": 1130 + }, + { + "epoch": 0.07265860060175405, + "grad_norm": 63.959800720214844, + "learning_rate": 7.258993726795545e-07, + "loss": 0.5448, + "num_input_tokens_seen": 3587712, + "step": 1135 + }, + { + "epoch": 0.07297868254273093, + "grad_norm": 37.382205963134766, + "learning_rate": 7.290999871975419e-07, + "loss": 0.6193, + "num_input_tokens_seen": 3605056, + "step": 1140 + }, + { + "epoch": 0.07329876448370783, + "grad_norm": 48.51614761352539, + "learning_rate": 7.323006017155294e-07, + "loss": 0.7339, + "num_input_tokens_seen": 3621184, + "step": 1145 + }, + { + "epoch": 0.07361884642468472, + "grad_norm": 33.038692474365234, + "learning_rate": 7.355012162335169e-07, + "loss": 0.6294, + "num_input_tokens_seen": 3635392, + "step": 1150 + }, + { + "epoch": 0.07393892836566161, + "grad_norm": 32.9710578918457, + "learning_rate": 7.387018307515043e-07, + "loss": 0.5553, + "num_input_tokens_seen": 3649984, + "step": 1155 + }, + { + "epoch": 0.07425901030663849, + "grad_norm": 44.39550018310547, + "learning_rate": 7.419024452694919e-07, + "loss": 0.6221, + "num_input_tokens_seen": 3665920, + "step": 1160 + }, + { + "epoch": 0.07457909224761539, + "grad_norm": 35.02703094482422, + "learning_rate": 7.451030597874792e-07, + "loss": 0.4922, + "num_input_tokens_seen": 3680256, + "step": 1165 + }, + { + "epoch": 0.07489917418859228, + "grad_norm": 46.17085266113281, + "learning_rate": 7.483036743054666e-07, + "loss": 0.5405, + "num_input_tokens_seen": 3697536, + "step": 1170 + }, + { + "epoch": 0.07521925612956917, + "grad_norm": 35.6434326171875, + "learning_rate": 7.515042888234542e-07, + "loss": 0.659, + "num_input_tokens_seen": 3713088, + "step": 1175 + }, + { + "epoch": 0.07553933807054607, + "grad_norm": 62.18160629272461, + "learning_rate": 7.547049033414417e-07, + "loss": 0.6654, + "num_input_tokens_seen": 3729920, + "step": 1180 + }, + { + "epoch": 0.07585942001152295, + "grad_norm": 56.37630844116211, + "learning_rate": 7.579055178594291e-07, + "loss": 0.5656, + "num_input_tokens_seen": 3745664, + "step": 1185 + }, + { + "epoch": 0.07617950195249984, + "grad_norm": 43.3575439453125, + "learning_rate": 7.611061323774166e-07, + "loss": 0.5935, + "num_input_tokens_seen": 3760576, + "step": 1190 + }, + { + "epoch": 0.07649958389347673, + "grad_norm": 33.00615310668945, + "learning_rate": 7.64306746895404e-07, + "loss": 0.5309, + "num_input_tokens_seen": 3776576, + "step": 1195 + }, + { + "epoch": 0.07681966583445363, + "grad_norm": 61.66545486450195, + "learning_rate": 7.675073614133914e-07, + "loss": 0.6443, + "num_input_tokens_seen": 3792384, + "step": 1200 + }, + { + "epoch": 0.0771397477754305, + "grad_norm": 48.94987487792969, + "learning_rate": 7.707079759313789e-07, + "loss": 0.6309, + "num_input_tokens_seen": 3806592, + "step": 1205 + }, + { + "epoch": 0.0774598297164074, + "grad_norm": 46.02937698364258, + "learning_rate": 7.739085904493664e-07, + "loss": 0.6041, + "num_input_tokens_seen": 3822080, + "step": 1210 + }, + { + "epoch": 0.07777991165738429, + "grad_norm": 49.60054397583008, + "learning_rate": 7.771092049673538e-07, + "loss": 0.5811, + "num_input_tokens_seen": 3837120, + "step": 1215 + }, + { + "epoch": 0.07809999359836119, + "grad_norm": 43.28769302368164, + "learning_rate": 7.803098194853412e-07, + "loss": 0.7375, + "num_input_tokens_seen": 3852864, + "step": 1220 + }, + { + "epoch": 0.07842007553933807, + "grad_norm": 34.01580810546875, + "learning_rate": 7.835104340033287e-07, + "loss": 0.5147, + "num_input_tokens_seen": 3869184, + "step": 1225 + }, + { + "epoch": 0.07874015748031496, + "grad_norm": 39.29686737060547, + "learning_rate": 7.867110485213161e-07, + "loss": 0.5631, + "num_input_tokens_seen": 3885248, + "step": 1230 + }, + { + "epoch": 0.07906023942129185, + "grad_norm": 37.10702133178711, + "learning_rate": 7.899116630393036e-07, + "loss": 0.4543, + "num_input_tokens_seen": 3900416, + "step": 1235 + }, + { + "epoch": 0.07938032136226875, + "grad_norm": 42.77960205078125, + "learning_rate": 7.931122775572911e-07, + "loss": 0.5628, + "num_input_tokens_seen": 3916096, + "step": 1240 + }, + { + "epoch": 0.07970040330324563, + "grad_norm": 64.74522399902344, + "learning_rate": 7.963128920752785e-07, + "loss": 0.6837, + "num_input_tokens_seen": 3933312, + "step": 1245 + }, + { + "epoch": 0.08002048524422252, + "grad_norm": 110.02814483642578, + "learning_rate": 7.995135065932659e-07, + "loss": 0.6801, + "num_input_tokens_seen": 3949440, + "step": 1250 + }, + { + "epoch": 0.08034056718519941, + "grad_norm": 53.23227310180664, + "learning_rate": 8.027141211112534e-07, + "loss": 0.6986, + "num_input_tokens_seen": 3964992, + "step": 1255 + }, + { + "epoch": 0.0806606491261763, + "grad_norm": 60.859100341796875, + "learning_rate": 8.059147356292408e-07, + "loss": 0.7041, + "num_input_tokens_seen": 3981696, + "step": 1260 + }, + { + "epoch": 0.08098073106715319, + "grad_norm": 51.11474609375, + "learning_rate": 8.091153501472283e-07, + "loss": 0.6558, + "num_input_tokens_seen": 3997248, + "step": 1265 + }, + { + "epoch": 0.08130081300813008, + "grad_norm": 31.851579666137695, + "learning_rate": 8.123159646652158e-07, + "loss": 0.6925, + "num_input_tokens_seen": 4011648, + "step": 1270 + }, + { + "epoch": 0.08162089494910697, + "grad_norm": 62.27423095703125, + "learning_rate": 8.155165791832032e-07, + "loss": 0.5155, + "num_input_tokens_seen": 4028160, + "step": 1275 + }, + { + "epoch": 0.08194097689008387, + "grad_norm": 53.183143615722656, + "learning_rate": 8.187171937011906e-07, + "loss": 0.6299, + "num_input_tokens_seen": 4043584, + "step": 1280 + }, + { + "epoch": 0.08226105883106075, + "grad_norm": 51.07596206665039, + "learning_rate": 8.219178082191781e-07, + "loss": 0.7085, + "num_input_tokens_seen": 4059456, + "step": 1285 + }, + { + "epoch": 0.08258114077203764, + "grad_norm": 26.739032745361328, + "learning_rate": 8.251184227371655e-07, + "loss": 0.5905, + "num_input_tokens_seen": 4076096, + "step": 1290 + }, + { + "epoch": 0.08290122271301453, + "grad_norm": 45.553611755371094, + "learning_rate": 8.283190372551531e-07, + "loss": 0.6022, + "num_input_tokens_seen": 4093568, + "step": 1295 + }, + { + "epoch": 0.08322130465399143, + "grad_norm": 38.94580078125, + "learning_rate": 8.315196517731406e-07, + "loss": 0.6345, + "num_input_tokens_seen": 4108864, + "step": 1300 + }, + { + "epoch": 0.0835413865949683, + "grad_norm": 44.810638427734375, + "learning_rate": 8.34720266291128e-07, + "loss": 0.6653, + "num_input_tokens_seen": 4124224, + "step": 1305 + }, + { + "epoch": 0.0838614685359452, + "grad_norm": 49.50967025756836, + "learning_rate": 8.379208808091154e-07, + "loss": 0.5601, + "num_input_tokens_seen": 4139008, + "step": 1310 + }, + { + "epoch": 0.08418155047692209, + "grad_norm": 40.894466400146484, + "learning_rate": 8.411214953271029e-07, + "loss": 0.5444, + "num_input_tokens_seen": 4155008, + "step": 1315 + }, + { + "epoch": 0.08450163241789899, + "grad_norm": 33.00490188598633, + "learning_rate": 8.443221098450903e-07, + "loss": 0.5153, + "num_input_tokens_seen": 4172544, + "step": 1320 + }, + { + "epoch": 0.08482171435887587, + "grad_norm": 48.815155029296875, + "learning_rate": 8.475227243630778e-07, + "loss": 0.7105, + "num_input_tokens_seen": 4188416, + "step": 1325 + }, + { + "epoch": 0.08514179629985276, + "grad_norm": 26.39876365661621, + "learning_rate": 8.507233388810653e-07, + "loss": 0.5091, + "num_input_tokens_seen": 4202560, + "step": 1330 + }, + { + "epoch": 0.08546187824082965, + "grad_norm": 55.284690856933594, + "learning_rate": 8.539239533990527e-07, + "loss": 0.5489, + "num_input_tokens_seen": 4219392, + "step": 1335 + }, + { + "epoch": 0.08578196018180655, + "grad_norm": 57.29672622680664, + "learning_rate": 8.571245679170401e-07, + "loss": 0.5204, + "num_input_tokens_seen": 4235328, + "step": 1340 + }, + { + "epoch": 0.08610204212278343, + "grad_norm": 57.691734313964844, + "learning_rate": 8.603251824350276e-07, + "loss": 0.5803, + "num_input_tokens_seen": 4250368, + "step": 1345 + }, + { + "epoch": 0.08642212406376032, + "grad_norm": 32.90237808227539, + "learning_rate": 8.63525796953015e-07, + "loss": 0.5222, + "num_input_tokens_seen": 4265856, + "step": 1350 + }, + { + "epoch": 0.08674220600473721, + "grad_norm": 36.315696716308594, + "learning_rate": 8.667264114710025e-07, + "loss": 0.6881, + "num_input_tokens_seen": 4281792, + "step": 1355 + }, + { + "epoch": 0.0870622879457141, + "grad_norm": 39.44912338256836, + "learning_rate": 8.6992702598899e-07, + "loss": 0.624, + "num_input_tokens_seen": 4297088, + "step": 1360 + }, + { + "epoch": 0.087382369886691, + "grad_norm": 41.882659912109375, + "learning_rate": 8.731276405069774e-07, + "loss": 0.549, + "num_input_tokens_seen": 4312192, + "step": 1365 + }, + { + "epoch": 0.08770245182766788, + "grad_norm": 32.866432189941406, + "learning_rate": 8.763282550249648e-07, + "loss": 0.5303, + "num_input_tokens_seen": 4326720, + "step": 1370 + }, + { + "epoch": 0.08802253376864477, + "grad_norm": 63.38829803466797, + "learning_rate": 8.795288695429523e-07, + "loss": 0.7837, + "num_input_tokens_seen": 4341760, + "step": 1375 + }, + { + "epoch": 0.08834261570962167, + "grad_norm": 46.85302734375, + "learning_rate": 8.827294840609397e-07, + "loss": 0.6399, + "num_input_tokens_seen": 4357760, + "step": 1380 + }, + { + "epoch": 0.08866269765059856, + "grad_norm": 45.806060791015625, + "learning_rate": 8.859300985789272e-07, + "loss": 0.6436, + "num_input_tokens_seen": 4373824, + "step": 1385 + }, + { + "epoch": 0.08898277959157544, + "grad_norm": 42.48093795776367, + "learning_rate": 8.891307130969147e-07, + "loss": 0.4859, + "num_input_tokens_seen": 4388992, + "step": 1390 + }, + { + "epoch": 0.08930286153255233, + "grad_norm": 37.50892639160156, + "learning_rate": 8.923313276149021e-07, + "loss": 0.5303, + "num_input_tokens_seen": 4404288, + "step": 1395 + }, + { + "epoch": 0.08962294347352923, + "grad_norm": 56.63300704956055, + "learning_rate": 8.955319421328895e-07, + "loss": 0.5821, + "num_input_tokens_seen": 4419840, + "step": 1400 + }, + { + "epoch": 0.08994302541450612, + "grad_norm": 53.720794677734375, + "learning_rate": 8.98732556650877e-07, + "loss": 0.5764, + "num_input_tokens_seen": 4435200, + "step": 1405 + }, + { + "epoch": 0.090263107355483, + "grad_norm": 60.1173095703125, + "learning_rate": 9.019331711688644e-07, + "loss": 0.6502, + "num_input_tokens_seen": 4450368, + "step": 1410 + }, + { + "epoch": 0.09058318929645989, + "grad_norm": 46.95440673828125, + "learning_rate": 9.05133785686852e-07, + "loss": 0.494, + "num_input_tokens_seen": 4466048, + "step": 1415 + }, + { + "epoch": 0.09090327123743679, + "grad_norm": 64.48873901367188, + "learning_rate": 9.083344002048395e-07, + "loss": 0.5941, + "num_input_tokens_seen": 4481920, + "step": 1420 + }, + { + "epoch": 0.09122335317841368, + "grad_norm": 84.09088134765625, + "learning_rate": 9.115350147228269e-07, + "loss": 0.6146, + "num_input_tokens_seen": 4498112, + "step": 1425 + }, + { + "epoch": 0.09154343511939056, + "grad_norm": 38.68749237060547, + "learning_rate": 9.147356292408143e-07, + "loss": 0.4945, + "num_input_tokens_seen": 4515648, + "step": 1430 + }, + { + "epoch": 0.09186351706036745, + "grad_norm": 54.44438552856445, + "learning_rate": 9.179362437588018e-07, + "loss": 0.6525, + "num_input_tokens_seen": 4531840, + "step": 1435 + }, + { + "epoch": 0.09218359900134435, + "grad_norm": 45.8602180480957, + "learning_rate": 9.211368582767892e-07, + "loss": 0.5896, + "num_input_tokens_seen": 4547456, + "step": 1440 + }, + { + "epoch": 0.09250368094232124, + "grad_norm": 47.04875564575195, + "learning_rate": 9.243374727947767e-07, + "loss": 0.5707, + "num_input_tokens_seen": 4563328, + "step": 1445 + }, + { + "epoch": 0.09282376288329812, + "grad_norm": 32.941192626953125, + "learning_rate": 9.275380873127642e-07, + "loss": 0.5985, + "num_input_tokens_seen": 4579392, + "step": 1450 + }, + { + "epoch": 0.09314384482427501, + "grad_norm": 73.8390121459961, + "learning_rate": 9.307387018307516e-07, + "loss": 0.5964, + "num_input_tokens_seen": 4595584, + "step": 1455 + }, + { + "epoch": 0.0934639267652519, + "grad_norm": 38.29906463623047, + "learning_rate": 9.33939316348739e-07, + "loss": 0.5431, + "num_input_tokens_seen": 4610112, + "step": 1460 + }, + { + "epoch": 0.0937840087062288, + "grad_norm": 69.439208984375, + "learning_rate": 9.371399308667265e-07, + "loss": 0.6868, + "num_input_tokens_seen": 4626432, + "step": 1465 + }, + { + "epoch": 0.09410409064720568, + "grad_norm": 52.77604675292969, + "learning_rate": 9.403405453847139e-07, + "loss": 0.7079, + "num_input_tokens_seen": 4641792, + "step": 1470 + }, + { + "epoch": 0.09442417258818257, + "grad_norm": 21.79796028137207, + "learning_rate": 9.435411599027014e-07, + "loss": 0.4159, + "num_input_tokens_seen": 4656896, + "step": 1475 + }, + { + "epoch": 0.09474425452915947, + "grad_norm": 49.98097610473633, + "learning_rate": 9.467417744206889e-07, + "loss": 0.6569, + "num_input_tokens_seen": 4673472, + "step": 1480 + }, + { + "epoch": 0.09506433647013636, + "grad_norm": 34.78599166870117, + "learning_rate": 9.499423889386763e-07, + "loss": 0.4387, + "num_input_tokens_seen": 4688896, + "step": 1485 + }, + { + "epoch": 0.09538441841111324, + "grad_norm": 43.45535659790039, + "learning_rate": 9.531430034566637e-07, + "loss": 0.7065, + "num_input_tokens_seen": 4704576, + "step": 1490 + }, + { + "epoch": 0.09570450035209013, + "grad_norm": 47.040870666503906, + "learning_rate": 9.563436179746512e-07, + "loss": 0.5807, + "num_input_tokens_seen": 4719040, + "step": 1495 + }, + { + "epoch": 0.09602458229306703, + "grad_norm": 31.702251434326172, + "learning_rate": 9.595442324926386e-07, + "loss": 0.5473, + "num_input_tokens_seen": 4733696, + "step": 1500 + }, + { + "epoch": 0.09634466423404392, + "grad_norm": 45.263675689697266, + "learning_rate": 9.62744847010626e-07, + "loss": 0.5672, + "num_input_tokens_seen": 4748992, + "step": 1505 + }, + { + "epoch": 0.0966647461750208, + "grad_norm": 48.82842254638672, + "learning_rate": 9.659454615286136e-07, + "loss": 0.703, + "num_input_tokens_seen": 4764992, + "step": 1510 + }, + { + "epoch": 0.09698482811599769, + "grad_norm": 56.47787857055664, + "learning_rate": 9.69146076046601e-07, + "loss": 0.6398, + "num_input_tokens_seen": 4780352, + "step": 1515 + }, + { + "epoch": 0.09730491005697459, + "grad_norm": 36.64969253540039, + "learning_rate": 9.723466905645885e-07, + "loss": 0.5853, + "num_input_tokens_seen": 4796224, + "step": 1520 + }, + { + "epoch": 0.09762499199795148, + "grad_norm": 38.10419464111328, + "learning_rate": 9.75547305082576e-07, + "loss": 0.5472, + "num_input_tokens_seen": 4811840, + "step": 1525 + }, + { + "epoch": 0.09794507393892836, + "grad_norm": 32.214229583740234, + "learning_rate": 9.787479196005634e-07, + "loss": 0.5887, + "num_input_tokens_seen": 4826432, + "step": 1530 + }, + { + "epoch": 0.09826515587990525, + "grad_norm": 41.1507453918457, + "learning_rate": 9.81948534118551e-07, + "loss": 0.4231, + "num_input_tokens_seen": 4841920, + "step": 1535 + }, + { + "epoch": 0.09858523782088215, + "grad_norm": 41.10664367675781, + "learning_rate": 9.851491486365384e-07, + "loss": 0.6139, + "num_input_tokens_seen": 4857536, + "step": 1540 + }, + { + "epoch": 0.09890531976185904, + "grad_norm": 61.70024871826172, + "learning_rate": 9.883497631545258e-07, + "loss": 0.7112, + "num_input_tokens_seen": 4873408, + "step": 1545 + }, + { + "epoch": 0.09922540170283592, + "grad_norm": 53.393367767333984, + "learning_rate": 9.91550377672513e-07, + "loss": 0.6346, + "num_input_tokens_seen": 4889536, + "step": 1550 + }, + { + "epoch": 0.09954548364381281, + "grad_norm": 53.40909194946289, + "learning_rate": 9.947509921905006e-07, + "loss": 0.5943, + "num_input_tokens_seen": 4904448, + "step": 1555 + }, + { + "epoch": 0.0998655655847897, + "grad_norm": 34.59697341918945, + "learning_rate": 9.97951606708488e-07, + "loss": 0.6641, + "num_input_tokens_seen": 4919616, + "step": 1560 + }, + { + "epoch": 0.1001856475257666, + "grad_norm": 37.37678146362305, + "learning_rate": 1.0011522212264755e-06, + "loss": 0.569, + "num_input_tokens_seen": 4934144, + "step": 1565 + }, + { + "epoch": 0.1005057294667435, + "grad_norm": 34.36375045776367, + "learning_rate": 1.004352835744463e-06, + "loss": 0.5557, + "num_input_tokens_seen": 4950272, + "step": 1570 + }, + { + "epoch": 0.10082581140772037, + "grad_norm": 50.955360412597656, + "learning_rate": 1.0075534502624504e-06, + "loss": 0.6099, + "num_input_tokens_seen": 4965056, + "step": 1575 + }, + { + "epoch": 0.10114589334869727, + "grad_norm": 31.129056930541992, + "learning_rate": 1.0107540647804379e-06, + "loss": 0.5355, + "num_input_tokens_seen": 4980160, + "step": 1580 + }, + { + "epoch": 0.10146597528967416, + "grad_norm": 34.259246826171875, + "learning_rate": 1.0139546792984254e-06, + "loss": 0.4531, + "num_input_tokens_seen": 4996992, + "step": 1585 + }, + { + "epoch": 0.10178605723065105, + "grad_norm": 39.33750534057617, + "learning_rate": 1.0171552938164128e-06, + "loss": 0.5276, + "num_input_tokens_seen": 5012608, + "step": 1590 + }, + { + "epoch": 0.10210613917162793, + "grad_norm": 34.261600494384766, + "learning_rate": 1.0203559083344003e-06, + "loss": 0.4634, + "num_input_tokens_seen": 5027840, + "step": 1595 + }, + { + "epoch": 0.10242622111260483, + "grad_norm": 48.74466323852539, + "learning_rate": 1.0235565228523878e-06, + "loss": 0.6232, + "num_input_tokens_seen": 5042752, + "step": 1600 + }, + { + "epoch": 0.10274630305358172, + "grad_norm": 37.806739807128906, + "learning_rate": 1.0267571373703752e-06, + "loss": 0.5789, + "num_input_tokens_seen": 5058624, + "step": 1605 + }, + { + "epoch": 0.10306638499455861, + "grad_norm": 33.15849685668945, + "learning_rate": 1.0299577518883625e-06, + "loss": 0.6365, + "num_input_tokens_seen": 5075008, + "step": 1610 + }, + { + "epoch": 0.10338646693553549, + "grad_norm": 57.73208236694336, + "learning_rate": 1.0331583664063502e-06, + "loss": 0.5593, + "num_input_tokens_seen": 5090880, + "step": 1615 + }, + { + "epoch": 0.10370654887651239, + "grad_norm": 55.581504821777344, + "learning_rate": 1.0363589809243376e-06, + "loss": 0.5551, + "num_input_tokens_seen": 5106816, + "step": 1620 + }, + { + "epoch": 0.10402663081748928, + "grad_norm": 28.43035888671875, + "learning_rate": 1.039559595442325e-06, + "loss": 0.4576, + "num_input_tokens_seen": 5123904, + "step": 1625 + }, + { + "epoch": 0.10434671275846617, + "grad_norm": 36.56483459472656, + "learning_rate": 1.0427602099603126e-06, + "loss": 0.5034, + "num_input_tokens_seen": 5140160, + "step": 1630 + }, + { + "epoch": 0.10466679469944305, + "grad_norm": 47.81189727783203, + "learning_rate": 1.0459608244783e-06, + "loss": 0.4584, + "num_input_tokens_seen": 5156288, + "step": 1635 + }, + { + "epoch": 0.10498687664041995, + "grad_norm": 50.95032501220703, + "learning_rate": 1.0491614389962873e-06, + "loss": 0.5812, + "num_input_tokens_seen": 5171776, + "step": 1640 + }, + { + "epoch": 0.10530695858139684, + "grad_norm": 46.651588439941406, + "learning_rate": 1.0523620535142747e-06, + "loss": 0.7, + "num_input_tokens_seen": 5189504, + "step": 1645 + }, + { + "epoch": 0.10562704052237373, + "grad_norm": 49.6609992980957, + "learning_rate": 1.0555626680322622e-06, + "loss": 0.5602, + "num_input_tokens_seen": 5205056, + "step": 1650 + }, + { + "epoch": 0.10594712246335061, + "grad_norm": 42.85281753540039, + "learning_rate": 1.0587632825502497e-06, + "loss": 0.6155, + "num_input_tokens_seen": 5222656, + "step": 1655 + }, + { + "epoch": 0.1062672044043275, + "grad_norm": 54.39984893798828, + "learning_rate": 1.0619638970682371e-06, + "loss": 0.5936, + "num_input_tokens_seen": 5238848, + "step": 1660 + }, + { + "epoch": 0.1065872863453044, + "grad_norm": 43.852813720703125, + "learning_rate": 1.0651645115862246e-06, + "loss": 0.5589, + "num_input_tokens_seen": 5255296, + "step": 1665 + }, + { + "epoch": 0.10690736828628129, + "grad_norm": 60.331695556640625, + "learning_rate": 1.068365126104212e-06, + "loss": 0.7539, + "num_input_tokens_seen": 5270336, + "step": 1670 + }, + { + "epoch": 0.10722745022725817, + "grad_norm": 51.526920318603516, + "learning_rate": 1.0715657406221996e-06, + "loss": 0.5115, + "num_input_tokens_seen": 5286720, + "step": 1675 + }, + { + "epoch": 0.10754753216823507, + "grad_norm": 40.55554962158203, + "learning_rate": 1.074766355140187e-06, + "loss": 0.621, + "num_input_tokens_seen": 5301632, + "step": 1680 + }, + { + "epoch": 0.10786761410921196, + "grad_norm": 46.909175872802734, + "learning_rate": 1.0779669696581745e-06, + "loss": 0.3955, + "num_input_tokens_seen": 5316544, + "step": 1685 + }, + { + "epoch": 0.10818769605018885, + "grad_norm": 55.149776458740234, + "learning_rate": 1.081167584176162e-06, + "loss": 0.6051, + "num_input_tokens_seen": 5332544, + "step": 1690 + }, + { + "epoch": 0.10850777799116573, + "grad_norm": 37.83523178100586, + "learning_rate": 1.0843681986941494e-06, + "loss": 0.4972, + "num_input_tokens_seen": 5348096, + "step": 1695 + }, + { + "epoch": 0.10882785993214263, + "grad_norm": 50.16152572631836, + "learning_rate": 1.0875688132121367e-06, + "loss": 0.6385, + "num_input_tokens_seen": 5362368, + "step": 1700 + }, + { + "epoch": 0.10914794187311952, + "grad_norm": 42.90364456176758, + "learning_rate": 1.0907694277301241e-06, + "loss": 0.5557, + "num_input_tokens_seen": 5378176, + "step": 1705 + }, + { + "epoch": 0.10946802381409641, + "grad_norm": 43.40330123901367, + "learning_rate": 1.0939700422481116e-06, + "loss": 0.5577, + "num_input_tokens_seen": 5394752, + "step": 1710 + }, + { + "epoch": 0.10978810575507329, + "grad_norm": 41.54042053222656, + "learning_rate": 1.0971706567660993e-06, + "loss": 0.6704, + "num_input_tokens_seen": 5412160, + "step": 1715 + }, + { + "epoch": 0.11010818769605019, + "grad_norm": 41.009124755859375, + "learning_rate": 1.1003712712840868e-06, + "loss": 0.5459, + "num_input_tokens_seen": 5427840, + "step": 1720 + }, + { + "epoch": 0.11042826963702708, + "grad_norm": 47.597904205322266, + "learning_rate": 1.1035718858020742e-06, + "loss": 0.4625, + "num_input_tokens_seen": 5444224, + "step": 1725 + }, + { + "epoch": 0.11074835157800397, + "grad_norm": 68.12987518310547, + "learning_rate": 1.1067725003200615e-06, + "loss": 0.6443, + "num_input_tokens_seen": 5459648, + "step": 1730 + }, + { + "epoch": 0.11106843351898085, + "grad_norm": 51.33424377441406, + "learning_rate": 1.109973114838049e-06, + "loss": 0.7315, + "num_input_tokens_seen": 5475008, + "step": 1735 + }, + { + "epoch": 0.11138851545995775, + "grad_norm": 39.57004165649414, + "learning_rate": 1.1131737293560364e-06, + "loss": 0.5923, + "num_input_tokens_seen": 5491456, + "step": 1740 + }, + { + "epoch": 0.11170859740093464, + "grad_norm": 35.5581169128418, + "learning_rate": 1.1163743438740239e-06, + "loss": 0.5267, + "num_input_tokens_seen": 5507520, + "step": 1745 + }, + { + "epoch": 0.11202867934191153, + "grad_norm": 45.71389389038086, + "learning_rate": 1.1195749583920113e-06, + "loss": 0.6325, + "num_input_tokens_seen": 5523072, + "step": 1750 + }, + { + "epoch": 0.11234876128288843, + "grad_norm": 48.16838073730469, + "learning_rate": 1.1227755729099988e-06, + "loss": 0.5338, + "num_input_tokens_seen": 5538304, + "step": 1755 + }, + { + "epoch": 0.1126688432238653, + "grad_norm": 40.58455276489258, + "learning_rate": 1.1259761874279863e-06, + "loss": 0.4734, + "num_input_tokens_seen": 5553920, + "step": 1760 + }, + { + "epoch": 0.1129889251648422, + "grad_norm": 37.179954528808594, + "learning_rate": 1.1291768019459737e-06, + "loss": 0.4812, + "num_input_tokens_seen": 5569344, + "step": 1765 + }, + { + "epoch": 0.11330900710581909, + "grad_norm": 36.621273040771484, + "learning_rate": 1.1323774164639612e-06, + "loss": 0.5413, + "num_input_tokens_seen": 5585856, + "step": 1770 + }, + { + "epoch": 0.11362908904679599, + "grad_norm": 54.24860382080078, + "learning_rate": 1.1355780309819487e-06, + "loss": 0.5719, + "num_input_tokens_seen": 5603840, + "step": 1775 + }, + { + "epoch": 0.11394917098777287, + "grad_norm": 33.54169464111328, + "learning_rate": 1.1387786454999361e-06, + "loss": 0.5216, + "num_input_tokens_seen": 5620352, + "step": 1780 + }, + { + "epoch": 0.11426925292874976, + "grad_norm": 52.16750717163086, + "learning_rate": 1.1419792600179236e-06, + "loss": 0.5749, + "num_input_tokens_seen": 5636544, + "step": 1785 + }, + { + "epoch": 0.11458933486972665, + "grad_norm": 36.16944122314453, + "learning_rate": 1.1451798745359109e-06, + "loss": 0.5669, + "num_input_tokens_seen": 5652672, + "step": 1790 + }, + { + "epoch": 0.11490941681070355, + "grad_norm": 43.88142013549805, + "learning_rate": 1.1483804890538983e-06, + "loss": 0.4952, + "num_input_tokens_seen": 5668224, + "step": 1795 + }, + { + "epoch": 0.11522949875168043, + "grad_norm": 28.41265296936035, + "learning_rate": 1.1515811035718858e-06, + "loss": 0.4601, + "num_input_tokens_seen": 5684480, + "step": 1800 + }, + { + "epoch": 0.11554958069265732, + "grad_norm": 45.184234619140625, + "learning_rate": 1.1547817180898733e-06, + "loss": 0.4491, + "num_input_tokens_seen": 5700864, + "step": 1805 + }, + { + "epoch": 0.11586966263363421, + "grad_norm": 45.908042907714844, + "learning_rate": 1.1579823326078607e-06, + "loss": 0.5962, + "num_input_tokens_seen": 5716224, + "step": 1810 + }, + { + "epoch": 0.1161897445746111, + "grad_norm": 29.20834732055664, + "learning_rate": 1.1611829471258482e-06, + "loss": 0.4912, + "num_input_tokens_seen": 5733056, + "step": 1815 + }, + { + "epoch": 0.11650982651558799, + "grad_norm": 48.397457122802734, + "learning_rate": 1.1643835616438357e-06, + "loss": 0.6931, + "num_input_tokens_seen": 5749120, + "step": 1820 + }, + { + "epoch": 0.11682990845656488, + "grad_norm": 36.38230895996094, + "learning_rate": 1.1675841761618231e-06, + "loss": 0.6725, + "num_input_tokens_seen": 5763968, + "step": 1825 + }, + { + "epoch": 0.11714999039754177, + "grad_norm": 40.08740997314453, + "learning_rate": 1.1707847906798106e-06, + "loss": 0.602, + "num_input_tokens_seen": 5779520, + "step": 1830 + }, + { + "epoch": 0.11747007233851867, + "grad_norm": 47.374046325683594, + "learning_rate": 1.173985405197798e-06, + "loss": 0.5777, + "num_input_tokens_seen": 5798848, + "step": 1835 + }, + { + "epoch": 0.11779015427949555, + "grad_norm": 45.5966911315918, + "learning_rate": 1.1771860197157855e-06, + "loss": 0.4797, + "num_input_tokens_seen": 5813952, + "step": 1840 + }, + { + "epoch": 0.11811023622047244, + "grad_norm": 45.10554885864258, + "learning_rate": 1.180386634233773e-06, + "loss": 0.427, + "num_input_tokens_seen": 5830016, + "step": 1845 + }, + { + "epoch": 0.11843031816144933, + "grad_norm": 48.5747184753418, + "learning_rate": 1.1835872487517603e-06, + "loss": 0.5708, + "num_input_tokens_seen": 5845312, + "step": 1850 + }, + { + "epoch": 0.11875040010242623, + "grad_norm": 26.386159896850586, + "learning_rate": 1.186787863269748e-06, + "loss": 0.6263, + "num_input_tokens_seen": 5860672, + "step": 1855 + }, + { + "epoch": 0.1190704820434031, + "grad_norm": 38.38077926635742, + "learning_rate": 1.1899884777877354e-06, + "loss": 0.5924, + "num_input_tokens_seen": 5875776, + "step": 1860 + }, + { + "epoch": 0.11939056398438, + "grad_norm": 51.72248458862305, + "learning_rate": 1.1931890923057229e-06, + "loss": 0.3965, + "num_input_tokens_seen": 5891200, + "step": 1865 + }, + { + "epoch": 0.11971064592535689, + "grad_norm": 37.34616470336914, + "learning_rate": 1.1963897068237103e-06, + "loss": 0.4818, + "num_input_tokens_seen": 5907008, + "step": 1870 + }, + { + "epoch": 0.12003072786633379, + "grad_norm": 36.98713302612305, + "learning_rate": 1.1995903213416978e-06, + "loss": 0.4941, + "num_input_tokens_seen": 5923264, + "step": 1875 + }, + { + "epoch": 0.12035080980731067, + "grad_norm": 56.246829986572266, + "learning_rate": 1.202790935859685e-06, + "loss": 0.5121, + "num_input_tokens_seen": 5939648, + "step": 1880 + }, + { + "epoch": 0.12067089174828756, + "grad_norm": 30.091732025146484, + "learning_rate": 1.2059915503776725e-06, + "loss": 0.4495, + "num_input_tokens_seen": 5955520, + "step": 1885 + }, + { + "epoch": 0.12099097368926445, + "grad_norm": 49.609230041503906, + "learning_rate": 1.20919216489566e-06, + "loss": 0.5622, + "num_input_tokens_seen": 5971072, + "step": 1890 + }, + { + "epoch": 0.12131105563024135, + "grad_norm": 40.81990051269531, + "learning_rate": 1.2123927794136475e-06, + "loss": 0.605, + "num_input_tokens_seen": 5987264, + "step": 1895 + }, + { + "epoch": 0.12163113757121823, + "grad_norm": 58.6569709777832, + "learning_rate": 1.215593393931635e-06, + "loss": 0.634, + "num_input_tokens_seen": 6002880, + "step": 1900 + }, + { + "epoch": 0.12195121951219512, + "grad_norm": 46.04775619506836, + "learning_rate": 1.2187940084496224e-06, + "loss": 0.5494, + "num_input_tokens_seen": 6019520, + "step": 1905 + }, + { + "epoch": 0.12227130145317201, + "grad_norm": 82.18603515625, + "learning_rate": 1.2219946229676099e-06, + "loss": 0.6188, + "num_input_tokens_seen": 6035904, + "step": 1910 + }, + { + "epoch": 0.1225913833941489, + "grad_norm": 41.66156768798828, + "learning_rate": 1.2251952374855973e-06, + "loss": 0.607, + "num_input_tokens_seen": 6050816, + "step": 1915 + }, + { + "epoch": 0.12291146533512579, + "grad_norm": 33.17472457885742, + "learning_rate": 1.2283958520035848e-06, + "loss": 0.4801, + "num_input_tokens_seen": 6065920, + "step": 1920 + }, + { + "epoch": 0.12323154727610268, + "grad_norm": 31.21087074279785, + "learning_rate": 1.2315964665215723e-06, + "loss": 0.6155, + "num_input_tokens_seen": 6081728, + "step": 1925 + }, + { + "epoch": 0.12355162921707957, + "grad_norm": 32.39944076538086, + "learning_rate": 1.2347970810395597e-06, + "loss": 0.5382, + "num_input_tokens_seen": 6097472, + "step": 1930 + }, + { + "epoch": 0.12387171115805647, + "grad_norm": 35.646366119384766, + "learning_rate": 1.2379976955575472e-06, + "loss": 0.5551, + "num_input_tokens_seen": 6113152, + "step": 1935 + }, + { + "epoch": 0.12419179309903335, + "grad_norm": 39.95088195800781, + "learning_rate": 1.2411983100755345e-06, + "loss": 0.6146, + "num_input_tokens_seen": 6129408, + "step": 1940 + }, + { + "epoch": 0.12451187504001024, + "grad_norm": 28.999357223510742, + "learning_rate": 1.244398924593522e-06, + "loss": 0.5587, + "num_input_tokens_seen": 6144896, + "step": 1945 + }, + { + "epoch": 0.12483195698098713, + "grad_norm": 46.54081344604492, + "learning_rate": 1.2475995391115094e-06, + "loss": 0.6047, + "num_input_tokens_seen": 6160256, + "step": 1950 + }, + { + "epoch": 0.12515203892196403, + "grad_norm": 46.05059814453125, + "learning_rate": 1.250800153629497e-06, + "loss": 0.583, + "num_input_tokens_seen": 6175104, + "step": 1955 + }, + { + "epoch": 0.1254721208629409, + "grad_norm": 32.27542495727539, + "learning_rate": 1.2540007681474845e-06, + "loss": 0.5475, + "num_input_tokens_seen": 6190272, + "step": 1960 + }, + { + "epoch": 0.1257922028039178, + "grad_norm": 39.56837844848633, + "learning_rate": 1.257201382665472e-06, + "loss": 0.5469, + "num_input_tokens_seen": 6205952, + "step": 1965 + }, + { + "epoch": 0.1261122847448947, + "grad_norm": 48.05415344238281, + "learning_rate": 1.2604019971834595e-06, + "loss": 0.5827, + "num_input_tokens_seen": 6222592, + "step": 1970 + }, + { + "epoch": 0.12643236668587157, + "grad_norm": 43.072750091552734, + "learning_rate": 1.263602611701447e-06, + "loss": 0.4421, + "num_input_tokens_seen": 6237696, + "step": 1975 + }, + { + "epoch": 0.12675244862684848, + "grad_norm": 51.23579025268555, + "learning_rate": 1.2668032262194344e-06, + "loss": 0.4855, + "num_input_tokens_seen": 6254080, + "step": 1980 + }, + { + "epoch": 0.12707253056782536, + "grad_norm": 29.89756202697754, + "learning_rate": 1.2700038407374215e-06, + "loss": 0.5988, + "num_input_tokens_seen": 6270016, + "step": 1985 + }, + { + "epoch": 0.12739261250880227, + "grad_norm": 38.249359130859375, + "learning_rate": 1.2732044552554091e-06, + "loss": 0.5707, + "num_input_tokens_seen": 6285184, + "step": 1990 + }, + { + "epoch": 0.12771269444977915, + "grad_norm": 51.404876708984375, + "learning_rate": 1.2764050697733966e-06, + "loss": 0.5036, + "num_input_tokens_seen": 6300992, + "step": 1995 + }, + { + "epoch": 0.12803277639075603, + "grad_norm": 22.812774658203125, + "learning_rate": 1.279605684291384e-06, + "loss": 0.5292, + "num_input_tokens_seen": 6315392, + "step": 2000 + }, + { + "epoch": 0.12835285833173293, + "grad_norm": 42.461273193359375, + "learning_rate": 1.2828062988093715e-06, + "loss": 0.4883, + "num_input_tokens_seen": 6331136, + "step": 2005 + }, + { + "epoch": 0.1286729402727098, + "grad_norm": 38.56764221191406, + "learning_rate": 1.286006913327359e-06, + "loss": 0.5797, + "num_input_tokens_seen": 6347584, + "step": 2010 + }, + { + "epoch": 0.1289930222136867, + "grad_norm": 57.683963775634766, + "learning_rate": 1.2892075278453465e-06, + "loss": 0.5378, + "num_input_tokens_seen": 6362560, + "step": 2015 + }, + { + "epoch": 0.1293131041546636, + "grad_norm": 29.227014541625977, + "learning_rate": 1.292408142363334e-06, + "loss": 0.4951, + "num_input_tokens_seen": 6378304, + "step": 2020 + }, + { + "epoch": 0.12963318609564048, + "grad_norm": 43.57859802246094, + "learning_rate": 1.2956087568813214e-06, + "loss": 0.5996, + "num_input_tokens_seen": 6393280, + "step": 2025 + }, + { + "epoch": 0.1299532680366174, + "grad_norm": 56.2505989074707, + "learning_rate": 1.2988093713993089e-06, + "loss": 0.5688, + "num_input_tokens_seen": 6409472, + "step": 2030 + }, + { + "epoch": 0.13027334997759427, + "grad_norm": 88.03363800048828, + "learning_rate": 1.3020099859172963e-06, + "loss": 0.7725, + "num_input_tokens_seen": 6424960, + "step": 2035 + }, + { + "epoch": 0.13059343191857115, + "grad_norm": 42.13602066040039, + "learning_rate": 1.3052106004352838e-06, + "loss": 0.5561, + "num_input_tokens_seen": 6440000, + "step": 2040 + }, + { + "epoch": 0.13091351385954805, + "grad_norm": 32.962276458740234, + "learning_rate": 1.308411214953271e-06, + "loss": 0.4751, + "num_input_tokens_seen": 6457600, + "step": 2045 + }, + { + "epoch": 0.13123359580052493, + "grad_norm": 37.870506286621094, + "learning_rate": 1.3116118294712585e-06, + "loss": 0.523, + "num_input_tokens_seen": 6473088, + "step": 2050 + }, + { + "epoch": 0.1315536777415018, + "grad_norm": 68.34502410888672, + "learning_rate": 1.314812443989246e-06, + "loss": 0.6071, + "num_input_tokens_seen": 6487680, + "step": 2055 + }, + { + "epoch": 0.13187375968247872, + "grad_norm": 33.80329895019531, + "learning_rate": 1.3180130585072335e-06, + "loss": 0.4731, + "num_input_tokens_seen": 6503296, + "step": 2060 + }, + { + "epoch": 0.1321938416234556, + "grad_norm": 18.021574020385742, + "learning_rate": 1.321213673025221e-06, + "loss": 0.5632, + "num_input_tokens_seen": 6518912, + "step": 2065 + }, + { + "epoch": 0.1325139235644325, + "grad_norm": 46.232139587402344, + "learning_rate": 1.3244142875432084e-06, + "loss": 0.5936, + "num_input_tokens_seen": 6534784, + "step": 2070 + }, + { + "epoch": 0.13283400550540939, + "grad_norm": 57.625709533691406, + "learning_rate": 1.3276149020611959e-06, + "loss": 0.5515, + "num_input_tokens_seen": 6550528, + "step": 2075 + }, + { + "epoch": 0.13315408744638627, + "grad_norm": 40.16531753540039, + "learning_rate": 1.3308155165791833e-06, + "loss": 0.5215, + "num_input_tokens_seen": 6566144, + "step": 2080 + }, + { + "epoch": 0.13347416938736317, + "grad_norm": 75.86842346191406, + "learning_rate": 1.3340161310971708e-06, + "loss": 0.5965, + "num_input_tokens_seen": 6583872, + "step": 2085 + }, + { + "epoch": 0.13379425132834005, + "grad_norm": 25.69826889038086, + "learning_rate": 1.3372167456151583e-06, + "loss": 0.4697, + "num_input_tokens_seen": 6602304, + "step": 2090 + }, + { + "epoch": 0.13411433326931693, + "grad_norm": 67.23705291748047, + "learning_rate": 1.3404173601331457e-06, + "loss": 0.4969, + "num_input_tokens_seen": 6618112, + "step": 2095 + }, + { + "epoch": 0.13443441521029384, + "grad_norm": 56.47102355957031, + "learning_rate": 1.3436179746511332e-06, + "loss": 0.6301, + "num_input_tokens_seen": 6635584, + "step": 2100 + }, + { + "epoch": 0.13475449715127072, + "grad_norm": 35.792091369628906, + "learning_rate": 1.3468185891691205e-06, + "loss": 0.5273, + "num_input_tokens_seen": 6653568, + "step": 2105 + }, + { + "epoch": 0.13507457909224763, + "grad_norm": 33.35669708251953, + "learning_rate": 1.350019203687108e-06, + "loss": 0.4814, + "num_input_tokens_seen": 6668864, + "step": 2110 + }, + { + "epoch": 0.1353946610332245, + "grad_norm": 31.8349552154541, + "learning_rate": 1.3532198182050954e-06, + "loss": 0.5259, + "num_input_tokens_seen": 6684416, + "step": 2115 + }, + { + "epoch": 0.13571474297420139, + "grad_norm": 44.52934646606445, + "learning_rate": 1.3564204327230829e-06, + "loss": 0.598, + "num_input_tokens_seen": 6699392, + "step": 2120 + }, + { + "epoch": 0.1360348249151783, + "grad_norm": 42.586700439453125, + "learning_rate": 1.3596210472410703e-06, + "loss": 0.5651, + "num_input_tokens_seen": 6714560, + "step": 2125 + }, + { + "epoch": 0.13635490685615517, + "grad_norm": 48.244773864746094, + "learning_rate": 1.3628216617590578e-06, + "loss": 0.5548, + "num_input_tokens_seen": 6729920, + "step": 2130 + }, + { + "epoch": 0.13667498879713205, + "grad_norm": 31.550065994262695, + "learning_rate": 1.3660222762770453e-06, + "loss": 0.5734, + "num_input_tokens_seen": 6744768, + "step": 2135 + }, + { + "epoch": 0.13699507073810896, + "grad_norm": 30.512941360473633, + "learning_rate": 1.3692228907950327e-06, + "loss": 0.4626, + "num_input_tokens_seen": 6759680, + "step": 2140 + }, + { + "epoch": 0.13731515267908584, + "grad_norm": 42.52937698364258, + "learning_rate": 1.3724235053130202e-06, + "loss": 0.5224, + "num_input_tokens_seen": 6775168, + "step": 2145 + }, + { + "epoch": 0.13763523462006275, + "grad_norm": 36.929779052734375, + "learning_rate": 1.3756241198310077e-06, + "loss": 0.5441, + "num_input_tokens_seen": 6791296, + "step": 2150 + }, + { + "epoch": 0.13795531656103963, + "grad_norm": 20.963132858276367, + "learning_rate": 1.3788247343489951e-06, + "loss": 0.446, + "num_input_tokens_seen": 6808640, + "step": 2155 + }, + { + "epoch": 0.1382753985020165, + "grad_norm": 40.756004333496094, + "learning_rate": 1.3820253488669826e-06, + "loss": 0.6584, + "num_input_tokens_seen": 6824064, + "step": 2160 + }, + { + "epoch": 0.1385954804429934, + "grad_norm": 56.29258728027344, + "learning_rate": 1.3852259633849698e-06, + "loss": 0.4944, + "num_input_tokens_seen": 6839872, + "step": 2165 + }, + { + "epoch": 0.1389155623839703, + "grad_norm": 39.42473602294922, + "learning_rate": 1.3884265779029573e-06, + "loss": 0.4855, + "num_input_tokens_seen": 6855744, + "step": 2170 + }, + { + "epoch": 0.1392356443249472, + "grad_norm": 42.55796432495117, + "learning_rate": 1.3916271924209448e-06, + "loss": 0.5269, + "num_input_tokens_seen": 6869632, + "step": 2175 + }, + { + "epoch": 0.13955572626592408, + "grad_norm": 32.48601150512695, + "learning_rate": 1.3948278069389322e-06, + "loss": 0.4592, + "num_input_tokens_seen": 6885696, + "step": 2180 + }, + { + "epoch": 0.13987580820690096, + "grad_norm": 41.20307540893555, + "learning_rate": 1.3980284214569197e-06, + "loss": 0.4235, + "num_input_tokens_seen": 6901504, + "step": 2185 + }, + { + "epoch": 0.14019589014787787, + "grad_norm": 45.36053466796875, + "learning_rate": 1.4012290359749072e-06, + "loss": 0.6028, + "num_input_tokens_seen": 6916480, + "step": 2190 + }, + { + "epoch": 0.14051597208885475, + "grad_norm": 55.36004638671875, + "learning_rate": 1.4044296504928949e-06, + "loss": 0.4976, + "num_input_tokens_seen": 6932416, + "step": 2195 + }, + { + "epoch": 0.14083605402983163, + "grad_norm": 45.13800811767578, + "learning_rate": 1.4076302650108823e-06, + "loss": 0.5292, + "num_input_tokens_seen": 6948992, + "step": 2200 + }, + { + "epoch": 0.14115613597080853, + "grad_norm": 59.14999008178711, + "learning_rate": 1.4108308795288698e-06, + "loss": 0.4567, + "num_input_tokens_seen": 6964416, + "step": 2205 + }, + { + "epoch": 0.1414762179117854, + "grad_norm": 39.41249084472656, + "learning_rate": 1.4140314940468573e-06, + "loss": 0.4673, + "num_input_tokens_seen": 6980544, + "step": 2210 + }, + { + "epoch": 0.14179629985276232, + "grad_norm": 36.35877990722656, + "learning_rate": 1.4172321085648447e-06, + "loss": 0.5736, + "num_input_tokens_seen": 6997440, + "step": 2215 + }, + { + "epoch": 0.1421163817937392, + "grad_norm": 24.607650756835938, + "learning_rate": 1.4204327230828322e-06, + "loss": 0.4089, + "num_input_tokens_seen": 7012672, + "step": 2220 + }, + { + "epoch": 0.14243646373471608, + "grad_norm": 50.25101852416992, + "learning_rate": 1.4236333376008192e-06, + "loss": 0.4398, + "num_input_tokens_seen": 7027648, + "step": 2225 + }, + { + "epoch": 0.142756545675693, + "grad_norm": 37.27925491333008, + "learning_rate": 1.426833952118807e-06, + "loss": 0.5415, + "num_input_tokens_seen": 7043200, + "step": 2230 + }, + { + "epoch": 0.14307662761666987, + "grad_norm": 51.25174331665039, + "learning_rate": 1.4300345666367944e-06, + "loss": 0.7296, + "num_input_tokens_seen": 7058688, + "step": 2235 + }, + { + "epoch": 0.14339670955764675, + "grad_norm": 39.54377746582031, + "learning_rate": 1.4332351811547819e-06, + "loss": 0.589, + "num_input_tokens_seen": 7074048, + "step": 2240 + }, + { + "epoch": 0.14371679149862365, + "grad_norm": 44.530582427978516, + "learning_rate": 1.4364357956727693e-06, + "loss": 0.5317, + "num_input_tokens_seen": 7089792, + "step": 2245 + }, + { + "epoch": 0.14403687343960053, + "grad_norm": 52.69428253173828, + "learning_rate": 1.4396364101907568e-06, + "loss": 0.4828, + "num_input_tokens_seen": 7104640, + "step": 2250 + }, + { + "epoch": 0.14435695538057744, + "grad_norm": 48.21638870239258, + "learning_rate": 1.4428370247087443e-06, + "loss": 0.5703, + "num_input_tokens_seen": 7120064, + "step": 2255 + }, + { + "epoch": 0.14467703732155432, + "grad_norm": 36.12133026123047, + "learning_rate": 1.4460376392267317e-06, + "loss": 0.4994, + "num_input_tokens_seen": 7135232, + "step": 2260 + }, + { + "epoch": 0.1449971192625312, + "grad_norm": 62.1004753112793, + "learning_rate": 1.4492382537447192e-06, + "loss": 0.5625, + "num_input_tokens_seen": 7149632, + "step": 2265 + }, + { + "epoch": 0.1453172012035081, + "grad_norm": 35.51633834838867, + "learning_rate": 1.4524388682627067e-06, + "loss": 0.421, + "num_input_tokens_seen": 7164352, + "step": 2270 + }, + { + "epoch": 0.14563728314448499, + "grad_norm": 42.39748001098633, + "learning_rate": 1.4556394827806941e-06, + "loss": 0.5397, + "num_input_tokens_seen": 7179776, + "step": 2275 + }, + { + "epoch": 0.14595736508546187, + "grad_norm": 38.83152770996094, + "learning_rate": 1.4588400972986816e-06, + "loss": 0.4446, + "num_input_tokens_seen": 7194240, + "step": 2280 + }, + { + "epoch": 0.14627744702643877, + "grad_norm": 26.364704132080078, + "learning_rate": 1.4620407118166688e-06, + "loss": 0.4157, + "num_input_tokens_seen": 7209472, + "step": 2285 + }, + { + "epoch": 0.14659752896741565, + "grad_norm": 46.9654655456543, + "learning_rate": 1.4652413263346563e-06, + "loss": 0.4357, + "num_input_tokens_seen": 7225088, + "step": 2290 + }, + { + "epoch": 0.14691761090839256, + "grad_norm": 72.65473175048828, + "learning_rate": 1.4684419408526438e-06, + "loss": 0.6335, + "num_input_tokens_seen": 7240704, + "step": 2295 + }, + { + "epoch": 0.14723769284936944, + "grad_norm": 50.462738037109375, + "learning_rate": 1.4716425553706312e-06, + "loss": 0.4996, + "num_input_tokens_seen": 7256000, + "step": 2300 + }, + { + "epoch": 0.14755777479034632, + "grad_norm": 39.59038162231445, + "learning_rate": 1.4748431698886187e-06, + "loss": 0.5397, + "num_input_tokens_seen": 7270336, + "step": 2305 + }, + { + "epoch": 0.14787785673132323, + "grad_norm": 37.994197845458984, + "learning_rate": 1.4780437844066062e-06, + "loss": 0.5644, + "num_input_tokens_seen": 7285440, + "step": 2310 + }, + { + "epoch": 0.1481979386723001, + "grad_norm": 33.6203727722168, + "learning_rate": 1.4812443989245936e-06, + "loss": 0.4965, + "num_input_tokens_seen": 7301120, + "step": 2315 + }, + { + "epoch": 0.14851802061327699, + "grad_norm": 28.978073120117188, + "learning_rate": 1.4844450134425811e-06, + "loss": 0.514, + "num_input_tokens_seen": 7316416, + "step": 2320 + }, + { + "epoch": 0.1488381025542539, + "grad_norm": 41.08457946777344, + "learning_rate": 1.4876456279605686e-06, + "loss": 0.5168, + "num_input_tokens_seen": 7332160, + "step": 2325 + }, + { + "epoch": 0.14915818449523077, + "grad_norm": 52.09782409667969, + "learning_rate": 1.490846242478556e-06, + "loss": 0.6872, + "num_input_tokens_seen": 7347776, + "step": 2330 + }, + { + "epoch": 0.14947826643620768, + "grad_norm": 40.05617904663086, + "learning_rate": 1.4940468569965435e-06, + "loss": 0.5837, + "num_input_tokens_seen": 7363200, + "step": 2335 + }, + { + "epoch": 0.14979834837718456, + "grad_norm": 50.91035842895508, + "learning_rate": 1.497247471514531e-06, + "loss": 0.4732, + "num_input_tokens_seen": 7378752, + "step": 2340 + }, + { + "epoch": 0.15011843031816144, + "grad_norm": 41.02193069458008, + "learning_rate": 1.5004480860325182e-06, + "loss": 0.4904, + "num_input_tokens_seen": 7393984, + "step": 2345 + }, + { + "epoch": 0.15043851225913835, + "grad_norm": 35.76988983154297, + "learning_rate": 1.5036487005505057e-06, + "loss": 0.5569, + "num_input_tokens_seen": 7408832, + "step": 2350 + }, + { + "epoch": 0.15075859420011523, + "grad_norm": 17.719465255737305, + "learning_rate": 1.5068493150684932e-06, + "loss": 0.4751, + "num_input_tokens_seen": 7423040, + "step": 2355 + }, + { + "epoch": 0.15107867614109213, + "grad_norm": 51.66796875, + "learning_rate": 1.5100499295864806e-06, + "loss": 0.5269, + "num_input_tokens_seen": 7438464, + "step": 2360 + }, + { + "epoch": 0.151398758082069, + "grad_norm": 44.904296875, + "learning_rate": 1.5132505441044681e-06, + "loss": 0.5282, + "num_input_tokens_seen": 7453824, + "step": 2365 + }, + { + "epoch": 0.1517188400230459, + "grad_norm": 39.21469497680664, + "learning_rate": 1.5164511586224556e-06, + "loss": 0.6004, + "num_input_tokens_seen": 7469312, + "step": 2370 + }, + { + "epoch": 0.1520389219640228, + "grad_norm": 38.74185562133789, + "learning_rate": 1.519651773140443e-06, + "loss": 0.6537, + "num_input_tokens_seen": 7485120, + "step": 2375 + }, + { + "epoch": 0.15235900390499968, + "grad_norm": 37.769020080566406, + "learning_rate": 1.5228523876584305e-06, + "loss": 0.5134, + "num_input_tokens_seen": 7500416, + "step": 2380 + }, + { + "epoch": 0.15267908584597656, + "grad_norm": 37.960411071777344, + "learning_rate": 1.526053002176418e-06, + "loss": 0.4294, + "num_input_tokens_seen": 7516672, + "step": 2385 + }, + { + "epoch": 0.15299916778695347, + "grad_norm": 46.24180221557617, + "learning_rate": 1.5292536166944054e-06, + "loss": 0.5174, + "num_input_tokens_seen": 7532800, + "step": 2390 + }, + { + "epoch": 0.15331924972793035, + "grad_norm": 42.24235534667969, + "learning_rate": 1.532454231212393e-06, + "loss": 0.5293, + "num_input_tokens_seen": 7547776, + "step": 2395 + }, + { + "epoch": 0.15363933166890725, + "grad_norm": 35.31581497192383, + "learning_rate": 1.5356548457303804e-06, + "loss": 0.6115, + "num_input_tokens_seen": 7562496, + "step": 2400 + }, + { + "epoch": 0.15395941360988413, + "grad_norm": 49.22823715209961, + "learning_rate": 1.5388554602483676e-06, + "loss": 0.5652, + "num_input_tokens_seen": 7577920, + "step": 2405 + }, + { + "epoch": 0.154279495550861, + "grad_norm": 42.188575744628906, + "learning_rate": 1.542056074766355e-06, + "loss": 0.5677, + "num_input_tokens_seen": 7593216, + "step": 2410 + }, + { + "epoch": 0.15459957749183792, + "grad_norm": 51.251583099365234, + "learning_rate": 1.5452566892843426e-06, + "loss": 0.5918, + "num_input_tokens_seen": 7608192, + "step": 2415 + }, + { + "epoch": 0.1549196594328148, + "grad_norm": 54.47895431518555, + "learning_rate": 1.54845730380233e-06, + "loss": 0.4747, + "num_input_tokens_seen": 7624448, + "step": 2420 + }, + { + "epoch": 0.15523974137379168, + "grad_norm": 64.11780548095703, + "learning_rate": 1.5516579183203175e-06, + "loss": 0.4965, + "num_input_tokens_seen": 7640192, + "step": 2425 + }, + { + "epoch": 0.15555982331476859, + "grad_norm": 30.881677627563477, + "learning_rate": 1.554858532838305e-06, + "loss": 0.5224, + "num_input_tokens_seen": 7654848, + "step": 2430 + }, + { + "epoch": 0.15587990525574547, + "grad_norm": 66.72223663330078, + "learning_rate": 1.5580591473562926e-06, + "loss": 0.6756, + "num_input_tokens_seen": 7670848, + "step": 2435 + }, + { + "epoch": 0.15619998719672237, + "grad_norm": 30.989604949951172, + "learning_rate": 1.5612597618742801e-06, + "loss": 0.6278, + "num_input_tokens_seen": 7686016, + "step": 2440 + }, + { + "epoch": 0.15652006913769925, + "grad_norm": 42.251888275146484, + "learning_rate": 1.5644603763922676e-06, + "loss": 0.549, + "num_input_tokens_seen": 7701760, + "step": 2445 + }, + { + "epoch": 0.15684015107867613, + "grad_norm": 40.16205596923828, + "learning_rate": 1.567660990910255e-06, + "loss": 0.5704, + "num_input_tokens_seen": 7717760, + "step": 2450 + }, + { + "epoch": 0.15716023301965304, + "grad_norm": 22.00271987915039, + "learning_rate": 1.5708616054282425e-06, + "loss": 0.3862, + "num_input_tokens_seen": 7733376, + "step": 2455 + }, + { + "epoch": 0.15748031496062992, + "grad_norm": 21.01577377319336, + "learning_rate": 1.57406221994623e-06, + "loss": 0.5703, + "num_input_tokens_seen": 7750464, + "step": 2460 + }, + { + "epoch": 0.1578003969016068, + "grad_norm": 53.58307647705078, + "learning_rate": 1.577262834464217e-06, + "loss": 0.5254, + "num_input_tokens_seen": 7768576, + "step": 2465 + }, + { + "epoch": 0.1581204788425837, + "grad_norm": 42.3109016418457, + "learning_rate": 1.5804634489822047e-06, + "loss": 0.4806, + "num_input_tokens_seen": 7784256, + "step": 2470 + }, + { + "epoch": 0.15844056078356059, + "grad_norm": 47.568458557128906, + "learning_rate": 1.5836640635001922e-06, + "loss": 0.3638, + "num_input_tokens_seen": 7799232, + "step": 2475 + }, + { + "epoch": 0.1587606427245375, + "grad_norm": 45.880043029785156, + "learning_rate": 1.5868646780181796e-06, + "loss": 0.6353, + "num_input_tokens_seen": 7815040, + "step": 2480 + }, + { + "epoch": 0.15908072466551437, + "grad_norm": 22.228591918945312, + "learning_rate": 1.5900652925361671e-06, + "loss": 0.4745, + "num_input_tokens_seen": 7830400, + "step": 2485 + }, + { + "epoch": 0.15940080660649125, + "grad_norm": 36.211090087890625, + "learning_rate": 1.5932659070541546e-06, + "loss": 0.5098, + "num_input_tokens_seen": 7846336, + "step": 2490 + }, + { + "epoch": 0.15972088854746816, + "grad_norm": 36.9947509765625, + "learning_rate": 1.596466521572142e-06, + "loss": 0.6127, + "num_input_tokens_seen": 7862528, + "step": 2495 + }, + { + "epoch": 0.16004097048844504, + "grad_norm": 30.433122634887695, + "learning_rate": 1.5996671360901295e-06, + "loss": 0.4496, + "num_input_tokens_seen": 7880000, + "step": 2500 + }, + { + "epoch": 0.16036105242942192, + "grad_norm": 20.76383399963379, + "learning_rate": 1.602867750608117e-06, + "loss": 0.4487, + "num_input_tokens_seen": 7895296, + "step": 2505 + }, + { + "epoch": 0.16068113437039883, + "grad_norm": 59.94544982910156, + "learning_rate": 1.6060683651261044e-06, + "loss": 0.636, + "num_input_tokens_seen": 7911104, + "step": 2510 + }, + { + "epoch": 0.1610012163113757, + "grad_norm": 38.77571487426758, + "learning_rate": 1.609268979644092e-06, + "loss": 0.5421, + "num_input_tokens_seen": 7925952, + "step": 2515 + }, + { + "epoch": 0.1613212982523526, + "grad_norm": 36.788536071777344, + "learning_rate": 1.6124695941620794e-06, + "loss": 0.6206, + "num_input_tokens_seen": 7940928, + "step": 2520 + }, + { + "epoch": 0.1616413801933295, + "grad_norm": 43.84877395629883, + "learning_rate": 1.6156702086800666e-06, + "loss": 0.5694, + "num_input_tokens_seen": 7955200, + "step": 2525 + }, + { + "epoch": 0.16196146213430637, + "grad_norm": 45.8337516784668, + "learning_rate": 1.618870823198054e-06, + "loss": 0.6508, + "num_input_tokens_seen": 7970944, + "step": 2530 + }, + { + "epoch": 0.16228154407528328, + "grad_norm": 42.0396842956543, + "learning_rate": 1.6220714377160416e-06, + "loss": 0.4962, + "num_input_tokens_seen": 7985856, + "step": 2535 + }, + { + "epoch": 0.16260162601626016, + "grad_norm": 49.147640228271484, + "learning_rate": 1.625272052234029e-06, + "loss": 0.4368, + "num_input_tokens_seen": 8001088, + "step": 2540 + }, + { + "epoch": 0.16292170795723707, + "grad_norm": 42.915924072265625, + "learning_rate": 1.6284726667520165e-06, + "loss": 0.5544, + "num_input_tokens_seen": 8017856, + "step": 2545 + }, + { + "epoch": 0.16324178989821395, + "grad_norm": 43.75297546386719, + "learning_rate": 1.631673281270004e-06, + "loss": 0.5051, + "num_input_tokens_seen": 8033792, + "step": 2550 + }, + { + "epoch": 0.16356187183919083, + "grad_norm": 33.498939514160156, + "learning_rate": 1.6348738957879914e-06, + "loss": 0.5028, + "num_input_tokens_seen": 8048256, + "step": 2555 + }, + { + "epoch": 0.16388195378016773, + "grad_norm": 27.690343856811523, + "learning_rate": 1.638074510305979e-06, + "loss": 0.5597, + "num_input_tokens_seen": 8063680, + "step": 2560 + }, + { + "epoch": 0.1642020357211446, + "grad_norm": 38.08916091918945, + "learning_rate": 1.6412751248239664e-06, + "loss": 0.4872, + "num_input_tokens_seen": 8079744, + "step": 2565 + }, + { + "epoch": 0.1645221176621215, + "grad_norm": 40.25361633300781, + "learning_rate": 1.6444757393419538e-06, + "loss": 0.5346, + "num_input_tokens_seen": 8094656, + "step": 2570 + }, + { + "epoch": 0.1648421996030984, + "grad_norm": 25.213605880737305, + "learning_rate": 1.6476763538599413e-06, + "loss": 0.5457, + "num_input_tokens_seen": 8110528, + "step": 2575 + }, + { + "epoch": 0.16516228154407528, + "grad_norm": 41.70231628417969, + "learning_rate": 1.6508769683779288e-06, + "loss": 0.5589, + "num_input_tokens_seen": 8126720, + "step": 2580 + }, + { + "epoch": 0.1654823634850522, + "grad_norm": 57.64213562011719, + "learning_rate": 1.654077582895916e-06, + "loss": 0.5366, + "num_input_tokens_seen": 8141312, + "step": 2585 + }, + { + "epoch": 0.16580244542602907, + "grad_norm": 36.742889404296875, + "learning_rate": 1.6572781974139035e-06, + "loss": 0.5181, + "num_input_tokens_seen": 8156736, + "step": 2590 + }, + { + "epoch": 0.16612252736700595, + "grad_norm": 47.61459732055664, + "learning_rate": 1.660478811931891e-06, + "loss": 0.5483, + "num_input_tokens_seen": 8172480, + "step": 2595 + }, + { + "epoch": 0.16644260930798285, + "grad_norm": 39.73297882080078, + "learning_rate": 1.6636794264498784e-06, + "loss": 0.6026, + "num_input_tokens_seen": 8188224, + "step": 2600 + }, + { + "epoch": 0.16676269124895973, + "grad_norm": 34.356449127197266, + "learning_rate": 1.666880040967866e-06, + "loss": 0.4673, + "num_input_tokens_seen": 8204352, + "step": 2605 + }, + { + "epoch": 0.1670827731899366, + "grad_norm": 43.510215759277344, + "learning_rate": 1.6700806554858534e-06, + "loss": 0.6196, + "num_input_tokens_seen": 8219328, + "step": 2610 + }, + { + "epoch": 0.16740285513091352, + "grad_norm": 21.804351806640625, + "learning_rate": 1.6732812700038408e-06, + "loss": 0.4395, + "num_input_tokens_seen": 8234560, + "step": 2615 + }, + { + "epoch": 0.1677229370718904, + "grad_norm": 37.56566619873047, + "learning_rate": 1.6764818845218283e-06, + "loss": 0.6626, + "num_input_tokens_seen": 8249344, + "step": 2620 + }, + { + "epoch": 0.1680430190128673, + "grad_norm": 52.418338775634766, + "learning_rate": 1.6796824990398158e-06, + "loss": 0.5272, + "num_input_tokens_seen": 8265600, + "step": 2625 + }, + { + "epoch": 0.16836310095384419, + "grad_norm": 28.86993408203125, + "learning_rate": 1.6828831135578032e-06, + "loss": 0.4894, + "num_input_tokens_seen": 8280384, + "step": 2630 + }, + { + "epoch": 0.16868318289482107, + "grad_norm": 50.95699691772461, + "learning_rate": 1.6860837280757907e-06, + "loss": 0.5581, + "num_input_tokens_seen": 8295744, + "step": 2635 + }, + { + "epoch": 0.16900326483579797, + "grad_norm": 27.886852264404297, + "learning_rate": 1.6892843425937782e-06, + "loss": 0.5212, + "num_input_tokens_seen": 8311680, + "step": 2640 + }, + { + "epoch": 0.16932334677677485, + "grad_norm": 33.237648010253906, + "learning_rate": 1.6924849571117654e-06, + "loss": 0.5057, + "num_input_tokens_seen": 8326592, + "step": 2645 + }, + { + "epoch": 0.16964342871775173, + "grad_norm": 63.40398406982422, + "learning_rate": 1.6956855716297529e-06, + "loss": 0.5038, + "num_input_tokens_seen": 8341632, + "step": 2650 + }, + { + "epoch": 0.16996351065872864, + "grad_norm": 30.190183639526367, + "learning_rate": 1.6988861861477404e-06, + "loss": 0.4613, + "num_input_tokens_seen": 8358400, + "step": 2655 + }, + { + "epoch": 0.17028359259970552, + "grad_norm": 34.05051040649414, + "learning_rate": 1.7020868006657278e-06, + "loss": 0.4923, + "num_input_tokens_seen": 8374144, + "step": 2660 + }, + { + "epoch": 0.17060367454068243, + "grad_norm": 20.95635986328125, + "learning_rate": 1.7052874151837153e-06, + "loss": 0.4093, + "num_input_tokens_seen": 8389440, + "step": 2665 + }, + { + "epoch": 0.1709237564816593, + "grad_norm": 35.497066497802734, + "learning_rate": 1.7084880297017028e-06, + "loss": 0.5168, + "num_input_tokens_seen": 8404672, + "step": 2670 + }, + { + "epoch": 0.17124383842263619, + "grad_norm": 69.72010040283203, + "learning_rate": 1.7116886442196904e-06, + "loss": 0.5451, + "num_input_tokens_seen": 8420096, + "step": 2675 + }, + { + "epoch": 0.1715639203636131, + "grad_norm": 49.44060516357422, + "learning_rate": 1.714889258737678e-06, + "loss": 0.6514, + "num_input_tokens_seen": 8436288, + "step": 2680 + }, + { + "epoch": 0.17188400230458997, + "grad_norm": 27.69544219970703, + "learning_rate": 1.7180898732556654e-06, + "loss": 0.5755, + "num_input_tokens_seen": 8452672, + "step": 2685 + }, + { + "epoch": 0.17220408424556685, + "grad_norm": 22.199567794799805, + "learning_rate": 1.7212904877736528e-06, + "loss": 0.4582, + "num_input_tokens_seen": 8469120, + "step": 2690 + }, + { + "epoch": 0.17252416618654376, + "grad_norm": 36.53813934326172, + "learning_rate": 1.7244911022916403e-06, + "loss": 0.4967, + "num_input_tokens_seen": 8484736, + "step": 2695 + }, + { + "epoch": 0.17284424812752064, + "grad_norm": 45.47534942626953, + "learning_rate": 1.7276917168096278e-06, + "loss": 0.738, + "num_input_tokens_seen": 8500480, + "step": 2700 + }, + { + "epoch": 0.17316433006849755, + "grad_norm": 34.53159713745117, + "learning_rate": 1.7308923313276148e-06, + "loss": 0.3984, + "num_input_tokens_seen": 8514624, + "step": 2705 + }, + { + "epoch": 0.17348441200947443, + "grad_norm": 25.911266326904297, + "learning_rate": 1.7340929458456025e-06, + "loss": 0.4841, + "num_input_tokens_seen": 8529664, + "step": 2710 + }, + { + "epoch": 0.1738044939504513, + "grad_norm": 64.5189437866211, + "learning_rate": 1.73729356036359e-06, + "loss": 0.6277, + "num_input_tokens_seen": 8545280, + "step": 2715 + }, + { + "epoch": 0.1741245758914282, + "grad_norm": 35.652706146240234, + "learning_rate": 1.7404941748815774e-06, + "loss": 0.5041, + "num_input_tokens_seen": 8560384, + "step": 2720 + }, + { + "epoch": 0.1744446578324051, + "grad_norm": 40.048362731933594, + "learning_rate": 1.743694789399565e-06, + "loss": 0.582, + "num_input_tokens_seen": 8576960, + "step": 2725 + }, + { + "epoch": 0.174764739773382, + "grad_norm": 32.10976791381836, + "learning_rate": 1.7468954039175524e-06, + "loss": 0.4479, + "num_input_tokens_seen": 8592384, + "step": 2730 + }, + { + "epoch": 0.17508482171435888, + "grad_norm": 37.385169982910156, + "learning_rate": 1.7500960184355398e-06, + "loss": 0.5252, + "num_input_tokens_seen": 8609024, + "step": 2735 + }, + { + "epoch": 0.17540490365533576, + "grad_norm": 49.784149169921875, + "learning_rate": 1.7532966329535273e-06, + "loss": 0.4628, + "num_input_tokens_seen": 8624768, + "step": 2740 + }, + { + "epoch": 0.17572498559631267, + "grad_norm": 28.95673370361328, + "learning_rate": 1.7564972474715148e-06, + "loss": 0.5124, + "num_input_tokens_seen": 8640448, + "step": 2745 + }, + { + "epoch": 0.17604506753728955, + "grad_norm": 32.19405746459961, + "learning_rate": 1.7596978619895022e-06, + "loss": 0.5235, + "num_input_tokens_seen": 8655296, + "step": 2750 + }, + { + "epoch": 0.17636514947826643, + "grad_norm": 63.266685485839844, + "learning_rate": 1.7628984765074897e-06, + "loss": 0.514, + "num_input_tokens_seen": 8671872, + "step": 2755 + }, + { + "epoch": 0.17668523141924333, + "grad_norm": 23.281618118286133, + "learning_rate": 1.7660990910254772e-06, + "loss": 0.5037, + "num_input_tokens_seen": 8687680, + "step": 2760 + }, + { + "epoch": 0.1770053133602202, + "grad_norm": 69.17237854003906, + "learning_rate": 1.7692997055434644e-06, + "loss": 0.6377, + "num_input_tokens_seen": 8702912, + "step": 2765 + }, + { + "epoch": 0.17732539530119712, + "grad_norm": 30.508115768432617, + "learning_rate": 1.7725003200614519e-06, + "loss": 0.4919, + "num_input_tokens_seen": 8718144, + "step": 2770 + }, + { + "epoch": 0.177645477242174, + "grad_norm": 36.519588470458984, + "learning_rate": 1.7757009345794394e-06, + "loss": 0.5202, + "num_input_tokens_seen": 8732992, + "step": 2775 + }, + { + "epoch": 0.17796555918315088, + "grad_norm": 25.143461227416992, + "learning_rate": 1.7789015490974268e-06, + "loss": 0.5555, + "num_input_tokens_seen": 8749056, + "step": 2780 + }, + { + "epoch": 0.17828564112412779, + "grad_norm": 29.076881408691406, + "learning_rate": 1.7821021636154143e-06, + "loss": 0.5075, + "num_input_tokens_seen": 8765184, + "step": 2785 + }, + { + "epoch": 0.17860572306510467, + "grad_norm": 51.000274658203125, + "learning_rate": 1.7853027781334018e-06, + "loss": 0.4381, + "num_input_tokens_seen": 8780480, + "step": 2790 + }, + { + "epoch": 0.17892580500608155, + "grad_norm": 31.757402420043945, + "learning_rate": 1.7885033926513892e-06, + "loss": 0.5762, + "num_input_tokens_seen": 8796352, + "step": 2795 + }, + { + "epoch": 0.17924588694705845, + "grad_norm": 29.46088981628418, + "learning_rate": 1.7917040071693767e-06, + "loss": 0.4723, + "num_input_tokens_seen": 8810688, + "step": 2800 + }, + { + "epoch": 0.17956596888803533, + "grad_norm": 46.98865509033203, + "learning_rate": 1.7949046216873642e-06, + "loss": 0.4801, + "num_input_tokens_seen": 8826304, + "step": 2805 + }, + { + "epoch": 0.17988605082901224, + "grad_norm": 22.81716537475586, + "learning_rate": 1.7981052362053516e-06, + "loss": 0.4333, + "num_input_tokens_seen": 8841344, + "step": 2810 + }, + { + "epoch": 0.18020613276998912, + "grad_norm": 39.35813522338867, + "learning_rate": 1.801305850723339e-06, + "loss": 0.5636, + "num_input_tokens_seen": 8857664, + "step": 2815 + }, + { + "epoch": 0.180526214710966, + "grad_norm": 49.55950164794922, + "learning_rate": 1.8045064652413266e-06, + "loss": 0.597, + "num_input_tokens_seen": 8873408, + "step": 2820 + }, + { + "epoch": 0.1808462966519429, + "grad_norm": 35.18671417236328, + "learning_rate": 1.8077070797593138e-06, + "loss": 0.4652, + "num_input_tokens_seen": 8889472, + "step": 2825 + }, + { + "epoch": 0.18116637859291979, + "grad_norm": 91.99665832519531, + "learning_rate": 1.8109076942773013e-06, + "loss": 0.6546, + "num_input_tokens_seen": 8904640, + "step": 2830 + }, + { + "epoch": 0.18148646053389667, + "grad_norm": 36.22922897338867, + "learning_rate": 1.8141083087952887e-06, + "loss": 0.4452, + "num_input_tokens_seen": 8919680, + "step": 2835 + }, + { + "epoch": 0.18180654247487357, + "grad_norm": 33.34925079345703, + "learning_rate": 1.8173089233132762e-06, + "loss": 0.4601, + "num_input_tokens_seen": 8935360, + "step": 2840 + }, + { + "epoch": 0.18212662441585045, + "grad_norm": 26.40673828125, + "learning_rate": 1.8205095378312637e-06, + "loss": 0.4969, + "num_input_tokens_seen": 8951552, + "step": 2845 + }, + { + "epoch": 0.18244670635682736, + "grad_norm": 60.47718048095703, + "learning_rate": 1.8237101523492512e-06, + "loss": 0.6108, + "num_input_tokens_seen": 8968576, + "step": 2850 + }, + { + "epoch": 0.18276678829780424, + "grad_norm": 33.50872802734375, + "learning_rate": 1.8269107668672386e-06, + "loss": 0.4499, + "num_input_tokens_seen": 8983744, + "step": 2855 + }, + { + "epoch": 0.18308687023878112, + "grad_norm": 34.95276641845703, + "learning_rate": 1.830111381385226e-06, + "loss": 0.4559, + "num_input_tokens_seen": 8999040, + "step": 2860 + }, + { + "epoch": 0.18340695217975803, + "grad_norm": 46.03987121582031, + "learning_rate": 1.8333119959032136e-06, + "loss": 0.4791, + "num_input_tokens_seen": 9015872, + "step": 2865 + }, + { + "epoch": 0.1837270341207349, + "grad_norm": 29.54501724243164, + "learning_rate": 1.836512610421201e-06, + "loss": 0.4226, + "num_input_tokens_seen": 9031744, + "step": 2870 + }, + { + "epoch": 0.18404711606171179, + "grad_norm": 47.564693450927734, + "learning_rate": 1.8397132249391885e-06, + "loss": 0.6299, + "num_input_tokens_seen": 9047872, + "step": 2875 + }, + { + "epoch": 0.1843671980026887, + "grad_norm": 40.981868743896484, + "learning_rate": 1.842913839457176e-06, + "loss": 0.5186, + "num_input_tokens_seen": 9062976, + "step": 2880 + }, + { + "epoch": 0.18468727994366557, + "grad_norm": 52.84366989135742, + "learning_rate": 1.8461144539751632e-06, + "loss": 0.6009, + "num_input_tokens_seen": 9078208, + "step": 2885 + }, + { + "epoch": 0.18500736188464248, + "grad_norm": 31.023448944091797, + "learning_rate": 1.8493150684931507e-06, + "loss": 0.442, + "num_input_tokens_seen": 9094720, + "step": 2890 + }, + { + "epoch": 0.18532744382561936, + "grad_norm": 38.88870620727539, + "learning_rate": 1.8525156830111381e-06, + "loss": 0.5871, + "num_input_tokens_seen": 9110336, + "step": 2895 + }, + { + "epoch": 0.18564752576659624, + "grad_norm": 30.54854393005371, + "learning_rate": 1.8557162975291256e-06, + "loss": 0.5919, + "num_input_tokens_seen": 9126912, + "step": 2900 + }, + { + "epoch": 0.18596760770757315, + "grad_norm": 30.578502655029297, + "learning_rate": 1.858916912047113e-06, + "loss": 0.5381, + "num_input_tokens_seen": 9142400, + "step": 2905 + }, + { + "epoch": 0.18628768964855003, + "grad_norm": 63.938838958740234, + "learning_rate": 1.8621175265651005e-06, + "loss": 0.5612, + "num_input_tokens_seen": 9157184, + "step": 2910 + }, + { + "epoch": 0.1866077715895269, + "grad_norm": 25.26899528503418, + "learning_rate": 1.8653181410830882e-06, + "loss": 0.5459, + "num_input_tokens_seen": 9173696, + "step": 2915 + }, + { + "epoch": 0.1869278535305038, + "grad_norm": 49.911930084228516, + "learning_rate": 1.8685187556010757e-06, + "loss": 0.5249, + "num_input_tokens_seen": 9190080, + "step": 2920 + }, + { + "epoch": 0.1872479354714807, + "grad_norm": 60.84322738647461, + "learning_rate": 1.8717193701190632e-06, + "loss": 0.6124, + "num_input_tokens_seen": 9204352, + "step": 2925 + }, + { + "epoch": 0.1875680174124576, + "grad_norm": 54.25505828857422, + "learning_rate": 1.8749199846370506e-06, + "loss": 0.5798, + "num_input_tokens_seen": 9219200, + "step": 2930 + }, + { + "epoch": 0.18788809935343448, + "grad_norm": 33.66474914550781, + "learning_rate": 1.878120599155038e-06, + "loss": 0.4787, + "num_input_tokens_seen": 9233600, + "step": 2935 + }, + { + "epoch": 0.18820818129441136, + "grad_norm": 45.08006286621094, + "learning_rate": 1.8813212136730256e-06, + "loss": 0.4465, + "num_input_tokens_seen": 9249536, + "step": 2940 + }, + { + "epoch": 0.18852826323538827, + "grad_norm": 42.213497161865234, + "learning_rate": 1.8845218281910126e-06, + "loss": 0.6955, + "num_input_tokens_seen": 9264256, + "step": 2945 + }, + { + "epoch": 0.18884834517636515, + "grad_norm": 34.07358932495117, + "learning_rate": 1.8877224427090003e-06, + "loss": 0.4204, + "num_input_tokens_seen": 9279488, + "step": 2950 + }, + { + "epoch": 0.18916842711734205, + "grad_norm": 31.774642944335938, + "learning_rate": 1.8909230572269877e-06, + "loss": 0.5249, + "num_input_tokens_seen": 9294336, + "step": 2955 + }, + { + "epoch": 0.18948850905831893, + "grad_norm": 48.98189926147461, + "learning_rate": 1.8941236717449752e-06, + "loss": 0.5779, + "num_input_tokens_seen": 9309376, + "step": 2960 + }, + { + "epoch": 0.1898085909992958, + "grad_norm": 25.99772834777832, + "learning_rate": 1.8973242862629627e-06, + "loss": 0.4775, + "num_input_tokens_seen": 9324224, + "step": 2965 + }, + { + "epoch": 0.19012867294027272, + "grad_norm": 31.74199867248535, + "learning_rate": 1.9005249007809502e-06, + "loss": 0.4359, + "num_input_tokens_seen": 9339136, + "step": 2970 + }, + { + "epoch": 0.1904487548812496, + "grad_norm": 36.28694534301758, + "learning_rate": 1.9037255152989376e-06, + "loss": 0.4773, + "num_input_tokens_seen": 9354688, + "step": 2975 + }, + { + "epoch": 0.19076883682222648, + "grad_norm": 35.98822021484375, + "learning_rate": 1.906926129816925e-06, + "loss": 0.5187, + "num_input_tokens_seen": 9369664, + "step": 2980 + }, + { + "epoch": 0.19108891876320339, + "grad_norm": 45.65713882446289, + "learning_rate": 1.9101267443349123e-06, + "loss": 0.6318, + "num_input_tokens_seen": 9384768, + "step": 2985 + }, + { + "epoch": 0.19140900070418027, + "grad_norm": 37.603233337402344, + "learning_rate": 1.9133273588529e-06, + "loss": 0.4131, + "num_input_tokens_seen": 9400320, + "step": 2990 + }, + { + "epoch": 0.19172908264515717, + "grad_norm": 43.58580780029297, + "learning_rate": 1.9165279733708873e-06, + "loss": 0.4738, + "num_input_tokens_seen": 9415872, + "step": 2995 + }, + { + "epoch": 0.19204916458613405, + "grad_norm": 43.10757827758789, + "learning_rate": 1.9197285878888747e-06, + "loss": 0.5976, + "num_input_tokens_seen": 9432704, + "step": 3000 + }, + { + "epoch": 0.19236924652711093, + "grad_norm": 50.389366149902344, + "learning_rate": 1.922929202406862e-06, + "loss": 0.493, + "num_input_tokens_seen": 9448192, + "step": 3005 + }, + { + "epoch": 0.19268932846808784, + "grad_norm": 37.12274169921875, + "learning_rate": 1.9261298169248497e-06, + "loss": 0.4355, + "num_input_tokens_seen": 9464576, + "step": 3010 + }, + { + "epoch": 0.19300941040906472, + "grad_norm": 40.23035430908203, + "learning_rate": 1.929330431442837e-06, + "loss": 0.5541, + "num_input_tokens_seen": 9479936, + "step": 3015 + }, + { + "epoch": 0.1933294923500416, + "grad_norm": 38.04003143310547, + "learning_rate": 1.9325310459608246e-06, + "loss": 0.584, + "num_input_tokens_seen": 9497280, + "step": 3020 + }, + { + "epoch": 0.1936495742910185, + "grad_norm": 50.77696990966797, + "learning_rate": 1.935731660478812e-06, + "loss": 0.4925, + "num_input_tokens_seen": 9514496, + "step": 3025 + }, + { + "epoch": 0.19396965623199539, + "grad_norm": 34.73927688598633, + "learning_rate": 1.9389322749967995e-06, + "loss": 0.4291, + "num_input_tokens_seen": 9529536, + "step": 3030 + }, + { + "epoch": 0.1942897381729723, + "grad_norm": 62.97267532348633, + "learning_rate": 1.942132889514787e-06, + "loss": 0.5895, + "num_input_tokens_seen": 9545472, + "step": 3035 + }, + { + "epoch": 0.19460982011394917, + "grad_norm": 41.01129150390625, + "learning_rate": 1.9453335040327745e-06, + "loss": 0.6192, + "num_input_tokens_seen": 9560960, + "step": 3040 + }, + { + "epoch": 0.19492990205492605, + "grad_norm": 35.43782043457031, + "learning_rate": 1.948534118550762e-06, + "loss": 0.5843, + "num_input_tokens_seen": 9576320, + "step": 3045 + }, + { + "epoch": 0.19524998399590296, + "grad_norm": 47.53586196899414, + "learning_rate": 1.9517347330687494e-06, + "loss": 0.5828, + "num_input_tokens_seen": 9592192, + "step": 3050 + }, + { + "epoch": 0.19557006593687984, + "grad_norm": 58.23247146606445, + "learning_rate": 1.954935347586737e-06, + "loss": 0.6384, + "num_input_tokens_seen": 9608128, + "step": 3055 + }, + { + "epoch": 0.19589014787785672, + "grad_norm": 29.407941818237305, + "learning_rate": 1.9581359621047243e-06, + "loss": 0.5492, + "num_input_tokens_seen": 9623872, + "step": 3060 + }, + { + "epoch": 0.19621022981883363, + "grad_norm": 32.81422424316406, + "learning_rate": 1.9613365766227114e-06, + "loss": 0.5739, + "num_input_tokens_seen": 9639488, + "step": 3065 + }, + { + "epoch": 0.1965303117598105, + "grad_norm": 32.38412857055664, + "learning_rate": 1.9645371911406993e-06, + "loss": 0.514, + "num_input_tokens_seen": 9655680, + "step": 3070 + }, + { + "epoch": 0.1968503937007874, + "grad_norm": 36.62615966796875, + "learning_rate": 1.9677378056586867e-06, + "loss": 0.5939, + "num_input_tokens_seen": 9670400, + "step": 3075 + }, + { + "epoch": 0.1971704756417643, + "grad_norm": 20.26902198791504, + "learning_rate": 1.9709384201766742e-06, + "loss": 0.4051, + "num_input_tokens_seen": 9685504, + "step": 3080 + }, + { + "epoch": 0.19749055758274117, + "grad_norm": 52.93482208251953, + "learning_rate": 1.9741390346946617e-06, + "loss": 0.6816, + "num_input_tokens_seen": 9701952, + "step": 3085 + }, + { + "epoch": 0.19781063952371808, + "grad_norm": 26.73394012451172, + "learning_rate": 1.977339649212649e-06, + "loss": 0.4583, + "num_input_tokens_seen": 9716928, + "step": 3090 + }, + { + "epoch": 0.19813072146469496, + "grad_norm": 64.89881896972656, + "learning_rate": 1.9805402637306366e-06, + "loss": 0.6711, + "num_input_tokens_seen": 9732096, + "step": 3095 + }, + { + "epoch": 0.19845080340567184, + "grad_norm": 33.69021224975586, + "learning_rate": 1.983740878248624e-06, + "loss": 0.5532, + "num_input_tokens_seen": 9747648, + "step": 3100 + }, + { + "epoch": 0.19877088534664875, + "grad_norm": 18.838212966918945, + "learning_rate": 1.9869414927666116e-06, + "loss": 0.4788, + "num_input_tokens_seen": 9763072, + "step": 3105 + }, + { + "epoch": 0.19909096728762563, + "grad_norm": 30.05465316772461, + "learning_rate": 1.990142107284599e-06, + "loss": 0.4999, + "num_input_tokens_seen": 9778176, + "step": 3110 + }, + { + "epoch": 0.19941104922860253, + "grad_norm": 40.499576568603516, + "learning_rate": 1.9933427218025865e-06, + "loss": 0.5723, + "num_input_tokens_seen": 9793536, + "step": 3115 + }, + { + "epoch": 0.1997311311695794, + "grad_norm": 25.934938430786133, + "learning_rate": 1.996543336320574e-06, + "loss": 0.51, + "num_input_tokens_seen": 9808192, + "step": 3120 + }, + { + "epoch": 0.2000512131105563, + "grad_norm": 38.55900955200195, + "learning_rate": 1.999743950838561e-06, + "loss": 0.615, + "num_input_tokens_seen": 9823232, + "step": 3125 + }, + { + "epoch": 0.2003712950515332, + "grad_norm": 38.68204116821289, + "learning_rate": 2.0029445653565485e-06, + "loss": 0.6188, + "num_input_tokens_seen": 9839488, + "step": 3130 + }, + { + "epoch": 0.20069137699251008, + "grad_norm": 23.846555709838867, + "learning_rate": 2.006145179874536e-06, + "loss": 0.4495, + "num_input_tokens_seen": 9856192, + "step": 3135 + }, + { + "epoch": 0.201011458933487, + "grad_norm": 52.554203033447266, + "learning_rate": 2.0093457943925234e-06, + "loss": 0.5418, + "num_input_tokens_seen": 9871552, + "step": 3140 + }, + { + "epoch": 0.20133154087446387, + "grad_norm": 41.95298385620117, + "learning_rate": 2.012546408910511e-06, + "loss": 0.5226, + "num_input_tokens_seen": 9886016, + "step": 3145 + }, + { + "epoch": 0.20165162281544075, + "grad_norm": 43.12849426269531, + "learning_rate": 2.0157470234284983e-06, + "loss": 0.6394, + "num_input_tokens_seen": 9903552, + "step": 3150 + }, + { + "epoch": 0.20197170475641765, + "grad_norm": 45.6246452331543, + "learning_rate": 2.018947637946486e-06, + "loss": 0.3662, + "num_input_tokens_seen": 9920320, + "step": 3155 + }, + { + "epoch": 0.20229178669739453, + "grad_norm": 38.445648193359375, + "learning_rate": 2.0221482524644733e-06, + "loss": 0.6472, + "num_input_tokens_seen": 9935936, + "step": 3160 + }, + { + "epoch": 0.2026118686383714, + "grad_norm": 31.72264862060547, + "learning_rate": 2.0253488669824607e-06, + "loss": 0.5605, + "num_input_tokens_seen": 9950912, + "step": 3165 + }, + { + "epoch": 0.20293195057934832, + "grad_norm": 36.37489700317383, + "learning_rate": 2.028549481500448e-06, + "loss": 0.5868, + "num_input_tokens_seen": 9966080, + "step": 3170 + }, + { + "epoch": 0.2032520325203252, + "grad_norm": 33.666255950927734, + "learning_rate": 2.0317500960184357e-06, + "loss": 0.4591, + "num_input_tokens_seen": 9982080, + "step": 3175 + }, + { + "epoch": 0.2035721144613021, + "grad_norm": 36.12996292114258, + "learning_rate": 2.034950710536423e-06, + "loss": 0.5424, + "num_input_tokens_seen": 9996544, + "step": 3180 + }, + { + "epoch": 0.20389219640227899, + "grad_norm": 32.287879943847656, + "learning_rate": 2.0381513250544106e-06, + "loss": 0.546, + "num_input_tokens_seen": 10011776, + "step": 3185 + }, + { + "epoch": 0.20421227834325587, + "grad_norm": 29.309194564819336, + "learning_rate": 2.041351939572398e-06, + "loss": 0.4516, + "num_input_tokens_seen": 10027264, + "step": 3190 + }, + { + "epoch": 0.20453236028423277, + "grad_norm": 21.963972091674805, + "learning_rate": 2.0445525540903855e-06, + "loss": 0.4452, + "num_input_tokens_seen": 10042432, + "step": 3195 + }, + { + "epoch": 0.20485244222520965, + "grad_norm": 28.673465728759766, + "learning_rate": 2.047753168608373e-06, + "loss": 0.4167, + "num_input_tokens_seen": 10057792, + "step": 3200 + }, + { + "epoch": 0.20517252416618653, + "grad_norm": 31.394569396972656, + "learning_rate": 2.0509537831263605e-06, + "loss": 0.4757, + "num_input_tokens_seen": 10074752, + "step": 3205 + }, + { + "epoch": 0.20549260610716344, + "grad_norm": 66.8029556274414, + "learning_rate": 2.054154397644348e-06, + "loss": 0.6621, + "num_input_tokens_seen": 10090752, + "step": 3210 + }, + { + "epoch": 0.20581268804814032, + "grad_norm": 53.44734573364258, + "learning_rate": 2.0573550121623354e-06, + "loss": 0.5276, + "num_input_tokens_seen": 10106240, + "step": 3215 + }, + { + "epoch": 0.20613276998911723, + "grad_norm": 33.38349914550781, + "learning_rate": 2.060555626680323e-06, + "loss": 0.4633, + "num_input_tokens_seen": 10121856, + "step": 3220 + }, + { + "epoch": 0.2064528519300941, + "grad_norm": 67.10578155517578, + "learning_rate": 2.0637562411983103e-06, + "loss": 0.4951, + "num_input_tokens_seen": 10137408, + "step": 3225 + }, + { + "epoch": 0.20677293387107099, + "grad_norm": 31.50621223449707, + "learning_rate": 2.066956855716298e-06, + "loss": 0.485, + "num_input_tokens_seen": 10152640, + "step": 3230 + }, + { + "epoch": 0.2070930158120479, + "grad_norm": 38.36640548706055, + "learning_rate": 2.0701574702342853e-06, + "loss": 0.4234, + "num_input_tokens_seen": 10167936, + "step": 3235 + }, + { + "epoch": 0.20741309775302477, + "grad_norm": 33.01862335205078, + "learning_rate": 2.0733580847522727e-06, + "loss": 0.463, + "num_input_tokens_seen": 10183360, + "step": 3240 + }, + { + "epoch": 0.20773317969400165, + "grad_norm": 35.146644592285156, + "learning_rate": 2.0765586992702598e-06, + "loss": 0.5593, + "num_input_tokens_seen": 10198208, + "step": 3245 + }, + { + "epoch": 0.20805326163497856, + "grad_norm": 28.14240264892578, + "learning_rate": 2.0797593137882473e-06, + "loss": 0.5326, + "num_input_tokens_seen": 10213568, + "step": 3250 + }, + { + "epoch": 0.20837334357595544, + "grad_norm": 38.94904708862305, + "learning_rate": 2.0829599283062347e-06, + "loss": 0.4922, + "num_input_tokens_seen": 10230080, + "step": 3255 + }, + { + "epoch": 0.20869342551693235, + "grad_norm": 120.84587860107422, + "learning_rate": 2.086160542824222e-06, + "loss": 0.5181, + "num_input_tokens_seen": 10246912, + "step": 3260 + }, + { + "epoch": 0.20901350745790923, + "grad_norm": 35.72627639770508, + "learning_rate": 2.0893611573422097e-06, + "loss": 0.538, + "num_input_tokens_seen": 10262464, + "step": 3265 + }, + { + "epoch": 0.2093335893988861, + "grad_norm": 38.55387878417969, + "learning_rate": 2.092561771860197e-06, + "loss": 0.6577, + "num_input_tokens_seen": 10278016, + "step": 3270 + }, + { + "epoch": 0.209653671339863, + "grad_norm": 51.4505615234375, + "learning_rate": 2.0957623863781846e-06, + "loss": 0.5375, + "num_input_tokens_seen": 10293824, + "step": 3275 + }, + { + "epoch": 0.2099737532808399, + "grad_norm": 43.18518829345703, + "learning_rate": 2.0989630008961725e-06, + "loss": 0.5761, + "num_input_tokens_seen": 10309568, + "step": 3280 + }, + { + "epoch": 0.21029383522181677, + "grad_norm": 35.21234130859375, + "learning_rate": 2.10216361541416e-06, + "loss": 0.439, + "num_input_tokens_seen": 10325696, + "step": 3285 + }, + { + "epoch": 0.21061391716279368, + "grad_norm": 38.29065704345703, + "learning_rate": 2.1053642299321474e-06, + "loss": 0.5091, + "num_input_tokens_seen": 10342016, + "step": 3290 + }, + { + "epoch": 0.21093399910377056, + "grad_norm": 32.59242630004883, + "learning_rate": 2.108564844450135e-06, + "loss": 0.4905, + "num_input_tokens_seen": 10357632, + "step": 3295 + }, + { + "epoch": 0.21125408104474747, + "grad_norm": 52.8670539855957, + "learning_rate": 2.1117654589681223e-06, + "loss": 0.5472, + "num_input_tokens_seen": 10373056, + "step": 3300 + }, + { + "epoch": 0.21157416298572435, + "grad_norm": 69.28680419921875, + "learning_rate": 2.1149660734861094e-06, + "loss": 0.7056, + "num_input_tokens_seen": 10389248, + "step": 3305 + }, + { + "epoch": 0.21189424492670123, + "grad_norm": 30.316104888916016, + "learning_rate": 2.118166688004097e-06, + "loss": 0.4812, + "num_input_tokens_seen": 10404864, + "step": 3310 + }, + { + "epoch": 0.21221432686767813, + "grad_norm": 17.38062286376953, + "learning_rate": 2.1213673025220843e-06, + "loss": 0.4748, + "num_input_tokens_seen": 10420416, + "step": 3315 + }, + { + "epoch": 0.212534408808655, + "grad_norm": 34.31101989746094, + "learning_rate": 2.124567917040072e-06, + "loss": 0.4937, + "num_input_tokens_seen": 10435456, + "step": 3320 + }, + { + "epoch": 0.21285449074963192, + "grad_norm": 33.25156021118164, + "learning_rate": 2.1277685315580593e-06, + "loss": 0.5216, + "num_input_tokens_seen": 10451584, + "step": 3325 + }, + { + "epoch": 0.2131745726906088, + "grad_norm": 36.52932357788086, + "learning_rate": 2.1309691460760467e-06, + "loss": 0.3753, + "num_input_tokens_seen": 10467584, + "step": 3330 + }, + { + "epoch": 0.21349465463158568, + "grad_norm": 39.36795425415039, + "learning_rate": 2.134169760594034e-06, + "loss": 0.5046, + "num_input_tokens_seen": 10483264, + "step": 3335 + }, + { + "epoch": 0.21381473657256259, + "grad_norm": 34.229454040527344, + "learning_rate": 2.1373703751120217e-06, + "loss": 0.5896, + "num_input_tokens_seen": 10498560, + "step": 3340 + }, + { + "epoch": 0.21413481851353947, + "grad_norm": 41.1175537109375, + "learning_rate": 2.140570989630009e-06, + "loss": 0.4651, + "num_input_tokens_seen": 10514176, + "step": 3345 + }, + { + "epoch": 0.21445490045451635, + "grad_norm": 31.22627067565918, + "learning_rate": 2.1437716041479966e-06, + "loss": 0.4636, + "num_input_tokens_seen": 10529792, + "step": 3350 + }, + { + "epoch": 0.21477498239549325, + "grad_norm": 43.74403762817383, + "learning_rate": 2.146972218665984e-06, + "loss": 0.5346, + "num_input_tokens_seen": 10545856, + "step": 3355 + }, + { + "epoch": 0.21509506433647013, + "grad_norm": 49.46287536621094, + "learning_rate": 2.1501728331839715e-06, + "loss": 0.4138, + "num_input_tokens_seen": 10562368, + "step": 3360 + }, + { + "epoch": 0.21541514627744704, + "grad_norm": 56.734031677246094, + "learning_rate": 2.153373447701959e-06, + "loss": 0.6783, + "num_input_tokens_seen": 10577280, + "step": 3365 + }, + { + "epoch": 0.21573522821842392, + "grad_norm": 32.997650146484375, + "learning_rate": 2.1565740622199465e-06, + "loss": 0.537, + "num_input_tokens_seen": 10592384, + "step": 3370 + }, + { + "epoch": 0.2160553101594008, + "grad_norm": 24.644643783569336, + "learning_rate": 2.159774676737934e-06, + "loss": 0.4837, + "num_input_tokens_seen": 10608704, + "step": 3375 + }, + { + "epoch": 0.2163753921003777, + "grad_norm": 46.55210876464844, + "learning_rate": 2.1629752912559214e-06, + "loss": 0.4391, + "num_input_tokens_seen": 10625280, + "step": 3380 + }, + { + "epoch": 0.21669547404135459, + "grad_norm": 26.87586212158203, + "learning_rate": 2.166175905773909e-06, + "loss": 0.5326, + "num_input_tokens_seen": 10641152, + "step": 3385 + }, + { + "epoch": 0.21701555598233147, + "grad_norm": 36.185699462890625, + "learning_rate": 2.1693765202918963e-06, + "loss": 0.4427, + "num_input_tokens_seen": 10657472, + "step": 3390 + }, + { + "epoch": 0.21733563792330837, + "grad_norm": 28.153026580810547, + "learning_rate": 2.172577134809884e-06, + "loss": 0.529, + "num_input_tokens_seen": 10673600, + "step": 3395 + }, + { + "epoch": 0.21765571986428525, + "grad_norm": 36.479408264160156, + "learning_rate": 2.1757777493278713e-06, + "loss": 0.54, + "num_input_tokens_seen": 10689344, + "step": 3400 + }, + { + "epoch": 0.21797580180526216, + "grad_norm": 32.746978759765625, + "learning_rate": 2.1789783638458587e-06, + "loss": 0.5031, + "num_input_tokens_seen": 10704384, + "step": 3405 + }, + { + "epoch": 0.21829588374623904, + "grad_norm": 38.629215240478516, + "learning_rate": 2.182178978363846e-06, + "loss": 0.4983, + "num_input_tokens_seen": 10720512, + "step": 3410 + }, + { + "epoch": 0.21861596568721592, + "grad_norm": 39.2166748046875, + "learning_rate": 2.1853795928818337e-06, + "loss": 0.5506, + "num_input_tokens_seen": 10735744, + "step": 3415 + }, + { + "epoch": 0.21893604762819283, + "grad_norm": 28.448915481567383, + "learning_rate": 2.188580207399821e-06, + "loss": 0.5132, + "num_input_tokens_seen": 10750848, + "step": 3420 + }, + { + "epoch": 0.2192561295691697, + "grad_norm": 54.67979431152344, + "learning_rate": 2.191780821917808e-06, + "loss": 0.5671, + "num_input_tokens_seen": 10765632, + "step": 3425 + }, + { + "epoch": 0.21957621151014659, + "grad_norm": 52.90773391723633, + "learning_rate": 2.1949814364357956e-06, + "loss": 0.451, + "num_input_tokens_seen": 10780928, + "step": 3430 + }, + { + "epoch": 0.2198962934511235, + "grad_norm": 27.78440284729004, + "learning_rate": 2.198182050953783e-06, + "loss": 0.4231, + "num_input_tokens_seen": 10796864, + "step": 3435 + }, + { + "epoch": 0.22021637539210037, + "grad_norm": 36.3461799621582, + "learning_rate": 2.2013826654717706e-06, + "loss": 0.4668, + "num_input_tokens_seen": 10812672, + "step": 3440 + }, + { + "epoch": 0.22053645733307728, + "grad_norm": 39.273197174072266, + "learning_rate": 2.204583279989758e-06, + "loss": 0.4731, + "num_input_tokens_seen": 10828544, + "step": 3445 + }, + { + "epoch": 0.22085653927405416, + "grad_norm": 25.89637565612793, + "learning_rate": 2.2077838945077455e-06, + "loss": 0.5998, + "num_input_tokens_seen": 10844288, + "step": 3450 + }, + { + "epoch": 0.22117662121503104, + "grad_norm": 46.364906311035156, + "learning_rate": 2.210984509025733e-06, + "loss": 0.5434, + "num_input_tokens_seen": 10860160, + "step": 3455 + }, + { + "epoch": 0.22149670315600795, + "grad_norm": 38.275840759277344, + "learning_rate": 2.2141851235437204e-06, + "loss": 0.6148, + "num_input_tokens_seen": 10875968, + "step": 3460 + }, + { + "epoch": 0.22181678509698483, + "grad_norm": 34.2492561340332, + "learning_rate": 2.217385738061708e-06, + "loss": 0.3782, + "num_input_tokens_seen": 10892096, + "step": 3465 + }, + { + "epoch": 0.2221368670379617, + "grad_norm": 48.338104248046875, + "learning_rate": 2.2205863525796954e-06, + "loss": 0.441, + "num_input_tokens_seen": 10908544, + "step": 3470 + }, + { + "epoch": 0.2224569489789386, + "grad_norm": 59.66163635253906, + "learning_rate": 2.223786967097683e-06, + "loss": 0.515, + "num_input_tokens_seen": 10924544, + "step": 3475 + }, + { + "epoch": 0.2227770309199155, + "grad_norm": 65.29768371582031, + "learning_rate": 2.2269875816156703e-06, + "loss": 0.4791, + "num_input_tokens_seen": 10941888, + "step": 3480 + }, + { + "epoch": 0.2230971128608924, + "grad_norm": 22.127185821533203, + "learning_rate": 2.2301881961336578e-06, + "loss": 0.4866, + "num_input_tokens_seen": 10958592, + "step": 3485 + }, + { + "epoch": 0.22341719480186928, + "grad_norm": 65.63289642333984, + "learning_rate": 2.2333888106516453e-06, + "loss": 0.664, + "num_input_tokens_seen": 10974208, + "step": 3490 + }, + { + "epoch": 0.22373727674284616, + "grad_norm": 28.329952239990234, + "learning_rate": 2.2365894251696327e-06, + "loss": 0.4307, + "num_input_tokens_seen": 10988928, + "step": 3495 + }, + { + "epoch": 0.22405735868382307, + "grad_norm": 39.40082550048828, + "learning_rate": 2.23979003968762e-06, + "loss": 0.4749, + "num_input_tokens_seen": 11005952, + "step": 3500 + }, + { + "epoch": 0.22437744062479995, + "grad_norm": 51.59908676147461, + "learning_rate": 2.2429906542056077e-06, + "loss": 0.4606, + "num_input_tokens_seen": 11020608, + "step": 3505 + }, + { + "epoch": 0.22469752256577685, + "grad_norm": 42.2195930480957, + "learning_rate": 2.246191268723595e-06, + "loss": 0.4978, + "num_input_tokens_seen": 11036736, + "step": 3510 + }, + { + "epoch": 0.22501760450675373, + "grad_norm": 36.639251708984375, + "learning_rate": 2.2493918832415826e-06, + "loss": 0.3839, + "num_input_tokens_seen": 11052480, + "step": 3515 + }, + { + "epoch": 0.2253376864477306, + "grad_norm": 46.27688217163086, + "learning_rate": 2.25259249775957e-06, + "loss": 0.5269, + "num_input_tokens_seen": 11068416, + "step": 3520 + }, + { + "epoch": 0.22565776838870752, + "grad_norm": 41.08591842651367, + "learning_rate": 2.2557931122775575e-06, + "loss": 0.464, + "num_input_tokens_seen": 11086400, + "step": 3525 + }, + { + "epoch": 0.2259778503296844, + "grad_norm": 78.64652252197266, + "learning_rate": 2.258993726795545e-06, + "loss": 0.574, + "num_input_tokens_seen": 11102848, + "step": 3530 + }, + { + "epoch": 0.22629793227066128, + "grad_norm": 41.922210693359375, + "learning_rate": 2.2621943413135325e-06, + "loss": 0.5355, + "num_input_tokens_seen": 11118080, + "step": 3535 + }, + { + "epoch": 0.22661801421163819, + "grad_norm": 26.902446746826172, + "learning_rate": 2.26539495583152e-06, + "loss": 0.5444, + "num_input_tokens_seen": 11133120, + "step": 3540 + }, + { + "epoch": 0.22693809615261507, + "grad_norm": 30.191570281982422, + "learning_rate": 2.268595570349507e-06, + "loss": 0.4621, + "num_input_tokens_seen": 11150144, + "step": 3545 + }, + { + "epoch": 0.22725817809359197, + "grad_norm": 51.89157485961914, + "learning_rate": 2.271796184867495e-06, + "loss": 0.5036, + "num_input_tokens_seen": 11165760, + "step": 3550 + }, + { + "epoch": 0.22757826003456885, + "grad_norm": 54.39223861694336, + "learning_rate": 2.2749967993854823e-06, + "loss": 0.4894, + "num_input_tokens_seen": 11180096, + "step": 3555 + }, + { + "epoch": 0.22789834197554573, + "grad_norm": 20.2183837890625, + "learning_rate": 2.27819741390347e-06, + "loss": 0.396, + "num_input_tokens_seen": 11195584, + "step": 3560 + }, + { + "epoch": 0.22821842391652264, + "grad_norm": 60.52756118774414, + "learning_rate": 2.2813980284214573e-06, + "loss": 0.4426, + "num_input_tokens_seen": 11210944, + "step": 3565 + }, + { + "epoch": 0.22853850585749952, + "grad_norm": 27.151121139526367, + "learning_rate": 2.2845986429394447e-06, + "loss": 0.428, + "num_input_tokens_seen": 11227392, + "step": 3570 + }, + { + "epoch": 0.2288585877984764, + "grad_norm": 36.064151763916016, + "learning_rate": 2.287799257457432e-06, + "loss": 0.6618, + "num_input_tokens_seen": 11244416, + "step": 3575 + }, + { + "epoch": 0.2291786697394533, + "grad_norm": 32.479736328125, + "learning_rate": 2.2909998719754197e-06, + "loss": 0.4212, + "num_input_tokens_seen": 11260672, + "step": 3580 + }, + { + "epoch": 0.22949875168043019, + "grad_norm": 61.9122314453125, + "learning_rate": 2.294200486493407e-06, + "loss": 0.5228, + "num_input_tokens_seen": 11277504, + "step": 3585 + }, + { + "epoch": 0.2298188336214071, + "grad_norm": 99.93607330322266, + "learning_rate": 2.2974011010113946e-06, + "loss": 0.5973, + "num_input_tokens_seen": 11293184, + "step": 3590 + }, + { + "epoch": 0.23013891556238397, + "grad_norm": 30.172042846679688, + "learning_rate": 2.300601715529382e-06, + "loss": 0.3263, + "num_input_tokens_seen": 11308480, + "step": 3595 + }, + { + "epoch": 0.23045899750336085, + "grad_norm": 33.70685577392578, + "learning_rate": 2.3038023300473695e-06, + "loss": 0.4499, + "num_input_tokens_seen": 11323840, + "step": 3600 + }, + { + "epoch": 0.23077907944433776, + "grad_norm": 62.67223358154297, + "learning_rate": 2.3070029445653566e-06, + "loss": 0.6712, + "num_input_tokens_seen": 11339456, + "step": 3605 + }, + { + "epoch": 0.23109916138531464, + "grad_norm": 53.916053771972656, + "learning_rate": 2.310203559083344e-06, + "loss": 0.6056, + "num_input_tokens_seen": 11354880, + "step": 3610 + }, + { + "epoch": 0.23141924332629152, + "grad_norm": 33.28792190551758, + "learning_rate": 2.3134041736013315e-06, + "loss": 0.5216, + "num_input_tokens_seen": 11369984, + "step": 3615 + }, + { + "epoch": 0.23173932526726843, + "grad_norm": 55.69629669189453, + "learning_rate": 2.316604788119319e-06, + "loss": 0.4563, + "num_input_tokens_seen": 11384640, + "step": 3620 + }, + { + "epoch": 0.2320594072082453, + "grad_norm": 46.9283447265625, + "learning_rate": 2.3198054026373064e-06, + "loss": 0.4994, + "num_input_tokens_seen": 11401152, + "step": 3625 + }, + { + "epoch": 0.2323794891492222, + "grad_norm": 39.31660842895508, + "learning_rate": 2.323006017155294e-06, + "loss": 0.4981, + "num_input_tokens_seen": 11416896, + "step": 3630 + }, + { + "epoch": 0.2326995710901991, + "grad_norm": 30.165401458740234, + "learning_rate": 2.3262066316732814e-06, + "loss": 0.5016, + "num_input_tokens_seen": 11432832, + "step": 3635 + }, + { + "epoch": 0.23301965303117597, + "grad_norm": 31.685001373291016, + "learning_rate": 2.329407246191269e-06, + "loss": 0.5421, + "num_input_tokens_seen": 11448320, + "step": 3640 + }, + { + "epoch": 0.23333973497215288, + "grad_norm": 30.91363525390625, + "learning_rate": 2.3326078607092563e-06, + "loss": 0.6042, + "num_input_tokens_seen": 11463488, + "step": 3645 + }, + { + "epoch": 0.23365981691312976, + "grad_norm": 28.006206512451172, + "learning_rate": 2.3358084752272438e-06, + "loss": 0.4562, + "num_input_tokens_seen": 11478592, + "step": 3650 + }, + { + "epoch": 0.23397989885410664, + "grad_norm": 33.18257522583008, + "learning_rate": 2.3390090897452312e-06, + "loss": 0.4173, + "num_input_tokens_seen": 11494720, + "step": 3655 + }, + { + "epoch": 0.23429998079508355, + "grad_norm": 61.95631408691406, + "learning_rate": 2.3422097042632187e-06, + "loss": 0.5285, + "num_input_tokens_seen": 11511232, + "step": 3660 + }, + { + "epoch": 0.23462006273606043, + "grad_norm": 33.45263671875, + "learning_rate": 2.345410318781206e-06, + "loss": 0.5821, + "num_input_tokens_seen": 11527040, + "step": 3665 + }, + { + "epoch": 0.23494014467703733, + "grad_norm": 43.51099395751953, + "learning_rate": 2.3486109332991936e-06, + "loss": 0.5213, + "num_input_tokens_seen": 11542528, + "step": 3670 + }, + { + "epoch": 0.2352602266180142, + "grad_norm": 40.02987289428711, + "learning_rate": 2.351811547817181e-06, + "loss": 0.4849, + "num_input_tokens_seen": 11557696, + "step": 3675 + }, + { + "epoch": 0.2355803085589911, + "grad_norm": 45.00043487548828, + "learning_rate": 2.3550121623351686e-06, + "loss": 0.4685, + "num_input_tokens_seen": 11574848, + "step": 3680 + }, + { + "epoch": 0.235900390499968, + "grad_norm": 29.15911102294922, + "learning_rate": 2.358212776853156e-06, + "loss": 0.531, + "num_input_tokens_seen": 11589696, + "step": 3685 + }, + { + "epoch": 0.23622047244094488, + "grad_norm": 24.568700790405273, + "learning_rate": 2.3614133913711435e-06, + "loss": 0.4164, + "num_input_tokens_seen": 11606848, + "step": 3690 + }, + { + "epoch": 0.23654055438192176, + "grad_norm": 32.191829681396484, + "learning_rate": 2.364614005889131e-06, + "loss": 0.7326, + "num_input_tokens_seen": 11623744, + "step": 3695 + }, + { + "epoch": 0.23686063632289867, + "grad_norm": 31.209732055664062, + "learning_rate": 2.3678146204071184e-06, + "loss": 0.5326, + "num_input_tokens_seen": 11640256, + "step": 3700 + }, + { + "epoch": 0.23718071826387555, + "grad_norm": 32.12062072753906, + "learning_rate": 2.371015234925106e-06, + "loss": 0.4121, + "num_input_tokens_seen": 11655744, + "step": 3705 + }, + { + "epoch": 0.23750080020485245, + "grad_norm": 16.075319290161133, + "learning_rate": 2.3742158494430934e-06, + "loss": 0.3082, + "num_input_tokens_seen": 11671104, + "step": 3710 + }, + { + "epoch": 0.23782088214582933, + "grad_norm": 39.50933837890625, + "learning_rate": 2.377416463961081e-06, + "loss": 0.4167, + "num_input_tokens_seen": 11686848, + "step": 3715 + }, + { + "epoch": 0.2381409640868062, + "grad_norm": 37.043487548828125, + "learning_rate": 2.3806170784790683e-06, + "loss": 0.5506, + "num_input_tokens_seen": 11701952, + "step": 3720 + }, + { + "epoch": 0.23846104602778312, + "grad_norm": 52.412208557128906, + "learning_rate": 2.3838176929970554e-06, + "loss": 0.5445, + "num_input_tokens_seen": 11718592, + "step": 3725 + }, + { + "epoch": 0.23878112796876, + "grad_norm": 25.490497589111328, + "learning_rate": 2.387018307515043e-06, + "loss": 0.5111, + "num_input_tokens_seen": 11734208, + "step": 3730 + }, + { + "epoch": 0.2391012099097369, + "grad_norm": 15.01897144317627, + "learning_rate": 2.3902189220330303e-06, + "loss": 0.4335, + "num_input_tokens_seen": 11748992, + "step": 3735 + }, + { + "epoch": 0.23942129185071379, + "grad_norm": 35.6739616394043, + "learning_rate": 2.3934195365510178e-06, + "loss": 0.4791, + "num_input_tokens_seen": 11764608, + "step": 3740 + }, + { + "epoch": 0.23974137379169067, + "grad_norm": 48.30351257324219, + "learning_rate": 2.3966201510690052e-06, + "loss": 0.5135, + "num_input_tokens_seen": 11780800, + "step": 3745 + }, + { + "epoch": 0.24006145573266757, + "grad_norm": 29.354196548461914, + "learning_rate": 2.3998207655869927e-06, + "loss": 0.5004, + "num_input_tokens_seen": 11795776, + "step": 3750 + }, + { + "epoch": 0.24038153767364445, + "grad_norm": 47.85197067260742, + "learning_rate": 2.40302138010498e-06, + "loss": 0.5629, + "num_input_tokens_seen": 11811776, + "step": 3755 + }, + { + "epoch": 0.24070161961462133, + "grad_norm": 30.454509735107422, + "learning_rate": 2.406221994622968e-06, + "loss": 0.5444, + "num_input_tokens_seen": 11828224, + "step": 3760 + }, + { + "epoch": 0.24102170155559824, + "grad_norm": 46.69361877441406, + "learning_rate": 2.4094226091409555e-06, + "loss": 0.4867, + "num_input_tokens_seen": 11843904, + "step": 3765 + }, + { + "epoch": 0.24134178349657512, + "grad_norm": 54.29519271850586, + "learning_rate": 2.412623223658943e-06, + "loss": 0.6049, + "num_input_tokens_seen": 11859392, + "step": 3770 + }, + { + "epoch": 0.24166186543755203, + "grad_norm": 76.86779022216797, + "learning_rate": 2.4158238381769305e-06, + "loss": 0.5397, + "num_input_tokens_seen": 11875968, + "step": 3775 + }, + { + "epoch": 0.2419819473785289, + "grad_norm": 57.70315170288086, + "learning_rate": 2.419024452694918e-06, + "loss": 0.4635, + "num_input_tokens_seen": 11891584, + "step": 3780 + }, + { + "epoch": 0.24230202931950579, + "grad_norm": 30.214435577392578, + "learning_rate": 2.422225067212905e-06, + "loss": 0.4962, + "num_input_tokens_seen": 11906944, + "step": 3785 + }, + { + "epoch": 0.2426221112604827, + "grad_norm": 40.42811965942383, + "learning_rate": 2.4254256817308924e-06, + "loss": 0.5116, + "num_input_tokens_seen": 11922368, + "step": 3790 + }, + { + "epoch": 0.24294219320145957, + "grad_norm": 38.01627731323242, + "learning_rate": 2.42862629624888e-06, + "loss": 0.5673, + "num_input_tokens_seen": 11938432, + "step": 3795 + }, + { + "epoch": 0.24326227514243645, + "grad_norm": 44.343265533447266, + "learning_rate": 2.4318269107668674e-06, + "loss": 0.5269, + "num_input_tokens_seen": 11956608, + "step": 3800 + }, + { + "epoch": 0.24358235708341336, + "grad_norm": 25.23143196105957, + "learning_rate": 2.435027525284855e-06, + "loss": 0.4994, + "num_input_tokens_seen": 11972480, + "step": 3805 + }, + { + "epoch": 0.24390243902439024, + "grad_norm": 24.453022003173828, + "learning_rate": 2.4382281398028423e-06, + "loss": 0.4374, + "num_input_tokens_seen": 11987072, + "step": 3810 + }, + { + "epoch": 0.24422252096536715, + "grad_norm": 29.0211124420166, + "learning_rate": 2.4414287543208298e-06, + "loss": 0.4759, + "num_input_tokens_seen": 12002112, + "step": 3815 + }, + { + "epoch": 0.24454260290634403, + "grad_norm": 22.636981964111328, + "learning_rate": 2.4446293688388172e-06, + "loss": 0.3848, + "num_input_tokens_seen": 12016960, + "step": 3820 + }, + { + "epoch": 0.2448626848473209, + "grad_norm": 59.252681732177734, + "learning_rate": 2.4478299833568047e-06, + "loss": 0.4593, + "num_input_tokens_seen": 12032576, + "step": 3825 + }, + { + "epoch": 0.2451827667882978, + "grad_norm": 28.256120681762695, + "learning_rate": 2.451030597874792e-06, + "loss": 0.3892, + "num_input_tokens_seen": 12048768, + "step": 3830 + }, + { + "epoch": 0.2455028487292747, + "grad_norm": 40.46773147583008, + "learning_rate": 2.4542312123927796e-06, + "loss": 0.4523, + "num_input_tokens_seen": 12065088, + "step": 3835 + }, + { + "epoch": 0.24582293067025157, + "grad_norm": 41.15385055541992, + "learning_rate": 2.457431826910767e-06, + "loss": 0.6496, + "num_input_tokens_seen": 12080704, + "step": 3840 + }, + { + "epoch": 0.24614301261122848, + "grad_norm": 39.17153549194336, + "learning_rate": 2.4606324414287546e-06, + "loss": 0.4849, + "num_input_tokens_seen": 12096704, + "step": 3845 + }, + { + "epoch": 0.24646309455220536, + "grad_norm": 33.91334915161133, + "learning_rate": 2.463833055946742e-06, + "loss": 0.5674, + "num_input_tokens_seen": 12111488, + "step": 3850 + }, + { + "epoch": 0.24678317649318227, + "grad_norm": 28.956090927124023, + "learning_rate": 2.4670336704647295e-06, + "loss": 0.523, + "num_input_tokens_seen": 12128192, + "step": 3855 + }, + { + "epoch": 0.24710325843415915, + "grad_norm": 19.10064125061035, + "learning_rate": 2.470234284982717e-06, + "loss": 0.4971, + "num_input_tokens_seen": 12143808, + "step": 3860 + }, + { + "epoch": 0.24742334037513602, + "grad_norm": 34.386478424072266, + "learning_rate": 2.4734348995007044e-06, + "loss": 0.5054, + "num_input_tokens_seen": 12159232, + "step": 3865 + }, + { + "epoch": 0.24774342231611293, + "grad_norm": 27.23644256591797, + "learning_rate": 2.476635514018692e-06, + "loss": 0.4884, + "num_input_tokens_seen": 12175360, + "step": 3870 + }, + { + "epoch": 0.2480635042570898, + "grad_norm": 15.254672050476074, + "learning_rate": 2.4798361285366794e-06, + "loss": 0.4883, + "num_input_tokens_seen": 12190272, + "step": 3875 + }, + { + "epoch": 0.2483835861980667, + "grad_norm": 41.05552291870117, + "learning_rate": 2.483036743054667e-06, + "loss": 0.5172, + "num_input_tokens_seen": 12208448, + "step": 3880 + }, + { + "epoch": 0.2487036681390436, + "grad_norm": 40.22503662109375, + "learning_rate": 2.4862373575726543e-06, + "loss": 0.7252, + "num_input_tokens_seen": 12225984, + "step": 3885 + }, + { + "epoch": 0.24902375008002048, + "grad_norm": 31.924701690673828, + "learning_rate": 2.4894379720906418e-06, + "loss": 0.5371, + "num_input_tokens_seen": 12241408, + "step": 3890 + }, + { + "epoch": 0.24934383202099739, + "grad_norm": 33.78814697265625, + "learning_rate": 2.4926385866086292e-06, + "loss": 0.4739, + "num_input_tokens_seen": 12256064, + "step": 3895 + }, + { + "epoch": 0.24966391396197427, + "grad_norm": 25.134414672851562, + "learning_rate": 2.4958392011266167e-06, + "loss": 0.4953, + "num_input_tokens_seen": 12272832, + "step": 3900 + }, + { + "epoch": 0.24998399590295114, + "grad_norm": 31.279550552368164, + "learning_rate": 2.4990398156446038e-06, + "loss": 0.6094, + "num_input_tokens_seen": 12289024, + "step": 3905 + }, + { + "epoch": 0.25004801229114654, + "eval_loss": 0.5013530850410461, + "eval_runtime": 51.1333, + "eval_samples_per_second": 271.565, + "eval_steps_per_second": 33.95, + "num_input_tokens_seen": 12292032, + "step": 3906 + }, + { + "epoch": 0.25030407784392805, + "grad_norm": 33.843570709228516, + "learning_rate": 2.5022404301625912e-06, + "loss": 0.4658, + "num_input_tokens_seen": 12304064, + "step": 3910 + }, + { + "epoch": 0.25062415978490493, + "grad_norm": 29.35317039489746, + "learning_rate": 2.5054410446805787e-06, + "loss": 0.5182, + "num_input_tokens_seen": 12320256, + "step": 3915 + }, + { + "epoch": 0.2509442417258818, + "grad_norm": 26.291515350341797, + "learning_rate": 2.508641659198566e-06, + "loss": 0.5085, + "num_input_tokens_seen": 12335360, + "step": 3920 + }, + { + "epoch": 0.2512643236668587, + "grad_norm": 26.62727928161621, + "learning_rate": 2.5118422737165536e-06, + "loss": 0.5237, + "num_input_tokens_seen": 12351488, + "step": 3925 + }, + { + "epoch": 0.2515844056078356, + "grad_norm": 66.48930358886719, + "learning_rate": 2.515042888234541e-06, + "loss": 0.6624, + "num_input_tokens_seen": 12365952, + "step": 3930 + }, + { + "epoch": 0.2519044875488125, + "grad_norm": 46.302879333496094, + "learning_rate": 2.5182435027525286e-06, + "loss": 0.5619, + "num_input_tokens_seen": 12381376, + "step": 3935 + }, + { + "epoch": 0.2522245694897894, + "grad_norm": 45.5851936340332, + "learning_rate": 2.521444117270516e-06, + "loss": 0.4985, + "num_input_tokens_seen": 12396160, + "step": 3940 + }, + { + "epoch": 0.25254465143076626, + "grad_norm": 27.25436782836914, + "learning_rate": 2.5246447317885035e-06, + "loss": 0.5166, + "num_input_tokens_seen": 12410944, + "step": 3945 + }, + { + "epoch": 0.25286473337174314, + "grad_norm": 26.741636276245117, + "learning_rate": 2.527845346306491e-06, + "loss": 0.3987, + "num_input_tokens_seen": 12426304, + "step": 3950 + }, + { + "epoch": 0.2531848153127201, + "grad_norm": 49.20320129394531, + "learning_rate": 2.5310459608244784e-06, + "loss": 0.4964, + "num_input_tokens_seen": 12442432, + "step": 3955 + }, + { + "epoch": 0.25350489725369696, + "grad_norm": 23.122488021850586, + "learning_rate": 2.534246575342466e-06, + "loss": 0.523, + "num_input_tokens_seen": 12458368, + "step": 3960 + }, + { + "epoch": 0.25382497919467384, + "grad_norm": 33.18400192260742, + "learning_rate": 2.5374471898604538e-06, + "loss": 0.4316, + "num_input_tokens_seen": 12474368, + "step": 3965 + }, + { + "epoch": 0.2541450611356507, + "grad_norm": 24.35385513305664, + "learning_rate": 2.5406478043784412e-06, + "loss": 0.5835, + "num_input_tokens_seen": 12490176, + "step": 3970 + }, + { + "epoch": 0.2544651430766276, + "grad_norm": 31.626466751098633, + "learning_rate": 2.5438484188964287e-06, + "loss": 0.6054, + "num_input_tokens_seen": 12507840, + "step": 3975 + }, + { + "epoch": 0.25478522501760453, + "grad_norm": 46.87931823730469, + "learning_rate": 2.547049033414416e-06, + "loss": 0.4305, + "num_input_tokens_seen": 12523520, + "step": 3980 + }, + { + "epoch": 0.2551053069585814, + "grad_norm": 33.321903228759766, + "learning_rate": 2.5502496479324037e-06, + "loss": 0.5948, + "num_input_tokens_seen": 12540736, + "step": 3985 + }, + { + "epoch": 0.2554253888995583, + "grad_norm": 45.20151138305664, + "learning_rate": 2.553450262450391e-06, + "loss": 0.5259, + "num_input_tokens_seen": 12556352, + "step": 3990 + }, + { + "epoch": 0.25574547084053517, + "grad_norm": 23.539976119995117, + "learning_rate": 2.5566508769683777e-06, + "loss": 0.3837, + "num_input_tokens_seen": 12570688, + "step": 3995 + }, + { + "epoch": 0.25606555278151205, + "grad_norm": 19.51964569091797, + "learning_rate": 2.559851491486365e-06, + "loss": 0.4606, + "num_input_tokens_seen": 12585984, + "step": 4000 + }, + { + "epoch": 0.25638563472248893, + "grad_norm": 24.286575317382812, + "learning_rate": 2.5630521060043527e-06, + "loss": 0.5053, + "num_input_tokens_seen": 12601216, + "step": 4005 + }, + { + "epoch": 0.25670571666346587, + "grad_norm": 51.9384880065918, + "learning_rate": 2.56625272052234e-06, + "loss": 0.4721, + "num_input_tokens_seen": 12618624, + "step": 4010 + }, + { + "epoch": 0.25702579860444275, + "grad_norm": 25.84684181213379, + "learning_rate": 2.5694533350403276e-06, + "loss": 0.6074, + "num_input_tokens_seen": 12635840, + "step": 4015 + }, + { + "epoch": 0.2573458805454196, + "grad_norm": 23.537113189697266, + "learning_rate": 2.572653949558315e-06, + "loss": 0.4088, + "num_input_tokens_seen": 12651904, + "step": 4020 + }, + { + "epoch": 0.2576659624863965, + "grad_norm": 32.8919792175293, + "learning_rate": 2.5758545640763025e-06, + "loss": 0.599, + "num_input_tokens_seen": 12666944, + "step": 4025 + }, + { + "epoch": 0.2579860444273734, + "grad_norm": 31.139699935913086, + "learning_rate": 2.5790551785942904e-06, + "loss": 0.4438, + "num_input_tokens_seen": 12682688, + "step": 4030 + }, + { + "epoch": 0.2583061263683503, + "grad_norm": 43.784461975097656, + "learning_rate": 2.582255793112278e-06, + "loss": 0.5512, + "num_input_tokens_seen": 12698304, + "step": 4035 + }, + { + "epoch": 0.2586262083093272, + "grad_norm": 38.50110626220703, + "learning_rate": 2.5854564076302654e-06, + "loss": 0.5183, + "num_input_tokens_seen": 12712896, + "step": 4040 + }, + { + "epoch": 0.2589462902503041, + "grad_norm": 54.73103713989258, + "learning_rate": 2.588657022148253e-06, + "loss": 0.3824, + "num_input_tokens_seen": 12727488, + "step": 4045 + }, + { + "epoch": 0.25926637219128096, + "grad_norm": 40.12355422973633, + "learning_rate": 2.5918576366662403e-06, + "loss": 0.3892, + "num_input_tokens_seen": 12742720, + "step": 4050 + }, + { + "epoch": 0.25958645413225784, + "grad_norm": 36.95515060424805, + "learning_rate": 2.5950582511842278e-06, + "loss": 0.4508, + "num_input_tokens_seen": 12757760, + "step": 4055 + }, + { + "epoch": 0.2599065360732348, + "grad_norm": 43.24046325683594, + "learning_rate": 2.5982588657022152e-06, + "loss": 0.5575, + "num_input_tokens_seen": 12773312, + "step": 4060 + }, + { + "epoch": 0.26022661801421165, + "grad_norm": 67.74784088134766, + "learning_rate": 2.6014594802202027e-06, + "loss": 0.6196, + "num_input_tokens_seen": 12790336, + "step": 4065 + }, + { + "epoch": 0.26054669995518853, + "grad_norm": 27.64333724975586, + "learning_rate": 2.60466009473819e-06, + "loss": 0.4902, + "num_input_tokens_seen": 12805632, + "step": 4070 + }, + { + "epoch": 0.2608667818961654, + "grad_norm": 32.454856872558594, + "learning_rate": 2.6078607092561776e-06, + "loss": 0.5747, + "num_input_tokens_seen": 12821184, + "step": 4075 + }, + { + "epoch": 0.2611868638371423, + "grad_norm": 32.209415435791016, + "learning_rate": 2.611061323774165e-06, + "loss": 0.5308, + "num_input_tokens_seen": 12836544, + "step": 4080 + }, + { + "epoch": 0.26150694577811917, + "grad_norm": 42.24782180786133, + "learning_rate": 2.6142619382921526e-06, + "loss": 0.5001, + "num_input_tokens_seen": 12852032, + "step": 4085 + }, + { + "epoch": 0.2618270277190961, + "grad_norm": 34.42269515991211, + "learning_rate": 2.61746255281014e-06, + "loss": 0.4556, + "num_input_tokens_seen": 12867456, + "step": 4090 + }, + { + "epoch": 0.262147109660073, + "grad_norm": 23.578750610351562, + "learning_rate": 2.6206631673281275e-06, + "loss": 0.3812, + "num_input_tokens_seen": 12885184, + "step": 4095 + }, + { + "epoch": 0.26246719160104987, + "grad_norm": 31.62354850769043, + "learning_rate": 2.623863781846115e-06, + "loss": 0.4032, + "num_input_tokens_seen": 12900416, + "step": 4100 + }, + { + "epoch": 0.26278727354202674, + "grad_norm": 41.52431106567383, + "learning_rate": 2.6270643963641024e-06, + "loss": 0.6657, + "num_input_tokens_seen": 12915648, + "step": 4105 + }, + { + "epoch": 0.2631073554830036, + "grad_norm": 16.895889282226562, + "learning_rate": 2.63026501088209e-06, + "loss": 0.4769, + "num_input_tokens_seen": 12930368, + "step": 4110 + }, + { + "epoch": 0.26342743742398056, + "grad_norm": 24.371248245239258, + "learning_rate": 2.633465625400077e-06, + "loss": 0.5281, + "num_input_tokens_seen": 12945472, + "step": 4115 + }, + { + "epoch": 0.26374751936495744, + "grad_norm": 40.16469192504883, + "learning_rate": 2.6366662399180644e-06, + "loss": 0.5905, + "num_input_tokens_seen": 12961472, + "step": 4120 + }, + { + "epoch": 0.2640676013059343, + "grad_norm": 39.42387390136719, + "learning_rate": 2.639866854436052e-06, + "loss": 0.6382, + "num_input_tokens_seen": 12975872, + "step": 4125 + }, + { + "epoch": 0.2643876832469112, + "grad_norm": 32.50996398925781, + "learning_rate": 2.6430674689540394e-06, + "loss": 0.5455, + "num_input_tokens_seen": 12992128, + "step": 4130 + }, + { + "epoch": 0.2647077651878881, + "grad_norm": 17.82326889038086, + "learning_rate": 2.646268083472027e-06, + "loss": 0.4478, + "num_input_tokens_seen": 13021184, + "step": 4135 + }, + { + "epoch": 0.265027847128865, + "grad_norm": 27.730472564697266, + "learning_rate": 2.6494686979900143e-06, + "loss": 0.4527, + "num_input_tokens_seen": 13037568, + "step": 4140 + }, + { + "epoch": 0.2653479290698419, + "grad_norm": 34.77454376220703, + "learning_rate": 2.6526693125080018e-06, + "loss": 0.5069, + "num_input_tokens_seen": 13053440, + "step": 4145 + }, + { + "epoch": 0.26566801101081877, + "grad_norm": 48.517513275146484, + "learning_rate": 2.6558699270259892e-06, + "loss": 0.6118, + "num_input_tokens_seen": 13069440, + "step": 4150 + }, + { + "epoch": 0.26598809295179565, + "grad_norm": 49.84355545043945, + "learning_rate": 2.6590705415439767e-06, + "loss": 0.5617, + "num_input_tokens_seen": 13084224, + "step": 4155 + }, + { + "epoch": 0.26630817489277253, + "grad_norm": 42.1230354309082, + "learning_rate": 2.662271156061964e-06, + "loss": 0.7227, + "num_input_tokens_seen": 13098688, + "step": 4160 + }, + { + "epoch": 0.26662825683374947, + "grad_norm": 27.500717163085938, + "learning_rate": 2.6654717705799516e-06, + "loss": 0.4641, + "num_input_tokens_seen": 13114112, + "step": 4165 + }, + { + "epoch": 0.26694833877472635, + "grad_norm": 29.926355361938477, + "learning_rate": 2.668672385097939e-06, + "loss": 0.4726, + "num_input_tokens_seen": 13130240, + "step": 4170 + }, + { + "epoch": 0.2672684207157032, + "grad_norm": 37.02042007446289, + "learning_rate": 2.6718729996159266e-06, + "loss": 0.4111, + "num_input_tokens_seen": 13145536, + "step": 4175 + }, + { + "epoch": 0.2675885026566801, + "grad_norm": 39.15915298461914, + "learning_rate": 2.675073614133914e-06, + "loss": 0.6038, + "num_input_tokens_seen": 13160256, + "step": 4180 + }, + { + "epoch": 0.267908584597657, + "grad_norm": 42.32218933105469, + "learning_rate": 2.6782742286519015e-06, + "loss": 0.5761, + "num_input_tokens_seen": 13177088, + "step": 4185 + }, + { + "epoch": 0.26822866653863386, + "grad_norm": 27.79146385192871, + "learning_rate": 2.681474843169889e-06, + "loss": 0.5292, + "num_input_tokens_seen": 13192768, + "step": 4190 + }, + { + "epoch": 0.2685487484796108, + "grad_norm": 31.111082077026367, + "learning_rate": 2.6846754576878764e-06, + "loss": 0.5566, + "num_input_tokens_seen": 13208192, + "step": 4195 + }, + { + "epoch": 0.2688688304205877, + "grad_norm": 41.88045120239258, + "learning_rate": 2.687876072205864e-06, + "loss": 0.603, + "num_input_tokens_seen": 13223872, + "step": 4200 + }, + { + "epoch": 0.26918891236156456, + "grad_norm": 20.85816764831543, + "learning_rate": 2.6910766867238514e-06, + "loss": 0.5049, + "num_input_tokens_seen": 13239104, + "step": 4205 + }, + { + "epoch": 0.26950899430254144, + "grad_norm": 33.59029769897461, + "learning_rate": 2.694277301241839e-06, + "loss": 0.3918, + "num_input_tokens_seen": 13254464, + "step": 4210 + }, + { + "epoch": 0.2698290762435183, + "grad_norm": 25.215017318725586, + "learning_rate": 2.6974779157598263e-06, + "loss": 0.3768, + "num_input_tokens_seen": 13269824, + "step": 4215 + }, + { + "epoch": 0.27014915818449525, + "grad_norm": 24.753707885742188, + "learning_rate": 2.7006785302778138e-06, + "loss": 0.4682, + "num_input_tokens_seen": 13288448, + "step": 4220 + }, + { + "epoch": 0.27046924012547213, + "grad_norm": 37.09461975097656, + "learning_rate": 2.7038791447958012e-06, + "loss": 0.5645, + "num_input_tokens_seen": 13304320, + "step": 4225 + }, + { + "epoch": 0.270789322066449, + "grad_norm": 44.19548416137695, + "learning_rate": 2.7070797593137883e-06, + "loss": 0.6966, + "num_input_tokens_seen": 13321920, + "step": 4230 + }, + { + "epoch": 0.2711094040074259, + "grad_norm": 55.688716888427734, + "learning_rate": 2.7102803738317757e-06, + "loss": 0.4596, + "num_input_tokens_seen": 13337280, + "step": 4235 + }, + { + "epoch": 0.27142948594840277, + "grad_norm": 19.742685317993164, + "learning_rate": 2.713480988349763e-06, + "loss": 0.4459, + "num_input_tokens_seen": 13354112, + "step": 4240 + }, + { + "epoch": 0.2717495678893797, + "grad_norm": 33.530784606933594, + "learning_rate": 2.7166816028677507e-06, + "loss": 0.4562, + "num_input_tokens_seen": 13369600, + "step": 4245 + }, + { + "epoch": 0.2720696498303566, + "grad_norm": 29.451522827148438, + "learning_rate": 2.719882217385738e-06, + "loss": 0.4506, + "num_input_tokens_seen": 13385536, + "step": 4250 + }, + { + "epoch": 0.27238973177133347, + "grad_norm": 33.94363784790039, + "learning_rate": 2.7230828319037256e-06, + "loss": 0.4474, + "num_input_tokens_seen": 13400832, + "step": 4255 + }, + { + "epoch": 0.27270981371231034, + "grad_norm": 35.43048858642578, + "learning_rate": 2.726283446421713e-06, + "loss": 0.3514, + "num_input_tokens_seen": 13416000, + "step": 4260 + }, + { + "epoch": 0.2730298956532872, + "grad_norm": 43.50857162475586, + "learning_rate": 2.7294840609397005e-06, + "loss": 0.5444, + "num_input_tokens_seen": 13431360, + "step": 4265 + }, + { + "epoch": 0.2733499775942641, + "grad_norm": 56.93281173706055, + "learning_rate": 2.732684675457688e-06, + "loss": 0.5426, + "num_input_tokens_seen": 13446976, + "step": 4270 + }, + { + "epoch": 0.27367005953524104, + "grad_norm": 22.420866012573242, + "learning_rate": 2.7358852899756755e-06, + "loss": 0.4505, + "num_input_tokens_seen": 13463040, + "step": 4275 + }, + { + "epoch": 0.2739901414762179, + "grad_norm": 40.55877685546875, + "learning_rate": 2.739085904493663e-06, + "loss": 0.7821, + "num_input_tokens_seen": 13478656, + "step": 4280 + }, + { + "epoch": 0.2743102234171948, + "grad_norm": 31.715986251831055, + "learning_rate": 2.7422865190116504e-06, + "loss": 0.5213, + "num_input_tokens_seen": 13495296, + "step": 4285 + }, + { + "epoch": 0.2746303053581717, + "grad_norm": 19.550508499145508, + "learning_rate": 2.745487133529638e-06, + "loss": 0.4987, + "num_input_tokens_seen": 13510912, + "step": 4290 + }, + { + "epoch": 0.27495038729914856, + "grad_norm": 27.051515579223633, + "learning_rate": 2.7486877480476253e-06, + "loss": 0.4589, + "num_input_tokens_seen": 13525952, + "step": 4295 + }, + { + "epoch": 0.2752704692401255, + "grad_norm": 27.03931427001953, + "learning_rate": 2.751888362565613e-06, + "loss": 0.5413, + "num_input_tokens_seen": 13546752, + "step": 4300 + }, + { + "epoch": 0.2755905511811024, + "grad_norm": 34.389522552490234, + "learning_rate": 2.7550889770836003e-06, + "loss": 0.5601, + "num_input_tokens_seen": 13561728, + "step": 4305 + }, + { + "epoch": 0.27591063312207925, + "grad_norm": 22.863780975341797, + "learning_rate": 2.7582895916015877e-06, + "loss": 0.4906, + "num_input_tokens_seen": 13577344, + "step": 4310 + }, + { + "epoch": 0.27623071506305613, + "grad_norm": 29.320947647094727, + "learning_rate": 2.761490206119575e-06, + "loss": 0.5657, + "num_input_tokens_seen": 13592256, + "step": 4315 + }, + { + "epoch": 0.276550797004033, + "grad_norm": 37.63776779174805, + "learning_rate": 2.7646908206375627e-06, + "loss": 0.5641, + "num_input_tokens_seen": 13608832, + "step": 4320 + }, + { + "epoch": 0.27687087894500995, + "grad_norm": 27.637666702270508, + "learning_rate": 2.76789143515555e-06, + "loss": 0.5632, + "num_input_tokens_seen": 13624128, + "step": 4325 + }, + { + "epoch": 0.2771909608859868, + "grad_norm": 28.94356346130371, + "learning_rate": 2.7710920496735376e-06, + "loss": 0.5651, + "num_input_tokens_seen": 13640576, + "step": 4330 + }, + { + "epoch": 0.2775110428269637, + "grad_norm": 27.817543029785156, + "learning_rate": 2.774292664191525e-06, + "loss": 0.5601, + "num_input_tokens_seen": 13655872, + "step": 4335 + }, + { + "epoch": 0.2778311247679406, + "grad_norm": 35.48267364501953, + "learning_rate": 2.7774932787095125e-06, + "loss": 0.7039, + "num_input_tokens_seen": 13672384, + "step": 4340 + }, + { + "epoch": 0.27815120670891746, + "grad_norm": 23.586395263671875, + "learning_rate": 2.7806938932275e-06, + "loss": 0.4537, + "num_input_tokens_seen": 13687296, + "step": 4345 + }, + { + "epoch": 0.2784712886498944, + "grad_norm": 29.500123977661133, + "learning_rate": 2.783894507745487e-06, + "loss": 0.5639, + "num_input_tokens_seen": 13702528, + "step": 4350 + }, + { + "epoch": 0.2787913705908713, + "grad_norm": 41.90767288208008, + "learning_rate": 2.7870951222634745e-06, + "loss": 0.4969, + "num_input_tokens_seen": 13717568, + "step": 4355 + }, + { + "epoch": 0.27911145253184816, + "grad_norm": 24.220903396606445, + "learning_rate": 2.790295736781462e-06, + "loss": 0.4192, + "num_input_tokens_seen": 13733568, + "step": 4360 + }, + { + "epoch": 0.27943153447282504, + "grad_norm": 26.489349365234375, + "learning_rate": 2.7934963512994495e-06, + "loss": 0.406, + "num_input_tokens_seen": 13749312, + "step": 4365 + }, + { + "epoch": 0.2797516164138019, + "grad_norm": 19.172691345214844, + "learning_rate": 2.796696965817437e-06, + "loss": 0.5638, + "num_input_tokens_seen": 13765952, + "step": 4370 + }, + { + "epoch": 0.2800716983547788, + "grad_norm": 46.4947509765625, + "learning_rate": 2.7998975803354244e-06, + "loss": 0.555, + "num_input_tokens_seen": 13781440, + "step": 4375 + }, + { + "epoch": 0.28039178029575573, + "grad_norm": 52.319610595703125, + "learning_rate": 2.803098194853412e-06, + "loss": 0.4757, + "num_input_tokens_seen": 13796864, + "step": 4380 + }, + { + "epoch": 0.2807118622367326, + "grad_norm": 31.000598907470703, + "learning_rate": 2.8062988093713993e-06, + "loss": 0.5962, + "num_input_tokens_seen": 13812736, + "step": 4385 + }, + { + "epoch": 0.2810319441777095, + "grad_norm": 46.62118148803711, + "learning_rate": 2.809499423889387e-06, + "loss": 0.4975, + "num_input_tokens_seen": 13828288, + "step": 4390 + }, + { + "epoch": 0.28135202611868637, + "grad_norm": 30.784732818603516, + "learning_rate": 2.8127000384073743e-06, + "loss": 0.491, + "num_input_tokens_seen": 13845568, + "step": 4395 + }, + { + "epoch": 0.28167210805966325, + "grad_norm": 29.354412078857422, + "learning_rate": 2.8159006529253617e-06, + "loss": 0.4427, + "num_input_tokens_seen": 13860608, + "step": 4400 + }, + { + "epoch": 0.2819921900006402, + "grad_norm": 34.374046325683594, + "learning_rate": 2.819101267443349e-06, + "loss": 0.5652, + "num_input_tokens_seen": 13876608, + "step": 4405 + }, + { + "epoch": 0.28231227194161707, + "grad_norm": 33.037784576416016, + "learning_rate": 2.8223018819613367e-06, + "loss": 0.5074, + "num_input_tokens_seen": 13893632, + "step": 4410 + }, + { + "epoch": 0.28263235388259395, + "grad_norm": 49.912601470947266, + "learning_rate": 2.825502496479324e-06, + "loss": 0.4899, + "num_input_tokens_seen": 13909312, + "step": 4415 + }, + { + "epoch": 0.2829524358235708, + "grad_norm": 33.34136199951172, + "learning_rate": 2.8287031109973116e-06, + "loss": 0.6169, + "num_input_tokens_seen": 13925824, + "step": 4420 + }, + { + "epoch": 0.2832725177645477, + "grad_norm": 33.20978927612305, + "learning_rate": 2.831903725515299e-06, + "loss": 0.5673, + "num_input_tokens_seen": 13942336, + "step": 4425 + }, + { + "epoch": 0.28359259970552464, + "grad_norm": 29.62407684326172, + "learning_rate": 2.8351043400332865e-06, + "loss": 0.4293, + "num_input_tokens_seen": 13957760, + "step": 4430 + }, + { + "epoch": 0.2839126816465015, + "grad_norm": 34.68098068237305, + "learning_rate": 2.838304954551274e-06, + "loss": 0.5372, + "num_input_tokens_seen": 13972992, + "step": 4435 + }, + { + "epoch": 0.2842327635874784, + "grad_norm": 32.96346664428711, + "learning_rate": 2.8415055690692615e-06, + "loss": 0.4939, + "num_input_tokens_seen": 13988736, + "step": 4440 + }, + { + "epoch": 0.2845528455284553, + "grad_norm": 39.13813018798828, + "learning_rate": 2.8447061835872494e-06, + "loss": 0.6253, + "num_input_tokens_seen": 14003520, + "step": 4445 + }, + { + "epoch": 0.28487292746943216, + "grad_norm": 31.526294708251953, + "learning_rate": 2.847906798105237e-06, + "loss": 0.5701, + "num_input_tokens_seen": 14020608, + "step": 4450 + }, + { + "epoch": 0.28519300941040904, + "grad_norm": 33.982120513916016, + "learning_rate": 2.8511074126232243e-06, + "loss": 0.4201, + "num_input_tokens_seen": 14035328, + "step": 4455 + }, + { + "epoch": 0.285513091351386, + "grad_norm": 38.2498779296875, + "learning_rate": 2.8543080271412118e-06, + "loss": 0.484, + "num_input_tokens_seen": 14050816, + "step": 4460 + }, + { + "epoch": 0.28583317329236285, + "grad_norm": 18.74299430847168, + "learning_rate": 2.8575086416591992e-06, + "loss": 0.3641, + "num_input_tokens_seen": 14066880, + "step": 4465 + }, + { + "epoch": 0.28615325523333973, + "grad_norm": 27.665557861328125, + "learning_rate": 2.860709256177186e-06, + "loss": 0.5482, + "num_input_tokens_seen": 14081728, + "step": 4470 + }, + { + "epoch": 0.2864733371743166, + "grad_norm": 27.132463455200195, + "learning_rate": 2.8639098706951733e-06, + "loss": 0.5251, + "num_input_tokens_seen": 14097984, + "step": 4475 + }, + { + "epoch": 0.2867934191152935, + "grad_norm": 28.598159790039062, + "learning_rate": 2.8671104852131608e-06, + "loss": 0.4527, + "num_input_tokens_seen": 14115264, + "step": 4480 + }, + { + "epoch": 0.2871135010562704, + "grad_norm": 26.22392463684082, + "learning_rate": 2.8703110997311482e-06, + "loss": 0.3157, + "num_input_tokens_seen": 14129472, + "step": 4485 + }, + { + "epoch": 0.2874335829972473, + "grad_norm": 36.61156463623047, + "learning_rate": 2.8735117142491357e-06, + "loss": 0.63, + "num_input_tokens_seen": 14144896, + "step": 4490 + }, + { + "epoch": 0.2877536649382242, + "grad_norm": 33.44105529785156, + "learning_rate": 2.876712328767123e-06, + "loss": 0.3895, + "num_input_tokens_seen": 14159744, + "step": 4495 + }, + { + "epoch": 0.28807374687920106, + "grad_norm": 40.605831146240234, + "learning_rate": 2.8799129432851106e-06, + "loss": 0.4867, + "num_input_tokens_seen": 14176384, + "step": 4500 + }, + { + "epoch": 0.28839382882017794, + "grad_norm": 40.3709716796875, + "learning_rate": 2.883113557803098e-06, + "loss": 0.5708, + "num_input_tokens_seen": 14192320, + "step": 4505 + }, + { + "epoch": 0.2887139107611549, + "grad_norm": 25.131389617919922, + "learning_rate": 2.886314172321086e-06, + "loss": 0.4018, + "num_input_tokens_seen": 14208192, + "step": 4510 + }, + { + "epoch": 0.28903399270213176, + "grad_norm": 68.00257110595703, + "learning_rate": 2.8895147868390735e-06, + "loss": 0.6319, + "num_input_tokens_seen": 14223104, + "step": 4515 + }, + { + "epoch": 0.28935407464310864, + "grad_norm": 40.51014709472656, + "learning_rate": 2.892715401357061e-06, + "loss": 0.4392, + "num_input_tokens_seen": 14238464, + "step": 4520 + }, + { + "epoch": 0.2896741565840855, + "grad_norm": 40.63906478881836, + "learning_rate": 2.8959160158750484e-06, + "loss": 0.4296, + "num_input_tokens_seen": 14254656, + "step": 4525 + }, + { + "epoch": 0.2899942385250624, + "grad_norm": 57.27716064453125, + "learning_rate": 2.899116630393036e-06, + "loss": 0.4133, + "num_input_tokens_seen": 14268928, + "step": 4530 + }, + { + "epoch": 0.29031432046603933, + "grad_norm": 29.19027328491211, + "learning_rate": 2.9023172449110233e-06, + "loss": 0.5015, + "num_input_tokens_seen": 14285952, + "step": 4535 + }, + { + "epoch": 0.2906344024070162, + "grad_norm": 28.411819458007812, + "learning_rate": 2.905517859429011e-06, + "loss": 0.4114, + "num_input_tokens_seen": 14301760, + "step": 4540 + }, + { + "epoch": 0.2909544843479931, + "grad_norm": 28.45497703552246, + "learning_rate": 2.9087184739469983e-06, + "loss": 0.5292, + "num_input_tokens_seen": 14318208, + "step": 4545 + }, + { + "epoch": 0.29127456628896997, + "grad_norm": 31.174909591674805, + "learning_rate": 2.9119190884649857e-06, + "loss": 0.4741, + "num_input_tokens_seen": 14334144, + "step": 4550 + }, + { + "epoch": 0.29159464822994685, + "grad_norm": 66.41686248779297, + "learning_rate": 2.915119702982973e-06, + "loss": 0.6629, + "num_input_tokens_seen": 14349120, + "step": 4555 + }, + { + "epoch": 0.29191473017092373, + "grad_norm": 44.810245513916016, + "learning_rate": 2.9183203175009607e-06, + "loss": 0.5884, + "num_input_tokens_seen": 14365376, + "step": 4560 + }, + { + "epoch": 0.29223481211190067, + "grad_norm": 39.419612884521484, + "learning_rate": 2.921520932018948e-06, + "loss": 0.5375, + "num_input_tokens_seen": 14381568, + "step": 4565 + }, + { + "epoch": 0.29255489405287755, + "grad_norm": 28.65028953552246, + "learning_rate": 2.9247215465369356e-06, + "loss": 0.4778, + "num_input_tokens_seen": 14397312, + "step": 4570 + }, + { + "epoch": 0.2928749759938544, + "grad_norm": 37.34736251831055, + "learning_rate": 2.927922161054923e-06, + "loss": 0.4894, + "num_input_tokens_seen": 14412928, + "step": 4575 + }, + { + "epoch": 0.2931950579348313, + "grad_norm": 52.14083480834961, + "learning_rate": 2.9311227755729105e-06, + "loss": 0.5225, + "num_input_tokens_seen": 14427968, + "step": 4580 + }, + { + "epoch": 0.2935151398758082, + "grad_norm": 33.90268325805664, + "learning_rate": 2.934323390090898e-06, + "loss": 0.4422, + "num_input_tokens_seen": 14442944, + "step": 4585 + }, + { + "epoch": 0.2938352218167851, + "grad_norm": 21.928491592407227, + "learning_rate": 2.937524004608885e-06, + "loss": 0.5032, + "num_input_tokens_seen": 14458176, + "step": 4590 + }, + { + "epoch": 0.294155303757762, + "grad_norm": 25.73727035522461, + "learning_rate": 2.9407246191268725e-06, + "loss": 0.4244, + "num_input_tokens_seen": 14473600, + "step": 4595 + }, + { + "epoch": 0.2944753856987389, + "grad_norm": 28.402843475341797, + "learning_rate": 2.94392523364486e-06, + "loss": 0.6711, + "num_input_tokens_seen": 14488896, + "step": 4600 + }, + { + "epoch": 0.29479546763971576, + "grad_norm": 56.12818908691406, + "learning_rate": 2.9471258481628475e-06, + "loss": 0.6182, + "num_input_tokens_seen": 14502784, + "step": 4605 + }, + { + "epoch": 0.29511554958069264, + "grad_norm": 33.027099609375, + "learning_rate": 2.950326462680835e-06, + "loss": 0.4884, + "num_input_tokens_seen": 14520000, + "step": 4610 + }, + { + "epoch": 0.2954356315216696, + "grad_norm": 31.596391677856445, + "learning_rate": 2.9535270771988224e-06, + "loss": 0.3833, + "num_input_tokens_seen": 14536320, + "step": 4615 + }, + { + "epoch": 0.29575571346264645, + "grad_norm": 42.88957595825195, + "learning_rate": 2.95672769171681e-06, + "loss": 0.4793, + "num_input_tokens_seen": 14551808, + "step": 4620 + }, + { + "epoch": 0.29607579540362333, + "grad_norm": 25.47672462463379, + "learning_rate": 2.9599283062347973e-06, + "loss": 0.3634, + "num_input_tokens_seen": 14566976, + "step": 4625 + }, + { + "epoch": 0.2963958773446002, + "grad_norm": 28.05396270751953, + "learning_rate": 2.963128920752785e-06, + "loss": 0.3677, + "num_input_tokens_seen": 14583104, + "step": 4630 + }, + { + "epoch": 0.2967159592855771, + "grad_norm": 37.808258056640625, + "learning_rate": 2.9663295352707723e-06, + "loss": 0.4969, + "num_input_tokens_seen": 14598336, + "step": 4635 + }, + { + "epoch": 0.29703604122655397, + "grad_norm": 48.53775405883789, + "learning_rate": 2.9695301497887597e-06, + "loss": 0.5815, + "num_input_tokens_seen": 14615232, + "step": 4640 + }, + { + "epoch": 0.2973561231675309, + "grad_norm": 27.559240341186523, + "learning_rate": 2.972730764306747e-06, + "loss": 0.4829, + "num_input_tokens_seen": 14630848, + "step": 4645 + }, + { + "epoch": 0.2976762051085078, + "grad_norm": 32.293983459472656, + "learning_rate": 2.9759313788247347e-06, + "loss": 0.5286, + "num_input_tokens_seen": 14647040, + "step": 4650 + }, + { + "epoch": 0.29799628704948466, + "grad_norm": 33.761146545410156, + "learning_rate": 2.979131993342722e-06, + "loss": 0.4687, + "num_input_tokens_seen": 14661888, + "step": 4655 + }, + { + "epoch": 0.29831636899046154, + "grad_norm": 93.1884765625, + "learning_rate": 2.9823326078607096e-06, + "loss": 0.8269, + "num_input_tokens_seen": 14677696, + "step": 4660 + }, + { + "epoch": 0.2986364509314384, + "grad_norm": 26.395090103149414, + "learning_rate": 2.985533222378697e-06, + "loss": 0.4892, + "num_input_tokens_seen": 14693184, + "step": 4665 + }, + { + "epoch": 0.29895653287241536, + "grad_norm": 33.311561584472656, + "learning_rate": 2.9887338368966845e-06, + "loss": 0.3668, + "num_input_tokens_seen": 14707904, + "step": 4670 + }, + { + "epoch": 0.29927661481339224, + "grad_norm": 31.744384765625, + "learning_rate": 2.991934451414672e-06, + "loss": 0.4194, + "num_input_tokens_seen": 14724096, + "step": 4675 + }, + { + "epoch": 0.2995966967543691, + "grad_norm": 41.77426528930664, + "learning_rate": 2.9951350659326595e-06, + "loss": 0.575, + "num_input_tokens_seen": 14738752, + "step": 4680 + }, + { + "epoch": 0.299916778695346, + "grad_norm": 47.95245361328125, + "learning_rate": 2.998335680450647e-06, + "loss": 0.3559, + "num_input_tokens_seen": 14753664, + "step": 4685 + }, + { + "epoch": 0.3002368606363229, + "grad_norm": 31.915889739990234, + "learning_rate": 3.0015362949686344e-06, + "loss": 0.4628, + "num_input_tokens_seen": 14769152, + "step": 4690 + }, + { + "epoch": 0.3005569425772998, + "grad_norm": 46.27490997314453, + "learning_rate": 3.004736909486622e-06, + "loss": 0.5057, + "num_input_tokens_seen": 14784128, + "step": 4695 + }, + { + "epoch": 0.3008770245182767, + "grad_norm": 57.33107376098633, + "learning_rate": 3.0079375240046093e-06, + "loss": 0.5411, + "num_input_tokens_seen": 14799936, + "step": 4700 + }, + { + "epoch": 0.30119710645925357, + "grad_norm": 54.509864807128906, + "learning_rate": 3.011138138522597e-06, + "loss": 0.641, + "num_input_tokens_seen": 14814208, + "step": 4705 + }, + { + "epoch": 0.30151718840023045, + "grad_norm": 41.620643615722656, + "learning_rate": 3.014338753040584e-06, + "loss": 0.5401, + "num_input_tokens_seen": 14829504, + "step": 4710 + }, + { + "epoch": 0.30183727034120733, + "grad_norm": 27.767379760742188, + "learning_rate": 3.0175393675585713e-06, + "loss": 0.4631, + "num_input_tokens_seen": 14843968, + "step": 4715 + }, + { + "epoch": 0.30215735228218427, + "grad_norm": 35.70241928100586, + "learning_rate": 3.0207399820765588e-06, + "loss": 0.4741, + "num_input_tokens_seen": 14859840, + "step": 4720 + }, + { + "epoch": 0.30247743422316115, + "grad_norm": 55.7733039855957, + "learning_rate": 3.0239405965945462e-06, + "loss": 0.5715, + "num_input_tokens_seen": 14875648, + "step": 4725 + }, + { + "epoch": 0.302797516164138, + "grad_norm": 29.921001434326172, + "learning_rate": 3.0271412111125337e-06, + "loss": 0.5296, + "num_input_tokens_seen": 14890560, + "step": 4730 + }, + { + "epoch": 0.3031175981051149, + "grad_norm": 36.64336395263672, + "learning_rate": 3.030341825630521e-06, + "loss": 0.5479, + "num_input_tokens_seen": 14906688, + "step": 4735 + }, + { + "epoch": 0.3034376800460918, + "grad_norm": 46.107086181640625, + "learning_rate": 3.0335424401485086e-06, + "loss": 0.625, + "num_input_tokens_seen": 14922496, + "step": 4740 + }, + { + "epoch": 0.30375776198706866, + "grad_norm": 39.55488586425781, + "learning_rate": 3.036743054666496e-06, + "loss": 0.5292, + "num_input_tokens_seen": 14938880, + "step": 4745 + }, + { + "epoch": 0.3040778439280456, + "grad_norm": 42.71869659423828, + "learning_rate": 3.0399436691844836e-06, + "loss": 0.5366, + "num_input_tokens_seen": 14954112, + "step": 4750 + }, + { + "epoch": 0.3043979258690225, + "grad_norm": 44.669490814208984, + "learning_rate": 3.043144283702471e-06, + "loss": 0.4153, + "num_input_tokens_seen": 14969536, + "step": 4755 + }, + { + "epoch": 0.30471800780999936, + "grad_norm": 35.3562126159668, + "learning_rate": 3.0463448982204585e-06, + "loss": 0.3946, + "num_input_tokens_seen": 14985728, + "step": 4760 + }, + { + "epoch": 0.30503808975097624, + "grad_norm": 25.413599014282227, + "learning_rate": 3.049545512738446e-06, + "loss": 0.5285, + "num_input_tokens_seen": 15003904, + "step": 4765 + }, + { + "epoch": 0.3053581716919531, + "grad_norm": 59.23112487792969, + "learning_rate": 3.0527461272564334e-06, + "loss": 0.6277, + "num_input_tokens_seen": 15019328, + "step": 4770 + }, + { + "epoch": 0.30567825363293005, + "grad_norm": 55.75103759765625, + "learning_rate": 3.055946741774421e-06, + "loss": 0.5619, + "num_input_tokens_seen": 15033856, + "step": 4775 + }, + { + "epoch": 0.30599833557390693, + "grad_norm": 22.977415084838867, + "learning_rate": 3.0591473562924084e-06, + "loss": 0.4167, + "num_input_tokens_seen": 15051200, + "step": 4780 + }, + { + "epoch": 0.3063184175148838, + "grad_norm": 31.56866455078125, + "learning_rate": 3.062347970810396e-06, + "loss": 0.4579, + "num_input_tokens_seen": 15066368, + "step": 4785 + }, + { + "epoch": 0.3066384994558607, + "grad_norm": 19.824785232543945, + "learning_rate": 3.0655485853283833e-06, + "loss": 0.3633, + "num_input_tokens_seen": 15081600, + "step": 4790 + }, + { + "epoch": 0.30695858139683757, + "grad_norm": 37.12388229370117, + "learning_rate": 3.0687491998463708e-06, + "loss": 0.5975, + "num_input_tokens_seen": 15097536, + "step": 4795 + }, + { + "epoch": 0.3072786633378145, + "grad_norm": 34.85268020629883, + "learning_rate": 3.0719498143643583e-06, + "loss": 0.5337, + "num_input_tokens_seen": 15113728, + "step": 4800 + }, + { + "epoch": 0.3075987452787914, + "grad_norm": 22.34710121154785, + "learning_rate": 3.0751504288823457e-06, + "loss": 0.6239, + "num_input_tokens_seen": 15130048, + "step": 4805 + }, + { + "epoch": 0.30791882721976827, + "grad_norm": 24.52249526977539, + "learning_rate": 3.078351043400333e-06, + "loss": 0.4298, + "num_input_tokens_seen": 15145600, + "step": 4810 + }, + { + "epoch": 0.30823890916074514, + "grad_norm": 50.72840881347656, + "learning_rate": 3.0815516579183207e-06, + "loss": 0.5121, + "num_input_tokens_seen": 15162368, + "step": 4815 + }, + { + "epoch": 0.308558991101722, + "grad_norm": 34.565704345703125, + "learning_rate": 3.084752272436308e-06, + "loss": 0.497, + "num_input_tokens_seen": 15178368, + "step": 4820 + }, + { + "epoch": 0.3088790730426989, + "grad_norm": 30.786231994628906, + "learning_rate": 3.0879528869542956e-06, + "loss": 0.5909, + "num_input_tokens_seen": 15193408, + "step": 4825 + }, + { + "epoch": 0.30919915498367584, + "grad_norm": 17.76447868347168, + "learning_rate": 3.0911535014722826e-06, + "loss": 0.3568, + "num_input_tokens_seen": 15208640, + "step": 4830 + }, + { + "epoch": 0.3095192369246527, + "grad_norm": 59.99802017211914, + "learning_rate": 3.09435411599027e-06, + "loss": 0.532, + "num_input_tokens_seen": 15224192, + "step": 4835 + }, + { + "epoch": 0.3098393188656296, + "grad_norm": 36.624752044677734, + "learning_rate": 3.0975547305082576e-06, + "loss": 0.591, + "num_input_tokens_seen": 15239616, + "step": 4840 + }, + { + "epoch": 0.3101594008066065, + "grad_norm": 23.7210636138916, + "learning_rate": 3.100755345026245e-06, + "loss": 0.4536, + "num_input_tokens_seen": 15255680, + "step": 4845 + }, + { + "epoch": 0.31047948274758336, + "grad_norm": 18.88695526123047, + "learning_rate": 3.1039559595442325e-06, + "loss": 0.5104, + "num_input_tokens_seen": 15271232, + "step": 4850 + }, + { + "epoch": 0.3107995646885603, + "grad_norm": 49.04075622558594, + "learning_rate": 3.10715657406222e-06, + "loss": 0.4745, + "num_input_tokens_seen": 15287040, + "step": 4855 + }, + { + "epoch": 0.31111964662953717, + "grad_norm": 29.447961807250977, + "learning_rate": 3.1103571885802074e-06, + "loss": 0.5914, + "num_input_tokens_seen": 15304256, + "step": 4860 + }, + { + "epoch": 0.31143972857051405, + "grad_norm": 21.896705627441406, + "learning_rate": 3.113557803098195e-06, + "loss": 0.3885, + "num_input_tokens_seen": 15322112, + "step": 4865 + }, + { + "epoch": 0.31175981051149093, + "grad_norm": 59.705421447753906, + "learning_rate": 3.1167584176161824e-06, + "loss": 0.5413, + "num_input_tokens_seen": 15337216, + "step": 4870 + }, + { + "epoch": 0.3120798924524678, + "grad_norm": 53.69181442260742, + "learning_rate": 3.11995903213417e-06, + "loss": 0.5022, + "num_input_tokens_seen": 15354048, + "step": 4875 + }, + { + "epoch": 0.31239997439344475, + "grad_norm": 32.639522552490234, + "learning_rate": 3.1231596466521573e-06, + "loss": 0.4865, + "num_input_tokens_seen": 15369344, + "step": 4880 + }, + { + "epoch": 0.3127200563344216, + "grad_norm": 63.56182861328125, + "learning_rate": 3.1263602611701448e-06, + "loss": 0.5673, + "num_input_tokens_seen": 15385920, + "step": 4885 + }, + { + "epoch": 0.3130401382753985, + "grad_norm": 26.34950065612793, + "learning_rate": 3.1295608756881322e-06, + "loss": 0.4472, + "num_input_tokens_seen": 15401536, + "step": 4890 + }, + { + "epoch": 0.3133602202163754, + "grad_norm": 32.807010650634766, + "learning_rate": 3.1327614902061197e-06, + "loss": 0.5766, + "num_input_tokens_seen": 15417152, + "step": 4895 + }, + { + "epoch": 0.31368030215735226, + "grad_norm": 33.443416595458984, + "learning_rate": 3.135962104724107e-06, + "loss": 0.6036, + "num_input_tokens_seen": 15431872, + "step": 4900 + }, + { + "epoch": 0.3140003840983292, + "grad_norm": 20.304805755615234, + "learning_rate": 3.1391627192420946e-06, + "loss": 0.4521, + "num_input_tokens_seen": 15447552, + "step": 4905 + }, + { + "epoch": 0.3143204660393061, + "grad_norm": 37.0919075012207, + "learning_rate": 3.142363333760082e-06, + "loss": 0.5343, + "num_input_tokens_seen": 15464384, + "step": 4910 + }, + { + "epoch": 0.31464054798028296, + "grad_norm": 25.233070373535156, + "learning_rate": 3.1455639482780696e-06, + "loss": 0.5376, + "num_input_tokens_seen": 15479808, + "step": 4915 + }, + { + "epoch": 0.31496062992125984, + "grad_norm": 22.162580490112305, + "learning_rate": 3.148764562796057e-06, + "loss": 0.5259, + "num_input_tokens_seen": 15496512, + "step": 4920 + }, + { + "epoch": 0.3152807118622367, + "grad_norm": 35.19190979003906, + "learning_rate": 3.151965177314045e-06, + "loss": 0.4217, + "num_input_tokens_seen": 15512896, + "step": 4925 + }, + { + "epoch": 0.3156007938032136, + "grad_norm": 32.4355354309082, + "learning_rate": 3.1551657918320324e-06, + "loss": 0.3742, + "num_input_tokens_seen": 15528064, + "step": 4930 + }, + { + "epoch": 0.31592087574419053, + "grad_norm": 49.31787872314453, + "learning_rate": 3.15836640635002e-06, + "loss": 0.4034, + "num_input_tokens_seen": 15543424, + "step": 4935 + }, + { + "epoch": 0.3162409576851674, + "grad_norm": 42.536712646484375, + "learning_rate": 3.1615670208680073e-06, + "loss": 0.6094, + "num_input_tokens_seen": 15560192, + "step": 4940 + }, + { + "epoch": 0.3165610396261443, + "grad_norm": 36.98291778564453, + "learning_rate": 3.164767635385995e-06, + "loss": 0.5051, + "num_input_tokens_seen": 15576128, + "step": 4945 + }, + { + "epoch": 0.31688112156712117, + "grad_norm": 96.97798156738281, + "learning_rate": 3.1679682499039814e-06, + "loss": 0.7277, + "num_input_tokens_seen": 15592576, + "step": 4950 + }, + { + "epoch": 0.31720120350809805, + "grad_norm": 52.78471374511719, + "learning_rate": 3.171168864421969e-06, + "loss": 0.4515, + "num_input_tokens_seen": 15607744, + "step": 4955 + }, + { + "epoch": 0.317521285449075, + "grad_norm": 42.35615539550781, + "learning_rate": 3.1743694789399564e-06, + "loss": 0.6054, + "num_input_tokens_seen": 15623360, + "step": 4960 + }, + { + "epoch": 0.31784136739005187, + "grad_norm": 34.578460693359375, + "learning_rate": 3.177570093457944e-06, + "loss": 0.4599, + "num_input_tokens_seen": 15638656, + "step": 4965 + }, + { + "epoch": 0.31816144933102875, + "grad_norm": 52.791446685791016, + "learning_rate": 3.1807707079759313e-06, + "loss": 0.5218, + "num_input_tokens_seen": 15654400, + "step": 4970 + }, + { + "epoch": 0.3184815312720056, + "grad_norm": 28.640661239624023, + "learning_rate": 3.1839713224939188e-06, + "loss": 0.4533, + "num_input_tokens_seen": 15670016, + "step": 4975 + }, + { + "epoch": 0.3188016132129825, + "grad_norm": 62.54088592529297, + "learning_rate": 3.1871719370119062e-06, + "loss": 0.5557, + "num_input_tokens_seen": 15686208, + "step": 4980 + }, + { + "epoch": 0.31912169515395944, + "grad_norm": 23.34925651550293, + "learning_rate": 3.1903725515298937e-06, + "loss": 0.4331, + "num_input_tokens_seen": 15702592, + "step": 4985 + }, + { + "epoch": 0.3194417770949363, + "grad_norm": 49.41212844848633, + "learning_rate": 3.193573166047881e-06, + "loss": 0.5049, + "num_input_tokens_seen": 15717696, + "step": 4990 + }, + { + "epoch": 0.3197618590359132, + "grad_norm": 46.784366607666016, + "learning_rate": 3.196773780565869e-06, + "loss": 0.4435, + "num_input_tokens_seen": 15733184, + "step": 4995 + }, + { + "epoch": 0.3200819409768901, + "grad_norm": 42.353336334228516, + "learning_rate": 3.1999743950838565e-06, + "loss": 0.5851, + "num_input_tokens_seen": 15749888, + "step": 5000 + }, + { + "epoch": 0.32040202291786696, + "grad_norm": 38.63792419433594, + "learning_rate": 3.203175009601844e-06, + "loss": 0.4457, + "num_input_tokens_seen": 15768384, + "step": 5005 + }, + { + "epoch": 0.32072210485884384, + "grad_norm": 34.20757293701172, + "learning_rate": 3.2063756241198314e-06, + "loss": 0.3612, + "num_input_tokens_seen": 15784448, + "step": 5010 + }, + { + "epoch": 0.3210421867998208, + "grad_norm": 26.597951889038086, + "learning_rate": 3.209576238637819e-06, + "loss": 0.4858, + "num_input_tokens_seen": 15800512, + "step": 5015 + }, + { + "epoch": 0.32136226874079765, + "grad_norm": 23.833599090576172, + "learning_rate": 3.2127768531558064e-06, + "loss": 0.5056, + "num_input_tokens_seen": 15815680, + "step": 5020 + }, + { + "epoch": 0.32168235068177453, + "grad_norm": 31.0048885345459, + "learning_rate": 3.215977467673794e-06, + "loss": 0.5082, + "num_input_tokens_seen": 15830528, + "step": 5025 + }, + { + "epoch": 0.3220024326227514, + "grad_norm": 31.909061431884766, + "learning_rate": 3.2191780821917813e-06, + "loss": 0.5727, + "num_input_tokens_seen": 15845568, + "step": 5030 + }, + { + "epoch": 0.3223225145637283, + "grad_norm": 32.722957611083984, + "learning_rate": 3.2223786967097688e-06, + "loss": 0.5703, + "num_input_tokens_seen": 15861632, + "step": 5035 + }, + { + "epoch": 0.3226425965047052, + "grad_norm": 49.20211410522461, + "learning_rate": 3.2255793112277563e-06, + "loss": 0.6083, + "num_input_tokens_seen": 15877632, + "step": 5040 + }, + { + "epoch": 0.3229626784456821, + "grad_norm": 22.239431381225586, + "learning_rate": 3.2287799257457437e-06, + "loss": 0.4118, + "num_input_tokens_seen": 15893184, + "step": 5045 + }, + { + "epoch": 0.323282760386659, + "grad_norm": 36.036319732666016, + "learning_rate": 3.231980540263731e-06, + "loss": 0.4441, + "num_input_tokens_seen": 15908416, + "step": 5050 + }, + { + "epoch": 0.32360284232763586, + "grad_norm": 24.171253204345703, + "learning_rate": 3.2351811547817187e-06, + "loss": 0.4126, + "num_input_tokens_seen": 15924224, + "step": 5055 + }, + { + "epoch": 0.32392292426861274, + "grad_norm": 43.140350341796875, + "learning_rate": 3.238381769299706e-06, + "loss": 0.341, + "num_input_tokens_seen": 15940032, + "step": 5060 + }, + { + "epoch": 0.3242430062095897, + "grad_norm": 48.94254684448242, + "learning_rate": 3.2415823838176936e-06, + "loss": 0.5604, + "num_input_tokens_seen": 15954944, + "step": 5065 + }, + { + "epoch": 0.32456308815056656, + "grad_norm": 47.071353912353516, + "learning_rate": 3.2447829983356806e-06, + "loss": 0.5718, + "num_input_tokens_seen": 15970624, + "step": 5070 + }, + { + "epoch": 0.32488317009154344, + "grad_norm": 43.38968276977539, + "learning_rate": 3.247983612853668e-06, + "loss": 0.5341, + "num_input_tokens_seen": 15986688, + "step": 5075 + }, + { + "epoch": 0.3252032520325203, + "grad_norm": 28.76389503479004, + "learning_rate": 3.2511842273716556e-06, + "loss": 0.4263, + "num_input_tokens_seen": 16002752, + "step": 5080 + }, + { + "epoch": 0.3255233339734972, + "grad_norm": 23.485069274902344, + "learning_rate": 3.254384841889643e-06, + "loss": 0.535, + "num_input_tokens_seen": 16018112, + "step": 5085 + }, + { + "epoch": 0.32584341591447413, + "grad_norm": 21.850555419921875, + "learning_rate": 3.2575854564076305e-06, + "loss": 0.481, + "num_input_tokens_seen": 16033984, + "step": 5090 + }, + { + "epoch": 0.326163497855451, + "grad_norm": 32.31990051269531, + "learning_rate": 3.260786070925618e-06, + "loss": 0.3992, + "num_input_tokens_seen": 16049536, + "step": 5095 + }, + { + "epoch": 0.3264835797964279, + "grad_norm": 38.52376937866211, + "learning_rate": 3.2639866854436054e-06, + "loss": 0.4262, + "num_input_tokens_seen": 16066304, + "step": 5100 + }, + { + "epoch": 0.32680366173740477, + "grad_norm": 41.96830368041992, + "learning_rate": 3.267187299961593e-06, + "loss": 0.5625, + "num_input_tokens_seen": 16081536, + "step": 5105 + }, + { + "epoch": 0.32712374367838165, + "grad_norm": 29.735448837280273, + "learning_rate": 3.2703879144795804e-06, + "loss": 0.708, + "num_input_tokens_seen": 16096896, + "step": 5110 + }, + { + "epoch": 0.32744382561935853, + "grad_norm": 32.344417572021484, + "learning_rate": 3.273588528997568e-06, + "loss": 0.5131, + "num_input_tokens_seen": 16112960, + "step": 5115 + }, + { + "epoch": 0.32776390756033547, + "grad_norm": 40.66057205200195, + "learning_rate": 3.2767891435155553e-06, + "loss": 0.467, + "num_input_tokens_seen": 16129344, + "step": 5120 + }, + { + "epoch": 0.32808398950131235, + "grad_norm": 22.34542465209961, + "learning_rate": 3.2799897580335428e-06, + "loss": 0.6375, + "num_input_tokens_seen": 16147776, + "step": 5125 + }, + { + "epoch": 0.3284040714422892, + "grad_norm": 42.05990219116211, + "learning_rate": 3.2831903725515302e-06, + "loss": 0.4597, + "num_input_tokens_seen": 16162880, + "step": 5130 + }, + { + "epoch": 0.3287241533832661, + "grad_norm": 17.290834426879883, + "learning_rate": 3.2863909870695177e-06, + "loss": 0.2809, + "num_input_tokens_seen": 16179392, + "step": 5135 + }, + { + "epoch": 0.329044235324243, + "grad_norm": 43.21586990356445, + "learning_rate": 3.289591601587505e-06, + "loss": 0.6244, + "num_input_tokens_seen": 16194560, + "step": 5140 + }, + { + "epoch": 0.3293643172652199, + "grad_norm": 67.11534881591797, + "learning_rate": 3.2927922161054926e-06, + "loss": 0.5175, + "num_input_tokens_seen": 16210112, + "step": 5145 + }, + { + "epoch": 0.3296843992061968, + "grad_norm": 67.33219909667969, + "learning_rate": 3.29599283062348e-06, + "loss": 0.5084, + "num_input_tokens_seen": 16225856, + "step": 5150 + }, + { + "epoch": 0.3300044811471737, + "grad_norm": 31.06239128112793, + "learning_rate": 3.2991934451414676e-06, + "loss": 0.5103, + "num_input_tokens_seen": 16241536, + "step": 5155 + }, + { + "epoch": 0.33032456308815056, + "grad_norm": 37.402740478515625, + "learning_rate": 3.302394059659455e-06, + "loss": 0.4566, + "num_input_tokens_seen": 16256384, + "step": 5160 + }, + { + "epoch": 0.33064464502912744, + "grad_norm": 40.51024627685547, + "learning_rate": 3.3055946741774425e-06, + "loss": 0.4815, + "num_input_tokens_seen": 16271360, + "step": 5165 + }, + { + "epoch": 0.3309647269701044, + "grad_norm": 28.808246612548828, + "learning_rate": 3.30879528869543e-06, + "loss": 0.4515, + "num_input_tokens_seen": 16286656, + "step": 5170 + }, + { + "epoch": 0.33128480891108125, + "grad_norm": 25.6644344329834, + "learning_rate": 3.3119959032134174e-06, + "loss": 0.4787, + "num_input_tokens_seen": 16302592, + "step": 5175 + }, + { + "epoch": 0.33160489085205813, + "grad_norm": 30.197542190551758, + "learning_rate": 3.315196517731405e-06, + "loss": 0.6982, + "num_input_tokens_seen": 16318656, + "step": 5180 + }, + { + "epoch": 0.331924972793035, + "grad_norm": 27.73756217956543, + "learning_rate": 3.3183971322493924e-06, + "loss": 0.5162, + "num_input_tokens_seen": 16334016, + "step": 5185 + }, + { + "epoch": 0.3322450547340119, + "grad_norm": 35.10762023925781, + "learning_rate": 3.3215977467673794e-06, + "loss": 0.5793, + "num_input_tokens_seen": 16349888, + "step": 5190 + }, + { + "epoch": 0.33256513667498877, + "grad_norm": 25.246158599853516, + "learning_rate": 3.324798361285367e-06, + "loss": 0.5796, + "num_input_tokens_seen": 16365056, + "step": 5195 + }, + { + "epoch": 0.3328852186159657, + "grad_norm": 21.86536407470703, + "learning_rate": 3.3279989758033544e-06, + "loss": 0.4358, + "num_input_tokens_seen": 16380032, + "step": 5200 + }, + { + "epoch": 0.3332053005569426, + "grad_norm": 33.93208694458008, + "learning_rate": 3.331199590321342e-06, + "loss": 0.6268, + "num_input_tokens_seen": 16394752, + "step": 5205 + }, + { + "epoch": 0.33352538249791946, + "grad_norm": 21.6890926361084, + "learning_rate": 3.3344002048393293e-06, + "loss": 0.5224, + "num_input_tokens_seen": 16410368, + "step": 5210 + }, + { + "epoch": 0.33384546443889634, + "grad_norm": 33.44954299926758, + "learning_rate": 3.3376008193573168e-06, + "loss": 0.4788, + "num_input_tokens_seen": 16425088, + "step": 5215 + }, + { + "epoch": 0.3341655463798732, + "grad_norm": 25.992691040039062, + "learning_rate": 3.3408014338753042e-06, + "loss": 0.4521, + "num_input_tokens_seen": 16441152, + "step": 5220 + }, + { + "epoch": 0.33448562832085016, + "grad_norm": 37.4842529296875, + "learning_rate": 3.3440020483932917e-06, + "loss": 0.5389, + "num_input_tokens_seen": 16458240, + "step": 5225 + }, + { + "epoch": 0.33480571026182704, + "grad_norm": 47.693687438964844, + "learning_rate": 3.347202662911279e-06, + "loss": 0.575, + "num_input_tokens_seen": 16474112, + "step": 5230 + }, + { + "epoch": 0.3351257922028039, + "grad_norm": 35.79508590698242, + "learning_rate": 3.3504032774292666e-06, + "loss": 0.4176, + "num_input_tokens_seen": 16490560, + "step": 5235 + }, + { + "epoch": 0.3354458741437808, + "grad_norm": 96.1209487915039, + "learning_rate": 3.353603891947254e-06, + "loss": 0.7561, + "num_input_tokens_seen": 16505728, + "step": 5240 + }, + { + "epoch": 0.3357659560847577, + "grad_norm": 29.462099075317383, + "learning_rate": 3.3568045064652416e-06, + "loss": 0.491, + "num_input_tokens_seen": 16521856, + "step": 5245 + }, + { + "epoch": 0.3360860380257346, + "grad_norm": 23.954118728637695, + "learning_rate": 3.360005120983229e-06, + "loss": 0.4595, + "num_input_tokens_seen": 16537792, + "step": 5250 + }, + { + "epoch": 0.3364061199667115, + "grad_norm": 35.56183624267578, + "learning_rate": 3.3632057355012165e-06, + "loss": 0.4857, + "num_input_tokens_seen": 16553408, + "step": 5255 + }, + { + "epoch": 0.33672620190768837, + "grad_norm": 29.579648971557617, + "learning_rate": 3.366406350019204e-06, + "loss": 0.4904, + "num_input_tokens_seen": 16570112, + "step": 5260 + }, + { + "epoch": 0.33704628384866525, + "grad_norm": 54.755104064941406, + "learning_rate": 3.3696069645371914e-06, + "loss": 0.5798, + "num_input_tokens_seen": 16586304, + "step": 5265 + }, + { + "epoch": 0.33736636578964213, + "grad_norm": 36.84575653076172, + "learning_rate": 3.372807579055179e-06, + "loss": 0.5247, + "num_input_tokens_seen": 16601920, + "step": 5270 + }, + { + "epoch": 0.33768644773061907, + "grad_norm": 26.736759185791016, + "learning_rate": 3.3760081935731664e-06, + "loss": 0.5074, + "num_input_tokens_seen": 16618240, + "step": 5275 + }, + { + "epoch": 0.33800652967159595, + "grad_norm": 28.898080825805664, + "learning_rate": 3.379208808091154e-06, + "loss": 0.6338, + "num_input_tokens_seen": 16633408, + "step": 5280 + }, + { + "epoch": 0.3383266116125728, + "grad_norm": 35.41246795654297, + "learning_rate": 3.3824094226091413e-06, + "loss": 0.4926, + "num_input_tokens_seen": 16649664, + "step": 5285 + }, + { + "epoch": 0.3386466935535497, + "grad_norm": 31.545101165771484, + "learning_rate": 3.3856100371271288e-06, + "loss": 0.4736, + "num_input_tokens_seen": 16664896, + "step": 5290 + }, + { + "epoch": 0.3389667754945266, + "grad_norm": 47.230560302734375, + "learning_rate": 3.3888106516451162e-06, + "loss": 0.5782, + "num_input_tokens_seen": 16680384, + "step": 5295 + }, + { + "epoch": 0.33928685743550346, + "grad_norm": 26.765348434448242, + "learning_rate": 3.3920112661631037e-06, + "loss": 0.4507, + "num_input_tokens_seen": 16696192, + "step": 5300 + }, + { + "epoch": 0.3396069393764804, + "grad_norm": 32.10804748535156, + "learning_rate": 3.395211880681091e-06, + "loss": 0.63, + "num_input_tokens_seen": 16727104, + "step": 5305 + }, + { + "epoch": 0.3399270213174573, + "grad_norm": 30.96208953857422, + "learning_rate": 3.398412495199078e-06, + "loss": 0.5304, + "num_input_tokens_seen": 16741696, + "step": 5310 + }, + { + "epoch": 0.34024710325843416, + "grad_norm": 30.943119049072266, + "learning_rate": 3.4016131097170657e-06, + "loss": 0.589, + "num_input_tokens_seen": 16757120, + "step": 5315 + }, + { + "epoch": 0.34056718519941104, + "grad_norm": 32.4697380065918, + "learning_rate": 3.404813724235053e-06, + "loss": 0.5482, + "num_input_tokens_seen": 16772416, + "step": 5320 + }, + { + "epoch": 0.3408872671403879, + "grad_norm": 20.46015167236328, + "learning_rate": 3.4080143387530406e-06, + "loss": 0.3581, + "num_input_tokens_seen": 16788352, + "step": 5325 + }, + { + "epoch": 0.34120734908136485, + "grad_norm": 38.192237854003906, + "learning_rate": 3.411214953271028e-06, + "loss": 0.4131, + "num_input_tokens_seen": 16803648, + "step": 5330 + }, + { + "epoch": 0.34152743102234173, + "grad_norm": 28.86044692993164, + "learning_rate": 3.4144155677890155e-06, + "loss": 0.5274, + "num_input_tokens_seen": 16818944, + "step": 5335 + }, + { + "epoch": 0.3418475129633186, + "grad_norm": 31.56170082092285, + "learning_rate": 3.417616182307003e-06, + "loss": 0.4915, + "num_input_tokens_seen": 16833984, + "step": 5340 + }, + { + "epoch": 0.3421675949042955, + "grad_norm": 31.4489688873291, + "learning_rate": 3.4208167968249905e-06, + "loss": 0.6687, + "num_input_tokens_seen": 16849280, + "step": 5345 + }, + { + "epoch": 0.34248767684527237, + "grad_norm": 33.60383224487305, + "learning_rate": 3.424017411342978e-06, + "loss": 0.4588, + "num_input_tokens_seen": 16865664, + "step": 5350 + }, + { + "epoch": 0.3428077587862493, + "grad_norm": 35.012840270996094, + "learning_rate": 3.4272180258609654e-06, + "loss": 0.6085, + "num_input_tokens_seen": 16881536, + "step": 5355 + }, + { + "epoch": 0.3431278407272262, + "grad_norm": 23.877336502075195, + "learning_rate": 3.430418640378953e-06, + "loss": 0.4435, + "num_input_tokens_seen": 16898816, + "step": 5360 + }, + { + "epoch": 0.34344792266820307, + "grad_norm": 30.3134765625, + "learning_rate": 3.4336192548969403e-06, + "loss": 0.5517, + "num_input_tokens_seen": 16915136, + "step": 5365 + }, + { + "epoch": 0.34376800460917994, + "grad_norm": 34.58237838745117, + "learning_rate": 3.436819869414928e-06, + "loss": 0.59, + "num_input_tokens_seen": 16931200, + "step": 5370 + }, + { + "epoch": 0.3440880865501568, + "grad_norm": 22.405658721923828, + "learning_rate": 3.4400204839329153e-06, + "loss": 0.5054, + "num_input_tokens_seen": 16946816, + "step": 5375 + }, + { + "epoch": 0.3444081684911337, + "grad_norm": 44.43580627441406, + "learning_rate": 3.4432210984509027e-06, + "loss": 0.4416, + "num_input_tokens_seen": 16962880, + "step": 5380 + }, + { + "epoch": 0.34472825043211064, + "grad_norm": 41.31477737426758, + "learning_rate": 3.4464217129688902e-06, + "loss": 0.5929, + "num_input_tokens_seen": 16978240, + "step": 5385 + }, + { + "epoch": 0.3450483323730875, + "grad_norm": 30.653884887695312, + "learning_rate": 3.4496223274868777e-06, + "loss": 0.4171, + "num_input_tokens_seen": 16993728, + "step": 5390 + }, + { + "epoch": 0.3453684143140644, + "grad_norm": 40.9422721862793, + "learning_rate": 3.452822942004865e-06, + "loss": 0.6317, + "num_input_tokens_seen": 17009728, + "step": 5395 + }, + { + "epoch": 0.3456884962550413, + "grad_norm": 40.47362518310547, + "learning_rate": 3.4560235565228526e-06, + "loss": 0.4645, + "num_input_tokens_seen": 17024640, + "step": 5400 + }, + { + "epoch": 0.34600857819601816, + "grad_norm": 42.13897705078125, + "learning_rate": 3.4592241710408405e-06, + "loss": 0.4109, + "num_input_tokens_seen": 17040512, + "step": 5405 + }, + { + "epoch": 0.3463286601369951, + "grad_norm": 17.997047424316406, + "learning_rate": 3.462424785558828e-06, + "loss": 0.5849, + "num_input_tokens_seen": 17056064, + "step": 5410 + }, + { + "epoch": 0.34664874207797197, + "grad_norm": 27.689199447631836, + "learning_rate": 3.4656254000768154e-06, + "loss": 0.492, + "num_input_tokens_seen": 17072448, + "step": 5415 + }, + { + "epoch": 0.34696882401894885, + "grad_norm": 40.01524353027344, + "learning_rate": 3.468826014594803e-06, + "loss": 0.626, + "num_input_tokens_seen": 17088320, + "step": 5420 + }, + { + "epoch": 0.34728890595992573, + "grad_norm": 38.83869171142578, + "learning_rate": 3.4720266291127904e-06, + "loss": 0.5002, + "num_input_tokens_seen": 17104448, + "step": 5425 + }, + { + "epoch": 0.3476089879009026, + "grad_norm": 28.335857391357422, + "learning_rate": 3.475227243630777e-06, + "loss": 0.4201, + "num_input_tokens_seen": 17121856, + "step": 5430 + }, + { + "epoch": 0.34792906984187955, + "grad_norm": 35.10919952392578, + "learning_rate": 3.4784278581487645e-06, + "loss": 0.6419, + "num_input_tokens_seen": 17136512, + "step": 5435 + }, + { + "epoch": 0.3482491517828564, + "grad_norm": 26.953418731689453, + "learning_rate": 3.481628472666752e-06, + "loss": 0.5514, + "num_input_tokens_seen": 17152640, + "step": 5440 + }, + { + "epoch": 0.3485692337238333, + "grad_norm": 17.140901565551758, + "learning_rate": 3.4848290871847394e-06, + "loss": 0.3865, + "num_input_tokens_seen": 17167680, + "step": 5445 + }, + { + "epoch": 0.3488893156648102, + "grad_norm": 47.715179443359375, + "learning_rate": 3.488029701702727e-06, + "loss": 0.7384, + "num_input_tokens_seen": 17183296, + "step": 5450 + }, + { + "epoch": 0.34920939760578706, + "grad_norm": 49.38996124267578, + "learning_rate": 3.4912303162207143e-06, + "loss": 0.6669, + "num_input_tokens_seen": 17198272, + "step": 5455 + }, + { + "epoch": 0.349529479546764, + "grad_norm": 48.98793411254883, + "learning_rate": 3.494430930738702e-06, + "loss": 0.5908, + "num_input_tokens_seen": 17213376, + "step": 5460 + }, + { + "epoch": 0.3498495614877409, + "grad_norm": 44.328487396240234, + "learning_rate": 3.4976315452566893e-06, + "loss": 0.5236, + "num_input_tokens_seen": 17229632, + "step": 5465 + }, + { + "epoch": 0.35016964342871776, + "grad_norm": 30.30027198791504, + "learning_rate": 3.5008321597746767e-06, + "loss": 0.6976, + "num_input_tokens_seen": 17245696, + "step": 5470 + }, + { + "epoch": 0.35048972536969464, + "grad_norm": 24.894258499145508, + "learning_rate": 3.5040327742926646e-06, + "loss": 0.4405, + "num_input_tokens_seen": 17262976, + "step": 5475 + }, + { + "epoch": 0.3508098073106715, + "grad_norm": 44.761451721191406, + "learning_rate": 3.507233388810652e-06, + "loss": 0.48, + "num_input_tokens_seen": 17278336, + "step": 5480 + }, + { + "epoch": 0.3511298892516484, + "grad_norm": 48.111656188964844, + "learning_rate": 3.5104340033286396e-06, + "loss": 0.4554, + "num_input_tokens_seen": 17293248, + "step": 5485 + }, + { + "epoch": 0.35144997119262533, + "grad_norm": 36.20976257324219, + "learning_rate": 3.513634617846627e-06, + "loss": 0.4985, + "num_input_tokens_seen": 17309184, + "step": 5490 + }, + { + "epoch": 0.3517700531336022, + "grad_norm": 28.822999954223633, + "learning_rate": 3.5168352323646145e-06, + "loss": 0.527, + "num_input_tokens_seen": 17325248, + "step": 5495 + }, + { + "epoch": 0.3520901350745791, + "grad_norm": 24.576942443847656, + "learning_rate": 3.520035846882602e-06, + "loss": 0.5258, + "num_input_tokens_seen": 17340736, + "step": 5500 + }, + { + "epoch": 0.35241021701555597, + "grad_norm": 33.24995803833008, + "learning_rate": 3.5232364614005894e-06, + "loss": 0.474, + "num_input_tokens_seen": 17355904, + "step": 5505 + }, + { + "epoch": 0.35273029895653285, + "grad_norm": 27.663663864135742, + "learning_rate": 3.526437075918577e-06, + "loss": 0.5218, + "num_input_tokens_seen": 17371264, + "step": 5510 + }, + { + "epoch": 0.3530503808975098, + "grad_norm": 29.617881774902344, + "learning_rate": 3.5296376904365644e-06, + "loss": 0.4929, + "num_input_tokens_seen": 17388608, + "step": 5515 + }, + { + "epoch": 0.35337046283848667, + "grad_norm": 31.108774185180664, + "learning_rate": 3.532838304954552e-06, + "loss": 0.4981, + "num_input_tokens_seen": 17403712, + "step": 5520 + }, + { + "epoch": 0.35369054477946354, + "grad_norm": 38.418819427490234, + "learning_rate": 3.5360389194725393e-06, + "loss": 0.5438, + "num_input_tokens_seen": 17421056, + "step": 5525 + }, + { + "epoch": 0.3540106267204404, + "grad_norm": 55.30427169799805, + "learning_rate": 3.5392395339905268e-06, + "loss": 0.6835, + "num_input_tokens_seen": 17436096, + "step": 5530 + }, + { + "epoch": 0.3543307086614173, + "grad_norm": 27.07163429260254, + "learning_rate": 3.5424401485085142e-06, + "loss": 0.4615, + "num_input_tokens_seen": 17452480, + "step": 5535 + }, + { + "epoch": 0.35465079060239424, + "grad_norm": 40.18779754638672, + "learning_rate": 3.5456407630265017e-06, + "loss": 0.454, + "num_input_tokens_seen": 17467776, + "step": 5540 + }, + { + "epoch": 0.3549708725433711, + "grad_norm": 38.78753662109375, + "learning_rate": 3.548841377544489e-06, + "loss": 0.5886, + "num_input_tokens_seen": 17482752, + "step": 5545 + }, + { + "epoch": 0.355290954484348, + "grad_norm": 23.250106811523438, + "learning_rate": 3.552041992062476e-06, + "loss": 0.4031, + "num_input_tokens_seen": 17497792, + "step": 5550 + }, + { + "epoch": 0.3556110364253249, + "grad_norm": 38.46280288696289, + "learning_rate": 3.5552426065804637e-06, + "loss": 0.6044, + "num_input_tokens_seen": 17513024, + "step": 5555 + }, + { + "epoch": 0.35593111836630176, + "grad_norm": 43.001304626464844, + "learning_rate": 3.558443221098451e-06, + "loss": 0.4138, + "num_input_tokens_seen": 17529024, + "step": 5560 + }, + { + "epoch": 0.35625120030727864, + "grad_norm": 20.243738174438477, + "learning_rate": 3.5616438356164386e-06, + "loss": 0.5137, + "num_input_tokens_seen": 17543936, + "step": 5565 + }, + { + "epoch": 0.35657128224825557, + "grad_norm": 49.21430587768555, + "learning_rate": 3.564844450134426e-06, + "loss": 0.6699, + "num_input_tokens_seen": 17559168, + "step": 5570 + }, + { + "epoch": 0.35689136418923245, + "grad_norm": 29.909109115600586, + "learning_rate": 3.5680450646524135e-06, + "loss": 0.4501, + "num_input_tokens_seen": 17574912, + "step": 5575 + }, + { + "epoch": 0.35721144613020933, + "grad_norm": 18.71735382080078, + "learning_rate": 3.571245679170401e-06, + "loss": 0.4311, + "num_input_tokens_seen": 17590272, + "step": 5580 + }, + { + "epoch": 0.3575315280711862, + "grad_norm": 43.74350357055664, + "learning_rate": 3.5744462936883885e-06, + "loss": 0.5236, + "num_input_tokens_seen": 17605952, + "step": 5585 + }, + { + "epoch": 0.3578516100121631, + "grad_norm": 22.063283920288086, + "learning_rate": 3.577646908206376e-06, + "loss": 0.4981, + "num_input_tokens_seen": 17621120, + "step": 5590 + }, + { + "epoch": 0.35817169195314, + "grad_norm": 30.462017059326172, + "learning_rate": 3.5808475227243634e-06, + "loss": 0.6862, + "num_input_tokens_seen": 17636096, + "step": 5595 + }, + { + "epoch": 0.3584917738941169, + "grad_norm": 51.045867919921875, + "learning_rate": 3.584048137242351e-06, + "loss": 0.5436, + "num_input_tokens_seen": 17652672, + "step": 5600 + }, + { + "epoch": 0.3588118558350938, + "grad_norm": 32.641998291015625, + "learning_rate": 3.5872487517603383e-06, + "loss": 0.6614, + "num_input_tokens_seen": 17668032, + "step": 5605 + }, + { + "epoch": 0.35913193777607066, + "grad_norm": 23.419673919677734, + "learning_rate": 3.590449366278326e-06, + "loss": 0.5959, + "num_input_tokens_seen": 17683264, + "step": 5610 + }, + { + "epoch": 0.35945201971704754, + "grad_norm": 34.34988021850586, + "learning_rate": 3.5936499807963133e-06, + "loss": 0.4085, + "num_input_tokens_seen": 17699200, + "step": 5615 + }, + { + "epoch": 0.3597721016580245, + "grad_norm": 26.567842483520508, + "learning_rate": 3.5968505953143007e-06, + "loss": 0.3946, + "num_input_tokens_seen": 17714816, + "step": 5620 + }, + { + "epoch": 0.36009218359900136, + "grad_norm": 38.477577209472656, + "learning_rate": 3.6000512098322882e-06, + "loss": 0.4728, + "num_input_tokens_seen": 17732352, + "step": 5625 + }, + { + "epoch": 0.36041226553997824, + "grad_norm": 50.67283248901367, + "learning_rate": 3.6032518243502757e-06, + "loss": 0.6305, + "num_input_tokens_seen": 17748288, + "step": 5630 + }, + { + "epoch": 0.3607323474809551, + "grad_norm": 36.24601364135742, + "learning_rate": 3.606452438868263e-06, + "loss": 0.4213, + "num_input_tokens_seen": 17763520, + "step": 5635 + }, + { + "epoch": 0.361052429421932, + "grad_norm": 32.271907806396484, + "learning_rate": 3.6096530533862506e-06, + "loss": 0.5041, + "num_input_tokens_seen": 17779328, + "step": 5640 + }, + { + "epoch": 0.3613725113629089, + "grad_norm": 30.317367553710938, + "learning_rate": 3.612853667904238e-06, + "loss": 0.4788, + "num_input_tokens_seen": 17795584, + "step": 5645 + }, + { + "epoch": 0.3616925933038858, + "grad_norm": 20.150102615356445, + "learning_rate": 3.6160542824222255e-06, + "loss": 0.463, + "num_input_tokens_seen": 17810368, + "step": 5650 + }, + { + "epoch": 0.3620126752448627, + "grad_norm": 23.76387596130371, + "learning_rate": 3.619254896940213e-06, + "loss": 0.4958, + "num_input_tokens_seen": 17826496, + "step": 5655 + }, + { + "epoch": 0.36233275718583957, + "grad_norm": 35.2263069152832, + "learning_rate": 3.6224555114582005e-06, + "loss": 0.5192, + "num_input_tokens_seen": 17841344, + "step": 5660 + }, + { + "epoch": 0.36265283912681645, + "grad_norm": 27.15921974182129, + "learning_rate": 3.625656125976188e-06, + "loss": 0.5501, + "num_input_tokens_seen": 17857408, + "step": 5665 + }, + { + "epoch": 0.36297292106779333, + "grad_norm": 24.37274932861328, + "learning_rate": 3.628856740494175e-06, + "loss": 0.5163, + "num_input_tokens_seen": 17873280, + "step": 5670 + }, + { + "epoch": 0.36329300300877027, + "grad_norm": 25.289405822753906, + "learning_rate": 3.6320573550121625e-06, + "loss": 0.5723, + "num_input_tokens_seen": 17888448, + "step": 5675 + }, + { + "epoch": 0.36361308494974715, + "grad_norm": 50.0404167175293, + "learning_rate": 3.63525796953015e-06, + "loss": 0.5371, + "num_input_tokens_seen": 17904320, + "step": 5680 + }, + { + "epoch": 0.363933166890724, + "grad_norm": 39.63492202758789, + "learning_rate": 3.6384585840481374e-06, + "loss": 0.3783, + "num_input_tokens_seen": 17919744, + "step": 5685 + }, + { + "epoch": 0.3642532488317009, + "grad_norm": 34.38381576538086, + "learning_rate": 3.641659198566125e-06, + "loss": 0.3927, + "num_input_tokens_seen": 17934656, + "step": 5690 + }, + { + "epoch": 0.3645733307726778, + "grad_norm": 54.6318473815918, + "learning_rate": 3.6448598130841123e-06, + "loss": 0.6628, + "num_input_tokens_seen": 17950976, + "step": 5695 + }, + { + "epoch": 0.3648934127136547, + "grad_norm": 27.76530647277832, + "learning_rate": 3.6480604276021e-06, + "loss": 0.6469, + "num_input_tokens_seen": 17966400, + "step": 5700 + }, + { + "epoch": 0.3652134946546316, + "grad_norm": 99.29348754882812, + "learning_rate": 3.6512610421200873e-06, + "loss": 0.6176, + "num_input_tokens_seen": 17983872, + "step": 5705 + }, + { + "epoch": 0.3655335765956085, + "grad_norm": 41.55954360961914, + "learning_rate": 3.6544616566380747e-06, + "loss": 0.548, + "num_input_tokens_seen": 17999680, + "step": 5710 + }, + { + "epoch": 0.36585365853658536, + "grad_norm": 23.483184814453125, + "learning_rate": 3.657662271156062e-06, + "loss": 0.4054, + "num_input_tokens_seen": 18014784, + "step": 5715 + }, + { + "epoch": 0.36617374047756224, + "grad_norm": 27.025270462036133, + "learning_rate": 3.6608628856740497e-06, + "loss": 0.5321, + "num_input_tokens_seen": 18029888, + "step": 5720 + }, + { + "epoch": 0.3664938224185392, + "grad_norm": 33.137454986572266, + "learning_rate": 3.664063500192037e-06, + "loss": 0.4562, + "num_input_tokens_seen": 18045632, + "step": 5725 + }, + { + "epoch": 0.36681390435951605, + "grad_norm": 42.9515266418457, + "learning_rate": 3.6672641147100246e-06, + "loss": 0.6684, + "num_input_tokens_seen": 18060928, + "step": 5730 + }, + { + "epoch": 0.36713398630049293, + "grad_norm": 25.466896057128906, + "learning_rate": 3.670464729228012e-06, + "loss": 0.4735, + "num_input_tokens_seen": 18076352, + "step": 5735 + }, + { + "epoch": 0.3674540682414698, + "grad_norm": 46.54610061645508, + "learning_rate": 3.6736653437459995e-06, + "loss": 0.5248, + "num_input_tokens_seen": 18092096, + "step": 5740 + }, + { + "epoch": 0.3677741501824467, + "grad_norm": 27.049694061279297, + "learning_rate": 3.676865958263987e-06, + "loss": 0.7352, + "num_input_tokens_seen": 18107648, + "step": 5745 + }, + { + "epoch": 0.36809423212342357, + "grad_norm": 24.69159698486328, + "learning_rate": 3.6800665727819745e-06, + "loss": 0.489, + "num_input_tokens_seen": 18123264, + "step": 5750 + }, + { + "epoch": 0.3684143140644005, + "grad_norm": 29.635217666625977, + "learning_rate": 3.683267187299962e-06, + "loss": 0.6408, + "num_input_tokens_seen": 18138368, + "step": 5755 + }, + { + "epoch": 0.3687343960053774, + "grad_norm": 13.892148971557617, + "learning_rate": 3.6864678018179494e-06, + "loss": 0.3899, + "num_input_tokens_seen": 18154432, + "step": 5760 + }, + { + "epoch": 0.36905447794635426, + "grad_norm": 54.13380432128906, + "learning_rate": 3.689668416335937e-06, + "loss": 0.4416, + "num_input_tokens_seen": 18171200, + "step": 5765 + }, + { + "epoch": 0.36937455988733114, + "grad_norm": 39.60659408569336, + "learning_rate": 3.6928690308539243e-06, + "loss": 0.4342, + "num_input_tokens_seen": 18187008, + "step": 5770 + }, + { + "epoch": 0.369694641828308, + "grad_norm": 39.4443473815918, + "learning_rate": 3.696069645371912e-06, + "loss": 0.4311, + "num_input_tokens_seen": 18202944, + "step": 5775 + }, + { + "epoch": 0.37001472376928496, + "grad_norm": 103.32891845703125, + "learning_rate": 3.6992702598898993e-06, + "loss": 0.6343, + "num_input_tokens_seen": 18217984, + "step": 5780 + }, + { + "epoch": 0.37033480571026184, + "grad_norm": 29.013689041137695, + "learning_rate": 3.7024708744078867e-06, + "loss": 0.6752, + "num_input_tokens_seen": 18233984, + "step": 5785 + }, + { + "epoch": 0.3706548876512387, + "grad_norm": 29.28402328491211, + "learning_rate": 3.7056714889258738e-06, + "loss": 0.5054, + "num_input_tokens_seen": 18249024, + "step": 5790 + }, + { + "epoch": 0.3709749695922156, + "grad_norm": 41.88875198364258, + "learning_rate": 3.7088721034438612e-06, + "loss": 0.4128, + "num_input_tokens_seen": 18264256, + "step": 5795 + }, + { + "epoch": 0.3712950515331925, + "grad_norm": 51.70709991455078, + "learning_rate": 3.7120727179618487e-06, + "loss": 0.6297, + "num_input_tokens_seen": 18279936, + "step": 5800 + }, + { + "epoch": 0.3716151334741694, + "grad_norm": 22.734384536743164, + "learning_rate": 3.715273332479836e-06, + "loss": 0.4761, + "num_input_tokens_seen": 18295488, + "step": 5805 + }, + { + "epoch": 0.3719352154151463, + "grad_norm": 23.440353393554688, + "learning_rate": 3.7184739469978236e-06, + "loss": 0.6025, + "num_input_tokens_seen": 18311360, + "step": 5810 + }, + { + "epoch": 0.37225529735612317, + "grad_norm": 24.408361434936523, + "learning_rate": 3.721674561515811e-06, + "loss": 0.4959, + "num_input_tokens_seen": 18326592, + "step": 5815 + }, + { + "epoch": 0.37257537929710005, + "grad_norm": 22.500865936279297, + "learning_rate": 3.7248751760337986e-06, + "loss": 0.3994, + "num_input_tokens_seen": 18344000, + "step": 5820 + }, + { + "epoch": 0.37289546123807693, + "grad_norm": 44.44254684448242, + "learning_rate": 3.728075790551786e-06, + "loss": 0.6157, + "num_input_tokens_seen": 18359872, + "step": 5825 + }, + { + "epoch": 0.3732155431790538, + "grad_norm": 36.45853805541992, + "learning_rate": 3.7312764050697735e-06, + "loss": 0.4682, + "num_input_tokens_seen": 18374400, + "step": 5830 + }, + { + "epoch": 0.37353562512003075, + "grad_norm": 22.68947982788086, + "learning_rate": 3.734477019587761e-06, + "loss": 0.3252, + "num_input_tokens_seen": 18390656, + "step": 5835 + }, + { + "epoch": 0.3738557070610076, + "grad_norm": 48.377872467041016, + "learning_rate": 3.7376776341057485e-06, + "loss": 0.5131, + "num_input_tokens_seen": 18406400, + "step": 5840 + }, + { + "epoch": 0.3741757890019845, + "grad_norm": 44.16627883911133, + "learning_rate": 3.740878248623736e-06, + "loss": 0.7071, + "num_input_tokens_seen": 18422848, + "step": 5845 + }, + { + "epoch": 0.3744958709429614, + "grad_norm": 39.77794647216797, + "learning_rate": 3.7440788631417234e-06, + "loss": 0.6232, + "num_input_tokens_seen": 18438912, + "step": 5850 + }, + { + "epoch": 0.37481595288393826, + "grad_norm": 40.77140808105469, + "learning_rate": 3.747279477659711e-06, + "loss": 0.5506, + "num_input_tokens_seen": 18455488, + "step": 5855 + }, + { + "epoch": 0.3751360348249152, + "grad_norm": 36.773162841796875, + "learning_rate": 3.7504800921776983e-06, + "loss": 0.451, + "num_input_tokens_seen": 18470080, + "step": 5860 + }, + { + "epoch": 0.3754561167658921, + "grad_norm": 29.15806770324707, + "learning_rate": 3.7536807066956858e-06, + "loss": 0.5892, + "num_input_tokens_seen": 18484480, + "step": 5865 + }, + { + "epoch": 0.37577619870686896, + "grad_norm": 23.57305335998535, + "learning_rate": 3.7568813212136733e-06, + "loss": 0.5427, + "num_input_tokens_seen": 18501312, + "step": 5870 + }, + { + "epoch": 0.37609628064784584, + "grad_norm": 27.58259391784668, + "learning_rate": 3.7600819357316607e-06, + "loss": 0.5258, + "num_input_tokens_seen": 18515968, + "step": 5875 + }, + { + "epoch": 0.3764163625888227, + "grad_norm": 18.161163330078125, + "learning_rate": 3.763282550249648e-06, + "loss": 0.4716, + "num_input_tokens_seen": 18531200, + "step": 5880 + }, + { + "epoch": 0.37673644452979965, + "grad_norm": 30.9069881439209, + "learning_rate": 3.766483164767636e-06, + "loss": 0.4008, + "num_input_tokens_seen": 18547712, + "step": 5885 + }, + { + "epoch": 0.37705652647077653, + "grad_norm": 28.732818603515625, + "learning_rate": 3.7696837792856235e-06, + "loss": 0.4438, + "num_input_tokens_seen": 18562752, + "step": 5890 + }, + { + "epoch": 0.3773766084117534, + "grad_norm": 30.994474411010742, + "learning_rate": 3.772884393803611e-06, + "loss": 0.4763, + "num_input_tokens_seen": 18578368, + "step": 5895 + }, + { + "epoch": 0.3776966903527303, + "grad_norm": 38.41969680786133, + "learning_rate": 3.7760850083215985e-06, + "loss": 0.4597, + "num_input_tokens_seen": 18593408, + "step": 5900 + }, + { + "epoch": 0.37801677229370717, + "grad_norm": 26.002717971801758, + "learning_rate": 3.779285622839586e-06, + "loss": 0.4968, + "num_input_tokens_seen": 18607872, + "step": 5905 + }, + { + "epoch": 0.3783368542346841, + "grad_norm": 19.925926208496094, + "learning_rate": 3.7824862373575726e-06, + "loss": 0.414, + "num_input_tokens_seen": 18624000, + "step": 5910 + }, + { + "epoch": 0.378656936175661, + "grad_norm": 36.59950256347656, + "learning_rate": 3.78568685187556e-06, + "loss": 0.6451, + "num_input_tokens_seen": 18640704, + "step": 5915 + }, + { + "epoch": 0.37897701811663786, + "grad_norm": 27.657333374023438, + "learning_rate": 3.7888874663935475e-06, + "loss": 0.501, + "num_input_tokens_seen": 18658368, + "step": 5920 + }, + { + "epoch": 0.37929710005761474, + "grad_norm": 54.508235931396484, + "learning_rate": 3.792088080911535e-06, + "loss": 0.5381, + "num_input_tokens_seen": 18675584, + "step": 5925 + }, + { + "epoch": 0.3796171819985916, + "grad_norm": 30.076391220092773, + "learning_rate": 3.7952886954295224e-06, + "loss": 0.6799, + "num_input_tokens_seen": 18691904, + "step": 5930 + }, + { + "epoch": 0.3799372639395685, + "grad_norm": 34.37838363647461, + "learning_rate": 3.79848930994751e-06, + "loss": 0.4492, + "num_input_tokens_seen": 18708736, + "step": 5935 + }, + { + "epoch": 0.38025734588054544, + "grad_norm": 32.58385467529297, + "learning_rate": 3.8016899244654974e-06, + "loss": 0.4253, + "num_input_tokens_seen": 18724032, + "step": 5940 + }, + { + "epoch": 0.3805774278215223, + "grad_norm": 55.80094909667969, + "learning_rate": 3.804890538983485e-06, + "loss": 0.5151, + "num_input_tokens_seen": 18740352, + "step": 5945 + }, + { + "epoch": 0.3808975097624992, + "grad_norm": 29.88770294189453, + "learning_rate": 3.8080911535014723e-06, + "loss": 0.4548, + "num_input_tokens_seen": 18754560, + "step": 5950 + }, + { + "epoch": 0.3812175917034761, + "grad_norm": 61.93190383911133, + "learning_rate": 3.81129176801946e-06, + "loss": 0.7133, + "num_input_tokens_seen": 18769280, + "step": 5955 + }, + { + "epoch": 0.38153767364445296, + "grad_norm": 44.2761116027832, + "learning_rate": 3.8144923825374477e-06, + "loss": 0.4603, + "num_input_tokens_seen": 18784512, + "step": 5960 + }, + { + "epoch": 0.3818577555854299, + "grad_norm": 43.63498306274414, + "learning_rate": 3.817692997055435e-06, + "loss": 0.4459, + "num_input_tokens_seen": 18800384, + "step": 5965 + }, + { + "epoch": 0.38217783752640677, + "grad_norm": 40.355316162109375, + "learning_rate": 3.820893611573423e-06, + "loss": 0.4875, + "num_input_tokens_seen": 18815552, + "step": 5970 + }, + { + "epoch": 0.38249791946738365, + "grad_norm": 19.373620986938477, + "learning_rate": 3.82409422609141e-06, + "loss": 0.4904, + "num_input_tokens_seen": 18830080, + "step": 5975 + }, + { + "epoch": 0.38281800140836053, + "grad_norm": 37.09465408325195, + "learning_rate": 3.8272948406093975e-06, + "loss": 0.5087, + "num_input_tokens_seen": 18846272, + "step": 5980 + }, + { + "epoch": 0.3831380833493374, + "grad_norm": 36.47341537475586, + "learning_rate": 3.8304954551273846e-06, + "loss": 0.4675, + "num_input_tokens_seen": 18860928, + "step": 5985 + }, + { + "epoch": 0.38345816529031435, + "grad_norm": 41.92753982543945, + "learning_rate": 3.8336960696453725e-06, + "loss": 0.4753, + "num_input_tokens_seen": 18877120, + "step": 5990 + }, + { + "epoch": 0.3837782472312912, + "grad_norm": 43.556190490722656, + "learning_rate": 3.8368966841633595e-06, + "loss": 0.6253, + "num_input_tokens_seen": 18892224, + "step": 5995 + }, + { + "epoch": 0.3840983291722681, + "grad_norm": 40.31187438964844, + "learning_rate": 3.840097298681347e-06, + "loss": 0.5498, + "num_input_tokens_seen": 18907008, + "step": 6000 + }, + { + "epoch": 0.384418411113245, + "grad_norm": 24.449857711791992, + "learning_rate": 3.8432979131993344e-06, + "loss": 0.3618, + "num_input_tokens_seen": 18922368, + "step": 6005 + }, + { + "epoch": 0.38473849305422186, + "grad_norm": 29.806760787963867, + "learning_rate": 3.846498527717322e-06, + "loss": 0.4821, + "num_input_tokens_seen": 18937536, + "step": 6010 + }, + { + "epoch": 0.38505857499519874, + "grad_norm": 36.225067138671875, + "learning_rate": 3.849699142235309e-06, + "loss": 0.5246, + "num_input_tokens_seen": 18954048, + "step": 6015 + }, + { + "epoch": 0.3853786569361757, + "grad_norm": 30.742048263549805, + "learning_rate": 3.852899756753297e-06, + "loss": 0.5951, + "num_input_tokens_seen": 18969472, + "step": 6020 + }, + { + "epoch": 0.38569873887715256, + "grad_norm": 26.204708099365234, + "learning_rate": 3.856100371271284e-06, + "loss": 0.4919, + "num_input_tokens_seen": 18983872, + "step": 6025 + }, + { + "epoch": 0.38601882081812944, + "grad_norm": 38.82505416870117, + "learning_rate": 3.859300985789271e-06, + "loss": 0.4294, + "num_input_tokens_seen": 18999360, + "step": 6030 + }, + { + "epoch": 0.3863389027591063, + "grad_norm": 29.84668731689453, + "learning_rate": 3.862501600307259e-06, + "loss": 0.5733, + "num_input_tokens_seen": 19015424, + "step": 6035 + }, + { + "epoch": 0.3866589847000832, + "grad_norm": 28.835132598876953, + "learning_rate": 3.865702214825246e-06, + "loss": 0.4235, + "num_input_tokens_seen": 19031616, + "step": 6040 + }, + { + "epoch": 0.38697906664106013, + "grad_norm": 28.746370315551758, + "learning_rate": 3.868902829343234e-06, + "loss": 0.5539, + "num_input_tokens_seen": 19047040, + "step": 6045 + }, + { + "epoch": 0.387299148582037, + "grad_norm": 44.4168815612793, + "learning_rate": 3.872103443861221e-06, + "loss": 0.551, + "num_input_tokens_seen": 19062848, + "step": 6050 + }, + { + "epoch": 0.3876192305230139, + "grad_norm": 46.091514587402344, + "learning_rate": 3.875304058379209e-06, + "loss": 0.7136, + "num_input_tokens_seen": 19078976, + "step": 6055 + }, + { + "epoch": 0.38793931246399077, + "grad_norm": 28.701120376586914, + "learning_rate": 3.878504672897196e-06, + "loss": 0.4144, + "num_input_tokens_seen": 19093632, + "step": 6060 + }, + { + "epoch": 0.38825939440496765, + "grad_norm": 40.34693908691406, + "learning_rate": 3.881705287415184e-06, + "loss": 0.3784, + "num_input_tokens_seen": 19109056, + "step": 6065 + }, + { + "epoch": 0.3885794763459446, + "grad_norm": 35.81085205078125, + "learning_rate": 3.884905901933171e-06, + "loss": 0.3711, + "num_input_tokens_seen": 19124544, + "step": 6070 + }, + { + "epoch": 0.38889955828692147, + "grad_norm": 31.784053802490234, + "learning_rate": 3.888106516451159e-06, + "loss": 0.4527, + "num_input_tokens_seen": 19140480, + "step": 6075 + }, + { + "epoch": 0.38921964022789834, + "grad_norm": 45.394474029541016, + "learning_rate": 3.891307130969146e-06, + "loss": 0.46, + "num_input_tokens_seen": 19156736, + "step": 6080 + }, + { + "epoch": 0.3895397221688752, + "grad_norm": 33.013816833496094, + "learning_rate": 3.894507745487134e-06, + "loss": 0.5563, + "num_input_tokens_seen": 19171520, + "step": 6085 + }, + { + "epoch": 0.3898598041098521, + "grad_norm": 28.016103744506836, + "learning_rate": 3.897708360005121e-06, + "loss": 0.6042, + "num_input_tokens_seen": 19187136, + "step": 6090 + }, + { + "epoch": 0.39017988605082904, + "grad_norm": 27.902408599853516, + "learning_rate": 3.900908974523109e-06, + "loss": 0.5486, + "num_input_tokens_seen": 19201856, + "step": 6095 + }, + { + "epoch": 0.3904999679918059, + "grad_norm": 32.56270980834961, + "learning_rate": 3.904109589041096e-06, + "loss": 0.4568, + "num_input_tokens_seen": 19216704, + "step": 6100 + }, + { + "epoch": 0.3908200499327828, + "grad_norm": 37.548831939697266, + "learning_rate": 3.907310203559084e-06, + "loss": 0.6902, + "num_input_tokens_seen": 19233408, + "step": 6105 + }, + { + "epoch": 0.3911401318737597, + "grad_norm": 24.026535034179688, + "learning_rate": 3.910510818077071e-06, + "loss": 0.4412, + "num_input_tokens_seen": 19250048, + "step": 6110 + }, + { + "epoch": 0.39146021381473656, + "grad_norm": 23.25770378112793, + "learning_rate": 3.913711432595059e-06, + "loss": 0.5017, + "num_input_tokens_seen": 19266560, + "step": 6115 + }, + { + "epoch": 0.39178029575571344, + "grad_norm": 66.61859130859375, + "learning_rate": 3.916912047113046e-06, + "loss": 0.4863, + "num_input_tokens_seen": 19281408, + "step": 6120 + }, + { + "epoch": 0.39210037769669037, + "grad_norm": 29.331615447998047, + "learning_rate": 3.920112661631034e-06, + "loss": 0.4705, + "num_input_tokens_seen": 19296384, + "step": 6125 + }, + { + "epoch": 0.39242045963766725, + "grad_norm": 19.20572853088379, + "learning_rate": 3.9233132761490215e-06, + "loss": 0.4233, + "num_input_tokens_seen": 19312000, + "step": 6130 + }, + { + "epoch": 0.39274054157864413, + "grad_norm": 24.21821403503418, + "learning_rate": 3.926513890667009e-06, + "loss": 0.4467, + "num_input_tokens_seen": 19327488, + "step": 6135 + }, + { + "epoch": 0.393060623519621, + "grad_norm": 31.929677963256836, + "learning_rate": 3.9297145051849965e-06, + "loss": 0.6074, + "num_input_tokens_seen": 19342080, + "step": 6140 + }, + { + "epoch": 0.3933807054605979, + "grad_norm": 44.59328842163086, + "learning_rate": 3.9329151197029835e-06, + "loss": 0.4601, + "num_input_tokens_seen": 19358336, + "step": 6145 + }, + { + "epoch": 0.3937007874015748, + "grad_norm": 65.26045989990234, + "learning_rate": 3.9361157342209706e-06, + "loss": 0.6262, + "num_input_tokens_seen": 19373376, + "step": 6150 + }, + { + "epoch": 0.3940208693425517, + "grad_norm": 81.655029296875, + "learning_rate": 3.939316348738958e-06, + "loss": 0.5754, + "num_input_tokens_seen": 19388608, + "step": 6155 + }, + { + "epoch": 0.3943409512835286, + "grad_norm": 30.29998016357422, + "learning_rate": 3.9425169632569455e-06, + "loss": 0.6771, + "num_input_tokens_seen": 19403264, + "step": 6160 + }, + { + "epoch": 0.39466103322450546, + "grad_norm": 39.630706787109375, + "learning_rate": 3.9457175777749325e-06, + "loss": 0.602, + "num_input_tokens_seen": 19418816, + "step": 6165 + }, + { + "epoch": 0.39498111516548234, + "grad_norm": 35.93238067626953, + "learning_rate": 3.9489181922929204e-06, + "loss": 0.5712, + "num_input_tokens_seen": 19435392, + "step": 6170 + }, + { + "epoch": 0.3953011971064593, + "grad_norm": 28.95148468017578, + "learning_rate": 3.9521188068109075e-06, + "loss": 0.5541, + "num_input_tokens_seen": 19450048, + "step": 6175 + }, + { + "epoch": 0.39562127904743616, + "grad_norm": 29.412525177001953, + "learning_rate": 3.955319421328895e-06, + "loss": 0.529, + "num_input_tokens_seen": 19466368, + "step": 6180 + }, + { + "epoch": 0.39594136098841304, + "grad_norm": 23.349491119384766, + "learning_rate": 3.958520035846882e-06, + "loss": 0.385, + "num_input_tokens_seen": 19482624, + "step": 6185 + }, + { + "epoch": 0.3962614429293899, + "grad_norm": 28.905799865722656, + "learning_rate": 3.96172065036487e-06, + "loss": 0.5878, + "num_input_tokens_seen": 19498048, + "step": 6190 + }, + { + "epoch": 0.3965815248703668, + "grad_norm": 20.196725845336914, + "learning_rate": 3.964921264882858e-06, + "loss": 0.5465, + "num_input_tokens_seen": 19515264, + "step": 6195 + }, + { + "epoch": 0.3969016068113437, + "grad_norm": 41.77888488769531, + "learning_rate": 3.968121879400845e-06, + "loss": 0.484, + "num_input_tokens_seen": 19533056, + "step": 6200 + }, + { + "epoch": 0.3972216887523206, + "grad_norm": 31.400531768798828, + "learning_rate": 3.971322493918833e-06, + "loss": 0.4691, + "num_input_tokens_seen": 19548480, + "step": 6205 + }, + { + "epoch": 0.3975417706932975, + "grad_norm": 30.1193904876709, + "learning_rate": 3.97452310843682e-06, + "loss": 0.3764, + "num_input_tokens_seen": 19563712, + "step": 6210 + }, + { + "epoch": 0.39786185263427437, + "grad_norm": 32.85342025756836, + "learning_rate": 3.977723722954808e-06, + "loss": 0.4834, + "num_input_tokens_seen": 19578624, + "step": 6215 + }, + { + "epoch": 0.39818193457525125, + "grad_norm": 31.459009170532227, + "learning_rate": 3.980924337472795e-06, + "loss": 0.4655, + "num_input_tokens_seen": 19593408, + "step": 6220 + }, + { + "epoch": 0.39850201651622813, + "grad_norm": 124.02976989746094, + "learning_rate": 3.984124951990783e-06, + "loss": 0.6619, + "num_input_tokens_seen": 19609216, + "step": 6225 + }, + { + "epoch": 0.39882209845720507, + "grad_norm": 46.37904357910156, + "learning_rate": 3.98732556650877e-06, + "loss": 0.578, + "num_input_tokens_seen": 19624896, + "step": 6230 + }, + { + "epoch": 0.39914218039818194, + "grad_norm": 52.72289276123047, + "learning_rate": 3.990526181026758e-06, + "loss": 0.5431, + "num_input_tokens_seen": 19640128, + "step": 6235 + }, + { + "epoch": 0.3994622623391588, + "grad_norm": 31.863994598388672, + "learning_rate": 3.993726795544745e-06, + "loss": 0.3773, + "num_input_tokens_seen": 19655680, + "step": 6240 + }, + { + "epoch": 0.3997823442801357, + "grad_norm": 25.923751831054688, + "learning_rate": 3.996927410062733e-06, + "loss": 0.6672, + "num_input_tokens_seen": 19672192, + "step": 6245 + }, + { + "epoch": 0.4001024262211126, + "grad_norm": 68.67561340332031, + "learning_rate": 4.00012802458072e-06, + "loss": 0.6138, + "num_input_tokens_seen": 19688896, + "step": 6250 + }, + { + "epoch": 0.4004225081620895, + "grad_norm": 20.479108810424805, + "learning_rate": 4.003328639098708e-06, + "loss": 0.4616, + "num_input_tokens_seen": 19704128, + "step": 6255 + }, + { + "epoch": 0.4007425901030664, + "grad_norm": 51.65874099731445, + "learning_rate": 4.006529253616695e-06, + "loss": 0.5449, + "num_input_tokens_seen": 19719744, + "step": 6260 + }, + { + "epoch": 0.4010626720440433, + "grad_norm": 46.15406799316406, + "learning_rate": 4.009729868134683e-06, + "loss": 0.5009, + "num_input_tokens_seen": 19736128, + "step": 6265 + }, + { + "epoch": 0.40138275398502016, + "grad_norm": 34.48747253417969, + "learning_rate": 4.01293048265267e-06, + "loss": 0.5497, + "num_input_tokens_seen": 19751296, + "step": 6270 + }, + { + "epoch": 0.40170283592599704, + "grad_norm": 55.446571350097656, + "learning_rate": 4.016131097170657e-06, + "loss": 0.5978, + "num_input_tokens_seen": 19765888, + "step": 6275 + }, + { + "epoch": 0.402022917866974, + "grad_norm": 29.981290817260742, + "learning_rate": 4.019331711688645e-06, + "loss": 0.5703, + "num_input_tokens_seen": 19780864, + "step": 6280 + }, + { + "epoch": 0.40234299980795085, + "grad_norm": 31.195552825927734, + "learning_rate": 4.022532326206632e-06, + "loss": 0.491, + "num_input_tokens_seen": 19796864, + "step": 6285 + }, + { + "epoch": 0.40266308174892773, + "grad_norm": 32.11478805541992, + "learning_rate": 4.02573294072462e-06, + "loss": 0.5399, + "num_input_tokens_seen": 19811904, + "step": 6290 + }, + { + "epoch": 0.4029831636899046, + "grad_norm": 23.105045318603516, + "learning_rate": 4.028933555242607e-06, + "loss": 0.5094, + "num_input_tokens_seen": 19827456, + "step": 6295 + }, + { + "epoch": 0.4033032456308815, + "grad_norm": 25.628971099853516, + "learning_rate": 4.032134169760595e-06, + "loss": 0.4796, + "num_input_tokens_seen": 19842496, + "step": 6300 + }, + { + "epoch": 0.40362332757185837, + "grad_norm": 46.660701751708984, + "learning_rate": 4.035334784278582e-06, + "loss": 0.5461, + "num_input_tokens_seen": 19858240, + "step": 6305 + }, + { + "epoch": 0.4039434095128353, + "grad_norm": 34.71757125854492, + "learning_rate": 4.0385353987965695e-06, + "loss": 0.4699, + "num_input_tokens_seen": 19874368, + "step": 6310 + }, + { + "epoch": 0.4042634914538122, + "grad_norm": 28.1708984375, + "learning_rate": 4.0417360133145566e-06, + "loss": 0.5295, + "num_input_tokens_seen": 19891904, + "step": 6315 + }, + { + "epoch": 0.40458357339478906, + "grad_norm": 24.817888259887695, + "learning_rate": 4.0449366278325445e-06, + "loss": 0.4789, + "num_input_tokens_seen": 19907392, + "step": 6320 + }, + { + "epoch": 0.40490365533576594, + "grad_norm": 38.02722930908203, + "learning_rate": 4.0481372423505315e-06, + "loss": 0.5229, + "num_input_tokens_seen": 19922624, + "step": 6325 + }, + { + "epoch": 0.4052237372767428, + "grad_norm": 52.55462646484375, + "learning_rate": 4.051337856868519e-06, + "loss": 0.5257, + "num_input_tokens_seen": 19939328, + "step": 6330 + }, + { + "epoch": 0.40554381921771976, + "grad_norm": 41.361427307128906, + "learning_rate": 4.0545384713865064e-06, + "loss": 0.4435, + "num_input_tokens_seen": 19955392, + "step": 6335 + }, + { + "epoch": 0.40586390115869664, + "grad_norm": 39.10175704956055, + "learning_rate": 4.057739085904494e-06, + "loss": 0.5389, + "num_input_tokens_seen": 19971520, + "step": 6340 + }, + { + "epoch": 0.4061839830996735, + "grad_norm": 22.633283615112305, + "learning_rate": 4.060939700422481e-06, + "loss": 0.621, + "num_input_tokens_seen": 19988352, + "step": 6345 + }, + { + "epoch": 0.4065040650406504, + "grad_norm": 50.42052459716797, + "learning_rate": 4.064140314940469e-06, + "loss": 0.5246, + "num_input_tokens_seen": 20005184, + "step": 6350 + }, + { + "epoch": 0.4068241469816273, + "grad_norm": 43.611671447753906, + "learning_rate": 4.067340929458456e-06, + "loss": 0.4853, + "num_input_tokens_seen": 20020800, + "step": 6355 + }, + { + "epoch": 0.4071442289226042, + "grad_norm": 29.161914825439453, + "learning_rate": 4.070541543976444e-06, + "loss": 0.5291, + "num_input_tokens_seen": 20036416, + "step": 6360 + }, + { + "epoch": 0.4074643108635811, + "grad_norm": 29.020750045776367, + "learning_rate": 4.073742158494431e-06, + "loss": 0.5981, + "num_input_tokens_seen": 20053184, + "step": 6365 + }, + { + "epoch": 0.40778439280455797, + "grad_norm": 39.75625991821289, + "learning_rate": 4.076942773012419e-06, + "loss": 0.4081, + "num_input_tokens_seen": 20069888, + "step": 6370 + }, + { + "epoch": 0.40810447474553485, + "grad_norm": 63.50645065307617, + "learning_rate": 4.080143387530406e-06, + "loss": 0.5855, + "num_input_tokens_seen": 20085760, + "step": 6375 + }, + { + "epoch": 0.40842455668651173, + "grad_norm": 43.84078598022461, + "learning_rate": 4.083344002048394e-06, + "loss": 0.4317, + "num_input_tokens_seen": 20101056, + "step": 6380 + }, + { + "epoch": 0.4087446386274886, + "grad_norm": 34.091094970703125, + "learning_rate": 4.086544616566381e-06, + "loss": 0.5518, + "num_input_tokens_seen": 20116352, + "step": 6385 + }, + { + "epoch": 0.40906472056846555, + "grad_norm": 18.827375411987305, + "learning_rate": 4.089745231084368e-06, + "loss": 0.4846, + "num_input_tokens_seen": 20131712, + "step": 6390 + }, + { + "epoch": 0.4093848025094424, + "grad_norm": 51.087890625, + "learning_rate": 4.092945845602356e-06, + "loss": 0.4319, + "num_input_tokens_seen": 20146880, + "step": 6395 + }, + { + "epoch": 0.4097048844504193, + "grad_norm": 19.613656997680664, + "learning_rate": 4.096146460120343e-06, + "loss": 0.3733, + "num_input_tokens_seen": 20163520, + "step": 6400 + }, + { + "epoch": 0.4100249663913962, + "grad_norm": 38.53398513793945, + "learning_rate": 4.099347074638331e-06, + "loss": 0.4392, + "num_input_tokens_seen": 20177984, + "step": 6405 + }, + { + "epoch": 0.41034504833237306, + "grad_norm": 14.655762672424316, + "learning_rate": 4.102547689156318e-06, + "loss": 0.4381, + "num_input_tokens_seen": 20193472, + "step": 6410 + }, + { + "epoch": 0.41066513027335, + "grad_norm": 63.25564956665039, + "learning_rate": 4.105748303674306e-06, + "loss": 0.5462, + "num_input_tokens_seen": 20210624, + "step": 6415 + }, + { + "epoch": 0.4109852122143269, + "grad_norm": 40.789031982421875, + "learning_rate": 4.108948918192293e-06, + "loss": 0.5894, + "num_input_tokens_seen": 20226688, + "step": 6420 + }, + { + "epoch": 0.41130529415530376, + "grad_norm": 31.243993759155273, + "learning_rate": 4.112149532710281e-06, + "loss": 0.4931, + "num_input_tokens_seen": 20244608, + "step": 6425 + }, + { + "epoch": 0.41162537609628064, + "grad_norm": 29.588743209838867, + "learning_rate": 4.115350147228268e-06, + "loss": 0.4032, + "num_input_tokens_seen": 20261184, + "step": 6430 + }, + { + "epoch": 0.4119454580372575, + "grad_norm": 73.75435638427734, + "learning_rate": 4.118550761746256e-06, + "loss": 0.4591, + "num_input_tokens_seen": 20276736, + "step": 6435 + }, + { + "epoch": 0.41226553997823445, + "grad_norm": 26.760740280151367, + "learning_rate": 4.121751376264243e-06, + "loss": 0.5466, + "num_input_tokens_seen": 20293888, + "step": 6440 + }, + { + "epoch": 0.41258562191921133, + "grad_norm": 23.04122543334961, + "learning_rate": 4.124951990782231e-06, + "loss": 0.6605, + "num_input_tokens_seen": 20309696, + "step": 6445 + }, + { + "epoch": 0.4129057038601882, + "grad_norm": 40.109291076660156, + "learning_rate": 4.128152605300218e-06, + "loss": 0.5178, + "num_input_tokens_seen": 20325632, + "step": 6450 + }, + { + "epoch": 0.4132257858011651, + "grad_norm": 18.540939331054688, + "learning_rate": 4.131353219818206e-06, + "loss": 0.4484, + "num_input_tokens_seen": 20341888, + "step": 6455 + }, + { + "epoch": 0.41354586774214197, + "grad_norm": 32.76128005981445, + "learning_rate": 4.134553834336193e-06, + "loss": 0.5741, + "num_input_tokens_seen": 20358656, + "step": 6460 + }, + { + "epoch": 0.4138659496831189, + "grad_norm": 35.129337310791016, + "learning_rate": 4.1377544488541806e-06, + "loss": 0.4933, + "num_input_tokens_seen": 20372864, + "step": 6465 + }, + { + "epoch": 0.4141860316240958, + "grad_norm": 26.483928680419922, + "learning_rate": 4.140955063372168e-06, + "loss": 0.5224, + "num_input_tokens_seen": 20387840, + "step": 6470 + }, + { + "epoch": 0.41450611356507266, + "grad_norm": 29.83782958984375, + "learning_rate": 4.1441556778901555e-06, + "loss": 0.4308, + "num_input_tokens_seen": 20404736, + "step": 6475 + }, + { + "epoch": 0.41482619550604954, + "grad_norm": 28.035940170288086, + "learning_rate": 4.1473562924081426e-06, + "loss": 0.6879, + "num_input_tokens_seen": 20422848, + "step": 6480 + }, + { + "epoch": 0.4151462774470264, + "grad_norm": 37.0591926574707, + "learning_rate": 4.1505569069261304e-06, + "loss": 0.5472, + "num_input_tokens_seen": 20438016, + "step": 6485 + }, + { + "epoch": 0.4154663593880033, + "grad_norm": 30.629589080810547, + "learning_rate": 4.1537575214441175e-06, + "loss": 0.6798, + "num_input_tokens_seen": 20454464, + "step": 6490 + }, + { + "epoch": 0.41578644132898024, + "grad_norm": 21.514545440673828, + "learning_rate": 4.156958135962105e-06, + "loss": 0.4954, + "num_input_tokens_seen": 20470464, + "step": 6495 + }, + { + "epoch": 0.4161065232699571, + "grad_norm": 31.35344123840332, + "learning_rate": 4.160158750480092e-06, + "loss": 0.5747, + "num_input_tokens_seen": 20485696, + "step": 6500 + }, + { + "epoch": 0.416426605210934, + "grad_norm": 19.090286254882812, + "learning_rate": 4.16335936499808e-06, + "loss": 0.4195, + "num_input_tokens_seen": 20500608, + "step": 6505 + }, + { + "epoch": 0.4167466871519109, + "grad_norm": 34.63272476196289, + "learning_rate": 4.166559979516067e-06, + "loss": 0.5438, + "num_input_tokens_seen": 20515776, + "step": 6510 + }, + { + "epoch": 0.41706676909288776, + "grad_norm": 36.324256896972656, + "learning_rate": 4.169760594034054e-06, + "loss": 0.5452, + "num_input_tokens_seen": 20531456, + "step": 6515 + }, + { + "epoch": 0.4173868510338647, + "grad_norm": 26.87264633178711, + "learning_rate": 4.172961208552042e-06, + "loss": 0.5425, + "num_input_tokens_seen": 20547200, + "step": 6520 + }, + { + "epoch": 0.41770693297484157, + "grad_norm": 27.677000045776367, + "learning_rate": 4.176161823070029e-06, + "loss": 0.5122, + "num_input_tokens_seen": 20562560, + "step": 6525 + }, + { + "epoch": 0.41802701491581845, + "grad_norm": 21.735551834106445, + "learning_rate": 4.179362437588017e-06, + "loss": 0.4084, + "num_input_tokens_seen": 20577344, + "step": 6530 + }, + { + "epoch": 0.41834709685679533, + "grad_norm": 37.1097526550293, + "learning_rate": 4.182563052106004e-06, + "loss": 0.4662, + "num_input_tokens_seen": 20593088, + "step": 6535 + }, + { + "epoch": 0.4186671787977722, + "grad_norm": 41.52687454223633, + "learning_rate": 4.185763666623992e-06, + "loss": 0.4794, + "num_input_tokens_seen": 20609792, + "step": 6540 + }, + { + "epoch": 0.41898726073874915, + "grad_norm": 21.161483764648438, + "learning_rate": 4.188964281141979e-06, + "loss": 0.4879, + "num_input_tokens_seen": 20625280, + "step": 6545 + }, + { + "epoch": 0.419307342679726, + "grad_norm": 27.533668518066406, + "learning_rate": 4.192164895659967e-06, + "loss": 0.5136, + "num_input_tokens_seen": 20641984, + "step": 6550 + }, + { + "epoch": 0.4196274246207029, + "grad_norm": 29.93686866760254, + "learning_rate": 4.195365510177954e-06, + "loss": 0.6363, + "num_input_tokens_seen": 20658240, + "step": 6555 + }, + { + "epoch": 0.4199475065616798, + "grad_norm": 33.95027160644531, + "learning_rate": 4.198566124695942e-06, + "loss": 0.5114, + "num_input_tokens_seen": 20673408, + "step": 6560 + }, + { + "epoch": 0.42026758850265666, + "grad_norm": 36.69838333129883, + "learning_rate": 4.201766739213929e-06, + "loss": 0.4934, + "num_input_tokens_seen": 20688960, + "step": 6565 + }, + { + "epoch": 0.42058767044363354, + "grad_norm": 21.692129135131836, + "learning_rate": 4.204967353731917e-06, + "loss": 0.4829, + "num_input_tokens_seen": 20704768, + "step": 6570 + }, + { + "epoch": 0.4209077523846105, + "grad_norm": 28.5656795501709, + "learning_rate": 4.208167968249904e-06, + "loss": 0.4683, + "num_input_tokens_seen": 20719808, + "step": 6575 + }, + { + "epoch": 0.42122783432558736, + "grad_norm": 30.019485473632812, + "learning_rate": 4.211368582767892e-06, + "loss": 0.6623, + "num_input_tokens_seen": 20735680, + "step": 6580 + }, + { + "epoch": 0.42154791626656424, + "grad_norm": 28.171175003051758, + "learning_rate": 4.214569197285879e-06, + "loss": 0.4296, + "num_input_tokens_seen": 20750144, + "step": 6585 + }, + { + "epoch": 0.4218679982075411, + "grad_norm": 34.01813507080078, + "learning_rate": 4.217769811803867e-06, + "loss": 0.473, + "num_input_tokens_seen": 20766016, + "step": 6590 + }, + { + "epoch": 0.422188080148518, + "grad_norm": 39.627681732177734, + "learning_rate": 4.220970426321854e-06, + "loss": 0.6441, + "num_input_tokens_seen": 20782720, + "step": 6595 + }, + { + "epoch": 0.42250816208949493, + "grad_norm": 31.660442352294922, + "learning_rate": 4.224171040839842e-06, + "loss": 0.4494, + "num_input_tokens_seen": 20798208, + "step": 6600 + }, + { + "epoch": 0.4228282440304718, + "grad_norm": 23.503585815429688, + "learning_rate": 4.227371655357829e-06, + "loss": 0.763, + "num_input_tokens_seen": 20813056, + "step": 6605 + }, + { + "epoch": 0.4231483259714487, + "grad_norm": 35.3075065612793, + "learning_rate": 4.230572269875817e-06, + "loss": 0.483, + "num_input_tokens_seen": 20828800, + "step": 6610 + }, + { + "epoch": 0.42346840791242557, + "grad_norm": 44.66595458984375, + "learning_rate": 4.233772884393805e-06, + "loss": 0.5142, + "num_input_tokens_seen": 20844736, + "step": 6615 + }, + { + "epoch": 0.42378848985340245, + "grad_norm": 25.229429244995117, + "learning_rate": 4.236973498911792e-06, + "loss": 0.4001, + "num_input_tokens_seen": 20860672, + "step": 6620 + }, + { + "epoch": 0.4241085717943794, + "grad_norm": 26.181352615356445, + "learning_rate": 4.2401741134297795e-06, + "loss": 0.4318, + "num_input_tokens_seen": 20877184, + "step": 6625 + }, + { + "epoch": 0.42442865373535626, + "grad_norm": 48.75513458251953, + "learning_rate": 4.243374727947766e-06, + "loss": 0.4935, + "num_input_tokens_seen": 20894208, + "step": 6630 + }, + { + "epoch": 0.42474873567633314, + "grad_norm": 44.322242736816406, + "learning_rate": 4.246575342465754e-06, + "loss": 0.5097, + "num_input_tokens_seen": 20909696, + "step": 6635 + }, + { + "epoch": 0.42506881761731, + "grad_norm": 32.69789123535156, + "learning_rate": 4.249775956983741e-06, + "loss": 0.4985, + "num_input_tokens_seen": 20926016, + "step": 6640 + }, + { + "epoch": 0.4253888995582869, + "grad_norm": 20.9238224029541, + "learning_rate": 4.2529765715017285e-06, + "loss": 0.4899, + "num_input_tokens_seen": 20941440, + "step": 6645 + }, + { + "epoch": 0.42570898149926384, + "grad_norm": 31.354585647583008, + "learning_rate": 4.256177186019716e-06, + "loss": 0.5684, + "num_input_tokens_seen": 20956352, + "step": 6650 + }, + { + "epoch": 0.4260290634402407, + "grad_norm": 74.00277709960938, + "learning_rate": 4.2593778005377035e-06, + "loss": 0.541, + "num_input_tokens_seen": 20973056, + "step": 6655 + }, + { + "epoch": 0.4263491453812176, + "grad_norm": 35.57090377807617, + "learning_rate": 4.2625784150556905e-06, + "loss": 0.5376, + "num_input_tokens_seen": 20989248, + "step": 6660 + }, + { + "epoch": 0.4266692273221945, + "grad_norm": 29.46420669555664, + "learning_rate": 4.265779029573678e-06, + "loss": 0.5234, + "num_input_tokens_seen": 21004096, + "step": 6665 + }, + { + "epoch": 0.42698930926317136, + "grad_norm": 24.64568328857422, + "learning_rate": 4.2689796440916655e-06, + "loss": 0.5485, + "num_input_tokens_seen": 21020224, + "step": 6670 + }, + { + "epoch": 0.42730939120414824, + "grad_norm": 26.515810012817383, + "learning_rate": 4.272180258609653e-06, + "loss": 0.4845, + "num_input_tokens_seen": 21035264, + "step": 6675 + }, + { + "epoch": 0.42762947314512517, + "grad_norm": 18.36496925354004, + "learning_rate": 4.275380873127641e-06, + "loss": 0.4489, + "num_input_tokens_seen": 21051904, + "step": 6680 + }, + { + "epoch": 0.42794955508610205, + "grad_norm": 27.265832901000977, + "learning_rate": 4.278581487645628e-06, + "loss": 0.4542, + "num_input_tokens_seen": 21069504, + "step": 6685 + }, + { + "epoch": 0.42826963702707893, + "grad_norm": 40.747772216796875, + "learning_rate": 4.281782102163616e-06, + "loss": 0.569, + "num_input_tokens_seen": 21084288, + "step": 6690 + }, + { + "epoch": 0.4285897189680558, + "grad_norm": 17.71495246887207, + "learning_rate": 4.284982716681603e-06, + "loss": 0.4573, + "num_input_tokens_seen": 21100544, + "step": 6695 + }, + { + "epoch": 0.4289098009090327, + "grad_norm": 33.64828109741211, + "learning_rate": 4.288183331199591e-06, + "loss": 0.52, + "num_input_tokens_seen": 21116608, + "step": 6700 + }, + { + "epoch": 0.4292298828500096, + "grad_norm": 45.03371810913086, + "learning_rate": 4.291383945717578e-06, + "loss": 0.734, + "num_input_tokens_seen": 21131776, + "step": 6705 + }, + { + "epoch": 0.4295499647909865, + "grad_norm": 41.529136657714844, + "learning_rate": 4.294584560235566e-06, + "loss": 0.4443, + "num_input_tokens_seen": 21146368, + "step": 6710 + }, + { + "epoch": 0.4298700467319634, + "grad_norm": 79.04810333251953, + "learning_rate": 4.297785174753553e-06, + "loss": 0.6303, + "num_input_tokens_seen": 21162368, + "step": 6715 + }, + { + "epoch": 0.43019012867294026, + "grad_norm": 28.742454528808594, + "learning_rate": 4.300985789271541e-06, + "loss": 0.5002, + "num_input_tokens_seen": 21179008, + "step": 6720 + }, + { + "epoch": 0.43051021061391714, + "grad_norm": 38.753196716308594, + "learning_rate": 4.304186403789528e-06, + "loss": 0.777, + "num_input_tokens_seen": 21196160, + "step": 6725 + }, + { + "epoch": 0.4308302925548941, + "grad_norm": 32.10105895996094, + "learning_rate": 4.307387018307516e-06, + "loss": 0.5936, + "num_input_tokens_seen": 21212032, + "step": 6730 + }, + { + "epoch": 0.43115037449587096, + "grad_norm": 21.785459518432617, + "learning_rate": 4.310587632825503e-06, + "loss": 0.4063, + "num_input_tokens_seen": 21230272, + "step": 6735 + }, + { + "epoch": 0.43147045643684784, + "grad_norm": 25.455005645751953, + "learning_rate": 4.313788247343491e-06, + "loss": 0.469, + "num_input_tokens_seen": 21245760, + "step": 6740 + }, + { + "epoch": 0.4317905383778247, + "grad_norm": 29.61812400817871, + "learning_rate": 4.316988861861478e-06, + "loss": 0.415, + "num_input_tokens_seen": 21260992, + "step": 6745 + }, + { + "epoch": 0.4321106203188016, + "grad_norm": 22.517724990844727, + "learning_rate": 4.320189476379465e-06, + "loss": 0.3589, + "num_input_tokens_seen": 21275136, + "step": 6750 + }, + { + "epoch": 0.4324307022597785, + "grad_norm": 78.81586456298828, + "learning_rate": 4.323390090897453e-06, + "loss": 0.6596, + "num_input_tokens_seen": 21290560, + "step": 6755 + }, + { + "epoch": 0.4327507842007554, + "grad_norm": 50.769317626953125, + "learning_rate": 4.32659070541544e-06, + "loss": 0.5297, + "num_input_tokens_seen": 21305536, + "step": 6760 + }, + { + "epoch": 0.4330708661417323, + "grad_norm": 44.98862075805664, + "learning_rate": 4.329791319933428e-06, + "loss": 0.4501, + "num_input_tokens_seen": 21320576, + "step": 6765 + }, + { + "epoch": 0.43339094808270917, + "grad_norm": 27.15208625793457, + "learning_rate": 4.332991934451415e-06, + "loss": 0.54, + "num_input_tokens_seen": 21335680, + "step": 6770 + }, + { + "epoch": 0.43371103002368605, + "grad_norm": 26.015419006347656, + "learning_rate": 4.336192548969403e-06, + "loss": 0.4078, + "num_input_tokens_seen": 21351808, + "step": 6775 + }, + { + "epoch": 0.43403111196466293, + "grad_norm": 56.656707763671875, + "learning_rate": 4.33939316348739e-06, + "loss": 0.6034, + "num_input_tokens_seen": 21366464, + "step": 6780 + }, + { + "epoch": 0.43435119390563987, + "grad_norm": 26.747055053710938, + "learning_rate": 4.342593778005378e-06, + "loss": 0.3572, + "num_input_tokens_seen": 21383296, + "step": 6785 + }, + { + "epoch": 0.43467127584661674, + "grad_norm": 52.59417724609375, + "learning_rate": 4.345794392523365e-06, + "loss": 0.5126, + "num_input_tokens_seen": 21399616, + "step": 6790 + }, + { + "epoch": 0.4349913577875936, + "grad_norm": 20.552345275878906, + "learning_rate": 4.3489950070413526e-06, + "loss": 0.5213, + "num_input_tokens_seen": 21414848, + "step": 6795 + }, + { + "epoch": 0.4353114397285705, + "grad_norm": 42.09149169921875, + "learning_rate": 4.35219562155934e-06, + "loss": 0.3968, + "num_input_tokens_seen": 21430144, + "step": 6800 + }, + { + "epoch": 0.4356315216695474, + "grad_norm": 26.702302932739258, + "learning_rate": 4.3553962360773275e-06, + "loss": 0.4789, + "num_input_tokens_seen": 21445248, + "step": 6805 + }, + { + "epoch": 0.4359516036105243, + "grad_norm": 63.777427673339844, + "learning_rate": 4.3585968505953145e-06, + "loss": 0.7234, + "num_input_tokens_seen": 21460544, + "step": 6810 + }, + { + "epoch": 0.4362716855515012, + "grad_norm": 31.23002052307129, + "learning_rate": 4.3617974651133024e-06, + "loss": 0.5245, + "num_input_tokens_seen": 21476224, + "step": 6815 + }, + { + "epoch": 0.4365917674924781, + "grad_norm": 34.06576156616211, + "learning_rate": 4.3649980796312895e-06, + "loss": 0.6448, + "num_input_tokens_seen": 21491712, + "step": 6820 + }, + { + "epoch": 0.43691184943345496, + "grad_norm": 48.40498352050781, + "learning_rate": 4.368198694149277e-06, + "loss": 0.6611, + "num_input_tokens_seen": 21508928, + "step": 6825 + }, + { + "epoch": 0.43723193137443184, + "grad_norm": 40.73934555053711, + "learning_rate": 4.371399308667264e-06, + "loss": 0.5358, + "num_input_tokens_seen": 21523648, + "step": 6830 + }, + { + "epoch": 0.43755201331540877, + "grad_norm": 14.979179382324219, + "learning_rate": 4.374599923185252e-06, + "loss": 0.5569, + "num_input_tokens_seen": 21539264, + "step": 6835 + }, + { + "epoch": 0.43787209525638565, + "grad_norm": 49.92237854003906, + "learning_rate": 4.377800537703239e-06, + "loss": 0.4689, + "num_input_tokens_seen": 21555520, + "step": 6840 + }, + { + "epoch": 0.43819217719736253, + "grad_norm": 33.444183349609375, + "learning_rate": 4.381001152221227e-06, + "loss": 0.6048, + "num_input_tokens_seen": 21570112, + "step": 6845 + }, + { + "epoch": 0.4385122591383394, + "grad_norm": 28.180255889892578, + "learning_rate": 4.384201766739214e-06, + "loss": 0.4128, + "num_input_tokens_seen": 21584768, + "step": 6850 + }, + { + "epoch": 0.4388323410793163, + "grad_norm": 27.91827964782715, + "learning_rate": 4.387402381257202e-06, + "loss": 0.367, + "num_input_tokens_seen": 21599872, + "step": 6855 + }, + { + "epoch": 0.43915242302029317, + "grad_norm": 30.556766510009766, + "learning_rate": 4.390602995775189e-06, + "loss": 0.5798, + "num_input_tokens_seen": 21615808, + "step": 6860 + }, + { + "epoch": 0.4394725049612701, + "grad_norm": 28.222728729248047, + "learning_rate": 4.393803610293177e-06, + "loss": 0.6669, + "num_input_tokens_seen": 21630656, + "step": 6865 + }, + { + "epoch": 0.439792586902247, + "grad_norm": 53.45668029785156, + "learning_rate": 4.397004224811164e-06, + "loss": 0.5259, + "num_input_tokens_seen": 21646976, + "step": 6870 + }, + { + "epoch": 0.44011266884322386, + "grad_norm": 31.034198760986328, + "learning_rate": 4.400204839329151e-06, + "loss": 0.6549, + "num_input_tokens_seen": 21665024, + "step": 6875 + }, + { + "epoch": 0.44043275078420074, + "grad_norm": 18.940208435058594, + "learning_rate": 4.403405453847139e-06, + "loss": 0.5625, + "num_input_tokens_seen": 21680128, + "step": 6880 + }, + { + "epoch": 0.4407528327251776, + "grad_norm": 20.202709197998047, + "learning_rate": 4.406606068365126e-06, + "loss": 0.3949, + "num_input_tokens_seen": 21695808, + "step": 6885 + }, + { + "epoch": 0.44107291466615456, + "grad_norm": 20.441242218017578, + "learning_rate": 4.409806682883114e-06, + "loss": 0.4253, + "num_input_tokens_seen": 21711936, + "step": 6890 + }, + { + "epoch": 0.44139299660713144, + "grad_norm": 45.17290496826172, + "learning_rate": 4.413007297401101e-06, + "loss": 0.5217, + "num_input_tokens_seen": 21727488, + "step": 6895 + }, + { + "epoch": 0.4417130785481083, + "grad_norm": 27.147932052612305, + "learning_rate": 4.416207911919089e-06, + "loss": 0.6068, + "num_input_tokens_seen": 21745280, + "step": 6900 + }, + { + "epoch": 0.4420331604890852, + "grad_norm": 33.51067352294922, + "learning_rate": 4.419408526437076e-06, + "loss": 0.5917, + "num_input_tokens_seen": 21760000, + "step": 6905 + }, + { + "epoch": 0.4423532424300621, + "grad_norm": 23.813228607177734, + "learning_rate": 4.422609140955064e-06, + "loss": 0.4796, + "num_input_tokens_seen": 21775232, + "step": 6910 + }, + { + "epoch": 0.442673324371039, + "grad_norm": 34.836971282958984, + "learning_rate": 4.425809755473051e-06, + "loss": 0.5064, + "num_input_tokens_seen": 21790656, + "step": 6915 + }, + { + "epoch": 0.4429934063120159, + "grad_norm": 50.51862335205078, + "learning_rate": 4.429010369991039e-06, + "loss": 0.5419, + "num_input_tokens_seen": 21806336, + "step": 6920 + }, + { + "epoch": 0.44331348825299277, + "grad_norm": 21.813928604125977, + "learning_rate": 4.432210984509026e-06, + "loss": 0.4058, + "num_input_tokens_seen": 21823744, + "step": 6925 + }, + { + "epoch": 0.44363357019396965, + "grad_norm": 34.028228759765625, + "learning_rate": 4.435411599027014e-06, + "loss": 0.645, + "num_input_tokens_seen": 21839104, + "step": 6930 + }, + { + "epoch": 0.44395365213494653, + "grad_norm": 21.05149269104004, + "learning_rate": 4.438612213545001e-06, + "loss": 0.4493, + "num_input_tokens_seen": 21854528, + "step": 6935 + }, + { + "epoch": 0.4442737340759234, + "grad_norm": 47.86943817138672, + "learning_rate": 4.441812828062989e-06, + "loss": 0.7441, + "num_input_tokens_seen": 21870592, + "step": 6940 + }, + { + "epoch": 0.44459381601690035, + "grad_norm": 48.299591064453125, + "learning_rate": 4.445013442580976e-06, + "loss": 0.5566, + "num_input_tokens_seen": 21885696, + "step": 6945 + }, + { + "epoch": 0.4449138979578772, + "grad_norm": 26.98124122619629, + "learning_rate": 4.448214057098964e-06, + "loss": 0.4665, + "num_input_tokens_seen": 21901248, + "step": 6950 + }, + { + "epoch": 0.4452339798988541, + "grad_norm": 26.4757022857666, + "learning_rate": 4.451414671616951e-06, + "loss": 0.6195, + "num_input_tokens_seen": 21916352, + "step": 6955 + }, + { + "epoch": 0.445554061839831, + "grad_norm": 23.428730010986328, + "learning_rate": 4.4546152861349385e-06, + "loss": 0.4685, + "num_input_tokens_seen": 21931072, + "step": 6960 + }, + { + "epoch": 0.44587414378080786, + "grad_norm": 30.457664489746094, + "learning_rate": 4.457815900652926e-06, + "loss": 0.515, + "num_input_tokens_seen": 21946752, + "step": 6965 + }, + { + "epoch": 0.4461942257217848, + "grad_norm": 55.60489273071289, + "learning_rate": 4.4610165151709135e-06, + "loss": 0.5767, + "num_input_tokens_seen": 21962176, + "step": 6970 + }, + { + "epoch": 0.4465143076627617, + "grad_norm": 13.208145141601562, + "learning_rate": 4.4642171296889005e-06, + "loss": 0.4379, + "num_input_tokens_seen": 21978496, + "step": 6975 + }, + { + "epoch": 0.44683438960373856, + "grad_norm": 31.05898094177246, + "learning_rate": 4.467417744206888e-06, + "loss": 0.6503, + "num_input_tokens_seen": 21992576, + "step": 6980 + }, + { + "epoch": 0.44715447154471544, + "grad_norm": 16.03257942199707, + "learning_rate": 4.4706183587248755e-06, + "loss": 0.5465, + "num_input_tokens_seen": 22008000, + "step": 6985 + }, + { + "epoch": 0.4474745534856923, + "grad_norm": 30.839153289794922, + "learning_rate": 4.4738189732428625e-06, + "loss": 0.7151, + "num_input_tokens_seen": 22022976, + "step": 6990 + }, + { + "epoch": 0.44779463542666925, + "grad_norm": 31.204858779907227, + "learning_rate": 4.47701958776085e-06, + "loss": 0.5282, + "num_input_tokens_seen": 22038144, + "step": 6995 + }, + { + "epoch": 0.44811471736764613, + "grad_norm": 42.70002365112305, + "learning_rate": 4.4802202022788374e-06, + "loss": 0.5655, + "num_input_tokens_seen": 22054016, + "step": 7000 + }, + { + "epoch": 0.448434799308623, + "grad_norm": 28.804901123046875, + "learning_rate": 4.483420816796825e-06, + "loss": 0.4632, + "num_input_tokens_seen": 22073920, + "step": 7005 + }, + { + "epoch": 0.4487548812495999, + "grad_norm": 40.94184112548828, + "learning_rate": 4.486621431314812e-06, + "loss": 0.4887, + "num_input_tokens_seen": 22089728, + "step": 7010 + }, + { + "epoch": 0.44907496319057677, + "grad_norm": 44.04177474975586, + "learning_rate": 4.4898220458328e-06, + "loss": 0.6692, + "num_input_tokens_seen": 22105344, + "step": 7015 + }, + { + "epoch": 0.4493950451315537, + "grad_norm": 31.412391662597656, + "learning_rate": 4.493022660350787e-06, + "loss": 0.5347, + "num_input_tokens_seen": 22121792, + "step": 7020 + }, + { + "epoch": 0.4497151270725306, + "grad_norm": 27.682044982910156, + "learning_rate": 4.496223274868775e-06, + "loss": 0.4648, + "num_input_tokens_seen": 22136128, + "step": 7025 + }, + { + "epoch": 0.45003520901350746, + "grad_norm": 21.034236907958984, + "learning_rate": 4.499423889386762e-06, + "loss": 0.4841, + "num_input_tokens_seen": 22151936, + "step": 7030 + }, + { + "epoch": 0.45035529095448434, + "grad_norm": 24.370620727539062, + "learning_rate": 4.50262450390475e-06, + "loss": 0.3882, + "num_input_tokens_seen": 22168128, + "step": 7035 + }, + { + "epoch": 0.4506753728954612, + "grad_norm": 42.288124084472656, + "learning_rate": 4.505825118422737e-06, + "loss": 0.4943, + "num_input_tokens_seen": 22184512, + "step": 7040 + }, + { + "epoch": 0.4509954548364381, + "grad_norm": 27.482173919677734, + "learning_rate": 4.509025732940725e-06, + "loss": 0.6257, + "num_input_tokens_seen": 22199872, + "step": 7045 + }, + { + "epoch": 0.45131553677741504, + "grad_norm": 21.566308975219727, + "learning_rate": 4.512226347458712e-06, + "loss": 0.5604, + "num_input_tokens_seen": 22216960, + "step": 7050 + }, + { + "epoch": 0.4516356187183919, + "grad_norm": 25.27988052368164, + "learning_rate": 4.5154269619767e-06, + "loss": 0.3448, + "num_input_tokens_seen": 22231488, + "step": 7055 + }, + { + "epoch": 0.4519557006593688, + "grad_norm": 24.321704864501953, + "learning_rate": 4.518627576494687e-06, + "loss": 0.5967, + "num_input_tokens_seen": 22246976, + "step": 7060 + }, + { + "epoch": 0.4522757826003457, + "grad_norm": 16.465486526489258, + "learning_rate": 4.521828191012675e-06, + "loss": 0.4857, + "num_input_tokens_seen": 22263680, + "step": 7065 + }, + { + "epoch": 0.45259586454132256, + "grad_norm": 24.959074020385742, + "learning_rate": 4.525028805530662e-06, + "loss": 0.4252, + "num_input_tokens_seen": 22279552, + "step": 7070 + }, + { + "epoch": 0.4529159464822995, + "grad_norm": 24.44870948791504, + "learning_rate": 4.52822942004865e-06, + "loss": 0.5059, + "num_input_tokens_seen": 22295296, + "step": 7075 + }, + { + "epoch": 0.45323602842327637, + "grad_norm": 44.555702209472656, + "learning_rate": 4.531430034566637e-06, + "loss": 0.5148, + "num_input_tokens_seen": 22310400, + "step": 7080 + }, + { + "epoch": 0.45355611036425325, + "grad_norm": 70.1511001586914, + "learning_rate": 4.534630649084625e-06, + "loss": 0.5518, + "num_input_tokens_seen": 22326848, + "step": 7085 + }, + { + "epoch": 0.45387619230523013, + "grad_norm": 31.22317886352539, + "learning_rate": 4.537831263602613e-06, + "loss": 0.5397, + "num_input_tokens_seen": 22342592, + "step": 7090 + }, + { + "epoch": 0.454196274246207, + "grad_norm": 27.42571449279785, + "learning_rate": 4.5410318781206e-06, + "loss": 0.5794, + "num_input_tokens_seen": 22358912, + "step": 7095 + }, + { + "epoch": 0.45451635618718395, + "grad_norm": 27.755748748779297, + "learning_rate": 4.544232492638588e-06, + "loss": 0.4531, + "num_input_tokens_seen": 22374528, + "step": 7100 + }, + { + "epoch": 0.4548364381281608, + "grad_norm": 14.873360633850098, + "learning_rate": 4.547433107156575e-06, + "loss": 0.4472, + "num_input_tokens_seen": 22389824, + "step": 7105 + }, + { + "epoch": 0.4551565200691377, + "grad_norm": 32.014549255371094, + "learning_rate": 4.550633721674562e-06, + "loss": 0.4667, + "num_input_tokens_seen": 22405376, + "step": 7110 + }, + { + "epoch": 0.4554766020101146, + "grad_norm": 17.425426483154297, + "learning_rate": 4.553834336192549e-06, + "loss": 0.4732, + "num_input_tokens_seen": 22420032, + "step": 7115 + }, + { + "epoch": 0.45579668395109146, + "grad_norm": 29.99361228942871, + "learning_rate": 4.557034950710537e-06, + "loss": 0.5929, + "num_input_tokens_seen": 22434688, + "step": 7120 + }, + { + "epoch": 0.45611676589206834, + "grad_norm": 39.593292236328125, + "learning_rate": 4.560235565228524e-06, + "loss": 0.4998, + "num_input_tokens_seen": 22451072, + "step": 7125 + }, + { + "epoch": 0.4564368478330453, + "grad_norm": 28.673254013061523, + "learning_rate": 4.563436179746512e-06, + "loss": 0.4491, + "num_input_tokens_seen": 22466688, + "step": 7130 + }, + { + "epoch": 0.45675692977402216, + "grad_norm": 24.80447006225586, + "learning_rate": 4.566636794264499e-06, + "loss": 0.5812, + "num_input_tokens_seen": 22482432, + "step": 7135 + }, + { + "epoch": 0.45707701171499904, + "grad_norm": 37.78736114501953, + "learning_rate": 4.5698374087824865e-06, + "loss": 0.4904, + "num_input_tokens_seen": 22496960, + "step": 7140 + }, + { + "epoch": 0.4573970936559759, + "grad_norm": 44.02430725097656, + "learning_rate": 4.5730380233004736e-06, + "loss": 0.5087, + "num_input_tokens_seen": 22512256, + "step": 7145 + }, + { + "epoch": 0.4577171755969528, + "grad_norm": 40.18843078613281, + "learning_rate": 4.5762386378184615e-06, + "loss": 0.5976, + "num_input_tokens_seen": 22528192, + "step": 7150 + }, + { + "epoch": 0.45803725753792973, + "grad_norm": 22.147106170654297, + "learning_rate": 4.579439252336449e-06, + "loss": 0.5641, + "num_input_tokens_seen": 22545408, + "step": 7155 + }, + { + "epoch": 0.4583573394789066, + "grad_norm": 44.94514846801758, + "learning_rate": 4.582639866854436e-06, + "loss": 0.6556, + "num_input_tokens_seen": 22560832, + "step": 7160 + }, + { + "epoch": 0.4586774214198835, + "grad_norm": 45.01123046875, + "learning_rate": 4.585840481372424e-06, + "loss": 0.494, + "num_input_tokens_seen": 22577728, + "step": 7165 + }, + { + "epoch": 0.45899750336086037, + "grad_norm": 31.978303909301758, + "learning_rate": 4.589041095890411e-06, + "loss": 0.5734, + "num_input_tokens_seen": 22594112, + "step": 7170 + }, + { + "epoch": 0.45931758530183725, + "grad_norm": 40.31599807739258, + "learning_rate": 4.592241710408399e-06, + "loss": 0.5078, + "num_input_tokens_seen": 22609728, + "step": 7175 + }, + { + "epoch": 0.4596376672428142, + "grad_norm": 39.5265998840332, + "learning_rate": 4.595442324926386e-06, + "loss": 0.5578, + "num_input_tokens_seen": 22624704, + "step": 7180 + }, + { + "epoch": 0.45995774918379106, + "grad_norm": 32.56690216064453, + "learning_rate": 4.598642939444374e-06, + "loss": 0.4481, + "num_input_tokens_seen": 22640448, + "step": 7185 + }, + { + "epoch": 0.46027783112476794, + "grad_norm": 34.84424591064453, + "learning_rate": 4.601843553962361e-06, + "loss": 0.3506, + "num_input_tokens_seen": 22655680, + "step": 7190 + }, + { + "epoch": 0.4605979130657448, + "grad_norm": 22.94706916809082, + "learning_rate": 4.605044168480349e-06, + "loss": 0.5025, + "num_input_tokens_seen": 22672128, + "step": 7195 + }, + { + "epoch": 0.4609179950067217, + "grad_norm": 46.69490432739258, + "learning_rate": 4.608244782998336e-06, + "loss": 0.5757, + "num_input_tokens_seen": 22687104, + "step": 7200 + }, + { + "epoch": 0.4612380769476986, + "grad_norm": 29.07457733154297, + "learning_rate": 4.611445397516324e-06, + "loss": 0.5052, + "num_input_tokens_seen": 22702976, + "step": 7205 + }, + { + "epoch": 0.4615581588886755, + "grad_norm": 27.508514404296875, + "learning_rate": 4.614646012034311e-06, + "loss": 0.4947, + "num_input_tokens_seen": 22718464, + "step": 7210 + }, + { + "epoch": 0.4618782408296524, + "grad_norm": 21.67587661743164, + "learning_rate": 4.617846626552299e-06, + "loss": 0.4546, + "num_input_tokens_seen": 22734656, + "step": 7215 + }, + { + "epoch": 0.4621983227706293, + "grad_norm": 26.630643844604492, + "learning_rate": 4.621047241070286e-06, + "loss": 0.449, + "num_input_tokens_seen": 22750016, + "step": 7220 + }, + { + "epoch": 0.46251840471160616, + "grad_norm": 60.36968231201172, + "learning_rate": 4.624247855588273e-06, + "loss": 0.4982, + "num_input_tokens_seen": 22765632, + "step": 7225 + }, + { + "epoch": 0.46283848665258304, + "grad_norm": 34.01925277709961, + "learning_rate": 4.627448470106261e-06, + "loss": 0.5168, + "num_input_tokens_seen": 22781184, + "step": 7230 + }, + { + "epoch": 0.46315856859355997, + "grad_norm": 34.64235305786133, + "learning_rate": 4.630649084624248e-06, + "loss": 0.6873, + "num_input_tokens_seen": 22797248, + "step": 7235 + }, + { + "epoch": 0.46347865053453685, + "grad_norm": 18.049989700317383, + "learning_rate": 4.633849699142236e-06, + "loss": 0.4643, + "num_input_tokens_seen": 22812800, + "step": 7240 + }, + { + "epoch": 0.46379873247551373, + "grad_norm": 83.42581939697266, + "learning_rate": 4.637050313660223e-06, + "loss": 0.5374, + "num_input_tokens_seen": 22827200, + "step": 7245 + }, + { + "epoch": 0.4641188144164906, + "grad_norm": 29.076335906982422, + "learning_rate": 4.640250928178211e-06, + "loss": 0.6252, + "num_input_tokens_seen": 22843264, + "step": 7250 + }, + { + "epoch": 0.4644388963574675, + "grad_norm": 56.30864715576172, + "learning_rate": 4.643451542696198e-06, + "loss": 0.5602, + "num_input_tokens_seen": 22858624, + "step": 7255 + }, + { + "epoch": 0.4647589782984444, + "grad_norm": 25.633464813232422, + "learning_rate": 4.646652157214186e-06, + "loss": 0.5698, + "num_input_tokens_seen": 22873856, + "step": 7260 + }, + { + "epoch": 0.4650790602394213, + "grad_norm": 27.811967849731445, + "learning_rate": 4.649852771732173e-06, + "loss": 0.4438, + "num_input_tokens_seen": 22889600, + "step": 7265 + }, + { + "epoch": 0.4653991421803982, + "grad_norm": 38.23081970214844, + "learning_rate": 4.653053386250161e-06, + "loss": 0.5235, + "num_input_tokens_seen": 22904704, + "step": 7270 + }, + { + "epoch": 0.46571922412137506, + "grad_norm": 23.11844253540039, + "learning_rate": 4.656254000768148e-06, + "loss": 0.4328, + "num_input_tokens_seen": 22920384, + "step": 7275 + }, + { + "epoch": 0.46603930606235194, + "grad_norm": 26.480052947998047, + "learning_rate": 4.659454615286136e-06, + "loss": 0.5346, + "num_input_tokens_seen": 22937024, + "step": 7280 + }, + { + "epoch": 0.4663593880033289, + "grad_norm": 20.510231018066406, + "learning_rate": 4.662655229804123e-06, + "loss": 0.5461, + "num_input_tokens_seen": 22954816, + "step": 7285 + }, + { + "epoch": 0.46667946994430576, + "grad_norm": 21.20734214782715, + "learning_rate": 4.6658558443221105e-06, + "loss": 0.3186, + "num_input_tokens_seen": 22969472, + "step": 7290 + }, + { + "epoch": 0.46699955188528264, + "grad_norm": 23.97284507751465, + "learning_rate": 4.669056458840098e-06, + "loss": 0.493, + "num_input_tokens_seen": 22984192, + "step": 7295 + }, + { + "epoch": 0.4673196338262595, + "grad_norm": 12.695852279663086, + "learning_rate": 4.6722570733580855e-06, + "loss": 0.4327, + "num_input_tokens_seen": 22999680, + "step": 7300 + }, + { + "epoch": 0.4676397157672364, + "grad_norm": 45.3319206237793, + "learning_rate": 4.6754576878760725e-06, + "loss": 0.5308, + "num_input_tokens_seen": 23014720, + "step": 7305 + }, + { + "epoch": 0.4679597977082133, + "grad_norm": 61.42768859863281, + "learning_rate": 4.67865830239406e-06, + "loss": 0.5436, + "num_input_tokens_seen": 23030528, + "step": 7310 + }, + { + "epoch": 0.4682798796491902, + "grad_norm": 52.17071533203125, + "learning_rate": 4.6818589169120474e-06, + "loss": 0.5756, + "num_input_tokens_seen": 23046784, + "step": 7315 + }, + { + "epoch": 0.4685999615901671, + "grad_norm": 45.41504669189453, + "learning_rate": 4.685059531430035e-06, + "loss": 0.4716, + "num_input_tokens_seen": 23061632, + "step": 7320 + }, + { + "epoch": 0.46892004353114397, + "grad_norm": 23.899980545043945, + "learning_rate": 4.688260145948022e-06, + "loss": 0.4359, + "num_input_tokens_seen": 23077888, + "step": 7325 + }, + { + "epoch": 0.46924012547212085, + "grad_norm": 24.41979217529297, + "learning_rate": 4.69146076046601e-06, + "loss": 0.5006, + "num_input_tokens_seen": 23093568, + "step": 7330 + }, + { + "epoch": 0.46956020741309773, + "grad_norm": 25.766250610351562, + "learning_rate": 4.694661374983997e-06, + "loss": 0.6237, + "num_input_tokens_seen": 23108992, + "step": 7335 + }, + { + "epoch": 0.46988028935407467, + "grad_norm": 27.431026458740234, + "learning_rate": 4.697861989501985e-06, + "loss": 0.468, + "num_input_tokens_seen": 23124992, + "step": 7340 + }, + { + "epoch": 0.47020037129505154, + "grad_norm": 26.178462982177734, + "learning_rate": 4.701062604019972e-06, + "loss": 0.4068, + "num_input_tokens_seen": 23141888, + "step": 7345 + }, + { + "epoch": 0.4705204532360284, + "grad_norm": 40.35417175292969, + "learning_rate": 4.704263218537959e-06, + "loss": 0.4744, + "num_input_tokens_seen": 23157888, + "step": 7350 + }, + { + "epoch": 0.4708405351770053, + "grad_norm": 37.63656234741211, + "learning_rate": 4.707463833055947e-06, + "loss": 0.3915, + "num_input_tokens_seen": 23172864, + "step": 7355 + }, + { + "epoch": 0.4711606171179822, + "grad_norm": 44.65847396850586, + "learning_rate": 4.710664447573934e-06, + "loss": 0.558, + "num_input_tokens_seen": 23188352, + "step": 7360 + }, + { + "epoch": 0.4714806990589591, + "grad_norm": 30.14317512512207, + "learning_rate": 4.713865062091922e-06, + "loss": 0.4398, + "num_input_tokens_seen": 23204096, + "step": 7365 + }, + { + "epoch": 0.471800780999936, + "grad_norm": 21.03263282775879, + "learning_rate": 4.717065676609909e-06, + "loss": 0.4179, + "num_input_tokens_seen": 23219072, + "step": 7370 + }, + { + "epoch": 0.4721208629409129, + "grad_norm": 19.65338134765625, + "learning_rate": 4.720266291127897e-06, + "loss": 0.5466, + "num_input_tokens_seen": 23234560, + "step": 7375 + }, + { + "epoch": 0.47244094488188976, + "grad_norm": 28.68679428100586, + "learning_rate": 4.723466905645884e-06, + "loss": 0.4228, + "num_input_tokens_seen": 23250304, + "step": 7380 + }, + { + "epoch": 0.47276102682286664, + "grad_norm": 37.535980224609375, + "learning_rate": 4.726667520163872e-06, + "loss": 0.8351, + "num_input_tokens_seen": 23265664, + "step": 7385 + }, + { + "epoch": 0.4730811087638435, + "grad_norm": 31.67320442199707, + "learning_rate": 4.729868134681859e-06, + "loss": 0.6303, + "num_input_tokens_seen": 23281472, + "step": 7390 + }, + { + "epoch": 0.47340119070482045, + "grad_norm": 57.3853759765625, + "learning_rate": 4.733068749199847e-06, + "loss": 0.5959, + "num_input_tokens_seen": 23297024, + "step": 7395 + }, + { + "epoch": 0.47372127264579733, + "grad_norm": 34.57282638549805, + "learning_rate": 4.736269363717834e-06, + "loss": 0.5036, + "num_input_tokens_seen": 23313344, + "step": 7400 + }, + { + "epoch": 0.4740413545867742, + "grad_norm": 26.481918334960938, + "learning_rate": 4.739469978235822e-06, + "loss": 0.5866, + "num_input_tokens_seen": 23328512, + "step": 7405 + }, + { + "epoch": 0.4743614365277511, + "grad_norm": 35.49188232421875, + "learning_rate": 4.742670592753809e-06, + "loss": 0.544, + "num_input_tokens_seen": 23342400, + "step": 7410 + }, + { + "epoch": 0.47468151846872797, + "grad_norm": 30.255807876586914, + "learning_rate": 4.745871207271797e-06, + "loss": 0.5637, + "num_input_tokens_seen": 23358400, + "step": 7415 + }, + { + "epoch": 0.4750016004097049, + "grad_norm": 39.56448745727539, + "learning_rate": 4.749071821789784e-06, + "loss": 0.5588, + "num_input_tokens_seen": 23373376, + "step": 7420 + }, + { + "epoch": 0.4753216823506818, + "grad_norm": 47.08961868286133, + "learning_rate": 4.752272436307772e-06, + "loss": 0.6253, + "num_input_tokens_seen": 23390400, + "step": 7425 + }, + { + "epoch": 0.47564176429165866, + "grad_norm": 41.877105712890625, + "learning_rate": 4.755473050825759e-06, + "loss": 0.4955, + "num_input_tokens_seen": 23406400, + "step": 7430 + }, + { + "epoch": 0.47596184623263554, + "grad_norm": 21.882400512695312, + "learning_rate": 4.758673665343747e-06, + "loss": 0.3974, + "num_input_tokens_seen": 23421824, + "step": 7435 + }, + { + "epoch": 0.4762819281736124, + "grad_norm": 41.77271270751953, + "learning_rate": 4.761874279861734e-06, + "loss": 0.6372, + "num_input_tokens_seen": 23438272, + "step": 7440 + }, + { + "epoch": 0.47660201011458936, + "grad_norm": 121.13290405273438, + "learning_rate": 4.765074894379722e-06, + "loss": 0.576, + "num_input_tokens_seen": 23454976, + "step": 7445 + }, + { + "epoch": 0.47692209205556624, + "grad_norm": 45.91484832763672, + "learning_rate": 4.768275508897709e-06, + "loss": 0.5894, + "num_input_tokens_seen": 23471168, + "step": 7450 + }, + { + "epoch": 0.4772421739965431, + "grad_norm": 51.448909759521484, + "learning_rate": 4.7714761234156965e-06, + "loss": 0.5048, + "num_input_tokens_seen": 23486720, + "step": 7455 + }, + { + "epoch": 0.47756225593752, + "grad_norm": 22.199329376220703, + "learning_rate": 4.7746767379336836e-06, + "loss": 0.3781, + "num_input_tokens_seen": 23502912, + "step": 7460 + }, + { + "epoch": 0.4778823378784969, + "grad_norm": 29.08517074584961, + "learning_rate": 4.777877352451671e-06, + "loss": 0.4494, + "num_input_tokens_seen": 23519552, + "step": 7465 + }, + { + "epoch": 0.4782024198194738, + "grad_norm": 39.66550827026367, + "learning_rate": 4.7810779669696585e-06, + "loss": 0.5935, + "num_input_tokens_seen": 23535936, + "step": 7470 + }, + { + "epoch": 0.4785225017604507, + "grad_norm": 48.53025817871094, + "learning_rate": 4.7842785814876455e-06, + "loss": 0.5767, + "num_input_tokens_seen": 23551040, + "step": 7475 + }, + { + "epoch": 0.47884258370142757, + "grad_norm": 34.987335205078125, + "learning_rate": 4.7874791960056334e-06, + "loss": 0.3451, + "num_input_tokens_seen": 23566208, + "step": 7480 + }, + { + "epoch": 0.47916266564240445, + "grad_norm": 25.38376235961914, + "learning_rate": 4.7906798105236205e-06, + "loss": 0.5582, + "num_input_tokens_seen": 23581696, + "step": 7485 + }, + { + "epoch": 0.47948274758338133, + "grad_norm": 23.17549705505371, + "learning_rate": 4.793880425041608e-06, + "loss": 0.4705, + "num_input_tokens_seen": 23597248, + "step": 7490 + }, + { + "epoch": 0.4798028295243582, + "grad_norm": 50.789127349853516, + "learning_rate": 4.797081039559595e-06, + "loss": 0.7067, + "num_input_tokens_seen": 23612800, + "step": 7495 + }, + { + "epoch": 0.48012291146533514, + "grad_norm": 40.80821990966797, + "learning_rate": 4.800281654077583e-06, + "loss": 0.5679, + "num_input_tokens_seen": 23629056, + "step": 7500 + }, + { + "epoch": 0.480442993406312, + "grad_norm": 42.90043258666992, + "learning_rate": 4.80348226859557e-06, + "loss": 0.5621, + "num_input_tokens_seen": 23644352, + "step": 7505 + }, + { + "epoch": 0.4807630753472889, + "grad_norm": 29.88090705871582, + "learning_rate": 4.806682883113558e-06, + "loss": 0.5127, + "num_input_tokens_seen": 23661120, + "step": 7510 + }, + { + "epoch": 0.4810831572882658, + "grad_norm": 33.3177604675293, + "learning_rate": 4.809883497631545e-06, + "loss": 0.4116, + "num_input_tokens_seen": 23677632, + "step": 7515 + }, + { + "epoch": 0.48140323922924266, + "grad_norm": 18.830358505249023, + "learning_rate": 4.813084112149533e-06, + "loss": 0.4893, + "num_input_tokens_seen": 23693952, + "step": 7520 + }, + { + "epoch": 0.4817233211702196, + "grad_norm": 23.85812759399414, + "learning_rate": 4.81628472666752e-06, + "loss": 0.4928, + "num_input_tokens_seen": 23708736, + "step": 7525 + }, + { + "epoch": 0.4820434031111965, + "grad_norm": 21.893638610839844, + "learning_rate": 4.819485341185508e-06, + "loss": 0.5116, + "num_input_tokens_seen": 23724864, + "step": 7530 + }, + { + "epoch": 0.48236348505217336, + "grad_norm": 25.33639144897461, + "learning_rate": 4.822685955703495e-06, + "loss": 0.4219, + "num_input_tokens_seen": 23740160, + "step": 7535 + }, + { + "epoch": 0.48268356699315024, + "grad_norm": 34.1551628112793, + "learning_rate": 4.825886570221483e-06, + "loss": 0.6203, + "num_input_tokens_seen": 23755264, + "step": 7540 + }, + { + "epoch": 0.4830036489341271, + "grad_norm": 20.326963424682617, + "learning_rate": 4.82908718473947e-06, + "loss": 0.4138, + "num_input_tokens_seen": 23770112, + "step": 7545 + }, + { + "epoch": 0.48332373087510405, + "grad_norm": 44.06591033935547, + "learning_rate": 4.832287799257458e-06, + "loss": 0.5278, + "num_input_tokens_seen": 23785536, + "step": 7550 + }, + { + "epoch": 0.48364381281608093, + "grad_norm": 32.73126983642578, + "learning_rate": 4.835488413775445e-06, + "loss": 0.4705, + "num_input_tokens_seen": 23801280, + "step": 7555 + }, + { + "epoch": 0.4839638947570578, + "grad_norm": 49.01058578491211, + "learning_rate": 4.838689028293433e-06, + "loss": 0.5496, + "num_input_tokens_seen": 23816448, + "step": 7560 + }, + { + "epoch": 0.4842839766980347, + "grad_norm": 20.465591430664062, + "learning_rate": 4.84188964281142e-06, + "loss": 0.6405, + "num_input_tokens_seen": 23832512, + "step": 7565 + }, + { + "epoch": 0.48460405863901157, + "grad_norm": 13.519756317138672, + "learning_rate": 4.845090257329408e-06, + "loss": 0.4612, + "num_input_tokens_seen": 23848192, + "step": 7570 + }, + { + "epoch": 0.48492414057998845, + "grad_norm": 17.371519088745117, + "learning_rate": 4.848290871847396e-06, + "loss": 0.5829, + "num_input_tokens_seen": 23863808, + "step": 7575 + }, + { + "epoch": 0.4852442225209654, + "grad_norm": 26.358009338378906, + "learning_rate": 4.851491486365383e-06, + "loss": 0.4312, + "num_input_tokens_seen": 23880960, + "step": 7580 + }, + { + "epoch": 0.48556430446194226, + "grad_norm": 33.023746490478516, + "learning_rate": 4.85469210088337e-06, + "loss": 0.4587, + "num_input_tokens_seen": 23896256, + "step": 7585 + }, + { + "epoch": 0.48588438640291914, + "grad_norm": 36.188758850097656, + "learning_rate": 4.857892715401357e-06, + "loss": 0.596, + "num_input_tokens_seen": 23912832, + "step": 7590 + }, + { + "epoch": 0.486204468343896, + "grad_norm": 25.22881317138672, + "learning_rate": 4.861093329919345e-06, + "loss": 0.5466, + "num_input_tokens_seen": 23928768, + "step": 7595 + }, + { + "epoch": 0.4865245502848729, + "grad_norm": 36.726802825927734, + "learning_rate": 4.864293944437332e-06, + "loss": 0.5774, + "num_input_tokens_seen": 23945088, + "step": 7600 + }, + { + "epoch": 0.48684463222584984, + "grad_norm": 28.322744369506836, + "learning_rate": 4.86749455895532e-06, + "loss": 0.5805, + "num_input_tokens_seen": 23960192, + "step": 7605 + }, + { + "epoch": 0.4871647141668267, + "grad_norm": 27.07916259765625, + "learning_rate": 4.870695173473307e-06, + "loss": 0.4313, + "num_input_tokens_seen": 23977152, + "step": 7610 + }, + { + "epoch": 0.4874847961078036, + "grad_norm": 56.84762954711914, + "learning_rate": 4.873895787991295e-06, + "loss": 0.457, + "num_input_tokens_seen": 23992448, + "step": 7615 + }, + { + "epoch": 0.4878048780487805, + "grad_norm": 19.95673942565918, + "learning_rate": 4.877096402509282e-06, + "loss": 0.4882, + "num_input_tokens_seen": 24008384, + "step": 7620 + }, + { + "epoch": 0.48812495998975736, + "grad_norm": 29.983524322509766, + "learning_rate": 4.8802970170272696e-06, + "loss": 0.6539, + "num_input_tokens_seen": 24025728, + "step": 7625 + }, + { + "epoch": 0.4884450419307343, + "grad_norm": 24.90115737915039, + "learning_rate": 4.883497631545257e-06, + "loss": 0.5459, + "num_input_tokens_seen": 24040448, + "step": 7630 + }, + { + "epoch": 0.48876512387171117, + "grad_norm": 47.204654693603516, + "learning_rate": 4.8866982460632445e-06, + "loss": 0.5034, + "num_input_tokens_seen": 24055168, + "step": 7635 + }, + { + "epoch": 0.48908520581268805, + "grad_norm": 32.12424850463867, + "learning_rate": 4.889898860581232e-06, + "loss": 0.6462, + "num_input_tokens_seen": 24070016, + "step": 7640 + }, + { + "epoch": 0.48940528775366493, + "grad_norm": 22.129335403442383, + "learning_rate": 4.8930994750992194e-06, + "loss": 0.363, + "num_input_tokens_seen": 24084800, + "step": 7645 + }, + { + "epoch": 0.4897253696946418, + "grad_norm": 39.59850311279297, + "learning_rate": 4.896300089617207e-06, + "loss": 0.5898, + "num_input_tokens_seen": 24100416, + "step": 7650 + }, + { + "epoch": 0.49004545163561875, + "grad_norm": 35.70777893066406, + "learning_rate": 4.899500704135194e-06, + "loss": 0.42, + "num_input_tokens_seen": 24117120, + "step": 7655 + }, + { + "epoch": 0.4903655335765956, + "grad_norm": 27.107698440551758, + "learning_rate": 4.902701318653182e-06, + "loss": 0.5558, + "num_input_tokens_seen": 24132224, + "step": 7660 + }, + { + "epoch": 0.4906856155175725, + "grad_norm": 27.739315032958984, + "learning_rate": 4.905901933171169e-06, + "loss": 0.472, + "num_input_tokens_seen": 24147712, + "step": 7665 + }, + { + "epoch": 0.4910056974585494, + "grad_norm": 49.23149490356445, + "learning_rate": 4.909102547689157e-06, + "loss": 0.4881, + "num_input_tokens_seen": 24164288, + "step": 7670 + }, + { + "epoch": 0.49132577939952626, + "grad_norm": 29.230178833007812, + "learning_rate": 4.912303162207144e-06, + "loss": 0.6297, + "num_input_tokens_seen": 24193728, + "step": 7675 + }, + { + "epoch": 0.49164586134050314, + "grad_norm": 37.29115676879883, + "learning_rate": 4.915503776725132e-06, + "loss": 0.5525, + "num_input_tokens_seen": 24208832, + "step": 7680 + }, + { + "epoch": 0.4919659432814801, + "grad_norm": 37.592708587646484, + "learning_rate": 4.918704391243119e-06, + "loss": 0.617, + "num_input_tokens_seen": 24223424, + "step": 7685 + }, + { + "epoch": 0.49228602522245696, + "grad_norm": 21.325464248657227, + "learning_rate": 4.921905005761107e-06, + "loss": 0.5242, + "num_input_tokens_seen": 24238976, + "step": 7690 + }, + { + "epoch": 0.49260610716343384, + "grad_norm": 26.14690399169922, + "learning_rate": 4.925105620279094e-06, + "loss": 0.4704, + "num_input_tokens_seen": 24253632, + "step": 7695 + }, + { + "epoch": 0.4929261891044107, + "grad_norm": 42.4908447265625, + "learning_rate": 4.928306234797082e-06, + "loss": 0.4639, + "num_input_tokens_seen": 24270720, + "step": 7700 + }, + { + "epoch": 0.4932462710453876, + "grad_norm": 29.21802520751953, + "learning_rate": 4.931506849315069e-06, + "loss": 0.533, + "num_input_tokens_seen": 24285312, + "step": 7705 + }, + { + "epoch": 0.49356635298636453, + "grad_norm": 54.64085388183594, + "learning_rate": 4.934707463833056e-06, + "loss": 0.5732, + "num_input_tokens_seen": 24300352, + "step": 7710 + }, + { + "epoch": 0.4938864349273414, + "grad_norm": 23.30608367919922, + "learning_rate": 4.937908078351044e-06, + "loss": 0.5093, + "num_input_tokens_seen": 24317056, + "step": 7715 + }, + { + "epoch": 0.4942065168683183, + "grad_norm": 28.910512924194336, + "learning_rate": 4.941108692869031e-06, + "loss": 0.4937, + "num_input_tokens_seen": 24333184, + "step": 7720 + }, + { + "epoch": 0.49452659880929517, + "grad_norm": 36.367862701416016, + "learning_rate": 4.944309307387019e-06, + "loss": 0.6299, + "num_input_tokens_seen": 24348672, + "step": 7725 + }, + { + "epoch": 0.49484668075027205, + "grad_norm": 37.77497100830078, + "learning_rate": 4.947509921905006e-06, + "loss": 0.5236, + "num_input_tokens_seen": 24363648, + "step": 7730 + }, + { + "epoch": 0.495166762691249, + "grad_norm": 42.591854095458984, + "learning_rate": 4.950710536422994e-06, + "loss": 0.4852, + "num_input_tokens_seen": 24380992, + "step": 7735 + }, + { + "epoch": 0.49548684463222586, + "grad_norm": 28.563312530517578, + "learning_rate": 4.953911150940981e-06, + "loss": 0.5179, + "num_input_tokens_seen": 24395776, + "step": 7740 + }, + { + "epoch": 0.49580692657320274, + "grad_norm": 19.420364379882812, + "learning_rate": 4.957111765458969e-06, + "loss": 0.4997, + "num_input_tokens_seen": 24411584, + "step": 7745 + }, + { + "epoch": 0.4961270085141796, + "grad_norm": 41.12648010253906, + "learning_rate": 4.960312379976956e-06, + "loss": 0.7475, + "num_input_tokens_seen": 24426560, + "step": 7750 + }, + { + "epoch": 0.4964470904551565, + "grad_norm": 23.768217086791992, + "learning_rate": 4.963512994494944e-06, + "loss": 0.6749, + "num_input_tokens_seen": 24441856, + "step": 7755 + }, + { + "epoch": 0.4967671723961334, + "grad_norm": 24.844717025756836, + "learning_rate": 4.966713609012931e-06, + "loss": 0.4507, + "num_input_tokens_seen": 24457472, + "step": 7760 + }, + { + "epoch": 0.4970872543371103, + "grad_norm": 78.31437683105469, + "learning_rate": 4.969914223530919e-06, + "loss": 0.6491, + "num_input_tokens_seen": 24471936, + "step": 7765 + }, + { + "epoch": 0.4974073362780872, + "grad_norm": 33.941471099853516, + "learning_rate": 4.973114838048906e-06, + "loss": 0.4794, + "num_input_tokens_seen": 24487616, + "step": 7770 + }, + { + "epoch": 0.4977274182190641, + "grad_norm": 41.977115631103516, + "learning_rate": 4.9763154525668936e-06, + "loss": 0.6671, + "num_input_tokens_seen": 24503936, + "step": 7775 + }, + { + "epoch": 0.49804750016004096, + "grad_norm": 30.50855827331543, + "learning_rate": 4.979516067084881e-06, + "loss": 0.6155, + "num_input_tokens_seen": 24518464, + "step": 7780 + }, + { + "epoch": 0.49836758210101784, + "grad_norm": 28.7293701171875, + "learning_rate": 4.9827166816028685e-06, + "loss": 0.3628, + "num_input_tokens_seen": 24533184, + "step": 7785 + }, + { + "epoch": 0.49868766404199477, + "grad_norm": 27.17364501953125, + "learning_rate": 4.9859172961208556e-06, + "loss": 0.5781, + "num_input_tokens_seen": 24548992, + "step": 7790 + }, + { + "epoch": 0.49900774598297165, + "grad_norm": 30.336999893188477, + "learning_rate": 4.9891179106388434e-06, + "loss": 0.5001, + "num_input_tokens_seen": 24566592, + "step": 7795 + }, + { + "epoch": 0.49932782792394853, + "grad_norm": 32.90946960449219, + "learning_rate": 4.9923185251568305e-06, + "loss": 0.6452, + "num_input_tokens_seen": 24583552, + "step": 7800 + }, + { + "epoch": 0.4996479098649254, + "grad_norm": 12.126652717590332, + "learning_rate": 4.995519139674818e-06, + "loss": 0.5224, + "num_input_tokens_seen": 24599552, + "step": 7805 + }, + { + "epoch": 0.4999679918059023, + "grad_norm": 45.299739837646484, + "learning_rate": 4.998719754192805e-06, + "loss": 0.4689, + "num_input_tokens_seen": 24614912, + "step": 7810 + }, + { + "epoch": 0.5000960245822931, + "eval_loss": 0.5265359282493591, + "eval_runtime": 51.0921, + "eval_samples_per_second": 271.784, + "eval_steps_per_second": 33.978, + "num_input_tokens_seen": 24620672, + "step": 7812 + }, + { + "epoch": 0.5002880737468792, + "grad_norm": 29.955860137939453, + "learning_rate": 4.999999977529344e-06, + "loss": 0.4975, + "num_input_tokens_seen": 24629696, + "step": 7815 + }, + { + "epoch": 0.5006081556878561, + "grad_norm": 26.357284545898438, + "learning_rate": 4.99999984020867e-06, + "loss": 0.5334, + "num_input_tokens_seen": 24646208, + "step": 7820 + }, + { + "epoch": 0.500928237628833, + "grad_norm": 40.533111572265625, + "learning_rate": 4.999999578051025e-06, + "loss": 0.6436, + "num_input_tokens_seen": 24661120, + "step": 7825 + }, + { + "epoch": 0.5012483195698099, + "grad_norm": 40.62910842895508, + "learning_rate": 4.999999191056423e-06, + "loss": 0.5722, + "num_input_tokens_seen": 24678336, + "step": 7830 + }, + { + "epoch": 0.5015684015107867, + "grad_norm": 28.745250701904297, + "learning_rate": 4.999998679224884e-06, + "loss": 0.6323, + "num_input_tokens_seen": 24694336, + "step": 7835 + }, + { + "epoch": 0.5018884834517636, + "grad_norm": 23.679622650146484, + "learning_rate": 4.999998042556433e-06, + "loss": 0.4604, + "num_input_tokens_seen": 24708608, + "step": 7840 + }, + { + "epoch": 0.5022085653927405, + "grad_norm": 35.468650817871094, + "learning_rate": 4.999997281051102e-06, + "loss": 0.5815, + "num_input_tokens_seen": 24724672, + "step": 7845 + }, + { + "epoch": 0.5025286473337174, + "grad_norm": 33.45244216918945, + "learning_rate": 4.999996394708928e-06, + "loss": 0.5677, + "num_input_tokens_seen": 24740608, + "step": 7850 + }, + { + "epoch": 0.5028487292746944, + "grad_norm": 33.69066619873047, + "learning_rate": 4.999995383529957e-06, + "loss": 0.5456, + "num_input_tokens_seen": 24758528, + "step": 7855 + }, + { + "epoch": 0.5031688112156713, + "grad_norm": 21.258798599243164, + "learning_rate": 4.9999942475142375e-06, + "loss": 0.4849, + "num_input_tokens_seen": 24772992, + "step": 7860 + }, + { + "epoch": 0.5034888931566481, + "grad_norm": 23.78034210205078, + "learning_rate": 4.999992986661828e-06, + "loss": 0.5574, + "num_input_tokens_seen": 24788160, + "step": 7865 + }, + { + "epoch": 0.503808975097625, + "grad_norm": 41.02124786376953, + "learning_rate": 4.99999160097279e-06, + "loss": 0.5406, + "num_input_tokens_seen": 24804224, + "step": 7870 + }, + { + "epoch": 0.5041290570386019, + "grad_norm": 28.90238380432129, + "learning_rate": 4.999990090447194e-06, + "loss": 0.6051, + "num_input_tokens_seen": 24820928, + "step": 7875 + }, + { + "epoch": 0.5044491389795788, + "grad_norm": 31.764989852905273, + "learning_rate": 4.9999884550851155e-06, + "loss": 0.5021, + "num_input_tokens_seen": 24835776, + "step": 7880 + }, + { + "epoch": 0.5047692209205557, + "grad_norm": 29.527854919433594, + "learning_rate": 4.999986694886634e-06, + "loss": 0.4682, + "num_input_tokens_seen": 24851648, + "step": 7885 + }, + { + "epoch": 0.5050893028615325, + "grad_norm": 56.120643615722656, + "learning_rate": 4.99998480985184e-06, + "loss": 0.5255, + "num_input_tokens_seen": 24866880, + "step": 7890 + }, + { + "epoch": 0.5054093848025094, + "grad_norm": 26.88783836364746, + "learning_rate": 4.9999827999808256e-06, + "loss": 0.4494, + "num_input_tokens_seen": 24881856, + "step": 7895 + }, + { + "epoch": 0.5057294667434863, + "grad_norm": 42.35232925415039, + "learning_rate": 4.999980665273693e-06, + "loss": 0.5709, + "num_input_tokens_seen": 24898560, + "step": 7900 + }, + { + "epoch": 0.5060495486844632, + "grad_norm": 20.523231506347656, + "learning_rate": 4.999978405730548e-06, + "loss": 0.4448, + "num_input_tokens_seen": 24913216, + "step": 7905 + }, + { + "epoch": 0.5063696306254402, + "grad_norm": 34.744712829589844, + "learning_rate": 4.999976021351503e-06, + "loss": 0.6069, + "num_input_tokens_seen": 24928832, + "step": 7910 + }, + { + "epoch": 0.506689712566417, + "grad_norm": 46.477134704589844, + "learning_rate": 4.999973512136677e-06, + "loss": 0.6528, + "num_input_tokens_seen": 24944512, + "step": 7915 + }, + { + "epoch": 0.5070097945073939, + "grad_norm": 38.694374084472656, + "learning_rate": 4.999970878086197e-06, + "loss": 0.6276, + "num_input_tokens_seen": 24959744, + "step": 7920 + }, + { + "epoch": 0.5073298764483708, + "grad_norm": 44.43246078491211, + "learning_rate": 4.999968119200192e-06, + "loss": 0.5793, + "num_input_tokens_seen": 24975488, + "step": 7925 + }, + { + "epoch": 0.5076499583893477, + "grad_norm": 23.746732711791992, + "learning_rate": 4.999965235478801e-06, + "loss": 0.5143, + "num_input_tokens_seen": 24990272, + "step": 7930 + }, + { + "epoch": 0.5079700403303246, + "grad_norm": 32.33531188964844, + "learning_rate": 4.9999622269221685e-06, + "loss": 0.3795, + "num_input_tokens_seen": 25006272, + "step": 7935 + }, + { + "epoch": 0.5082901222713014, + "grad_norm": 33.01438903808594, + "learning_rate": 4.9999590935304435e-06, + "loss": 0.4828, + "num_input_tokens_seen": 25022464, + "step": 7940 + }, + { + "epoch": 0.5086102042122783, + "grad_norm": 20.03788185119629, + "learning_rate": 4.999955835303784e-06, + "loss": 0.4114, + "num_input_tokens_seen": 25037376, + "step": 7945 + }, + { + "epoch": 0.5089302861532552, + "grad_norm": 32.96141815185547, + "learning_rate": 4.999952452242352e-06, + "loss": 0.5628, + "num_input_tokens_seen": 25053440, + "step": 7950 + }, + { + "epoch": 0.5092503680942321, + "grad_norm": 21.934057235717773, + "learning_rate": 4.999948944346315e-06, + "loss": 0.6121, + "num_input_tokens_seen": 25069120, + "step": 7955 + }, + { + "epoch": 0.5095704500352091, + "grad_norm": 27.31989288330078, + "learning_rate": 4.9999453116158505e-06, + "loss": 0.4008, + "num_input_tokens_seen": 25084224, + "step": 7960 + }, + { + "epoch": 0.509890531976186, + "grad_norm": 59.944644927978516, + "learning_rate": 4.999941554051139e-06, + "loss": 0.6273, + "num_input_tokens_seen": 25099520, + "step": 7965 + }, + { + "epoch": 0.5102106139171628, + "grad_norm": 35.90625762939453, + "learning_rate": 4.999937671652367e-06, + "loss": 0.5022, + "num_input_tokens_seen": 25115200, + "step": 7970 + }, + { + "epoch": 0.5105306958581397, + "grad_norm": 15.83523178100586, + "learning_rate": 4.9999336644197306e-06, + "loss": 0.4842, + "num_input_tokens_seen": 25131520, + "step": 7975 + }, + { + "epoch": 0.5108507777991166, + "grad_norm": 45.297119140625, + "learning_rate": 4.999929532353428e-06, + "loss": 0.6849, + "num_input_tokens_seen": 25147776, + "step": 7980 + }, + { + "epoch": 0.5111708597400935, + "grad_norm": 28.77227783203125, + "learning_rate": 4.999925275453666e-06, + "loss": 0.5415, + "num_input_tokens_seen": 25162816, + "step": 7985 + }, + { + "epoch": 0.5114909416810703, + "grad_norm": 26.189369201660156, + "learning_rate": 4.999920893720659e-06, + "loss": 0.4387, + "num_input_tokens_seen": 25178752, + "step": 7990 + }, + { + "epoch": 0.5118110236220472, + "grad_norm": 34.21223831176758, + "learning_rate": 4.999916387154622e-06, + "loss": 0.5952, + "num_input_tokens_seen": 25194048, + "step": 7995 + }, + { + "epoch": 0.5121311055630241, + "grad_norm": 20.14518928527832, + "learning_rate": 4.999911755755783e-06, + "loss": 0.4733, + "num_input_tokens_seen": 25209216, + "step": 8000 + }, + { + "epoch": 0.512451187504001, + "grad_norm": 31.22332000732422, + "learning_rate": 4.999906999524373e-06, + "loss": 0.6132, + "num_input_tokens_seen": 25224640, + "step": 8005 + }, + { + "epoch": 0.5127712694449779, + "grad_norm": 27.335357666015625, + "learning_rate": 4.999902118460629e-06, + "loss": 0.3993, + "num_input_tokens_seen": 25239808, + "step": 8010 + }, + { + "epoch": 0.5130913513859549, + "grad_norm": 31.22426986694336, + "learning_rate": 4.999897112564795e-06, + "loss": 0.4945, + "num_input_tokens_seen": 25257344, + "step": 8015 + }, + { + "epoch": 0.5134114333269317, + "grad_norm": 32.318138122558594, + "learning_rate": 4.999891981837119e-06, + "loss": 0.4241, + "num_input_tokens_seen": 25272064, + "step": 8020 + }, + { + "epoch": 0.5137315152679086, + "grad_norm": 27.342021942138672, + "learning_rate": 4.99988672627786e-06, + "loss": 0.4864, + "num_input_tokens_seen": 25287936, + "step": 8025 + }, + { + "epoch": 0.5140515972088855, + "grad_norm": 21.021343231201172, + "learning_rate": 4.9998813458872795e-06, + "loss": 0.4549, + "num_input_tokens_seen": 25303232, + "step": 8030 + }, + { + "epoch": 0.5143716791498624, + "grad_norm": 27.553861618041992, + "learning_rate": 4.999875840665646e-06, + "loss": 0.4298, + "num_input_tokens_seen": 25318656, + "step": 8035 + }, + { + "epoch": 0.5146917610908393, + "grad_norm": 45.53428649902344, + "learning_rate": 4.9998702106132336e-06, + "loss": 0.486, + "num_input_tokens_seen": 25333760, + "step": 8040 + }, + { + "epoch": 0.5150118430318161, + "grad_norm": 18.647144317626953, + "learning_rate": 4.999864455730324e-06, + "loss": 0.5555, + "num_input_tokens_seen": 25349824, + "step": 8045 + }, + { + "epoch": 0.515331924972793, + "grad_norm": 24.088735580444336, + "learning_rate": 4.9998585760172065e-06, + "loss": 0.4811, + "num_input_tokens_seen": 25365376, + "step": 8050 + }, + { + "epoch": 0.5156520069137699, + "grad_norm": 32.16999053955078, + "learning_rate": 4.999852571474172e-06, + "loss": 0.5462, + "num_input_tokens_seen": 25380928, + "step": 8055 + }, + { + "epoch": 0.5159720888547468, + "grad_norm": 24.900938034057617, + "learning_rate": 4.999846442101522e-06, + "loss": 0.4673, + "num_input_tokens_seen": 25396352, + "step": 8060 + }, + { + "epoch": 0.5162921707957238, + "grad_norm": 25.337465286254883, + "learning_rate": 4.999840187899561e-06, + "loss": 0.5058, + "num_input_tokens_seen": 25411456, + "step": 8065 + }, + { + "epoch": 0.5166122527367006, + "grad_norm": 21.813234329223633, + "learning_rate": 4.9998338088686024e-06, + "loss": 0.4942, + "num_input_tokens_seen": 25428544, + "step": 8070 + }, + { + "epoch": 0.5169323346776775, + "grad_norm": 40.269752502441406, + "learning_rate": 4.999827305008964e-06, + "loss": 0.5725, + "num_input_tokens_seen": 25443968, + "step": 8075 + }, + { + "epoch": 0.5172524166186544, + "grad_norm": 47.26615524291992, + "learning_rate": 4.999820676320972e-06, + "loss": 0.4588, + "num_input_tokens_seen": 25458880, + "step": 8080 + }, + { + "epoch": 0.5175724985596313, + "grad_norm": 34.614559173583984, + "learning_rate": 4.999813922804956e-06, + "loss": 0.3874, + "num_input_tokens_seen": 25474304, + "step": 8085 + }, + { + "epoch": 0.5178925805006082, + "grad_norm": 38.960391998291016, + "learning_rate": 4.999807044461255e-06, + "loss": 0.6068, + "num_input_tokens_seen": 25489472, + "step": 8090 + }, + { + "epoch": 0.518212662441585, + "grad_norm": 34.54029846191406, + "learning_rate": 4.99980004129021e-06, + "loss": 0.488, + "num_input_tokens_seen": 25504896, + "step": 8095 + }, + { + "epoch": 0.5185327443825619, + "grad_norm": 42.57952117919922, + "learning_rate": 4.999792913292172e-06, + "loss": 0.5093, + "num_input_tokens_seen": 25519360, + "step": 8100 + }, + { + "epoch": 0.5188528263235388, + "grad_norm": 79.23836517333984, + "learning_rate": 4.999785660467496e-06, + "loss": 0.6216, + "num_input_tokens_seen": 25535680, + "step": 8105 + }, + { + "epoch": 0.5191729082645157, + "grad_norm": 32.82673263549805, + "learning_rate": 4.999778282816547e-06, + "loss": 0.5825, + "num_input_tokens_seen": 25550592, + "step": 8110 + }, + { + "epoch": 0.5194929902054926, + "grad_norm": 40.820716857910156, + "learning_rate": 4.99977078033969e-06, + "loss": 0.5011, + "num_input_tokens_seen": 25565696, + "step": 8115 + }, + { + "epoch": 0.5198130721464695, + "grad_norm": 18.4709415435791, + "learning_rate": 4.999763153037302e-06, + "loss": 0.4527, + "num_input_tokens_seen": 25581568, + "step": 8120 + }, + { + "epoch": 0.5201331540874464, + "grad_norm": 50.47328567504883, + "learning_rate": 4.9997554009097625e-06, + "loss": 0.6174, + "num_input_tokens_seen": 25597760, + "step": 8125 + }, + { + "epoch": 0.5204532360284233, + "grad_norm": 24.329042434692383, + "learning_rate": 4.999747523957459e-06, + "loss": 0.534, + "num_input_tokens_seen": 25615424, + "step": 8130 + }, + { + "epoch": 0.5207733179694002, + "grad_norm": 31.145105361938477, + "learning_rate": 4.9997395221807845e-06, + "loss": 0.5229, + "num_input_tokens_seen": 25630720, + "step": 8135 + }, + { + "epoch": 0.5210933999103771, + "grad_norm": 43.02310562133789, + "learning_rate": 4.99973139558014e-06, + "loss": 0.5855, + "num_input_tokens_seen": 25649280, + "step": 8140 + }, + { + "epoch": 0.5214134818513539, + "grad_norm": 40.13945007324219, + "learning_rate": 4.999723144155929e-06, + "loss": 0.6326, + "num_input_tokens_seen": 25664576, + "step": 8145 + }, + { + "epoch": 0.5217335637923308, + "grad_norm": 24.301637649536133, + "learning_rate": 4.999714767908565e-06, + "loss": 0.5045, + "num_input_tokens_seen": 25680384, + "step": 8150 + }, + { + "epoch": 0.5220536457333077, + "grad_norm": 21.952287673950195, + "learning_rate": 4.999706266838466e-06, + "loss": 0.466, + "num_input_tokens_seen": 25696128, + "step": 8155 + }, + { + "epoch": 0.5223737276742846, + "grad_norm": 17.345703125, + "learning_rate": 4.9996976409460575e-06, + "loss": 0.4224, + "num_input_tokens_seen": 25712768, + "step": 8160 + }, + { + "epoch": 0.5226938096152615, + "grad_norm": 28.340557098388672, + "learning_rate": 4.9996888902317684e-06, + "loss": 0.607, + "num_input_tokens_seen": 25727616, + "step": 8165 + }, + { + "epoch": 0.5230138915562383, + "grad_norm": 22.099363327026367, + "learning_rate": 4.999680014696037e-06, + "loss": 0.4967, + "num_input_tokens_seen": 25744384, + "step": 8170 + }, + { + "epoch": 0.5233339734972153, + "grad_norm": 34.076744079589844, + "learning_rate": 4.999671014339306e-06, + "loss": 0.434, + "num_input_tokens_seen": 25760640, + "step": 8175 + }, + { + "epoch": 0.5236540554381922, + "grad_norm": 30.524316787719727, + "learning_rate": 4.999661889162024e-06, + "loss": 0.4633, + "num_input_tokens_seen": 25776640, + "step": 8180 + }, + { + "epoch": 0.5239741373791691, + "grad_norm": 31.973146438598633, + "learning_rate": 4.999652639164648e-06, + "loss": 0.4204, + "num_input_tokens_seen": 25791744, + "step": 8185 + }, + { + "epoch": 0.524294219320146, + "grad_norm": 40.61989974975586, + "learning_rate": 4.99964326434764e-06, + "loss": 0.7351, + "num_input_tokens_seen": 25808256, + "step": 8190 + }, + { + "epoch": 0.5246143012611229, + "grad_norm": 37.38151550292969, + "learning_rate": 4.9996337647114675e-06, + "loss": 0.4311, + "num_input_tokens_seen": 25824128, + "step": 8195 + }, + { + "epoch": 0.5249343832020997, + "grad_norm": 31.975629806518555, + "learning_rate": 4.999624140256605e-06, + "loss": 0.4729, + "num_input_tokens_seen": 25840576, + "step": 8200 + }, + { + "epoch": 0.5252544651430766, + "grad_norm": 23.774778366088867, + "learning_rate": 4.999614390983532e-06, + "loss": 0.3891, + "num_input_tokens_seen": 25856256, + "step": 8205 + }, + { + "epoch": 0.5255745470840535, + "grad_norm": 18.606239318847656, + "learning_rate": 4.9996045168927365e-06, + "loss": 0.4825, + "num_input_tokens_seen": 25871424, + "step": 8210 + }, + { + "epoch": 0.5258946290250304, + "grad_norm": 31.731996536254883, + "learning_rate": 4.999594517984711e-06, + "loss": 0.619, + "num_input_tokens_seen": 25886272, + "step": 8215 + }, + { + "epoch": 0.5262147109660072, + "grad_norm": 56.08704376220703, + "learning_rate": 4.999584394259956e-06, + "loss": 0.7025, + "num_input_tokens_seen": 25901952, + "step": 8220 + }, + { + "epoch": 0.5265347929069842, + "grad_norm": 22.057924270629883, + "learning_rate": 4.999574145718977e-06, + "loss": 0.4938, + "num_input_tokens_seen": 25917888, + "step": 8225 + }, + { + "epoch": 0.5268548748479611, + "grad_norm": 39.583553314208984, + "learning_rate": 4.9995637723622834e-06, + "loss": 0.4671, + "num_input_tokens_seen": 25933824, + "step": 8230 + }, + { + "epoch": 0.527174956788938, + "grad_norm": 26.24613380432129, + "learning_rate": 4.999553274190396e-06, + "loss": 0.5235, + "num_input_tokens_seen": 25951360, + "step": 8235 + }, + { + "epoch": 0.5274950387299149, + "grad_norm": 46.1359748840332, + "learning_rate": 4.999542651203837e-06, + "loss": 0.4596, + "num_input_tokens_seen": 25967232, + "step": 8240 + }, + { + "epoch": 0.5278151206708918, + "grad_norm": 26.273548126220703, + "learning_rate": 4.999531903403138e-06, + "loss": 0.5693, + "num_input_tokens_seen": 25982592, + "step": 8245 + }, + { + "epoch": 0.5281352026118686, + "grad_norm": 61.08652877807617, + "learning_rate": 4.9995210307888354e-06, + "loss": 0.5243, + "num_input_tokens_seen": 25997824, + "step": 8250 + }, + { + "epoch": 0.5284552845528455, + "grad_norm": 25.97014617919922, + "learning_rate": 4.999510033361472e-06, + "loss": 0.5829, + "num_input_tokens_seen": 26013632, + "step": 8255 + }, + { + "epoch": 0.5287753664938224, + "grad_norm": 24.3104190826416, + "learning_rate": 4.999498911121596e-06, + "loss": 0.4653, + "num_input_tokens_seen": 26028800, + "step": 8260 + }, + { + "epoch": 0.5290954484347993, + "grad_norm": 22.537748336791992, + "learning_rate": 4.999487664069765e-06, + "loss": 0.455, + "num_input_tokens_seen": 26044672, + "step": 8265 + }, + { + "epoch": 0.5294155303757762, + "grad_norm": 24.322023391723633, + "learning_rate": 4.999476292206539e-06, + "loss": 0.5519, + "num_input_tokens_seen": 26060544, + "step": 8270 + }, + { + "epoch": 0.529735612316753, + "grad_norm": 13.749019622802734, + "learning_rate": 4.999464795532487e-06, + "loss": 0.5508, + "num_input_tokens_seen": 26076160, + "step": 8275 + }, + { + "epoch": 0.53005569425773, + "grad_norm": 29.94706916809082, + "learning_rate": 4.999453174048181e-06, + "loss": 0.512, + "num_input_tokens_seen": 26092352, + "step": 8280 + }, + { + "epoch": 0.5303757761987069, + "grad_norm": 56.76931381225586, + "learning_rate": 4.999441427754204e-06, + "loss": 0.4312, + "num_input_tokens_seen": 26107776, + "step": 8285 + }, + { + "epoch": 0.5306958581396838, + "grad_norm": 36.50358581542969, + "learning_rate": 4.9994295566511405e-06, + "loss": 0.4546, + "num_input_tokens_seen": 26123712, + "step": 8290 + }, + { + "epoch": 0.5310159400806607, + "grad_norm": 39.9493408203125, + "learning_rate": 4.9994175607395835e-06, + "loss": 0.534, + "num_input_tokens_seen": 26139200, + "step": 8295 + }, + { + "epoch": 0.5313360220216375, + "grad_norm": 22.39226722717285, + "learning_rate": 4.999405440020133e-06, + "loss": 0.5576, + "num_input_tokens_seen": 26153600, + "step": 8300 + }, + { + "epoch": 0.5316561039626144, + "grad_norm": 27.96100425720215, + "learning_rate": 4.999393194493395e-06, + "loss": 0.6323, + "num_input_tokens_seen": 26169600, + "step": 8305 + }, + { + "epoch": 0.5319761859035913, + "grad_norm": 25.633983612060547, + "learning_rate": 4.999380824159978e-06, + "loss": 0.4154, + "num_input_tokens_seen": 26185536, + "step": 8310 + }, + { + "epoch": 0.5322962678445682, + "grad_norm": 21.195772171020508, + "learning_rate": 4.999368329020502e-06, + "loss": 0.4978, + "num_input_tokens_seen": 26201536, + "step": 8315 + }, + { + "epoch": 0.5326163497855451, + "grad_norm": 19.12188720703125, + "learning_rate": 4.99935570907559e-06, + "loss": 0.4387, + "num_input_tokens_seen": 26216576, + "step": 8320 + }, + { + "epoch": 0.5329364317265219, + "grad_norm": 30.31956672668457, + "learning_rate": 4.999342964325873e-06, + "loss": 0.3684, + "num_input_tokens_seen": 26232640, + "step": 8325 + }, + { + "epoch": 0.5332565136674989, + "grad_norm": 23.365747451782227, + "learning_rate": 4.999330094771987e-06, + "loss": 0.5969, + "num_input_tokens_seen": 26248448, + "step": 8330 + }, + { + "epoch": 0.5335765956084758, + "grad_norm": 34.33578872680664, + "learning_rate": 4.999317100414575e-06, + "loss": 0.5528, + "num_input_tokens_seen": 26263872, + "step": 8335 + }, + { + "epoch": 0.5338966775494527, + "grad_norm": 34.22898864746094, + "learning_rate": 4.999303981254284e-06, + "loss": 0.594, + "num_input_tokens_seen": 26279872, + "step": 8340 + }, + { + "epoch": 0.5342167594904296, + "grad_norm": 31.759153366088867, + "learning_rate": 4.999290737291772e-06, + "loss": 0.4841, + "num_input_tokens_seen": 26295680, + "step": 8345 + }, + { + "epoch": 0.5345368414314065, + "grad_norm": 38.165504455566406, + "learning_rate": 4.999277368527699e-06, + "loss": 0.5733, + "num_input_tokens_seen": 26311040, + "step": 8350 + }, + { + "epoch": 0.5348569233723833, + "grad_norm": 41.476173400878906, + "learning_rate": 4.9992638749627315e-06, + "loss": 0.8566, + "num_input_tokens_seen": 26326144, + "step": 8355 + }, + { + "epoch": 0.5351770053133602, + "grad_norm": 19.18714714050293, + "learning_rate": 4.999250256597544e-06, + "loss": 0.433, + "num_input_tokens_seen": 26341952, + "step": 8360 + }, + { + "epoch": 0.5354970872543371, + "grad_norm": 34.590999603271484, + "learning_rate": 4.999236513432818e-06, + "loss": 0.3803, + "num_input_tokens_seen": 26356672, + "step": 8365 + }, + { + "epoch": 0.535817169195314, + "grad_norm": 22.47686767578125, + "learning_rate": 4.999222645469239e-06, + "loss": 0.572, + "num_input_tokens_seen": 26373056, + "step": 8370 + }, + { + "epoch": 0.5361372511362908, + "grad_norm": 19.918527603149414, + "learning_rate": 4.999208652707497e-06, + "loss": 0.48, + "num_input_tokens_seen": 26388032, + "step": 8375 + }, + { + "epoch": 0.5364573330772677, + "grad_norm": 78.18395233154297, + "learning_rate": 4.999194535148294e-06, + "loss": 0.5506, + "num_input_tokens_seen": 26402880, + "step": 8380 + }, + { + "epoch": 0.5367774150182447, + "grad_norm": 18.949100494384766, + "learning_rate": 4.999180292792334e-06, + "loss": 0.6615, + "num_input_tokens_seen": 26419328, + "step": 8385 + }, + { + "epoch": 0.5370974969592216, + "grad_norm": 33.906497955322266, + "learning_rate": 4.9991659256403276e-06, + "loss": 0.5452, + "num_input_tokens_seen": 26434112, + "step": 8390 + }, + { + "epoch": 0.5374175789001985, + "grad_norm": 19.283559799194336, + "learning_rate": 4.999151433692992e-06, + "loss": 0.49, + "num_input_tokens_seen": 26450688, + "step": 8395 + }, + { + "epoch": 0.5377376608411754, + "grad_norm": 26.415315628051758, + "learning_rate": 4.999136816951053e-06, + "loss": 0.4958, + "num_input_tokens_seen": 26466368, + "step": 8400 + }, + { + "epoch": 0.5380577427821522, + "grad_norm": 24.337093353271484, + "learning_rate": 4.9991220754152366e-06, + "loss": 0.4806, + "num_input_tokens_seen": 26482624, + "step": 8405 + }, + { + "epoch": 0.5383778247231291, + "grad_norm": 26.68187141418457, + "learning_rate": 4.999107209086283e-06, + "loss": 0.49, + "num_input_tokens_seen": 26499200, + "step": 8410 + }, + { + "epoch": 0.538697906664106, + "grad_norm": 22.253870010375977, + "learning_rate": 4.9990922179649325e-06, + "loss": 0.5364, + "num_input_tokens_seen": 26514048, + "step": 8415 + }, + { + "epoch": 0.5390179886050829, + "grad_norm": 26.443801879882812, + "learning_rate": 4.9990771020519336e-06, + "loss": 0.4705, + "num_input_tokens_seen": 26529216, + "step": 8420 + }, + { + "epoch": 0.5393380705460598, + "grad_norm": 28.558069229125977, + "learning_rate": 4.999061861348041e-06, + "loss": 0.497, + "num_input_tokens_seen": 26545152, + "step": 8425 + }, + { + "epoch": 0.5396581524870366, + "grad_norm": 51.00938034057617, + "learning_rate": 4.999046495854017e-06, + "loss": 0.6447, + "num_input_tokens_seen": 26560512, + "step": 8430 + }, + { + "epoch": 0.5399782344280136, + "grad_norm": 26.085206985473633, + "learning_rate": 4.999031005570628e-06, + "loss": 0.4271, + "num_input_tokens_seen": 26575488, + "step": 8435 + }, + { + "epoch": 0.5402983163689905, + "grad_norm": 26.51821517944336, + "learning_rate": 4.9990153904986475e-06, + "loss": 0.4814, + "num_input_tokens_seen": 26591040, + "step": 8440 + }, + { + "epoch": 0.5406183983099674, + "grad_norm": 10.961884498596191, + "learning_rate": 4.998999650638855e-06, + "loss": 0.4528, + "num_input_tokens_seen": 26606976, + "step": 8445 + }, + { + "epoch": 0.5409384802509443, + "grad_norm": 29.580347061157227, + "learning_rate": 4.998983785992036e-06, + "loss": 0.483, + "num_input_tokens_seen": 26623296, + "step": 8450 + }, + { + "epoch": 0.5412585621919211, + "grad_norm": 34.27033233642578, + "learning_rate": 4.9989677965589846e-06, + "loss": 0.4677, + "num_input_tokens_seen": 26638336, + "step": 8455 + }, + { + "epoch": 0.541578644132898, + "grad_norm": 14.566357612609863, + "learning_rate": 4.9989516823404985e-06, + "loss": 0.3693, + "num_input_tokens_seen": 26654400, + "step": 8460 + }, + { + "epoch": 0.5418987260738749, + "grad_norm": 40.30228042602539, + "learning_rate": 4.9989354433373805e-06, + "loss": 0.4609, + "num_input_tokens_seen": 26669568, + "step": 8465 + }, + { + "epoch": 0.5422188080148518, + "grad_norm": 24.896350860595703, + "learning_rate": 4.9989190795504435e-06, + "loss": 0.6815, + "num_input_tokens_seen": 26686016, + "step": 8470 + }, + { + "epoch": 0.5425388899558287, + "grad_norm": 43.860992431640625, + "learning_rate": 4.998902590980504e-06, + "loss": 0.5297, + "num_input_tokens_seen": 26701504, + "step": 8475 + }, + { + "epoch": 0.5428589718968055, + "grad_norm": 52.11386489868164, + "learning_rate": 4.998885977628386e-06, + "loss": 0.8467, + "num_input_tokens_seen": 26717120, + "step": 8480 + }, + { + "epoch": 0.5431790538377824, + "grad_norm": 81.08377838134766, + "learning_rate": 4.998869239494918e-06, + "loss": 0.6439, + "num_input_tokens_seen": 26732736, + "step": 8485 + }, + { + "epoch": 0.5434991357787594, + "grad_norm": 25.960580825805664, + "learning_rate": 4.998852376580937e-06, + "loss": 0.5143, + "num_input_tokens_seen": 26747392, + "step": 8490 + }, + { + "epoch": 0.5438192177197363, + "grad_norm": 16.743452072143555, + "learning_rate": 4.9988353888872835e-06, + "loss": 0.5548, + "num_input_tokens_seen": 26762112, + "step": 8495 + }, + { + "epoch": 0.5441392996607132, + "grad_norm": 18.13081169128418, + "learning_rate": 4.998818276414807e-06, + "loss": 0.5419, + "num_input_tokens_seen": 26777856, + "step": 8500 + }, + { + "epoch": 0.54445938160169, + "grad_norm": 30.423749923706055, + "learning_rate": 4.998801039164361e-06, + "loss": 0.5263, + "num_input_tokens_seen": 26794048, + "step": 8505 + }, + { + "epoch": 0.5447794635426669, + "grad_norm": 23.25507164001465, + "learning_rate": 4.998783677136808e-06, + "loss": 0.3765, + "num_input_tokens_seen": 26809792, + "step": 8510 + }, + { + "epoch": 0.5450995454836438, + "grad_norm": 44.66399002075195, + "learning_rate": 4.998766190333013e-06, + "loss": 0.5347, + "num_input_tokens_seen": 26825024, + "step": 8515 + }, + { + "epoch": 0.5454196274246207, + "grad_norm": 37.93608856201172, + "learning_rate": 4.998748578753851e-06, + "loss": 0.5222, + "num_input_tokens_seen": 26841536, + "step": 8520 + }, + { + "epoch": 0.5457397093655976, + "grad_norm": 26.236785888671875, + "learning_rate": 4.9987308424002e-06, + "loss": 0.4838, + "num_input_tokens_seen": 26857600, + "step": 8525 + }, + { + "epoch": 0.5460597913065744, + "grad_norm": 23.63370704650879, + "learning_rate": 4.998712981272946e-06, + "loss": 0.4069, + "num_input_tokens_seen": 26874176, + "step": 8530 + }, + { + "epoch": 0.5463798732475513, + "grad_norm": 34.395713806152344, + "learning_rate": 4.9986949953729815e-06, + "loss": 0.5914, + "num_input_tokens_seen": 26890176, + "step": 8535 + }, + { + "epoch": 0.5466999551885282, + "grad_norm": 23.205659866333008, + "learning_rate": 4.998676884701203e-06, + "loss": 0.3921, + "num_input_tokens_seen": 26905728, + "step": 8540 + }, + { + "epoch": 0.5470200371295052, + "grad_norm": 57.18128204345703, + "learning_rate": 4.998658649258517e-06, + "loss": 0.5498, + "num_input_tokens_seen": 26921280, + "step": 8545 + }, + { + "epoch": 0.5473401190704821, + "grad_norm": 31.390810012817383, + "learning_rate": 4.998640289045833e-06, + "loss": 0.5778, + "num_input_tokens_seen": 26936704, + "step": 8550 + }, + { + "epoch": 0.547660201011459, + "grad_norm": 18.61397933959961, + "learning_rate": 4.998621804064068e-06, + "loss": 0.4431, + "num_input_tokens_seen": 26952448, + "step": 8555 + }, + { + "epoch": 0.5479802829524358, + "grad_norm": 29.155675888061523, + "learning_rate": 4.9986031943141444e-06, + "loss": 0.4593, + "num_input_tokens_seen": 26967680, + "step": 8560 + }, + { + "epoch": 0.5483003648934127, + "grad_norm": 14.606059074401855, + "learning_rate": 4.998584459796992e-06, + "loss": 0.4198, + "num_input_tokens_seen": 26982400, + "step": 8565 + }, + { + "epoch": 0.5486204468343896, + "grad_norm": 38.284515380859375, + "learning_rate": 4.998565600513546e-06, + "loss": 0.4486, + "num_input_tokens_seen": 26998400, + "step": 8570 + }, + { + "epoch": 0.5489405287753665, + "grad_norm": 41.554813385009766, + "learning_rate": 4.99854661646475e-06, + "loss": 0.4721, + "num_input_tokens_seen": 27015936, + "step": 8575 + }, + { + "epoch": 0.5492606107163434, + "grad_norm": 30.414630889892578, + "learning_rate": 4.998527507651549e-06, + "loss": 0.3714, + "num_input_tokens_seen": 27030528, + "step": 8580 + }, + { + "epoch": 0.5495806926573202, + "grad_norm": 43.2541618347168, + "learning_rate": 4.9985082740748996e-06, + "loss": 0.5324, + "num_input_tokens_seen": 27046080, + "step": 8585 + }, + { + "epoch": 0.5499007745982971, + "grad_norm": 22.387834548950195, + "learning_rate": 4.998488915735761e-06, + "loss": 0.545, + "num_input_tokens_seen": 27061504, + "step": 8590 + }, + { + "epoch": 0.5502208565392741, + "grad_norm": 47.18291091918945, + "learning_rate": 4.9984694326351e-06, + "loss": 0.4747, + "num_input_tokens_seen": 27077056, + "step": 8595 + }, + { + "epoch": 0.550540938480251, + "grad_norm": 21.343690872192383, + "learning_rate": 4.998449824773889e-06, + "loss": 0.4366, + "num_input_tokens_seen": 27095168, + "step": 8600 + }, + { + "epoch": 0.5508610204212279, + "grad_norm": 18.807748794555664, + "learning_rate": 4.998430092153108e-06, + "loss": 0.4849, + "num_input_tokens_seen": 27110144, + "step": 8605 + }, + { + "epoch": 0.5511811023622047, + "grad_norm": 34.19808578491211, + "learning_rate": 4.9984102347737425e-06, + "loss": 0.6933, + "num_input_tokens_seen": 27124864, + "step": 8610 + }, + { + "epoch": 0.5515011843031816, + "grad_norm": 25.22885513305664, + "learning_rate": 4.998390252636783e-06, + "loss": 0.4974, + "num_input_tokens_seen": 27140160, + "step": 8615 + }, + { + "epoch": 0.5518212662441585, + "grad_norm": 39.187129974365234, + "learning_rate": 4.998370145743229e-06, + "loss": 0.5157, + "num_input_tokens_seen": 27155712, + "step": 8620 + }, + { + "epoch": 0.5521413481851354, + "grad_norm": 51.69096374511719, + "learning_rate": 4.998349914094082e-06, + "loss": 0.5579, + "num_input_tokens_seen": 27171520, + "step": 8625 + }, + { + "epoch": 0.5524614301261123, + "grad_norm": 27.138229370117188, + "learning_rate": 4.998329557690354e-06, + "loss": 0.4922, + "num_input_tokens_seen": 27186752, + "step": 8630 + }, + { + "epoch": 0.5527815120670891, + "grad_norm": 22.456335067749023, + "learning_rate": 4.998309076533062e-06, + "loss": 0.4771, + "num_input_tokens_seen": 27201664, + "step": 8635 + }, + { + "epoch": 0.553101594008066, + "grad_norm": 25.080730438232422, + "learning_rate": 4.998288470623228e-06, + "loss": 0.6043, + "num_input_tokens_seen": 27218880, + "step": 8640 + }, + { + "epoch": 0.5534216759490429, + "grad_norm": 40.42284393310547, + "learning_rate": 4.9982677399618805e-06, + "loss": 0.4912, + "num_input_tokens_seen": 27235648, + "step": 8645 + }, + { + "epoch": 0.5537417578900199, + "grad_norm": 31.205928802490234, + "learning_rate": 4.998246884550056e-06, + "loss": 0.4957, + "num_input_tokens_seen": 27250880, + "step": 8650 + }, + { + "epoch": 0.5540618398309968, + "grad_norm": 20.355182647705078, + "learning_rate": 4.998225904388793e-06, + "loss": 0.3812, + "num_input_tokens_seen": 27266880, + "step": 8655 + }, + { + "epoch": 0.5543819217719737, + "grad_norm": 54.858951568603516, + "learning_rate": 4.998204799479142e-06, + "loss": 0.6054, + "num_input_tokens_seen": 27282816, + "step": 8660 + }, + { + "epoch": 0.5547020037129505, + "grad_norm": 21.374523162841797, + "learning_rate": 4.998183569822156e-06, + "loss": 0.5391, + "num_input_tokens_seen": 27298496, + "step": 8665 + }, + { + "epoch": 0.5550220856539274, + "grad_norm": 33.64070510864258, + "learning_rate": 4.998162215418896e-06, + "loss": 0.4191, + "num_input_tokens_seen": 27313856, + "step": 8670 + }, + { + "epoch": 0.5553421675949043, + "grad_norm": 31.551939010620117, + "learning_rate": 4.998140736270426e-06, + "loss": 0.3481, + "num_input_tokens_seen": 27328448, + "step": 8675 + }, + { + "epoch": 0.5556622495358812, + "grad_norm": 20.43535041809082, + "learning_rate": 4.998119132377821e-06, + "loss": 0.4723, + "num_input_tokens_seen": 27343872, + "step": 8680 + }, + { + "epoch": 0.555982331476858, + "grad_norm": 44.0897216796875, + "learning_rate": 4.998097403742158e-06, + "loss": 0.6518, + "num_input_tokens_seen": 27358720, + "step": 8685 + }, + { + "epoch": 0.5563024134178349, + "grad_norm": 28.293270111083984, + "learning_rate": 4.998075550364523e-06, + "loss": 0.4923, + "num_input_tokens_seen": 27373312, + "step": 8690 + }, + { + "epoch": 0.5566224953588118, + "grad_norm": 24.726774215698242, + "learning_rate": 4.998053572246007e-06, + "loss": 0.438, + "num_input_tokens_seen": 27388544, + "step": 8695 + }, + { + "epoch": 0.5569425772997888, + "grad_norm": 58.66106414794922, + "learning_rate": 4.998031469387709e-06, + "loss": 0.4999, + "num_input_tokens_seen": 27405120, + "step": 8700 + }, + { + "epoch": 0.5572626592407657, + "grad_norm": 38.01280212402344, + "learning_rate": 4.998009241790729e-06, + "loss": 0.4881, + "num_input_tokens_seen": 27420544, + "step": 8705 + }, + { + "epoch": 0.5575827411817426, + "grad_norm": 16.821165084838867, + "learning_rate": 4.99798688945618e-06, + "loss": 0.4088, + "num_input_tokens_seen": 27435456, + "step": 8710 + }, + { + "epoch": 0.5579028231227194, + "grad_norm": 26.456802368164062, + "learning_rate": 4.9979644123851776e-06, + "loss": 0.3932, + "num_input_tokens_seen": 27452032, + "step": 8715 + }, + { + "epoch": 0.5582229050636963, + "grad_norm": 21.41167449951172, + "learning_rate": 4.997941810578843e-06, + "loss": 0.6223, + "num_input_tokens_seen": 27467520, + "step": 8720 + }, + { + "epoch": 0.5585429870046732, + "grad_norm": 18.222370147705078, + "learning_rate": 4.9979190840383065e-06, + "loss": 0.3775, + "num_input_tokens_seen": 27482816, + "step": 8725 + }, + { + "epoch": 0.5588630689456501, + "grad_norm": 18.677448272705078, + "learning_rate": 4.997896232764702e-06, + "loss": 0.4589, + "num_input_tokens_seen": 27497600, + "step": 8730 + }, + { + "epoch": 0.559183150886627, + "grad_norm": 55.77671432495117, + "learning_rate": 4.997873256759171e-06, + "loss": 0.4818, + "num_input_tokens_seen": 27513344, + "step": 8735 + }, + { + "epoch": 0.5595032328276038, + "grad_norm": 48.33034896850586, + "learning_rate": 4.997850156022859e-06, + "loss": 0.4708, + "num_input_tokens_seen": 27530304, + "step": 8740 + }, + { + "epoch": 0.5598233147685807, + "grad_norm": 28.160871505737305, + "learning_rate": 4.997826930556922e-06, + "loss": 0.5141, + "num_input_tokens_seen": 27545664, + "step": 8745 + }, + { + "epoch": 0.5601433967095576, + "grad_norm": 29.109901428222656, + "learning_rate": 4.997803580362519e-06, + "loss": 0.3639, + "num_input_tokens_seen": 27560704, + "step": 8750 + }, + { + "epoch": 0.5604634786505346, + "grad_norm": 23.473251342773438, + "learning_rate": 4.997780105440816e-06, + "loss": 0.6498, + "num_input_tokens_seen": 27577472, + "step": 8755 + }, + { + "epoch": 0.5607835605915115, + "grad_norm": 27.203807830810547, + "learning_rate": 4.9977565057929845e-06, + "loss": 0.4584, + "num_input_tokens_seen": 27592960, + "step": 8760 + }, + { + "epoch": 0.5611036425324883, + "grad_norm": 25.513038635253906, + "learning_rate": 4.9977327814202036e-06, + "loss": 0.4441, + "num_input_tokens_seen": 27610304, + "step": 8765 + }, + { + "epoch": 0.5614237244734652, + "grad_norm": 44.13124084472656, + "learning_rate": 4.9977089323236575e-06, + "loss": 0.7925, + "num_input_tokens_seen": 27628288, + "step": 8770 + }, + { + "epoch": 0.5617438064144421, + "grad_norm": 24.51640510559082, + "learning_rate": 4.997684958504537e-06, + "loss": 0.5821, + "num_input_tokens_seen": 27643904, + "step": 8775 + }, + { + "epoch": 0.562063888355419, + "grad_norm": 18.876609802246094, + "learning_rate": 4.99766085996404e-06, + "loss": 0.5352, + "num_input_tokens_seen": 27660224, + "step": 8780 + }, + { + "epoch": 0.5623839702963959, + "grad_norm": 23.595643997192383, + "learning_rate": 4.997636636703371e-06, + "loss": 0.4474, + "num_input_tokens_seen": 27675136, + "step": 8785 + }, + { + "epoch": 0.5627040522373727, + "grad_norm": 37.66817855834961, + "learning_rate": 4.997612288723736e-06, + "loss": 0.4169, + "num_input_tokens_seen": 27690176, + "step": 8790 + }, + { + "epoch": 0.5630241341783496, + "grad_norm": 26.998249053955078, + "learning_rate": 4.997587816026355e-06, + "loss": 0.5149, + "num_input_tokens_seen": 27704512, + "step": 8795 + }, + { + "epoch": 0.5633442161193265, + "grad_norm": 46.95388412475586, + "learning_rate": 4.997563218612446e-06, + "loss": 0.6121, + "num_input_tokens_seen": 27720448, + "step": 8800 + }, + { + "epoch": 0.5636642980603035, + "grad_norm": 21.200130462646484, + "learning_rate": 4.99753849648324e-06, + "loss": 0.6136, + "num_input_tokens_seen": 27735808, + "step": 8805 + }, + { + "epoch": 0.5639843800012804, + "grad_norm": 24.557025909423828, + "learning_rate": 4.9975136496399705e-06, + "loss": 0.4028, + "num_input_tokens_seen": 27750464, + "step": 8810 + }, + { + "epoch": 0.5643044619422573, + "grad_norm": 25.271453857421875, + "learning_rate": 4.997488678083879e-06, + "loss": 0.4814, + "num_input_tokens_seen": 27764352, + "step": 8815 + }, + { + "epoch": 0.5646245438832341, + "grad_norm": 29.118356704711914, + "learning_rate": 4.997463581816211e-06, + "loss": 0.4174, + "num_input_tokens_seen": 27779840, + "step": 8820 + }, + { + "epoch": 0.564944625824211, + "grad_norm": 19.876379013061523, + "learning_rate": 4.997438360838221e-06, + "loss": 0.3541, + "num_input_tokens_seen": 27796352, + "step": 8825 + }, + { + "epoch": 0.5652647077651879, + "grad_norm": 34.627159118652344, + "learning_rate": 4.9974130151511675e-06, + "loss": 0.5327, + "num_input_tokens_seen": 27810624, + "step": 8830 + }, + { + "epoch": 0.5655847897061648, + "grad_norm": 27.855012893676758, + "learning_rate": 4.997387544756317e-06, + "loss": 0.4414, + "num_input_tokens_seen": 27825152, + "step": 8835 + }, + { + "epoch": 0.5659048716471416, + "grad_norm": 30.73896598815918, + "learning_rate": 4.997361949654941e-06, + "loss": 0.6232, + "num_input_tokens_seen": 27840384, + "step": 8840 + }, + { + "epoch": 0.5662249535881185, + "grad_norm": 23.912233352661133, + "learning_rate": 4.9973362298483175e-06, + "loss": 0.4049, + "num_input_tokens_seen": 27856640, + "step": 8845 + }, + { + "epoch": 0.5665450355290954, + "grad_norm": 34.257991790771484, + "learning_rate": 4.99731038533773e-06, + "loss": 0.3466, + "num_input_tokens_seen": 27872768, + "step": 8850 + }, + { + "epoch": 0.5668651174700723, + "grad_norm": 37.837745666503906, + "learning_rate": 4.997284416124471e-06, + "loss": 0.597, + "num_input_tokens_seen": 27887488, + "step": 8855 + }, + { + "epoch": 0.5671851994110493, + "grad_norm": 45.544349670410156, + "learning_rate": 4.997258322209836e-06, + "loss": 0.4015, + "num_input_tokens_seen": 27902720, + "step": 8860 + }, + { + "epoch": 0.5675052813520262, + "grad_norm": 29.372610092163086, + "learning_rate": 4.997232103595127e-06, + "loss": 0.4278, + "num_input_tokens_seen": 27918080, + "step": 8865 + }, + { + "epoch": 0.567825363293003, + "grad_norm": 52.333168029785156, + "learning_rate": 4.997205760281656e-06, + "loss": 0.5455, + "num_input_tokens_seen": 27933760, + "step": 8870 + }, + { + "epoch": 0.5681454452339799, + "grad_norm": 37.62151336669922, + "learning_rate": 4.997179292270736e-06, + "loss": 0.5835, + "num_input_tokens_seen": 27948992, + "step": 8875 + }, + { + "epoch": 0.5684655271749568, + "grad_norm": 33.87052536010742, + "learning_rate": 4.997152699563689e-06, + "loss": 0.5552, + "num_input_tokens_seen": 27966848, + "step": 8880 + }, + { + "epoch": 0.5687856091159337, + "grad_norm": 22.607269287109375, + "learning_rate": 4.997125982161845e-06, + "loss": 0.4773, + "num_input_tokens_seen": 27981696, + "step": 8885 + }, + { + "epoch": 0.5691056910569106, + "grad_norm": 35.250709533691406, + "learning_rate": 4.997099140066535e-06, + "loss": 0.4981, + "num_input_tokens_seen": 27997376, + "step": 8890 + }, + { + "epoch": 0.5694257729978874, + "grad_norm": 29.7626953125, + "learning_rate": 4.9970721732791005e-06, + "loss": 0.4624, + "num_input_tokens_seen": 28014016, + "step": 8895 + }, + { + "epoch": 0.5697458549388643, + "grad_norm": 23.9675350189209, + "learning_rate": 4.99704508180089e-06, + "loss": 0.5496, + "num_input_tokens_seen": 28030400, + "step": 8900 + }, + { + "epoch": 0.5700659368798412, + "grad_norm": 25.977487564086914, + "learning_rate": 4.997017865633253e-06, + "loss": 0.456, + "num_input_tokens_seen": 28047104, + "step": 8905 + }, + { + "epoch": 0.5703860188208181, + "grad_norm": 46.47616958618164, + "learning_rate": 4.996990524777552e-06, + "loss": 0.5433, + "num_input_tokens_seen": 28063168, + "step": 8910 + }, + { + "epoch": 0.5707061007617951, + "grad_norm": 32.1097526550293, + "learning_rate": 4.99696305923515e-06, + "loss": 0.593, + "num_input_tokens_seen": 28079104, + "step": 8915 + }, + { + "epoch": 0.571026182702772, + "grad_norm": 31.427947998046875, + "learning_rate": 4.996935469007418e-06, + "loss": 0.5787, + "num_input_tokens_seen": 28094720, + "step": 8920 + }, + { + "epoch": 0.5713462646437488, + "grad_norm": 44.35649871826172, + "learning_rate": 4.996907754095735e-06, + "loss": 0.4108, + "num_input_tokens_seen": 28110848, + "step": 8925 + }, + { + "epoch": 0.5716663465847257, + "grad_norm": 31.354450225830078, + "learning_rate": 4.996879914501486e-06, + "loss": 0.5035, + "num_input_tokens_seen": 28127040, + "step": 8930 + }, + { + "epoch": 0.5719864285257026, + "grad_norm": 28.654857635498047, + "learning_rate": 4.996851950226059e-06, + "loss": 0.4915, + "num_input_tokens_seen": 28143424, + "step": 8935 + }, + { + "epoch": 0.5723065104666795, + "grad_norm": 50.40884780883789, + "learning_rate": 4.996823861270851e-06, + "loss": 0.7354, + "num_input_tokens_seen": 28158528, + "step": 8940 + }, + { + "epoch": 0.5726265924076563, + "grad_norm": 45.948280334472656, + "learning_rate": 4.9967956476372645e-06, + "loss": 0.5087, + "num_input_tokens_seen": 28173888, + "step": 8945 + }, + { + "epoch": 0.5729466743486332, + "grad_norm": 24.559141159057617, + "learning_rate": 4.99676730932671e-06, + "loss": 0.5592, + "num_input_tokens_seen": 28191872, + "step": 8950 + }, + { + "epoch": 0.5732667562896101, + "grad_norm": 41.98897171020508, + "learning_rate": 4.996738846340601e-06, + "loss": 0.6421, + "num_input_tokens_seen": 28206912, + "step": 8955 + }, + { + "epoch": 0.573586838230587, + "grad_norm": 44.00584411621094, + "learning_rate": 4.996710258680358e-06, + "loss": 0.618, + "num_input_tokens_seen": 28222336, + "step": 8960 + }, + { + "epoch": 0.573906920171564, + "grad_norm": 9.202842712402344, + "learning_rate": 4.99668154634741e-06, + "loss": 0.4461, + "num_input_tokens_seen": 28238144, + "step": 8965 + }, + { + "epoch": 0.5742270021125409, + "grad_norm": 45.46683883666992, + "learning_rate": 4.996652709343191e-06, + "loss": 0.3794, + "num_input_tokens_seen": 28253376, + "step": 8970 + }, + { + "epoch": 0.5745470840535177, + "grad_norm": 33.61173629760742, + "learning_rate": 4.9966237476691395e-06, + "loss": 0.5317, + "num_input_tokens_seen": 28269248, + "step": 8975 + }, + { + "epoch": 0.5748671659944946, + "grad_norm": 38.50613021850586, + "learning_rate": 4.996594661326702e-06, + "loss": 0.4716, + "num_input_tokens_seen": 28285440, + "step": 8980 + }, + { + "epoch": 0.5751872479354715, + "grad_norm": 23.06582260131836, + "learning_rate": 4.996565450317333e-06, + "loss": 0.3745, + "num_input_tokens_seen": 28300736, + "step": 8985 + }, + { + "epoch": 0.5755073298764484, + "grad_norm": 33.43952941894531, + "learning_rate": 4.996536114642489e-06, + "loss": 0.4753, + "num_input_tokens_seen": 28315712, + "step": 8990 + }, + { + "epoch": 0.5758274118174252, + "grad_norm": 55.373050689697266, + "learning_rate": 4.996506654303634e-06, + "loss": 0.5826, + "num_input_tokens_seen": 28332032, + "step": 8995 + }, + { + "epoch": 0.5761474937584021, + "grad_norm": 44.54171371459961, + "learning_rate": 4.996477069302243e-06, + "loss": 0.5248, + "num_input_tokens_seen": 28347968, + "step": 9000 + }, + { + "epoch": 0.576467575699379, + "grad_norm": 18.57148551940918, + "learning_rate": 4.996447359639789e-06, + "loss": 0.4764, + "num_input_tokens_seen": 28362944, + "step": 9005 + }, + { + "epoch": 0.5767876576403559, + "grad_norm": 76.70552825927734, + "learning_rate": 4.996417525317757e-06, + "loss": 0.4941, + "num_input_tokens_seen": 28378624, + "step": 9010 + }, + { + "epoch": 0.5771077395813328, + "grad_norm": 37.844635009765625, + "learning_rate": 4.9963875663376385e-06, + "loss": 0.5076, + "num_input_tokens_seen": 28394048, + "step": 9015 + }, + { + "epoch": 0.5774278215223098, + "grad_norm": 42.33098602294922, + "learning_rate": 4.996357482700927e-06, + "loss": 0.537, + "num_input_tokens_seen": 28409664, + "step": 9020 + }, + { + "epoch": 0.5777479034632866, + "grad_norm": 32.94294738769531, + "learning_rate": 4.9963272744091275e-06, + "loss": 0.4628, + "num_input_tokens_seen": 28424768, + "step": 9025 + }, + { + "epoch": 0.5780679854042635, + "grad_norm": 32.62940979003906, + "learning_rate": 4.996296941463745e-06, + "loss": 0.4314, + "num_input_tokens_seen": 28439424, + "step": 9030 + }, + { + "epoch": 0.5783880673452404, + "grad_norm": 43.01372528076172, + "learning_rate": 4.9962664838662974e-06, + "loss": 0.6351, + "num_input_tokens_seen": 28456064, + "step": 9035 + }, + { + "epoch": 0.5787081492862173, + "grad_norm": 31.19029998779297, + "learning_rate": 4.996235901618304e-06, + "loss": 0.5251, + "num_input_tokens_seen": 28472768, + "step": 9040 + }, + { + "epoch": 0.5790282312271942, + "grad_norm": 29.00741958618164, + "learning_rate": 4.996205194721291e-06, + "loss": 0.4512, + "num_input_tokens_seen": 28488704, + "step": 9045 + }, + { + "epoch": 0.579348313168171, + "grad_norm": 45.74059295654297, + "learning_rate": 4.9961743631767935e-06, + "loss": 0.5944, + "num_input_tokens_seen": 28503808, + "step": 9050 + }, + { + "epoch": 0.5796683951091479, + "grad_norm": 63.668636322021484, + "learning_rate": 4.99614340698635e-06, + "loss": 0.5982, + "num_input_tokens_seen": 28520320, + "step": 9055 + }, + { + "epoch": 0.5799884770501248, + "grad_norm": 61.94572067260742, + "learning_rate": 4.9961123261515074e-06, + "loss": 0.5961, + "num_input_tokens_seen": 28535680, + "step": 9060 + }, + { + "epoch": 0.5803085589911017, + "grad_norm": 29.941740036010742, + "learning_rate": 4.996081120673817e-06, + "loss": 0.4588, + "num_input_tokens_seen": 28551488, + "step": 9065 + }, + { + "epoch": 0.5806286409320787, + "grad_norm": 52.54363250732422, + "learning_rate": 4.996049790554837e-06, + "loss": 0.5326, + "num_input_tokens_seen": 28567040, + "step": 9070 + }, + { + "epoch": 0.5809487228730555, + "grad_norm": 20.925434112548828, + "learning_rate": 4.996018335796131e-06, + "loss": 0.4566, + "num_input_tokens_seen": 28581568, + "step": 9075 + }, + { + "epoch": 0.5812688048140324, + "grad_norm": 20.178712844848633, + "learning_rate": 4.995986756399272e-06, + "loss": 0.5191, + "num_input_tokens_seen": 28596992, + "step": 9080 + }, + { + "epoch": 0.5815888867550093, + "grad_norm": 48.01366424560547, + "learning_rate": 4.995955052365834e-06, + "loss": 0.7651, + "num_input_tokens_seen": 28613952, + "step": 9085 + }, + { + "epoch": 0.5819089686959862, + "grad_norm": 32.07456970214844, + "learning_rate": 4.995923223697403e-06, + "loss": 0.6473, + "num_input_tokens_seen": 28628736, + "step": 9090 + }, + { + "epoch": 0.5822290506369631, + "grad_norm": 32.50999450683594, + "learning_rate": 4.995891270395566e-06, + "loss": 0.5631, + "num_input_tokens_seen": 28644928, + "step": 9095 + }, + { + "epoch": 0.5825491325779399, + "grad_norm": 31.99846076965332, + "learning_rate": 4.99585919246192e-06, + "loss": 0.4256, + "num_input_tokens_seen": 28661184, + "step": 9100 + }, + { + "epoch": 0.5828692145189168, + "grad_norm": 18.50117301940918, + "learning_rate": 4.995826989898066e-06, + "loss": 0.3767, + "num_input_tokens_seen": 28675776, + "step": 9105 + }, + { + "epoch": 0.5831892964598937, + "grad_norm": 42.558135986328125, + "learning_rate": 4.995794662705613e-06, + "loss": 0.49, + "num_input_tokens_seen": 28692160, + "step": 9110 + }, + { + "epoch": 0.5835093784008706, + "grad_norm": 28.368776321411133, + "learning_rate": 4.995762210886175e-06, + "loss": 0.361, + "num_input_tokens_seen": 28707328, + "step": 9115 + }, + { + "epoch": 0.5838294603418475, + "grad_norm": 21.560653686523438, + "learning_rate": 4.995729634441371e-06, + "loss": 0.4557, + "num_input_tokens_seen": 28722624, + "step": 9120 + }, + { + "epoch": 0.5841495422828245, + "grad_norm": 32.00550842285156, + "learning_rate": 4.995696933372829e-06, + "loss": 0.4721, + "num_input_tokens_seen": 28737856, + "step": 9125 + }, + { + "epoch": 0.5844696242238013, + "grad_norm": 18.197792053222656, + "learning_rate": 4.995664107682182e-06, + "loss": 0.5019, + "num_input_tokens_seen": 28752448, + "step": 9130 + }, + { + "epoch": 0.5847897061647782, + "grad_norm": 27.12523651123047, + "learning_rate": 4.995631157371069e-06, + "loss": 0.4909, + "num_input_tokens_seen": 28767616, + "step": 9135 + }, + { + "epoch": 0.5851097881057551, + "grad_norm": 15.729421615600586, + "learning_rate": 4.995598082441136e-06, + "loss": 0.5935, + "num_input_tokens_seen": 28782400, + "step": 9140 + }, + { + "epoch": 0.585429870046732, + "grad_norm": 45.78367233276367, + "learning_rate": 4.995564882894033e-06, + "loss": 0.6664, + "num_input_tokens_seen": 28798336, + "step": 9145 + }, + { + "epoch": 0.5857499519877089, + "grad_norm": 29.966291427612305, + "learning_rate": 4.995531558731419e-06, + "loss": 0.4209, + "num_input_tokens_seen": 28814336, + "step": 9150 + }, + { + "epoch": 0.5860700339286857, + "grad_norm": 51.72190475463867, + "learning_rate": 4.995498109954957e-06, + "loss": 0.7192, + "num_input_tokens_seen": 28830656, + "step": 9155 + }, + { + "epoch": 0.5863901158696626, + "grad_norm": 25.09246063232422, + "learning_rate": 4.995464536566319e-06, + "loss": 0.6507, + "num_input_tokens_seen": 28846656, + "step": 9160 + }, + { + "epoch": 0.5867101978106395, + "grad_norm": 29.55617904663086, + "learning_rate": 4.99543083856718e-06, + "loss": 0.5404, + "num_input_tokens_seen": 28862848, + "step": 9165 + }, + { + "epoch": 0.5870302797516164, + "grad_norm": 34.391517639160156, + "learning_rate": 4.995397015959223e-06, + "loss": 0.5889, + "num_input_tokens_seen": 28878400, + "step": 9170 + }, + { + "epoch": 0.5873503616925934, + "grad_norm": 46.48502731323242, + "learning_rate": 4.995363068744137e-06, + "loss": 0.7242, + "num_input_tokens_seen": 28893568, + "step": 9175 + }, + { + "epoch": 0.5876704436335702, + "grad_norm": 29.886098861694336, + "learning_rate": 4.9953289969236174e-06, + "loss": 0.6035, + "num_input_tokens_seen": 28909824, + "step": 9180 + }, + { + "epoch": 0.5879905255745471, + "grad_norm": 25.52352523803711, + "learning_rate": 4.995294800499366e-06, + "loss": 0.4631, + "num_input_tokens_seen": 28925568, + "step": 9185 + }, + { + "epoch": 0.588310607515524, + "grad_norm": 30.148086547851562, + "learning_rate": 4.995260479473089e-06, + "loss": 0.4697, + "num_input_tokens_seen": 28941056, + "step": 9190 + }, + { + "epoch": 0.5886306894565009, + "grad_norm": 47.83500289916992, + "learning_rate": 4.995226033846501e-06, + "loss": 0.5873, + "num_input_tokens_seen": 28955456, + "step": 9195 + }, + { + "epoch": 0.5889507713974778, + "grad_norm": 30.718576431274414, + "learning_rate": 4.9951914636213225e-06, + "loss": 0.4355, + "num_input_tokens_seen": 28971136, + "step": 9200 + }, + { + "epoch": 0.5892708533384546, + "grad_norm": 47.727699279785156, + "learning_rate": 4.995156768799279e-06, + "loss": 0.5336, + "num_input_tokens_seen": 28988480, + "step": 9205 + }, + { + "epoch": 0.5895909352794315, + "grad_norm": 51.75484085083008, + "learning_rate": 4.995121949382103e-06, + "loss": 0.4822, + "num_input_tokens_seen": 29004544, + "step": 9210 + }, + { + "epoch": 0.5899110172204084, + "grad_norm": 16.024328231811523, + "learning_rate": 4.995087005371534e-06, + "loss": 0.5036, + "num_input_tokens_seen": 29019328, + "step": 9215 + }, + { + "epoch": 0.5902310991613853, + "grad_norm": 42.177101135253906, + "learning_rate": 4.995051936769316e-06, + "loss": 0.5889, + "num_input_tokens_seen": 29034752, + "step": 9220 + }, + { + "epoch": 0.5905511811023622, + "grad_norm": 29.21888542175293, + "learning_rate": 4.9950167435772e-06, + "loss": 0.6516, + "num_input_tokens_seen": 29050816, + "step": 9225 + }, + { + "epoch": 0.5908712630433391, + "grad_norm": 24.59104347229004, + "learning_rate": 4.994981425796945e-06, + "loss": 0.3739, + "num_input_tokens_seen": 29065920, + "step": 9230 + }, + { + "epoch": 0.591191344984316, + "grad_norm": 60.13844299316406, + "learning_rate": 4.994945983430313e-06, + "loss": 0.6625, + "num_input_tokens_seen": 29080960, + "step": 9235 + }, + { + "epoch": 0.5915114269252929, + "grad_norm": 32.2174072265625, + "learning_rate": 4.994910416479074e-06, + "loss": 0.611, + "num_input_tokens_seen": 29096768, + "step": 9240 + }, + { + "epoch": 0.5918315088662698, + "grad_norm": 36.26111602783203, + "learning_rate": 4.994874724945005e-06, + "loss": 0.6683, + "num_input_tokens_seen": 29112448, + "step": 9245 + }, + { + "epoch": 0.5921515908072467, + "grad_norm": 44.25364303588867, + "learning_rate": 4.994838908829887e-06, + "loss": 0.6542, + "num_input_tokens_seen": 29127808, + "step": 9250 + }, + { + "epoch": 0.5924716727482235, + "grad_norm": 27.483028411865234, + "learning_rate": 4.994802968135509e-06, + "loss": 0.4267, + "num_input_tokens_seen": 29142912, + "step": 9255 + }, + { + "epoch": 0.5927917546892004, + "grad_norm": 27.029550552368164, + "learning_rate": 4.994766902863666e-06, + "loss": 0.4546, + "num_input_tokens_seen": 29159168, + "step": 9260 + }, + { + "epoch": 0.5931118366301773, + "grad_norm": 27.546083450317383, + "learning_rate": 4.9947307130161586e-06, + "loss": 0.5397, + "num_input_tokens_seen": 29175488, + "step": 9265 + }, + { + "epoch": 0.5934319185711542, + "grad_norm": 37.26860427856445, + "learning_rate": 4.994694398594794e-06, + "loss": 0.5568, + "num_input_tokens_seen": 29191104, + "step": 9270 + }, + { + "epoch": 0.5937520005121311, + "grad_norm": 14.490337371826172, + "learning_rate": 4.994657959601385e-06, + "loss": 0.5317, + "num_input_tokens_seen": 29206208, + "step": 9275 + }, + { + "epoch": 0.5940720824531079, + "grad_norm": 32.1594123840332, + "learning_rate": 4.994621396037752e-06, + "loss": 0.5157, + "num_input_tokens_seen": 29221312, + "step": 9280 + }, + { + "epoch": 0.5943921643940849, + "grad_norm": 44.69757843017578, + "learning_rate": 4.994584707905721e-06, + "loss": 0.5074, + "num_input_tokens_seen": 29236800, + "step": 9285 + }, + { + "epoch": 0.5947122463350618, + "grad_norm": 26.7379207611084, + "learning_rate": 4.994547895207123e-06, + "loss": 0.6807, + "num_input_tokens_seen": 29252480, + "step": 9290 + }, + { + "epoch": 0.5950323282760387, + "grad_norm": 30.52481460571289, + "learning_rate": 4.994510957943797e-06, + "loss": 0.6824, + "num_input_tokens_seen": 29266944, + "step": 9295 + }, + { + "epoch": 0.5953524102170156, + "grad_norm": 20.976749420166016, + "learning_rate": 4.994473896117587e-06, + "loss": 0.501, + "num_input_tokens_seen": 29282816, + "step": 9300 + }, + { + "epoch": 0.5956724921579925, + "grad_norm": 23.0886173248291, + "learning_rate": 4.994436709730344e-06, + "loss": 0.3954, + "num_input_tokens_seen": 29297984, + "step": 9305 + }, + { + "epoch": 0.5959925740989693, + "grad_norm": 28.32071304321289, + "learning_rate": 4.9943993987839245e-06, + "loss": 0.4635, + "num_input_tokens_seen": 29313664, + "step": 9310 + }, + { + "epoch": 0.5963126560399462, + "grad_norm": 19.941667556762695, + "learning_rate": 4.994361963280192e-06, + "loss": 0.5479, + "num_input_tokens_seen": 29329472, + "step": 9315 + }, + { + "epoch": 0.5966327379809231, + "grad_norm": 37.501007080078125, + "learning_rate": 4.994324403221017e-06, + "loss": 0.5641, + "num_input_tokens_seen": 29346048, + "step": 9320 + }, + { + "epoch": 0.5969528199219, + "grad_norm": 21.16779136657715, + "learning_rate": 4.994286718608272e-06, + "loss": 0.4047, + "num_input_tokens_seen": 29361664, + "step": 9325 + }, + { + "epoch": 0.5972729018628768, + "grad_norm": 26.577831268310547, + "learning_rate": 4.994248909443841e-06, + "loss": 0.5308, + "num_input_tokens_seen": 29376896, + "step": 9330 + }, + { + "epoch": 0.5975929838038538, + "grad_norm": 32.09839630126953, + "learning_rate": 4.994210975729611e-06, + "loss": 0.54, + "num_input_tokens_seen": 29392640, + "step": 9335 + }, + { + "epoch": 0.5979130657448307, + "grad_norm": 21.110557556152344, + "learning_rate": 4.994172917467478e-06, + "loss": 0.5542, + "num_input_tokens_seen": 29407808, + "step": 9340 + }, + { + "epoch": 0.5982331476858076, + "grad_norm": 35.191490173339844, + "learning_rate": 4.994134734659341e-06, + "loss": 0.317, + "num_input_tokens_seen": 29422528, + "step": 9345 + }, + { + "epoch": 0.5985532296267845, + "grad_norm": 29.540597915649414, + "learning_rate": 4.994096427307105e-06, + "loss": 0.5401, + "num_input_tokens_seen": 29438272, + "step": 9350 + }, + { + "epoch": 0.5988733115677614, + "grad_norm": 44.85287857055664, + "learning_rate": 4.994057995412687e-06, + "loss": 0.6292, + "num_input_tokens_seen": 29453376, + "step": 9355 + }, + { + "epoch": 0.5991933935087382, + "grad_norm": 35.69842529296875, + "learning_rate": 4.994019438978002e-06, + "loss": 0.5249, + "num_input_tokens_seen": 29469248, + "step": 9360 + }, + { + "epoch": 0.5995134754497151, + "grad_norm": 60.40846252441406, + "learning_rate": 4.993980758004979e-06, + "loss": 0.4756, + "num_input_tokens_seen": 29485888, + "step": 9365 + }, + { + "epoch": 0.599833557390692, + "grad_norm": 50.756591796875, + "learning_rate": 4.993941952495546e-06, + "loss": 0.5408, + "num_input_tokens_seen": 29500864, + "step": 9370 + }, + { + "epoch": 0.6001536393316689, + "grad_norm": 15.169511795043945, + "learning_rate": 4.993903022451643e-06, + "loss": 0.3278, + "num_input_tokens_seen": 29516288, + "step": 9375 + }, + { + "epoch": 0.6004737212726458, + "grad_norm": 17.88841438293457, + "learning_rate": 4.993863967875213e-06, + "loss": 0.4373, + "num_input_tokens_seen": 29530880, + "step": 9380 + }, + { + "epoch": 0.6007938032136226, + "grad_norm": 15.582412719726562, + "learning_rate": 4.993824788768207e-06, + "loss": 0.4586, + "num_input_tokens_seen": 29547840, + "step": 9385 + }, + { + "epoch": 0.6011138851545996, + "grad_norm": 32.27970886230469, + "learning_rate": 4.99378548513258e-06, + "loss": 0.4992, + "num_input_tokens_seen": 29564096, + "step": 9390 + }, + { + "epoch": 0.6014339670955765, + "grad_norm": 25.590879440307617, + "learning_rate": 4.993746056970297e-06, + "loss": 0.6268, + "num_input_tokens_seen": 29579520, + "step": 9395 + }, + { + "epoch": 0.6017540490365534, + "grad_norm": 34.916290283203125, + "learning_rate": 4.993706504283324e-06, + "loss": 0.5077, + "num_input_tokens_seen": 29594048, + "step": 9400 + }, + { + "epoch": 0.6020741309775303, + "grad_norm": 23.953563690185547, + "learning_rate": 4.993666827073639e-06, + "loss": 0.4874, + "num_input_tokens_seen": 29610368, + "step": 9405 + }, + { + "epoch": 0.6023942129185071, + "grad_norm": 42.52962875366211, + "learning_rate": 4.99362702534322e-06, + "loss": 0.5101, + "num_input_tokens_seen": 29625792, + "step": 9410 + }, + { + "epoch": 0.602714294859484, + "grad_norm": 25.68097686767578, + "learning_rate": 4.993587099094057e-06, + "loss": 0.4477, + "num_input_tokens_seen": 29642240, + "step": 9415 + }, + { + "epoch": 0.6030343768004609, + "grad_norm": 32.27827453613281, + "learning_rate": 4.993547048328143e-06, + "loss": 0.484, + "num_input_tokens_seen": 29658176, + "step": 9420 + }, + { + "epoch": 0.6033544587414378, + "grad_norm": 22.486291885375977, + "learning_rate": 4.9935068730474774e-06, + "loss": 0.4888, + "num_input_tokens_seen": 29675392, + "step": 9425 + }, + { + "epoch": 0.6036745406824147, + "grad_norm": 26.321760177612305, + "learning_rate": 4.993466573254067e-06, + "loss": 0.6311, + "num_input_tokens_seen": 29690048, + "step": 9430 + }, + { + "epoch": 0.6039946226233915, + "grad_norm": 36.97983169555664, + "learning_rate": 4.993426148949924e-06, + "loss": 0.4616, + "num_input_tokens_seen": 29705088, + "step": 9435 + }, + { + "epoch": 0.6043147045643685, + "grad_norm": 21.750930786132812, + "learning_rate": 4.993385600137066e-06, + "loss": 0.5847, + "num_input_tokens_seen": 29720576, + "step": 9440 + }, + { + "epoch": 0.6046347865053454, + "grad_norm": 16.882015228271484, + "learning_rate": 4.99334492681752e-06, + "loss": 0.5055, + "num_input_tokens_seen": 29736896, + "step": 9445 + }, + { + "epoch": 0.6049548684463223, + "grad_norm": 23.77710723876953, + "learning_rate": 4.9933041289933145e-06, + "loss": 0.4512, + "num_input_tokens_seen": 29752768, + "step": 9450 + }, + { + "epoch": 0.6052749503872992, + "grad_norm": 39.59722137451172, + "learning_rate": 4.993263206666489e-06, + "loss": 0.5214, + "num_input_tokens_seen": 29768640, + "step": 9455 + }, + { + "epoch": 0.605595032328276, + "grad_norm": 16.28834342956543, + "learning_rate": 4.993222159839086e-06, + "loss": 0.4837, + "num_input_tokens_seen": 29786240, + "step": 9460 + }, + { + "epoch": 0.6059151142692529, + "grad_norm": 26.871618270874023, + "learning_rate": 4.9931809885131545e-06, + "loss": 0.6349, + "num_input_tokens_seen": 29802176, + "step": 9465 + }, + { + "epoch": 0.6062351962102298, + "grad_norm": 23.452106475830078, + "learning_rate": 4.99313969269075e-06, + "loss": 0.4686, + "num_input_tokens_seen": 29817600, + "step": 9470 + }, + { + "epoch": 0.6065552781512067, + "grad_norm": 87.27430725097656, + "learning_rate": 4.993098272373937e-06, + "loss": 0.5119, + "num_input_tokens_seen": 29832960, + "step": 9475 + }, + { + "epoch": 0.6068753600921836, + "grad_norm": 10.9166898727417, + "learning_rate": 4.993056727564782e-06, + "loss": 0.3367, + "num_input_tokens_seen": 29848448, + "step": 9480 + }, + { + "epoch": 0.6071954420331604, + "grad_norm": 19.749191284179688, + "learning_rate": 4.99301505826536e-06, + "loss": 0.4875, + "num_input_tokens_seen": 29863296, + "step": 9485 + }, + { + "epoch": 0.6075155239741373, + "grad_norm": 82.10762786865234, + "learning_rate": 4.992973264477752e-06, + "loss": 0.4788, + "num_input_tokens_seen": 29880128, + "step": 9490 + }, + { + "epoch": 0.6078356059151143, + "grad_norm": 20.817697525024414, + "learning_rate": 4.9929313462040435e-06, + "loss": 0.4658, + "num_input_tokens_seen": 29895296, + "step": 9495 + }, + { + "epoch": 0.6081556878560912, + "grad_norm": 31.43428611755371, + "learning_rate": 4.992889303446329e-06, + "loss": 0.5559, + "num_input_tokens_seen": 29911296, + "step": 9500 + }, + { + "epoch": 0.6084757697970681, + "grad_norm": 39.422603607177734, + "learning_rate": 4.992847136206708e-06, + "loss": 0.598, + "num_input_tokens_seen": 29926912, + "step": 9505 + }, + { + "epoch": 0.608795851738045, + "grad_norm": 64.10171508789062, + "learning_rate": 4.9928048444872854e-06, + "loss": 0.6065, + "num_input_tokens_seen": 29942272, + "step": 9510 + }, + { + "epoch": 0.6091159336790218, + "grad_norm": 34.85307312011719, + "learning_rate": 4.992762428290174e-06, + "loss": 0.5619, + "num_input_tokens_seen": 29958016, + "step": 9515 + }, + { + "epoch": 0.6094360156199987, + "grad_norm": 21.861051559448242, + "learning_rate": 4.992719887617491e-06, + "loss": 0.595, + "num_input_tokens_seen": 29974720, + "step": 9520 + }, + { + "epoch": 0.6097560975609756, + "grad_norm": 19.499494552612305, + "learning_rate": 4.992677222471361e-06, + "loss": 0.5034, + "num_input_tokens_seen": 29991680, + "step": 9525 + }, + { + "epoch": 0.6100761795019525, + "grad_norm": 24.043010711669922, + "learning_rate": 4.992634432853914e-06, + "loss": 0.3773, + "num_input_tokens_seen": 30007168, + "step": 9530 + }, + { + "epoch": 0.6103962614429294, + "grad_norm": 41.61792755126953, + "learning_rate": 4.992591518767288e-06, + "loss": 0.4996, + "num_input_tokens_seen": 30022656, + "step": 9535 + }, + { + "epoch": 0.6107163433839062, + "grad_norm": 33.52785873413086, + "learning_rate": 4.992548480213624e-06, + "loss": 0.4819, + "num_input_tokens_seen": 30038144, + "step": 9540 + }, + { + "epoch": 0.6110364253248832, + "grad_norm": 69.40245056152344, + "learning_rate": 4.992505317195072e-06, + "loss": 0.4784, + "num_input_tokens_seen": 30055488, + "step": 9545 + }, + { + "epoch": 0.6113565072658601, + "grad_norm": 39.76873779296875, + "learning_rate": 4.992462029713789e-06, + "loss": 0.4976, + "num_input_tokens_seen": 30071040, + "step": 9550 + }, + { + "epoch": 0.611676589206837, + "grad_norm": 25.720003128051758, + "learning_rate": 4.992418617771933e-06, + "loss": 0.4546, + "num_input_tokens_seen": 30088000, + "step": 9555 + }, + { + "epoch": 0.6119966711478139, + "grad_norm": 25.169721603393555, + "learning_rate": 4.992375081371675e-06, + "loss": 0.5951, + "num_input_tokens_seen": 30103040, + "step": 9560 + }, + { + "epoch": 0.6123167530887907, + "grad_norm": 129.67201232910156, + "learning_rate": 4.992331420515187e-06, + "loss": 0.5744, + "num_input_tokens_seen": 30117760, + "step": 9565 + }, + { + "epoch": 0.6126368350297676, + "grad_norm": 55.396522521972656, + "learning_rate": 4.992287635204651e-06, + "loss": 0.572, + "num_input_tokens_seen": 30133888, + "step": 9570 + }, + { + "epoch": 0.6129569169707445, + "grad_norm": 38.550743103027344, + "learning_rate": 4.9922437254422515e-06, + "loss": 0.5694, + "num_input_tokens_seen": 30149312, + "step": 9575 + }, + { + "epoch": 0.6132769989117214, + "grad_norm": 31.476646423339844, + "learning_rate": 4.992199691230183e-06, + "loss": 0.4877, + "num_input_tokens_seen": 30163840, + "step": 9580 + }, + { + "epoch": 0.6135970808526983, + "grad_norm": 38.745941162109375, + "learning_rate": 4.992155532570641e-06, + "loss": 0.6384, + "num_input_tokens_seen": 30180544, + "step": 9585 + }, + { + "epoch": 0.6139171627936751, + "grad_norm": 29.112672805786133, + "learning_rate": 4.992111249465836e-06, + "loss": 0.4069, + "num_input_tokens_seen": 30197120, + "step": 9590 + }, + { + "epoch": 0.614237244734652, + "grad_norm": 42.36651611328125, + "learning_rate": 4.992066841917974e-06, + "loss": 0.5439, + "num_input_tokens_seen": 30213376, + "step": 9595 + }, + { + "epoch": 0.614557326675629, + "grad_norm": 21.08698844909668, + "learning_rate": 4.992022309929275e-06, + "loss": 0.5553, + "num_input_tokens_seen": 30229120, + "step": 9600 + }, + { + "epoch": 0.6148774086166059, + "grad_norm": 59.03708267211914, + "learning_rate": 4.991977653501963e-06, + "loss": 0.5806, + "num_input_tokens_seen": 30244544, + "step": 9605 + }, + { + "epoch": 0.6151974905575828, + "grad_norm": 26.23607635498047, + "learning_rate": 4.991932872638267e-06, + "loss": 0.5126, + "num_input_tokens_seen": 30260864, + "step": 9610 + }, + { + "epoch": 0.6155175724985597, + "grad_norm": 47.76034927368164, + "learning_rate": 4.991887967340422e-06, + "loss": 0.5995, + "num_input_tokens_seen": 30275456, + "step": 9615 + }, + { + "epoch": 0.6158376544395365, + "grad_norm": 49.796234130859375, + "learning_rate": 4.991842937610673e-06, + "loss": 0.6657, + "num_input_tokens_seen": 30291008, + "step": 9620 + }, + { + "epoch": 0.6161577363805134, + "grad_norm": 39.92337417602539, + "learning_rate": 4.991797783451267e-06, + "loss": 0.588, + "num_input_tokens_seen": 30306816, + "step": 9625 + }, + { + "epoch": 0.6164778183214903, + "grad_norm": 24.99176788330078, + "learning_rate": 4.991752504864459e-06, + "loss": 0.4988, + "num_input_tokens_seen": 30322240, + "step": 9630 + }, + { + "epoch": 0.6167979002624672, + "grad_norm": 24.175525665283203, + "learning_rate": 4.99170710185251e-06, + "loss": 0.4139, + "num_input_tokens_seen": 30338048, + "step": 9635 + }, + { + "epoch": 0.617117982203444, + "grad_norm": 36.907806396484375, + "learning_rate": 4.991661574417687e-06, + "loss": 0.4087, + "num_input_tokens_seen": 30353984, + "step": 9640 + }, + { + "epoch": 0.6174380641444209, + "grad_norm": 34.958438873291016, + "learning_rate": 4.991615922562264e-06, + "loss": 0.6605, + "num_input_tokens_seen": 30370048, + "step": 9645 + }, + { + "epoch": 0.6177581460853978, + "grad_norm": 30.257915496826172, + "learning_rate": 4.9915701462885194e-06, + "loss": 0.4528, + "num_input_tokens_seen": 30385344, + "step": 9650 + }, + { + "epoch": 0.6180782280263748, + "grad_norm": 25.9368896484375, + "learning_rate": 4.99152424559874e-06, + "loss": 0.578, + "num_input_tokens_seen": 30400576, + "step": 9655 + }, + { + "epoch": 0.6183983099673517, + "grad_norm": 29.047916412353516, + "learning_rate": 4.991478220495218e-06, + "loss": 0.4926, + "num_input_tokens_seen": 30415424, + "step": 9660 + }, + { + "epoch": 0.6187183919083286, + "grad_norm": 31.409706115722656, + "learning_rate": 4.991432070980251e-06, + "loss": 0.5363, + "num_input_tokens_seen": 30430592, + "step": 9665 + }, + { + "epoch": 0.6190384738493054, + "grad_norm": 37.155982971191406, + "learning_rate": 4.991385797056144e-06, + "loss": 0.5223, + "num_input_tokens_seen": 30445952, + "step": 9670 + }, + { + "epoch": 0.6193585557902823, + "grad_norm": 22.28230094909668, + "learning_rate": 4.9913393987252075e-06, + "loss": 0.4267, + "num_input_tokens_seen": 30461568, + "step": 9675 + }, + { + "epoch": 0.6196786377312592, + "grad_norm": 29.745319366455078, + "learning_rate": 4.9912928759897585e-06, + "loss": 0.4994, + "num_input_tokens_seen": 30476672, + "step": 9680 + }, + { + "epoch": 0.6199987196722361, + "grad_norm": 31.88670539855957, + "learning_rate": 4.991246228852119e-06, + "loss": 0.446, + "num_input_tokens_seen": 30492672, + "step": 9685 + }, + { + "epoch": 0.620318801613213, + "grad_norm": 29.871694564819336, + "learning_rate": 4.991199457314621e-06, + "loss": 0.5466, + "num_input_tokens_seen": 30507712, + "step": 9690 + }, + { + "epoch": 0.6206388835541898, + "grad_norm": 61.0704345703125, + "learning_rate": 4.991152561379596e-06, + "loss": 0.7476, + "num_input_tokens_seen": 30524032, + "step": 9695 + }, + { + "epoch": 0.6209589654951667, + "grad_norm": 21.716556549072266, + "learning_rate": 4.9911055410493905e-06, + "loss": 0.4074, + "num_input_tokens_seen": 30539776, + "step": 9700 + }, + { + "epoch": 0.6212790474361437, + "grad_norm": 34.7012825012207, + "learning_rate": 4.991058396326348e-06, + "loss": 0.4596, + "num_input_tokens_seen": 30554752, + "step": 9705 + }, + { + "epoch": 0.6215991293771206, + "grad_norm": 14.846569061279297, + "learning_rate": 4.991011127212826e-06, + "loss": 0.4597, + "num_input_tokens_seen": 30571968, + "step": 9710 + }, + { + "epoch": 0.6219192113180975, + "grad_norm": 38.12076187133789, + "learning_rate": 4.990963733711183e-06, + "loss": 0.5344, + "num_input_tokens_seen": 30587264, + "step": 9715 + }, + { + "epoch": 0.6222392932590743, + "grad_norm": 23.966402053833008, + "learning_rate": 4.990916215823785e-06, + "loss": 0.6401, + "num_input_tokens_seen": 30603200, + "step": 9720 + }, + { + "epoch": 0.6225593752000512, + "grad_norm": 27.201549530029297, + "learning_rate": 4.990868573553007e-06, + "loss": 0.4959, + "num_input_tokens_seen": 30617856, + "step": 9725 + }, + { + "epoch": 0.6228794571410281, + "grad_norm": 24.290630340576172, + "learning_rate": 4.990820806901227e-06, + "loss": 0.6265, + "num_input_tokens_seen": 30632448, + "step": 9730 + }, + { + "epoch": 0.623199539082005, + "grad_norm": 21.602535247802734, + "learning_rate": 4.990772915870829e-06, + "loss": 0.5136, + "num_input_tokens_seen": 30647680, + "step": 9735 + }, + { + "epoch": 0.6235196210229819, + "grad_norm": 35.334354400634766, + "learning_rate": 4.9907249004642076e-06, + "loss": 0.6384, + "num_input_tokens_seen": 30663616, + "step": 9740 + }, + { + "epoch": 0.6238397029639587, + "grad_norm": 41.03178787231445, + "learning_rate": 4.990676760683757e-06, + "loss": 0.6294, + "num_input_tokens_seen": 30682496, + "step": 9745 + }, + { + "epoch": 0.6241597849049356, + "grad_norm": 31.628620147705078, + "learning_rate": 4.990628496531883e-06, + "loss": 0.5362, + "num_input_tokens_seen": 30697664, + "step": 9750 + }, + { + "epoch": 0.6244798668459125, + "grad_norm": 51.590641021728516, + "learning_rate": 4.990580108010994e-06, + "loss": 0.5101, + "num_input_tokens_seen": 30713408, + "step": 9755 + }, + { + "epoch": 0.6247999487868895, + "grad_norm": 23.67918586730957, + "learning_rate": 4.990531595123509e-06, + "loss": 0.4221, + "num_input_tokens_seen": 30730496, + "step": 9760 + }, + { + "epoch": 0.6251200307278664, + "grad_norm": 35.385013580322266, + "learning_rate": 4.9904829578718474e-06, + "loss": 0.4812, + "num_input_tokens_seen": 30745664, + "step": 9765 + }, + { + "epoch": 0.6254401126688433, + "grad_norm": 22.00327491760254, + "learning_rate": 4.9904341962584396e-06, + "loss": 0.4585, + "num_input_tokens_seen": 30760960, + "step": 9770 + }, + { + "epoch": 0.6257601946098201, + "grad_norm": 26.99054527282715, + "learning_rate": 4.99038531028572e-06, + "loss": 0.5699, + "num_input_tokens_seen": 30775872, + "step": 9775 + }, + { + "epoch": 0.626080276550797, + "grad_norm": 27.92428970336914, + "learning_rate": 4.990336299956131e-06, + "loss": 0.4519, + "num_input_tokens_seen": 30791424, + "step": 9780 + }, + { + "epoch": 0.6264003584917739, + "grad_norm": 30.677942276000977, + "learning_rate": 4.990287165272119e-06, + "loss": 0.5148, + "num_input_tokens_seen": 30807680, + "step": 9785 + }, + { + "epoch": 0.6267204404327508, + "grad_norm": 34.58668899536133, + "learning_rate": 4.990237906236136e-06, + "loss": 0.5699, + "num_input_tokens_seen": 30823680, + "step": 9790 + }, + { + "epoch": 0.6270405223737276, + "grad_norm": 22.4052791595459, + "learning_rate": 4.9901885228506444e-06, + "loss": 0.6064, + "num_input_tokens_seen": 30839360, + "step": 9795 + }, + { + "epoch": 0.6273606043147045, + "grad_norm": 41.049659729003906, + "learning_rate": 4.990139015118108e-06, + "loss": 0.4785, + "num_input_tokens_seen": 30854784, + "step": 9800 + }, + { + "epoch": 0.6276806862556814, + "grad_norm": 30.617490768432617, + "learning_rate": 4.990089383041e-06, + "loss": 0.5806, + "num_input_tokens_seen": 30870592, + "step": 9805 + }, + { + "epoch": 0.6280007681966584, + "grad_norm": 41.841407775878906, + "learning_rate": 4.9900396266217985e-06, + "loss": 0.5823, + "num_input_tokens_seen": 30885952, + "step": 9810 + }, + { + "epoch": 0.6283208501376353, + "grad_norm": 27.89768409729004, + "learning_rate": 4.9899897458629886e-06, + "loss": 0.4227, + "num_input_tokens_seen": 30902080, + "step": 9815 + }, + { + "epoch": 0.6286409320786122, + "grad_norm": 44.96581268310547, + "learning_rate": 4.989939740767061e-06, + "loss": 0.519, + "num_input_tokens_seen": 30919232, + "step": 9820 + }, + { + "epoch": 0.628961014019589, + "grad_norm": 50.688072204589844, + "learning_rate": 4.989889611336512e-06, + "loss": 0.4413, + "num_input_tokens_seen": 30936448, + "step": 9825 + }, + { + "epoch": 0.6292810959605659, + "grad_norm": 25.89674186706543, + "learning_rate": 4.989839357573845e-06, + "loss": 0.3904, + "num_input_tokens_seen": 30952256, + "step": 9830 + }, + { + "epoch": 0.6296011779015428, + "grad_norm": 27.26775360107422, + "learning_rate": 4.98978897948157e-06, + "loss": 0.527, + "num_input_tokens_seen": 30967360, + "step": 9835 + }, + { + "epoch": 0.6299212598425197, + "grad_norm": 29.11992073059082, + "learning_rate": 4.989738477062201e-06, + "loss": 0.5358, + "num_input_tokens_seen": 30982016, + "step": 9840 + }, + { + "epoch": 0.6302413417834966, + "grad_norm": 29.02492904663086, + "learning_rate": 4.989687850318262e-06, + "loss": 0.4595, + "num_input_tokens_seen": 30998080, + "step": 9845 + }, + { + "epoch": 0.6305614237244734, + "grad_norm": 18.324474334716797, + "learning_rate": 4.98963709925228e-06, + "loss": 0.4813, + "num_input_tokens_seen": 31013184, + "step": 9850 + }, + { + "epoch": 0.6308815056654503, + "grad_norm": 19.796104431152344, + "learning_rate": 4.98958622386679e-06, + "loss": 0.5403, + "num_input_tokens_seen": 31028800, + "step": 9855 + }, + { + "epoch": 0.6312015876064272, + "grad_norm": 24.60552406311035, + "learning_rate": 4.989535224164331e-06, + "loss": 0.5813, + "num_input_tokens_seen": 31044736, + "step": 9860 + }, + { + "epoch": 0.6315216695474042, + "grad_norm": 17.3646240234375, + "learning_rate": 4.98948410014745e-06, + "loss": 0.4064, + "num_input_tokens_seen": 31059968, + "step": 9865 + }, + { + "epoch": 0.6318417514883811, + "grad_norm": 20.047691345214844, + "learning_rate": 4.989432851818701e-06, + "loss": 0.4707, + "num_input_tokens_seen": 31074880, + "step": 9870 + }, + { + "epoch": 0.6321618334293579, + "grad_norm": 30.557476043701172, + "learning_rate": 4.989381479180643e-06, + "loss": 0.4296, + "num_input_tokens_seen": 31090432, + "step": 9875 + }, + { + "epoch": 0.6324819153703348, + "grad_norm": 33.84144973754883, + "learning_rate": 4.989329982235839e-06, + "loss": 0.6233, + "num_input_tokens_seen": 31107264, + "step": 9880 + }, + { + "epoch": 0.6328019973113117, + "grad_norm": 28.75821876525879, + "learning_rate": 4.989278360986864e-06, + "loss": 0.4683, + "num_input_tokens_seen": 31124096, + "step": 9885 + }, + { + "epoch": 0.6331220792522886, + "grad_norm": 18.550731658935547, + "learning_rate": 4.9892266154362915e-06, + "loss": 0.4671, + "num_input_tokens_seen": 31138752, + "step": 9890 + }, + { + "epoch": 0.6334421611932655, + "grad_norm": 46.04861068725586, + "learning_rate": 4.989174745586709e-06, + "loss": 0.6549, + "num_input_tokens_seen": 31154048, + "step": 9895 + }, + { + "epoch": 0.6337622431342423, + "grad_norm": 14.112957000732422, + "learning_rate": 4.989122751440706e-06, + "loss": 0.7086, + "num_input_tokens_seen": 31170304, + "step": 9900 + }, + { + "epoch": 0.6340823250752192, + "grad_norm": 17.62590789794922, + "learning_rate": 4.989070633000877e-06, + "loss": 0.4503, + "num_input_tokens_seen": 31186368, + "step": 9905 + }, + { + "epoch": 0.6344024070161961, + "grad_norm": 22.063493728637695, + "learning_rate": 4.989018390269827e-06, + "loss": 0.5373, + "num_input_tokens_seen": 31201664, + "step": 9910 + }, + { + "epoch": 0.634722488957173, + "grad_norm": 67.06751251220703, + "learning_rate": 4.988966023250162e-06, + "loss": 0.6288, + "num_input_tokens_seen": 31217216, + "step": 9915 + }, + { + "epoch": 0.63504257089815, + "grad_norm": 27.714672088623047, + "learning_rate": 4.988913531944498e-06, + "loss": 0.4829, + "num_input_tokens_seen": 31233728, + "step": 9920 + }, + { + "epoch": 0.6353626528391269, + "grad_norm": 17.541929244995117, + "learning_rate": 4.988860916355458e-06, + "loss": 0.5572, + "num_input_tokens_seen": 31249664, + "step": 9925 + }, + { + "epoch": 0.6356827347801037, + "grad_norm": 25.177276611328125, + "learning_rate": 4.988808176485668e-06, + "loss": 0.4233, + "num_input_tokens_seen": 31265408, + "step": 9930 + }, + { + "epoch": 0.6360028167210806, + "grad_norm": 18.794422149658203, + "learning_rate": 4.98875531233776e-06, + "loss": 0.3763, + "num_input_tokens_seen": 31281088, + "step": 9935 + }, + { + "epoch": 0.6363228986620575, + "grad_norm": 25.73431396484375, + "learning_rate": 4.9887023239143766e-06, + "loss": 0.5096, + "num_input_tokens_seen": 31296512, + "step": 9940 + }, + { + "epoch": 0.6366429806030344, + "grad_norm": 34.159881591796875, + "learning_rate": 4.988649211218161e-06, + "loss": 0.5026, + "num_input_tokens_seen": 31312000, + "step": 9945 + }, + { + "epoch": 0.6369630625440112, + "grad_norm": 35.69326400756836, + "learning_rate": 4.988595974251768e-06, + "loss": 0.4883, + "num_input_tokens_seen": 31327488, + "step": 9950 + }, + { + "epoch": 0.6372831444849881, + "grad_norm": 38.39735412597656, + "learning_rate": 4.988542613017853e-06, + "loss": 0.4126, + "num_input_tokens_seen": 31343936, + "step": 9955 + }, + { + "epoch": 0.637603226425965, + "grad_norm": 18.49521827697754, + "learning_rate": 4.988489127519084e-06, + "loss": 0.4868, + "num_input_tokens_seen": 31359232, + "step": 9960 + }, + { + "epoch": 0.6379233083669419, + "grad_norm": 21.320110321044922, + "learning_rate": 4.988435517758129e-06, + "loss": 0.4822, + "num_input_tokens_seen": 31375616, + "step": 9965 + }, + { + "epoch": 0.6382433903079189, + "grad_norm": 27.103927612304688, + "learning_rate": 4.988381783737666e-06, + "loss": 0.5156, + "num_input_tokens_seen": 31391680, + "step": 9970 + }, + { + "epoch": 0.6385634722488958, + "grad_norm": 14.873517990112305, + "learning_rate": 4.988327925460378e-06, + "loss": 0.503, + "num_input_tokens_seen": 31407424, + "step": 9975 + }, + { + "epoch": 0.6388835541898726, + "grad_norm": 19.831167221069336, + "learning_rate": 4.988273942928955e-06, + "loss": 0.4856, + "num_input_tokens_seen": 31422912, + "step": 9980 + }, + { + "epoch": 0.6392036361308495, + "grad_norm": 30.803447723388672, + "learning_rate": 4.988219836146092e-06, + "loss": 0.5529, + "num_input_tokens_seen": 31438912, + "step": 9985 + }, + { + "epoch": 0.6395237180718264, + "grad_norm": 20.89615249633789, + "learning_rate": 4.988165605114492e-06, + "loss": 0.404, + "num_input_tokens_seen": 31455168, + "step": 9990 + }, + { + "epoch": 0.6398438000128033, + "grad_norm": 27.70865821838379, + "learning_rate": 4.988111249836861e-06, + "loss": 0.4475, + "num_input_tokens_seen": 31470592, + "step": 9995 + }, + { + "epoch": 0.6401638819537802, + "grad_norm": 21.295930862426758, + "learning_rate": 4.988056770315915e-06, + "loss": 0.4538, + "num_input_tokens_seen": 31485632, + "step": 10000 + }, + { + "epoch": 0.640483963894757, + "grad_norm": 17.818187713623047, + "learning_rate": 4.988002166554373e-06, + "loss": 0.4255, + "num_input_tokens_seen": 31500352, + "step": 10005 + }, + { + "epoch": 0.6408040458357339, + "grad_norm": 35.117218017578125, + "learning_rate": 4.987947438554963e-06, + "loss": 0.5204, + "num_input_tokens_seen": 31516928, + "step": 10010 + }, + { + "epoch": 0.6411241277767108, + "grad_norm": 19.536333084106445, + "learning_rate": 4.987892586320417e-06, + "loss": 0.4247, + "num_input_tokens_seen": 31532608, + "step": 10015 + }, + { + "epoch": 0.6414442097176877, + "grad_norm": 54.585548400878906, + "learning_rate": 4.987837609853474e-06, + "loss": 0.6965, + "num_input_tokens_seen": 31548608, + "step": 10020 + }, + { + "epoch": 0.6417642916586647, + "grad_norm": 52.75040817260742, + "learning_rate": 4.98778250915688e-06, + "loss": 0.6434, + "num_input_tokens_seen": 31564224, + "step": 10025 + }, + { + "epoch": 0.6420843735996415, + "grad_norm": 17.46915626525879, + "learning_rate": 4.9877272842333855e-06, + "loss": 0.5233, + "num_input_tokens_seen": 31579200, + "step": 10030 + }, + { + "epoch": 0.6424044555406184, + "grad_norm": 50.24399948120117, + "learning_rate": 4.987671935085749e-06, + "loss": 0.5963, + "num_input_tokens_seen": 31594816, + "step": 10035 + }, + { + "epoch": 0.6427245374815953, + "grad_norm": 17.814611434936523, + "learning_rate": 4.987616461716732e-06, + "loss": 0.4608, + "num_input_tokens_seen": 31610432, + "step": 10040 + }, + { + "epoch": 0.6430446194225722, + "grad_norm": 42.51185607910156, + "learning_rate": 4.987560864129109e-06, + "loss": 0.5298, + "num_input_tokens_seen": 31627520, + "step": 10045 + }, + { + "epoch": 0.6433647013635491, + "grad_norm": 38.736122131347656, + "learning_rate": 4.9875051423256515e-06, + "loss": 0.4993, + "num_input_tokens_seen": 31644352, + "step": 10050 + }, + { + "epoch": 0.6436847833045259, + "grad_norm": 26.07855796813965, + "learning_rate": 4.987449296309145e-06, + "loss": 0.6124, + "num_input_tokens_seen": 31660160, + "step": 10055 + }, + { + "epoch": 0.6440048652455028, + "grad_norm": 37.150081634521484, + "learning_rate": 4.987393326082377e-06, + "loss": 0.597, + "num_input_tokens_seen": 31676352, + "step": 10060 + }, + { + "epoch": 0.6443249471864797, + "grad_norm": 52.35087585449219, + "learning_rate": 4.987337231648143e-06, + "loss": 0.4995, + "num_input_tokens_seen": 31692160, + "step": 10065 + }, + { + "epoch": 0.6446450291274566, + "grad_norm": 21.155261993408203, + "learning_rate": 4.987281013009244e-06, + "loss": 0.475, + "num_input_tokens_seen": 31707712, + "step": 10070 + }, + { + "epoch": 0.6449651110684336, + "grad_norm": 16.161949157714844, + "learning_rate": 4.987224670168487e-06, + "loss": 0.385, + "num_input_tokens_seen": 31723776, + "step": 10075 + }, + { + "epoch": 0.6452851930094105, + "grad_norm": 33.667240142822266, + "learning_rate": 4.987168203128685e-06, + "loss": 0.5022, + "num_input_tokens_seen": 31740672, + "step": 10080 + }, + { + "epoch": 0.6456052749503873, + "grad_norm": 27.484901428222656, + "learning_rate": 4.987111611892658e-06, + "loss": 0.3749, + "num_input_tokens_seen": 31755648, + "step": 10085 + }, + { + "epoch": 0.6459253568913642, + "grad_norm": 20.18424415588379, + "learning_rate": 4.987054896463233e-06, + "loss": 0.6307, + "num_input_tokens_seen": 31770624, + "step": 10090 + }, + { + "epoch": 0.6462454388323411, + "grad_norm": 20.20116424560547, + "learning_rate": 4.9869980568432395e-06, + "loss": 0.5458, + "num_input_tokens_seen": 31786816, + "step": 10095 + }, + { + "epoch": 0.646565520773318, + "grad_norm": 37.6721305847168, + "learning_rate": 4.986941093035519e-06, + "loss": 0.4628, + "num_input_tokens_seen": 31801792, + "step": 10100 + }, + { + "epoch": 0.6468856027142948, + "grad_norm": 36.96791458129883, + "learning_rate": 4.986884005042913e-06, + "loss": 0.5302, + "num_input_tokens_seen": 31818944, + "step": 10105 + }, + { + "epoch": 0.6472056846552717, + "grad_norm": 23.50295066833496, + "learning_rate": 4.986826792868274e-06, + "loss": 0.5139, + "num_input_tokens_seen": 31834176, + "step": 10110 + }, + { + "epoch": 0.6475257665962486, + "grad_norm": 18.29768180847168, + "learning_rate": 4.986769456514459e-06, + "loss": 0.4676, + "num_input_tokens_seen": 31850176, + "step": 10115 + }, + { + "epoch": 0.6478458485372255, + "grad_norm": 40.533145904541016, + "learning_rate": 4.986711995984329e-06, + "loss": 0.5846, + "num_input_tokens_seen": 31865600, + "step": 10120 + }, + { + "epoch": 0.6481659304782024, + "grad_norm": 22.789100646972656, + "learning_rate": 4.986654411280756e-06, + "loss": 0.5411, + "num_input_tokens_seen": 31880512, + "step": 10125 + }, + { + "epoch": 0.6484860124191794, + "grad_norm": 24.021343231201172, + "learning_rate": 4.986596702406613e-06, + "loss": 0.5552, + "num_input_tokens_seen": 31896512, + "step": 10130 + }, + { + "epoch": 0.6488060943601562, + "grad_norm": 34.94270706176758, + "learning_rate": 4.986538869364784e-06, + "loss": 0.5344, + "num_input_tokens_seen": 31912768, + "step": 10135 + }, + { + "epoch": 0.6491261763011331, + "grad_norm": 44.63126754760742, + "learning_rate": 4.986480912158156e-06, + "loss": 0.5518, + "num_input_tokens_seen": 31927488, + "step": 10140 + }, + { + "epoch": 0.64944625824211, + "grad_norm": 25.742822647094727, + "learning_rate": 4.986422830789622e-06, + "loss": 0.448, + "num_input_tokens_seen": 31942784, + "step": 10145 + }, + { + "epoch": 0.6497663401830869, + "grad_norm": 14.126810073852539, + "learning_rate": 4.986364625262083e-06, + "loss": 0.4547, + "num_input_tokens_seen": 31958528, + "step": 10150 + }, + { + "epoch": 0.6500864221240638, + "grad_norm": 30.094491958618164, + "learning_rate": 4.986306295578446e-06, + "loss": 0.4755, + "num_input_tokens_seen": 31974016, + "step": 10155 + }, + { + "epoch": 0.6504065040650406, + "grad_norm": 31.94321632385254, + "learning_rate": 4.986247841741624e-06, + "loss": 0.4424, + "num_input_tokens_seen": 31990720, + "step": 10160 + }, + { + "epoch": 0.6507265860060175, + "grad_norm": 18.392990112304688, + "learning_rate": 4.986189263754534e-06, + "loss": 0.4666, + "num_input_tokens_seen": 32007168, + "step": 10165 + }, + { + "epoch": 0.6510466679469944, + "grad_norm": 18.671039581298828, + "learning_rate": 4.9861305616201024e-06, + "loss": 0.4939, + "num_input_tokens_seen": 32022208, + "step": 10170 + }, + { + "epoch": 0.6513667498879713, + "grad_norm": 32.99215316772461, + "learning_rate": 4.986071735341261e-06, + "loss": 0.6421, + "num_input_tokens_seen": 32037760, + "step": 10175 + }, + { + "epoch": 0.6516868318289483, + "grad_norm": 38.99028778076172, + "learning_rate": 4.986012784920946e-06, + "loss": 0.4327, + "num_input_tokens_seen": 32052800, + "step": 10180 + }, + { + "epoch": 0.6520069137699251, + "grad_norm": 18.19139289855957, + "learning_rate": 4.9859537103621026e-06, + "loss": 0.6118, + "num_input_tokens_seen": 32068288, + "step": 10185 + }, + { + "epoch": 0.652326995710902, + "grad_norm": 32.494625091552734, + "learning_rate": 4.985894511667679e-06, + "loss": 0.4767, + "num_input_tokens_seen": 32083200, + "step": 10190 + }, + { + "epoch": 0.6526470776518789, + "grad_norm": 19.539180755615234, + "learning_rate": 4.985835188840632e-06, + "loss": 0.4958, + "num_input_tokens_seen": 32098432, + "step": 10195 + }, + { + "epoch": 0.6529671595928558, + "grad_norm": 19.62476348876953, + "learning_rate": 4.985775741883924e-06, + "loss": 0.4854, + "num_input_tokens_seen": 32114944, + "step": 10200 + }, + { + "epoch": 0.6532872415338327, + "grad_norm": 30.147470474243164, + "learning_rate": 4.985716170800524e-06, + "loss": 0.404, + "num_input_tokens_seen": 32131520, + "step": 10205 + }, + { + "epoch": 0.6536073234748095, + "grad_norm": 25.493064880371094, + "learning_rate": 4.985656475593405e-06, + "loss": 0.4456, + "num_input_tokens_seen": 32147008, + "step": 10210 + }, + { + "epoch": 0.6539274054157864, + "grad_norm": 38.86955642700195, + "learning_rate": 4.98559665626555e-06, + "loss": 0.5132, + "num_input_tokens_seen": 32162560, + "step": 10215 + }, + { + "epoch": 0.6542474873567633, + "grad_norm": 26.2667293548584, + "learning_rate": 4.985536712819944e-06, + "loss": 0.4752, + "num_input_tokens_seen": 32177024, + "step": 10220 + }, + { + "epoch": 0.6545675692977402, + "grad_norm": 19.38361167907715, + "learning_rate": 4.9854766452595815e-06, + "loss": 0.5257, + "num_input_tokens_seen": 32193024, + "step": 10225 + }, + { + "epoch": 0.6548876512387171, + "grad_norm": 23.472434997558594, + "learning_rate": 4.985416453587462e-06, + "loss": 0.4498, + "num_input_tokens_seen": 32208896, + "step": 10230 + }, + { + "epoch": 0.655207733179694, + "grad_norm": 26.833595275878906, + "learning_rate": 4.985356137806591e-06, + "loss": 0.4368, + "num_input_tokens_seen": 32225280, + "step": 10235 + }, + { + "epoch": 0.6555278151206709, + "grad_norm": 17.922847747802734, + "learning_rate": 4.98529569791998e-06, + "loss": 0.4657, + "num_input_tokens_seen": 32240768, + "step": 10240 + }, + { + "epoch": 0.6558478970616478, + "grad_norm": 29.007282257080078, + "learning_rate": 4.985235133930647e-06, + "loss": 0.5121, + "num_input_tokens_seen": 32255872, + "step": 10245 + }, + { + "epoch": 0.6561679790026247, + "grad_norm": 29.50904083251953, + "learning_rate": 4.9851744458416175e-06, + "loss": 0.5042, + "num_input_tokens_seen": 32272512, + "step": 10250 + }, + { + "epoch": 0.6564880609436016, + "grad_norm": 24.242246627807617, + "learning_rate": 4.98511363365592e-06, + "loss": 0.5399, + "num_input_tokens_seen": 32286272, + "step": 10255 + }, + { + "epoch": 0.6568081428845784, + "grad_norm": 21.42854118347168, + "learning_rate": 4.985052697376592e-06, + "loss": 0.5322, + "num_input_tokens_seen": 32301952, + "step": 10260 + }, + { + "epoch": 0.6571282248255553, + "grad_norm": 20.016834259033203, + "learning_rate": 4.984991637006677e-06, + "loss": 0.6091, + "num_input_tokens_seen": 32318208, + "step": 10265 + }, + { + "epoch": 0.6574483067665322, + "grad_norm": 18.533475875854492, + "learning_rate": 4.984930452549224e-06, + "loss": 0.5593, + "num_input_tokens_seen": 32333504, + "step": 10270 + }, + { + "epoch": 0.6577683887075091, + "grad_norm": 16.552337646484375, + "learning_rate": 4.984869144007286e-06, + "loss": 0.5094, + "num_input_tokens_seen": 32350208, + "step": 10275 + }, + { + "epoch": 0.658088470648486, + "grad_norm": 43.041297912597656, + "learning_rate": 4.984807711383928e-06, + "loss": 0.5346, + "num_input_tokens_seen": 32365376, + "step": 10280 + }, + { + "epoch": 0.6584085525894628, + "grad_norm": 28.090389251708984, + "learning_rate": 4.984746154682215e-06, + "loss": 0.4705, + "num_input_tokens_seen": 32381248, + "step": 10285 + }, + { + "epoch": 0.6587286345304398, + "grad_norm": 24.666288375854492, + "learning_rate": 4.984684473905221e-06, + "loss": 0.5344, + "num_input_tokens_seen": 32395968, + "step": 10290 + }, + { + "epoch": 0.6590487164714167, + "grad_norm": 22.888572692871094, + "learning_rate": 4.9846226690560275e-06, + "loss": 0.459, + "num_input_tokens_seen": 32411456, + "step": 10295 + }, + { + "epoch": 0.6593687984123936, + "grad_norm": 33.18014907836914, + "learning_rate": 4.984560740137719e-06, + "loss": 0.4806, + "num_input_tokens_seen": 32426880, + "step": 10300 + }, + { + "epoch": 0.6596888803533705, + "grad_norm": 33.519386291503906, + "learning_rate": 4.98449868715339e-06, + "loss": 0.5004, + "num_input_tokens_seen": 32442368, + "step": 10305 + }, + { + "epoch": 0.6600089622943474, + "grad_norm": 25.687376022338867, + "learning_rate": 4.984436510106137e-06, + "loss": 0.5125, + "num_input_tokens_seen": 32457920, + "step": 10310 + }, + { + "epoch": 0.6603290442353242, + "grad_norm": 28.04863929748535, + "learning_rate": 4.984374208999066e-06, + "loss": 0.4846, + "num_input_tokens_seen": 32473536, + "step": 10315 + }, + { + "epoch": 0.6606491261763011, + "grad_norm": 30.585485458374023, + "learning_rate": 4.984311783835287e-06, + "loss": 0.4919, + "num_input_tokens_seen": 32490240, + "step": 10320 + }, + { + "epoch": 0.660969208117278, + "grad_norm": 34.335487365722656, + "learning_rate": 4.984249234617919e-06, + "loss": 0.4298, + "num_input_tokens_seen": 32505728, + "step": 10325 + }, + { + "epoch": 0.6612892900582549, + "grad_norm": 47.32184600830078, + "learning_rate": 4.984186561350083e-06, + "loss": 0.5663, + "num_input_tokens_seen": 32520960, + "step": 10330 + }, + { + "epoch": 0.6616093719992318, + "grad_norm": 18.451427459716797, + "learning_rate": 4.98412376403491e-06, + "loss": 0.5126, + "num_input_tokens_seen": 32535872, + "step": 10335 + }, + { + "epoch": 0.6619294539402087, + "grad_norm": 27.737829208374023, + "learning_rate": 4.984060842675536e-06, + "loss": 0.4722, + "num_input_tokens_seen": 32552448, + "step": 10340 + }, + { + "epoch": 0.6622495358811856, + "grad_norm": 37.217552185058594, + "learning_rate": 4.983997797275103e-06, + "loss": 0.4801, + "num_input_tokens_seen": 32567232, + "step": 10345 + }, + { + "epoch": 0.6625696178221625, + "grad_norm": 15.787866592407227, + "learning_rate": 4.983934627836758e-06, + "loss": 0.4348, + "num_input_tokens_seen": 32583360, + "step": 10350 + }, + { + "epoch": 0.6628896997631394, + "grad_norm": 42.86286544799805, + "learning_rate": 4.983871334363656e-06, + "loss": 0.4362, + "num_input_tokens_seen": 32598656, + "step": 10355 + }, + { + "epoch": 0.6632097817041163, + "grad_norm": 38.347267150878906, + "learning_rate": 4.9838079168589574e-06, + "loss": 0.4203, + "num_input_tokens_seen": 32614144, + "step": 10360 + }, + { + "epoch": 0.6635298636450931, + "grad_norm": 21.827211380004883, + "learning_rate": 4.98374437532583e-06, + "loss": 0.5201, + "num_input_tokens_seen": 32629120, + "step": 10365 + }, + { + "epoch": 0.66384994558607, + "grad_norm": 30.160625457763672, + "learning_rate": 4.9836807097674445e-06, + "loss": 0.6147, + "num_input_tokens_seen": 32645952, + "step": 10370 + }, + { + "epoch": 0.6641700275270469, + "grad_norm": 18.23525619506836, + "learning_rate": 4.983616920186982e-06, + "loss": 0.6087, + "num_input_tokens_seen": 32661696, + "step": 10375 + }, + { + "epoch": 0.6644901094680238, + "grad_norm": 29.9882869720459, + "learning_rate": 4.983553006587627e-06, + "loss": 0.5026, + "num_input_tokens_seen": 32677760, + "step": 10380 + }, + { + "epoch": 0.6648101914090007, + "grad_norm": 20.471261978149414, + "learning_rate": 4.983488968972572e-06, + "loss": 0.4457, + "num_input_tokens_seen": 32693568, + "step": 10385 + }, + { + "epoch": 0.6651302733499775, + "grad_norm": 25.933229446411133, + "learning_rate": 4.983424807345013e-06, + "loss": 0.4913, + "num_input_tokens_seen": 32708736, + "step": 10390 + }, + { + "epoch": 0.6654503552909545, + "grad_norm": 25.50645637512207, + "learning_rate": 4.983360521708156e-06, + "loss": 0.4128, + "num_input_tokens_seen": 32725440, + "step": 10395 + }, + { + "epoch": 0.6657704372319314, + "grad_norm": 33.66632843017578, + "learning_rate": 4.9832961120652085e-06, + "loss": 0.4205, + "num_input_tokens_seen": 32740608, + "step": 10400 + }, + { + "epoch": 0.6660905191729083, + "grad_norm": 18.595888137817383, + "learning_rate": 4.983231578419389e-06, + "loss": 0.6104, + "num_input_tokens_seen": 32756416, + "step": 10405 + }, + { + "epoch": 0.6664106011138852, + "grad_norm": 20.43606185913086, + "learning_rate": 4.983166920773919e-06, + "loss": 0.3898, + "num_input_tokens_seen": 32771904, + "step": 10410 + }, + { + "epoch": 0.666730683054862, + "grad_norm": 52.39131546020508, + "learning_rate": 4.983102139132027e-06, + "loss": 0.6124, + "num_input_tokens_seen": 32787392, + "step": 10415 + }, + { + "epoch": 0.6670507649958389, + "grad_norm": 20.825286865234375, + "learning_rate": 4.983037233496948e-06, + "loss": 0.4642, + "num_input_tokens_seen": 32803136, + "step": 10420 + }, + { + "epoch": 0.6673708469368158, + "grad_norm": 18.661603927612305, + "learning_rate": 4.982972203871924e-06, + "loss": 0.4817, + "num_input_tokens_seen": 32818624, + "step": 10425 + }, + { + "epoch": 0.6676909288777927, + "grad_norm": 20.75483512878418, + "learning_rate": 4.9829070502602e-06, + "loss": 0.4955, + "num_input_tokens_seen": 32833920, + "step": 10430 + }, + { + "epoch": 0.6680110108187696, + "grad_norm": 17.35269546508789, + "learning_rate": 4.982841772665033e-06, + "loss": 0.4401, + "num_input_tokens_seen": 32848512, + "step": 10435 + }, + { + "epoch": 0.6683310927597464, + "grad_norm": 25.9147891998291, + "learning_rate": 4.9827763710896795e-06, + "loss": 0.4935, + "num_input_tokens_seen": 32865088, + "step": 10440 + }, + { + "epoch": 0.6686511747007234, + "grad_norm": 40.11516189575195, + "learning_rate": 4.982710845537406e-06, + "loss": 0.6524, + "num_input_tokens_seen": 32882048, + "step": 10445 + }, + { + "epoch": 0.6689712566417003, + "grad_norm": 27.3958740234375, + "learning_rate": 4.982645196011485e-06, + "loss": 0.4655, + "num_input_tokens_seen": 32898624, + "step": 10450 + }, + { + "epoch": 0.6692913385826772, + "grad_norm": 36.781578063964844, + "learning_rate": 4.9825794225151956e-06, + "loss": 0.6318, + "num_input_tokens_seen": 32914176, + "step": 10455 + }, + { + "epoch": 0.6696114205236541, + "grad_norm": 32.05882263183594, + "learning_rate": 4.98251352505182e-06, + "loss": 0.4735, + "num_input_tokens_seen": 32931136, + "step": 10460 + }, + { + "epoch": 0.669931502464631, + "grad_norm": 33.53985595703125, + "learning_rate": 4.98244750362465e-06, + "loss": 0.4761, + "num_input_tokens_seen": 32947648, + "step": 10465 + }, + { + "epoch": 0.6702515844056078, + "grad_norm": 21.17329216003418, + "learning_rate": 4.982381358236983e-06, + "loss": 0.4036, + "num_input_tokens_seen": 32964224, + "step": 10470 + }, + { + "epoch": 0.6705716663465847, + "grad_norm": 41.66630172729492, + "learning_rate": 4.98231508889212e-06, + "loss": 0.6034, + "num_input_tokens_seen": 32982080, + "step": 10475 + }, + { + "epoch": 0.6708917482875616, + "grad_norm": 31.554553985595703, + "learning_rate": 4.9822486955933725e-06, + "loss": 0.6726, + "num_input_tokens_seen": 32997440, + "step": 10480 + }, + { + "epoch": 0.6712118302285385, + "grad_norm": 23.595796585083008, + "learning_rate": 4.982182178344054e-06, + "loss": 0.5825, + "num_input_tokens_seen": 33013056, + "step": 10485 + }, + { + "epoch": 0.6715319121695154, + "grad_norm": 13.96167278289795, + "learning_rate": 4.982115537147487e-06, + "loss": 0.5856, + "num_input_tokens_seen": 33028416, + "step": 10490 + }, + { + "epoch": 0.6718519941104922, + "grad_norm": 25.1162052154541, + "learning_rate": 4.982048772006998e-06, + "loss": 0.5513, + "num_input_tokens_seen": 33042880, + "step": 10495 + }, + { + "epoch": 0.6721720760514692, + "grad_norm": 24.34369468688965, + "learning_rate": 4.981981882925924e-06, + "loss": 0.408, + "num_input_tokens_seen": 33058240, + "step": 10500 + }, + { + "epoch": 0.6724921579924461, + "grad_norm": 12.68226146697998, + "learning_rate": 4.981914869907601e-06, + "loss": 0.5072, + "num_input_tokens_seen": 33073920, + "step": 10505 + }, + { + "epoch": 0.672812239933423, + "grad_norm": 24.870155334472656, + "learning_rate": 4.981847732955378e-06, + "loss": 0.4809, + "num_input_tokens_seen": 33089344, + "step": 10510 + }, + { + "epoch": 0.6731323218743999, + "grad_norm": 39.568695068359375, + "learning_rate": 4.9817804720726074e-06, + "loss": 0.477, + "num_input_tokens_seen": 33104384, + "step": 10515 + }, + { + "epoch": 0.6734524038153767, + "grad_norm": 28.14733123779297, + "learning_rate": 4.981713087262646e-06, + "loss": 0.5688, + "num_input_tokens_seen": 33120064, + "step": 10520 + }, + { + "epoch": 0.6737724857563536, + "grad_norm": 21.1607608795166, + "learning_rate": 4.98164557852886e-06, + "loss": 0.5787, + "num_input_tokens_seen": 33136128, + "step": 10525 + }, + { + "epoch": 0.6740925676973305, + "grad_norm": 24.09821128845215, + "learning_rate": 4.981577945874621e-06, + "loss": 0.4445, + "num_input_tokens_seen": 33154816, + "step": 10530 + }, + { + "epoch": 0.6744126496383074, + "grad_norm": 22.979965209960938, + "learning_rate": 4.981510189303305e-06, + "loss": 0.6317, + "num_input_tokens_seen": 33169920, + "step": 10535 + }, + { + "epoch": 0.6747327315792843, + "grad_norm": 33.07871627807617, + "learning_rate": 4.981442308818295e-06, + "loss": 0.7115, + "num_input_tokens_seen": 33187712, + "step": 10540 + }, + { + "epoch": 0.6750528135202611, + "grad_norm": 32.40293502807617, + "learning_rate": 4.981374304422982e-06, + "loss": 0.6101, + "num_input_tokens_seen": 33202880, + "step": 10545 + }, + { + "epoch": 0.6753728954612381, + "grad_norm": 17.175193786621094, + "learning_rate": 4.981306176120761e-06, + "loss": 0.4095, + "num_input_tokens_seen": 33218944, + "step": 10550 + }, + { + "epoch": 0.675692977402215, + "grad_norm": 16.773773193359375, + "learning_rate": 4.981237923915036e-06, + "loss": 0.5573, + "num_input_tokens_seen": 33234688, + "step": 10555 + }, + { + "epoch": 0.6760130593431919, + "grad_norm": 46.4110221862793, + "learning_rate": 4.981169547809212e-06, + "loss": 0.4964, + "num_input_tokens_seen": 33249920, + "step": 10560 + }, + { + "epoch": 0.6763331412841688, + "grad_norm": 22.79724884033203, + "learning_rate": 4.981101047806704e-06, + "loss": 0.4489, + "num_input_tokens_seen": 33265792, + "step": 10565 + }, + { + "epoch": 0.6766532232251457, + "grad_norm": 40.8646125793457, + "learning_rate": 4.981032423910934e-06, + "loss": 0.5544, + "num_input_tokens_seen": 33280832, + "step": 10570 + }, + { + "epoch": 0.6769733051661225, + "grad_norm": 61.46011734008789, + "learning_rate": 4.980963676125328e-06, + "loss": 0.4573, + "num_input_tokens_seen": 33296576, + "step": 10575 + }, + { + "epoch": 0.6772933871070994, + "grad_norm": 30.388057708740234, + "learning_rate": 4.980894804453319e-06, + "loss": 0.4879, + "num_input_tokens_seen": 33316672, + "step": 10580 + }, + { + "epoch": 0.6776134690480763, + "grad_norm": 17.945266723632812, + "learning_rate": 4.980825808898346e-06, + "loss": 0.5271, + "num_input_tokens_seen": 33331456, + "step": 10585 + }, + { + "epoch": 0.6779335509890532, + "grad_norm": 16.19684600830078, + "learning_rate": 4.980756689463854e-06, + "loss": 0.4008, + "num_input_tokens_seen": 33346752, + "step": 10590 + }, + { + "epoch": 0.67825363293003, + "grad_norm": 30.482776641845703, + "learning_rate": 4.980687446153294e-06, + "loss": 0.5284, + "num_input_tokens_seen": 33362688, + "step": 10595 + }, + { + "epoch": 0.6785737148710069, + "grad_norm": 41.2686767578125, + "learning_rate": 4.980618078970125e-06, + "loss": 0.4703, + "num_input_tokens_seen": 33377600, + "step": 10600 + }, + { + "epoch": 0.6788937968119839, + "grad_norm": 28.947877883911133, + "learning_rate": 4.98054858791781e-06, + "loss": 0.6389, + "num_input_tokens_seen": 33393280, + "step": 10605 + }, + { + "epoch": 0.6792138787529608, + "grad_norm": 36.533390045166016, + "learning_rate": 4.98047897299982e-06, + "loss": 0.5023, + "num_input_tokens_seen": 33410112, + "step": 10610 + }, + { + "epoch": 0.6795339606939377, + "grad_norm": 23.6832218170166, + "learning_rate": 4.98040923421963e-06, + "loss": 0.494, + "num_input_tokens_seen": 33426624, + "step": 10615 + }, + { + "epoch": 0.6798540426349146, + "grad_norm": 36.203426361083984, + "learning_rate": 4.980339371580723e-06, + "loss": 0.5107, + "num_input_tokens_seen": 33442240, + "step": 10620 + }, + { + "epoch": 0.6801741245758914, + "grad_norm": 40.179752349853516, + "learning_rate": 4.980269385086587e-06, + "loss": 0.6039, + "num_input_tokens_seen": 33458432, + "step": 10625 + }, + { + "epoch": 0.6804942065168683, + "grad_norm": 23.036460876464844, + "learning_rate": 4.9801992747407175e-06, + "loss": 0.6173, + "num_input_tokens_seen": 33473856, + "step": 10630 + }, + { + "epoch": 0.6808142884578452, + "grad_norm": 47.47160720825195, + "learning_rate": 4.980129040546615e-06, + "loss": 0.5634, + "num_input_tokens_seen": 33489216, + "step": 10635 + }, + { + "epoch": 0.6811343703988221, + "grad_norm": 24.012405395507812, + "learning_rate": 4.980058682507787e-06, + "loss": 0.627, + "num_input_tokens_seen": 33505280, + "step": 10640 + }, + { + "epoch": 0.681454452339799, + "grad_norm": 29.548608779907227, + "learning_rate": 4.9799882006277464e-06, + "loss": 0.4155, + "num_input_tokens_seen": 33520512, + "step": 10645 + }, + { + "epoch": 0.6817745342807758, + "grad_norm": 28.965707778930664, + "learning_rate": 4.979917594910012e-06, + "loss": 0.5237, + "num_input_tokens_seen": 33537344, + "step": 10650 + }, + { + "epoch": 0.6820946162217527, + "grad_norm": 48.3701286315918, + "learning_rate": 4.979846865358111e-06, + "loss": 0.4356, + "num_input_tokens_seen": 33552128, + "step": 10655 + }, + { + "epoch": 0.6824146981627297, + "grad_norm": 25.6319637298584, + "learning_rate": 4.979776011975575e-06, + "loss": 0.425, + "num_input_tokens_seen": 33566784, + "step": 10660 + }, + { + "epoch": 0.6827347801037066, + "grad_norm": 32.427940368652344, + "learning_rate": 4.979705034765942e-06, + "loss": 0.4357, + "num_input_tokens_seen": 33582080, + "step": 10665 + }, + { + "epoch": 0.6830548620446835, + "grad_norm": 59.91474533081055, + "learning_rate": 4.9796339337327545e-06, + "loss": 0.5631, + "num_input_tokens_seen": 33596864, + "step": 10670 + }, + { + "epoch": 0.6833749439856603, + "grad_norm": 25.786470413208008, + "learning_rate": 4.979562708879565e-06, + "loss": 0.4965, + "num_input_tokens_seen": 33613056, + "step": 10675 + }, + { + "epoch": 0.6836950259266372, + "grad_norm": 64.52566528320312, + "learning_rate": 4.979491360209929e-06, + "loss": 0.582, + "num_input_tokens_seen": 33628160, + "step": 10680 + }, + { + "epoch": 0.6840151078676141, + "grad_norm": 37.265785217285156, + "learning_rate": 4.97941988772741e-06, + "loss": 0.4595, + "num_input_tokens_seen": 33645184, + "step": 10685 + }, + { + "epoch": 0.684335189808591, + "grad_norm": 24.19659423828125, + "learning_rate": 4.979348291435577e-06, + "loss": 0.5501, + "num_input_tokens_seen": 33660800, + "step": 10690 + }, + { + "epoch": 0.6846552717495679, + "grad_norm": 28.214599609375, + "learning_rate": 4.979276571338004e-06, + "loss": 0.4411, + "num_input_tokens_seen": 33676736, + "step": 10695 + }, + { + "epoch": 0.6849753536905447, + "grad_norm": 28.034313201904297, + "learning_rate": 4.979204727438274e-06, + "loss": 0.6194, + "num_input_tokens_seen": 33692928, + "step": 10700 + }, + { + "epoch": 0.6852954356315216, + "grad_norm": 27.835859298706055, + "learning_rate": 4.979132759739973e-06, + "loss": 0.5011, + "num_input_tokens_seen": 33708352, + "step": 10705 + }, + { + "epoch": 0.6856155175724986, + "grad_norm": 50.913089752197266, + "learning_rate": 4.979060668246696e-06, + "loss": 0.5775, + "num_input_tokens_seen": 33723968, + "step": 10710 + }, + { + "epoch": 0.6859355995134755, + "grad_norm": 51.7571907043457, + "learning_rate": 4.9789884529620415e-06, + "loss": 0.5769, + "num_input_tokens_seen": 33739200, + "step": 10715 + }, + { + "epoch": 0.6862556814544524, + "grad_norm": 51.479774475097656, + "learning_rate": 4.978916113889616e-06, + "loss": 0.5149, + "num_input_tokens_seen": 33754944, + "step": 10720 + }, + { + "epoch": 0.6865757633954293, + "grad_norm": 58.965850830078125, + "learning_rate": 4.978843651033032e-06, + "loss": 0.4469, + "num_input_tokens_seen": 33769472, + "step": 10725 + }, + { + "epoch": 0.6868958453364061, + "grad_norm": 29.611129760742188, + "learning_rate": 4.978771064395909e-06, + "loss": 0.55, + "num_input_tokens_seen": 33784896, + "step": 10730 + }, + { + "epoch": 0.687215927277383, + "grad_norm": 16.97421646118164, + "learning_rate": 4.978698353981869e-06, + "loss": 0.4415, + "num_input_tokens_seen": 33800320, + "step": 10735 + }, + { + "epoch": 0.6875360092183599, + "grad_norm": 24.586278915405273, + "learning_rate": 4.978625519794546e-06, + "loss": 0.5544, + "num_input_tokens_seen": 33816896, + "step": 10740 + }, + { + "epoch": 0.6878560911593368, + "grad_norm": 22.608474731445312, + "learning_rate": 4.9785525618375735e-06, + "loss": 0.6134, + "num_input_tokens_seen": 33833344, + "step": 10745 + }, + { + "epoch": 0.6881761731003136, + "grad_norm": 33.346195220947266, + "learning_rate": 4.978479480114598e-06, + "loss": 0.5297, + "num_input_tokens_seen": 33849856, + "step": 10750 + }, + { + "epoch": 0.6884962550412905, + "grad_norm": 17.446847915649414, + "learning_rate": 4.978406274629265e-06, + "loss": 0.5039, + "num_input_tokens_seen": 33865344, + "step": 10755 + }, + { + "epoch": 0.6888163369822674, + "grad_norm": 44.494842529296875, + "learning_rate": 4.978332945385234e-06, + "loss": 0.5573, + "num_input_tokens_seen": 33881792, + "step": 10760 + }, + { + "epoch": 0.6891364189232444, + "grad_norm": 27.349849700927734, + "learning_rate": 4.978259492386164e-06, + "loss": 0.6204, + "num_input_tokens_seen": 33898368, + "step": 10765 + }, + { + "epoch": 0.6894565008642213, + "grad_norm": 31.201513290405273, + "learning_rate": 4.978185915635725e-06, + "loss": 0.5, + "num_input_tokens_seen": 33914368, + "step": 10770 + }, + { + "epoch": 0.6897765828051982, + "grad_norm": 30.97865867614746, + "learning_rate": 4.978112215137588e-06, + "loss": 0.4288, + "num_input_tokens_seen": 33929728, + "step": 10775 + }, + { + "epoch": 0.690096664746175, + "grad_norm": 29.683237075805664, + "learning_rate": 4.978038390895437e-06, + "loss": 0.4813, + "num_input_tokens_seen": 33944704, + "step": 10780 + }, + { + "epoch": 0.6904167466871519, + "grad_norm": 41.18208312988281, + "learning_rate": 4.977964442912955e-06, + "loss": 0.7686, + "num_input_tokens_seen": 33960128, + "step": 10785 + }, + { + "epoch": 0.6907368286281288, + "grad_norm": 17.046850204467773, + "learning_rate": 4.977890371193836e-06, + "loss": 0.6656, + "num_input_tokens_seen": 33975104, + "step": 10790 + }, + { + "epoch": 0.6910569105691057, + "grad_norm": 28.7425594329834, + "learning_rate": 4.977816175741779e-06, + "loss": 0.5917, + "num_input_tokens_seen": 33990720, + "step": 10795 + }, + { + "epoch": 0.6913769925100826, + "grad_norm": 26.48096466064453, + "learning_rate": 4.9777418565604885e-06, + "loss": 0.5768, + "num_input_tokens_seen": 34006272, + "step": 10800 + }, + { + "epoch": 0.6916970744510594, + "grad_norm": 18.103130340576172, + "learning_rate": 4.977667413653676e-06, + "loss": 0.4049, + "num_input_tokens_seen": 34022400, + "step": 10805 + }, + { + "epoch": 0.6920171563920363, + "grad_norm": 32.288543701171875, + "learning_rate": 4.977592847025058e-06, + "loss": 0.4865, + "num_input_tokens_seen": 34038720, + "step": 10810 + }, + { + "epoch": 0.6923372383330133, + "grad_norm": 23.837360382080078, + "learning_rate": 4.9775181566783595e-06, + "loss": 0.5691, + "num_input_tokens_seen": 34053760, + "step": 10815 + }, + { + "epoch": 0.6926573202739902, + "grad_norm": 15.106657981872559, + "learning_rate": 4.977443342617308e-06, + "loss": 0.396, + "num_input_tokens_seen": 34069568, + "step": 10820 + }, + { + "epoch": 0.6929774022149671, + "grad_norm": 13.791499137878418, + "learning_rate": 4.977368404845641e-06, + "loss": 0.4147, + "num_input_tokens_seen": 34086912, + "step": 10825 + }, + { + "epoch": 0.6932974841559439, + "grad_norm": 18.72361183166504, + "learning_rate": 4.9772933433671006e-06, + "loss": 0.4735, + "num_input_tokens_seen": 34101696, + "step": 10830 + }, + { + "epoch": 0.6936175660969208, + "grad_norm": 33.368160247802734, + "learning_rate": 4.977218158185434e-06, + "loss": 0.4479, + "num_input_tokens_seen": 34115904, + "step": 10835 + }, + { + "epoch": 0.6939376480378977, + "grad_norm": 88.93049621582031, + "learning_rate": 4.977142849304396e-06, + "loss": 0.6332, + "num_input_tokens_seen": 34131328, + "step": 10840 + }, + { + "epoch": 0.6942577299788746, + "grad_norm": 45.65459060668945, + "learning_rate": 4.977067416727747e-06, + "loss": 0.5561, + "num_input_tokens_seen": 34148544, + "step": 10845 + }, + { + "epoch": 0.6945778119198515, + "grad_norm": 25.432086944580078, + "learning_rate": 4.976991860459254e-06, + "loss": 0.5371, + "num_input_tokens_seen": 34163520, + "step": 10850 + }, + { + "epoch": 0.6948978938608283, + "grad_norm": 49.570709228515625, + "learning_rate": 4.976916180502689e-06, + "loss": 0.4885, + "num_input_tokens_seen": 34178368, + "step": 10855 + }, + { + "epoch": 0.6952179758018052, + "grad_norm": 54.69110107421875, + "learning_rate": 4.976840376861832e-06, + "loss": 0.6393, + "num_input_tokens_seen": 34193920, + "step": 10860 + }, + { + "epoch": 0.6955380577427821, + "grad_norm": 30.064373016357422, + "learning_rate": 4.976764449540468e-06, + "loss": 0.6128, + "num_input_tokens_seen": 34210176, + "step": 10865 + }, + { + "epoch": 0.6958581396837591, + "grad_norm": 45.923423767089844, + "learning_rate": 4.976688398542389e-06, + "loss": 0.5699, + "num_input_tokens_seen": 34227328, + "step": 10870 + }, + { + "epoch": 0.696178221624736, + "grad_norm": 27.11710548400879, + "learning_rate": 4.976612223871391e-06, + "loss": 0.4597, + "num_input_tokens_seen": 34241984, + "step": 10875 + }, + { + "epoch": 0.6964983035657129, + "grad_norm": 27.623186111450195, + "learning_rate": 4.976535925531279e-06, + "loss": 0.4676, + "num_input_tokens_seen": 34256896, + "step": 10880 + }, + { + "epoch": 0.6968183855066897, + "grad_norm": 18.09124755859375, + "learning_rate": 4.9764595035258625e-06, + "loss": 0.5493, + "num_input_tokens_seen": 34273792, + "step": 10885 + }, + { + "epoch": 0.6971384674476666, + "grad_norm": 35.45683288574219, + "learning_rate": 4.9763829578589586e-06, + "loss": 0.5085, + "num_input_tokens_seen": 34290752, + "step": 10890 + }, + { + "epoch": 0.6974585493886435, + "grad_norm": 23.839229583740234, + "learning_rate": 4.976306288534388e-06, + "loss": 0.5571, + "num_input_tokens_seen": 34308416, + "step": 10895 + }, + { + "epoch": 0.6977786313296204, + "grad_norm": 17.964296340942383, + "learning_rate": 4.976229495555981e-06, + "loss": 0.7076, + "num_input_tokens_seen": 34323776, + "step": 10900 + }, + { + "epoch": 0.6980987132705972, + "grad_norm": 18.0959529876709, + "learning_rate": 4.97615257892757e-06, + "loss": 0.4328, + "num_input_tokens_seen": 34338944, + "step": 10905 + }, + { + "epoch": 0.6984187952115741, + "grad_norm": 24.661043167114258, + "learning_rate": 4.976075538652998e-06, + "loss": 0.4736, + "num_input_tokens_seen": 34354432, + "step": 10910 + }, + { + "epoch": 0.698738877152551, + "grad_norm": 22.65460205078125, + "learning_rate": 4.97599837473611e-06, + "loss": 0.4492, + "num_input_tokens_seen": 34370432, + "step": 10915 + }, + { + "epoch": 0.699058959093528, + "grad_norm": 36.7968864440918, + "learning_rate": 4.975921087180761e-06, + "loss": 0.5545, + "num_input_tokens_seen": 34386496, + "step": 10920 + }, + { + "epoch": 0.6993790410345049, + "grad_norm": 28.624797821044922, + "learning_rate": 4.975843675990809e-06, + "loss": 0.5357, + "num_input_tokens_seen": 34402560, + "step": 10925 + }, + { + "epoch": 0.6996991229754818, + "grad_norm": 20.000957489013672, + "learning_rate": 4.97576614117012e-06, + "loss": 0.4802, + "num_input_tokens_seen": 34418496, + "step": 10930 + }, + { + "epoch": 0.7000192049164586, + "grad_norm": 30.821168899536133, + "learning_rate": 4.9756884827225665e-06, + "loss": 0.494, + "num_input_tokens_seen": 34434752, + "step": 10935 + }, + { + "epoch": 0.7003392868574355, + "grad_norm": 27.110044479370117, + "learning_rate": 4.9756107006520245e-06, + "loss": 0.5727, + "num_input_tokens_seen": 34450176, + "step": 10940 + }, + { + "epoch": 0.7006593687984124, + "grad_norm": 17.457172393798828, + "learning_rate": 4.9755327949623796e-06, + "loss": 0.5875, + "num_input_tokens_seen": 34465600, + "step": 10945 + }, + { + "epoch": 0.7009794507393893, + "grad_norm": 39.62924575805664, + "learning_rate": 4.975454765657522e-06, + "loss": 0.4471, + "num_input_tokens_seen": 34481600, + "step": 10950 + }, + { + "epoch": 0.7012995326803662, + "grad_norm": 14.343010902404785, + "learning_rate": 4.975376612741347e-06, + "loss": 0.4347, + "num_input_tokens_seen": 34498752, + "step": 10955 + }, + { + "epoch": 0.701619614621343, + "grad_norm": 19.94302749633789, + "learning_rate": 4.975298336217757e-06, + "loss": 0.5944, + "num_input_tokens_seen": 34514240, + "step": 10960 + }, + { + "epoch": 0.7019396965623199, + "grad_norm": 32.8018913269043, + "learning_rate": 4.975219936090664e-06, + "loss": 0.5639, + "num_input_tokens_seen": 34528704, + "step": 10965 + }, + { + "epoch": 0.7022597785032968, + "grad_norm": 41.371543884277344, + "learning_rate": 4.9751414123639776e-06, + "loss": 0.5451, + "num_input_tokens_seen": 34544704, + "step": 10970 + }, + { + "epoch": 0.7025798604442738, + "grad_norm": 39.590843200683594, + "learning_rate": 4.975062765041623e-06, + "loss": 0.5939, + "num_input_tokens_seen": 34560000, + "step": 10975 + }, + { + "epoch": 0.7028999423852507, + "grad_norm": 53.62720489501953, + "learning_rate": 4.974983994127526e-06, + "loss": 0.4441, + "num_input_tokens_seen": 34575296, + "step": 10980 + }, + { + "epoch": 0.7032200243262275, + "grad_norm": 29.170114517211914, + "learning_rate": 4.974905099625619e-06, + "loss": 0.493, + "num_input_tokens_seen": 34590528, + "step": 10985 + }, + { + "epoch": 0.7035401062672044, + "grad_norm": 28.610198974609375, + "learning_rate": 4.974826081539843e-06, + "loss": 0.5594, + "num_input_tokens_seen": 34605312, + "step": 10990 + }, + { + "epoch": 0.7038601882081813, + "grad_norm": 25.826879501342773, + "learning_rate": 4.974746939874144e-06, + "loss": 0.5058, + "num_input_tokens_seen": 34620736, + "step": 10995 + }, + { + "epoch": 0.7041802701491582, + "grad_norm": 31.611469268798828, + "learning_rate": 4.974667674632474e-06, + "loss": 0.5211, + "num_input_tokens_seen": 34636736, + "step": 11000 + }, + { + "epoch": 0.7045003520901351, + "grad_norm": 29.08265495300293, + "learning_rate": 4.974588285818789e-06, + "loss": 0.5566, + "num_input_tokens_seen": 34651776, + "step": 11005 + }, + { + "epoch": 0.7048204340311119, + "grad_norm": 42.62849044799805, + "learning_rate": 4.9745087734370545e-06, + "loss": 0.4289, + "num_input_tokens_seen": 34666176, + "step": 11010 + }, + { + "epoch": 0.7051405159720888, + "grad_norm": 14.510835647583008, + "learning_rate": 4.974429137491242e-06, + "loss": 0.3934, + "num_input_tokens_seen": 34682112, + "step": 11015 + }, + { + "epoch": 0.7054605979130657, + "grad_norm": 30.592445373535156, + "learning_rate": 4.974349377985327e-06, + "loss": 0.5075, + "num_input_tokens_seen": 34697024, + "step": 11020 + }, + { + "epoch": 0.7057806798540426, + "grad_norm": 29.75505828857422, + "learning_rate": 4.974269494923293e-06, + "loss": 0.5785, + "num_input_tokens_seen": 34712704, + "step": 11025 + }, + { + "epoch": 0.7061007617950196, + "grad_norm": 12.969154357910156, + "learning_rate": 4.974189488309128e-06, + "loss": 0.3854, + "num_input_tokens_seen": 34728256, + "step": 11030 + }, + { + "epoch": 0.7064208437359965, + "grad_norm": 14.834078788757324, + "learning_rate": 4.974109358146827e-06, + "loss": 0.4843, + "num_input_tokens_seen": 34744512, + "step": 11035 + }, + { + "epoch": 0.7067409256769733, + "grad_norm": 50.77027130126953, + "learning_rate": 4.974029104440392e-06, + "loss": 0.5917, + "num_input_tokens_seen": 34758912, + "step": 11040 + }, + { + "epoch": 0.7070610076179502, + "grad_norm": 37.987953186035156, + "learning_rate": 4.97394872719383e-06, + "loss": 0.4637, + "num_input_tokens_seen": 34774848, + "step": 11045 + }, + { + "epoch": 0.7073810895589271, + "grad_norm": 23.26326560974121, + "learning_rate": 4.973868226411156e-06, + "loss": 0.5963, + "num_input_tokens_seen": 34790016, + "step": 11050 + }, + { + "epoch": 0.707701171499904, + "grad_norm": 20.291336059570312, + "learning_rate": 4.9737876020963875e-06, + "loss": 0.5213, + "num_input_tokens_seen": 34804992, + "step": 11055 + }, + { + "epoch": 0.7080212534408808, + "grad_norm": 44.33049392700195, + "learning_rate": 4.9737068542535525e-06, + "loss": 0.3757, + "num_input_tokens_seen": 34820288, + "step": 11060 + }, + { + "epoch": 0.7083413353818577, + "grad_norm": 41.77791213989258, + "learning_rate": 4.9736259828866814e-06, + "loss": 0.4366, + "num_input_tokens_seen": 34836096, + "step": 11065 + }, + { + "epoch": 0.7086614173228346, + "grad_norm": 33.22407531738281, + "learning_rate": 4.9735449879998145e-06, + "loss": 0.6283, + "num_input_tokens_seen": 34851584, + "step": 11070 + }, + { + "epoch": 0.7089814992638115, + "grad_norm": 19.874685287475586, + "learning_rate": 4.973463869596994e-06, + "loss": 0.4686, + "num_input_tokens_seen": 34868032, + "step": 11075 + }, + { + "epoch": 0.7093015812047885, + "grad_norm": 32.64516830444336, + "learning_rate": 4.973382627682273e-06, + "loss": 0.5039, + "num_input_tokens_seen": 34884032, + "step": 11080 + }, + { + "epoch": 0.7096216631457654, + "grad_norm": 41.61967849731445, + "learning_rate": 4.973301262259705e-06, + "loss": 0.4791, + "num_input_tokens_seen": 34899456, + "step": 11085 + }, + { + "epoch": 0.7099417450867422, + "grad_norm": 24.165958404541016, + "learning_rate": 4.973219773333356e-06, + "loss": 0.4094, + "num_input_tokens_seen": 34915456, + "step": 11090 + }, + { + "epoch": 0.7102618270277191, + "grad_norm": 17.815196990966797, + "learning_rate": 4.9731381609072945e-06, + "loss": 0.4623, + "num_input_tokens_seen": 34931328, + "step": 11095 + }, + { + "epoch": 0.710581908968696, + "grad_norm": 23.490400314331055, + "learning_rate": 4.973056424985595e-06, + "loss": 0.5094, + "num_input_tokens_seen": 34946816, + "step": 11100 + }, + { + "epoch": 0.7109019909096729, + "grad_norm": 16.62025260925293, + "learning_rate": 4.97297456557234e-06, + "loss": 0.4377, + "num_input_tokens_seen": 34964544, + "step": 11105 + }, + { + "epoch": 0.7112220728506498, + "grad_norm": 45.24882888793945, + "learning_rate": 4.972892582671615e-06, + "loss": 0.4891, + "num_input_tokens_seen": 34982208, + "step": 11110 + }, + { + "epoch": 0.7115421547916266, + "grad_norm": 46.19004821777344, + "learning_rate": 4.972810476287516e-06, + "loss": 0.6064, + "num_input_tokens_seen": 35000576, + "step": 11115 + }, + { + "epoch": 0.7118622367326035, + "grad_norm": 22.54388999938965, + "learning_rate": 4.9727282464241425e-06, + "loss": 0.3861, + "num_input_tokens_seen": 35014912, + "step": 11120 + }, + { + "epoch": 0.7121823186735804, + "grad_norm": 29.071796417236328, + "learning_rate": 4.9726458930855994e-06, + "loss": 0.4301, + "num_input_tokens_seen": 35029632, + "step": 11125 + }, + { + "epoch": 0.7125024006145573, + "grad_norm": 35.63718795776367, + "learning_rate": 4.9725634162760005e-06, + "loss": 0.487, + "num_input_tokens_seen": 35044992, + "step": 11130 + }, + { + "epoch": 0.7128224825555343, + "grad_norm": 23.073711395263672, + "learning_rate": 4.972480815999464e-06, + "loss": 0.6388, + "num_input_tokens_seen": 35060544, + "step": 11135 + }, + { + "epoch": 0.7131425644965111, + "grad_norm": 29.106698989868164, + "learning_rate": 4.972398092260115e-06, + "loss": 0.5645, + "num_input_tokens_seen": 35075648, + "step": 11140 + }, + { + "epoch": 0.713462646437488, + "grad_norm": 19.76429557800293, + "learning_rate": 4.9723152450620815e-06, + "loss": 0.4202, + "num_input_tokens_seen": 35090880, + "step": 11145 + }, + { + "epoch": 0.7137827283784649, + "grad_norm": 28.271087646484375, + "learning_rate": 4.9722322744095036e-06, + "loss": 0.6373, + "num_input_tokens_seen": 35106432, + "step": 11150 + }, + { + "epoch": 0.7141028103194418, + "grad_norm": 28.26032829284668, + "learning_rate": 4.972149180306524e-06, + "loss": 0.5008, + "num_input_tokens_seen": 35121664, + "step": 11155 + }, + { + "epoch": 0.7144228922604187, + "grad_norm": 36.48896789550781, + "learning_rate": 4.97206596275729e-06, + "loss": 0.6063, + "num_input_tokens_seen": 35137152, + "step": 11160 + }, + { + "epoch": 0.7147429742013955, + "grad_norm": 14.0707426071167, + "learning_rate": 4.971982621765959e-06, + "loss": 0.5116, + "num_input_tokens_seen": 35153088, + "step": 11165 + }, + { + "epoch": 0.7150630561423724, + "grad_norm": 23.861473083496094, + "learning_rate": 4.971899157336691e-06, + "loss": 0.4851, + "num_input_tokens_seen": 35168192, + "step": 11170 + }, + { + "epoch": 0.7153831380833493, + "grad_norm": 25.792312622070312, + "learning_rate": 4.971815569473656e-06, + "loss": 0.5449, + "num_input_tokens_seen": 35184192, + "step": 11175 + }, + { + "epoch": 0.7157032200243262, + "grad_norm": 36.42518997192383, + "learning_rate": 4.971731858181026e-06, + "loss": 0.5011, + "num_input_tokens_seen": 35200256, + "step": 11180 + }, + { + "epoch": 0.7160233019653032, + "grad_norm": 20.914968490600586, + "learning_rate": 4.9716480234629825e-06, + "loss": 0.4837, + "num_input_tokens_seen": 35214720, + "step": 11185 + }, + { + "epoch": 0.71634338390628, + "grad_norm": 28.298616409301758, + "learning_rate": 4.97156406532371e-06, + "loss": 0.4837, + "num_input_tokens_seen": 35230720, + "step": 11190 + }, + { + "epoch": 0.7166634658472569, + "grad_norm": 25.96973991394043, + "learning_rate": 4.971479983767403e-06, + "loss": 0.4826, + "num_input_tokens_seen": 35246400, + "step": 11195 + }, + { + "epoch": 0.7169835477882338, + "grad_norm": 28.832979202270508, + "learning_rate": 4.971395778798258e-06, + "loss": 0.4933, + "num_input_tokens_seen": 35262784, + "step": 11200 + }, + { + "epoch": 0.7173036297292107, + "grad_norm": 20.301227569580078, + "learning_rate": 4.9713114504204815e-06, + "loss": 0.4372, + "num_input_tokens_seen": 35277632, + "step": 11205 + }, + { + "epoch": 0.7176237116701876, + "grad_norm": 28.93275260925293, + "learning_rate": 4.971226998638284e-06, + "loss": 0.4651, + "num_input_tokens_seen": 35292544, + "step": 11210 + }, + { + "epoch": 0.7179437936111644, + "grad_norm": 21.068790435791016, + "learning_rate": 4.971142423455882e-06, + "loss": 0.4884, + "num_input_tokens_seen": 35308352, + "step": 11215 + }, + { + "epoch": 0.7182638755521413, + "grad_norm": 27.603530883789062, + "learning_rate": 4.971057724877499e-06, + "loss": 0.5077, + "num_input_tokens_seen": 35323904, + "step": 11220 + }, + { + "epoch": 0.7185839574931182, + "grad_norm": 81.83708190917969, + "learning_rate": 4.970972902907365e-06, + "loss": 0.5591, + "num_input_tokens_seen": 35340224, + "step": 11225 + }, + { + "epoch": 0.7189040394340951, + "grad_norm": 43.82564163208008, + "learning_rate": 4.970887957549715e-06, + "loss": 0.5306, + "num_input_tokens_seen": 35355776, + "step": 11230 + }, + { + "epoch": 0.719224121375072, + "grad_norm": 57.64933776855469, + "learning_rate": 4.97080288880879e-06, + "loss": 0.5181, + "num_input_tokens_seen": 35371072, + "step": 11235 + }, + { + "epoch": 0.719544203316049, + "grad_norm": 22.23705291748047, + "learning_rate": 4.970717696688839e-06, + "loss": 0.4511, + "num_input_tokens_seen": 35387520, + "step": 11240 + }, + { + "epoch": 0.7198642852570258, + "grad_norm": 41.62242889404297, + "learning_rate": 4.970632381194116e-06, + "loss": 0.56, + "num_input_tokens_seen": 35403840, + "step": 11245 + }, + { + "epoch": 0.7201843671980027, + "grad_norm": 18.74190902709961, + "learning_rate": 4.970546942328881e-06, + "loss": 0.4955, + "num_input_tokens_seen": 35420032, + "step": 11250 + }, + { + "epoch": 0.7205044491389796, + "grad_norm": 37.18020248413086, + "learning_rate": 4.9704613800974e-06, + "loss": 0.412, + "num_input_tokens_seen": 35436032, + "step": 11255 + }, + { + "epoch": 0.7208245310799565, + "grad_norm": 31.869962692260742, + "learning_rate": 4.9703756945039465e-06, + "loss": 0.4155, + "num_input_tokens_seen": 35451136, + "step": 11260 + }, + { + "epoch": 0.7211446130209334, + "grad_norm": 22.688325881958008, + "learning_rate": 4.970289885552797e-06, + "loss": 0.5428, + "num_input_tokens_seen": 35466816, + "step": 11265 + }, + { + "epoch": 0.7214646949619102, + "grad_norm": 28.56060218811035, + "learning_rate": 4.97020395324824e-06, + "loss": 0.4465, + "num_input_tokens_seen": 35483456, + "step": 11270 + }, + { + "epoch": 0.7217847769028871, + "grad_norm": 48.63945388793945, + "learning_rate": 4.970117897594562e-06, + "loss": 0.6662, + "num_input_tokens_seen": 35499584, + "step": 11275 + }, + { + "epoch": 0.722104858843864, + "grad_norm": 29.126535415649414, + "learning_rate": 4.970031718596065e-06, + "loss": 0.577, + "num_input_tokens_seen": 35515328, + "step": 11280 + }, + { + "epoch": 0.7224249407848409, + "grad_norm": 15.446294784545898, + "learning_rate": 4.969945416257048e-06, + "loss": 0.5868, + "num_input_tokens_seen": 35530048, + "step": 11285 + }, + { + "epoch": 0.7227450227258178, + "grad_norm": 27.480016708374023, + "learning_rate": 4.969858990581823e-06, + "loss": 0.4074, + "num_input_tokens_seen": 35545792, + "step": 11290 + }, + { + "epoch": 0.7230651046667947, + "grad_norm": 50.1007080078125, + "learning_rate": 4.969772441574705e-06, + "loss": 0.4847, + "num_input_tokens_seen": 35561344, + "step": 11295 + }, + { + "epoch": 0.7233851866077716, + "grad_norm": 39.8463249206543, + "learning_rate": 4.9696857692400165e-06, + "loss": 0.4858, + "num_input_tokens_seen": 35577024, + "step": 11300 + }, + { + "epoch": 0.7237052685487485, + "grad_norm": 32.19331359863281, + "learning_rate": 4.969598973582084e-06, + "loss": 0.5038, + "num_input_tokens_seen": 35592192, + "step": 11305 + }, + { + "epoch": 0.7240253504897254, + "grad_norm": 18.013023376464844, + "learning_rate": 4.969512054605243e-06, + "loss": 0.4985, + "num_input_tokens_seen": 35607872, + "step": 11310 + }, + { + "epoch": 0.7243454324307023, + "grad_norm": 45.16957473754883, + "learning_rate": 4.969425012313833e-06, + "loss": 0.469, + "num_input_tokens_seen": 35623872, + "step": 11315 + }, + { + "epoch": 0.7246655143716791, + "grad_norm": 33.01842498779297, + "learning_rate": 4.969337846712201e-06, + "loss": 0.4882, + "num_input_tokens_seen": 35638720, + "step": 11320 + }, + { + "epoch": 0.724985596312656, + "grad_norm": 34.721763610839844, + "learning_rate": 4.9692505578046975e-06, + "loss": 0.6997, + "num_input_tokens_seen": 35655680, + "step": 11325 + }, + { + "epoch": 0.7253056782536329, + "grad_norm": 31.12956428527832, + "learning_rate": 4.9691631455956855e-06, + "loss": 0.4621, + "num_input_tokens_seen": 35671168, + "step": 11330 + }, + { + "epoch": 0.7256257601946098, + "grad_norm": 25.725038528442383, + "learning_rate": 4.969075610089526e-06, + "loss": 0.4241, + "num_input_tokens_seen": 35686848, + "step": 11335 + }, + { + "epoch": 0.7259458421355867, + "grad_norm": 28.91339111328125, + "learning_rate": 4.968987951290592e-06, + "loss": 0.401, + "num_input_tokens_seen": 35702720, + "step": 11340 + }, + { + "epoch": 0.7262659240765637, + "grad_norm": 33.39695358276367, + "learning_rate": 4.968900169203259e-06, + "loss": 0.6738, + "num_input_tokens_seen": 35720640, + "step": 11345 + }, + { + "epoch": 0.7265860060175405, + "grad_norm": 61.736976623535156, + "learning_rate": 4.968812263831913e-06, + "loss": 0.4632, + "num_input_tokens_seen": 35735424, + "step": 11350 + }, + { + "epoch": 0.7269060879585174, + "grad_norm": 22.237831115722656, + "learning_rate": 4.968724235180942e-06, + "loss": 0.5915, + "num_input_tokens_seen": 35751168, + "step": 11355 + }, + { + "epoch": 0.7272261698994943, + "grad_norm": 28.805133819580078, + "learning_rate": 4.968636083254741e-06, + "loss": 0.5772, + "num_input_tokens_seen": 35767168, + "step": 11360 + }, + { + "epoch": 0.7275462518404712, + "grad_norm": 31.608308792114258, + "learning_rate": 4.968547808057713e-06, + "loss": 0.6086, + "num_input_tokens_seen": 35782464, + "step": 11365 + }, + { + "epoch": 0.727866333781448, + "grad_norm": 20.44348907470703, + "learning_rate": 4.968459409594266e-06, + "loss": 0.5673, + "num_input_tokens_seen": 35798592, + "step": 11370 + }, + { + "epoch": 0.7281864157224249, + "grad_norm": 48.833187103271484, + "learning_rate": 4.968370887868813e-06, + "loss": 0.5758, + "num_input_tokens_seen": 35814720, + "step": 11375 + }, + { + "epoch": 0.7285064976634018, + "grad_norm": 31.366857528686523, + "learning_rate": 4.968282242885776e-06, + "loss": 0.3895, + "num_input_tokens_seen": 35830016, + "step": 11380 + }, + { + "epoch": 0.7288265796043787, + "grad_norm": 16.456493377685547, + "learning_rate": 4.968193474649581e-06, + "loss": 0.45, + "num_input_tokens_seen": 35845632, + "step": 11385 + }, + { + "epoch": 0.7291466615453556, + "grad_norm": 34.622318267822266, + "learning_rate": 4.968104583164659e-06, + "loss": 0.5233, + "num_input_tokens_seen": 35861184, + "step": 11390 + }, + { + "epoch": 0.7294667434863324, + "grad_norm": 48.18146896362305, + "learning_rate": 4.968015568435451e-06, + "loss": 0.687, + "num_input_tokens_seen": 35876864, + "step": 11395 + }, + { + "epoch": 0.7297868254273094, + "grad_norm": 14.769217491149902, + "learning_rate": 4.967926430466401e-06, + "loss": 0.4534, + "num_input_tokens_seen": 35893568, + "step": 11400 + }, + { + "epoch": 0.7301069073682863, + "grad_norm": 34.30876922607422, + "learning_rate": 4.967837169261959e-06, + "loss": 0.4689, + "num_input_tokens_seen": 35909696, + "step": 11405 + }, + { + "epoch": 0.7304269893092632, + "grad_norm": 55.998817443847656, + "learning_rate": 4.9677477848265835e-06, + "loss": 0.4996, + "num_input_tokens_seen": 35925120, + "step": 11410 + }, + { + "epoch": 0.7307470712502401, + "grad_norm": 24.663997650146484, + "learning_rate": 4.967658277164738e-06, + "loss": 0.597, + "num_input_tokens_seen": 35940800, + "step": 11415 + }, + { + "epoch": 0.731067153191217, + "grad_norm": 8.007280349731445, + "learning_rate": 4.967568646280891e-06, + "loss": 0.455, + "num_input_tokens_seen": 35955968, + "step": 11420 + }, + { + "epoch": 0.7313872351321938, + "grad_norm": 29.32733154296875, + "learning_rate": 4.9674788921795205e-06, + "loss": 0.4821, + "num_input_tokens_seen": 35972096, + "step": 11425 + }, + { + "epoch": 0.7317073170731707, + "grad_norm": 29.861387252807617, + "learning_rate": 4.9673890148651055e-06, + "loss": 0.4694, + "num_input_tokens_seen": 35986624, + "step": 11430 + }, + { + "epoch": 0.7320273990141476, + "grad_norm": 12.677417755126953, + "learning_rate": 4.967299014342136e-06, + "loss": 0.4582, + "num_input_tokens_seen": 36003008, + "step": 11435 + }, + { + "epoch": 0.7323474809551245, + "grad_norm": 21.185171127319336, + "learning_rate": 4.967208890615105e-06, + "loss": 0.6141, + "num_input_tokens_seen": 36017152, + "step": 11440 + }, + { + "epoch": 0.7326675628961014, + "grad_norm": 39.798309326171875, + "learning_rate": 4.967118643688513e-06, + "loss": 0.36, + "num_input_tokens_seen": 36031936, + "step": 11445 + }, + { + "epoch": 0.7329876448370783, + "grad_norm": 24.024234771728516, + "learning_rate": 4.967028273566866e-06, + "loss": 0.5462, + "num_input_tokens_seen": 36048064, + "step": 11450 + }, + { + "epoch": 0.7333077267780552, + "grad_norm": 42.628273010253906, + "learning_rate": 4.966937780254679e-06, + "loss": 0.8065, + "num_input_tokens_seen": 36064256, + "step": 11455 + }, + { + "epoch": 0.7336278087190321, + "grad_norm": 28.999921798706055, + "learning_rate": 4.966847163756468e-06, + "loss": 0.8037, + "num_input_tokens_seen": 36079424, + "step": 11460 + }, + { + "epoch": 0.733947890660009, + "grad_norm": 48.92115020751953, + "learning_rate": 4.96675642407676e-06, + "loss": 0.4751, + "num_input_tokens_seen": 36095424, + "step": 11465 + }, + { + "epoch": 0.7342679726009859, + "grad_norm": 15.393390655517578, + "learning_rate": 4.966665561220083e-06, + "loss": 0.4355, + "num_input_tokens_seen": 36111360, + "step": 11470 + }, + { + "epoch": 0.7345880545419627, + "grad_norm": 17.838760375976562, + "learning_rate": 4.966574575190978e-06, + "loss": 0.4282, + "num_input_tokens_seen": 36127232, + "step": 11475 + }, + { + "epoch": 0.7349081364829396, + "grad_norm": 22.445201873779297, + "learning_rate": 4.966483465993987e-06, + "loss": 0.4187, + "num_input_tokens_seen": 36143168, + "step": 11480 + }, + { + "epoch": 0.7352282184239165, + "grad_norm": 25.161373138427734, + "learning_rate": 4.966392233633658e-06, + "loss": 0.3868, + "num_input_tokens_seen": 36158272, + "step": 11485 + }, + { + "epoch": 0.7355483003648934, + "grad_norm": 36.72962188720703, + "learning_rate": 4.966300878114548e-06, + "loss": 0.4495, + "num_input_tokens_seen": 36173056, + "step": 11490 + }, + { + "epoch": 0.7358683823058703, + "grad_norm": 46.15682601928711, + "learning_rate": 4.9662093994412185e-06, + "loss": 0.4979, + "num_input_tokens_seen": 36187904, + "step": 11495 + }, + { + "epoch": 0.7361884642468471, + "grad_norm": 26.51592254638672, + "learning_rate": 4.966117797618238e-06, + "loss": 0.5348, + "num_input_tokens_seen": 36205632, + "step": 11500 + }, + { + "epoch": 0.7365085461878241, + "grad_norm": 42.4519157409668, + "learning_rate": 4.96602607265018e-06, + "loss": 0.4951, + "num_input_tokens_seen": 36220160, + "step": 11505 + }, + { + "epoch": 0.736828628128801, + "grad_norm": 31.256914138793945, + "learning_rate": 4.965934224541626e-06, + "loss": 0.4787, + "num_input_tokens_seen": 36236416, + "step": 11510 + }, + { + "epoch": 0.7371487100697779, + "grad_norm": 44.00822067260742, + "learning_rate": 4.965842253297159e-06, + "loss": 0.5312, + "num_input_tokens_seen": 36251584, + "step": 11515 + }, + { + "epoch": 0.7374687920107548, + "grad_norm": 12.113954544067383, + "learning_rate": 4.965750158921376e-06, + "loss": 0.4703, + "num_input_tokens_seen": 36265856, + "step": 11520 + }, + { + "epoch": 0.7377888739517316, + "grad_norm": 58.94597625732422, + "learning_rate": 4.965657941418873e-06, + "loss": 0.5367, + "num_input_tokens_seen": 36283904, + "step": 11525 + }, + { + "epoch": 0.7381089558927085, + "grad_norm": 51.4050407409668, + "learning_rate": 4.965565600794256e-06, + "loss": 0.5535, + "num_input_tokens_seen": 36298432, + "step": 11530 + }, + { + "epoch": 0.7384290378336854, + "grad_norm": 16.70289421081543, + "learning_rate": 4.965473137052135e-06, + "loss": 0.3427, + "num_input_tokens_seen": 36313728, + "step": 11535 + }, + { + "epoch": 0.7387491197746623, + "grad_norm": 20.741989135742188, + "learning_rate": 4.965380550197129e-06, + "loss": 0.4738, + "num_input_tokens_seen": 36329088, + "step": 11540 + }, + { + "epoch": 0.7390692017156392, + "grad_norm": 24.97895050048828, + "learning_rate": 4.965287840233859e-06, + "loss": 0.52, + "num_input_tokens_seen": 36345024, + "step": 11545 + }, + { + "epoch": 0.739389283656616, + "grad_norm": 19.770586013793945, + "learning_rate": 4.965195007166955e-06, + "loss": 0.4575, + "num_input_tokens_seen": 36361792, + "step": 11550 + }, + { + "epoch": 0.739709365597593, + "grad_norm": 23.694101333618164, + "learning_rate": 4.965102051001054e-06, + "loss": 0.4301, + "num_input_tokens_seen": 36377152, + "step": 11555 + }, + { + "epoch": 0.7400294475385699, + "grad_norm": 26.350595474243164, + "learning_rate": 4.965008971740796e-06, + "loss": 0.509, + "num_input_tokens_seen": 36394048, + "step": 11560 + }, + { + "epoch": 0.7403495294795468, + "grad_norm": 33.475425720214844, + "learning_rate": 4.96491576939083e-06, + "loss": 0.3984, + "num_input_tokens_seen": 36409152, + "step": 11565 + }, + { + "epoch": 0.7406696114205237, + "grad_norm": 60.16374206542969, + "learning_rate": 4.9648224439558094e-06, + "loss": 0.5386, + "num_input_tokens_seen": 36427072, + "step": 11570 + }, + { + "epoch": 0.7409896933615006, + "grad_norm": 19.359477996826172, + "learning_rate": 4.964728995440396e-06, + "loss": 0.5373, + "num_input_tokens_seen": 36444288, + "step": 11575 + }, + { + "epoch": 0.7413097753024774, + "grad_norm": 34.934913635253906, + "learning_rate": 4.964635423849253e-06, + "loss": 0.6004, + "num_input_tokens_seen": 36460096, + "step": 11580 + }, + { + "epoch": 0.7416298572434543, + "grad_norm": 47.33440017700195, + "learning_rate": 4.964541729187056e-06, + "loss": 0.4622, + "num_input_tokens_seen": 36475264, + "step": 11585 + }, + { + "epoch": 0.7419499391844312, + "grad_norm": 34.103397369384766, + "learning_rate": 4.964447911458483e-06, + "loss": 0.5301, + "num_input_tokens_seen": 36491264, + "step": 11590 + }, + { + "epoch": 0.7422700211254081, + "grad_norm": 49.64181137084961, + "learning_rate": 4.964353970668217e-06, + "loss": 0.4956, + "num_input_tokens_seen": 36506368, + "step": 11595 + }, + { + "epoch": 0.742590103066385, + "grad_norm": 32.67100143432617, + "learning_rate": 4.96425990682095e-06, + "loss": 0.488, + "num_input_tokens_seen": 36522432, + "step": 11600 + }, + { + "epoch": 0.7429101850073618, + "grad_norm": 34.25993347167969, + "learning_rate": 4.96416571992138e-06, + "loss": 0.5611, + "num_input_tokens_seen": 36539008, + "step": 11605 + }, + { + "epoch": 0.7432302669483388, + "grad_norm": 25.90297508239746, + "learning_rate": 4.9640714099742086e-06, + "loss": 0.4755, + "num_input_tokens_seen": 36554240, + "step": 11610 + }, + { + "epoch": 0.7435503488893157, + "grad_norm": 23.502065658569336, + "learning_rate": 4.963976976984146e-06, + "loss": 0.3484, + "num_input_tokens_seen": 36569920, + "step": 11615 + }, + { + "epoch": 0.7438704308302926, + "grad_norm": 50.738502502441406, + "learning_rate": 4.963882420955907e-06, + "loss": 0.6534, + "num_input_tokens_seen": 36585792, + "step": 11620 + }, + { + "epoch": 0.7441905127712695, + "grad_norm": 17.16935920715332, + "learning_rate": 4.9637877418942145e-06, + "loss": 0.4732, + "num_input_tokens_seen": 36601280, + "step": 11625 + }, + { + "epoch": 0.7445105947122463, + "grad_norm": 34.3489875793457, + "learning_rate": 4.963692939803796e-06, + "loss": 0.7017, + "num_input_tokens_seen": 36617152, + "step": 11630 + }, + { + "epoch": 0.7448306766532232, + "grad_norm": 33.32526397705078, + "learning_rate": 4.963598014689385e-06, + "loss": 0.553, + "num_input_tokens_seen": 36632512, + "step": 11635 + }, + { + "epoch": 0.7451507585942001, + "grad_norm": 17.250072479248047, + "learning_rate": 4.963502966555721e-06, + "loss": 0.4394, + "num_input_tokens_seen": 36647744, + "step": 11640 + }, + { + "epoch": 0.745470840535177, + "grad_norm": 27.155546188354492, + "learning_rate": 4.96340779540755e-06, + "loss": 0.5441, + "num_input_tokens_seen": 36663360, + "step": 11645 + }, + { + "epoch": 0.7457909224761539, + "grad_norm": 29.67959976196289, + "learning_rate": 4.9633125012496264e-06, + "loss": 0.5831, + "num_input_tokens_seen": 36678656, + "step": 11650 + }, + { + "epoch": 0.7461110044171307, + "grad_norm": 44.406917572021484, + "learning_rate": 4.963217084086707e-06, + "loss": 0.6294, + "num_input_tokens_seen": 36693952, + "step": 11655 + }, + { + "epoch": 0.7464310863581076, + "grad_norm": 26.321413040161133, + "learning_rate": 4.963121543923557e-06, + "loss": 0.4533, + "num_input_tokens_seen": 36709888, + "step": 11660 + }, + { + "epoch": 0.7467511682990846, + "grad_norm": 13.790727615356445, + "learning_rate": 4.963025880764947e-06, + "loss": 0.4112, + "num_input_tokens_seen": 36725504, + "step": 11665 + }, + { + "epoch": 0.7470712502400615, + "grad_norm": 22.544822692871094, + "learning_rate": 4.962930094615654e-06, + "loss": 0.4568, + "num_input_tokens_seen": 36741376, + "step": 11670 + }, + { + "epoch": 0.7473913321810384, + "grad_norm": 33.481605529785156, + "learning_rate": 4.962834185480461e-06, + "loss": 0.5701, + "num_input_tokens_seen": 36756864, + "step": 11675 + }, + { + "epoch": 0.7477114141220152, + "grad_norm": 14.948266983032227, + "learning_rate": 4.962738153364156e-06, + "loss": 0.598, + "num_input_tokens_seen": 36772608, + "step": 11680 + }, + { + "epoch": 0.7480314960629921, + "grad_norm": 36.600624084472656, + "learning_rate": 4.9626419982715366e-06, + "loss": 0.432, + "num_input_tokens_seen": 36788352, + "step": 11685 + }, + { + "epoch": 0.748351578003969, + "grad_norm": 34.2380485534668, + "learning_rate": 4.962545720207404e-06, + "loss": 0.5105, + "num_input_tokens_seen": 36804096, + "step": 11690 + }, + { + "epoch": 0.7486716599449459, + "grad_norm": 37.07654571533203, + "learning_rate": 4.962449319176564e-06, + "loss": 0.4935, + "num_input_tokens_seen": 36819776, + "step": 11695 + }, + { + "epoch": 0.7489917418859228, + "grad_norm": 34.812095642089844, + "learning_rate": 4.962352795183832e-06, + "loss": 0.7523, + "num_input_tokens_seen": 36835072, + "step": 11700 + }, + { + "epoch": 0.7493118238268996, + "grad_norm": 21.609220504760742, + "learning_rate": 4.962256148234027e-06, + "loss": 0.4038, + "num_input_tokens_seen": 36850816, + "step": 11705 + }, + { + "epoch": 0.7496319057678765, + "grad_norm": 28.78482437133789, + "learning_rate": 4.9621593783319754e-06, + "loss": 0.5105, + "num_input_tokens_seen": 36866368, + "step": 11710 + }, + { + "epoch": 0.7499519877088535, + "grad_norm": 38.12595748901367, + "learning_rate": 4.9620624854825094e-06, + "loss": 0.5124, + "num_input_tokens_seen": 36885376, + "step": 11715 + }, + { + "epoch": 0.7501440368734396, + "eval_loss": 0.49851447343826294, + "eval_runtime": 51.0071, + "eval_samples_per_second": 272.237, + "eval_steps_per_second": 34.034, + "num_input_tokens_seen": 36894016, + "step": 11718 + }, + { + "epoch": 0.7502720696498304, + "grad_norm": 16.429420471191406, + "learning_rate": 4.9619654696904675e-06, + "loss": 0.4566, + "num_input_tokens_seen": 36900288, + "step": 11720 + }, + { + "epoch": 0.7505921515908073, + "grad_norm": 17.386417388916016, + "learning_rate": 4.961868330960693e-06, + "loss": 0.5913, + "num_input_tokens_seen": 36916224, + "step": 11725 + }, + { + "epoch": 0.7509122335317842, + "grad_norm": 17.658143997192383, + "learning_rate": 4.961771069298038e-06, + "loss": 0.4233, + "num_input_tokens_seen": 36931648, + "step": 11730 + }, + { + "epoch": 0.751232315472761, + "grad_norm": 68.43558502197266, + "learning_rate": 4.961673684707359e-06, + "loss": 0.5752, + "num_input_tokens_seen": 36945856, + "step": 11735 + }, + { + "epoch": 0.7515523974137379, + "grad_norm": 22.55080223083496, + "learning_rate": 4.961576177193519e-06, + "loss": 0.5424, + "num_input_tokens_seen": 36961792, + "step": 11740 + }, + { + "epoch": 0.7518724793547148, + "grad_norm": 30.84984588623047, + "learning_rate": 4.961478546761386e-06, + "loss": 0.4873, + "num_input_tokens_seen": 36977152, + "step": 11745 + }, + { + "epoch": 0.7521925612956917, + "grad_norm": 20.749801635742188, + "learning_rate": 4.961380793415835e-06, + "loss": 0.5303, + "num_input_tokens_seen": 36991936, + "step": 11750 + }, + { + "epoch": 0.7525126432366686, + "grad_norm": 41.16276550292969, + "learning_rate": 4.9612829171617494e-06, + "loss": 0.605, + "num_input_tokens_seen": 37006784, + "step": 11755 + }, + { + "epoch": 0.7528327251776454, + "grad_norm": 32.938682556152344, + "learning_rate": 4.961184918004015e-06, + "loss": 0.4821, + "num_input_tokens_seen": 37022208, + "step": 11760 + }, + { + "epoch": 0.7531528071186223, + "grad_norm": 8.230010986328125, + "learning_rate": 4.961086795947525e-06, + "loss": 0.4572, + "num_input_tokens_seen": 37039488, + "step": 11765 + }, + { + "epoch": 0.7534728890595993, + "grad_norm": 32.88573455810547, + "learning_rate": 4.96098855099718e-06, + "loss": 0.5268, + "num_input_tokens_seen": 37056064, + "step": 11770 + }, + { + "epoch": 0.7537929710005762, + "grad_norm": 38.25775146484375, + "learning_rate": 4.960890183157886e-06, + "loss": 0.3497, + "num_input_tokens_seen": 37072256, + "step": 11775 + }, + { + "epoch": 0.7541130529415531, + "grad_norm": 40.565738677978516, + "learning_rate": 4.960791692434554e-06, + "loss": 0.6945, + "num_input_tokens_seen": 37087360, + "step": 11780 + }, + { + "epoch": 0.7544331348825299, + "grad_norm": 26.036649703979492, + "learning_rate": 4.960693078832103e-06, + "loss": 0.4554, + "num_input_tokens_seen": 37102144, + "step": 11785 + }, + { + "epoch": 0.7547532168235068, + "grad_norm": 43.89448928833008, + "learning_rate": 4.960594342355457e-06, + "loss": 0.4506, + "num_input_tokens_seen": 37117568, + "step": 11790 + }, + { + "epoch": 0.7550732987644837, + "grad_norm": 19.971271514892578, + "learning_rate": 4.960495483009546e-06, + "loss": 0.5544, + "num_input_tokens_seen": 37132800, + "step": 11795 + }, + { + "epoch": 0.7553933807054606, + "grad_norm": 28.018898010253906, + "learning_rate": 4.960396500799307e-06, + "loss": 0.7038, + "num_input_tokens_seen": 37147776, + "step": 11800 + }, + { + "epoch": 0.7557134626464375, + "grad_norm": 21.42015266418457, + "learning_rate": 4.960297395729683e-06, + "loss": 0.4506, + "num_input_tokens_seen": 37163904, + "step": 11805 + }, + { + "epoch": 0.7560335445874143, + "grad_norm": 36.440956115722656, + "learning_rate": 4.960198167805621e-06, + "loss": 0.5447, + "num_input_tokens_seen": 37180416, + "step": 11810 + }, + { + "epoch": 0.7563536265283912, + "grad_norm": 23.622392654418945, + "learning_rate": 4.960098817032078e-06, + "loss": 0.4104, + "num_input_tokens_seen": 37196480, + "step": 11815 + }, + { + "epoch": 0.7566737084693682, + "grad_norm": 47.93793487548828, + "learning_rate": 4.959999343414015e-06, + "loss": 0.5251, + "num_input_tokens_seen": 37211648, + "step": 11820 + }, + { + "epoch": 0.7569937904103451, + "grad_norm": 21.53437042236328, + "learning_rate": 4.959899746956397e-06, + "loss": 0.4863, + "num_input_tokens_seen": 37227072, + "step": 11825 + }, + { + "epoch": 0.757313872351322, + "grad_norm": 20.890825271606445, + "learning_rate": 4.9598000276642e-06, + "loss": 0.4256, + "num_input_tokens_seen": 37243968, + "step": 11830 + }, + { + "epoch": 0.7576339542922989, + "grad_norm": 20.32857322692871, + "learning_rate": 4.959700185542401e-06, + "loss": 0.5758, + "num_input_tokens_seen": 37259200, + "step": 11835 + }, + { + "epoch": 0.7579540362332757, + "grad_norm": 18.578102111816406, + "learning_rate": 4.959600220595988e-06, + "loss": 0.3839, + "num_input_tokens_seen": 37275072, + "step": 11840 + }, + { + "epoch": 0.7582741181742526, + "grad_norm": 42.450531005859375, + "learning_rate": 4.959500132829951e-06, + "loss": 0.4547, + "num_input_tokens_seen": 37291392, + "step": 11845 + }, + { + "epoch": 0.7585942001152295, + "grad_norm": 18.2155818939209, + "learning_rate": 4.959399922249289e-06, + "loss": 0.4876, + "num_input_tokens_seen": 37307648, + "step": 11850 + }, + { + "epoch": 0.7589142820562064, + "grad_norm": 18.84035301208496, + "learning_rate": 4.959299588859005e-06, + "loss": 0.3464, + "num_input_tokens_seen": 37323136, + "step": 11855 + }, + { + "epoch": 0.7592343639971832, + "grad_norm": 22.5133056640625, + "learning_rate": 4.959199132664109e-06, + "loss": 0.4314, + "num_input_tokens_seen": 37338496, + "step": 11860 + }, + { + "epoch": 0.7595544459381601, + "grad_norm": 19.123458862304688, + "learning_rate": 4.959098553669619e-06, + "loss": 0.445, + "num_input_tokens_seen": 37356800, + "step": 11865 + }, + { + "epoch": 0.759874527879137, + "grad_norm": 32.58523941040039, + "learning_rate": 4.958997851880555e-06, + "loss": 0.4906, + "num_input_tokens_seen": 37372800, + "step": 11870 + }, + { + "epoch": 0.760194609820114, + "grad_norm": 30.94862937927246, + "learning_rate": 4.958897027301947e-06, + "loss": 0.4387, + "num_input_tokens_seen": 37388608, + "step": 11875 + }, + { + "epoch": 0.7605146917610909, + "grad_norm": 21.960783004760742, + "learning_rate": 4.95879607993883e-06, + "loss": 0.5326, + "num_input_tokens_seen": 37405184, + "step": 11880 + }, + { + "epoch": 0.7608347737020678, + "grad_norm": 24.52437973022461, + "learning_rate": 4.958695009796244e-06, + "loss": 0.3786, + "num_input_tokens_seen": 37420096, + "step": 11885 + }, + { + "epoch": 0.7611548556430446, + "grad_norm": 81.4113998413086, + "learning_rate": 4.958593816879236e-06, + "loss": 0.6187, + "num_input_tokens_seen": 37434368, + "step": 11890 + }, + { + "epoch": 0.7614749375840215, + "grad_norm": 18.345722198486328, + "learning_rate": 4.958492501192859e-06, + "loss": 0.4273, + "num_input_tokens_seen": 37449344, + "step": 11895 + }, + { + "epoch": 0.7617950195249984, + "grad_norm": 19.46068572998047, + "learning_rate": 4.958391062742173e-06, + "loss": 0.3901, + "num_input_tokens_seen": 37464448, + "step": 11900 + }, + { + "epoch": 0.7621151014659753, + "grad_norm": 28.388235092163086, + "learning_rate": 4.958289501532242e-06, + "loss": 0.4547, + "num_input_tokens_seen": 37481856, + "step": 11905 + }, + { + "epoch": 0.7624351834069522, + "grad_norm": 20.216365814208984, + "learning_rate": 4.958187817568138e-06, + "loss": 0.463, + "num_input_tokens_seen": 37497856, + "step": 11910 + }, + { + "epoch": 0.762755265347929, + "grad_norm": 36.701255798339844, + "learning_rate": 4.958086010854938e-06, + "loss": 0.5234, + "num_input_tokens_seen": 37513600, + "step": 11915 + }, + { + "epoch": 0.7630753472889059, + "grad_norm": 39.579734802246094, + "learning_rate": 4.957984081397728e-06, + "loss": 0.5496, + "num_input_tokens_seen": 37529408, + "step": 11920 + }, + { + "epoch": 0.7633954292298829, + "grad_norm": 25.52025604248047, + "learning_rate": 4.957882029201595e-06, + "loss": 0.4377, + "num_input_tokens_seen": 37545984, + "step": 11925 + }, + { + "epoch": 0.7637155111708598, + "grad_norm": 14.069209098815918, + "learning_rate": 4.957779854271636e-06, + "loss": 0.5404, + "num_input_tokens_seen": 37561856, + "step": 11930 + }, + { + "epoch": 0.7640355931118367, + "grad_norm": 29.69396209716797, + "learning_rate": 4.957677556612953e-06, + "loss": 0.4171, + "num_input_tokens_seen": 37578624, + "step": 11935 + }, + { + "epoch": 0.7643556750528135, + "grad_norm": 40.6478157043457, + "learning_rate": 4.957575136230655e-06, + "loss": 0.6082, + "num_input_tokens_seen": 37593920, + "step": 11940 + }, + { + "epoch": 0.7646757569937904, + "grad_norm": 39.59920883178711, + "learning_rate": 4.9574725931298565e-06, + "loss": 0.8119, + "num_input_tokens_seen": 37609664, + "step": 11945 + }, + { + "epoch": 0.7649958389347673, + "grad_norm": 33.5262336730957, + "learning_rate": 4.957369927315676e-06, + "loss": 0.6045, + "num_input_tokens_seen": 37626048, + "step": 11950 + }, + { + "epoch": 0.7653159208757442, + "grad_norm": 23.755706787109375, + "learning_rate": 4.957267138793241e-06, + "loss": 0.6714, + "num_input_tokens_seen": 37641792, + "step": 11955 + }, + { + "epoch": 0.7656360028167211, + "grad_norm": 17.03995132446289, + "learning_rate": 4.957164227567685e-06, + "loss": 0.5358, + "num_input_tokens_seen": 37656512, + "step": 11960 + }, + { + "epoch": 0.7659560847576979, + "grad_norm": 31.2430477142334, + "learning_rate": 4.957061193644147e-06, + "loss": 0.4715, + "num_input_tokens_seen": 37672000, + "step": 11965 + }, + { + "epoch": 0.7662761666986748, + "grad_norm": 27.047229766845703, + "learning_rate": 4.95695803702777e-06, + "loss": 0.4264, + "num_input_tokens_seen": 37687232, + "step": 11970 + }, + { + "epoch": 0.7665962486396517, + "grad_norm": 31.531978607177734, + "learning_rate": 4.956854757723708e-06, + "loss": 0.4438, + "num_input_tokens_seen": 37702656, + "step": 11975 + }, + { + "epoch": 0.7669163305806287, + "grad_norm": 31.11536979675293, + "learning_rate": 4.956751355737116e-06, + "loss": 0.5411, + "num_input_tokens_seen": 37718080, + "step": 11980 + }, + { + "epoch": 0.7672364125216056, + "grad_norm": 23.407224655151367, + "learning_rate": 4.956647831073158e-06, + "loss": 0.3796, + "num_input_tokens_seen": 37735680, + "step": 11985 + }, + { + "epoch": 0.7675564944625825, + "grad_norm": 72.07378387451172, + "learning_rate": 4.956544183737003e-06, + "loss": 0.4634, + "num_input_tokens_seen": 37749952, + "step": 11990 + }, + { + "epoch": 0.7678765764035593, + "grad_norm": 43.014652252197266, + "learning_rate": 4.956440413733828e-06, + "loss": 0.7163, + "num_input_tokens_seen": 37765312, + "step": 11995 + }, + { + "epoch": 0.7681966583445362, + "grad_norm": 36.80216598510742, + "learning_rate": 4.956336521068814e-06, + "loss": 0.5274, + "num_input_tokens_seen": 37780160, + "step": 12000 + }, + { + "epoch": 0.7685167402855131, + "grad_norm": 21.061689376831055, + "learning_rate": 4.956232505747148e-06, + "loss": 0.3818, + "num_input_tokens_seen": 37796352, + "step": 12005 + }, + { + "epoch": 0.76883682222649, + "grad_norm": 50.6421012878418, + "learning_rate": 4.9561283677740254e-06, + "loss": 0.4416, + "num_input_tokens_seen": 37812864, + "step": 12010 + }, + { + "epoch": 0.7691569041674668, + "grad_norm": 23.72433853149414, + "learning_rate": 4.956024107154645e-06, + "loss": 0.5501, + "num_input_tokens_seen": 37827840, + "step": 12015 + }, + { + "epoch": 0.7694769861084437, + "grad_norm": 30.328454971313477, + "learning_rate": 4.9559197238942135e-06, + "loss": 0.5076, + "num_input_tokens_seen": 37847040, + "step": 12020 + }, + { + "epoch": 0.7697970680494206, + "grad_norm": 51.722652435302734, + "learning_rate": 4.955815217997944e-06, + "loss": 0.4629, + "num_input_tokens_seen": 37862464, + "step": 12025 + }, + { + "epoch": 0.7701171499903975, + "grad_norm": 30.70990562438965, + "learning_rate": 4.9557105894710545e-06, + "loss": 0.5349, + "num_input_tokens_seen": 37877760, + "step": 12030 + }, + { + "epoch": 0.7704372319313745, + "grad_norm": 17.455947875976562, + "learning_rate": 4.955605838318769e-06, + "loss": 0.4056, + "num_input_tokens_seen": 37892800, + "step": 12035 + }, + { + "epoch": 0.7707573138723514, + "grad_norm": 34.319034576416016, + "learning_rate": 4.955500964546319e-06, + "loss": 0.7183, + "num_input_tokens_seen": 37909056, + "step": 12040 + }, + { + "epoch": 0.7710773958133282, + "grad_norm": 60.929508209228516, + "learning_rate": 4.9553959681589404e-06, + "loss": 0.4416, + "num_input_tokens_seen": 37925376, + "step": 12045 + }, + { + "epoch": 0.7713974777543051, + "grad_norm": 21.978933334350586, + "learning_rate": 4.955290849161878e-06, + "loss": 0.494, + "num_input_tokens_seen": 37941632, + "step": 12050 + }, + { + "epoch": 0.771717559695282, + "grad_norm": 23.66015625, + "learning_rate": 4.955185607560379e-06, + "loss": 0.4919, + "num_input_tokens_seen": 37957056, + "step": 12055 + }, + { + "epoch": 0.7720376416362589, + "grad_norm": 37.81621170043945, + "learning_rate": 4.955080243359699e-06, + "loss": 0.5699, + "num_input_tokens_seen": 37973184, + "step": 12060 + }, + { + "epoch": 0.7723577235772358, + "grad_norm": 23.1015682220459, + "learning_rate": 4.954974756565101e-06, + "loss": 0.4621, + "num_input_tokens_seen": 37988288, + "step": 12065 + }, + { + "epoch": 0.7726778055182126, + "grad_norm": 44.676719665527344, + "learning_rate": 4.95486914718185e-06, + "loss": 0.5975, + "num_input_tokens_seen": 38004224, + "step": 12070 + }, + { + "epoch": 0.7729978874591895, + "grad_norm": 33.76252365112305, + "learning_rate": 4.954763415215221e-06, + "loss": 0.6549, + "num_input_tokens_seen": 38019456, + "step": 12075 + }, + { + "epoch": 0.7733179694001664, + "grad_norm": 26.371417999267578, + "learning_rate": 4.954657560670494e-06, + "loss": 0.584, + "num_input_tokens_seen": 38035264, + "step": 12080 + }, + { + "epoch": 0.7736380513411434, + "grad_norm": 43.929054260253906, + "learning_rate": 4.9545515835529535e-06, + "loss": 0.522, + "num_input_tokens_seen": 38051072, + "step": 12085 + }, + { + "epoch": 0.7739581332821203, + "grad_norm": 31.342266082763672, + "learning_rate": 4.954445483867892e-06, + "loss": 0.5556, + "num_input_tokens_seen": 38066688, + "step": 12090 + }, + { + "epoch": 0.7742782152230971, + "grad_norm": 24.561965942382812, + "learning_rate": 4.9543392616206085e-06, + "loss": 0.4577, + "num_input_tokens_seen": 38082048, + "step": 12095 + }, + { + "epoch": 0.774598297164074, + "grad_norm": 27.430803298950195, + "learning_rate": 4.954232916816406e-06, + "loss": 0.4157, + "num_input_tokens_seen": 38097344, + "step": 12100 + }, + { + "epoch": 0.7749183791050509, + "grad_norm": 33.13986587524414, + "learning_rate": 4.954126449460596e-06, + "loss": 0.6133, + "num_input_tokens_seen": 38112320, + "step": 12105 + }, + { + "epoch": 0.7752384610460278, + "grad_norm": 23.77056884765625, + "learning_rate": 4.954019859558493e-06, + "loss": 0.4779, + "num_input_tokens_seen": 38128000, + "step": 12110 + }, + { + "epoch": 0.7755585429870047, + "grad_norm": 29.16203498840332, + "learning_rate": 4.953913147115422e-06, + "loss": 0.4682, + "num_input_tokens_seen": 38143168, + "step": 12115 + }, + { + "epoch": 0.7758786249279815, + "grad_norm": 17.931978225708008, + "learning_rate": 4.953806312136709e-06, + "loss": 0.3558, + "num_input_tokens_seen": 38157824, + "step": 12120 + }, + { + "epoch": 0.7761987068689584, + "grad_norm": 35.75004959106445, + "learning_rate": 4.953699354627692e-06, + "loss": 0.4056, + "num_input_tokens_seen": 38172864, + "step": 12125 + }, + { + "epoch": 0.7765187888099353, + "grad_norm": 35.30179214477539, + "learning_rate": 4.953592274593709e-06, + "loss": 0.6804, + "num_input_tokens_seen": 38187264, + "step": 12130 + }, + { + "epoch": 0.7768388707509122, + "grad_norm": 18.301549911499023, + "learning_rate": 4.953485072040108e-06, + "loss": 0.4035, + "num_input_tokens_seen": 38204288, + "step": 12135 + }, + { + "epoch": 0.7771589526918892, + "grad_norm": 29.0173282623291, + "learning_rate": 4.9533777469722425e-06, + "loss": 0.3822, + "num_input_tokens_seen": 38219264, + "step": 12140 + }, + { + "epoch": 0.777479034632866, + "grad_norm": 27.2005672454834, + "learning_rate": 4.953270299395472e-06, + "loss": 0.6172, + "num_input_tokens_seen": 38234496, + "step": 12145 + }, + { + "epoch": 0.7777991165738429, + "grad_norm": 27.898971557617188, + "learning_rate": 4.953162729315161e-06, + "loss": 0.3488, + "num_input_tokens_seen": 38251072, + "step": 12150 + }, + { + "epoch": 0.7781191985148198, + "grad_norm": 21.636789321899414, + "learning_rate": 4.953055036736681e-06, + "loss": 0.5489, + "num_input_tokens_seen": 38266560, + "step": 12155 + }, + { + "epoch": 0.7784392804557967, + "grad_norm": 41.12586975097656, + "learning_rate": 4.952947221665409e-06, + "loss": 0.4762, + "num_input_tokens_seen": 38282944, + "step": 12160 + }, + { + "epoch": 0.7787593623967736, + "grad_norm": 24.706520080566406, + "learning_rate": 4.952839284106731e-06, + "loss": 0.4529, + "num_input_tokens_seen": 38299264, + "step": 12165 + }, + { + "epoch": 0.7790794443377504, + "grad_norm": 40.58860778808594, + "learning_rate": 4.952731224066036e-06, + "loss": 0.5542, + "num_input_tokens_seen": 38314368, + "step": 12170 + }, + { + "epoch": 0.7793995262787273, + "grad_norm": 23.51359748840332, + "learning_rate": 4.952623041548719e-06, + "loss": 0.5084, + "num_input_tokens_seen": 38329664, + "step": 12175 + }, + { + "epoch": 0.7797196082197042, + "grad_norm": 18.299924850463867, + "learning_rate": 4.952514736560182e-06, + "loss": 0.4042, + "num_input_tokens_seen": 38345408, + "step": 12180 + }, + { + "epoch": 0.7800396901606811, + "grad_norm": 29.535202026367188, + "learning_rate": 4.9524063091058335e-06, + "loss": 0.4182, + "num_input_tokens_seen": 38362688, + "step": 12185 + }, + { + "epoch": 0.7803597721016581, + "grad_norm": 15.210477828979492, + "learning_rate": 4.952297759191089e-06, + "loss": 0.3383, + "num_input_tokens_seen": 38377600, + "step": 12190 + }, + { + "epoch": 0.780679854042635, + "grad_norm": 36.80657196044922, + "learning_rate": 4.952189086821367e-06, + "loss": 0.5621, + "num_input_tokens_seen": 38392640, + "step": 12195 + }, + { + "epoch": 0.7809999359836118, + "grad_norm": 43.96888732910156, + "learning_rate": 4.952080292002096e-06, + "loss": 0.6388, + "num_input_tokens_seen": 38408448, + "step": 12200 + }, + { + "epoch": 0.7813200179245887, + "grad_norm": 19.570650100708008, + "learning_rate": 4.9519713747387075e-06, + "loss": 0.5865, + "num_input_tokens_seen": 38424512, + "step": 12205 + }, + { + "epoch": 0.7816400998655656, + "grad_norm": 33.0003776550293, + "learning_rate": 4.95186233503664e-06, + "loss": 0.6003, + "num_input_tokens_seen": 38439296, + "step": 12210 + }, + { + "epoch": 0.7819601818065425, + "grad_norm": 13.441228866577148, + "learning_rate": 4.951753172901339e-06, + "loss": 0.3559, + "num_input_tokens_seen": 38455424, + "step": 12215 + }, + { + "epoch": 0.7822802637475194, + "grad_norm": 22.939117431640625, + "learning_rate": 4.951643888338256e-06, + "loss": 0.5281, + "num_input_tokens_seen": 38470976, + "step": 12220 + }, + { + "epoch": 0.7826003456884962, + "grad_norm": 29.38359260559082, + "learning_rate": 4.951534481352845e-06, + "loss": 0.663, + "num_input_tokens_seen": 38486016, + "step": 12225 + }, + { + "epoch": 0.7829204276294731, + "grad_norm": 17.259965896606445, + "learning_rate": 4.951424951950574e-06, + "loss": 0.4021, + "num_input_tokens_seen": 38501248, + "step": 12230 + }, + { + "epoch": 0.78324050957045, + "grad_norm": 17.153247833251953, + "learning_rate": 4.951315300136909e-06, + "loss": 0.4555, + "num_input_tokens_seen": 38516992, + "step": 12235 + }, + { + "epoch": 0.7835605915114269, + "grad_norm": 17.752426147460938, + "learning_rate": 4.951205525917326e-06, + "loss": 0.4991, + "num_input_tokens_seen": 38531456, + "step": 12240 + }, + { + "epoch": 0.7838806734524039, + "grad_norm": 32.788238525390625, + "learning_rate": 4.951095629297308e-06, + "loss": 0.4745, + "num_input_tokens_seen": 38546880, + "step": 12245 + }, + { + "epoch": 0.7842007553933807, + "grad_norm": 31.27967643737793, + "learning_rate": 4.9509856102823404e-06, + "loss": 0.4795, + "num_input_tokens_seen": 38562176, + "step": 12250 + }, + { + "epoch": 0.7845208373343576, + "grad_norm": 31.50762176513672, + "learning_rate": 4.950875468877918e-06, + "loss": 0.53, + "num_input_tokens_seen": 38577472, + "step": 12255 + }, + { + "epoch": 0.7848409192753345, + "grad_norm": 17.525625228881836, + "learning_rate": 4.9507652050895415e-06, + "loss": 0.4954, + "num_input_tokens_seen": 38593088, + "step": 12260 + }, + { + "epoch": 0.7851610012163114, + "grad_norm": 31.49003791809082, + "learning_rate": 4.950654818922716e-06, + "loss": 0.6472, + "num_input_tokens_seen": 38607936, + "step": 12265 + }, + { + "epoch": 0.7854810831572883, + "grad_norm": 25.40071678161621, + "learning_rate": 4.950544310382954e-06, + "loss": 0.3883, + "num_input_tokens_seen": 38625024, + "step": 12270 + }, + { + "epoch": 0.7858011650982651, + "grad_norm": 34.3929328918457, + "learning_rate": 4.950433679475774e-06, + "loss": 0.4866, + "num_input_tokens_seen": 38641792, + "step": 12275 + }, + { + "epoch": 0.786121247039242, + "grad_norm": 34.23332214355469, + "learning_rate": 4.950322926206699e-06, + "loss": 0.5882, + "num_input_tokens_seen": 38656896, + "step": 12280 + }, + { + "epoch": 0.7864413289802189, + "grad_norm": 41.69090270996094, + "learning_rate": 4.950212050581261e-06, + "loss": 0.4555, + "num_input_tokens_seen": 38672448, + "step": 12285 + }, + { + "epoch": 0.7867614109211958, + "grad_norm": 56.060447692871094, + "learning_rate": 4.950101052604995e-06, + "loss": 0.4815, + "num_input_tokens_seen": 38688576, + "step": 12290 + }, + { + "epoch": 0.7870814928621728, + "grad_norm": 25.314077377319336, + "learning_rate": 4.9499899322834455e-06, + "loss": 0.6147, + "num_input_tokens_seen": 38704064, + "step": 12295 + }, + { + "epoch": 0.7874015748031497, + "grad_norm": 24.315431594848633, + "learning_rate": 4.949878689622161e-06, + "loss": 0.4661, + "num_input_tokens_seen": 38718976, + "step": 12300 + }, + { + "epoch": 0.7877216567441265, + "grad_norm": 25.249897003173828, + "learning_rate": 4.949767324626694e-06, + "loss": 0.5098, + "num_input_tokens_seen": 38734272, + "step": 12305 + }, + { + "epoch": 0.7880417386851034, + "grad_norm": 12.71366024017334, + "learning_rate": 4.949655837302608e-06, + "loss": 0.4012, + "num_input_tokens_seen": 38749504, + "step": 12310 + }, + { + "epoch": 0.7883618206260803, + "grad_norm": 48.49773406982422, + "learning_rate": 4.949544227655469e-06, + "loss": 0.6175, + "num_input_tokens_seen": 38764352, + "step": 12315 + }, + { + "epoch": 0.7886819025670572, + "grad_norm": 20.69541358947754, + "learning_rate": 4.9494324956908505e-06, + "loss": 0.4962, + "num_input_tokens_seen": 38780096, + "step": 12320 + }, + { + "epoch": 0.789001984508034, + "grad_norm": 26.589277267456055, + "learning_rate": 4.949320641414332e-06, + "loss": 0.4824, + "num_input_tokens_seen": 38795712, + "step": 12325 + }, + { + "epoch": 0.7893220664490109, + "grad_norm": 30.000301361083984, + "learning_rate": 4.9492086648314984e-06, + "loss": 0.4731, + "num_input_tokens_seen": 38811328, + "step": 12330 + }, + { + "epoch": 0.7896421483899878, + "grad_norm": 21.9301815032959, + "learning_rate": 4.949096565947942e-06, + "loss": 0.4184, + "num_input_tokens_seen": 38826240, + "step": 12335 + }, + { + "epoch": 0.7899622303309647, + "grad_norm": 32.35263442993164, + "learning_rate": 4.94898434476926e-06, + "loss": 0.4814, + "num_input_tokens_seen": 38842624, + "step": 12340 + }, + { + "epoch": 0.7902823122719416, + "grad_norm": 30.180362701416016, + "learning_rate": 4.9488720013010556e-06, + "loss": 0.394, + "num_input_tokens_seen": 38857472, + "step": 12345 + }, + { + "epoch": 0.7906023942129186, + "grad_norm": 27.999338150024414, + "learning_rate": 4.948759535548939e-06, + "loss": 0.586, + "num_input_tokens_seen": 38872256, + "step": 12350 + }, + { + "epoch": 0.7909224761538954, + "grad_norm": 41.136783599853516, + "learning_rate": 4.948646947518527e-06, + "loss": 0.5738, + "num_input_tokens_seen": 38890048, + "step": 12355 + }, + { + "epoch": 0.7912425580948723, + "grad_norm": 32.7123908996582, + "learning_rate": 4.948534237215441e-06, + "loss": 0.3512, + "num_input_tokens_seen": 38905664, + "step": 12360 + }, + { + "epoch": 0.7915626400358492, + "grad_norm": 17.122509002685547, + "learning_rate": 4.948421404645308e-06, + "loss": 0.4868, + "num_input_tokens_seen": 38926144, + "step": 12365 + }, + { + "epoch": 0.7918827219768261, + "grad_norm": 32.28532791137695, + "learning_rate": 4.948308449813764e-06, + "loss": 0.62, + "num_input_tokens_seen": 38942720, + "step": 12370 + }, + { + "epoch": 0.792202803917803, + "grad_norm": 13.206892967224121, + "learning_rate": 4.948195372726449e-06, + "loss": 0.4897, + "num_input_tokens_seen": 38958144, + "step": 12375 + }, + { + "epoch": 0.7925228858587798, + "grad_norm": 20.65040397644043, + "learning_rate": 4.948082173389011e-06, + "loss": 0.3435, + "num_input_tokens_seen": 38973376, + "step": 12380 + }, + { + "epoch": 0.7928429677997567, + "grad_norm": 52.96775436401367, + "learning_rate": 4.947968851807098e-06, + "loss": 0.4778, + "num_input_tokens_seen": 38989504, + "step": 12385 + }, + { + "epoch": 0.7931630497407336, + "grad_norm": 28.429698944091797, + "learning_rate": 4.947855407986373e-06, + "loss": 0.5404, + "num_input_tokens_seen": 39005056, + "step": 12390 + }, + { + "epoch": 0.7934831316817105, + "grad_norm": 37.90256881713867, + "learning_rate": 4.947741841932499e-06, + "loss": 0.5248, + "num_input_tokens_seen": 39020608, + "step": 12395 + }, + { + "epoch": 0.7938032136226874, + "grad_norm": 28.458553314208984, + "learning_rate": 4.947628153651147e-06, + "loss": 0.6509, + "num_input_tokens_seen": 39035712, + "step": 12400 + }, + { + "epoch": 0.7941232955636643, + "grad_norm": 54.94640350341797, + "learning_rate": 4.947514343147995e-06, + "loss": 0.4714, + "num_input_tokens_seen": 39050880, + "step": 12405 + }, + { + "epoch": 0.7944433775046412, + "grad_norm": 36.928043365478516, + "learning_rate": 4.947400410428725e-06, + "loss": 0.5796, + "num_input_tokens_seen": 39065792, + "step": 12410 + }, + { + "epoch": 0.7947634594456181, + "grad_norm": 53.746707916259766, + "learning_rate": 4.947286355499026e-06, + "loss": 0.4938, + "num_input_tokens_seen": 39080960, + "step": 12415 + }, + { + "epoch": 0.795083541386595, + "grad_norm": 21.46417999267578, + "learning_rate": 4.947172178364595e-06, + "loss": 0.3604, + "num_input_tokens_seen": 39097216, + "step": 12420 + }, + { + "epoch": 0.7954036233275719, + "grad_norm": 42.78304672241211, + "learning_rate": 4.947057879031131e-06, + "loss": 0.321, + "num_input_tokens_seen": 39113152, + "step": 12425 + }, + { + "epoch": 0.7957237052685487, + "grad_norm": 40.769901275634766, + "learning_rate": 4.946943457504343e-06, + "loss": 0.618, + "num_input_tokens_seen": 39130176, + "step": 12430 + }, + { + "epoch": 0.7960437872095256, + "grad_norm": 20.88823699951172, + "learning_rate": 4.946828913789945e-06, + "loss": 0.3819, + "num_input_tokens_seen": 39145792, + "step": 12435 + }, + { + "epoch": 0.7963638691505025, + "grad_norm": 34.74875259399414, + "learning_rate": 4.9467142478936555e-06, + "loss": 0.593, + "num_input_tokens_seen": 39161280, + "step": 12440 + }, + { + "epoch": 0.7966839510914794, + "grad_norm": 21.593141555786133, + "learning_rate": 4.946599459821202e-06, + "loss": 0.3727, + "num_input_tokens_seen": 39176512, + "step": 12445 + }, + { + "epoch": 0.7970040330324563, + "grad_norm": 35.31416702270508, + "learning_rate": 4.946484549578314e-06, + "loss": 0.5835, + "num_input_tokens_seen": 39191808, + "step": 12450 + }, + { + "epoch": 0.7973241149734333, + "grad_norm": 17.56813621520996, + "learning_rate": 4.946369517170731e-06, + "loss": 0.3811, + "num_input_tokens_seen": 39208640, + "step": 12455 + }, + { + "epoch": 0.7976441969144101, + "grad_norm": 24.09280776977539, + "learning_rate": 4.946254362604198e-06, + "loss": 0.4684, + "num_input_tokens_seen": 39223232, + "step": 12460 + }, + { + "epoch": 0.797964278855387, + "grad_norm": 25.416366577148438, + "learning_rate": 4.9461390858844635e-06, + "loss": 0.4334, + "num_input_tokens_seen": 39238656, + "step": 12465 + }, + { + "epoch": 0.7982843607963639, + "grad_norm": 48.90129089355469, + "learning_rate": 4.946023687017285e-06, + "loss": 0.553, + "num_input_tokens_seen": 39255872, + "step": 12470 + }, + { + "epoch": 0.7986044427373408, + "grad_norm": 36.01192855834961, + "learning_rate": 4.945908166008424e-06, + "loss": 0.4521, + "num_input_tokens_seen": 39270784, + "step": 12475 + }, + { + "epoch": 0.7989245246783176, + "grad_norm": 25.20235252380371, + "learning_rate": 4.945792522863649e-06, + "loss": 0.4391, + "num_input_tokens_seen": 39286080, + "step": 12480 + }, + { + "epoch": 0.7992446066192945, + "grad_norm": 14.561528205871582, + "learning_rate": 4.945676757588736e-06, + "loss": 0.5249, + "num_input_tokens_seen": 39301312, + "step": 12485 + }, + { + "epoch": 0.7995646885602714, + "grad_norm": 19.597991943359375, + "learning_rate": 4.945560870189465e-06, + "loss": 0.4352, + "num_input_tokens_seen": 39316736, + "step": 12490 + }, + { + "epoch": 0.7998847705012483, + "grad_norm": 51.69601821899414, + "learning_rate": 4.945444860671622e-06, + "loss": 0.4878, + "num_input_tokens_seen": 39331264, + "step": 12495 + }, + { + "epoch": 0.8002048524422252, + "grad_norm": 50.69813537597656, + "learning_rate": 4.945328729041e-06, + "loss": 0.4433, + "num_input_tokens_seen": 39345856, + "step": 12500 + }, + { + "epoch": 0.800524934383202, + "grad_norm": 21.11165428161621, + "learning_rate": 4.945212475303399e-06, + "loss": 0.5399, + "num_input_tokens_seen": 39361280, + "step": 12505 + }, + { + "epoch": 0.800845016324179, + "grad_norm": 22.143356323242188, + "learning_rate": 4.9450960994646245e-06, + "loss": 0.4515, + "num_input_tokens_seen": 39375872, + "step": 12510 + }, + { + "epoch": 0.8011650982651559, + "grad_norm": 35.790130615234375, + "learning_rate": 4.944979601530486e-06, + "loss": 0.3918, + "num_input_tokens_seen": 39392320, + "step": 12515 + }, + { + "epoch": 0.8014851802061328, + "grad_norm": 19.947914123535156, + "learning_rate": 4.944862981506802e-06, + "loss": 0.4591, + "num_input_tokens_seen": 39407680, + "step": 12520 + }, + { + "epoch": 0.8018052621471097, + "grad_norm": 14.365900993347168, + "learning_rate": 4.944746239399395e-06, + "loss": 0.4409, + "num_input_tokens_seen": 39424512, + "step": 12525 + }, + { + "epoch": 0.8021253440880866, + "grad_norm": 46.890689849853516, + "learning_rate": 4.944629375214095e-06, + "loss": 0.505, + "num_input_tokens_seen": 39439296, + "step": 12530 + }, + { + "epoch": 0.8024454260290634, + "grad_norm": 42.080116271972656, + "learning_rate": 4.944512388956738e-06, + "loss": 0.577, + "num_input_tokens_seen": 39456576, + "step": 12535 + }, + { + "epoch": 0.8027655079700403, + "grad_norm": 24.142608642578125, + "learning_rate": 4.944395280633165e-06, + "loss": 0.3687, + "num_input_tokens_seen": 39472512, + "step": 12540 + }, + { + "epoch": 0.8030855899110172, + "grad_norm": 27.50400161743164, + "learning_rate": 4.944278050249224e-06, + "loss": 0.4122, + "num_input_tokens_seen": 39488192, + "step": 12545 + }, + { + "epoch": 0.8034056718519941, + "grad_norm": 44.11458206176758, + "learning_rate": 4.944160697810769e-06, + "loss": 0.5375, + "num_input_tokens_seen": 39503552, + "step": 12550 + }, + { + "epoch": 0.803725753792971, + "grad_norm": 62.32224655151367, + "learning_rate": 4.94404322332366e-06, + "loss": 0.5382, + "num_input_tokens_seen": 39519744, + "step": 12555 + }, + { + "epoch": 0.804045835733948, + "grad_norm": 25.999778747558594, + "learning_rate": 4.943925626793764e-06, + "loss": 0.6759, + "num_input_tokens_seen": 39535232, + "step": 12560 + }, + { + "epoch": 0.8043659176749248, + "grad_norm": 50.524513244628906, + "learning_rate": 4.943807908226951e-06, + "loss": 0.5241, + "num_input_tokens_seen": 39549568, + "step": 12565 + }, + { + "epoch": 0.8046859996159017, + "grad_norm": 22.72760581970215, + "learning_rate": 4.9436900676291e-06, + "loss": 0.4066, + "num_input_tokens_seen": 39566016, + "step": 12570 + }, + { + "epoch": 0.8050060815568786, + "grad_norm": 38.74031448364258, + "learning_rate": 4.943572105006097e-06, + "loss": 0.4023, + "num_input_tokens_seen": 39581760, + "step": 12575 + }, + { + "epoch": 0.8053261634978555, + "grad_norm": 31.33940887451172, + "learning_rate": 4.94345402036383e-06, + "loss": 0.5066, + "num_input_tokens_seen": 39597888, + "step": 12580 + }, + { + "epoch": 0.8056462454388323, + "grad_norm": 12.513606071472168, + "learning_rate": 4.9433358137081974e-06, + "loss": 0.5907, + "num_input_tokens_seen": 39613568, + "step": 12585 + }, + { + "epoch": 0.8059663273798092, + "grad_norm": 23.77779197692871, + "learning_rate": 4.943217485045101e-06, + "loss": 0.4664, + "num_input_tokens_seen": 39628096, + "step": 12590 + }, + { + "epoch": 0.8062864093207861, + "grad_norm": 33.992828369140625, + "learning_rate": 4.94309903438045e-06, + "loss": 0.3563, + "num_input_tokens_seen": 39642560, + "step": 12595 + }, + { + "epoch": 0.806606491261763, + "grad_norm": 14.916972160339355, + "learning_rate": 4.9429804617201585e-06, + "loss": 0.4185, + "num_input_tokens_seen": 39658112, + "step": 12600 + }, + { + "epoch": 0.8069265732027399, + "grad_norm": 22.950515747070312, + "learning_rate": 4.9428617670701475e-06, + "loss": 0.5237, + "num_input_tokens_seen": 39674112, + "step": 12605 + }, + { + "epoch": 0.8072466551437167, + "grad_norm": 53.5792350769043, + "learning_rate": 4.942742950436344e-06, + "loss": 0.4363, + "num_input_tokens_seen": 39690432, + "step": 12610 + }, + { + "epoch": 0.8075667370846937, + "grad_norm": 19.655059814453125, + "learning_rate": 4.942624011824683e-06, + "loss": 0.4683, + "num_input_tokens_seen": 39705792, + "step": 12615 + }, + { + "epoch": 0.8078868190256706, + "grad_norm": 25.16796875, + "learning_rate": 4.942504951241101e-06, + "loss": 0.5312, + "num_input_tokens_seen": 39722048, + "step": 12620 + }, + { + "epoch": 0.8082069009666475, + "grad_norm": 16.916597366333008, + "learning_rate": 4.9423857686915435e-06, + "loss": 0.4972, + "num_input_tokens_seen": 39738304, + "step": 12625 + }, + { + "epoch": 0.8085269829076244, + "grad_norm": 24.043014526367188, + "learning_rate": 4.9422664641819634e-06, + "loss": 0.4825, + "num_input_tokens_seen": 39753728, + "step": 12630 + }, + { + "epoch": 0.8088470648486012, + "grad_norm": 25.786624908447266, + "learning_rate": 4.942147037718317e-06, + "loss": 0.4945, + "num_input_tokens_seen": 39769600, + "step": 12635 + }, + { + "epoch": 0.8091671467895781, + "grad_norm": 31.529565811157227, + "learning_rate": 4.942027489306569e-06, + "loss": 0.4703, + "num_input_tokens_seen": 39784000, + "step": 12640 + }, + { + "epoch": 0.809487228730555, + "grad_norm": 26.131879806518555, + "learning_rate": 4.941907818952687e-06, + "loss": 0.4305, + "num_input_tokens_seen": 39800320, + "step": 12645 + }, + { + "epoch": 0.8098073106715319, + "grad_norm": 19.291120529174805, + "learning_rate": 4.941788026662649e-06, + "loss": 0.4837, + "num_input_tokens_seen": 39815360, + "step": 12650 + }, + { + "epoch": 0.8101273926125088, + "grad_norm": 21.583480834960938, + "learning_rate": 4.941668112442436e-06, + "loss": 0.4893, + "num_input_tokens_seen": 39829952, + "step": 12655 + }, + { + "epoch": 0.8104474745534856, + "grad_norm": 34.25395202636719, + "learning_rate": 4.941548076298035e-06, + "loss": 0.473, + "num_input_tokens_seen": 39845376, + "step": 12660 + }, + { + "epoch": 0.8107675564944626, + "grad_norm": 19.940641403198242, + "learning_rate": 4.94142791823544e-06, + "loss": 0.5856, + "num_input_tokens_seen": 39861696, + "step": 12665 + }, + { + "epoch": 0.8110876384354395, + "grad_norm": 29.504732131958008, + "learning_rate": 4.941307638260653e-06, + "loss": 0.48, + "num_input_tokens_seen": 39877440, + "step": 12670 + }, + { + "epoch": 0.8114077203764164, + "grad_norm": 35.255069732666016, + "learning_rate": 4.941187236379678e-06, + "loss": 0.4398, + "num_input_tokens_seen": 39893760, + "step": 12675 + }, + { + "epoch": 0.8117278023173933, + "grad_norm": 15.788914680480957, + "learning_rate": 4.941066712598528e-06, + "loss": 0.3593, + "num_input_tokens_seen": 39909568, + "step": 12680 + }, + { + "epoch": 0.8120478842583702, + "grad_norm": 27.053300857543945, + "learning_rate": 4.940946066923222e-06, + "loss": 0.5778, + "num_input_tokens_seen": 39923648, + "step": 12685 + }, + { + "epoch": 0.812367966199347, + "grad_norm": 25.632741928100586, + "learning_rate": 4.940825299359784e-06, + "loss": 0.5954, + "num_input_tokens_seen": 39939008, + "step": 12690 + }, + { + "epoch": 0.8126880481403239, + "grad_norm": 22.01157569885254, + "learning_rate": 4.9407044099142435e-06, + "loss": 0.5824, + "num_input_tokens_seen": 39955072, + "step": 12695 + }, + { + "epoch": 0.8130081300813008, + "grad_norm": 19.42447280883789, + "learning_rate": 4.940583398592639e-06, + "loss": 0.5653, + "num_input_tokens_seen": 39970880, + "step": 12700 + }, + { + "epoch": 0.8133282120222777, + "grad_norm": 40.7269172668457, + "learning_rate": 4.940462265401012e-06, + "loss": 0.5909, + "num_input_tokens_seen": 39986240, + "step": 12705 + }, + { + "epoch": 0.8136482939632546, + "grad_norm": 28.861509323120117, + "learning_rate": 4.940341010345409e-06, + "loss": 0.4893, + "num_input_tokens_seen": 40001856, + "step": 12710 + }, + { + "epoch": 0.8139683759042314, + "grad_norm": 14.567154884338379, + "learning_rate": 4.94021963343189e-06, + "loss": 0.5328, + "num_input_tokens_seen": 40018112, + "step": 12715 + }, + { + "epoch": 0.8142884578452084, + "grad_norm": 28.108407974243164, + "learning_rate": 4.940098134666512e-06, + "loss": 0.4697, + "num_input_tokens_seen": 40033664, + "step": 12720 + }, + { + "epoch": 0.8146085397861853, + "grad_norm": 27.83951187133789, + "learning_rate": 4.939976514055344e-06, + "loss": 0.4335, + "num_input_tokens_seen": 40048768, + "step": 12725 + }, + { + "epoch": 0.8149286217271622, + "grad_norm": 19.642047882080078, + "learning_rate": 4.939854771604457e-06, + "loss": 0.4576, + "num_input_tokens_seen": 40063232, + "step": 12730 + }, + { + "epoch": 0.8152487036681391, + "grad_norm": 39.4839973449707, + "learning_rate": 4.939732907319933e-06, + "loss": 0.5915, + "num_input_tokens_seen": 40079296, + "step": 12735 + }, + { + "epoch": 0.8155687856091159, + "grad_norm": 61.5767936706543, + "learning_rate": 4.9396109212078545e-06, + "loss": 0.57, + "num_input_tokens_seen": 40094976, + "step": 12740 + }, + { + "epoch": 0.8158888675500928, + "grad_norm": 17.254674911499023, + "learning_rate": 4.939488813274315e-06, + "loss": 0.4082, + "num_input_tokens_seen": 40110464, + "step": 12745 + }, + { + "epoch": 0.8162089494910697, + "grad_norm": 26.54833984375, + "learning_rate": 4.93936658352541e-06, + "loss": 0.6115, + "num_input_tokens_seen": 40125568, + "step": 12750 + }, + { + "epoch": 0.8165290314320466, + "grad_norm": 36.88290786743164, + "learning_rate": 4.939244231967244e-06, + "loss": 0.4509, + "num_input_tokens_seen": 40141440, + "step": 12755 + }, + { + "epoch": 0.8168491133730235, + "grad_norm": 17.768274307250977, + "learning_rate": 4.939121758605927e-06, + "loss": 0.5462, + "num_input_tokens_seen": 40156416, + "step": 12760 + }, + { + "epoch": 0.8171691953140003, + "grad_norm": 48.71873474121094, + "learning_rate": 4.938999163447574e-06, + "loss": 0.5609, + "num_input_tokens_seen": 40172928, + "step": 12765 + }, + { + "epoch": 0.8174892772549772, + "grad_norm": 25.142093658447266, + "learning_rate": 4.9388764464983075e-06, + "loss": 0.3699, + "num_input_tokens_seen": 40188096, + "step": 12770 + }, + { + "epoch": 0.8178093591959542, + "grad_norm": 30.43142318725586, + "learning_rate": 4.938753607764255e-06, + "loss": 0.5429, + "num_input_tokens_seen": 40204032, + "step": 12775 + }, + { + "epoch": 0.8181294411369311, + "grad_norm": 32.3233757019043, + "learning_rate": 4.93863064725155e-06, + "loss": 0.4195, + "num_input_tokens_seen": 40218944, + "step": 12780 + }, + { + "epoch": 0.818449523077908, + "grad_norm": 26.823089599609375, + "learning_rate": 4.9385075649663334e-06, + "loss": 0.7037, + "num_input_tokens_seen": 40235456, + "step": 12785 + }, + { + "epoch": 0.8187696050188848, + "grad_norm": 26.400774002075195, + "learning_rate": 4.93838436091475e-06, + "loss": 0.5339, + "num_input_tokens_seen": 40250176, + "step": 12790 + }, + { + "epoch": 0.8190896869598617, + "grad_norm": 32.69841384887695, + "learning_rate": 4.9382610351029535e-06, + "loss": 0.4056, + "num_input_tokens_seen": 40265472, + "step": 12795 + }, + { + "epoch": 0.8194097689008386, + "grad_norm": 22.46593475341797, + "learning_rate": 4.9381375875371005e-06, + "loss": 0.4793, + "num_input_tokens_seen": 40281728, + "step": 12800 + }, + { + "epoch": 0.8197298508418155, + "grad_norm": 28.652982711791992, + "learning_rate": 4.938014018223356e-06, + "loss": 0.4221, + "num_input_tokens_seen": 40296768, + "step": 12805 + }, + { + "epoch": 0.8200499327827924, + "grad_norm": 44.009708404541016, + "learning_rate": 4.937890327167891e-06, + "loss": 0.6157, + "num_input_tokens_seen": 40311488, + "step": 12810 + }, + { + "epoch": 0.8203700147237692, + "grad_norm": 31.545637130737305, + "learning_rate": 4.937766514376882e-06, + "loss": 0.3821, + "num_input_tokens_seen": 40327232, + "step": 12815 + }, + { + "epoch": 0.8206900966647461, + "grad_norm": 29.173532485961914, + "learning_rate": 4.937642579856511e-06, + "loss": 0.6138, + "num_input_tokens_seen": 40344064, + "step": 12820 + }, + { + "epoch": 0.8210101786057231, + "grad_norm": 25.285598754882812, + "learning_rate": 4.9375185236129665e-06, + "loss": 0.5867, + "num_input_tokens_seen": 40359040, + "step": 12825 + }, + { + "epoch": 0.8213302605467, + "grad_norm": 35.65903091430664, + "learning_rate": 4.937394345652444e-06, + "loss": 0.4853, + "num_input_tokens_seen": 40376384, + "step": 12830 + }, + { + "epoch": 0.8216503424876769, + "grad_norm": 22.301610946655273, + "learning_rate": 4.937270045981143e-06, + "loss": 0.701, + "num_input_tokens_seen": 40391936, + "step": 12835 + }, + { + "epoch": 0.8219704244286538, + "grad_norm": 12.361640930175781, + "learning_rate": 4.937145624605272e-06, + "loss": 0.3718, + "num_input_tokens_seen": 40408832, + "step": 12840 + }, + { + "epoch": 0.8222905063696306, + "grad_norm": 37.81869125366211, + "learning_rate": 4.937021081531042e-06, + "loss": 0.5003, + "num_input_tokens_seen": 40425280, + "step": 12845 + }, + { + "epoch": 0.8226105883106075, + "grad_norm": 28.089384078979492, + "learning_rate": 4.936896416764674e-06, + "loss": 0.4998, + "num_input_tokens_seen": 40440832, + "step": 12850 + }, + { + "epoch": 0.8229306702515844, + "grad_norm": 24.44416618347168, + "learning_rate": 4.936771630312392e-06, + "loss": 0.3566, + "num_input_tokens_seen": 40458624, + "step": 12855 + }, + { + "epoch": 0.8232507521925613, + "grad_norm": 36.96171951293945, + "learning_rate": 4.936646722180428e-06, + "loss": 0.5574, + "num_input_tokens_seen": 40474688, + "step": 12860 + }, + { + "epoch": 0.8235708341335382, + "grad_norm": 20.50065803527832, + "learning_rate": 4.936521692375018e-06, + "loss": 0.4617, + "num_input_tokens_seen": 40490816, + "step": 12865 + }, + { + "epoch": 0.823890916074515, + "grad_norm": 33.088993072509766, + "learning_rate": 4.9363965409024065e-06, + "loss": 0.6127, + "num_input_tokens_seen": 40506112, + "step": 12870 + }, + { + "epoch": 0.8242109980154919, + "grad_norm": 30.135425567626953, + "learning_rate": 4.936271267768842e-06, + "loss": 0.5227, + "num_input_tokens_seen": 40521344, + "step": 12875 + }, + { + "epoch": 0.8245310799564689, + "grad_norm": 19.448020935058594, + "learning_rate": 4.936145872980581e-06, + "loss": 0.3535, + "num_input_tokens_seen": 40537024, + "step": 12880 + }, + { + "epoch": 0.8248511618974458, + "grad_norm": 34.00297927856445, + "learning_rate": 4.9360203565438845e-06, + "loss": 0.5369, + "num_input_tokens_seen": 40552640, + "step": 12885 + }, + { + "epoch": 0.8251712438384227, + "grad_norm": 34.16431427001953, + "learning_rate": 4.9358947184650195e-06, + "loss": 0.4837, + "num_input_tokens_seen": 40568000, + "step": 12890 + }, + { + "epoch": 0.8254913257793995, + "grad_norm": 44.73707962036133, + "learning_rate": 4.935768958750261e-06, + "loss": 0.4927, + "num_input_tokens_seen": 40584960, + "step": 12895 + }, + { + "epoch": 0.8258114077203764, + "grad_norm": 27.964298248291016, + "learning_rate": 4.935643077405888e-06, + "loss": 0.5667, + "num_input_tokens_seen": 40600704, + "step": 12900 + }, + { + "epoch": 0.8261314896613533, + "grad_norm": 21.717243194580078, + "learning_rate": 4.9355170744381866e-06, + "loss": 0.4383, + "num_input_tokens_seen": 40615872, + "step": 12905 + }, + { + "epoch": 0.8264515716023302, + "grad_norm": 30.985429763793945, + "learning_rate": 4.9353909498534495e-06, + "loss": 0.5866, + "num_input_tokens_seen": 40632640, + "step": 12910 + }, + { + "epoch": 0.8267716535433071, + "grad_norm": 74.21586608886719, + "learning_rate": 4.935264703657972e-06, + "loss": 0.4792, + "num_input_tokens_seen": 40648064, + "step": 12915 + }, + { + "epoch": 0.8270917354842839, + "grad_norm": 17.311750411987305, + "learning_rate": 4.9351383358580615e-06, + "loss": 0.5778, + "num_input_tokens_seen": 40664704, + "step": 12920 + }, + { + "epoch": 0.8274118174252608, + "grad_norm": 30.289045333862305, + "learning_rate": 4.935011846460026e-06, + "loss": 0.6154, + "num_input_tokens_seen": 40681024, + "step": 12925 + }, + { + "epoch": 0.8277318993662378, + "grad_norm": 30.403566360473633, + "learning_rate": 4.934885235470183e-06, + "loss": 0.5939, + "num_input_tokens_seen": 40695936, + "step": 12930 + }, + { + "epoch": 0.8280519813072147, + "grad_norm": 37.181610107421875, + "learning_rate": 4.934758502894853e-06, + "loss": 0.4226, + "num_input_tokens_seen": 40713472, + "step": 12935 + }, + { + "epoch": 0.8283720632481916, + "grad_norm": 36.70668411254883, + "learning_rate": 4.934631648740367e-06, + "loss": 0.508, + "num_input_tokens_seen": 40729344, + "step": 12940 + }, + { + "epoch": 0.8286921451891684, + "grad_norm": 20.139968872070312, + "learning_rate": 4.934504673013057e-06, + "loss": 0.4391, + "num_input_tokens_seen": 40745856, + "step": 12945 + }, + { + "epoch": 0.8290122271301453, + "grad_norm": 17.534713745117188, + "learning_rate": 4.934377575719265e-06, + "loss": 0.4285, + "num_input_tokens_seen": 40761920, + "step": 12950 + }, + { + "epoch": 0.8293323090711222, + "grad_norm": 17.585739135742188, + "learning_rate": 4.934250356865337e-06, + "loss": 0.3555, + "num_input_tokens_seen": 40776960, + "step": 12955 + }, + { + "epoch": 0.8296523910120991, + "grad_norm": 38.245906829833984, + "learning_rate": 4.934123016457625e-06, + "loss": 0.5046, + "num_input_tokens_seen": 40792192, + "step": 12960 + }, + { + "epoch": 0.829972472953076, + "grad_norm": 44.021026611328125, + "learning_rate": 4.933995554502489e-06, + "loss": 0.5857, + "num_input_tokens_seen": 40808256, + "step": 12965 + }, + { + "epoch": 0.8302925548940528, + "grad_norm": 31.312297821044922, + "learning_rate": 4.933867971006294e-06, + "loss": 0.5344, + "num_input_tokens_seen": 40823424, + "step": 12970 + }, + { + "epoch": 0.8306126368350297, + "grad_norm": 24.300586700439453, + "learning_rate": 4.933740265975409e-06, + "loss": 0.4892, + "num_input_tokens_seen": 40840960, + "step": 12975 + }, + { + "epoch": 0.8309327187760066, + "grad_norm": 22.319454193115234, + "learning_rate": 4.933612439416212e-06, + "loss": 0.5096, + "num_input_tokens_seen": 40857536, + "step": 12980 + }, + { + "epoch": 0.8312528007169836, + "grad_norm": 17.51906967163086, + "learning_rate": 4.933484491335086e-06, + "loss": 0.4609, + "num_input_tokens_seen": 40874240, + "step": 12985 + }, + { + "epoch": 0.8315728826579605, + "grad_norm": 46.94606399536133, + "learning_rate": 4.933356421738421e-06, + "loss": 0.5848, + "num_input_tokens_seen": 40888960, + "step": 12990 + }, + { + "epoch": 0.8318929645989374, + "grad_norm": 58.9561767578125, + "learning_rate": 4.9332282306326105e-06, + "loss": 0.6522, + "num_input_tokens_seen": 40904512, + "step": 12995 + }, + { + "epoch": 0.8322130465399142, + "grad_norm": 22.625329971313477, + "learning_rate": 4.933099918024057e-06, + "loss": 0.4739, + "num_input_tokens_seen": 40921856, + "step": 13000 + }, + { + "epoch": 0.8325331284808911, + "grad_norm": 24.74147605895996, + "learning_rate": 4.932971483919166e-06, + "loss": 0.472, + "num_input_tokens_seen": 40938752, + "step": 13005 + }, + { + "epoch": 0.832853210421868, + "grad_norm": 124.3624038696289, + "learning_rate": 4.932842928324353e-06, + "loss": 0.4399, + "num_input_tokens_seen": 40954048, + "step": 13010 + }, + { + "epoch": 0.8331732923628449, + "grad_norm": 29.347949981689453, + "learning_rate": 4.9327142512460355e-06, + "loss": 0.511, + "num_input_tokens_seen": 40972672, + "step": 13015 + }, + { + "epoch": 0.8334933743038218, + "grad_norm": 24.716100692749023, + "learning_rate": 4.932585452690641e-06, + "loss": 0.3619, + "num_input_tokens_seen": 40987648, + "step": 13020 + }, + { + "epoch": 0.8338134562447986, + "grad_norm": 26.774375915527344, + "learning_rate": 4.932456532664599e-06, + "loss": 0.4629, + "num_input_tokens_seen": 41003328, + "step": 13025 + }, + { + "epoch": 0.8341335381857755, + "grad_norm": 25.43076515197754, + "learning_rate": 4.932327491174348e-06, + "loss": 0.4555, + "num_input_tokens_seen": 41018624, + "step": 13030 + }, + { + "epoch": 0.8344536201267524, + "grad_norm": 18.790071487426758, + "learning_rate": 4.932198328226332e-06, + "loss": 0.4254, + "num_input_tokens_seen": 41034624, + "step": 13035 + }, + { + "epoch": 0.8347737020677294, + "grad_norm": 22.35611343383789, + "learning_rate": 4.932069043827e-06, + "loss": 0.554, + "num_input_tokens_seen": 41049664, + "step": 13040 + }, + { + "epoch": 0.8350937840087063, + "grad_norm": 42.99100112915039, + "learning_rate": 4.931939637982809e-06, + "loss": 0.5138, + "num_input_tokens_seen": 41066368, + "step": 13045 + }, + { + "epoch": 0.8354138659496831, + "grad_norm": 39.45537567138672, + "learning_rate": 4.931810110700219e-06, + "loss": 0.4383, + "num_input_tokens_seen": 41082048, + "step": 13050 + }, + { + "epoch": 0.83573394789066, + "grad_norm": 14.179198265075684, + "learning_rate": 4.931680461985699e-06, + "loss": 0.5354, + "num_input_tokens_seen": 41098048, + "step": 13055 + }, + { + "epoch": 0.8360540298316369, + "grad_norm": 15.691359519958496, + "learning_rate": 4.931550691845724e-06, + "loss": 0.4701, + "num_input_tokens_seen": 41112768, + "step": 13060 + }, + { + "epoch": 0.8363741117726138, + "grad_norm": 16.20827865600586, + "learning_rate": 4.9314208002867715e-06, + "loss": 0.5058, + "num_input_tokens_seen": 41127488, + "step": 13065 + }, + { + "epoch": 0.8366941937135907, + "grad_norm": 16.75193214416504, + "learning_rate": 4.9312907873153295e-06, + "loss": 0.3496, + "num_input_tokens_seen": 41142272, + "step": 13070 + }, + { + "epoch": 0.8370142756545675, + "grad_norm": 33.75775909423828, + "learning_rate": 4.931160652937889e-06, + "loss": 0.3812, + "num_input_tokens_seen": 41157952, + "step": 13075 + }, + { + "epoch": 0.8373343575955444, + "grad_norm": 34.2646598815918, + "learning_rate": 4.93103039716095e-06, + "loss": 0.6256, + "num_input_tokens_seen": 41172992, + "step": 13080 + }, + { + "epoch": 0.8376544395365213, + "grad_norm": 61.4666633605957, + "learning_rate": 4.930900019991016e-06, + "loss": 0.5281, + "num_input_tokens_seen": 41187776, + "step": 13085 + }, + { + "epoch": 0.8379745214774983, + "grad_norm": 43.994056701660156, + "learning_rate": 4.930769521434597e-06, + "loss": 0.4531, + "num_input_tokens_seen": 41204416, + "step": 13090 + }, + { + "epoch": 0.8382946034184752, + "grad_norm": 32.103126525878906, + "learning_rate": 4.930638901498208e-06, + "loss": 0.5544, + "num_input_tokens_seen": 41220032, + "step": 13095 + }, + { + "epoch": 0.838614685359452, + "grad_norm": 26.079814910888672, + "learning_rate": 4.930508160188374e-06, + "loss": 0.5094, + "num_input_tokens_seen": 41235776, + "step": 13100 + }, + { + "epoch": 0.8389347673004289, + "grad_norm": 34.051387786865234, + "learning_rate": 4.930377297511623e-06, + "loss": 0.4823, + "num_input_tokens_seen": 41252160, + "step": 13105 + }, + { + "epoch": 0.8392548492414058, + "grad_norm": 34.460960388183594, + "learning_rate": 4.930246313474488e-06, + "loss": 0.4357, + "num_input_tokens_seen": 41269056, + "step": 13110 + }, + { + "epoch": 0.8395749311823827, + "grad_norm": 27.137113571166992, + "learning_rate": 4.930115208083512e-06, + "loss": 0.4334, + "num_input_tokens_seen": 41285312, + "step": 13115 + }, + { + "epoch": 0.8398950131233596, + "grad_norm": 31.60802459716797, + "learning_rate": 4.92998398134524e-06, + "loss": 0.4662, + "num_input_tokens_seen": 41300992, + "step": 13120 + }, + { + "epoch": 0.8402150950643364, + "grad_norm": 15.288379669189453, + "learning_rate": 4.9298526332662255e-06, + "loss": 0.3655, + "num_input_tokens_seen": 41317952, + "step": 13125 + }, + { + "epoch": 0.8405351770053133, + "grad_norm": 29.702817916870117, + "learning_rate": 4.929721163853028e-06, + "loss": 0.4765, + "num_input_tokens_seen": 41333440, + "step": 13130 + }, + { + "epoch": 0.8408552589462902, + "grad_norm": 25.811702728271484, + "learning_rate": 4.929589573112209e-06, + "loss": 0.5691, + "num_input_tokens_seen": 41349312, + "step": 13135 + }, + { + "epoch": 0.8411753408872671, + "grad_norm": 39.91619873046875, + "learning_rate": 4.929457861050344e-06, + "loss": 0.4671, + "num_input_tokens_seen": 41364288, + "step": 13140 + }, + { + "epoch": 0.8414954228282441, + "grad_norm": 39.84480285644531, + "learning_rate": 4.929326027674007e-06, + "loss": 0.4784, + "num_input_tokens_seen": 41379904, + "step": 13145 + }, + { + "epoch": 0.841815504769221, + "grad_norm": 20.17856216430664, + "learning_rate": 4.929194072989783e-06, + "loss": 0.4815, + "num_input_tokens_seen": 41394432, + "step": 13150 + }, + { + "epoch": 0.8421355867101978, + "grad_norm": 39.43102264404297, + "learning_rate": 4.92906199700426e-06, + "loss": 0.405, + "num_input_tokens_seen": 41409984, + "step": 13155 + }, + { + "epoch": 0.8424556686511747, + "grad_norm": 24.833770751953125, + "learning_rate": 4.9289297997240325e-06, + "loss": 0.5116, + "num_input_tokens_seen": 41425984, + "step": 13160 + }, + { + "epoch": 0.8427757505921516, + "grad_norm": 48.2048454284668, + "learning_rate": 4.928797481155704e-06, + "loss": 0.5143, + "num_input_tokens_seen": 41441920, + "step": 13165 + }, + { + "epoch": 0.8430958325331285, + "grad_norm": 15.395936012268066, + "learning_rate": 4.928665041305879e-06, + "loss": 0.43, + "num_input_tokens_seen": 41457664, + "step": 13170 + }, + { + "epoch": 0.8434159144741054, + "grad_norm": 34.37747573852539, + "learning_rate": 4.928532480181173e-06, + "loss": 0.4053, + "num_input_tokens_seen": 41472832, + "step": 13175 + }, + { + "epoch": 0.8437359964150822, + "grad_norm": 12.79721450805664, + "learning_rate": 4.928399797788205e-06, + "loss": 0.4232, + "num_input_tokens_seen": 41490368, + "step": 13180 + }, + { + "epoch": 0.8440560783560591, + "grad_norm": 36.110958099365234, + "learning_rate": 4.9282669941336e-06, + "loss": 0.5054, + "num_input_tokens_seen": 41506624, + "step": 13185 + }, + { + "epoch": 0.844376160297036, + "grad_norm": 9.966791152954102, + "learning_rate": 4.9281340692239895e-06, + "loss": 0.3315, + "num_input_tokens_seen": 41523264, + "step": 13190 + }, + { + "epoch": 0.844696242238013, + "grad_norm": 31.65178108215332, + "learning_rate": 4.928001023066011e-06, + "loss": 0.4889, + "num_input_tokens_seen": 41538944, + "step": 13195 + }, + { + "epoch": 0.8450163241789899, + "grad_norm": 38.550235748291016, + "learning_rate": 4.92786785566631e-06, + "loss": 0.4197, + "num_input_tokens_seen": 41554880, + "step": 13200 + }, + { + "epoch": 0.8453364061199667, + "grad_norm": 26.909671783447266, + "learning_rate": 4.927734567031533e-06, + "loss": 0.6719, + "num_input_tokens_seen": 41569280, + "step": 13205 + }, + { + "epoch": 0.8456564880609436, + "grad_norm": 20.98311996459961, + "learning_rate": 4.9276011571683375e-06, + "loss": 0.3513, + "num_input_tokens_seen": 41584576, + "step": 13210 + }, + { + "epoch": 0.8459765700019205, + "grad_norm": 23.238496780395508, + "learning_rate": 4.927467626083385e-06, + "loss": 0.4335, + "num_input_tokens_seen": 41600000, + "step": 13215 + }, + { + "epoch": 0.8462966519428974, + "grad_norm": 35.49116897583008, + "learning_rate": 4.927333973783344e-06, + "loss": 0.477, + "num_input_tokens_seen": 41615040, + "step": 13220 + }, + { + "epoch": 0.8466167338838743, + "grad_norm": 24.879253387451172, + "learning_rate": 4.9272002002748875e-06, + "loss": 0.4326, + "num_input_tokens_seen": 41630208, + "step": 13225 + }, + { + "epoch": 0.8469368158248511, + "grad_norm": 31.963420867919922, + "learning_rate": 4.927066305564696e-06, + "loss": 0.5416, + "num_input_tokens_seen": 41645440, + "step": 13230 + }, + { + "epoch": 0.847256897765828, + "grad_norm": 32.115177154541016, + "learning_rate": 4.926932289659456e-06, + "loss": 0.4848, + "num_input_tokens_seen": 41661184, + "step": 13235 + }, + { + "epoch": 0.8475769797068049, + "grad_norm": 40.35222244262695, + "learning_rate": 4.926798152565858e-06, + "loss": 0.5978, + "num_input_tokens_seen": 41676224, + "step": 13240 + }, + { + "epoch": 0.8478970616477818, + "grad_norm": 17.793275833129883, + "learning_rate": 4.926663894290601e-06, + "loss": 0.4259, + "num_input_tokens_seen": 41690816, + "step": 13245 + }, + { + "epoch": 0.8482171435887588, + "grad_norm": 30.294057846069336, + "learning_rate": 4.926529514840389e-06, + "loss": 0.4539, + "num_input_tokens_seen": 41706688, + "step": 13250 + }, + { + "epoch": 0.8485372255297357, + "grad_norm": 29.85148048400879, + "learning_rate": 4.926395014221933e-06, + "loss": 0.4773, + "num_input_tokens_seen": 41721920, + "step": 13255 + }, + { + "epoch": 0.8488573074707125, + "grad_norm": 24.58063316345215, + "learning_rate": 4.926260392441948e-06, + "loss": 0.482, + "num_input_tokens_seen": 41738112, + "step": 13260 + }, + { + "epoch": 0.8491773894116894, + "grad_norm": 22.97865867614746, + "learning_rate": 4.926125649507157e-06, + "loss": 0.4383, + "num_input_tokens_seen": 41753792, + "step": 13265 + }, + { + "epoch": 0.8494974713526663, + "grad_norm": 19.795001983642578, + "learning_rate": 4.92599078542429e-06, + "loss": 0.3857, + "num_input_tokens_seen": 41771328, + "step": 13270 + }, + { + "epoch": 0.8498175532936432, + "grad_norm": 26.760692596435547, + "learning_rate": 4.925855800200078e-06, + "loss": 0.5227, + "num_input_tokens_seen": 41787712, + "step": 13275 + }, + { + "epoch": 0.85013763523462, + "grad_norm": 16.44198989868164, + "learning_rate": 4.925720693841263e-06, + "loss": 0.3605, + "num_input_tokens_seen": 41803072, + "step": 13280 + }, + { + "epoch": 0.8504577171755969, + "grad_norm": 22.533470153808594, + "learning_rate": 4.925585466354592e-06, + "loss": 0.4722, + "num_input_tokens_seen": 41818688, + "step": 13285 + }, + { + "epoch": 0.8507777991165738, + "grad_norm": 26.158443450927734, + "learning_rate": 4.925450117746817e-06, + "loss": 0.4006, + "num_input_tokens_seen": 41833792, + "step": 13290 + }, + { + "epoch": 0.8510978810575507, + "grad_norm": 18.079103469848633, + "learning_rate": 4.925314648024697e-06, + "loss": 0.5135, + "num_input_tokens_seen": 41850880, + "step": 13295 + }, + { + "epoch": 0.8514179629985277, + "grad_norm": 8.973053932189941, + "learning_rate": 4.925179057194997e-06, + "loss": 0.3769, + "num_input_tokens_seen": 41866560, + "step": 13300 + }, + { + "epoch": 0.8517380449395046, + "grad_norm": 50.72419357299805, + "learning_rate": 4.925043345264486e-06, + "loss": 0.5361, + "num_input_tokens_seen": 41882240, + "step": 13305 + }, + { + "epoch": 0.8520581268804814, + "grad_norm": 38.961647033691406, + "learning_rate": 4.9249075122399435e-06, + "loss": 0.6143, + "num_input_tokens_seen": 41898880, + "step": 13310 + }, + { + "epoch": 0.8523782088214583, + "grad_norm": 21.974992752075195, + "learning_rate": 4.9247715581281495e-06, + "loss": 0.3941, + "num_input_tokens_seen": 41915968, + "step": 13315 + }, + { + "epoch": 0.8526982907624352, + "grad_norm": 22.76986312866211, + "learning_rate": 4.924635482935895e-06, + "loss": 0.4157, + "num_input_tokens_seen": 41930816, + "step": 13320 + }, + { + "epoch": 0.8530183727034121, + "grad_norm": 47.523189544677734, + "learning_rate": 4.924499286669974e-06, + "loss": 0.6425, + "num_input_tokens_seen": 41947200, + "step": 13325 + }, + { + "epoch": 0.853338454644389, + "grad_norm": 20.862659454345703, + "learning_rate": 4.9243629693371865e-06, + "loss": 0.3953, + "num_input_tokens_seen": 41962240, + "step": 13330 + }, + { + "epoch": 0.8536585365853658, + "grad_norm": 33.32276916503906, + "learning_rate": 4.924226530944341e-06, + "loss": 0.6252, + "num_input_tokens_seen": 41978752, + "step": 13335 + }, + { + "epoch": 0.8539786185263427, + "grad_norm": 17.542695999145508, + "learning_rate": 4.924089971498249e-06, + "loss": 0.4485, + "num_input_tokens_seen": 41994112, + "step": 13340 + }, + { + "epoch": 0.8542987004673196, + "grad_norm": 17.37729835510254, + "learning_rate": 4.923953291005731e-06, + "loss": 0.4313, + "num_input_tokens_seen": 42010432, + "step": 13345 + }, + { + "epoch": 0.8546187824082965, + "grad_norm": 21.817760467529297, + "learning_rate": 4.923816489473612e-06, + "loss": 0.4504, + "num_input_tokens_seen": 42025984, + "step": 13350 + }, + { + "epoch": 0.8549388643492735, + "grad_norm": 42.90483856201172, + "learning_rate": 4.923679566908722e-06, + "loss": 0.4746, + "num_input_tokens_seen": 42040832, + "step": 13355 + }, + { + "epoch": 0.8552589462902503, + "grad_norm": 25.37998390197754, + "learning_rate": 4.923542523317899e-06, + "loss": 0.5126, + "num_input_tokens_seen": 42057536, + "step": 13360 + }, + { + "epoch": 0.8555790282312272, + "grad_norm": 17.29766273498535, + "learning_rate": 4.923405358707986e-06, + "loss": 0.4442, + "num_input_tokens_seen": 42073152, + "step": 13365 + }, + { + "epoch": 0.8558991101722041, + "grad_norm": 51.03695297241211, + "learning_rate": 4.923268073085833e-06, + "loss": 0.4596, + "num_input_tokens_seen": 42088512, + "step": 13370 + }, + { + "epoch": 0.856219192113181, + "grad_norm": 34.60176086425781, + "learning_rate": 4.923130666458294e-06, + "loss": 0.507, + "num_input_tokens_seen": 42103552, + "step": 13375 + }, + { + "epoch": 0.8565392740541579, + "grad_norm": 35.568199157714844, + "learning_rate": 4.922993138832232e-06, + "loss": 0.4841, + "num_input_tokens_seen": 42119872, + "step": 13380 + }, + { + "epoch": 0.8568593559951347, + "grad_norm": 25.07211685180664, + "learning_rate": 4.922855490214514e-06, + "loss": 0.5768, + "num_input_tokens_seen": 42136832, + "step": 13385 + }, + { + "epoch": 0.8571794379361116, + "grad_norm": 20.896028518676758, + "learning_rate": 4.922717720612012e-06, + "loss": 0.4577, + "num_input_tokens_seen": 42152896, + "step": 13390 + }, + { + "epoch": 0.8574995198770885, + "grad_norm": 45.588287353515625, + "learning_rate": 4.922579830031606e-06, + "loss": 0.6398, + "num_input_tokens_seen": 42168064, + "step": 13395 + }, + { + "epoch": 0.8578196018180654, + "grad_norm": 23.594860076904297, + "learning_rate": 4.922441818480183e-06, + "loss": 0.4283, + "num_input_tokens_seen": 42182784, + "step": 13400 + }, + { + "epoch": 0.8581396837590423, + "grad_norm": 31.79343605041504, + "learning_rate": 4.922303685964634e-06, + "loss": 0.4372, + "num_input_tokens_seen": 42199744, + "step": 13405 + }, + { + "epoch": 0.8584597657000193, + "grad_norm": 34.61197280883789, + "learning_rate": 4.922165432491855e-06, + "loss": 0.3618, + "num_input_tokens_seen": 42215040, + "step": 13410 + }, + { + "epoch": 0.8587798476409961, + "grad_norm": 33.271644592285156, + "learning_rate": 4.922027058068752e-06, + "loss": 0.4587, + "num_input_tokens_seen": 42230144, + "step": 13415 + }, + { + "epoch": 0.859099929581973, + "grad_norm": 21.19793701171875, + "learning_rate": 4.9218885627022336e-06, + "loss": 0.5418, + "num_input_tokens_seen": 42246528, + "step": 13420 + }, + { + "epoch": 0.8594200115229499, + "grad_norm": 20.974641799926758, + "learning_rate": 4.921749946399215e-06, + "loss": 0.5169, + "num_input_tokens_seen": 42263168, + "step": 13425 + }, + { + "epoch": 0.8597400934639268, + "grad_norm": 20.59470558166504, + "learning_rate": 4.921611209166618e-06, + "loss": 0.5094, + "num_input_tokens_seen": 42278912, + "step": 13430 + }, + { + "epoch": 0.8600601754049036, + "grad_norm": 40.85513687133789, + "learning_rate": 4.921472351011372e-06, + "loss": 0.495, + "num_input_tokens_seen": 42294656, + "step": 13435 + }, + { + "epoch": 0.8603802573458805, + "grad_norm": 35.73142623901367, + "learning_rate": 4.92133337194041e-06, + "loss": 0.7637, + "num_input_tokens_seen": 42311552, + "step": 13440 + }, + { + "epoch": 0.8607003392868574, + "grad_norm": 28.92548370361328, + "learning_rate": 4.9211942719606705e-06, + "loss": 0.5743, + "num_input_tokens_seen": 42327552, + "step": 13445 + }, + { + "epoch": 0.8610204212278343, + "grad_norm": 22.03003692626953, + "learning_rate": 4.921055051079101e-06, + "loss": 0.4584, + "num_input_tokens_seen": 42343360, + "step": 13450 + }, + { + "epoch": 0.8613405031688112, + "grad_norm": 20.971149444580078, + "learning_rate": 4.920915709302653e-06, + "loss": 0.5159, + "num_input_tokens_seen": 42360064, + "step": 13455 + }, + { + "epoch": 0.8616605851097882, + "grad_norm": 20.588260650634766, + "learning_rate": 4.920776246638285e-06, + "loss": 0.4687, + "num_input_tokens_seen": 42374976, + "step": 13460 + }, + { + "epoch": 0.861980667050765, + "grad_norm": 20.75661849975586, + "learning_rate": 4.920636663092961e-06, + "loss": 0.4043, + "num_input_tokens_seen": 42391616, + "step": 13465 + }, + { + "epoch": 0.8623007489917419, + "grad_norm": 31.391155242919922, + "learning_rate": 4.9204969586736495e-06, + "loss": 0.4318, + "num_input_tokens_seen": 42406528, + "step": 13470 + }, + { + "epoch": 0.8626208309327188, + "grad_norm": 17.920978546142578, + "learning_rate": 4.9203571333873284e-06, + "loss": 0.5153, + "num_input_tokens_seen": 42424320, + "step": 13475 + }, + { + "epoch": 0.8629409128736957, + "grad_norm": 38.58307647705078, + "learning_rate": 4.92021718724098e-06, + "loss": 0.611, + "num_input_tokens_seen": 42440960, + "step": 13480 + }, + { + "epoch": 0.8632609948146726, + "grad_norm": 18.055194854736328, + "learning_rate": 4.92007712024159e-06, + "loss": 0.3195, + "num_input_tokens_seen": 42456448, + "step": 13485 + }, + { + "epoch": 0.8635810767556494, + "grad_norm": 32.95689392089844, + "learning_rate": 4.9199369323961554e-06, + "loss": 0.4619, + "num_input_tokens_seen": 42472448, + "step": 13490 + }, + { + "epoch": 0.8639011586966263, + "grad_norm": 52.96099090576172, + "learning_rate": 4.919796623711675e-06, + "loss": 0.6213, + "num_input_tokens_seen": 42487808, + "step": 13495 + }, + { + "epoch": 0.8642212406376032, + "grad_norm": 29.09337043762207, + "learning_rate": 4.919656194195156e-06, + "loss": 0.4896, + "num_input_tokens_seen": 42503744, + "step": 13500 + }, + { + "epoch": 0.8645413225785801, + "grad_norm": 31.27094268798828, + "learning_rate": 4.9195156438536095e-06, + "loss": 0.6244, + "num_input_tokens_seen": 42520768, + "step": 13505 + }, + { + "epoch": 0.864861404519557, + "grad_norm": 32.4491081237793, + "learning_rate": 4.9193749726940555e-06, + "loss": 0.5117, + "num_input_tokens_seen": 42537408, + "step": 13510 + }, + { + "epoch": 0.8651814864605339, + "grad_norm": 37.44065475463867, + "learning_rate": 4.919234180723517e-06, + "loss": 0.4307, + "num_input_tokens_seen": 42553728, + "step": 13515 + }, + { + "epoch": 0.8655015684015108, + "grad_norm": 11.892657279968262, + "learning_rate": 4.9190932679490245e-06, + "loss": 0.4898, + "num_input_tokens_seen": 42568896, + "step": 13520 + }, + { + "epoch": 0.8658216503424877, + "grad_norm": 20.983333587646484, + "learning_rate": 4.918952234377615e-06, + "loss": 0.4423, + "num_input_tokens_seen": 42584000, + "step": 13525 + }, + { + "epoch": 0.8661417322834646, + "grad_norm": 75.1211166381836, + "learning_rate": 4.9188110800163306e-06, + "loss": 0.4808, + "num_input_tokens_seen": 42600192, + "step": 13530 + }, + { + "epoch": 0.8664618142244415, + "grad_norm": 27.450029373168945, + "learning_rate": 4.9186698048722205e-06, + "loss": 0.3951, + "num_input_tokens_seen": 42614656, + "step": 13535 + }, + { + "epoch": 0.8667818961654183, + "grad_norm": 29.14250946044922, + "learning_rate": 4.918528408952338e-06, + "loss": 0.4872, + "num_input_tokens_seen": 42629504, + "step": 13540 + }, + { + "epoch": 0.8671019781063952, + "grad_norm": 22.198408126831055, + "learning_rate": 4.9183868922637446e-06, + "loss": 0.5433, + "num_input_tokens_seen": 42644224, + "step": 13545 + }, + { + "epoch": 0.8674220600473721, + "grad_norm": 26.059404373168945, + "learning_rate": 4.918245254813507e-06, + "loss": 0.359, + "num_input_tokens_seen": 42659584, + "step": 13550 + }, + { + "epoch": 0.867742141988349, + "grad_norm": 29.043962478637695, + "learning_rate": 4.918103496608698e-06, + "loss": 0.4165, + "num_input_tokens_seen": 42675776, + "step": 13555 + }, + { + "epoch": 0.8680622239293259, + "grad_norm": 26.36627769470215, + "learning_rate": 4.917961617656395e-06, + "loss": 0.5199, + "num_input_tokens_seen": 42693184, + "step": 13560 + }, + { + "epoch": 0.8683823058703029, + "grad_norm": 55.22026443481445, + "learning_rate": 4.917819617963684e-06, + "loss": 0.5467, + "num_input_tokens_seen": 42710784, + "step": 13565 + }, + { + "epoch": 0.8687023878112797, + "grad_norm": 40.350711822509766, + "learning_rate": 4.917677497537655e-06, + "loss": 0.5478, + "num_input_tokens_seen": 42727040, + "step": 13570 + }, + { + "epoch": 0.8690224697522566, + "grad_norm": 28.591360092163086, + "learning_rate": 4.917535256385405e-06, + "loss": 0.4198, + "num_input_tokens_seen": 42742208, + "step": 13575 + }, + { + "epoch": 0.8693425516932335, + "grad_norm": 19.032127380371094, + "learning_rate": 4.917392894514037e-06, + "loss": 0.464, + "num_input_tokens_seen": 42758464, + "step": 13580 + }, + { + "epoch": 0.8696626336342104, + "grad_norm": 59.290260314941406, + "learning_rate": 4.917250411930659e-06, + "loss": 0.5945, + "num_input_tokens_seen": 42774016, + "step": 13585 + }, + { + "epoch": 0.8699827155751872, + "grad_norm": 28.568464279174805, + "learning_rate": 4.917107808642387e-06, + "loss": 0.4659, + "num_input_tokens_seen": 42789248, + "step": 13590 + }, + { + "epoch": 0.8703027975161641, + "grad_norm": 20.477519989013672, + "learning_rate": 4.916965084656341e-06, + "loss": 0.4435, + "num_input_tokens_seen": 42804608, + "step": 13595 + }, + { + "epoch": 0.870622879457141, + "grad_norm": 30.901222229003906, + "learning_rate": 4.916822239979649e-06, + "loss": 0.5812, + "num_input_tokens_seen": 42819584, + "step": 13600 + }, + { + "epoch": 0.8709429613981179, + "grad_norm": 19.624187469482422, + "learning_rate": 4.916679274619442e-06, + "loss": 0.4933, + "num_input_tokens_seen": 42835200, + "step": 13605 + }, + { + "epoch": 0.8712630433390948, + "grad_norm": 37.41313934326172, + "learning_rate": 4.91653618858286e-06, + "loss": 0.4593, + "num_input_tokens_seen": 42852672, + "step": 13610 + }, + { + "epoch": 0.8715831252800716, + "grad_norm": 17.307376861572266, + "learning_rate": 4.916392981877048e-06, + "loss": 0.3882, + "num_input_tokens_seen": 42868672, + "step": 13615 + }, + { + "epoch": 0.8719032072210486, + "grad_norm": 42.932254791259766, + "learning_rate": 4.916249654509159e-06, + "loss": 0.5423, + "num_input_tokens_seen": 42884096, + "step": 13620 + }, + { + "epoch": 0.8722232891620255, + "grad_norm": 39.14038848876953, + "learning_rate": 4.916106206486346e-06, + "loss": 0.5687, + "num_input_tokens_seen": 42898752, + "step": 13625 + }, + { + "epoch": 0.8725433711030024, + "grad_norm": 19.00009536743164, + "learning_rate": 4.915962637815774e-06, + "loss": 0.632, + "num_input_tokens_seen": 42914688, + "step": 13630 + }, + { + "epoch": 0.8728634530439793, + "grad_norm": 27.598228454589844, + "learning_rate": 4.915818948504614e-06, + "loss": 0.4514, + "num_input_tokens_seen": 42930688, + "step": 13635 + }, + { + "epoch": 0.8731835349849562, + "grad_norm": 13.647698402404785, + "learning_rate": 4.9156751385600375e-06, + "loss": 0.5396, + "num_input_tokens_seen": 42947008, + "step": 13640 + }, + { + "epoch": 0.873503616925933, + "grad_norm": 14.717652320861816, + "learning_rate": 4.915531207989228e-06, + "loss": 0.6046, + "num_input_tokens_seen": 42962816, + "step": 13645 + }, + { + "epoch": 0.8738236988669099, + "grad_norm": 24.897993087768555, + "learning_rate": 4.915387156799371e-06, + "loss": 0.4804, + "num_input_tokens_seen": 42977664, + "step": 13650 + }, + { + "epoch": 0.8741437808078868, + "grad_norm": 52.85260772705078, + "learning_rate": 4.9152429849976625e-06, + "loss": 0.5684, + "num_input_tokens_seen": 42993472, + "step": 13655 + }, + { + "epoch": 0.8744638627488637, + "grad_norm": 30.04971694946289, + "learning_rate": 4.915098692591299e-06, + "loss": 0.581, + "num_input_tokens_seen": 43010688, + "step": 13660 + }, + { + "epoch": 0.8747839446898406, + "grad_norm": 31.44503402709961, + "learning_rate": 4.914954279587486e-06, + "loss": 0.4333, + "num_input_tokens_seen": 43027392, + "step": 13665 + }, + { + "epoch": 0.8751040266308175, + "grad_norm": 29.245370864868164, + "learning_rate": 4.914809745993437e-06, + "loss": 0.5035, + "num_input_tokens_seen": 43043008, + "step": 13670 + }, + { + "epoch": 0.8754241085717944, + "grad_norm": 29.41805648803711, + "learning_rate": 4.9146650918163655e-06, + "loss": 0.5147, + "num_input_tokens_seen": 43059072, + "step": 13675 + }, + { + "epoch": 0.8757441905127713, + "grad_norm": 39.111534118652344, + "learning_rate": 4.914520317063498e-06, + "loss": 0.4494, + "num_input_tokens_seen": 43074624, + "step": 13680 + }, + { + "epoch": 0.8760642724537482, + "grad_norm": 28.3315372467041, + "learning_rate": 4.914375421742062e-06, + "loss": 0.5185, + "num_input_tokens_seen": 43089536, + "step": 13685 + }, + { + "epoch": 0.8763843543947251, + "grad_norm": 24.2620849609375, + "learning_rate": 4.914230405859294e-06, + "loss": 0.5262, + "num_input_tokens_seen": 43104512, + "step": 13690 + }, + { + "epoch": 0.8767044363357019, + "grad_norm": 48.637752532958984, + "learning_rate": 4.914085269422435e-06, + "loss": 0.4122, + "num_input_tokens_seen": 43120640, + "step": 13695 + }, + { + "epoch": 0.8770245182766788, + "grad_norm": 20.070533752441406, + "learning_rate": 4.913940012438732e-06, + "loss": 0.5458, + "num_input_tokens_seen": 43136384, + "step": 13700 + }, + { + "epoch": 0.8773446002176557, + "grad_norm": 38.205162048339844, + "learning_rate": 4.9137946349154386e-06, + "loss": 0.4664, + "num_input_tokens_seen": 43153216, + "step": 13705 + }, + { + "epoch": 0.8776646821586326, + "grad_norm": 28.587425231933594, + "learning_rate": 4.913649136859814e-06, + "loss": 0.4954, + "num_input_tokens_seen": 43167936, + "step": 13710 + }, + { + "epoch": 0.8779847640996095, + "grad_norm": 25.78879165649414, + "learning_rate": 4.9135035182791235e-06, + "loss": 0.4044, + "num_input_tokens_seen": 43183872, + "step": 13715 + }, + { + "epoch": 0.8783048460405863, + "grad_norm": 23.328710556030273, + "learning_rate": 4.913357779180639e-06, + "loss": 0.6712, + "num_input_tokens_seen": 43200256, + "step": 13720 + }, + { + "epoch": 0.8786249279815633, + "grad_norm": 47.117061614990234, + "learning_rate": 4.9132119195716375e-06, + "loss": 0.5041, + "num_input_tokens_seen": 43217600, + "step": 13725 + }, + { + "epoch": 0.8789450099225402, + "grad_norm": 40.03658676147461, + "learning_rate": 4.9130659394594025e-06, + "loss": 0.4631, + "num_input_tokens_seen": 43233344, + "step": 13730 + }, + { + "epoch": 0.8792650918635171, + "grad_norm": 39.55255889892578, + "learning_rate": 4.912919838851224e-06, + "loss": 0.7038, + "num_input_tokens_seen": 43249280, + "step": 13735 + }, + { + "epoch": 0.879585173804494, + "grad_norm": 40.65654373168945, + "learning_rate": 4.912773617754398e-06, + "loss": 0.508, + "num_input_tokens_seen": 43265024, + "step": 13740 + }, + { + "epoch": 0.8799052557454708, + "grad_norm": 21.504850387573242, + "learning_rate": 4.912627276176224e-06, + "loss": 0.5256, + "num_input_tokens_seen": 43280576, + "step": 13745 + }, + { + "epoch": 0.8802253376864477, + "grad_norm": 21.506023406982422, + "learning_rate": 4.912480814124011e-06, + "loss": 0.451, + "num_input_tokens_seen": 43296064, + "step": 13750 + }, + { + "epoch": 0.8805454196274246, + "grad_norm": 22.782751083374023, + "learning_rate": 4.912334231605073e-06, + "loss": 0.5008, + "num_input_tokens_seen": 43311552, + "step": 13755 + }, + { + "epoch": 0.8808655015684015, + "grad_norm": 27.124351501464844, + "learning_rate": 4.912187528626729e-06, + "loss": 0.5462, + "num_input_tokens_seen": 43326272, + "step": 13760 + }, + { + "epoch": 0.8811855835093784, + "grad_norm": 42.058162689208984, + "learning_rate": 4.912040705196303e-06, + "loss": 0.452, + "num_input_tokens_seen": 43342592, + "step": 13765 + }, + { + "epoch": 0.8815056654503552, + "grad_norm": 20.77344512939453, + "learning_rate": 4.911893761321129e-06, + "loss": 0.3922, + "num_input_tokens_seen": 43357888, + "step": 13770 + }, + { + "epoch": 0.8818257473913321, + "grad_norm": 17.105632781982422, + "learning_rate": 4.911746697008543e-06, + "loss": 0.3391, + "num_input_tokens_seen": 43374272, + "step": 13775 + }, + { + "epoch": 0.8821458293323091, + "grad_norm": 40.84295654296875, + "learning_rate": 4.91159951226589e-06, + "loss": 0.3914, + "num_input_tokens_seen": 43393280, + "step": 13780 + }, + { + "epoch": 0.882465911273286, + "grad_norm": 10.729305267333984, + "learning_rate": 4.9114522071005185e-06, + "loss": 0.4902, + "num_input_tokens_seen": 43409600, + "step": 13785 + }, + { + "epoch": 0.8827859932142629, + "grad_norm": 30.652618408203125, + "learning_rate": 4.911304781519785e-06, + "loss": 0.5714, + "num_input_tokens_seen": 43424384, + "step": 13790 + }, + { + "epoch": 0.8831060751552398, + "grad_norm": 30.534679412841797, + "learning_rate": 4.911157235531051e-06, + "loss": 0.4055, + "num_input_tokens_seen": 43440128, + "step": 13795 + }, + { + "epoch": 0.8834261570962166, + "grad_norm": 29.33319664001465, + "learning_rate": 4.911009569141683e-06, + "loss": 0.578, + "num_input_tokens_seen": 43455168, + "step": 13800 + }, + { + "epoch": 0.8837462390371935, + "grad_norm": 35.25693130493164, + "learning_rate": 4.910861782359057e-06, + "loss": 0.4172, + "num_input_tokens_seen": 43472064, + "step": 13805 + }, + { + "epoch": 0.8840663209781704, + "grad_norm": 40.561859130859375, + "learning_rate": 4.910713875190549e-06, + "loss": 0.6041, + "num_input_tokens_seen": 43488000, + "step": 13810 + }, + { + "epoch": 0.8843864029191473, + "grad_norm": 22.874284744262695, + "learning_rate": 4.910565847643549e-06, + "loss": 0.4479, + "num_input_tokens_seen": 43503488, + "step": 13815 + }, + { + "epoch": 0.8847064848601242, + "grad_norm": 37.40224838256836, + "learning_rate": 4.910417699725446e-06, + "loss": 0.5161, + "num_input_tokens_seen": 43518336, + "step": 13820 + }, + { + "epoch": 0.885026566801101, + "grad_norm": 43.05880355834961, + "learning_rate": 4.910269431443639e-06, + "loss": 0.42, + "num_input_tokens_seen": 43532800, + "step": 13825 + }, + { + "epoch": 0.885346648742078, + "grad_norm": 19.310914993286133, + "learning_rate": 4.9101210428055306e-06, + "loss": 0.3951, + "num_input_tokens_seen": 43547648, + "step": 13830 + }, + { + "epoch": 0.8856667306830549, + "grad_norm": 30.862680435180664, + "learning_rate": 4.909972533818531e-06, + "loss": 0.4051, + "num_input_tokens_seen": 43562688, + "step": 13835 + }, + { + "epoch": 0.8859868126240318, + "grad_norm": 30.024288177490234, + "learning_rate": 4.909823904490057e-06, + "loss": 0.4491, + "num_input_tokens_seen": 43578176, + "step": 13840 + }, + { + "epoch": 0.8863068945650087, + "grad_norm": 28.19719886779785, + "learning_rate": 4.909675154827529e-06, + "loss": 0.4316, + "num_input_tokens_seen": 43593920, + "step": 13845 + }, + { + "epoch": 0.8866269765059855, + "grad_norm": 15.752680778503418, + "learning_rate": 4.909526284838375e-06, + "loss": 0.5023, + "num_input_tokens_seen": 43610944, + "step": 13850 + }, + { + "epoch": 0.8869470584469624, + "grad_norm": 24.6102237701416, + "learning_rate": 4.90937729453003e-06, + "loss": 0.5231, + "num_input_tokens_seen": 43627008, + "step": 13855 + }, + { + "epoch": 0.8872671403879393, + "grad_norm": 78.39057922363281, + "learning_rate": 4.909228183909932e-06, + "loss": 0.6215, + "num_input_tokens_seen": 43642752, + "step": 13860 + }, + { + "epoch": 0.8875872223289162, + "grad_norm": 15.736237525939941, + "learning_rate": 4.909078952985529e-06, + "loss": 0.416, + "num_input_tokens_seen": 43658496, + "step": 13865 + }, + { + "epoch": 0.8879073042698931, + "grad_norm": 38.207740783691406, + "learning_rate": 4.90892960176427e-06, + "loss": 0.5573, + "num_input_tokens_seen": 43674304, + "step": 13870 + }, + { + "epoch": 0.8882273862108699, + "grad_norm": 20.234508514404297, + "learning_rate": 4.908780130253615e-06, + "loss": 0.446, + "num_input_tokens_seen": 43689536, + "step": 13875 + }, + { + "epoch": 0.8885474681518468, + "grad_norm": 49.29079055786133, + "learning_rate": 4.908630538461027e-06, + "loss": 0.4856, + "num_input_tokens_seen": 43706496, + "step": 13880 + }, + { + "epoch": 0.8888675500928238, + "grad_norm": 13.962396621704102, + "learning_rate": 4.908480826393976e-06, + "loss": 0.378, + "num_input_tokens_seen": 43721408, + "step": 13885 + }, + { + "epoch": 0.8891876320338007, + "grad_norm": 27.218778610229492, + "learning_rate": 4.908330994059939e-06, + "loss": 0.5503, + "num_input_tokens_seen": 43737536, + "step": 13890 + }, + { + "epoch": 0.8895077139747776, + "grad_norm": 20.0692195892334, + "learning_rate": 4.908181041466396e-06, + "loss": 0.5514, + "num_input_tokens_seen": 43752640, + "step": 13895 + }, + { + "epoch": 0.8898277959157544, + "grad_norm": 36.2763557434082, + "learning_rate": 4.9080309686208344e-06, + "loss": 0.5013, + "num_input_tokens_seen": 43768384, + "step": 13900 + }, + { + "epoch": 0.8901478778567313, + "grad_norm": 33.243072509765625, + "learning_rate": 4.90788077553075e-06, + "loss": 0.3755, + "num_input_tokens_seen": 43784320, + "step": 13905 + }, + { + "epoch": 0.8904679597977082, + "grad_norm": 26.989826202392578, + "learning_rate": 4.907730462203642e-06, + "loss": 0.5875, + "num_input_tokens_seen": 43799232, + "step": 13910 + }, + { + "epoch": 0.8907880417386851, + "grad_norm": 42.340919494628906, + "learning_rate": 4.907580028647016e-06, + "loss": 0.5377, + "num_input_tokens_seen": 43815360, + "step": 13915 + }, + { + "epoch": 0.891108123679662, + "grad_norm": 25.303728103637695, + "learning_rate": 4.907429474868384e-06, + "loss": 0.4908, + "num_input_tokens_seen": 43830336, + "step": 13920 + }, + { + "epoch": 0.8914282056206388, + "grad_norm": 25.550262451171875, + "learning_rate": 4.9072788008752635e-06, + "loss": 0.484, + "num_input_tokens_seen": 43846656, + "step": 13925 + }, + { + "epoch": 0.8917482875616157, + "grad_norm": 29.423751831054688, + "learning_rate": 4.9071280066751794e-06, + "loss": 0.4801, + "num_input_tokens_seen": 43862720, + "step": 13930 + }, + { + "epoch": 0.8920683695025927, + "grad_norm": 23.896730422973633, + "learning_rate": 4.906977092275661e-06, + "loss": 0.4507, + "num_input_tokens_seen": 43876800, + "step": 13935 + }, + { + "epoch": 0.8923884514435696, + "grad_norm": 50.10870361328125, + "learning_rate": 4.9068260576842435e-06, + "loss": 0.3272, + "num_input_tokens_seen": 43892160, + "step": 13940 + }, + { + "epoch": 0.8927085333845465, + "grad_norm": 20.786771774291992, + "learning_rate": 4.90667490290847e-06, + "loss": 0.4564, + "num_input_tokens_seen": 43908416, + "step": 13945 + }, + { + "epoch": 0.8930286153255234, + "grad_norm": 32.080989837646484, + "learning_rate": 4.906523627955887e-06, + "loss": 0.4872, + "num_input_tokens_seen": 43923712, + "step": 13950 + }, + { + "epoch": 0.8933486972665002, + "grad_norm": 30.33047866821289, + "learning_rate": 4.90637223283405e-06, + "loss": 0.5561, + "num_input_tokens_seen": 43939520, + "step": 13955 + }, + { + "epoch": 0.8936687792074771, + "grad_norm": 36.767295837402344, + "learning_rate": 4.9062207175505174e-06, + "loss": 0.5394, + "num_input_tokens_seen": 43954688, + "step": 13960 + }, + { + "epoch": 0.893988861148454, + "grad_norm": 44.78806686401367, + "learning_rate": 4.906069082112856e-06, + "loss": 0.4483, + "num_input_tokens_seen": 43969600, + "step": 13965 + }, + { + "epoch": 0.8943089430894309, + "grad_norm": 62.88533020019531, + "learning_rate": 4.905917326528638e-06, + "loss": 0.4856, + "num_input_tokens_seen": 43985472, + "step": 13970 + }, + { + "epoch": 0.8946290250304078, + "grad_norm": 38.27035903930664, + "learning_rate": 4.905765450805439e-06, + "loss": 0.5622, + "num_input_tokens_seen": 44000768, + "step": 13975 + }, + { + "epoch": 0.8949491069713846, + "grad_norm": 39.34568786621094, + "learning_rate": 4.905613454950846e-06, + "loss": 0.689, + "num_input_tokens_seen": 44017088, + "step": 13980 + }, + { + "epoch": 0.8952691889123615, + "grad_norm": 25.83319854736328, + "learning_rate": 4.905461338972447e-06, + "loss": 0.493, + "num_input_tokens_seen": 44031488, + "step": 13985 + }, + { + "epoch": 0.8955892708533385, + "grad_norm": 18.637706756591797, + "learning_rate": 4.9053091028778375e-06, + "loss": 0.4007, + "num_input_tokens_seen": 44047296, + "step": 13990 + }, + { + "epoch": 0.8959093527943154, + "grad_norm": 19.747743606567383, + "learning_rate": 4.905156746674622e-06, + "loss": 0.4141, + "num_input_tokens_seen": 44063744, + "step": 13995 + }, + { + "epoch": 0.8962294347352923, + "grad_norm": 21.253217697143555, + "learning_rate": 4.905004270370405e-06, + "loss": 0.5588, + "num_input_tokens_seen": 44079168, + "step": 14000 + }, + { + "epoch": 0.8965495166762691, + "grad_norm": 24.03567123413086, + "learning_rate": 4.904851673972803e-06, + "loss": 0.4681, + "num_input_tokens_seen": 44093824, + "step": 14005 + }, + { + "epoch": 0.896869598617246, + "grad_norm": 23.275915145874023, + "learning_rate": 4.904698957489434e-06, + "loss": 0.6702, + "num_input_tokens_seen": 44111296, + "step": 14010 + }, + { + "epoch": 0.8971896805582229, + "grad_norm": 25.491289138793945, + "learning_rate": 4.904546120927925e-06, + "loss": 0.5934, + "num_input_tokens_seen": 44128000, + "step": 14015 + }, + { + "epoch": 0.8975097624991998, + "grad_norm": 19.939895629882812, + "learning_rate": 4.904393164295908e-06, + "loss": 0.438, + "num_input_tokens_seen": 44143488, + "step": 14020 + }, + { + "epoch": 0.8978298444401767, + "grad_norm": 48.81606674194336, + "learning_rate": 4.90424008760102e-06, + "loss": 0.4309, + "num_input_tokens_seen": 44158976, + "step": 14025 + }, + { + "epoch": 0.8981499263811535, + "grad_norm": 34.11859893798828, + "learning_rate": 4.904086890850905e-06, + "loss": 0.6214, + "num_input_tokens_seen": 44175808, + "step": 14030 + }, + { + "epoch": 0.8984700083221304, + "grad_norm": 25.234249114990234, + "learning_rate": 4.903933574053213e-06, + "loss": 0.4426, + "num_input_tokens_seen": 44190336, + "step": 14035 + }, + { + "epoch": 0.8987900902631074, + "grad_norm": 23.822786331176758, + "learning_rate": 4.9037801372156e-06, + "loss": 0.4952, + "num_input_tokens_seen": 44205696, + "step": 14040 + }, + { + "epoch": 0.8991101722040843, + "grad_norm": 22.487895965576172, + "learning_rate": 4.903626580345729e-06, + "loss": 0.5449, + "num_input_tokens_seen": 44222016, + "step": 14045 + }, + { + "epoch": 0.8994302541450612, + "grad_norm": 17.34914207458496, + "learning_rate": 4.9034729034512655e-06, + "loss": 0.494, + "num_input_tokens_seen": 44238592, + "step": 14050 + }, + { + "epoch": 0.899750336086038, + "grad_norm": 17.64523696899414, + "learning_rate": 4.903319106539884e-06, + "loss": 0.3467, + "num_input_tokens_seen": 44254016, + "step": 14055 + }, + { + "epoch": 0.9000704180270149, + "grad_norm": 29.87889862060547, + "learning_rate": 4.9031651896192655e-06, + "loss": 0.5265, + "num_input_tokens_seen": 44269376, + "step": 14060 + }, + { + "epoch": 0.9003904999679918, + "grad_norm": 32.22871780395508, + "learning_rate": 4.903011152697095e-06, + "loss": 0.6117, + "num_input_tokens_seen": 44284672, + "step": 14065 + }, + { + "epoch": 0.9007105819089687, + "grad_norm": 37.92398452758789, + "learning_rate": 4.902856995781064e-06, + "loss": 0.5628, + "num_input_tokens_seen": 44300224, + "step": 14070 + }, + { + "epoch": 0.9010306638499456, + "grad_norm": 42.109066009521484, + "learning_rate": 4.902702718878871e-06, + "loss": 0.6203, + "num_input_tokens_seen": 44315264, + "step": 14075 + }, + { + "epoch": 0.9013507457909224, + "grad_norm": 17.769067764282227, + "learning_rate": 4.902548321998219e-06, + "loss": 0.5193, + "num_input_tokens_seen": 44330176, + "step": 14080 + }, + { + "epoch": 0.9016708277318993, + "grad_norm": 27.142732620239258, + "learning_rate": 4.902393805146819e-06, + "loss": 0.5402, + "num_input_tokens_seen": 44345152, + "step": 14085 + }, + { + "epoch": 0.9019909096728762, + "grad_norm": 37.86245346069336, + "learning_rate": 4.902239168332386e-06, + "loss": 0.485, + "num_input_tokens_seen": 44361152, + "step": 14090 + }, + { + "epoch": 0.9023109916138532, + "grad_norm": 27.393285751342773, + "learning_rate": 4.902084411562641e-06, + "loss": 0.5202, + "num_input_tokens_seen": 44376128, + "step": 14095 + }, + { + "epoch": 0.9026310735548301, + "grad_norm": 37.82294845581055, + "learning_rate": 4.901929534845313e-06, + "loss": 0.4646, + "num_input_tokens_seen": 44392192, + "step": 14100 + }, + { + "epoch": 0.902951155495807, + "grad_norm": 34.11417007446289, + "learning_rate": 4.9017745381881345e-06, + "loss": 0.6618, + "num_input_tokens_seen": 44407680, + "step": 14105 + }, + { + "epoch": 0.9032712374367838, + "grad_norm": 18.987247467041016, + "learning_rate": 4.901619421598847e-06, + "loss": 0.5255, + "num_input_tokens_seen": 44424384, + "step": 14110 + }, + { + "epoch": 0.9035913193777607, + "grad_norm": 52.68825912475586, + "learning_rate": 4.901464185085194e-06, + "loss": 0.485, + "num_input_tokens_seen": 44439744, + "step": 14115 + }, + { + "epoch": 0.9039114013187376, + "grad_norm": 26.87746810913086, + "learning_rate": 4.9013088286549295e-06, + "loss": 0.515, + "num_input_tokens_seen": 44454272, + "step": 14120 + }, + { + "epoch": 0.9042314832597145, + "grad_norm": 37.02253341674805, + "learning_rate": 4.901153352315809e-06, + "loss": 0.4794, + "num_input_tokens_seen": 44468992, + "step": 14125 + }, + { + "epoch": 0.9045515652006914, + "grad_norm": 36.252967834472656, + "learning_rate": 4.900997756075599e-06, + "loss": 0.4356, + "num_input_tokens_seen": 44484864, + "step": 14130 + }, + { + "epoch": 0.9048716471416682, + "grad_norm": 18.981725692749023, + "learning_rate": 4.900842039942065e-06, + "loss": 0.4222, + "num_input_tokens_seen": 44499968, + "step": 14135 + }, + { + "epoch": 0.9051917290826451, + "grad_norm": 32.22833251953125, + "learning_rate": 4.900686203922986e-06, + "loss": 0.5553, + "num_input_tokens_seen": 44517952, + "step": 14140 + }, + { + "epoch": 0.905511811023622, + "grad_norm": 15.709972381591797, + "learning_rate": 4.900530248026143e-06, + "loss": 0.399, + "num_input_tokens_seen": 44532928, + "step": 14145 + }, + { + "epoch": 0.905831892964599, + "grad_norm": 26.67451286315918, + "learning_rate": 4.900374172259324e-06, + "loss": 0.4205, + "num_input_tokens_seen": 44548288, + "step": 14150 + }, + { + "epoch": 0.9061519749055759, + "grad_norm": 21.399648666381836, + "learning_rate": 4.900217976630321e-06, + "loss": 0.4555, + "num_input_tokens_seen": 44563712, + "step": 14155 + }, + { + "epoch": 0.9064720568465527, + "grad_norm": 32.53878402709961, + "learning_rate": 4.9000616611469344e-06, + "loss": 0.4886, + "num_input_tokens_seen": 44579264, + "step": 14160 + }, + { + "epoch": 0.9067921387875296, + "grad_norm": 36.08686828613281, + "learning_rate": 4.89990522581697e-06, + "loss": 0.3468, + "num_input_tokens_seen": 44594176, + "step": 14165 + }, + { + "epoch": 0.9071122207285065, + "grad_norm": 21.341764450073242, + "learning_rate": 4.89974867064824e-06, + "loss": 0.516, + "num_input_tokens_seen": 44609664, + "step": 14170 + }, + { + "epoch": 0.9074323026694834, + "grad_norm": 25.244583129882812, + "learning_rate": 4.899591995648561e-06, + "loss": 0.5396, + "num_input_tokens_seen": 44626944, + "step": 14175 + }, + { + "epoch": 0.9077523846104603, + "grad_norm": 19.96651268005371, + "learning_rate": 4.899435200825756e-06, + "loss": 0.3364, + "num_input_tokens_seen": 44643520, + "step": 14180 + }, + { + "epoch": 0.9080724665514371, + "grad_norm": 53.98974609375, + "learning_rate": 4.899278286187656e-06, + "loss": 0.5203, + "num_input_tokens_seen": 44657984, + "step": 14185 + }, + { + "epoch": 0.908392548492414, + "grad_norm": 31.047040939331055, + "learning_rate": 4.899121251742095e-06, + "loss": 0.5656, + "num_input_tokens_seen": 44673024, + "step": 14190 + }, + { + "epoch": 0.9087126304333909, + "grad_norm": 37.90438461303711, + "learning_rate": 4.898964097496917e-06, + "loss": 0.5394, + "num_input_tokens_seen": 44687424, + "step": 14195 + }, + { + "epoch": 0.9090327123743679, + "grad_norm": 39.93819046020508, + "learning_rate": 4.898806823459966e-06, + "loss": 0.4064, + "num_input_tokens_seen": 44702976, + "step": 14200 + }, + { + "epoch": 0.9093527943153448, + "grad_norm": 32.97382736206055, + "learning_rate": 4.898649429639097e-06, + "loss": 0.5029, + "num_input_tokens_seen": 44718144, + "step": 14205 + }, + { + "epoch": 0.9096728762563216, + "grad_norm": 26.624399185180664, + "learning_rate": 4.898491916042171e-06, + "loss": 0.4381, + "num_input_tokens_seen": 44734912, + "step": 14210 + }, + { + "epoch": 0.9099929581972985, + "grad_norm": 25.22661781311035, + "learning_rate": 4.898334282677051e-06, + "loss": 0.4233, + "num_input_tokens_seen": 44749888, + "step": 14215 + }, + { + "epoch": 0.9103130401382754, + "grad_norm": 18.826391220092773, + "learning_rate": 4.898176529551609e-06, + "loss": 0.5408, + "num_input_tokens_seen": 44764544, + "step": 14220 + }, + { + "epoch": 0.9106331220792523, + "grad_norm": 47.150115966796875, + "learning_rate": 4.898018656673724e-06, + "loss": 0.3792, + "num_input_tokens_seen": 44780992, + "step": 14225 + }, + { + "epoch": 0.9109532040202292, + "grad_norm": 31.791690826416016, + "learning_rate": 4.8978606640512774e-06, + "loss": 0.589, + "num_input_tokens_seen": 44796672, + "step": 14230 + }, + { + "epoch": 0.911273285961206, + "grad_norm": 21.559505462646484, + "learning_rate": 4.897702551692159e-06, + "loss": 0.5243, + "num_input_tokens_seen": 44812224, + "step": 14235 + }, + { + "epoch": 0.9115933679021829, + "grad_norm": 29.585309982299805, + "learning_rate": 4.897544319604265e-06, + "loss": 0.4864, + "num_input_tokens_seen": 44827136, + "step": 14240 + }, + { + "epoch": 0.9119134498431598, + "grad_norm": 17.683839797973633, + "learning_rate": 4.897385967795496e-06, + "loss": 0.5151, + "num_input_tokens_seen": 44842176, + "step": 14245 + }, + { + "epoch": 0.9122335317841367, + "grad_norm": 36.18199920654297, + "learning_rate": 4.897227496273759e-06, + "loss": 0.6346, + "num_input_tokens_seen": 44858880, + "step": 14250 + }, + { + "epoch": 0.9125536137251137, + "grad_norm": 23.900527954101562, + "learning_rate": 4.897068905046967e-06, + "loss": 0.562, + "num_input_tokens_seen": 44875328, + "step": 14255 + }, + { + "epoch": 0.9128736956660906, + "grad_norm": 23.37824058532715, + "learning_rate": 4.896910194123041e-06, + "loss": 0.4998, + "num_input_tokens_seen": 44892032, + "step": 14260 + }, + { + "epoch": 0.9131937776070674, + "grad_norm": 19.152633666992188, + "learning_rate": 4.896751363509904e-06, + "loss": 0.4398, + "num_input_tokens_seen": 44907328, + "step": 14265 + }, + { + "epoch": 0.9135138595480443, + "grad_norm": 27.099348068237305, + "learning_rate": 4.896592413215489e-06, + "loss": 0.377, + "num_input_tokens_seen": 44922560, + "step": 14270 + }, + { + "epoch": 0.9138339414890212, + "grad_norm": 22.929664611816406, + "learning_rate": 4.8964333432477315e-06, + "loss": 0.5012, + "num_input_tokens_seen": 44937728, + "step": 14275 + }, + { + "epoch": 0.9141540234299981, + "grad_norm": 37.90034103393555, + "learning_rate": 4.8962741536145755e-06, + "loss": 0.5723, + "num_input_tokens_seen": 44954560, + "step": 14280 + }, + { + "epoch": 0.914474105370975, + "grad_norm": 23.69683074951172, + "learning_rate": 4.89611484432397e-06, + "loss": 0.5088, + "num_input_tokens_seen": 44971520, + "step": 14285 + }, + { + "epoch": 0.9147941873119518, + "grad_norm": 45.28712844848633, + "learning_rate": 4.89595541538387e-06, + "loss": 0.6032, + "num_input_tokens_seen": 44987392, + "step": 14290 + }, + { + "epoch": 0.9151142692529287, + "grad_norm": 32.548545837402344, + "learning_rate": 4.8957958668022374e-06, + "loss": 0.4007, + "num_input_tokens_seen": 45002688, + "step": 14295 + }, + { + "epoch": 0.9154343511939056, + "grad_norm": 39.89044189453125, + "learning_rate": 4.8956361985870374e-06, + "loss": 0.4718, + "num_input_tokens_seen": 45017792, + "step": 14300 + }, + { + "epoch": 0.9157544331348826, + "grad_norm": 33.6307373046875, + "learning_rate": 4.895476410746245e-06, + "loss": 0.5366, + "num_input_tokens_seen": 45032640, + "step": 14305 + }, + { + "epoch": 0.9160745150758595, + "grad_norm": 26.945533752441406, + "learning_rate": 4.895316503287837e-06, + "loss": 0.4542, + "num_input_tokens_seen": 45048256, + "step": 14310 + }, + { + "epoch": 0.9163945970168363, + "grad_norm": 26.089040756225586, + "learning_rate": 4.8951564762198e-06, + "loss": 0.536, + "num_input_tokens_seen": 45064192, + "step": 14315 + }, + { + "epoch": 0.9167146789578132, + "grad_norm": 19.46035385131836, + "learning_rate": 4.894996329550125e-06, + "loss": 0.447, + "num_input_tokens_seen": 45079488, + "step": 14320 + }, + { + "epoch": 0.9170347608987901, + "grad_norm": 17.08335304260254, + "learning_rate": 4.894836063286809e-06, + "loss": 0.5554, + "num_input_tokens_seen": 45095616, + "step": 14325 + }, + { + "epoch": 0.917354842839767, + "grad_norm": 29.120027542114258, + "learning_rate": 4.894675677437853e-06, + "loss": 0.5817, + "num_input_tokens_seen": 45110592, + "step": 14330 + }, + { + "epoch": 0.9176749247807439, + "grad_norm": 37.060794830322266, + "learning_rate": 4.894515172011268e-06, + "loss": 0.5129, + "num_input_tokens_seen": 45127168, + "step": 14335 + }, + { + "epoch": 0.9179950067217207, + "grad_norm": 28.194604873657227, + "learning_rate": 4.894354547015067e-06, + "loss": 0.4606, + "num_input_tokens_seen": 45142208, + "step": 14340 + }, + { + "epoch": 0.9183150886626976, + "grad_norm": 58.32842254638672, + "learning_rate": 4.894193802457272e-06, + "loss": 0.5549, + "num_input_tokens_seen": 45157184, + "step": 14345 + }, + { + "epoch": 0.9186351706036745, + "grad_norm": 17.933534622192383, + "learning_rate": 4.8940329383459095e-06, + "loss": 0.5885, + "num_input_tokens_seen": 45174464, + "step": 14350 + }, + { + "epoch": 0.9189552525446514, + "grad_norm": 28.26095199584961, + "learning_rate": 4.8938719546890126e-06, + "loss": 0.4462, + "num_input_tokens_seen": 45188992, + "step": 14355 + }, + { + "epoch": 0.9192753344856284, + "grad_norm": 22.145668029785156, + "learning_rate": 4.893710851494619e-06, + "loss": 0.6097, + "num_input_tokens_seen": 45204032, + "step": 14360 + }, + { + "epoch": 0.9195954164266052, + "grad_norm": 48.080177307128906, + "learning_rate": 4.8935496287707736e-06, + "loss": 0.6409, + "num_input_tokens_seen": 45219328, + "step": 14365 + }, + { + "epoch": 0.9199154983675821, + "grad_norm": 25.87553596496582, + "learning_rate": 4.893388286525528e-06, + "loss": 0.5678, + "num_input_tokens_seen": 45235584, + "step": 14370 + }, + { + "epoch": 0.920235580308559, + "grad_norm": 41.69355773925781, + "learning_rate": 4.8932268247669366e-06, + "loss": 0.4068, + "num_input_tokens_seen": 45250880, + "step": 14375 + }, + { + "epoch": 0.9205556622495359, + "grad_norm": 26.638471603393555, + "learning_rate": 4.893065243503065e-06, + "loss": 0.549, + "num_input_tokens_seen": 45266752, + "step": 14380 + }, + { + "epoch": 0.9208757441905128, + "grad_norm": 14.011316299438477, + "learning_rate": 4.892903542741979e-06, + "loss": 0.4711, + "num_input_tokens_seen": 45282496, + "step": 14385 + }, + { + "epoch": 0.9211958261314896, + "grad_norm": 25.446640014648438, + "learning_rate": 4.892741722491755e-06, + "loss": 0.4715, + "num_input_tokens_seen": 45297024, + "step": 14390 + }, + { + "epoch": 0.9215159080724665, + "grad_norm": 47.60031509399414, + "learning_rate": 4.892579782760472e-06, + "loss": 0.4262, + "num_input_tokens_seen": 45314176, + "step": 14395 + }, + { + "epoch": 0.9218359900134434, + "grad_norm": 26.948062896728516, + "learning_rate": 4.8924177235562186e-06, + "loss": 0.4715, + "num_input_tokens_seen": 45329344, + "step": 14400 + }, + { + "epoch": 0.9221560719544203, + "grad_norm": 17.333749771118164, + "learning_rate": 4.892255544887084e-06, + "loss": 0.4856, + "num_input_tokens_seen": 45344384, + "step": 14405 + }, + { + "epoch": 0.9224761538953972, + "grad_norm": 17.389209747314453, + "learning_rate": 4.8920932467611705e-06, + "loss": 0.4845, + "num_input_tokens_seen": 45360192, + "step": 14410 + }, + { + "epoch": 0.9227962358363742, + "grad_norm": 17.73093032836914, + "learning_rate": 4.891930829186579e-06, + "loss": 0.455, + "num_input_tokens_seen": 45376000, + "step": 14415 + }, + { + "epoch": 0.923116317777351, + "grad_norm": 26.76286506652832, + "learning_rate": 4.89176829217142e-06, + "loss": 0.6976, + "num_input_tokens_seen": 45390400, + "step": 14420 + }, + { + "epoch": 0.9234363997183279, + "grad_norm": 32.58744430541992, + "learning_rate": 4.891605635723812e-06, + "loss": 0.597, + "num_input_tokens_seen": 45406976, + "step": 14425 + }, + { + "epoch": 0.9237564816593048, + "grad_norm": 37.18525314331055, + "learning_rate": 4.891442859851876e-06, + "loss": 0.4891, + "num_input_tokens_seen": 45422080, + "step": 14430 + }, + { + "epoch": 0.9240765636002817, + "grad_norm": 43.51406478881836, + "learning_rate": 4.891279964563739e-06, + "loss": 0.4595, + "num_input_tokens_seen": 45439296, + "step": 14435 + }, + { + "epoch": 0.9243966455412586, + "grad_norm": 26.862491607666016, + "learning_rate": 4.891116949867537e-06, + "loss": 0.3903, + "num_input_tokens_seen": 45454208, + "step": 14440 + }, + { + "epoch": 0.9247167274822354, + "grad_norm": 40.28186798095703, + "learning_rate": 4.89095381577141e-06, + "loss": 0.4345, + "num_input_tokens_seen": 45470400, + "step": 14445 + }, + { + "epoch": 0.9250368094232123, + "grad_norm": 28.613534927368164, + "learning_rate": 4.890790562283503e-06, + "loss": 0.5197, + "num_input_tokens_seen": 45486528, + "step": 14450 + }, + { + "epoch": 0.9253568913641892, + "grad_norm": 21.444284439086914, + "learning_rate": 4.8906271894119685e-06, + "loss": 0.4466, + "num_input_tokens_seen": 45501440, + "step": 14455 + }, + { + "epoch": 0.9256769733051661, + "grad_norm": 46.7179069519043, + "learning_rate": 4.890463697164965e-06, + "loss": 0.4898, + "num_input_tokens_seen": 45517760, + "step": 14460 + }, + { + "epoch": 0.9259970552461431, + "grad_norm": 23.454675674438477, + "learning_rate": 4.890300085550654e-06, + "loss": 0.4846, + "num_input_tokens_seen": 45533056, + "step": 14465 + }, + { + "epoch": 0.9263171371871199, + "grad_norm": 24.203632354736328, + "learning_rate": 4.890136354577209e-06, + "loss": 0.477, + "num_input_tokens_seen": 45549248, + "step": 14470 + }, + { + "epoch": 0.9266372191280968, + "grad_norm": 20.70198631286621, + "learning_rate": 4.889972504252804e-06, + "loss": 0.3856, + "num_input_tokens_seen": 45565760, + "step": 14475 + }, + { + "epoch": 0.9269573010690737, + "grad_norm": 21.579423904418945, + "learning_rate": 4.889808534585621e-06, + "loss": 0.519, + "num_input_tokens_seen": 45581568, + "step": 14480 + }, + { + "epoch": 0.9272773830100506, + "grad_norm": 34.17148208618164, + "learning_rate": 4.889644445583848e-06, + "loss": 0.6125, + "num_input_tokens_seen": 45596928, + "step": 14485 + }, + { + "epoch": 0.9275974649510275, + "grad_norm": 41.836299896240234, + "learning_rate": 4.889480237255678e-06, + "loss": 0.486, + "num_input_tokens_seen": 45612800, + "step": 14490 + }, + { + "epoch": 0.9279175468920043, + "grad_norm": 38.52415466308594, + "learning_rate": 4.889315909609311e-06, + "loss": 0.5381, + "num_input_tokens_seen": 45627584, + "step": 14495 + }, + { + "epoch": 0.9282376288329812, + "grad_norm": 23.385683059692383, + "learning_rate": 4.889151462652955e-06, + "loss": 0.4355, + "num_input_tokens_seen": 45643840, + "step": 14500 + }, + { + "epoch": 0.9285577107739581, + "grad_norm": 31.85991859436035, + "learning_rate": 4.888986896394817e-06, + "loss": 0.4318, + "num_input_tokens_seen": 45659072, + "step": 14505 + }, + { + "epoch": 0.928877792714935, + "grad_norm": 30.94779396057129, + "learning_rate": 4.888822210843119e-06, + "loss": 0.7709, + "num_input_tokens_seen": 45675328, + "step": 14510 + }, + { + "epoch": 0.9291978746559119, + "grad_norm": 52.87501907348633, + "learning_rate": 4.888657406006082e-06, + "loss": 0.5242, + "num_input_tokens_seen": 45690816, + "step": 14515 + }, + { + "epoch": 0.9295179565968889, + "grad_norm": 16.781530380249023, + "learning_rate": 4.888492481891937e-06, + "loss": 0.3906, + "num_input_tokens_seen": 45706432, + "step": 14520 + }, + { + "epoch": 0.9298380385378657, + "grad_norm": 36.27010726928711, + "learning_rate": 4.888327438508918e-06, + "loss": 0.4153, + "num_input_tokens_seen": 45721920, + "step": 14525 + }, + { + "epoch": 0.9301581204788426, + "grad_norm": 29.505386352539062, + "learning_rate": 4.888162275865267e-06, + "loss": 0.402, + "num_input_tokens_seen": 45738048, + "step": 14530 + }, + { + "epoch": 0.9304782024198195, + "grad_norm": 12.105571746826172, + "learning_rate": 4.887996993969231e-06, + "loss": 0.3587, + "num_input_tokens_seen": 45753856, + "step": 14535 + }, + { + "epoch": 0.9307982843607964, + "grad_norm": 15.666716575622559, + "learning_rate": 4.887831592829064e-06, + "loss": 0.5039, + "num_input_tokens_seen": 45768704, + "step": 14540 + }, + { + "epoch": 0.9311183663017732, + "grad_norm": 17.499473571777344, + "learning_rate": 4.887666072453026e-06, + "loss": 0.3943, + "num_input_tokens_seen": 45783936, + "step": 14545 + }, + { + "epoch": 0.9314384482427501, + "grad_norm": 49.778778076171875, + "learning_rate": 4.887500432849379e-06, + "loss": 0.6048, + "num_input_tokens_seen": 45800320, + "step": 14550 + }, + { + "epoch": 0.931758530183727, + "grad_norm": 28.577571868896484, + "learning_rate": 4.887334674026399e-06, + "loss": 0.4135, + "num_input_tokens_seen": 45816512, + "step": 14555 + }, + { + "epoch": 0.9320786121247039, + "grad_norm": 26.46240997314453, + "learning_rate": 4.887168795992359e-06, + "loss": 0.4462, + "num_input_tokens_seen": 45831936, + "step": 14560 + }, + { + "epoch": 0.9323986940656808, + "grad_norm": 19.787797927856445, + "learning_rate": 4.887002798755544e-06, + "loss": 0.503, + "num_input_tokens_seen": 45846784, + "step": 14565 + }, + { + "epoch": 0.9327187760066578, + "grad_norm": 45.143035888671875, + "learning_rate": 4.886836682324243e-06, + "loss": 0.5613, + "num_input_tokens_seen": 45862080, + "step": 14570 + }, + { + "epoch": 0.9330388579476346, + "grad_norm": 20.82832908630371, + "learning_rate": 4.886670446706751e-06, + "loss": 0.5538, + "num_input_tokens_seen": 45878528, + "step": 14575 + }, + { + "epoch": 0.9333589398886115, + "grad_norm": 33.63450622558594, + "learning_rate": 4.886504091911367e-06, + "loss": 0.5014, + "num_input_tokens_seen": 45894016, + "step": 14580 + }, + { + "epoch": 0.9336790218295884, + "grad_norm": 34.04465866088867, + "learning_rate": 4.886337617946401e-06, + "loss": 0.5506, + "num_input_tokens_seen": 45909504, + "step": 14585 + }, + { + "epoch": 0.9339991037705653, + "grad_norm": 50.13507843017578, + "learning_rate": 4.886171024820163e-06, + "loss": 0.5049, + "num_input_tokens_seen": 45925376, + "step": 14590 + }, + { + "epoch": 0.9343191857115422, + "grad_norm": 40.674259185791016, + "learning_rate": 4.886004312540974e-06, + "loss": 0.6034, + "num_input_tokens_seen": 45940224, + "step": 14595 + }, + { + "epoch": 0.934639267652519, + "grad_norm": 18.404634475708008, + "learning_rate": 4.885837481117158e-06, + "loss": 0.4263, + "num_input_tokens_seen": 45955072, + "step": 14600 + }, + { + "epoch": 0.9349593495934959, + "grad_norm": 34.00730895996094, + "learning_rate": 4.885670530557046e-06, + "loss": 0.4194, + "num_input_tokens_seen": 45970240, + "step": 14605 + }, + { + "epoch": 0.9352794315344728, + "grad_norm": 42.983123779296875, + "learning_rate": 4.885503460868973e-06, + "loss": 0.8094, + "num_input_tokens_seen": 45985856, + "step": 14610 + }, + { + "epoch": 0.9355995134754497, + "grad_norm": 23.464391708374023, + "learning_rate": 4.8853362720612844e-06, + "loss": 0.4448, + "num_input_tokens_seen": 46000256, + "step": 14615 + }, + { + "epoch": 0.9359195954164266, + "grad_norm": 22.114858627319336, + "learning_rate": 4.885168964142326e-06, + "loss": 0.4281, + "num_input_tokens_seen": 46015616, + "step": 14620 + }, + { + "epoch": 0.9362396773574035, + "grad_norm": 21.224071502685547, + "learning_rate": 4.885001537120454e-06, + "loss": 0.4017, + "num_input_tokens_seen": 46031040, + "step": 14625 + }, + { + "epoch": 0.9365597592983804, + "grad_norm": 34.89697265625, + "learning_rate": 4.884833991004027e-06, + "loss": 0.4725, + "num_input_tokens_seen": 46046016, + "step": 14630 + }, + { + "epoch": 0.9368798412393573, + "grad_norm": 29.258129119873047, + "learning_rate": 4.884666325801414e-06, + "loss": 0.4231, + "num_input_tokens_seen": 46062528, + "step": 14635 + }, + { + "epoch": 0.9371999231803342, + "grad_norm": 19.088459014892578, + "learning_rate": 4.884498541520985e-06, + "loss": 0.4505, + "num_input_tokens_seen": 46078144, + "step": 14640 + }, + { + "epoch": 0.9375200051213111, + "grad_norm": 62.39804458618164, + "learning_rate": 4.88433063817112e-06, + "loss": 0.6019, + "num_input_tokens_seen": 46096896, + "step": 14645 + }, + { + "epoch": 0.9378400870622879, + "grad_norm": 31.45743179321289, + "learning_rate": 4.884162615760202e-06, + "loss": 0.4503, + "num_input_tokens_seen": 46111808, + "step": 14650 + }, + { + "epoch": 0.9381601690032648, + "grad_norm": 25.911813735961914, + "learning_rate": 4.883994474296622e-06, + "loss": 0.4946, + "num_input_tokens_seen": 46127936, + "step": 14655 + }, + { + "epoch": 0.9384802509442417, + "grad_norm": 22.902437210083008, + "learning_rate": 4.883826213788775e-06, + "loss": 0.4874, + "num_input_tokens_seen": 46142848, + "step": 14660 + }, + { + "epoch": 0.9388003328852186, + "grad_norm": 31.40627670288086, + "learning_rate": 4.883657834245065e-06, + "loss": 0.7442, + "num_input_tokens_seen": 46158848, + "step": 14665 + }, + { + "epoch": 0.9391204148261955, + "grad_norm": 36.51689147949219, + "learning_rate": 4.883489335673898e-06, + "loss": 0.5611, + "num_input_tokens_seen": 46174912, + "step": 14670 + }, + { + "epoch": 0.9394404967671725, + "grad_norm": 16.68758201599121, + "learning_rate": 4.883320718083688e-06, + "loss": 0.4503, + "num_input_tokens_seen": 46189248, + "step": 14675 + }, + { + "epoch": 0.9397605787081493, + "grad_norm": 29.165836334228516, + "learning_rate": 4.8831519814828565e-06, + "loss": 0.4028, + "num_input_tokens_seen": 46204928, + "step": 14680 + }, + { + "epoch": 0.9400806606491262, + "grad_norm": 30.18831443786621, + "learning_rate": 4.8829831258798285e-06, + "loss": 0.4413, + "num_input_tokens_seen": 46220160, + "step": 14685 + }, + { + "epoch": 0.9404007425901031, + "grad_norm": 23.1508731842041, + "learning_rate": 4.8828141512830355e-06, + "loss": 0.5157, + "num_input_tokens_seen": 46235584, + "step": 14690 + }, + { + "epoch": 0.94072082453108, + "grad_norm": 21.167736053466797, + "learning_rate": 4.8826450577009154e-06, + "loss": 0.3311, + "num_input_tokens_seen": 46251904, + "step": 14695 + }, + { + "epoch": 0.9410409064720568, + "grad_norm": 34.590450286865234, + "learning_rate": 4.882475845141912e-06, + "loss": 0.498, + "num_input_tokens_seen": 46268032, + "step": 14700 + }, + { + "epoch": 0.9413609884130337, + "grad_norm": 41.922332763671875, + "learning_rate": 4.882306513614474e-06, + "loss": 0.4591, + "num_input_tokens_seen": 46283392, + "step": 14705 + }, + { + "epoch": 0.9416810703540106, + "grad_norm": 22.766149520874023, + "learning_rate": 4.882137063127059e-06, + "loss": 0.4144, + "num_input_tokens_seen": 46298752, + "step": 14710 + }, + { + "epoch": 0.9420011522949875, + "grad_norm": 25.683002471923828, + "learning_rate": 4.881967493688126e-06, + "loss": 0.3803, + "num_input_tokens_seen": 46314624, + "step": 14715 + }, + { + "epoch": 0.9423212342359644, + "grad_norm": 25.721643447875977, + "learning_rate": 4.881797805306144e-06, + "loss": 0.4852, + "num_input_tokens_seen": 46331712, + "step": 14720 + }, + { + "epoch": 0.9426413161769412, + "grad_norm": 51.51065444946289, + "learning_rate": 4.881627997989587e-06, + "loss": 0.5661, + "num_input_tokens_seen": 46347200, + "step": 14725 + }, + { + "epoch": 0.9429613981179182, + "grad_norm": 22.58102035522461, + "learning_rate": 4.881458071746932e-06, + "loss": 0.4971, + "num_input_tokens_seen": 46363008, + "step": 14730 + }, + { + "epoch": 0.9432814800588951, + "grad_norm": 37.09565353393555, + "learning_rate": 4.881288026586665e-06, + "loss": 0.4977, + "num_input_tokens_seen": 46378816, + "step": 14735 + }, + { + "epoch": 0.943601561999872, + "grad_norm": 14.81660270690918, + "learning_rate": 4.881117862517278e-06, + "loss": 0.3572, + "num_input_tokens_seen": 46395200, + "step": 14740 + }, + { + "epoch": 0.9439216439408489, + "grad_norm": 40.24591064453125, + "learning_rate": 4.880947579547268e-06, + "loss": 0.4358, + "num_input_tokens_seen": 46411840, + "step": 14745 + }, + { + "epoch": 0.9442417258818258, + "grad_norm": 35.51625442504883, + "learning_rate": 4.880777177685138e-06, + "loss": 0.6732, + "num_input_tokens_seen": 46427776, + "step": 14750 + }, + { + "epoch": 0.9445618078228026, + "grad_norm": 70.6446533203125, + "learning_rate": 4.880606656939397e-06, + "loss": 0.58, + "num_input_tokens_seen": 46444736, + "step": 14755 + }, + { + "epoch": 0.9448818897637795, + "grad_norm": 21.513286590576172, + "learning_rate": 4.880436017318559e-06, + "loss": 0.5092, + "num_input_tokens_seen": 46460672, + "step": 14760 + }, + { + "epoch": 0.9452019717047564, + "grad_norm": 15.545209884643555, + "learning_rate": 4.8802652588311465e-06, + "loss": 0.4399, + "num_input_tokens_seen": 46476736, + "step": 14765 + }, + { + "epoch": 0.9455220536457333, + "grad_norm": 26.899688720703125, + "learning_rate": 4.8800943814856835e-06, + "loss": 0.5088, + "num_input_tokens_seen": 46492416, + "step": 14770 + }, + { + "epoch": 0.9458421355867102, + "grad_norm": 32.293548583984375, + "learning_rate": 4.879923385290706e-06, + "loss": 0.3932, + "num_input_tokens_seen": 46507264, + "step": 14775 + }, + { + "epoch": 0.946162217527687, + "grad_norm": 26.700654983520508, + "learning_rate": 4.879752270254751e-06, + "loss": 0.4766, + "num_input_tokens_seen": 46524224, + "step": 14780 + }, + { + "epoch": 0.946482299468664, + "grad_norm": 35.54378128051758, + "learning_rate": 4.879581036386363e-06, + "loss": 0.6264, + "num_input_tokens_seen": 46539456, + "step": 14785 + }, + { + "epoch": 0.9468023814096409, + "grad_norm": 18.802343368530273, + "learning_rate": 4.8794096836940926e-06, + "loss": 0.4624, + "num_input_tokens_seen": 46555136, + "step": 14790 + }, + { + "epoch": 0.9471224633506178, + "grad_norm": 25.095474243164062, + "learning_rate": 4.879238212186498e-06, + "loss": 0.4513, + "num_input_tokens_seen": 46570432, + "step": 14795 + }, + { + "epoch": 0.9474425452915947, + "grad_norm": 24.524484634399414, + "learning_rate": 4.8790666218721385e-06, + "loss": 0.4042, + "num_input_tokens_seen": 46586304, + "step": 14800 + }, + { + "epoch": 0.9477626272325715, + "grad_norm": 34.6104736328125, + "learning_rate": 4.878894912759584e-06, + "loss": 0.4828, + "num_input_tokens_seen": 46602432, + "step": 14805 + }, + { + "epoch": 0.9480827091735484, + "grad_norm": 29.66895866394043, + "learning_rate": 4.87872308485741e-06, + "loss": 0.4343, + "num_input_tokens_seen": 46617472, + "step": 14810 + }, + { + "epoch": 0.9484027911145253, + "grad_norm": 27.422914505004883, + "learning_rate": 4.878551138174195e-06, + "loss": 0.5112, + "num_input_tokens_seen": 46632896, + "step": 14815 + }, + { + "epoch": 0.9487228730555022, + "grad_norm": 19.85472869873047, + "learning_rate": 4.8783790727185246e-06, + "loss": 0.3885, + "num_input_tokens_seen": 46649408, + "step": 14820 + }, + { + "epoch": 0.9490429549964791, + "grad_norm": 49.1286735534668, + "learning_rate": 4.878206888498993e-06, + "loss": 0.4176, + "num_input_tokens_seen": 46665344, + "step": 14825 + }, + { + "epoch": 0.9493630369374559, + "grad_norm": 33.42776870727539, + "learning_rate": 4.878034585524196e-06, + "loss": 0.5009, + "num_input_tokens_seen": 46680704, + "step": 14830 + }, + { + "epoch": 0.9496831188784329, + "grad_norm": 43.16392135620117, + "learning_rate": 4.877862163802739e-06, + "loss": 0.5474, + "num_input_tokens_seen": 46695936, + "step": 14835 + }, + { + "epoch": 0.9500032008194098, + "grad_norm": 33.74732208251953, + "learning_rate": 4.877689623343232e-06, + "loss": 0.4303, + "num_input_tokens_seen": 46711680, + "step": 14840 + }, + { + "epoch": 0.9503232827603867, + "grad_norm": 14.519062995910645, + "learning_rate": 4.87751696415429e-06, + "loss": 0.4389, + "num_input_tokens_seen": 46727040, + "step": 14845 + }, + { + "epoch": 0.9506433647013636, + "grad_norm": 33.18307113647461, + "learning_rate": 4.877344186244534e-06, + "loss": 0.5531, + "num_input_tokens_seen": 46742720, + "step": 14850 + }, + { + "epoch": 0.9509634466423404, + "grad_norm": 25.078052520751953, + "learning_rate": 4.877171289622593e-06, + "loss": 0.462, + "num_input_tokens_seen": 46758336, + "step": 14855 + }, + { + "epoch": 0.9512835285833173, + "grad_norm": 18.293424606323242, + "learning_rate": 4.8769982742971e-06, + "loss": 0.5603, + "num_input_tokens_seen": 46773312, + "step": 14860 + }, + { + "epoch": 0.9516036105242942, + "grad_norm": 35.73983383178711, + "learning_rate": 4.876825140276694e-06, + "loss": 0.3841, + "num_input_tokens_seen": 46787968, + "step": 14865 + }, + { + "epoch": 0.9519236924652711, + "grad_norm": 46.6758918762207, + "learning_rate": 4.876651887570022e-06, + "loss": 0.3905, + "num_input_tokens_seen": 46803712, + "step": 14870 + }, + { + "epoch": 0.952243774406248, + "grad_norm": 25.678918838500977, + "learning_rate": 4.876478516185733e-06, + "loss": 0.4292, + "num_input_tokens_seen": 46820288, + "step": 14875 + }, + { + "epoch": 0.9525638563472248, + "grad_norm": 38.868526458740234, + "learning_rate": 4.8763050261324866e-06, + "loss": 0.4137, + "num_input_tokens_seen": 46835904, + "step": 14880 + }, + { + "epoch": 0.9528839382882017, + "grad_norm": 28.000843048095703, + "learning_rate": 4.876131417418945e-06, + "loss": 0.5489, + "num_input_tokens_seen": 46851136, + "step": 14885 + }, + { + "epoch": 0.9532040202291787, + "grad_norm": 44.40879821777344, + "learning_rate": 4.8759576900537765e-06, + "loss": 0.5037, + "num_input_tokens_seen": 46867456, + "step": 14890 + }, + { + "epoch": 0.9535241021701556, + "grad_norm": 52.396732330322266, + "learning_rate": 4.875783844045656e-06, + "loss": 0.4738, + "num_input_tokens_seen": 46882816, + "step": 14895 + }, + { + "epoch": 0.9538441841111325, + "grad_norm": 37.91914749145508, + "learning_rate": 4.875609879403267e-06, + "loss": 0.5471, + "num_input_tokens_seen": 46898624, + "step": 14900 + }, + { + "epoch": 0.9541642660521094, + "grad_norm": 29.24949073791504, + "learning_rate": 4.875435796135294e-06, + "loss": 0.5049, + "num_input_tokens_seen": 46914304, + "step": 14905 + }, + { + "epoch": 0.9544843479930862, + "grad_norm": 44.36515426635742, + "learning_rate": 4.875261594250431e-06, + "loss": 0.535, + "num_input_tokens_seen": 46930368, + "step": 14910 + }, + { + "epoch": 0.9548044299340631, + "grad_norm": 32.773929595947266, + "learning_rate": 4.875087273757375e-06, + "loss": 0.4771, + "num_input_tokens_seen": 46945792, + "step": 14915 + }, + { + "epoch": 0.95512451187504, + "grad_norm": 19.221288681030273, + "learning_rate": 4.874912834664833e-06, + "loss": 0.4557, + "num_input_tokens_seen": 46962048, + "step": 14920 + }, + { + "epoch": 0.9554445938160169, + "grad_norm": 35.4656867980957, + "learning_rate": 4.874738276981515e-06, + "loss": 0.5768, + "num_input_tokens_seen": 46976768, + "step": 14925 + }, + { + "epoch": 0.9557646757569938, + "grad_norm": 37.80232238769531, + "learning_rate": 4.874563600716137e-06, + "loss": 0.5047, + "num_input_tokens_seen": 46991424, + "step": 14930 + }, + { + "epoch": 0.9560847576979706, + "grad_norm": 34.0182991027832, + "learning_rate": 4.8743888058774205e-06, + "loss": 0.5943, + "num_input_tokens_seen": 47006656, + "step": 14935 + }, + { + "epoch": 0.9564048396389476, + "grad_norm": 26.965696334838867, + "learning_rate": 4.874213892474094e-06, + "loss": 0.4124, + "num_input_tokens_seen": 47021824, + "step": 14940 + }, + { + "epoch": 0.9567249215799245, + "grad_norm": 28.756380081176758, + "learning_rate": 4.874038860514895e-06, + "loss": 0.5393, + "num_input_tokens_seen": 47037440, + "step": 14945 + }, + { + "epoch": 0.9570450035209014, + "grad_norm": 11.14742374420166, + "learning_rate": 4.873863710008559e-06, + "loss": 0.4314, + "num_input_tokens_seen": 47053760, + "step": 14950 + }, + { + "epoch": 0.9573650854618783, + "grad_norm": 24.828872680664062, + "learning_rate": 4.873688440963835e-06, + "loss": 0.4475, + "num_input_tokens_seen": 47068928, + "step": 14955 + }, + { + "epoch": 0.9576851674028551, + "grad_norm": 30.138290405273438, + "learning_rate": 4.873513053389475e-06, + "loss": 0.4712, + "num_input_tokens_seen": 47084672, + "step": 14960 + }, + { + "epoch": 0.958005249343832, + "grad_norm": 51.0120735168457, + "learning_rate": 4.873337547294236e-06, + "loss": 0.5139, + "num_input_tokens_seen": 47101632, + "step": 14965 + }, + { + "epoch": 0.9583253312848089, + "grad_norm": 26.678205490112305, + "learning_rate": 4.873161922686882e-06, + "loss": 0.4392, + "num_input_tokens_seen": 47116480, + "step": 14970 + }, + { + "epoch": 0.9586454132257858, + "grad_norm": 33.8390998840332, + "learning_rate": 4.872986179576182e-06, + "loss": 0.6716, + "num_input_tokens_seen": 47132992, + "step": 14975 + }, + { + "epoch": 0.9589654951667627, + "grad_norm": 30.286205291748047, + "learning_rate": 4.872810317970914e-06, + "loss": 0.4487, + "num_input_tokens_seen": 47149056, + "step": 14980 + }, + { + "epoch": 0.9592855771077395, + "grad_norm": 13.968469619750977, + "learning_rate": 4.872634337879858e-06, + "loss": 0.408, + "num_input_tokens_seen": 47164864, + "step": 14985 + }, + { + "epoch": 0.9596056590487164, + "grad_norm": 24.5262451171875, + "learning_rate": 4.8724582393118015e-06, + "loss": 0.5247, + "num_input_tokens_seen": 47180544, + "step": 14990 + }, + { + "epoch": 0.9599257409896934, + "grad_norm": 28.953218460083008, + "learning_rate": 4.872282022275538e-06, + "loss": 0.4967, + "num_input_tokens_seen": 47196608, + "step": 14995 + }, + { + "epoch": 0.9602458229306703, + "grad_norm": 12.856378555297852, + "learning_rate": 4.8721056867798675e-06, + "loss": 0.4217, + "num_input_tokens_seen": 47213504, + "step": 15000 + }, + { + "epoch": 0.9605659048716472, + "grad_norm": 20.475292205810547, + "learning_rate": 4.871929232833595e-06, + "loss": 0.3406, + "num_input_tokens_seen": 47228288, + "step": 15005 + }, + { + "epoch": 0.960885986812624, + "grad_norm": 23.70345115661621, + "learning_rate": 4.871752660445531e-06, + "loss": 0.5522, + "num_input_tokens_seen": 47243712, + "step": 15010 + }, + { + "epoch": 0.9612060687536009, + "grad_norm": 27.942752838134766, + "learning_rate": 4.871575969624493e-06, + "loss": 0.6114, + "num_input_tokens_seen": 47259904, + "step": 15015 + }, + { + "epoch": 0.9615261506945778, + "grad_norm": 23.370555877685547, + "learning_rate": 4.871399160379305e-06, + "loss": 0.574, + "num_input_tokens_seen": 47275072, + "step": 15020 + }, + { + "epoch": 0.9618462326355547, + "grad_norm": 29.461627960205078, + "learning_rate": 4.871222232718795e-06, + "loss": 0.5103, + "num_input_tokens_seen": 47290688, + "step": 15025 + }, + { + "epoch": 0.9621663145765316, + "grad_norm": 18.768863677978516, + "learning_rate": 4.871045186651797e-06, + "loss": 0.4575, + "num_input_tokens_seen": 47306496, + "step": 15030 + }, + { + "epoch": 0.9624863965175084, + "grad_norm": 25.668598175048828, + "learning_rate": 4.870868022187153e-06, + "loss": 0.3715, + "num_input_tokens_seen": 47321280, + "step": 15035 + }, + { + "epoch": 0.9628064784584853, + "grad_norm": 31.903993606567383, + "learning_rate": 4.87069073933371e-06, + "loss": 0.5453, + "num_input_tokens_seen": 47337536, + "step": 15040 + }, + { + "epoch": 0.9631265603994623, + "grad_norm": 23.908790588378906, + "learning_rate": 4.87051333810032e-06, + "loss": 0.447, + "num_input_tokens_seen": 47353024, + "step": 15045 + }, + { + "epoch": 0.9634466423404392, + "grad_norm": 18.207738876342773, + "learning_rate": 4.87033581849584e-06, + "loss": 0.4071, + "num_input_tokens_seen": 47369088, + "step": 15050 + }, + { + "epoch": 0.9637667242814161, + "grad_norm": 29.772151947021484, + "learning_rate": 4.870158180529138e-06, + "loss": 0.7118, + "num_input_tokens_seen": 47384320, + "step": 15055 + }, + { + "epoch": 0.964086806222393, + "grad_norm": 21.970617294311523, + "learning_rate": 4.869980424209081e-06, + "loss": 0.5317, + "num_input_tokens_seen": 47400896, + "step": 15060 + }, + { + "epoch": 0.9644068881633698, + "grad_norm": 20.074068069458008, + "learning_rate": 4.869802549544546e-06, + "loss": 0.385, + "num_input_tokens_seen": 47416896, + "step": 15065 + }, + { + "epoch": 0.9647269701043467, + "grad_norm": 63.25687026977539, + "learning_rate": 4.8696245565444165e-06, + "loss": 0.732, + "num_input_tokens_seen": 47432320, + "step": 15070 + }, + { + "epoch": 0.9650470520453236, + "grad_norm": 19.94368553161621, + "learning_rate": 4.869446445217578e-06, + "loss": 0.4796, + "num_input_tokens_seen": 47448320, + "step": 15075 + }, + { + "epoch": 0.9653671339863005, + "grad_norm": 16.650339126586914, + "learning_rate": 4.869268215572928e-06, + "loss": 0.5214, + "num_input_tokens_seen": 47463488, + "step": 15080 + }, + { + "epoch": 0.9656872159272774, + "grad_norm": 36.248435974121094, + "learning_rate": 4.8690898676193635e-06, + "loss": 0.3804, + "num_input_tokens_seen": 47479104, + "step": 15085 + }, + { + "epoch": 0.9660072978682542, + "grad_norm": 20.66890525817871, + "learning_rate": 4.8689114013657914e-06, + "loss": 0.3869, + "num_input_tokens_seen": 47496448, + "step": 15090 + }, + { + "epoch": 0.9663273798092311, + "grad_norm": 58.833858489990234, + "learning_rate": 4.868732816821122e-06, + "loss": 0.5534, + "num_input_tokens_seen": 47511936, + "step": 15095 + }, + { + "epoch": 0.9666474617502081, + "grad_norm": 32.0379524230957, + "learning_rate": 4.8685541139942745e-06, + "loss": 0.5165, + "num_input_tokens_seen": 47528320, + "step": 15100 + }, + { + "epoch": 0.966967543691185, + "grad_norm": 32.7225227355957, + "learning_rate": 4.868375292894173e-06, + "loss": 0.4627, + "num_input_tokens_seen": 47543296, + "step": 15105 + }, + { + "epoch": 0.9672876256321619, + "grad_norm": 8.671682357788086, + "learning_rate": 4.868196353529745e-06, + "loss": 0.4229, + "num_input_tokens_seen": 47558656, + "step": 15110 + }, + { + "epoch": 0.9676077075731387, + "grad_norm": 21.93455696105957, + "learning_rate": 4.868017295909926e-06, + "loss": 0.5188, + "num_input_tokens_seen": 47574720, + "step": 15115 + }, + { + "epoch": 0.9679277895141156, + "grad_norm": 39.03199768066406, + "learning_rate": 4.867838120043659e-06, + "loss": 0.4448, + "num_input_tokens_seen": 47590272, + "step": 15120 + }, + { + "epoch": 0.9682478714550925, + "grad_norm": 38.82822799682617, + "learning_rate": 4.867658825939889e-06, + "loss": 0.4908, + "num_input_tokens_seen": 47605696, + "step": 15125 + }, + { + "epoch": 0.9685679533960694, + "grad_norm": 27.70238494873047, + "learning_rate": 4.86747941360757e-06, + "loss": 0.4739, + "num_input_tokens_seen": 47621760, + "step": 15130 + }, + { + "epoch": 0.9688880353370463, + "grad_norm": 15.632917404174805, + "learning_rate": 4.8672998830556616e-06, + "loss": 0.3881, + "num_input_tokens_seen": 47639296, + "step": 15135 + }, + { + "epoch": 0.9692081172780231, + "grad_norm": 32.16522216796875, + "learning_rate": 4.8671202342931275e-06, + "loss": 0.2948, + "num_input_tokens_seen": 47654656, + "step": 15140 + }, + { + "epoch": 0.969528199219, + "grad_norm": 44.452571868896484, + "learning_rate": 4.866940467328938e-06, + "loss": 0.5355, + "num_input_tokens_seen": 47671168, + "step": 15145 + }, + { + "epoch": 0.9698482811599769, + "grad_norm": 23.60201644897461, + "learning_rate": 4.8667605821720714e-06, + "loss": 0.468, + "num_input_tokens_seen": 47686400, + "step": 15150 + }, + { + "epoch": 0.9701683631009539, + "grad_norm": 39.427406311035156, + "learning_rate": 4.866580578831509e-06, + "loss": 0.4837, + "num_input_tokens_seen": 47701760, + "step": 15155 + }, + { + "epoch": 0.9704884450419308, + "grad_norm": 24.860809326171875, + "learning_rate": 4.86640045731624e-06, + "loss": 0.5242, + "num_input_tokens_seen": 47717248, + "step": 15160 + }, + { + "epoch": 0.9708085269829076, + "grad_norm": 71.07876586914062, + "learning_rate": 4.866220217635258e-06, + "loss": 0.5109, + "num_input_tokens_seen": 47734336, + "step": 15165 + }, + { + "epoch": 0.9711286089238845, + "grad_norm": 46.75459671020508, + "learning_rate": 4.866039859797563e-06, + "loss": 0.5269, + "num_input_tokens_seen": 47750464, + "step": 15170 + }, + { + "epoch": 0.9714486908648614, + "grad_norm": 46.90291976928711, + "learning_rate": 4.865859383812162e-06, + "loss": 0.8597, + "num_input_tokens_seen": 47765824, + "step": 15175 + }, + { + "epoch": 0.9717687728058383, + "grad_norm": 23.863445281982422, + "learning_rate": 4.865678789688067e-06, + "loss": 0.434, + "num_input_tokens_seen": 47781760, + "step": 15180 + }, + { + "epoch": 0.9720888547468152, + "grad_norm": 17.769699096679688, + "learning_rate": 4.865498077434295e-06, + "loss": 0.4185, + "num_input_tokens_seen": 47797312, + "step": 15185 + }, + { + "epoch": 0.972408936687792, + "grad_norm": 22.50734519958496, + "learning_rate": 4.865317247059871e-06, + "loss": 0.4009, + "num_input_tokens_seen": 47813440, + "step": 15190 + }, + { + "epoch": 0.9727290186287689, + "grad_norm": 38.52193069458008, + "learning_rate": 4.865136298573824e-06, + "loss": 0.4948, + "num_input_tokens_seen": 47829440, + "step": 15195 + }, + { + "epoch": 0.9730491005697458, + "grad_norm": 20.863082885742188, + "learning_rate": 4.86495523198519e-06, + "loss": 0.5519, + "num_input_tokens_seen": 47844608, + "step": 15200 + }, + { + "epoch": 0.9733691825107228, + "grad_norm": 12.063892364501953, + "learning_rate": 4.86477404730301e-06, + "loss": 0.465, + "num_input_tokens_seen": 47860160, + "step": 15205 + }, + { + "epoch": 0.9736892644516997, + "grad_norm": 29.530609130859375, + "learning_rate": 4.864592744536332e-06, + "loss": 0.4305, + "num_input_tokens_seen": 47875648, + "step": 15210 + }, + { + "epoch": 0.9740093463926766, + "grad_norm": 31.943798065185547, + "learning_rate": 4.864411323694208e-06, + "loss": 0.4308, + "num_input_tokens_seen": 47892736, + "step": 15215 + }, + { + "epoch": 0.9743294283336534, + "grad_norm": 35.23329162597656, + "learning_rate": 4.8642297847857e-06, + "loss": 0.5133, + "num_input_tokens_seen": 47908992, + "step": 15220 + }, + { + "epoch": 0.9746495102746303, + "grad_norm": 30.034753799438477, + "learning_rate": 4.8640481278198704e-06, + "loss": 0.5394, + "num_input_tokens_seen": 47924736, + "step": 15225 + }, + { + "epoch": 0.9749695922156072, + "grad_norm": 35.523372650146484, + "learning_rate": 4.863866352805791e-06, + "loss": 0.5153, + "num_input_tokens_seen": 47941056, + "step": 15230 + }, + { + "epoch": 0.9752896741565841, + "grad_norm": 16.876171112060547, + "learning_rate": 4.863684459752539e-06, + "loss": 0.476, + "num_input_tokens_seen": 47957824, + "step": 15235 + }, + { + "epoch": 0.975609756097561, + "grad_norm": 73.53807830810547, + "learning_rate": 4.863502448669197e-06, + "loss": 0.7246, + "num_input_tokens_seen": 47973056, + "step": 15240 + }, + { + "epoch": 0.9759298380385378, + "grad_norm": 23.57513427734375, + "learning_rate": 4.863320319564854e-06, + "loss": 0.5526, + "num_input_tokens_seen": 47988928, + "step": 15245 + }, + { + "epoch": 0.9762499199795147, + "grad_norm": 44.4081916809082, + "learning_rate": 4.863138072448604e-06, + "loss": 0.5295, + "num_input_tokens_seen": 48004032, + "step": 15250 + }, + { + "epoch": 0.9765700019204916, + "grad_norm": 12.872322082519531, + "learning_rate": 4.8629557073295486e-06, + "loss": 0.5599, + "num_input_tokens_seen": 48020800, + "step": 15255 + }, + { + "epoch": 0.9768900838614686, + "grad_norm": 22.457061767578125, + "learning_rate": 4.862773224216793e-06, + "loss": 0.6066, + "num_input_tokens_seen": 48036800, + "step": 15260 + }, + { + "epoch": 0.9772101658024455, + "grad_norm": 25.40117073059082, + "learning_rate": 4.86259062311945e-06, + "loss": 0.3402, + "num_input_tokens_seen": 48051264, + "step": 15265 + }, + { + "epoch": 0.9775302477434223, + "grad_norm": 25.571382522583008, + "learning_rate": 4.862407904046637e-06, + "loss": 0.3295, + "num_input_tokens_seen": 48066176, + "step": 15270 + }, + { + "epoch": 0.9778503296843992, + "grad_norm": 26.409080505371094, + "learning_rate": 4.862225067007479e-06, + "loss": 0.3461, + "num_input_tokens_seen": 48081984, + "step": 15275 + }, + { + "epoch": 0.9781704116253761, + "grad_norm": 28.307559967041016, + "learning_rate": 4.862042112011105e-06, + "loss": 0.569, + "num_input_tokens_seen": 48096896, + "step": 15280 + }, + { + "epoch": 0.978490493566353, + "grad_norm": 26.418262481689453, + "learning_rate": 4.861859039066652e-06, + "loss": 0.396, + "num_input_tokens_seen": 48112128, + "step": 15285 + }, + { + "epoch": 0.9788105755073299, + "grad_norm": 17.86612319946289, + "learning_rate": 4.861675848183261e-06, + "loss": 0.449, + "num_input_tokens_seen": 48127616, + "step": 15290 + }, + { + "epoch": 0.9791306574483067, + "grad_norm": 31.178787231445312, + "learning_rate": 4.861492539370079e-06, + "loss": 0.5732, + "num_input_tokens_seen": 48142144, + "step": 15295 + }, + { + "epoch": 0.9794507393892836, + "grad_norm": 32.299468994140625, + "learning_rate": 4.861309112636261e-06, + "loss": 0.4656, + "num_input_tokens_seen": 48158272, + "step": 15300 + }, + { + "epoch": 0.9797708213302605, + "grad_norm": 35.431827545166016, + "learning_rate": 4.861125567990965e-06, + "loss": 0.5226, + "num_input_tokens_seen": 48173120, + "step": 15305 + }, + { + "epoch": 0.9800909032712375, + "grad_norm": 36.018436431884766, + "learning_rate": 4.860941905443357e-06, + "loss": 0.4565, + "num_input_tokens_seen": 48188672, + "step": 15310 + }, + { + "epoch": 0.9804109852122144, + "grad_norm": 20.84773063659668, + "learning_rate": 4.860758125002608e-06, + "loss": 0.4428, + "num_input_tokens_seen": 48204480, + "step": 15315 + }, + { + "epoch": 0.9807310671531912, + "grad_norm": 50.11709976196289, + "learning_rate": 4.860574226677894e-06, + "loss": 0.4556, + "num_input_tokens_seen": 48219584, + "step": 15320 + }, + { + "epoch": 0.9810511490941681, + "grad_norm": 29.625394821166992, + "learning_rate": 4.860390210478401e-06, + "loss": 0.39, + "num_input_tokens_seen": 48235904, + "step": 15325 + }, + { + "epoch": 0.981371231035145, + "grad_norm": 30.308406829833984, + "learning_rate": 4.860206076413314e-06, + "loss": 0.4555, + "num_input_tokens_seen": 48252992, + "step": 15330 + }, + { + "epoch": 0.9816913129761219, + "grad_norm": 39.42976379394531, + "learning_rate": 4.860021824491829e-06, + "loss": 0.47, + "num_input_tokens_seen": 48267840, + "step": 15335 + }, + { + "epoch": 0.9820113949170988, + "grad_norm": 27.23233985900879, + "learning_rate": 4.859837454723149e-06, + "loss": 0.6372, + "num_input_tokens_seen": 48282688, + "step": 15340 + }, + { + "epoch": 0.9823314768580756, + "grad_norm": 18.02949333190918, + "learning_rate": 4.859652967116477e-06, + "loss": 0.6663, + "num_input_tokens_seen": 48300096, + "step": 15345 + }, + { + "epoch": 0.9826515587990525, + "grad_norm": 16.263151168823242, + "learning_rate": 4.859468361681027e-06, + "loss": 0.5205, + "num_input_tokens_seen": 48315136, + "step": 15350 + }, + { + "epoch": 0.9829716407400294, + "grad_norm": 21.199119567871094, + "learning_rate": 4.859283638426017e-06, + "loss": 0.4813, + "num_input_tokens_seen": 48330240, + "step": 15355 + }, + { + "epoch": 0.9832917226810063, + "grad_norm": 19.6123104095459, + "learning_rate": 4.859098797360672e-06, + "loss": 0.3946, + "num_input_tokens_seen": 48345280, + "step": 15360 + }, + { + "epoch": 0.9836118046219833, + "grad_norm": 28.187320709228516, + "learning_rate": 4.858913838494221e-06, + "loss": 0.4358, + "num_input_tokens_seen": 48361792, + "step": 15365 + }, + { + "epoch": 0.9839318865629602, + "grad_norm": 22.358415603637695, + "learning_rate": 4.858728761835898e-06, + "loss": 0.4608, + "num_input_tokens_seen": 48377408, + "step": 15370 + }, + { + "epoch": 0.984251968503937, + "grad_norm": 22.216629028320312, + "learning_rate": 4.858543567394949e-06, + "loss": 0.4886, + "num_input_tokens_seen": 48392896, + "step": 15375 + }, + { + "epoch": 0.9845720504449139, + "grad_norm": 37.263038635253906, + "learning_rate": 4.8583582551806186e-06, + "loss": 0.7478, + "num_input_tokens_seen": 48407552, + "step": 15380 + }, + { + "epoch": 0.9848921323858908, + "grad_norm": 15.807393074035645, + "learning_rate": 4.858172825202161e-06, + "loss": 0.3833, + "num_input_tokens_seen": 48424320, + "step": 15385 + }, + { + "epoch": 0.9852122143268677, + "grad_norm": 38.290889739990234, + "learning_rate": 4.857987277468836e-06, + "loss": 0.6789, + "num_input_tokens_seen": 48441984, + "step": 15390 + }, + { + "epoch": 0.9855322962678446, + "grad_norm": 25.537233352661133, + "learning_rate": 4.857801611989909e-06, + "loss": 0.5358, + "num_input_tokens_seen": 48456832, + "step": 15395 + }, + { + "epoch": 0.9858523782088214, + "grad_norm": 29.17599105834961, + "learning_rate": 4.857615828774651e-06, + "loss": 0.399, + "num_input_tokens_seen": 48472512, + "step": 15400 + }, + { + "epoch": 0.9861724601497983, + "grad_norm": 26.48043441772461, + "learning_rate": 4.857429927832339e-06, + "loss": 0.3159, + "num_input_tokens_seen": 48488832, + "step": 15405 + }, + { + "epoch": 0.9864925420907752, + "grad_norm": 34.03196334838867, + "learning_rate": 4.857243909172255e-06, + "loss": 0.524, + "num_input_tokens_seen": 48503936, + "step": 15410 + }, + { + "epoch": 0.9868126240317522, + "grad_norm": 21.064453125, + "learning_rate": 4.857057772803688e-06, + "loss": 0.5741, + "num_input_tokens_seen": 48519040, + "step": 15415 + }, + { + "epoch": 0.9871327059727291, + "grad_norm": 17.02109146118164, + "learning_rate": 4.856871518735935e-06, + "loss": 0.6914, + "num_input_tokens_seen": 48535040, + "step": 15420 + }, + { + "epoch": 0.9874527879137059, + "grad_norm": 23.904624938964844, + "learning_rate": 4.856685146978294e-06, + "loss": 0.425, + "num_input_tokens_seen": 48551808, + "step": 15425 + }, + { + "epoch": 0.9877728698546828, + "grad_norm": 27.03287124633789, + "learning_rate": 4.856498657540072e-06, + "loss": 0.5954, + "num_input_tokens_seen": 48566592, + "step": 15430 + }, + { + "epoch": 0.9880929517956597, + "grad_norm": 28.24097442626953, + "learning_rate": 4.856312050430582e-06, + "loss": 0.3705, + "num_input_tokens_seen": 48582720, + "step": 15435 + }, + { + "epoch": 0.9884130337366366, + "grad_norm": 45.561981201171875, + "learning_rate": 4.856125325659143e-06, + "loss": 0.5104, + "num_input_tokens_seen": 48599104, + "step": 15440 + }, + { + "epoch": 0.9887331156776135, + "grad_norm": 11.351827621459961, + "learning_rate": 4.855938483235076e-06, + "loss": 0.4091, + "num_input_tokens_seen": 48615040, + "step": 15445 + }, + { + "epoch": 0.9890531976185903, + "grad_norm": 24.972919464111328, + "learning_rate": 4.855751523167713e-06, + "loss": 0.4871, + "num_input_tokens_seen": 48630976, + "step": 15450 + }, + { + "epoch": 0.9893732795595672, + "grad_norm": 25.05316162109375, + "learning_rate": 4.85556444546639e-06, + "loss": 0.4397, + "num_input_tokens_seen": 48646080, + "step": 15455 + }, + { + "epoch": 0.9896933615005441, + "grad_norm": 44.679935455322266, + "learning_rate": 4.855377250140449e-06, + "loss": 0.4126, + "num_input_tokens_seen": 48662528, + "step": 15460 + }, + { + "epoch": 0.990013443441521, + "grad_norm": 39.04172134399414, + "learning_rate": 4.855189937199235e-06, + "loss": 0.4326, + "num_input_tokens_seen": 48680000, + "step": 15465 + }, + { + "epoch": 0.990333525382498, + "grad_norm": 26.142587661743164, + "learning_rate": 4.855002506652104e-06, + "loss": 0.4272, + "num_input_tokens_seen": 48696256, + "step": 15470 + }, + { + "epoch": 0.9906536073234748, + "grad_norm": 25.44029426574707, + "learning_rate": 4.854814958508416e-06, + "loss": 0.3104, + "num_input_tokens_seen": 48711168, + "step": 15475 + }, + { + "epoch": 0.9909736892644517, + "grad_norm": 37.974124908447266, + "learning_rate": 4.854627292777533e-06, + "loss": 0.8846, + "num_input_tokens_seen": 48726848, + "step": 15480 + }, + { + "epoch": 0.9912937712054286, + "grad_norm": 44.746707916259766, + "learning_rate": 4.854439509468829e-06, + "loss": 0.5502, + "num_input_tokens_seen": 48743232, + "step": 15485 + }, + { + "epoch": 0.9916138531464055, + "grad_norm": 61.623565673828125, + "learning_rate": 4.854251608591679e-06, + "loss": 0.5215, + "num_input_tokens_seen": 48758080, + "step": 15490 + }, + { + "epoch": 0.9919339350873824, + "grad_norm": 23.460647583007812, + "learning_rate": 4.854063590155467e-06, + "loss": 0.4002, + "num_input_tokens_seen": 48773120, + "step": 15495 + }, + { + "epoch": 0.9922540170283592, + "grad_norm": 40.87631607055664, + "learning_rate": 4.8538754541695806e-06, + "loss": 0.5051, + "num_input_tokens_seen": 48789056, + "step": 15500 + }, + { + "epoch": 0.9925740989693361, + "grad_norm": 45.82258224487305, + "learning_rate": 4.8536872006434155e-06, + "loss": 0.5882, + "num_input_tokens_seen": 48804288, + "step": 15505 + }, + { + "epoch": 0.992894180910313, + "grad_norm": 27.765323638916016, + "learning_rate": 4.853498829586371e-06, + "loss": 0.4437, + "num_input_tokens_seen": 48818816, + "step": 15510 + }, + { + "epoch": 0.9932142628512899, + "grad_norm": 36.32001495361328, + "learning_rate": 4.8533103410078544e-06, + "loss": 0.5251, + "num_input_tokens_seen": 48835520, + "step": 15515 + }, + { + "epoch": 0.9935343447922668, + "grad_norm": 14.219404220581055, + "learning_rate": 4.853121734917276e-06, + "loss": 0.3338, + "num_input_tokens_seen": 48851328, + "step": 15520 + }, + { + "epoch": 0.9938544267332438, + "grad_norm": 48.362518310546875, + "learning_rate": 4.852933011324057e-06, + "loss": 0.4314, + "num_input_tokens_seen": 48866304, + "step": 15525 + }, + { + "epoch": 0.9941745086742206, + "grad_norm": 33.74608612060547, + "learning_rate": 4.852744170237619e-06, + "loss": 0.4162, + "num_input_tokens_seen": 48882752, + "step": 15530 + }, + { + "epoch": 0.9944945906151975, + "grad_norm": 32.31060028076172, + "learning_rate": 4.852555211667391e-06, + "loss": 0.5135, + "num_input_tokens_seen": 48898304, + "step": 15535 + }, + { + "epoch": 0.9948146725561744, + "grad_norm": 22.47760772705078, + "learning_rate": 4.852366135622811e-06, + "loss": 0.6086, + "num_input_tokens_seen": 48914048, + "step": 15540 + }, + { + "epoch": 0.9951347544971513, + "grad_norm": 31.676982879638672, + "learning_rate": 4.852176942113318e-06, + "loss": 0.4028, + "num_input_tokens_seen": 48930176, + "step": 15545 + }, + { + "epoch": 0.9954548364381282, + "grad_norm": 26.14257049560547, + "learning_rate": 4.851987631148361e-06, + "loss": 0.5957, + "num_input_tokens_seen": 48945920, + "step": 15550 + }, + { + "epoch": 0.995774918379105, + "grad_norm": 31.967426300048828, + "learning_rate": 4.851798202737393e-06, + "loss": 0.5758, + "num_input_tokens_seen": 48960832, + "step": 15555 + }, + { + "epoch": 0.9960950003200819, + "grad_norm": 23.868547439575195, + "learning_rate": 4.851608656889874e-06, + "loss": 0.3928, + "num_input_tokens_seen": 48977280, + "step": 15560 + }, + { + "epoch": 0.9964150822610588, + "grad_norm": 24.888898849487305, + "learning_rate": 4.851418993615266e-06, + "loss": 0.3878, + "num_input_tokens_seen": 48992512, + "step": 15565 + }, + { + "epoch": 0.9967351642020357, + "grad_norm": 15.661918640136719, + "learning_rate": 4.851229212923042e-06, + "loss": 0.4631, + "num_input_tokens_seen": 49008128, + "step": 15570 + }, + { + "epoch": 0.9970552461430127, + "grad_norm": 30.29024314880371, + "learning_rate": 4.851039314822678e-06, + "loss": 0.4514, + "num_input_tokens_seen": 49024512, + "step": 15575 + }, + { + "epoch": 0.9973753280839895, + "grad_norm": 38.949031829833984, + "learning_rate": 4.850849299323657e-06, + "loss": 0.3798, + "num_input_tokens_seen": 49040128, + "step": 15580 + }, + { + "epoch": 0.9976954100249664, + "grad_norm": 22.578025817871094, + "learning_rate": 4.8506591664354676e-06, + "loss": 0.4323, + "num_input_tokens_seen": 49055360, + "step": 15585 + }, + { + "epoch": 0.9980154919659433, + "grad_norm": 32.73869705200195, + "learning_rate": 4.850468916167603e-06, + "loss": 0.4633, + "num_input_tokens_seen": 49071360, + "step": 15590 + }, + { + "epoch": 0.9983355739069202, + "grad_norm": 24.161319732666016, + "learning_rate": 4.850278548529563e-06, + "loss": 0.3504, + "num_input_tokens_seen": 49086144, + "step": 15595 + }, + { + "epoch": 0.9986556558478971, + "grad_norm": 34.02627944946289, + "learning_rate": 4.850088063530856e-06, + "loss": 0.5339, + "num_input_tokens_seen": 49101312, + "step": 15600 + }, + { + "epoch": 0.9989757377888739, + "grad_norm": 34.71751022338867, + "learning_rate": 4.849897461180991e-06, + "loss": 0.3432, + "num_input_tokens_seen": 49116672, + "step": 15605 + }, + { + "epoch": 0.9992958197298508, + "grad_norm": 15.38386344909668, + "learning_rate": 4.849706741489489e-06, + "loss": 0.3068, + "num_input_tokens_seen": 49132288, + "step": 15610 + }, + { + "epoch": 0.9996159016708277, + "grad_norm": 12.211482048034668, + "learning_rate": 4.84951590446587e-06, + "loss": 0.5035, + "num_input_tokens_seen": 49148096, + "step": 15615 + }, + { + "epoch": 0.9999359836118046, + "grad_norm": 9.575580596923828, + "learning_rate": 4.849324950119665e-06, + "loss": 0.343, + "num_input_tokens_seen": 49163840, + "step": 15620 + }, + { + "epoch": 1.0001920491645861, + "eval_loss": 0.4853726923465729, + "eval_runtime": 51.0168, + "eval_samples_per_second": 272.185, + "eval_steps_per_second": 34.028, + "num_input_tokens_seen": 49176512, + "step": 15624 + }, + { + "epoch": 1.0002560655527815, + "grad_norm": 27.017749786376953, + "learning_rate": 4.849133878460409e-06, + "loss": 0.3136, + "num_input_tokens_seen": 49180096, + "step": 15625 + }, + { + "epoch": 1.0005761474937584, + "grad_norm": 19.1455135345459, + "learning_rate": 4.848942689497643e-06, + "loss": 0.267, + "num_input_tokens_seen": 49195648, + "step": 15630 + }, + { + "epoch": 1.0008962294347352, + "grad_norm": 13.99332332611084, + "learning_rate": 4.848751383240915e-06, + "loss": 0.4077, + "num_input_tokens_seen": 49212544, + "step": 15635 + }, + { + "epoch": 1.0012163113757122, + "grad_norm": 27.3225154876709, + "learning_rate": 4.848559959699778e-06, + "loss": 0.3645, + "num_input_tokens_seen": 49228672, + "step": 15640 + }, + { + "epoch": 1.001536393316689, + "grad_norm": 9.88764476776123, + "learning_rate": 4.848368418883788e-06, + "loss": 0.3051, + "num_input_tokens_seen": 49243072, + "step": 15645 + }, + { + "epoch": 1.001856475257666, + "grad_norm": 37.570953369140625, + "learning_rate": 4.848176760802512e-06, + "loss": 0.3817, + "num_input_tokens_seen": 49259520, + "step": 15650 + }, + { + "epoch": 1.002176557198643, + "grad_norm": 65.50390625, + "learning_rate": 4.84798498546552e-06, + "loss": 0.2813, + "num_input_tokens_seen": 49274752, + "step": 15655 + }, + { + "epoch": 1.0024966391396197, + "grad_norm": 33.80902862548828, + "learning_rate": 4.847793092882388e-06, + "loss": 0.3692, + "num_input_tokens_seen": 49289728, + "step": 15660 + }, + { + "epoch": 1.0028167210805967, + "grad_norm": 47.36775207519531, + "learning_rate": 4.847601083062699e-06, + "loss": 0.4069, + "num_input_tokens_seen": 49304960, + "step": 15665 + }, + { + "epoch": 1.0031368030215735, + "grad_norm": 12.741809844970703, + "learning_rate": 4.847408956016039e-06, + "loss": 0.4501, + "num_input_tokens_seen": 49321152, + "step": 15670 + }, + { + "epoch": 1.0034568849625505, + "grad_norm": 15.099333763122559, + "learning_rate": 4.847216711752003e-06, + "loss": 0.2486, + "num_input_tokens_seen": 49337984, + "step": 15675 + }, + { + "epoch": 1.0037769669035272, + "grad_norm": 28.908143997192383, + "learning_rate": 4.847024350280191e-06, + "loss": 0.2594, + "num_input_tokens_seen": 49352768, + "step": 15680 + }, + { + "epoch": 1.0040970488445042, + "grad_norm": 16.366575241088867, + "learning_rate": 4.846831871610207e-06, + "loss": 0.2567, + "num_input_tokens_seen": 49368832, + "step": 15685 + }, + { + "epoch": 1.004417130785481, + "grad_norm": 12.994856834411621, + "learning_rate": 4.846639275751664e-06, + "loss": 0.3306, + "num_input_tokens_seen": 49384640, + "step": 15690 + }, + { + "epoch": 1.004737212726458, + "grad_norm": 17.435606002807617, + "learning_rate": 4.84644656271418e-06, + "loss": 0.3599, + "num_input_tokens_seen": 49399936, + "step": 15695 + }, + { + "epoch": 1.0050572946674348, + "grad_norm": 19.32331085205078, + "learning_rate": 4.846253732507376e-06, + "loss": 0.2855, + "num_input_tokens_seen": 49415680, + "step": 15700 + }, + { + "epoch": 1.0053773766084118, + "grad_norm": 33.46013259887695, + "learning_rate": 4.846060785140882e-06, + "loss": 0.3478, + "num_input_tokens_seen": 49430592, + "step": 15705 + }, + { + "epoch": 1.0056974585493887, + "grad_norm": 16.799819946289062, + "learning_rate": 4.845867720624332e-06, + "loss": 0.3396, + "num_input_tokens_seen": 49445312, + "step": 15710 + }, + { + "epoch": 1.0060175404903655, + "grad_norm": 27.430076599121094, + "learning_rate": 4.845674538967367e-06, + "loss": 0.3172, + "num_input_tokens_seen": 49460352, + "step": 15715 + }, + { + "epoch": 1.0063376224313425, + "grad_norm": 36.73637390136719, + "learning_rate": 4.8454812401796355e-06, + "loss": 0.2856, + "num_input_tokens_seen": 49476352, + "step": 15720 + }, + { + "epoch": 1.0066577043723193, + "grad_norm": 10.64133358001709, + "learning_rate": 4.845287824270787e-06, + "loss": 0.2585, + "num_input_tokens_seen": 49492096, + "step": 15725 + }, + { + "epoch": 1.0069777863132963, + "grad_norm": 12.223121643066406, + "learning_rate": 4.8450942912504805e-06, + "loss": 0.2834, + "num_input_tokens_seen": 49506944, + "step": 15730 + }, + { + "epoch": 1.007297868254273, + "grad_norm": 54.26447677612305, + "learning_rate": 4.844900641128381e-06, + "loss": 0.3926, + "num_input_tokens_seen": 49523904, + "step": 15735 + }, + { + "epoch": 1.00761795019525, + "grad_norm": 47.8484001159668, + "learning_rate": 4.844706873914156e-06, + "loss": 0.4609, + "num_input_tokens_seen": 49538624, + "step": 15740 + }, + { + "epoch": 1.0079380321362268, + "grad_norm": 30.106849670410156, + "learning_rate": 4.844512989617484e-06, + "loss": 0.4485, + "num_input_tokens_seen": 49554816, + "step": 15745 + }, + { + "epoch": 1.0082581140772038, + "grad_norm": 23.42662811279297, + "learning_rate": 4.844318988248045e-06, + "loss": 0.2031, + "num_input_tokens_seen": 49570496, + "step": 15750 + }, + { + "epoch": 1.0085781960181806, + "grad_norm": 17.633113861083984, + "learning_rate": 4.844124869815528e-06, + "loss": 0.5065, + "num_input_tokens_seen": 49586432, + "step": 15755 + }, + { + "epoch": 1.0088982779591575, + "grad_norm": 48.068355560302734, + "learning_rate": 4.843930634329623e-06, + "loss": 0.3074, + "num_input_tokens_seen": 49603264, + "step": 15760 + }, + { + "epoch": 1.0092183599001345, + "grad_norm": 22.288013458251953, + "learning_rate": 4.8437362818000325e-06, + "loss": 0.2569, + "num_input_tokens_seen": 49619008, + "step": 15765 + }, + { + "epoch": 1.0095384418411113, + "grad_norm": 17.734498977661133, + "learning_rate": 4.843541812236461e-06, + "loss": 0.3842, + "num_input_tokens_seen": 49635456, + "step": 15770 + }, + { + "epoch": 1.0098585237820883, + "grad_norm": 13.934710502624512, + "learning_rate": 4.843347225648617e-06, + "loss": 0.379, + "num_input_tokens_seen": 49651776, + "step": 15775 + }, + { + "epoch": 1.010178605723065, + "grad_norm": 10.098052978515625, + "learning_rate": 4.843152522046219e-06, + "loss": 0.3251, + "num_input_tokens_seen": 49667264, + "step": 15780 + }, + { + "epoch": 1.010498687664042, + "grad_norm": 17.172670364379883, + "learning_rate": 4.84295770143899e-06, + "loss": 0.2912, + "num_input_tokens_seen": 49683392, + "step": 15785 + }, + { + "epoch": 1.0108187696050188, + "grad_norm": 30.209701538085938, + "learning_rate": 4.842762763836656e-06, + "loss": 0.2347, + "num_input_tokens_seen": 49698560, + "step": 15790 + }, + { + "epoch": 1.0111388515459958, + "grad_norm": 44.14800262451172, + "learning_rate": 4.842567709248953e-06, + "loss": 0.4577, + "num_input_tokens_seen": 49713984, + "step": 15795 + }, + { + "epoch": 1.0114589334869726, + "grad_norm": 26.44842529296875, + "learning_rate": 4.842372537685621e-06, + "loss": 0.2336, + "num_input_tokens_seen": 49729920, + "step": 15800 + }, + { + "epoch": 1.0117790154279496, + "grad_norm": 20.457998275756836, + "learning_rate": 4.842177249156405e-06, + "loss": 0.2783, + "num_input_tokens_seen": 49747008, + "step": 15805 + }, + { + "epoch": 1.0120990973689263, + "grad_norm": 19.346782684326172, + "learning_rate": 4.8419818436710575e-06, + "loss": 0.3888, + "num_input_tokens_seen": 49763584, + "step": 15810 + }, + { + "epoch": 1.0124191793099033, + "grad_norm": 26.963144302368164, + "learning_rate": 4.841786321239336e-06, + "loss": 0.4723, + "num_input_tokens_seen": 49778368, + "step": 15815 + }, + { + "epoch": 1.0127392612508803, + "grad_norm": 9.117230415344238, + "learning_rate": 4.841590681871002e-06, + "loss": 0.325, + "num_input_tokens_seen": 49794368, + "step": 15820 + }, + { + "epoch": 1.013059343191857, + "grad_norm": 17.352519989013672, + "learning_rate": 4.8413949255758274e-06, + "loss": 0.3768, + "num_input_tokens_seen": 49810688, + "step": 15825 + }, + { + "epoch": 1.013379425132834, + "grad_norm": 48.96626281738281, + "learning_rate": 4.841199052363586e-06, + "loss": 0.4759, + "num_input_tokens_seen": 49825920, + "step": 15830 + }, + { + "epoch": 1.0136995070738108, + "grad_norm": 28.722761154174805, + "learning_rate": 4.841003062244059e-06, + "loss": 0.5284, + "num_input_tokens_seen": 49841856, + "step": 15835 + }, + { + "epoch": 1.0140195890147878, + "grad_norm": 42.195987701416016, + "learning_rate": 4.840806955227033e-06, + "loss": 0.3258, + "num_input_tokens_seen": 49857088, + "step": 15840 + }, + { + "epoch": 1.0143396709557646, + "grad_norm": 43.51691436767578, + "learning_rate": 4.8406107313223e-06, + "loss": 0.3578, + "num_input_tokens_seen": 49872832, + "step": 15845 + }, + { + "epoch": 1.0146597528967416, + "grad_norm": 21.669477462768555, + "learning_rate": 4.840414390539659e-06, + "loss": 0.3216, + "num_input_tokens_seen": 49887936, + "step": 15850 + }, + { + "epoch": 1.0149798348377184, + "grad_norm": 15.210870742797852, + "learning_rate": 4.840217932888915e-06, + "loss": 0.2599, + "num_input_tokens_seen": 49902656, + "step": 15855 + }, + { + "epoch": 1.0152999167786954, + "grad_norm": 21.39766502380371, + "learning_rate": 4.840021358379876e-06, + "loss": 0.3158, + "num_input_tokens_seen": 49918464, + "step": 15860 + }, + { + "epoch": 1.0156199987196723, + "grad_norm": 14.095654487609863, + "learning_rate": 4.83982466702236e-06, + "loss": 0.2552, + "num_input_tokens_seen": 49933696, + "step": 15865 + }, + { + "epoch": 1.0159400806606491, + "grad_norm": 16.09145736694336, + "learning_rate": 4.839627858826186e-06, + "loss": 0.3292, + "num_input_tokens_seen": 49949760, + "step": 15870 + }, + { + "epoch": 1.016260162601626, + "grad_norm": 30.574026107788086, + "learning_rate": 4.839430933801185e-06, + "loss": 0.3322, + "num_input_tokens_seen": 49966080, + "step": 15875 + }, + { + "epoch": 1.0165802445426029, + "grad_norm": 15.25211238861084, + "learning_rate": 4.839233891957188e-06, + "loss": 0.2365, + "num_input_tokens_seen": 49980928, + "step": 15880 + }, + { + "epoch": 1.0169003264835799, + "grad_norm": 38.41034698486328, + "learning_rate": 4.839036733304036e-06, + "loss": 0.3634, + "num_input_tokens_seen": 49996160, + "step": 15885 + }, + { + "epoch": 1.0172204084245566, + "grad_norm": 30.06112289428711, + "learning_rate": 4.838839457851573e-06, + "loss": 0.2996, + "num_input_tokens_seen": 50011008, + "step": 15890 + }, + { + "epoch": 1.0175404903655336, + "grad_norm": 12.735444068908691, + "learning_rate": 4.83864206560965e-06, + "loss": 0.2945, + "num_input_tokens_seen": 50026304, + "step": 15895 + }, + { + "epoch": 1.0178605723065104, + "grad_norm": 8.774861335754395, + "learning_rate": 4.838444556588123e-06, + "loss": 0.2465, + "num_input_tokens_seen": 50041856, + "step": 15900 + }, + { + "epoch": 1.0181806542474874, + "grad_norm": 46.276329040527344, + "learning_rate": 4.838246930796856e-06, + "loss": 0.4178, + "num_input_tokens_seen": 50057152, + "step": 15905 + }, + { + "epoch": 1.0185007361884642, + "grad_norm": 64.4219970703125, + "learning_rate": 4.838049188245717e-06, + "loss": 0.2823, + "num_input_tokens_seen": 50075392, + "step": 15910 + }, + { + "epoch": 1.0188208181294411, + "grad_norm": 40.590087890625, + "learning_rate": 4.8378513289445785e-06, + "loss": 0.3396, + "num_input_tokens_seen": 50090816, + "step": 15915 + }, + { + "epoch": 1.0191409000704181, + "grad_norm": 9.677297592163086, + "learning_rate": 4.837653352903324e-06, + "loss": 0.2388, + "num_input_tokens_seen": 50106112, + "step": 15920 + }, + { + "epoch": 1.019460982011395, + "grad_norm": 11.849546432495117, + "learning_rate": 4.837455260131836e-06, + "loss": 0.4008, + "num_input_tokens_seen": 50121664, + "step": 15925 + }, + { + "epoch": 1.019781063952372, + "grad_norm": 31.377010345458984, + "learning_rate": 4.837257050640009e-06, + "loss": 0.3061, + "num_input_tokens_seen": 50137856, + "step": 15930 + }, + { + "epoch": 1.0201011458933487, + "grad_norm": 16.51397132873535, + "learning_rate": 4.837058724437738e-06, + "loss": 0.295, + "num_input_tokens_seen": 50153984, + "step": 15935 + }, + { + "epoch": 1.0204212278343257, + "grad_norm": 36.56599426269531, + "learning_rate": 4.836860281534928e-06, + "loss": 0.3458, + "num_input_tokens_seen": 50170112, + "step": 15940 + }, + { + "epoch": 1.0207413097753024, + "grad_norm": 14.328011512756348, + "learning_rate": 4.836661721941488e-06, + "loss": 0.2094, + "num_input_tokens_seen": 50185216, + "step": 15945 + }, + { + "epoch": 1.0210613917162794, + "grad_norm": 33.77052307128906, + "learning_rate": 4.836463045667333e-06, + "loss": 0.3635, + "num_input_tokens_seen": 50203200, + "step": 15950 + }, + { + "epoch": 1.0213814736572562, + "grad_norm": 13.844655990600586, + "learning_rate": 4.836264252722384e-06, + "loss": 0.3291, + "num_input_tokens_seen": 50218304, + "step": 15955 + }, + { + "epoch": 1.0217015555982332, + "grad_norm": 16.3697452545166, + "learning_rate": 4.8360653431165665e-06, + "loss": 0.3045, + "num_input_tokens_seen": 50234048, + "step": 15960 + }, + { + "epoch": 1.02202163753921, + "grad_norm": 24.419179916381836, + "learning_rate": 4.835866316859814e-06, + "loss": 0.289, + "num_input_tokens_seen": 50250112, + "step": 15965 + }, + { + "epoch": 1.022341719480187, + "grad_norm": 16.967554092407227, + "learning_rate": 4.835667173962065e-06, + "loss": 0.1814, + "num_input_tokens_seen": 50266752, + "step": 15970 + }, + { + "epoch": 1.022661801421164, + "grad_norm": 30.238712310791016, + "learning_rate": 4.835467914433262e-06, + "loss": 0.3621, + "num_input_tokens_seen": 50281792, + "step": 15975 + }, + { + "epoch": 1.0229818833621407, + "grad_norm": 63.85428237915039, + "learning_rate": 4.835268538283359e-06, + "loss": 0.3635, + "num_input_tokens_seen": 50296896, + "step": 15980 + }, + { + "epoch": 1.0233019653031177, + "grad_norm": 33.79995346069336, + "learning_rate": 4.835069045522307e-06, + "loss": 0.2286, + "num_input_tokens_seen": 50313280, + "step": 15985 + }, + { + "epoch": 1.0236220472440944, + "grad_norm": 6.336806297302246, + "learning_rate": 4.834869436160071e-06, + "loss": 0.3084, + "num_input_tokens_seen": 50329088, + "step": 15990 + }, + { + "epoch": 1.0239421291850714, + "grad_norm": 46.86308670043945, + "learning_rate": 4.834669710206617e-06, + "loss": 0.4138, + "num_input_tokens_seen": 50345472, + "step": 15995 + }, + { + "epoch": 1.0242622111260482, + "grad_norm": 29.908206939697266, + "learning_rate": 4.834469867671918e-06, + "loss": 0.2707, + "num_input_tokens_seen": 50360128, + "step": 16000 + }, + { + "epoch": 1.0245822930670252, + "grad_norm": 53.494869232177734, + "learning_rate": 4.8342699085659545e-06, + "loss": 0.3587, + "num_input_tokens_seen": 50375424, + "step": 16005 + }, + { + "epoch": 1.024902375008002, + "grad_norm": 27.26243019104004, + "learning_rate": 4.83406983289871e-06, + "loss": 0.3576, + "num_input_tokens_seen": 50390656, + "step": 16010 + }, + { + "epoch": 1.025222456948979, + "grad_norm": 36.8646354675293, + "learning_rate": 4.8338696406801755e-06, + "loss": 0.268, + "num_input_tokens_seen": 50407296, + "step": 16015 + }, + { + "epoch": 1.0255425388899557, + "grad_norm": 27.234512329101562, + "learning_rate": 4.8336693319203485e-06, + "loss": 0.3322, + "num_input_tokens_seen": 50422912, + "step": 16020 + }, + { + "epoch": 1.0258626208309327, + "grad_norm": 13.271559715270996, + "learning_rate": 4.83346890662923e-06, + "loss": 0.3111, + "num_input_tokens_seen": 50438208, + "step": 16025 + }, + { + "epoch": 1.0261827027719097, + "grad_norm": 63.03966522216797, + "learning_rate": 4.833268364816829e-06, + "loss": 0.3233, + "num_input_tokens_seen": 50453760, + "step": 16030 + }, + { + "epoch": 1.0265027847128865, + "grad_norm": 17.5965518951416, + "learning_rate": 4.833067706493159e-06, + "loss": 0.296, + "num_input_tokens_seen": 50469504, + "step": 16035 + }, + { + "epoch": 1.0268228666538635, + "grad_norm": 9.75537395477295, + "learning_rate": 4.832866931668242e-06, + "loss": 0.31, + "num_input_tokens_seen": 50485568, + "step": 16040 + }, + { + "epoch": 1.0271429485948402, + "grad_norm": 23.646615982055664, + "learning_rate": 4.8326660403521005e-06, + "loss": 0.2915, + "num_input_tokens_seen": 50500480, + "step": 16045 + }, + { + "epoch": 1.0274630305358172, + "grad_norm": 19.77527618408203, + "learning_rate": 4.832465032554768e-06, + "loss": 0.3051, + "num_input_tokens_seen": 50516352, + "step": 16050 + }, + { + "epoch": 1.027783112476794, + "grad_norm": 12.780741691589355, + "learning_rate": 4.832263908286281e-06, + "loss": 0.3903, + "num_input_tokens_seen": 50531456, + "step": 16055 + }, + { + "epoch": 1.028103194417771, + "grad_norm": 42.58659362792969, + "learning_rate": 4.832062667556682e-06, + "loss": 0.3955, + "num_input_tokens_seen": 50546240, + "step": 16060 + }, + { + "epoch": 1.0284232763587478, + "grad_norm": 33.404579162597656, + "learning_rate": 4.8318613103760215e-06, + "loss": 0.3919, + "num_input_tokens_seen": 50560768, + "step": 16065 + }, + { + "epoch": 1.0287433582997247, + "grad_norm": 69.37775421142578, + "learning_rate": 4.831659836754353e-06, + "loss": 0.4741, + "num_input_tokens_seen": 50578816, + "step": 16070 + }, + { + "epoch": 1.0290634402407015, + "grad_norm": 20.93167495727539, + "learning_rate": 4.831458246701738e-06, + "loss": 0.389, + "num_input_tokens_seen": 50594048, + "step": 16075 + }, + { + "epoch": 1.0293835221816785, + "grad_norm": 13.3658447265625, + "learning_rate": 4.831256540228242e-06, + "loss": 0.2532, + "num_input_tokens_seen": 50609344, + "step": 16080 + }, + { + "epoch": 1.0297036041226555, + "grad_norm": 49.49187469482422, + "learning_rate": 4.831054717343936e-06, + "loss": 0.3798, + "num_input_tokens_seen": 50624832, + "step": 16085 + }, + { + "epoch": 1.0300236860636323, + "grad_norm": 17.410297393798828, + "learning_rate": 4.830852778058901e-06, + "loss": 0.2135, + "num_input_tokens_seen": 50641152, + "step": 16090 + }, + { + "epoch": 1.0303437680046093, + "grad_norm": 11.342344284057617, + "learning_rate": 4.830650722383219e-06, + "loss": 0.3473, + "num_input_tokens_seen": 50657472, + "step": 16095 + }, + { + "epoch": 1.030663849945586, + "grad_norm": 16.70142936706543, + "learning_rate": 4.83044855032698e-06, + "loss": 0.3126, + "num_input_tokens_seen": 50673536, + "step": 16100 + }, + { + "epoch": 1.030983931886563, + "grad_norm": 12.814266204833984, + "learning_rate": 4.830246261900279e-06, + "loss": 0.282, + "num_input_tokens_seen": 50688832, + "step": 16105 + }, + { + "epoch": 1.0313040138275398, + "grad_norm": 27.243228912353516, + "learning_rate": 4.830043857113217e-06, + "loss": 0.2764, + "num_input_tokens_seen": 50705728, + "step": 16110 + }, + { + "epoch": 1.0316240957685168, + "grad_norm": 39.38770294189453, + "learning_rate": 4.829841335975902e-06, + "loss": 0.3087, + "num_input_tokens_seen": 50722112, + "step": 16115 + }, + { + "epoch": 1.0319441777094935, + "grad_norm": 20.238323211669922, + "learning_rate": 4.8296386984984466e-06, + "loss": 0.326, + "num_input_tokens_seen": 50737408, + "step": 16120 + }, + { + "epoch": 1.0322642596504705, + "grad_norm": 23.26394271850586, + "learning_rate": 4.829435944690969e-06, + "loss": 0.277, + "num_input_tokens_seen": 50753152, + "step": 16125 + }, + { + "epoch": 1.0325843415914475, + "grad_norm": 19.027103424072266, + "learning_rate": 4.829233074563594e-06, + "loss": 0.4376, + "num_input_tokens_seen": 50768704, + "step": 16130 + }, + { + "epoch": 1.0329044235324243, + "grad_norm": 9.21601390838623, + "learning_rate": 4.8290300881264516e-06, + "loss": 0.2066, + "num_input_tokens_seen": 50783936, + "step": 16135 + }, + { + "epoch": 1.0332245054734013, + "grad_norm": 17.640274047851562, + "learning_rate": 4.8288269853896786e-06, + "loss": 0.3182, + "num_input_tokens_seen": 50799488, + "step": 16140 + }, + { + "epoch": 1.033544587414378, + "grad_norm": 26.437942504882812, + "learning_rate": 4.828623766363416e-06, + "loss": 0.4688, + "num_input_tokens_seen": 50814720, + "step": 16145 + }, + { + "epoch": 1.033864669355355, + "grad_norm": 22.678754806518555, + "learning_rate": 4.828420431057812e-06, + "loss": 0.2703, + "num_input_tokens_seen": 50829632, + "step": 16150 + }, + { + "epoch": 1.0341847512963318, + "grad_norm": 27.943832397460938, + "learning_rate": 4.828216979483019e-06, + "loss": 0.2409, + "num_input_tokens_seen": 50845376, + "step": 16155 + }, + { + "epoch": 1.0345048332373088, + "grad_norm": 40.01248550415039, + "learning_rate": 4.828013411649198e-06, + "loss": 0.411, + "num_input_tokens_seen": 50861888, + "step": 16160 + }, + { + "epoch": 1.0348249151782856, + "grad_norm": 20.231054306030273, + "learning_rate": 4.827809727566514e-06, + "loss": 0.3307, + "num_input_tokens_seen": 50877952, + "step": 16165 + }, + { + "epoch": 1.0351449971192626, + "grad_norm": 13.945439338684082, + "learning_rate": 4.827605927245137e-06, + "loss": 0.3777, + "num_input_tokens_seen": 50892608, + "step": 16170 + }, + { + "epoch": 1.0354650790602393, + "grad_norm": 32.0569953918457, + "learning_rate": 4.827402010695244e-06, + "loss": 0.3058, + "num_input_tokens_seen": 50909568, + "step": 16175 + }, + { + "epoch": 1.0357851610012163, + "grad_norm": 20.56334686279297, + "learning_rate": 4.827197977927017e-06, + "loss": 0.326, + "num_input_tokens_seen": 50926144, + "step": 16180 + }, + { + "epoch": 1.0361052429421933, + "grad_norm": 26.022022247314453, + "learning_rate": 4.826993828950646e-06, + "loss": 0.2551, + "num_input_tokens_seen": 50942080, + "step": 16185 + }, + { + "epoch": 1.03642532488317, + "grad_norm": 49.97541427612305, + "learning_rate": 4.826789563776323e-06, + "loss": 0.2769, + "num_input_tokens_seen": 50958592, + "step": 16190 + }, + { + "epoch": 1.036745406824147, + "grad_norm": 29.028453826904297, + "learning_rate": 4.826585182414249e-06, + "loss": 0.2245, + "num_input_tokens_seen": 50974464, + "step": 16195 + }, + { + "epoch": 1.0370654887651238, + "grad_norm": 32.65736389160156, + "learning_rate": 4.826380684874631e-06, + "loss": 0.4052, + "num_input_tokens_seen": 50990016, + "step": 16200 + }, + { + "epoch": 1.0373855707061008, + "grad_norm": 74.38031005859375, + "learning_rate": 4.826176071167678e-06, + "loss": 0.4216, + "num_input_tokens_seen": 51005376, + "step": 16205 + }, + { + "epoch": 1.0377056526470776, + "grad_norm": 33.98324966430664, + "learning_rate": 4.825971341303609e-06, + "loss": 0.4809, + "num_input_tokens_seen": 51020416, + "step": 16210 + }, + { + "epoch": 1.0380257345880546, + "grad_norm": 10.334056854248047, + "learning_rate": 4.825766495292647e-06, + "loss": 0.3424, + "num_input_tokens_seen": 51036096, + "step": 16215 + }, + { + "epoch": 1.0383458165290314, + "grad_norm": 11.155962944030762, + "learning_rate": 4.825561533145021e-06, + "loss": 0.3262, + "num_input_tokens_seen": 51051328, + "step": 16220 + }, + { + "epoch": 1.0386658984700083, + "grad_norm": 11.378095626831055, + "learning_rate": 4.825356454870964e-06, + "loss": 0.4197, + "num_input_tokens_seen": 51067776, + "step": 16225 + }, + { + "epoch": 1.038985980410985, + "grad_norm": 14.840147972106934, + "learning_rate": 4.825151260480719e-06, + "loss": 0.3289, + "num_input_tokens_seen": 51084736, + "step": 16230 + }, + { + "epoch": 1.039306062351962, + "grad_norm": 16.852476119995117, + "learning_rate": 4.824945949984532e-06, + "loss": 0.2553, + "num_input_tokens_seen": 51100352, + "step": 16235 + }, + { + "epoch": 1.039626144292939, + "grad_norm": 18.19135284423828, + "learning_rate": 4.8247405233926534e-06, + "loss": 0.2415, + "num_input_tokens_seen": 51116352, + "step": 16240 + }, + { + "epoch": 1.0399462262339159, + "grad_norm": 24.732208251953125, + "learning_rate": 4.824534980715343e-06, + "loss": 0.3407, + "num_input_tokens_seen": 51132736, + "step": 16245 + }, + { + "epoch": 1.0402663081748929, + "grad_norm": 12.478070259094238, + "learning_rate": 4.824329321962863e-06, + "loss": 0.3125, + "num_input_tokens_seen": 51149696, + "step": 16250 + }, + { + "epoch": 1.0405863901158696, + "grad_norm": 20.791152954101562, + "learning_rate": 4.824123547145484e-06, + "loss": 0.2986, + "num_input_tokens_seen": 51165248, + "step": 16255 + }, + { + "epoch": 1.0409064720568466, + "grad_norm": 14.304107666015625, + "learning_rate": 4.82391765627348e-06, + "loss": 0.3958, + "num_input_tokens_seen": 51181632, + "step": 16260 + }, + { + "epoch": 1.0412265539978234, + "grad_norm": 29.200611114501953, + "learning_rate": 4.823711649357135e-06, + "loss": 0.3475, + "num_input_tokens_seen": 51196864, + "step": 16265 + }, + { + "epoch": 1.0415466359388004, + "grad_norm": 12.866266250610352, + "learning_rate": 4.823505526406732e-06, + "loss": 0.2704, + "num_input_tokens_seen": 51213184, + "step": 16270 + }, + { + "epoch": 1.0418667178797771, + "grad_norm": 33.661598205566406, + "learning_rate": 4.823299287432567e-06, + "loss": 0.2751, + "num_input_tokens_seen": 51229504, + "step": 16275 + }, + { + "epoch": 1.0421867998207541, + "grad_norm": 13.146200180053711, + "learning_rate": 4.823092932444937e-06, + "loss": 0.264, + "num_input_tokens_seen": 51244800, + "step": 16280 + }, + { + "epoch": 1.042506881761731, + "grad_norm": 14.791889190673828, + "learning_rate": 4.822886461454146e-06, + "loss": 0.2822, + "num_input_tokens_seen": 51260032, + "step": 16285 + }, + { + "epoch": 1.0428269637027079, + "grad_norm": 46.67515563964844, + "learning_rate": 4.822679874470505e-06, + "loss": 0.3556, + "num_input_tokens_seen": 51275392, + "step": 16290 + }, + { + "epoch": 1.0431470456436849, + "grad_norm": 29.42057991027832, + "learning_rate": 4.8224731715043295e-06, + "loss": 0.3518, + "num_input_tokens_seen": 51290944, + "step": 16295 + }, + { + "epoch": 1.0434671275846616, + "grad_norm": 38.22080993652344, + "learning_rate": 4.822266352565941e-06, + "loss": 0.2881, + "num_input_tokens_seen": 51307200, + "step": 16300 + }, + { + "epoch": 1.0437872095256386, + "grad_norm": 23.950098037719727, + "learning_rate": 4.822059417665667e-06, + "loss": 0.3328, + "num_input_tokens_seen": 51321920, + "step": 16305 + }, + { + "epoch": 1.0441072914666154, + "grad_norm": 17.94292449951172, + "learning_rate": 4.821852366813841e-06, + "loss": 0.233, + "num_input_tokens_seen": 51337344, + "step": 16310 + }, + { + "epoch": 1.0444273734075924, + "grad_norm": 27.60181427001953, + "learning_rate": 4.821645200020802e-06, + "loss": 0.2586, + "num_input_tokens_seen": 51352256, + "step": 16315 + }, + { + "epoch": 1.0447474553485692, + "grad_norm": 21.739505767822266, + "learning_rate": 4.821437917296895e-06, + "loss": 0.3016, + "num_input_tokens_seen": 51367936, + "step": 16320 + }, + { + "epoch": 1.0450675372895462, + "grad_norm": 33.618438720703125, + "learning_rate": 4.82123051865247e-06, + "loss": 0.4314, + "num_input_tokens_seen": 51382592, + "step": 16325 + }, + { + "epoch": 1.045387619230523, + "grad_norm": 23.19209098815918, + "learning_rate": 4.821023004097883e-06, + "loss": 0.4091, + "num_input_tokens_seen": 51398528, + "step": 16330 + }, + { + "epoch": 1.0457077011715, + "grad_norm": 25.247411727905273, + "learning_rate": 4.820815373643498e-06, + "loss": 0.3303, + "num_input_tokens_seen": 51413376, + "step": 16335 + }, + { + "epoch": 1.0460277831124767, + "grad_norm": 16.42780876159668, + "learning_rate": 4.820607627299682e-06, + "loss": 0.3399, + "num_input_tokens_seen": 51432064, + "step": 16340 + }, + { + "epoch": 1.0463478650534537, + "grad_norm": 24.350339889526367, + "learning_rate": 4.820399765076808e-06, + "loss": 0.3488, + "num_input_tokens_seen": 51447232, + "step": 16345 + }, + { + "epoch": 1.0466679469944307, + "grad_norm": 26.973148345947266, + "learning_rate": 4.8201917869852565e-06, + "loss": 0.3695, + "num_input_tokens_seen": 51463168, + "step": 16350 + }, + { + "epoch": 1.0469880289354074, + "grad_norm": 17.12081527709961, + "learning_rate": 4.819983693035412e-06, + "loss": 0.1727, + "num_input_tokens_seen": 51481216, + "step": 16355 + }, + { + "epoch": 1.0473081108763844, + "grad_norm": 39.01164627075195, + "learning_rate": 4.819775483237667e-06, + "loss": 0.2678, + "num_input_tokens_seen": 51497856, + "step": 16360 + }, + { + "epoch": 1.0476281928173612, + "grad_norm": 29.45097541809082, + "learning_rate": 4.819567157602418e-06, + "loss": 0.3309, + "num_input_tokens_seen": 51512896, + "step": 16365 + }, + { + "epoch": 1.0479482747583382, + "grad_norm": 22.932531356811523, + "learning_rate": 4.819358716140067e-06, + "loss": 0.3148, + "num_input_tokens_seen": 51528448, + "step": 16370 + }, + { + "epoch": 1.048268356699315, + "grad_norm": 16.500131607055664, + "learning_rate": 4.819150158861023e-06, + "loss": 0.3235, + "num_input_tokens_seen": 51544768, + "step": 16375 + }, + { + "epoch": 1.048588438640292, + "grad_norm": 25.05584144592285, + "learning_rate": 4.8189414857756985e-06, + "loss": 0.285, + "num_input_tokens_seen": 51560448, + "step": 16380 + }, + { + "epoch": 1.0489085205812687, + "grad_norm": 32.80569839477539, + "learning_rate": 4.818732696894517e-06, + "loss": 0.3436, + "num_input_tokens_seen": 51575616, + "step": 16385 + }, + { + "epoch": 1.0492286025222457, + "grad_norm": 6.426510334014893, + "learning_rate": 4.8185237922279014e-06, + "loss": 0.2012, + "num_input_tokens_seen": 51591680, + "step": 16390 + }, + { + "epoch": 1.0495486844632227, + "grad_norm": 13.961211204528809, + "learning_rate": 4.818314771786284e-06, + "loss": 0.2593, + "num_input_tokens_seen": 51607680, + "step": 16395 + }, + { + "epoch": 1.0498687664041995, + "grad_norm": 20.368444442749023, + "learning_rate": 4.818105635580104e-06, + "loss": 0.2339, + "num_input_tokens_seen": 51622912, + "step": 16400 + }, + { + "epoch": 1.0501888483451765, + "grad_norm": 30.47559356689453, + "learning_rate": 4.817896383619802e-06, + "loss": 0.4946, + "num_input_tokens_seen": 51638144, + "step": 16405 + }, + { + "epoch": 1.0505089302861532, + "grad_norm": 36.76587677001953, + "learning_rate": 4.817687015915829e-06, + "loss": 0.2627, + "num_input_tokens_seen": 51654336, + "step": 16410 + }, + { + "epoch": 1.0508290122271302, + "grad_norm": 7.377636432647705, + "learning_rate": 4.817477532478638e-06, + "loss": 0.3434, + "num_input_tokens_seen": 51669760, + "step": 16415 + }, + { + "epoch": 1.051149094168107, + "grad_norm": 24.228498458862305, + "learning_rate": 4.817267933318691e-06, + "loss": 0.3096, + "num_input_tokens_seen": 51685440, + "step": 16420 + }, + { + "epoch": 1.051469176109084, + "grad_norm": 20.44950294494629, + "learning_rate": 4.817058218446453e-06, + "loss": 0.3697, + "num_input_tokens_seen": 51700544, + "step": 16425 + }, + { + "epoch": 1.0517892580500607, + "grad_norm": 22.547842025756836, + "learning_rate": 4.816848387872397e-06, + "loss": 0.2282, + "num_input_tokens_seen": 51716096, + "step": 16430 + }, + { + "epoch": 1.0521093399910377, + "grad_norm": 18.74854278564453, + "learning_rate": 4.8166384416070005e-06, + "loss": 0.3054, + "num_input_tokens_seen": 51733312, + "step": 16435 + }, + { + "epoch": 1.0524294219320145, + "grad_norm": 49.37860870361328, + "learning_rate": 4.8164283796607466e-06, + "loss": 0.3531, + "num_input_tokens_seen": 51748992, + "step": 16440 + }, + { + "epoch": 1.0527495038729915, + "grad_norm": 23.665855407714844, + "learning_rate": 4.8162182020441265e-06, + "loss": 0.4066, + "num_input_tokens_seen": 51764096, + "step": 16445 + }, + { + "epoch": 1.0530695858139685, + "grad_norm": 43.06725311279297, + "learning_rate": 4.816007908767633e-06, + "loss": 0.3169, + "num_input_tokens_seen": 51779584, + "step": 16450 + }, + { + "epoch": 1.0533896677549452, + "grad_norm": 27.130748748779297, + "learning_rate": 4.815797499841769e-06, + "loss": 0.3345, + "num_input_tokens_seen": 51794368, + "step": 16455 + }, + { + "epoch": 1.0537097496959222, + "grad_norm": 17.443307876586914, + "learning_rate": 4.81558697527704e-06, + "loss": 0.2701, + "num_input_tokens_seen": 51809664, + "step": 16460 + }, + { + "epoch": 1.054029831636899, + "grad_norm": 36.665096282958984, + "learning_rate": 4.815376335083959e-06, + "loss": 0.332, + "num_input_tokens_seen": 51824832, + "step": 16465 + }, + { + "epoch": 1.054349913577876, + "grad_norm": 34.498775482177734, + "learning_rate": 4.815165579273044e-06, + "loss": 0.5044, + "num_input_tokens_seen": 51841408, + "step": 16470 + }, + { + "epoch": 1.0546699955188528, + "grad_norm": 27.824323654174805, + "learning_rate": 4.8149547078548195e-06, + "loss": 0.2284, + "num_input_tokens_seen": 51857024, + "step": 16475 + }, + { + "epoch": 1.0549900774598298, + "grad_norm": 14.40594482421875, + "learning_rate": 4.814743720839815e-06, + "loss": 0.2576, + "num_input_tokens_seen": 51873088, + "step": 16480 + }, + { + "epoch": 1.0553101594008065, + "grad_norm": 38.39655303955078, + "learning_rate": 4.814532618238566e-06, + "loss": 0.3573, + "num_input_tokens_seen": 51888832, + "step": 16485 + }, + { + "epoch": 1.0556302413417835, + "grad_norm": 17.310503005981445, + "learning_rate": 4.814321400061614e-06, + "loss": 0.2721, + "num_input_tokens_seen": 51905088, + "step": 16490 + }, + { + "epoch": 1.0559503232827603, + "grad_norm": 22.92515754699707, + "learning_rate": 4.814110066319506e-06, + "loss": 0.3373, + "num_input_tokens_seen": 51920448, + "step": 16495 + }, + { + "epoch": 1.0562704052237373, + "grad_norm": 32.86568832397461, + "learning_rate": 4.8138986170227955e-06, + "loss": 0.2952, + "num_input_tokens_seen": 51936960, + "step": 16500 + }, + { + "epoch": 1.0565904871647143, + "grad_norm": 14.619465827941895, + "learning_rate": 4.81368705218204e-06, + "loss": 0.3214, + "num_input_tokens_seen": 51953536, + "step": 16505 + }, + { + "epoch": 1.056910569105691, + "grad_norm": 30.624147415161133, + "learning_rate": 4.8134753718078054e-06, + "loss": 0.3257, + "num_input_tokens_seen": 51968704, + "step": 16510 + }, + { + "epoch": 1.057230651046668, + "grad_norm": 26.784770965576172, + "learning_rate": 4.813263575910661e-06, + "loss": 0.314, + "num_input_tokens_seen": 51983808, + "step": 16515 + }, + { + "epoch": 1.0575507329876448, + "grad_norm": 50.709842681884766, + "learning_rate": 4.813051664501182e-06, + "loss": 0.2979, + "num_input_tokens_seen": 51999424, + "step": 16520 + }, + { + "epoch": 1.0578708149286218, + "grad_norm": 52.703147888183594, + "learning_rate": 4.812839637589953e-06, + "loss": 0.3579, + "num_input_tokens_seen": 52014784, + "step": 16525 + }, + { + "epoch": 1.0581908968695986, + "grad_norm": 23.658693313598633, + "learning_rate": 4.812627495187558e-06, + "loss": 0.3112, + "num_input_tokens_seen": 52030656, + "step": 16530 + }, + { + "epoch": 1.0585109788105755, + "grad_norm": 23.7495174407959, + "learning_rate": 4.812415237304593e-06, + "loss": 0.3321, + "num_input_tokens_seen": 52045888, + "step": 16535 + }, + { + "epoch": 1.0588310607515523, + "grad_norm": 25.678619384765625, + "learning_rate": 4.812202863951655e-06, + "loss": 0.2757, + "num_input_tokens_seen": 52060672, + "step": 16540 + }, + { + "epoch": 1.0591511426925293, + "grad_norm": 13.198908805847168, + "learning_rate": 4.811990375139351e-06, + "loss": 0.2529, + "num_input_tokens_seen": 52075456, + "step": 16545 + }, + { + "epoch": 1.059471224633506, + "grad_norm": 17.748586654663086, + "learning_rate": 4.81177777087829e-06, + "loss": 0.3074, + "num_input_tokens_seen": 52089600, + "step": 16550 + }, + { + "epoch": 1.059791306574483, + "grad_norm": 78.24514770507812, + "learning_rate": 4.8115650511790875e-06, + "loss": 0.3159, + "num_input_tokens_seen": 52104768, + "step": 16555 + }, + { + "epoch": 1.06011138851546, + "grad_norm": 29.102611541748047, + "learning_rate": 4.811352216052368e-06, + "loss": 0.2456, + "num_input_tokens_seen": 52120576, + "step": 16560 + }, + { + "epoch": 1.0604314704564368, + "grad_norm": 17.809877395629883, + "learning_rate": 4.8111392655087585e-06, + "loss": 0.2515, + "num_input_tokens_seen": 52136256, + "step": 16565 + }, + { + "epoch": 1.0607515523974138, + "grad_norm": 21.133174896240234, + "learning_rate": 4.810926199558892e-06, + "loss": 0.3077, + "num_input_tokens_seen": 52152256, + "step": 16570 + }, + { + "epoch": 1.0610716343383906, + "grad_norm": 26.565460205078125, + "learning_rate": 4.810713018213407e-06, + "loss": 0.364, + "num_input_tokens_seen": 52167488, + "step": 16575 + }, + { + "epoch": 1.0613917162793676, + "grad_norm": 7.079984188079834, + "learning_rate": 4.810499721482952e-06, + "loss": 0.4094, + "num_input_tokens_seen": 52182208, + "step": 16580 + }, + { + "epoch": 1.0617117982203443, + "grad_norm": 21.540420532226562, + "learning_rate": 4.8102863093781745e-06, + "loss": 0.2917, + "num_input_tokens_seen": 52198720, + "step": 16585 + }, + { + "epoch": 1.0620318801613213, + "grad_norm": 35.29466247558594, + "learning_rate": 4.810072781909734e-06, + "loss": 0.5487, + "num_input_tokens_seen": 52215552, + "step": 16590 + }, + { + "epoch": 1.062351962102298, + "grad_norm": 15.81296157836914, + "learning_rate": 4.80985913908829e-06, + "loss": 0.2942, + "num_input_tokens_seen": 52232064, + "step": 16595 + }, + { + "epoch": 1.062672044043275, + "grad_norm": 37.80410385131836, + "learning_rate": 4.809645380924512e-06, + "loss": 0.4301, + "num_input_tokens_seen": 52248064, + "step": 16600 + }, + { + "epoch": 1.0629921259842519, + "grad_norm": 21.821714401245117, + "learning_rate": 4.809431507429076e-06, + "loss": 0.438, + "num_input_tokens_seen": 52265024, + "step": 16605 + }, + { + "epoch": 1.0633122079252288, + "grad_norm": 17.11227798461914, + "learning_rate": 4.809217518612659e-06, + "loss": 0.1769, + "num_input_tokens_seen": 52280320, + "step": 16610 + }, + { + "epoch": 1.0636322898662058, + "grad_norm": 21.7027587890625, + "learning_rate": 4.809003414485947e-06, + "loss": 0.2284, + "num_input_tokens_seen": 52295616, + "step": 16615 + }, + { + "epoch": 1.0639523718071826, + "grad_norm": 16.57803726196289, + "learning_rate": 4.8087891950596314e-06, + "loss": 0.3144, + "num_input_tokens_seen": 52311680, + "step": 16620 + }, + { + "epoch": 1.0642724537481596, + "grad_norm": 45.16407775878906, + "learning_rate": 4.80857486034441e-06, + "loss": 0.3209, + "num_input_tokens_seen": 52326464, + "step": 16625 + }, + { + "epoch": 1.0645925356891364, + "grad_norm": 28.55582046508789, + "learning_rate": 4.808360410350985e-06, + "loss": 0.3622, + "num_input_tokens_seen": 52341696, + "step": 16630 + }, + { + "epoch": 1.0649126176301134, + "grad_norm": 27.73451805114746, + "learning_rate": 4.808145845090065e-06, + "loss": 0.3042, + "num_input_tokens_seen": 52357504, + "step": 16635 + }, + { + "epoch": 1.0652326995710901, + "grad_norm": 12.788614273071289, + "learning_rate": 4.807931164572364e-06, + "loss": 0.3643, + "num_input_tokens_seen": 52373888, + "step": 16640 + }, + { + "epoch": 1.0655527815120671, + "grad_norm": 24.097732543945312, + "learning_rate": 4.807716368808602e-06, + "loss": 0.2879, + "num_input_tokens_seen": 52389440, + "step": 16645 + }, + { + "epoch": 1.0658728634530439, + "grad_norm": 39.82802200317383, + "learning_rate": 4.807501457809505e-06, + "loss": 0.2488, + "num_input_tokens_seen": 52404608, + "step": 16650 + }, + { + "epoch": 1.0661929453940209, + "grad_norm": 22.434125900268555, + "learning_rate": 4.807286431585805e-06, + "loss": 0.3124, + "num_input_tokens_seen": 52419520, + "step": 16655 + }, + { + "epoch": 1.0665130273349979, + "grad_norm": 12.810970306396484, + "learning_rate": 4.807071290148238e-06, + "loss": 0.2219, + "num_input_tokens_seen": 52434944, + "step": 16660 + }, + { + "epoch": 1.0668331092759746, + "grad_norm": 10.189210891723633, + "learning_rate": 4.806856033507549e-06, + "loss": 0.2712, + "num_input_tokens_seen": 52451008, + "step": 16665 + }, + { + "epoch": 1.0671531912169516, + "grad_norm": 16.977922439575195, + "learning_rate": 4.806640661674485e-06, + "loss": 0.3997, + "num_input_tokens_seen": 52467392, + "step": 16670 + }, + { + "epoch": 1.0674732731579284, + "grad_norm": 33.93855667114258, + "learning_rate": 4.806425174659801e-06, + "loss": 0.2614, + "num_input_tokens_seen": 52482432, + "step": 16675 + }, + { + "epoch": 1.0677933550989054, + "grad_norm": 9.237256050109863, + "learning_rate": 4.806209572474258e-06, + "loss": 0.3247, + "num_input_tokens_seen": 52497472, + "step": 16680 + }, + { + "epoch": 1.0681134370398822, + "grad_norm": 11.896576881408691, + "learning_rate": 4.805993855128622e-06, + "loss": 0.1651, + "num_input_tokens_seen": 52512960, + "step": 16685 + }, + { + "epoch": 1.0684335189808591, + "grad_norm": 49.328697204589844, + "learning_rate": 4.8057780226336636e-06, + "loss": 0.3146, + "num_input_tokens_seen": 52528000, + "step": 16690 + }, + { + "epoch": 1.068753600921836, + "grad_norm": 22.427656173706055, + "learning_rate": 4.805562075000161e-06, + "loss": 0.4474, + "num_input_tokens_seen": 52542976, + "step": 16695 + }, + { + "epoch": 1.069073682862813, + "grad_norm": 20.053789138793945, + "learning_rate": 4.805346012238898e-06, + "loss": 0.3965, + "num_input_tokens_seen": 52558208, + "step": 16700 + }, + { + "epoch": 1.0693937648037897, + "grad_norm": 41.03416061401367, + "learning_rate": 4.805129834360664e-06, + "loss": 0.3391, + "num_input_tokens_seen": 52574976, + "step": 16705 + }, + { + "epoch": 1.0697138467447667, + "grad_norm": 11.497386932373047, + "learning_rate": 4.8049135413762514e-06, + "loss": 0.2333, + "num_input_tokens_seen": 52590208, + "step": 16710 + }, + { + "epoch": 1.0700339286857437, + "grad_norm": 31.44280433654785, + "learning_rate": 4.804697133296463e-06, + "loss": 0.3121, + "num_input_tokens_seen": 52606720, + "step": 16715 + }, + { + "epoch": 1.0703540106267204, + "grad_norm": 20.285934448242188, + "learning_rate": 4.804480610132104e-06, + "loss": 0.3195, + "num_input_tokens_seen": 52623872, + "step": 16720 + }, + { + "epoch": 1.0706740925676974, + "grad_norm": 28.626113891601562, + "learning_rate": 4.804263971893988e-06, + "loss": 0.223, + "num_input_tokens_seen": 52639040, + "step": 16725 + }, + { + "epoch": 1.0709941745086742, + "grad_norm": 10.408917427062988, + "learning_rate": 4.804047218592932e-06, + "loss": 0.301, + "num_input_tokens_seen": 52655680, + "step": 16730 + }, + { + "epoch": 1.0713142564496512, + "grad_norm": 29.66434097290039, + "learning_rate": 4.8038303502397584e-06, + "loss": 0.3257, + "num_input_tokens_seen": 52670912, + "step": 16735 + }, + { + "epoch": 1.071634338390628, + "grad_norm": 22.797931671142578, + "learning_rate": 4.803613366845297e-06, + "loss": 0.3093, + "num_input_tokens_seen": 52686656, + "step": 16740 + }, + { + "epoch": 1.071954420331605, + "grad_norm": 45.213775634765625, + "learning_rate": 4.803396268420384e-06, + "loss": 0.259, + "num_input_tokens_seen": 52700736, + "step": 16745 + }, + { + "epoch": 1.0722745022725817, + "grad_norm": 46.16619110107422, + "learning_rate": 4.803179054975859e-06, + "loss": 0.3664, + "num_input_tokens_seen": 52716352, + "step": 16750 + }, + { + "epoch": 1.0725945842135587, + "grad_norm": 29.576200485229492, + "learning_rate": 4.802961726522568e-06, + "loss": 0.3951, + "num_input_tokens_seen": 52730944, + "step": 16755 + }, + { + "epoch": 1.0729146661545355, + "grad_norm": 32.62112808227539, + "learning_rate": 4.8027442830713655e-06, + "loss": 0.2282, + "num_input_tokens_seen": 52746368, + "step": 16760 + }, + { + "epoch": 1.0732347480955124, + "grad_norm": 16.921857833862305, + "learning_rate": 4.802526724633107e-06, + "loss": 0.3642, + "num_input_tokens_seen": 52762624, + "step": 16765 + }, + { + "epoch": 1.0735548300364894, + "grad_norm": 29.999767303466797, + "learning_rate": 4.802309051218658e-06, + "loss": 0.3688, + "num_input_tokens_seen": 52777728, + "step": 16770 + }, + { + "epoch": 1.0738749119774662, + "grad_norm": 29.245208740234375, + "learning_rate": 4.802091262838886e-06, + "loss": 0.3491, + "num_input_tokens_seen": 52793920, + "step": 16775 + }, + { + "epoch": 1.0741949939184432, + "grad_norm": 19.823223114013672, + "learning_rate": 4.801873359504669e-06, + "loss": 0.3184, + "num_input_tokens_seen": 52809792, + "step": 16780 + }, + { + "epoch": 1.07451507585942, + "grad_norm": 18.37125015258789, + "learning_rate": 4.801655341226886e-06, + "loss": 0.4376, + "num_input_tokens_seen": 52824960, + "step": 16785 + }, + { + "epoch": 1.074835157800397, + "grad_norm": 11.977819442749023, + "learning_rate": 4.801437208016424e-06, + "loss": 0.3442, + "num_input_tokens_seen": 52839616, + "step": 16790 + }, + { + "epoch": 1.0751552397413737, + "grad_norm": 32.57524490356445, + "learning_rate": 4.801218959884176e-06, + "loss": 0.2801, + "num_input_tokens_seen": 52855616, + "step": 16795 + }, + { + "epoch": 1.0754753216823507, + "grad_norm": 42.36808776855469, + "learning_rate": 4.801000596841039e-06, + "loss": 0.4209, + "num_input_tokens_seen": 52873088, + "step": 16800 + }, + { + "epoch": 1.0757954036233275, + "grad_norm": 39.68159866333008, + "learning_rate": 4.800782118897917e-06, + "loss": 0.2287, + "num_input_tokens_seen": 52889280, + "step": 16805 + }, + { + "epoch": 1.0761154855643045, + "grad_norm": 17.37569236755371, + "learning_rate": 4.8005635260657224e-06, + "loss": 0.3968, + "num_input_tokens_seen": 52904896, + "step": 16810 + }, + { + "epoch": 1.0764355675052815, + "grad_norm": 7.414278030395508, + "learning_rate": 4.800344818355368e-06, + "loss": 0.2281, + "num_input_tokens_seen": 52920640, + "step": 16815 + }, + { + "epoch": 1.0767556494462582, + "grad_norm": 15.947702407836914, + "learning_rate": 4.800125995777775e-06, + "loss": 0.2776, + "num_input_tokens_seen": 52935680, + "step": 16820 + }, + { + "epoch": 1.0770757313872352, + "grad_norm": 29.27289581298828, + "learning_rate": 4.79990705834387e-06, + "loss": 0.3489, + "num_input_tokens_seen": 52950784, + "step": 16825 + }, + { + "epoch": 1.077395813328212, + "grad_norm": 25.018556594848633, + "learning_rate": 4.799688006064587e-06, + "loss": 0.2919, + "num_input_tokens_seen": 52965824, + "step": 16830 + }, + { + "epoch": 1.077715895269189, + "grad_norm": 8.3266019821167, + "learning_rate": 4.799468838950863e-06, + "loss": 0.2484, + "num_input_tokens_seen": 52981568, + "step": 16835 + }, + { + "epoch": 1.0780359772101658, + "grad_norm": 31.74022102355957, + "learning_rate": 4.799249557013643e-06, + "loss": 0.3151, + "num_input_tokens_seen": 52997632, + "step": 16840 + }, + { + "epoch": 1.0783560591511427, + "grad_norm": 10.551453590393066, + "learning_rate": 4.7990301602638764e-06, + "loss": 0.2957, + "num_input_tokens_seen": 53013056, + "step": 16845 + }, + { + "epoch": 1.0786761410921195, + "grad_norm": 30.193498611450195, + "learning_rate": 4.79881064871252e-06, + "loss": 0.3658, + "num_input_tokens_seen": 53028096, + "step": 16850 + }, + { + "epoch": 1.0789962230330965, + "grad_norm": 36.96535873413086, + "learning_rate": 4.798591022370532e-06, + "loss": 0.2365, + "num_input_tokens_seen": 53043968, + "step": 16855 + }, + { + "epoch": 1.0793163049740733, + "grad_norm": 11.687283515930176, + "learning_rate": 4.798371281248883e-06, + "loss": 0.3099, + "num_input_tokens_seen": 53060224, + "step": 16860 + }, + { + "epoch": 1.0796363869150503, + "grad_norm": 21.379304885864258, + "learning_rate": 4.798151425358543e-06, + "loss": 0.3508, + "num_input_tokens_seen": 53076544, + "step": 16865 + }, + { + "epoch": 1.079956468856027, + "grad_norm": 9.467506408691406, + "learning_rate": 4.7979314547104915e-06, + "loss": 0.2995, + "num_input_tokens_seen": 53092224, + "step": 16870 + }, + { + "epoch": 1.080276550797004, + "grad_norm": 46.4112663269043, + "learning_rate": 4.797711369315713e-06, + "loss": 0.3381, + "num_input_tokens_seen": 53106752, + "step": 16875 + }, + { + "epoch": 1.080596632737981, + "grad_norm": 26.24725914001465, + "learning_rate": 4.797491169185197e-06, + "loss": 0.3309, + "num_input_tokens_seen": 53121600, + "step": 16880 + }, + { + "epoch": 1.0809167146789578, + "grad_norm": 17.808170318603516, + "learning_rate": 4.797270854329938e-06, + "loss": 0.2545, + "num_input_tokens_seen": 53135936, + "step": 16885 + }, + { + "epoch": 1.0812367966199348, + "grad_norm": 17.12225341796875, + "learning_rate": 4.7970504247609405e-06, + "loss": 0.3368, + "num_input_tokens_seen": 53152000, + "step": 16890 + }, + { + "epoch": 1.0815568785609115, + "grad_norm": 12.625984191894531, + "learning_rate": 4.7968298804892085e-06, + "loss": 0.4705, + "num_input_tokens_seen": 53167424, + "step": 16895 + }, + { + "epoch": 1.0818769605018885, + "grad_norm": 9.215609550476074, + "learning_rate": 4.796609221525756e-06, + "loss": 0.1764, + "num_input_tokens_seen": 53182272, + "step": 16900 + }, + { + "epoch": 1.0821970424428653, + "grad_norm": 25.385284423828125, + "learning_rate": 4.7963884478816025e-06, + "loss": 0.3175, + "num_input_tokens_seen": 53199360, + "step": 16905 + }, + { + "epoch": 1.0825171243838423, + "grad_norm": 34.66509246826172, + "learning_rate": 4.79616755956777e-06, + "loss": 0.3394, + "num_input_tokens_seen": 53214912, + "step": 16910 + }, + { + "epoch": 1.082837206324819, + "grad_norm": 44.216678619384766, + "learning_rate": 4.795946556595291e-06, + "loss": 0.312, + "num_input_tokens_seen": 53230208, + "step": 16915 + }, + { + "epoch": 1.083157288265796, + "grad_norm": 20.449331283569336, + "learning_rate": 4.7957254389752e-06, + "loss": 0.298, + "num_input_tokens_seen": 53245312, + "step": 16920 + }, + { + "epoch": 1.083477370206773, + "grad_norm": 37.11669158935547, + "learning_rate": 4.795504206718538e-06, + "loss": 0.4834, + "num_input_tokens_seen": 53260352, + "step": 16925 + }, + { + "epoch": 1.0837974521477498, + "grad_norm": 17.156429290771484, + "learning_rate": 4.795282859836353e-06, + "loss": 0.3298, + "num_input_tokens_seen": 53276608, + "step": 16930 + }, + { + "epoch": 1.0841175340887268, + "grad_norm": 30.24496841430664, + "learning_rate": 4.7950613983396975e-06, + "loss": 0.2241, + "num_input_tokens_seen": 53292864, + "step": 16935 + }, + { + "epoch": 1.0844376160297036, + "grad_norm": 30.382417678833008, + "learning_rate": 4.794839822239631e-06, + "loss": 0.3922, + "num_input_tokens_seen": 53308288, + "step": 16940 + }, + { + "epoch": 1.0847576979706806, + "grad_norm": 34.261287689208984, + "learning_rate": 4.794618131547217e-06, + "loss": 0.3077, + "num_input_tokens_seen": 53324096, + "step": 16945 + }, + { + "epoch": 1.0850777799116573, + "grad_norm": 16.281801223754883, + "learning_rate": 4.7943963262735245e-06, + "loss": 0.3316, + "num_input_tokens_seen": 53341248, + "step": 16950 + }, + { + "epoch": 1.0853978618526343, + "grad_norm": 23.921308517456055, + "learning_rate": 4.794174406429632e-06, + "loss": 0.3751, + "num_input_tokens_seen": 53357184, + "step": 16955 + }, + { + "epoch": 1.085717943793611, + "grad_norm": 19.36733627319336, + "learning_rate": 4.793952372026619e-06, + "loss": 0.3232, + "num_input_tokens_seen": 53371392, + "step": 16960 + }, + { + "epoch": 1.086038025734588, + "grad_norm": 37.614593505859375, + "learning_rate": 4.7937302230755735e-06, + "loss": 0.2409, + "num_input_tokens_seen": 53387904, + "step": 16965 + }, + { + "epoch": 1.0863581076755648, + "grad_norm": 15.26233196258545, + "learning_rate": 4.793507959587588e-06, + "loss": 0.3087, + "num_input_tokens_seen": 53403968, + "step": 16970 + }, + { + "epoch": 1.0866781896165418, + "grad_norm": 52.16918182373047, + "learning_rate": 4.793285581573762e-06, + "loss": 0.2574, + "num_input_tokens_seen": 53419776, + "step": 16975 + }, + { + "epoch": 1.0869982715575188, + "grad_norm": 16.95928192138672, + "learning_rate": 4.793063089045199e-06, + "loss": 0.3332, + "num_input_tokens_seen": 53434816, + "step": 16980 + }, + { + "epoch": 1.0873183534984956, + "grad_norm": 29.081209182739258, + "learning_rate": 4.792840482013009e-06, + "loss": 0.3752, + "num_input_tokens_seen": 53450688, + "step": 16985 + }, + { + "epoch": 1.0876384354394726, + "grad_norm": 26.25627899169922, + "learning_rate": 4.792617760488308e-06, + "loss": 0.5535, + "num_input_tokens_seen": 53466240, + "step": 16990 + }, + { + "epoch": 1.0879585173804494, + "grad_norm": 33.296165466308594, + "learning_rate": 4.792394924482219e-06, + "loss": 0.3152, + "num_input_tokens_seen": 53481728, + "step": 16995 + }, + { + "epoch": 1.0882785993214263, + "grad_norm": 60.31883239746094, + "learning_rate": 4.792171974005868e-06, + "loss": 0.4126, + "num_input_tokens_seen": 53496896, + "step": 17000 + }, + { + "epoch": 1.0885986812624031, + "grad_norm": 22.247684478759766, + "learning_rate": 4.791948909070388e-06, + "loss": 0.3726, + "num_input_tokens_seen": 53512192, + "step": 17005 + }, + { + "epoch": 1.08891876320338, + "grad_norm": 21.908349990844727, + "learning_rate": 4.7917257296869165e-06, + "loss": 0.3589, + "num_input_tokens_seen": 53527424, + "step": 17010 + }, + { + "epoch": 1.0892388451443569, + "grad_norm": 33.00432586669922, + "learning_rate": 4.7915024358666005e-06, + "loss": 0.2884, + "num_input_tokens_seen": 53542016, + "step": 17015 + }, + { + "epoch": 1.0895589270853339, + "grad_norm": 13.651270866394043, + "learning_rate": 4.791279027620589e-06, + "loss": 0.2984, + "num_input_tokens_seen": 53557824, + "step": 17020 + }, + { + "epoch": 1.0898790090263106, + "grad_norm": 16.808879852294922, + "learning_rate": 4.7910555049600374e-06, + "loss": 0.2232, + "num_input_tokens_seen": 53571904, + "step": 17025 + }, + { + "epoch": 1.0901990909672876, + "grad_norm": 16.576988220214844, + "learning_rate": 4.790831867896107e-06, + "loss": 0.3296, + "num_input_tokens_seen": 53588800, + "step": 17030 + }, + { + "epoch": 1.0905191729082646, + "grad_norm": 10.19067096710205, + "learning_rate": 4.790608116439966e-06, + "loss": 0.2592, + "num_input_tokens_seen": 53605824, + "step": 17035 + }, + { + "epoch": 1.0908392548492414, + "grad_norm": 24.06890296936035, + "learning_rate": 4.790384250602788e-06, + "loss": 0.3961, + "num_input_tokens_seen": 53622016, + "step": 17040 + }, + { + "epoch": 1.0911593367902184, + "grad_norm": 39.48931884765625, + "learning_rate": 4.7901602703957494e-06, + "loss": 0.2924, + "num_input_tokens_seen": 53637888, + "step": 17045 + }, + { + "epoch": 1.0914794187311951, + "grad_norm": 13.692458152770996, + "learning_rate": 4.7899361758300365e-06, + "loss": 0.3298, + "num_input_tokens_seen": 53653760, + "step": 17050 + }, + { + "epoch": 1.0917995006721721, + "grad_norm": 169.7841033935547, + "learning_rate": 4.789711966916839e-06, + "loss": 0.2407, + "num_input_tokens_seen": 53668160, + "step": 17055 + }, + { + "epoch": 1.092119582613149, + "grad_norm": 24.872053146362305, + "learning_rate": 4.7894876436673525e-06, + "loss": 0.3244, + "num_input_tokens_seen": 53683136, + "step": 17060 + }, + { + "epoch": 1.0924396645541259, + "grad_norm": 13.74329662322998, + "learning_rate": 4.789263206092778e-06, + "loss": 0.3165, + "num_input_tokens_seen": 53698752, + "step": 17065 + }, + { + "epoch": 1.0927597464951027, + "grad_norm": 18.108680725097656, + "learning_rate": 4.789038654204323e-06, + "loss": 0.2705, + "num_input_tokens_seen": 53714496, + "step": 17070 + }, + { + "epoch": 1.0930798284360796, + "grad_norm": 13.816596031188965, + "learning_rate": 4.788813988013202e-06, + "loss": 0.2991, + "num_input_tokens_seen": 53729984, + "step": 17075 + }, + { + "epoch": 1.0933999103770566, + "grad_norm": 37.88239288330078, + "learning_rate": 4.788589207530632e-06, + "loss": 0.3435, + "num_input_tokens_seen": 53746048, + "step": 17080 + }, + { + "epoch": 1.0937199923180334, + "grad_norm": 23.74174690246582, + "learning_rate": 4.788364312767837e-06, + "loss": 0.2946, + "num_input_tokens_seen": 53761728, + "step": 17085 + }, + { + "epoch": 1.0940400742590104, + "grad_norm": 47.450138092041016, + "learning_rate": 4.788139303736049e-06, + "loss": 0.3548, + "num_input_tokens_seen": 53776704, + "step": 17090 + }, + { + "epoch": 1.0943601561999872, + "grad_norm": 28.872812271118164, + "learning_rate": 4.787914180446502e-06, + "loss": 0.2661, + "num_input_tokens_seen": 53794048, + "step": 17095 + }, + { + "epoch": 1.0946802381409642, + "grad_norm": 25.817848205566406, + "learning_rate": 4.7876889429104374e-06, + "loss": 0.3857, + "num_input_tokens_seen": 53808576, + "step": 17100 + }, + { + "epoch": 1.095000320081941, + "grad_norm": 6.787534713745117, + "learning_rate": 4.787463591139104e-06, + "loss": 0.3105, + "num_input_tokens_seen": 53823360, + "step": 17105 + }, + { + "epoch": 1.095320402022918, + "grad_norm": 39.71086120605469, + "learning_rate": 4.787238125143754e-06, + "loss": 0.4111, + "num_input_tokens_seen": 53838912, + "step": 17110 + }, + { + "epoch": 1.0956404839638947, + "grad_norm": 44.99237823486328, + "learning_rate": 4.787012544935646e-06, + "loss": 0.4905, + "num_input_tokens_seen": 53854336, + "step": 17115 + }, + { + "epoch": 1.0959605659048717, + "grad_norm": 16.328718185424805, + "learning_rate": 4.786786850526044e-06, + "loss": 0.2755, + "num_input_tokens_seen": 53871680, + "step": 17120 + }, + { + "epoch": 1.0962806478458484, + "grad_norm": 13.432721138000488, + "learning_rate": 4.7865610419262175e-06, + "loss": 0.2438, + "num_input_tokens_seen": 53887360, + "step": 17125 + }, + { + "epoch": 1.0966007297868254, + "grad_norm": 7.847712516784668, + "learning_rate": 4.786335119147443e-06, + "loss": 0.2652, + "num_input_tokens_seen": 53902144, + "step": 17130 + }, + { + "epoch": 1.0969208117278022, + "grad_norm": 22.676624298095703, + "learning_rate": 4.786109082201003e-06, + "loss": 0.5099, + "num_input_tokens_seen": 53919488, + "step": 17135 + }, + { + "epoch": 1.0972408936687792, + "grad_norm": 27.935199737548828, + "learning_rate": 4.7858829310981815e-06, + "loss": 0.4236, + "num_input_tokens_seen": 53936000, + "step": 17140 + }, + { + "epoch": 1.0975609756097562, + "grad_norm": 22.293540954589844, + "learning_rate": 4.785656665850274e-06, + "loss": 0.3126, + "num_input_tokens_seen": 53953920, + "step": 17145 + }, + { + "epoch": 1.097881057550733, + "grad_norm": 19.922319412231445, + "learning_rate": 4.7854302864685785e-06, + "loss": 0.401, + "num_input_tokens_seen": 53970240, + "step": 17150 + }, + { + "epoch": 1.09820113949171, + "grad_norm": 68.94795989990234, + "learning_rate": 4.7852037929643976e-06, + "loss": 0.3472, + "num_input_tokens_seen": 53985984, + "step": 17155 + }, + { + "epoch": 1.0985212214326867, + "grad_norm": 11.608325004577637, + "learning_rate": 4.784977185349044e-06, + "loss": 0.2382, + "num_input_tokens_seen": 54002432, + "step": 17160 + }, + { + "epoch": 1.0988413033736637, + "grad_norm": 16.174448013305664, + "learning_rate": 4.784750463633831e-06, + "loss": 0.2789, + "num_input_tokens_seen": 54018304, + "step": 17165 + }, + { + "epoch": 1.0991613853146405, + "grad_norm": 19.969619750976562, + "learning_rate": 4.784523627830081e-06, + "loss": 0.2712, + "num_input_tokens_seen": 54032384, + "step": 17170 + }, + { + "epoch": 1.0994814672556175, + "grad_norm": 37.980865478515625, + "learning_rate": 4.7842966779491205e-06, + "loss": 0.3503, + "num_input_tokens_seen": 54048128, + "step": 17175 + }, + { + "epoch": 1.0998015491965942, + "grad_norm": 44.602577209472656, + "learning_rate": 4.7840696140022825e-06, + "loss": 0.3664, + "num_input_tokens_seen": 54063040, + "step": 17180 + }, + { + "epoch": 1.1001216311375712, + "grad_norm": 28.573366165161133, + "learning_rate": 4.783842436000905e-06, + "loss": 0.2494, + "num_input_tokens_seen": 54078720, + "step": 17185 + }, + { + "epoch": 1.1004417130785482, + "grad_norm": 19.147682189941406, + "learning_rate": 4.783615143956332e-06, + "loss": 0.3827, + "num_input_tokens_seen": 54093248, + "step": 17190 + }, + { + "epoch": 1.100761795019525, + "grad_norm": 46.993896484375, + "learning_rate": 4.7833877378799145e-06, + "loss": 0.2817, + "num_input_tokens_seen": 54109120, + "step": 17195 + }, + { + "epoch": 1.101081876960502, + "grad_norm": 10.0511474609375, + "learning_rate": 4.783160217783007e-06, + "loss": 0.3777, + "num_input_tokens_seen": 54124544, + "step": 17200 + }, + { + "epoch": 1.1014019589014787, + "grad_norm": 17.103355407714844, + "learning_rate": 4.7829325836769705e-06, + "loss": 0.2661, + "num_input_tokens_seen": 54139456, + "step": 17205 + }, + { + "epoch": 1.1017220408424557, + "grad_norm": 17.166311264038086, + "learning_rate": 4.782704835573172e-06, + "loss": 0.3123, + "num_input_tokens_seen": 54154112, + "step": 17210 + }, + { + "epoch": 1.1020421227834325, + "grad_norm": 9.176955223083496, + "learning_rate": 4.782476973482984e-06, + "loss": 0.3453, + "num_input_tokens_seen": 54169664, + "step": 17215 + }, + { + "epoch": 1.1023622047244095, + "grad_norm": 13.287752151489258, + "learning_rate": 4.782248997417785e-06, + "loss": 0.2779, + "num_input_tokens_seen": 54186752, + "step": 17220 + }, + { + "epoch": 1.1026822866653863, + "grad_norm": 102.3969497680664, + "learning_rate": 4.782020907388959e-06, + "loss": 0.2646, + "num_input_tokens_seen": 54204928, + "step": 17225 + }, + { + "epoch": 1.1030023686063632, + "grad_norm": 13.511734962463379, + "learning_rate": 4.781792703407896e-06, + "loss": 0.3441, + "num_input_tokens_seen": 54220160, + "step": 17230 + }, + { + "epoch": 1.10332245054734, + "grad_norm": 26.169490814208984, + "learning_rate": 4.78156438548599e-06, + "loss": 0.4479, + "num_input_tokens_seen": 54235392, + "step": 17235 + }, + { + "epoch": 1.103642532488317, + "grad_norm": 24.241703033447266, + "learning_rate": 4.781335953634643e-06, + "loss": 0.3998, + "num_input_tokens_seen": 54250560, + "step": 17240 + }, + { + "epoch": 1.103962614429294, + "grad_norm": 18.09502410888672, + "learning_rate": 4.781107407865262e-06, + "loss": 0.3057, + "num_input_tokens_seen": 54267072, + "step": 17245 + }, + { + "epoch": 1.1042826963702708, + "grad_norm": 15.385008811950684, + "learning_rate": 4.780878748189259e-06, + "loss": 0.3702, + "num_input_tokens_seen": 54283584, + "step": 17250 + }, + { + "epoch": 1.1046027783112478, + "grad_norm": 9.982892036437988, + "learning_rate": 4.780649974618051e-06, + "loss": 0.2723, + "num_input_tokens_seen": 54299584, + "step": 17255 + }, + { + "epoch": 1.1049228602522245, + "grad_norm": 24.04676628112793, + "learning_rate": 4.780421087163064e-06, + "loss": 0.3932, + "num_input_tokens_seen": 54315968, + "step": 17260 + }, + { + "epoch": 1.1052429421932015, + "grad_norm": 20.768314361572266, + "learning_rate": 4.7801920858357255e-06, + "loss": 0.4483, + "num_input_tokens_seen": 54331392, + "step": 17265 + }, + { + "epoch": 1.1055630241341783, + "grad_norm": 9.628994941711426, + "learning_rate": 4.779962970647471e-06, + "loss": 0.3162, + "num_input_tokens_seen": 54346624, + "step": 17270 + }, + { + "epoch": 1.1058831060751553, + "grad_norm": 21.063539505004883, + "learning_rate": 4.779733741609742e-06, + "loss": 0.3045, + "num_input_tokens_seen": 54363904, + "step": 17275 + }, + { + "epoch": 1.106203188016132, + "grad_norm": 30.85053062438965, + "learning_rate": 4.779504398733985e-06, + "loss": 0.2935, + "num_input_tokens_seen": 54378240, + "step": 17280 + }, + { + "epoch": 1.106523269957109, + "grad_norm": 14.720510482788086, + "learning_rate": 4.779274942031651e-06, + "loss": 0.3299, + "num_input_tokens_seen": 54393856, + "step": 17285 + }, + { + "epoch": 1.1068433518980858, + "grad_norm": 29.849124908447266, + "learning_rate": 4.7790453715142e-06, + "loss": 0.3736, + "num_input_tokens_seen": 54409792, + "step": 17290 + }, + { + "epoch": 1.1071634338390628, + "grad_norm": 33.005306243896484, + "learning_rate": 4.778815687193093e-06, + "loss": 0.3891, + "num_input_tokens_seen": 54425024, + "step": 17295 + }, + { + "epoch": 1.1074835157800398, + "grad_norm": 42.2361946105957, + "learning_rate": 4.778585889079801e-06, + "loss": 0.3918, + "num_input_tokens_seen": 54440384, + "step": 17300 + }, + { + "epoch": 1.1078035977210166, + "grad_norm": 7.200146675109863, + "learning_rate": 4.778355977185798e-06, + "loss": 0.4742, + "num_input_tokens_seen": 54456064, + "step": 17305 + }, + { + "epoch": 1.1081236796619935, + "grad_norm": 54.34252166748047, + "learning_rate": 4.778125951522565e-06, + "loss": 0.4649, + "num_input_tokens_seen": 54470976, + "step": 17310 + }, + { + "epoch": 1.1084437616029703, + "grad_norm": 39.4710578918457, + "learning_rate": 4.77789581210159e-06, + "loss": 0.4483, + "num_input_tokens_seen": 54486720, + "step": 17315 + }, + { + "epoch": 1.1087638435439473, + "grad_norm": 12.342926025390625, + "learning_rate": 4.777665558934361e-06, + "loss": 0.2347, + "num_input_tokens_seen": 54502592, + "step": 17320 + }, + { + "epoch": 1.109083925484924, + "grad_norm": 21.712831497192383, + "learning_rate": 4.7774351920323795e-06, + "loss": 0.3618, + "num_input_tokens_seen": 54517376, + "step": 17325 + }, + { + "epoch": 1.109404007425901, + "grad_norm": 24.945068359375, + "learning_rate": 4.777204711407146e-06, + "loss": 0.3037, + "num_input_tokens_seen": 54533248, + "step": 17330 + }, + { + "epoch": 1.1097240893668778, + "grad_norm": 22.27788734436035, + "learning_rate": 4.776974117070172e-06, + "loss": 0.3779, + "num_input_tokens_seen": 54548800, + "step": 17335 + }, + { + "epoch": 1.1100441713078548, + "grad_norm": 20.6523380279541, + "learning_rate": 4.77674340903297e-06, + "loss": 0.3112, + "num_input_tokens_seen": 54564352, + "step": 17340 + }, + { + "epoch": 1.1103642532488318, + "grad_norm": 29.72662353515625, + "learning_rate": 4.776512587307062e-06, + "loss": 0.3762, + "num_input_tokens_seen": 54580480, + "step": 17345 + }, + { + "epoch": 1.1106843351898086, + "grad_norm": 28.280303955078125, + "learning_rate": 4.776281651903972e-06, + "loss": 0.2193, + "num_input_tokens_seen": 54594368, + "step": 17350 + }, + { + "epoch": 1.1110044171307856, + "grad_norm": 38.83024215698242, + "learning_rate": 4.776050602835234e-06, + "loss": 0.3551, + "num_input_tokens_seen": 54609216, + "step": 17355 + }, + { + "epoch": 1.1113244990717623, + "grad_norm": 52.48920440673828, + "learning_rate": 4.775819440112385e-06, + "loss": 0.3503, + "num_input_tokens_seen": 54624640, + "step": 17360 + }, + { + "epoch": 1.1116445810127393, + "grad_norm": 12.849809646606445, + "learning_rate": 4.775588163746966e-06, + "loss": 0.3011, + "num_input_tokens_seen": 54640384, + "step": 17365 + }, + { + "epoch": 1.111964662953716, + "grad_norm": 6.499053478240967, + "learning_rate": 4.775356773750528e-06, + "loss": 0.3279, + "num_input_tokens_seen": 54655616, + "step": 17370 + }, + { + "epoch": 1.112284744894693, + "grad_norm": 23.570463180541992, + "learning_rate": 4.7751252701346255e-06, + "loss": 0.3209, + "num_input_tokens_seen": 54670656, + "step": 17375 + }, + { + "epoch": 1.1126048268356699, + "grad_norm": 34.90394973754883, + "learning_rate": 4.7748936529108154e-06, + "loss": 0.3393, + "num_input_tokens_seen": 54686464, + "step": 17380 + }, + { + "epoch": 1.1129249087766468, + "grad_norm": 34.88651657104492, + "learning_rate": 4.774661922090667e-06, + "loss": 0.4474, + "num_input_tokens_seen": 54703232, + "step": 17385 + }, + { + "epoch": 1.1132449907176236, + "grad_norm": 25.21185302734375, + "learning_rate": 4.774430077685751e-06, + "loss": 0.2873, + "num_input_tokens_seen": 54720512, + "step": 17390 + }, + { + "epoch": 1.1135650726586006, + "grad_norm": 31.275835037231445, + "learning_rate": 4.774198119707644e-06, + "loss": 0.3057, + "num_input_tokens_seen": 54737216, + "step": 17395 + }, + { + "epoch": 1.1138851545995774, + "grad_norm": 25.10533905029297, + "learning_rate": 4.7739660481679285e-06, + "loss": 0.2909, + "num_input_tokens_seen": 54752640, + "step": 17400 + }, + { + "epoch": 1.1142052365405544, + "grad_norm": 27.922386169433594, + "learning_rate": 4.773733863078193e-06, + "loss": 0.3757, + "num_input_tokens_seen": 54767936, + "step": 17405 + }, + { + "epoch": 1.1145253184815314, + "grad_norm": 17.737470626831055, + "learning_rate": 4.773501564450032e-06, + "loss": 0.2134, + "num_input_tokens_seen": 54783488, + "step": 17410 + }, + { + "epoch": 1.1148454004225081, + "grad_norm": 23.45142936706543, + "learning_rate": 4.773269152295045e-06, + "loss": 0.2501, + "num_input_tokens_seen": 54800192, + "step": 17415 + }, + { + "epoch": 1.1151654823634851, + "grad_norm": 34.22190856933594, + "learning_rate": 4.773036626624838e-06, + "loss": 0.3489, + "num_input_tokens_seen": 54815936, + "step": 17420 + }, + { + "epoch": 1.1154855643044619, + "grad_norm": 30.487060546875, + "learning_rate": 4.772803987451021e-06, + "loss": 0.277, + "num_input_tokens_seen": 54831488, + "step": 17425 + }, + { + "epoch": 1.1158056462454389, + "grad_norm": 29.468204498291016, + "learning_rate": 4.772571234785212e-06, + "loss": 0.2835, + "num_input_tokens_seen": 54846784, + "step": 17430 + }, + { + "epoch": 1.1161257281864156, + "grad_norm": 14.257030487060547, + "learning_rate": 4.7723383686390324e-06, + "loss": 0.3408, + "num_input_tokens_seen": 54863040, + "step": 17435 + }, + { + "epoch": 1.1164458101273926, + "grad_norm": 12.460914611816406, + "learning_rate": 4.772105389024111e-06, + "loss": 0.3138, + "num_input_tokens_seen": 54877696, + "step": 17440 + }, + { + "epoch": 1.1167658920683694, + "grad_norm": 12.706742286682129, + "learning_rate": 4.771872295952082e-06, + "loss": 0.3693, + "num_input_tokens_seen": 54892672, + "step": 17445 + }, + { + "epoch": 1.1170859740093464, + "grad_norm": 22.082717895507812, + "learning_rate": 4.771639089434584e-06, + "loss": 0.3598, + "num_input_tokens_seen": 54907712, + "step": 17450 + }, + { + "epoch": 1.1174060559503234, + "grad_norm": 33.280452728271484, + "learning_rate": 4.771405769483262e-06, + "loss": 0.2294, + "num_input_tokens_seen": 54923072, + "step": 17455 + }, + { + "epoch": 1.1177261378913002, + "grad_norm": 8.827515602111816, + "learning_rate": 4.771172336109768e-06, + "loss": 0.2831, + "num_input_tokens_seen": 54938240, + "step": 17460 + }, + { + "epoch": 1.1180462198322771, + "grad_norm": 36.385536193847656, + "learning_rate": 4.7709387893257565e-06, + "loss": 0.4827, + "num_input_tokens_seen": 54953984, + "step": 17465 + }, + { + "epoch": 1.118366301773254, + "grad_norm": 15.181402206420898, + "learning_rate": 4.770705129142892e-06, + "loss": 0.3294, + "num_input_tokens_seen": 54969600, + "step": 17470 + }, + { + "epoch": 1.118686383714231, + "grad_norm": 14.046625137329102, + "learning_rate": 4.77047135557284e-06, + "loss": 0.4694, + "num_input_tokens_seen": 54985216, + "step": 17475 + }, + { + "epoch": 1.1190064656552077, + "grad_norm": 31.73661231994629, + "learning_rate": 4.770237468627276e-06, + "loss": 0.3939, + "num_input_tokens_seen": 55000064, + "step": 17480 + }, + { + "epoch": 1.1193265475961847, + "grad_norm": 53.075294494628906, + "learning_rate": 4.770003468317877e-06, + "loss": 0.5221, + "num_input_tokens_seen": 55015808, + "step": 17485 + }, + { + "epoch": 1.1196466295371614, + "grad_norm": 24.33258056640625, + "learning_rate": 4.769769354656329e-06, + "loss": 0.287, + "num_input_tokens_seen": 55030848, + "step": 17490 + }, + { + "epoch": 1.1199667114781384, + "grad_norm": 11.482661247253418, + "learning_rate": 4.769535127654323e-06, + "loss": 0.258, + "num_input_tokens_seen": 55046080, + "step": 17495 + }, + { + "epoch": 1.1202867934191152, + "grad_norm": 8.351834297180176, + "learning_rate": 4.769300787323553e-06, + "loss": 0.2615, + "num_input_tokens_seen": 55062208, + "step": 17500 + }, + { + "epoch": 1.1206068753600922, + "grad_norm": 35.408287048339844, + "learning_rate": 4.769066333675723e-06, + "loss": 0.3679, + "num_input_tokens_seen": 55077696, + "step": 17505 + }, + { + "epoch": 1.1209269573010692, + "grad_norm": 13.543793678283691, + "learning_rate": 4.768831766722539e-06, + "loss": 0.2117, + "num_input_tokens_seen": 55093184, + "step": 17510 + }, + { + "epoch": 1.121247039242046, + "grad_norm": 32.927059173583984, + "learning_rate": 4.768597086475715e-06, + "loss": 0.2977, + "num_input_tokens_seen": 55109440, + "step": 17515 + }, + { + "epoch": 1.121567121183023, + "grad_norm": 37.97990798950195, + "learning_rate": 4.768362292946968e-06, + "loss": 0.2157, + "num_input_tokens_seen": 55123968, + "step": 17520 + }, + { + "epoch": 1.1218872031239997, + "grad_norm": 25.466615676879883, + "learning_rate": 4.768127386148025e-06, + "loss": 0.245, + "num_input_tokens_seen": 55140416, + "step": 17525 + }, + { + "epoch": 1.1222072850649767, + "grad_norm": 65.39518737792969, + "learning_rate": 4.767892366090614e-06, + "loss": 0.3131, + "num_input_tokens_seen": 55156224, + "step": 17530 + }, + { + "epoch": 1.1225273670059535, + "grad_norm": 13.063695907592773, + "learning_rate": 4.767657232786471e-06, + "loss": 0.4164, + "num_input_tokens_seen": 55171584, + "step": 17535 + }, + { + "epoch": 1.1228474489469304, + "grad_norm": 34.16926574707031, + "learning_rate": 4.767421986247338e-06, + "loss": 0.4182, + "num_input_tokens_seen": 55186624, + "step": 17540 + }, + { + "epoch": 1.1231675308879072, + "grad_norm": 19.4957218170166, + "learning_rate": 4.767186626484962e-06, + "loss": 0.5068, + "num_input_tokens_seen": 55202944, + "step": 17545 + }, + { + "epoch": 1.1234876128288842, + "grad_norm": 34.8454475402832, + "learning_rate": 4.766951153511095e-06, + "loss": 0.3982, + "num_input_tokens_seen": 55219520, + "step": 17550 + }, + { + "epoch": 1.123807694769861, + "grad_norm": 32.273895263671875, + "learning_rate": 4.766715567337494e-06, + "loss": 0.3083, + "num_input_tokens_seen": 55235008, + "step": 17555 + }, + { + "epoch": 1.124127776710838, + "grad_norm": 11.425713539123535, + "learning_rate": 4.766479867975926e-06, + "loss": 0.2455, + "num_input_tokens_seen": 55251008, + "step": 17560 + }, + { + "epoch": 1.124447858651815, + "grad_norm": 23.130998611450195, + "learning_rate": 4.766244055438159e-06, + "loss": 0.4543, + "num_input_tokens_seen": 55265728, + "step": 17565 + }, + { + "epoch": 1.1247679405927917, + "grad_norm": 17.309297561645508, + "learning_rate": 4.766008129735968e-06, + "loss": 0.3649, + "num_input_tokens_seen": 55281984, + "step": 17570 + }, + { + "epoch": 1.1250880225337687, + "grad_norm": 25.14297103881836, + "learning_rate": 4.765772090881135e-06, + "loss": 0.3141, + "num_input_tokens_seen": 55297152, + "step": 17575 + }, + { + "epoch": 1.1254081044747455, + "grad_norm": 36.22516632080078, + "learning_rate": 4.765535938885444e-06, + "loss": 0.2546, + "num_input_tokens_seen": 55312768, + "step": 17580 + }, + { + "epoch": 1.1257281864157225, + "grad_norm": 27.73340606689453, + "learning_rate": 4.76529967376069e-06, + "loss": 0.3107, + "num_input_tokens_seen": 55328832, + "step": 17585 + }, + { + "epoch": 1.1260482683566992, + "grad_norm": 30.010549545288086, + "learning_rate": 4.7650632955186695e-06, + "loss": 0.3397, + "num_input_tokens_seen": 55343808, + "step": 17590 + }, + { + "epoch": 1.1263683502976762, + "grad_norm": 16.07807159423828, + "learning_rate": 4.764826804171186e-06, + "loss": 0.2761, + "num_input_tokens_seen": 55359872, + "step": 17595 + }, + { + "epoch": 1.126688432238653, + "grad_norm": 6.437229156494141, + "learning_rate": 4.764590199730051e-06, + "loss": 0.3031, + "num_input_tokens_seen": 55374784, + "step": 17600 + }, + { + "epoch": 1.12700851417963, + "grad_norm": 11.581077575683594, + "learning_rate": 4.764353482207075e-06, + "loss": 0.3103, + "num_input_tokens_seen": 55391872, + "step": 17605 + }, + { + "epoch": 1.127328596120607, + "grad_norm": 19.82222557067871, + "learning_rate": 4.764116651614081e-06, + "loss": 0.4341, + "num_input_tokens_seen": 55407232, + "step": 17610 + }, + { + "epoch": 1.1276486780615838, + "grad_norm": 14.324782371520996, + "learning_rate": 4.763879707962895e-06, + "loss": 0.3801, + "num_input_tokens_seen": 55422336, + "step": 17615 + }, + { + "epoch": 1.1279687600025607, + "grad_norm": 23.450485229492188, + "learning_rate": 4.7636426512653486e-06, + "loss": 0.2858, + "num_input_tokens_seen": 55438528, + "step": 17620 + }, + { + "epoch": 1.1282888419435375, + "grad_norm": 44.402008056640625, + "learning_rate": 4.76340548153328e-06, + "loss": 0.4079, + "num_input_tokens_seen": 55457152, + "step": 17625 + }, + { + "epoch": 1.1286089238845145, + "grad_norm": 26.573993682861328, + "learning_rate": 4.7631681987785305e-06, + "loss": 0.2035, + "num_input_tokens_seen": 55473024, + "step": 17630 + }, + { + "epoch": 1.1289290058254913, + "grad_norm": 46.689491271972656, + "learning_rate": 4.762930803012949e-06, + "loss": 0.4639, + "num_input_tokens_seen": 55489024, + "step": 17635 + }, + { + "epoch": 1.1292490877664683, + "grad_norm": 28.549654006958008, + "learning_rate": 4.762693294248391e-06, + "loss": 0.3218, + "num_input_tokens_seen": 55504832, + "step": 17640 + }, + { + "epoch": 1.129569169707445, + "grad_norm": 14.269451141357422, + "learning_rate": 4.762455672496716e-06, + "loss": 0.2533, + "num_input_tokens_seen": 55520320, + "step": 17645 + }, + { + "epoch": 1.129889251648422, + "grad_norm": 10.978174209594727, + "learning_rate": 4.7622179377697895e-06, + "loss": 0.3186, + "num_input_tokens_seen": 55535360, + "step": 17650 + }, + { + "epoch": 1.1302093335893988, + "grad_norm": 27.75611114501953, + "learning_rate": 4.761980090079482e-06, + "loss": 0.4017, + "num_input_tokens_seen": 55550464, + "step": 17655 + }, + { + "epoch": 1.1305294155303758, + "grad_norm": 50.9631233215332, + "learning_rate": 4.761742129437671e-06, + "loss": 0.395, + "num_input_tokens_seen": 55565248, + "step": 17660 + }, + { + "epoch": 1.1308494974713525, + "grad_norm": 15.21599292755127, + "learning_rate": 4.761504055856239e-06, + "loss": 0.3577, + "num_input_tokens_seen": 55580672, + "step": 17665 + }, + { + "epoch": 1.1311695794123295, + "grad_norm": 21.788589477539062, + "learning_rate": 4.7612658693470745e-06, + "loss": 0.562, + "num_input_tokens_seen": 55596608, + "step": 17670 + }, + { + "epoch": 1.1314896613533065, + "grad_norm": 32.67219543457031, + "learning_rate": 4.761027569922072e-06, + "loss": 0.3617, + "num_input_tokens_seen": 55611904, + "step": 17675 + }, + { + "epoch": 1.1318097432942833, + "grad_norm": 20.884124755859375, + "learning_rate": 4.760789157593129e-06, + "loss": 0.3807, + "num_input_tokens_seen": 55627968, + "step": 17680 + }, + { + "epoch": 1.1321298252352603, + "grad_norm": 33.954063415527344, + "learning_rate": 4.760550632372151e-06, + "loss": 0.356, + "num_input_tokens_seen": 55644928, + "step": 17685 + }, + { + "epoch": 1.132449907176237, + "grad_norm": 19.28286361694336, + "learning_rate": 4.76031199427105e-06, + "loss": 0.4617, + "num_input_tokens_seen": 55660352, + "step": 17690 + }, + { + "epoch": 1.132769989117214, + "grad_norm": 22.2919921875, + "learning_rate": 4.760073243301741e-06, + "loss": 0.2799, + "num_input_tokens_seen": 55676672, + "step": 17695 + }, + { + "epoch": 1.1330900710581908, + "grad_norm": 12.675660133361816, + "learning_rate": 4.759834379476147e-06, + "loss": 0.3001, + "num_input_tokens_seen": 55691328, + "step": 17700 + }, + { + "epoch": 1.1334101529991678, + "grad_norm": 46.14750671386719, + "learning_rate": 4.759595402806194e-06, + "loss": 0.5102, + "num_input_tokens_seen": 55706752, + "step": 17705 + }, + { + "epoch": 1.1337302349401446, + "grad_norm": 21.049474716186523, + "learning_rate": 4.759356313303818e-06, + "loss": 0.3296, + "num_input_tokens_seen": 55722944, + "step": 17710 + }, + { + "epoch": 1.1340503168811216, + "grad_norm": 31.75536346435547, + "learning_rate": 4.759117110980955e-06, + "loss": 0.2977, + "num_input_tokens_seen": 55739776, + "step": 17715 + }, + { + "epoch": 1.1343703988220986, + "grad_norm": 24.16501808166504, + "learning_rate": 4.7588777958495515e-06, + "loss": 0.4228, + "num_input_tokens_seen": 55755712, + "step": 17720 + }, + { + "epoch": 1.1346904807630753, + "grad_norm": 28.304262161254883, + "learning_rate": 4.758638367921556e-06, + "loss": 0.3189, + "num_input_tokens_seen": 55772032, + "step": 17725 + }, + { + "epoch": 1.1350105627040523, + "grad_norm": 18.455989837646484, + "learning_rate": 4.758398827208927e-06, + "loss": 0.3287, + "num_input_tokens_seen": 55789248, + "step": 17730 + }, + { + "epoch": 1.135330644645029, + "grad_norm": 33.872230529785156, + "learning_rate": 4.758159173723623e-06, + "loss": 0.3426, + "num_input_tokens_seen": 55804224, + "step": 17735 + }, + { + "epoch": 1.135650726586006, + "grad_norm": 30.147846221923828, + "learning_rate": 4.757919407477613e-06, + "loss": 0.4571, + "num_input_tokens_seen": 55818816, + "step": 17740 + }, + { + "epoch": 1.1359708085269828, + "grad_norm": 12.410906791687012, + "learning_rate": 4.757679528482867e-06, + "loss": 0.3233, + "num_input_tokens_seen": 55834496, + "step": 17745 + }, + { + "epoch": 1.1362908904679598, + "grad_norm": 17.223140716552734, + "learning_rate": 4.757439536751367e-06, + "loss": 0.3372, + "num_input_tokens_seen": 55851200, + "step": 17750 + }, + { + "epoch": 1.1366109724089366, + "grad_norm": 18.592784881591797, + "learning_rate": 4.757199432295094e-06, + "loss": 0.2493, + "num_input_tokens_seen": 55867712, + "step": 17755 + }, + { + "epoch": 1.1369310543499136, + "grad_norm": 25.44942855834961, + "learning_rate": 4.756959215126039e-06, + "loss": 0.2704, + "num_input_tokens_seen": 55882496, + "step": 17760 + }, + { + "epoch": 1.1372511362908906, + "grad_norm": 23.140743255615234, + "learning_rate": 4.756718885256197e-06, + "loss": 0.4733, + "num_input_tokens_seen": 55897984, + "step": 17765 + }, + { + "epoch": 1.1375712182318674, + "grad_norm": 47.74863815307617, + "learning_rate": 4.7564784426975685e-06, + "loss": 0.3585, + "num_input_tokens_seen": 55913216, + "step": 17770 + }, + { + "epoch": 1.1378913001728443, + "grad_norm": 9.187592506408691, + "learning_rate": 4.756237887462161e-06, + "loss": 0.2183, + "num_input_tokens_seen": 55928320, + "step": 17775 + }, + { + "epoch": 1.1382113821138211, + "grad_norm": 16.336057662963867, + "learning_rate": 4.755997219561984e-06, + "loss": 0.31, + "num_input_tokens_seen": 55944576, + "step": 17780 + }, + { + "epoch": 1.138531464054798, + "grad_norm": 15.676985740661621, + "learning_rate": 4.7557564390090585e-06, + "loss": 0.2849, + "num_input_tokens_seen": 55960256, + "step": 17785 + }, + { + "epoch": 1.1388515459957749, + "grad_norm": 15.888855934143066, + "learning_rate": 4.755515545815406e-06, + "loss": 0.3296, + "num_input_tokens_seen": 55975872, + "step": 17790 + }, + { + "epoch": 1.1391716279367519, + "grad_norm": 26.52968978881836, + "learning_rate": 4.755274539993056e-06, + "loss": 0.3347, + "num_input_tokens_seen": 55992192, + "step": 17795 + }, + { + "epoch": 1.1394917098777286, + "grad_norm": 47.03224563598633, + "learning_rate": 4.755033421554042e-06, + "loss": 0.3867, + "num_input_tokens_seen": 56008064, + "step": 17800 + }, + { + "epoch": 1.1398117918187056, + "grad_norm": 35.44361877441406, + "learning_rate": 4.754792190510405e-06, + "loss": 0.4431, + "num_input_tokens_seen": 56024320, + "step": 17805 + }, + { + "epoch": 1.1401318737596824, + "grad_norm": 6.895241737365723, + "learning_rate": 4.7545508468741905e-06, + "loss": 0.4699, + "num_input_tokens_seen": 56042688, + "step": 17810 + }, + { + "epoch": 1.1404519557006594, + "grad_norm": 13.685063362121582, + "learning_rate": 4.754309390657451e-06, + "loss": 0.3477, + "num_input_tokens_seen": 56057280, + "step": 17815 + }, + { + "epoch": 1.1407720376416361, + "grad_norm": 22.50419044494629, + "learning_rate": 4.754067821872242e-06, + "loss": 0.2841, + "num_input_tokens_seen": 56073600, + "step": 17820 + }, + { + "epoch": 1.1410921195826131, + "grad_norm": 16.779268264770508, + "learning_rate": 4.753826140530628e-06, + "loss": 0.3666, + "num_input_tokens_seen": 56089344, + "step": 17825 + }, + { + "epoch": 1.1414122015235901, + "grad_norm": 15.772339820861816, + "learning_rate": 4.753584346644675e-06, + "loss": 0.344, + "num_input_tokens_seen": 56105856, + "step": 17830 + }, + { + "epoch": 1.141732283464567, + "grad_norm": 34.52896499633789, + "learning_rate": 4.753342440226459e-06, + "loss": 0.4252, + "num_input_tokens_seen": 56121344, + "step": 17835 + }, + { + "epoch": 1.142052365405544, + "grad_norm": 18.64345932006836, + "learning_rate": 4.753100421288059e-06, + "loss": 0.3486, + "num_input_tokens_seen": 56137984, + "step": 17840 + }, + { + "epoch": 1.1423724473465207, + "grad_norm": 12.242098808288574, + "learning_rate": 4.752858289841559e-06, + "loss": 0.3367, + "num_input_tokens_seen": 56153024, + "step": 17845 + }, + { + "epoch": 1.1426925292874976, + "grad_norm": 22.974958419799805, + "learning_rate": 4.7526160458990515e-06, + "loss": 0.2596, + "num_input_tokens_seen": 56169088, + "step": 17850 + }, + { + "epoch": 1.1430126112284744, + "grad_norm": 13.49227523803711, + "learning_rate": 4.752373689472633e-06, + "loss": 0.2969, + "num_input_tokens_seen": 56184576, + "step": 17855 + }, + { + "epoch": 1.1433326931694514, + "grad_norm": 18.258522033691406, + "learning_rate": 4.752131220574403e-06, + "loss": 0.33, + "num_input_tokens_seen": 56200320, + "step": 17860 + }, + { + "epoch": 1.1436527751104282, + "grad_norm": 28.924272537231445, + "learning_rate": 4.751888639216471e-06, + "loss": 0.3765, + "num_input_tokens_seen": 56215360, + "step": 17865 + }, + { + "epoch": 1.1439728570514052, + "grad_norm": 21.529407501220703, + "learning_rate": 4.751645945410951e-06, + "loss": 0.3007, + "num_input_tokens_seen": 56231360, + "step": 17870 + }, + { + "epoch": 1.1442929389923822, + "grad_norm": 20.60247039794922, + "learning_rate": 4.75140313916996e-06, + "loss": 0.2715, + "num_input_tokens_seen": 56247488, + "step": 17875 + }, + { + "epoch": 1.144613020933359, + "grad_norm": 31.41061019897461, + "learning_rate": 4.751160220505623e-06, + "loss": 0.4135, + "num_input_tokens_seen": 56262912, + "step": 17880 + }, + { + "epoch": 1.144933102874336, + "grad_norm": 18.071334838867188, + "learning_rate": 4.750917189430072e-06, + "loss": 0.3799, + "num_input_tokens_seen": 56278784, + "step": 17885 + }, + { + "epoch": 1.1452531848153127, + "grad_norm": 10.42909049987793, + "learning_rate": 4.75067404595544e-06, + "loss": 0.3399, + "num_input_tokens_seen": 56293312, + "step": 17890 + }, + { + "epoch": 1.1455732667562897, + "grad_norm": 42.47047424316406, + "learning_rate": 4.7504307900938694e-06, + "loss": 0.4924, + "num_input_tokens_seen": 56308352, + "step": 17895 + }, + { + "epoch": 1.1458933486972664, + "grad_norm": 40.166786193847656, + "learning_rate": 4.750187421857507e-06, + "loss": 0.2666, + "num_input_tokens_seen": 56323904, + "step": 17900 + }, + { + "epoch": 1.1462134306382434, + "grad_norm": 31.915149688720703, + "learning_rate": 4.749943941258507e-06, + "loss": 0.3543, + "num_input_tokens_seen": 56339328, + "step": 17905 + }, + { + "epoch": 1.1465335125792202, + "grad_norm": 13.406525611877441, + "learning_rate": 4.749700348309025e-06, + "loss": 0.3061, + "num_input_tokens_seen": 56355968, + "step": 17910 + }, + { + "epoch": 1.1468535945201972, + "grad_norm": 19.62630844116211, + "learning_rate": 4.749456643021226e-06, + "loss": 0.2885, + "num_input_tokens_seen": 56372544, + "step": 17915 + }, + { + "epoch": 1.1471736764611742, + "grad_norm": 10.509824752807617, + "learning_rate": 4.749212825407279e-06, + "loss": 0.2448, + "num_input_tokens_seen": 56388288, + "step": 17920 + }, + { + "epoch": 1.147493758402151, + "grad_norm": 18.390697479248047, + "learning_rate": 4.74896889547936e-06, + "loss": 0.3832, + "num_input_tokens_seen": 56404160, + "step": 17925 + }, + { + "epoch": 1.1478138403431277, + "grad_norm": 38.742679595947266, + "learning_rate": 4.748724853249648e-06, + "loss": 0.2836, + "num_input_tokens_seen": 56419264, + "step": 17930 + }, + { + "epoch": 1.1481339222841047, + "grad_norm": 11.571593284606934, + "learning_rate": 4.74848069873033e-06, + "loss": 0.3568, + "num_input_tokens_seen": 56435520, + "step": 17935 + }, + { + "epoch": 1.1484540042250817, + "grad_norm": 33.9571418762207, + "learning_rate": 4.748236431933598e-06, + "loss": 0.3754, + "num_input_tokens_seen": 56451264, + "step": 17940 + }, + { + "epoch": 1.1487740861660585, + "grad_norm": 30.484844207763672, + "learning_rate": 4.74799205287165e-06, + "loss": 0.2532, + "num_input_tokens_seen": 56466624, + "step": 17945 + }, + { + "epoch": 1.1490941681070355, + "grad_norm": 21.983688354492188, + "learning_rate": 4.747747561556687e-06, + "loss": 0.3551, + "num_input_tokens_seen": 56481856, + "step": 17950 + }, + { + "epoch": 1.1494142500480122, + "grad_norm": 21.4134521484375, + "learning_rate": 4.7475029580009205e-06, + "loss": 0.3701, + "num_input_tokens_seen": 56498048, + "step": 17955 + }, + { + "epoch": 1.1497343319889892, + "grad_norm": 14.97579574584961, + "learning_rate": 4.747258242216561e-06, + "loss": 0.329, + "num_input_tokens_seen": 56512960, + "step": 17960 + }, + { + "epoch": 1.150054413929966, + "grad_norm": 20.95728874206543, + "learning_rate": 4.747013414215832e-06, + "loss": 0.3293, + "num_input_tokens_seen": 56528256, + "step": 17965 + }, + { + "epoch": 1.150374495870943, + "grad_norm": 23.91965675354004, + "learning_rate": 4.746768474010957e-06, + "loss": 0.3477, + "num_input_tokens_seen": 56543552, + "step": 17970 + }, + { + "epoch": 1.1506945778119197, + "grad_norm": 33.84221649169922, + "learning_rate": 4.746523421614168e-06, + "loss": 0.4648, + "num_input_tokens_seen": 56559936, + "step": 17975 + }, + { + "epoch": 1.1510146597528967, + "grad_norm": 18.341445922851562, + "learning_rate": 4.7462782570377e-06, + "loss": 0.3169, + "num_input_tokens_seen": 56575872, + "step": 17980 + }, + { + "epoch": 1.1513347416938737, + "grad_norm": 54.16480255126953, + "learning_rate": 4.746032980293796e-06, + "loss": 0.2964, + "num_input_tokens_seen": 56591936, + "step": 17985 + }, + { + "epoch": 1.1516548236348505, + "grad_norm": 20.177122116088867, + "learning_rate": 4.745787591394705e-06, + "loss": 0.2924, + "num_input_tokens_seen": 56607040, + "step": 17990 + }, + { + "epoch": 1.1519749055758275, + "grad_norm": 19.55206871032715, + "learning_rate": 4.74554209035268e-06, + "loss": 0.2968, + "num_input_tokens_seen": 56623872, + "step": 17995 + }, + { + "epoch": 1.1522949875168043, + "grad_norm": 43.36288070678711, + "learning_rate": 4.745296477179978e-06, + "loss": 0.3764, + "num_input_tokens_seen": 56641280, + "step": 18000 + }, + { + "epoch": 1.1526150694577812, + "grad_norm": 16.5266056060791, + "learning_rate": 4.7450507518888665e-06, + "loss": 0.4219, + "num_input_tokens_seen": 56657408, + "step": 18005 + }, + { + "epoch": 1.152935151398758, + "grad_norm": 25.53843116760254, + "learning_rate": 4.744804914491613e-06, + "loss": 0.4024, + "num_input_tokens_seen": 56672896, + "step": 18010 + }, + { + "epoch": 1.153255233339735, + "grad_norm": 21.490962982177734, + "learning_rate": 4.744558965000497e-06, + "loss": 0.3147, + "num_input_tokens_seen": 56688960, + "step": 18015 + }, + { + "epoch": 1.1535753152807118, + "grad_norm": 13.09670352935791, + "learning_rate": 4.744312903427797e-06, + "loss": 0.3024, + "num_input_tokens_seen": 56704768, + "step": 18020 + }, + { + "epoch": 1.1538953972216888, + "grad_norm": 27.20735740661621, + "learning_rate": 4.7440667297858e-06, + "loss": 0.4075, + "num_input_tokens_seen": 56720576, + "step": 18025 + }, + { + "epoch": 1.1542154791626658, + "grad_norm": 29.780588150024414, + "learning_rate": 4.7438204440868e-06, + "loss": 0.311, + "num_input_tokens_seen": 56737344, + "step": 18030 + }, + { + "epoch": 1.1545355611036425, + "grad_norm": 52.991676330566406, + "learning_rate": 4.743574046343095e-06, + "loss": 0.3828, + "num_input_tokens_seen": 56752128, + "step": 18035 + }, + { + "epoch": 1.1548556430446195, + "grad_norm": 10.646166801452637, + "learning_rate": 4.743327536566988e-06, + "loss": 0.1886, + "num_input_tokens_seen": 56767424, + "step": 18040 + }, + { + "epoch": 1.1551757249855963, + "grad_norm": 27.502601623535156, + "learning_rate": 4.7430809147707885e-06, + "loss": 0.2718, + "num_input_tokens_seen": 56783552, + "step": 18045 + }, + { + "epoch": 1.1554958069265733, + "grad_norm": 31.335416793823242, + "learning_rate": 4.742834180966812e-06, + "loss": 0.4257, + "num_input_tokens_seen": 56798528, + "step": 18050 + }, + { + "epoch": 1.15581588886755, + "grad_norm": 18.986495971679688, + "learning_rate": 4.74258733516738e-06, + "loss": 0.3652, + "num_input_tokens_seen": 56815808, + "step": 18055 + }, + { + "epoch": 1.156135970808527, + "grad_norm": 19.936918258666992, + "learning_rate": 4.742340377384816e-06, + "loss": 0.2541, + "num_input_tokens_seen": 56831552, + "step": 18060 + }, + { + "epoch": 1.1564560527495038, + "grad_norm": 9.677896499633789, + "learning_rate": 4.742093307631456e-06, + "loss": 0.2047, + "num_input_tokens_seen": 56846656, + "step": 18065 + }, + { + "epoch": 1.1567761346904808, + "grad_norm": 12.084104537963867, + "learning_rate": 4.7418461259196326e-06, + "loss": 0.1776, + "num_input_tokens_seen": 56862656, + "step": 18070 + }, + { + "epoch": 1.1570962166314576, + "grad_norm": 21.71306037902832, + "learning_rate": 4.741598832261692e-06, + "loss": 0.3047, + "num_input_tokens_seen": 56878144, + "step": 18075 + }, + { + "epoch": 1.1574162985724346, + "grad_norm": 22.70412254333496, + "learning_rate": 4.741351426669982e-06, + "loss": 0.2693, + "num_input_tokens_seen": 56895808, + "step": 18080 + }, + { + "epoch": 1.1577363805134113, + "grad_norm": 21.738567352294922, + "learning_rate": 4.741103909156856e-06, + "loss": 0.3429, + "num_input_tokens_seen": 56910784, + "step": 18085 + }, + { + "epoch": 1.1580564624543883, + "grad_norm": 18.365835189819336, + "learning_rate": 4.740856279734674e-06, + "loss": 0.2965, + "num_input_tokens_seen": 56928000, + "step": 18090 + }, + { + "epoch": 1.1583765443953653, + "grad_norm": 52.03500747680664, + "learning_rate": 4.740608538415802e-06, + "loss": 0.2712, + "num_input_tokens_seen": 56943616, + "step": 18095 + }, + { + "epoch": 1.158696626336342, + "grad_norm": 79.30585479736328, + "learning_rate": 4.74036068521261e-06, + "loss": 0.4541, + "num_input_tokens_seen": 56959424, + "step": 18100 + }, + { + "epoch": 1.159016708277319, + "grad_norm": 61.55967712402344, + "learning_rate": 4.740112720137476e-06, + "loss": 0.4573, + "num_input_tokens_seen": 56977152, + "step": 18105 + }, + { + "epoch": 1.1593367902182958, + "grad_norm": 29.349233627319336, + "learning_rate": 4.73986464320278e-06, + "loss": 0.3981, + "num_input_tokens_seen": 56993024, + "step": 18110 + }, + { + "epoch": 1.1596568721592728, + "grad_norm": 19.499469757080078, + "learning_rate": 4.739616454420912e-06, + "loss": 0.2842, + "num_input_tokens_seen": 57008320, + "step": 18115 + }, + { + "epoch": 1.1599769541002496, + "grad_norm": 41.784400939941406, + "learning_rate": 4.739368153804264e-06, + "loss": 0.3857, + "num_input_tokens_seen": 57024256, + "step": 18120 + }, + { + "epoch": 1.1602970360412266, + "grad_norm": 13.503829002380371, + "learning_rate": 4.739119741365233e-06, + "loss": 0.222, + "num_input_tokens_seen": 57039616, + "step": 18125 + }, + { + "epoch": 1.1606171179822034, + "grad_norm": 16.161989212036133, + "learning_rate": 4.738871217116226e-06, + "loss": 0.3577, + "num_input_tokens_seen": 57055744, + "step": 18130 + }, + { + "epoch": 1.1609371999231803, + "grad_norm": 36.94822311401367, + "learning_rate": 4.7386225810696535e-06, + "loss": 0.3976, + "num_input_tokens_seen": 57070720, + "step": 18135 + }, + { + "epoch": 1.1612572818641573, + "grad_norm": 19.674522399902344, + "learning_rate": 4.7383738332379295e-06, + "loss": 0.3639, + "num_input_tokens_seen": 57086080, + "step": 18140 + }, + { + "epoch": 1.161577363805134, + "grad_norm": 14.510128021240234, + "learning_rate": 4.738124973633474e-06, + "loss": 0.3494, + "num_input_tokens_seen": 57101184, + "step": 18145 + }, + { + "epoch": 1.161897445746111, + "grad_norm": 26.0491943359375, + "learning_rate": 4.737876002268716e-06, + "loss": 0.3558, + "num_input_tokens_seen": 57119680, + "step": 18150 + }, + { + "epoch": 1.1622175276870879, + "grad_norm": 15.298977851867676, + "learning_rate": 4.737626919156089e-06, + "loss": 0.2029, + "num_input_tokens_seen": 57135808, + "step": 18155 + }, + { + "epoch": 1.1625376096280648, + "grad_norm": 24.49024772644043, + "learning_rate": 4.737377724308027e-06, + "loss": 0.4155, + "num_input_tokens_seen": 57151296, + "step": 18160 + }, + { + "epoch": 1.1628576915690416, + "grad_norm": 29.14506721496582, + "learning_rate": 4.737128417736976e-06, + "loss": 0.3585, + "num_input_tokens_seen": 57166656, + "step": 18165 + }, + { + "epoch": 1.1631777735100186, + "grad_norm": 20.330495834350586, + "learning_rate": 4.7368789994553845e-06, + "loss": 0.343, + "num_input_tokens_seen": 57184320, + "step": 18170 + }, + { + "epoch": 1.1634978554509954, + "grad_norm": 19.83164405822754, + "learning_rate": 4.736629469475708e-06, + "loss": 0.2874, + "num_input_tokens_seen": 57199360, + "step": 18175 + }, + { + "epoch": 1.1638179373919724, + "grad_norm": 36.86490249633789, + "learning_rate": 4.7363798278104046e-06, + "loss": 0.2837, + "num_input_tokens_seen": 57214528, + "step": 18180 + }, + { + "epoch": 1.1641380193329494, + "grad_norm": 34.39811325073242, + "learning_rate": 4.736130074471942e-06, + "loss": 0.4724, + "num_input_tokens_seen": 57230784, + "step": 18185 + }, + { + "epoch": 1.1644581012739261, + "grad_norm": 10.29695987701416, + "learning_rate": 4.735880209472792e-06, + "loss": 0.253, + "num_input_tokens_seen": 57246656, + "step": 18190 + }, + { + "epoch": 1.164778183214903, + "grad_norm": 15.414774894714355, + "learning_rate": 4.73563023282543e-06, + "loss": 0.282, + "num_input_tokens_seen": 57261696, + "step": 18195 + }, + { + "epoch": 1.1650982651558799, + "grad_norm": 21.555538177490234, + "learning_rate": 4.735380144542339e-06, + "loss": 0.4472, + "num_input_tokens_seen": 57277824, + "step": 18200 + }, + { + "epoch": 1.1654183470968569, + "grad_norm": 29.456661224365234, + "learning_rate": 4.7351299446360065e-06, + "loss": 0.3102, + "num_input_tokens_seen": 57293440, + "step": 18205 + }, + { + "epoch": 1.1657384290378336, + "grad_norm": 37.15040969848633, + "learning_rate": 4.734879633118928e-06, + "loss": 0.2573, + "num_input_tokens_seen": 57308928, + "step": 18210 + }, + { + "epoch": 1.1660585109788106, + "grad_norm": 32.64241409301758, + "learning_rate": 4.734629210003601e-06, + "loss": 0.2893, + "num_input_tokens_seen": 57325056, + "step": 18215 + }, + { + "epoch": 1.1663785929197874, + "grad_norm": 16.974445343017578, + "learning_rate": 4.734378675302532e-06, + "loss": 0.3903, + "num_input_tokens_seen": 57340608, + "step": 18220 + }, + { + "epoch": 1.1666986748607644, + "grad_norm": 42.05740737915039, + "learning_rate": 4.73412802902823e-06, + "loss": 0.2948, + "num_input_tokens_seen": 57355520, + "step": 18225 + }, + { + "epoch": 1.1670187568017412, + "grad_norm": 11.584325790405273, + "learning_rate": 4.733877271193211e-06, + "loss": 0.2962, + "num_input_tokens_seen": 57371328, + "step": 18230 + }, + { + "epoch": 1.1673388387427182, + "grad_norm": 30.027908325195312, + "learning_rate": 4.733626401809997e-06, + "loss": 0.3113, + "num_input_tokens_seen": 57389056, + "step": 18235 + }, + { + "epoch": 1.167658920683695, + "grad_norm": 15.775495529174805, + "learning_rate": 4.733375420891115e-06, + "loss": 0.195, + "num_input_tokens_seen": 57403456, + "step": 18240 + }, + { + "epoch": 1.167979002624672, + "grad_norm": 30.460636138916016, + "learning_rate": 4.733124328449098e-06, + "loss": 0.3968, + "num_input_tokens_seen": 57419072, + "step": 18245 + }, + { + "epoch": 1.168299084565649, + "grad_norm": 39.179710388183594, + "learning_rate": 4.732873124496483e-06, + "loss": 0.2418, + "num_input_tokens_seen": 57434624, + "step": 18250 + }, + { + "epoch": 1.1686191665066257, + "grad_norm": 13.252092361450195, + "learning_rate": 4.732621809045817e-06, + "loss": 0.3917, + "num_input_tokens_seen": 57451008, + "step": 18255 + }, + { + "epoch": 1.1689392484476027, + "grad_norm": 27.368871688842773, + "learning_rate": 4.732370382109644e-06, + "loss": 0.4322, + "num_input_tokens_seen": 57467264, + "step": 18260 + }, + { + "epoch": 1.1692593303885794, + "grad_norm": 32.48051071166992, + "learning_rate": 4.732118843700525e-06, + "loss": 0.465, + "num_input_tokens_seen": 57483072, + "step": 18265 + }, + { + "epoch": 1.1695794123295564, + "grad_norm": 20.697063446044922, + "learning_rate": 4.731867193831016e-06, + "loss": 0.2412, + "num_input_tokens_seen": 57498240, + "step": 18270 + }, + { + "epoch": 1.1698994942705332, + "grad_norm": 43.83451461791992, + "learning_rate": 4.7316154325136855e-06, + "loss": 0.3972, + "num_input_tokens_seen": 57514112, + "step": 18275 + }, + { + "epoch": 1.1702195762115102, + "grad_norm": 10.918686866760254, + "learning_rate": 4.731363559761104e-06, + "loss": 0.3429, + "num_input_tokens_seen": 57530496, + "step": 18280 + }, + { + "epoch": 1.170539658152487, + "grad_norm": 44.730709075927734, + "learning_rate": 4.731111575585849e-06, + "loss": 0.3387, + "num_input_tokens_seen": 57547648, + "step": 18285 + }, + { + "epoch": 1.170859740093464, + "grad_norm": 48.052642822265625, + "learning_rate": 4.730859480000503e-06, + "loss": 0.2618, + "num_input_tokens_seen": 57563136, + "step": 18290 + }, + { + "epoch": 1.171179822034441, + "grad_norm": 8.025613784790039, + "learning_rate": 4.730607273017655e-06, + "loss": 0.2651, + "num_input_tokens_seen": 57578880, + "step": 18295 + }, + { + "epoch": 1.1714999039754177, + "grad_norm": 42.159881591796875, + "learning_rate": 4.730354954649899e-06, + "loss": 0.3716, + "num_input_tokens_seen": 57594624, + "step": 18300 + }, + { + "epoch": 1.1718199859163947, + "grad_norm": 31.624736785888672, + "learning_rate": 4.730102524909835e-06, + "loss": 0.2422, + "num_input_tokens_seen": 57609856, + "step": 18305 + }, + { + "epoch": 1.1721400678573715, + "grad_norm": 11.43880844116211, + "learning_rate": 4.729849983810066e-06, + "loss": 0.2457, + "num_input_tokens_seen": 57624896, + "step": 18310 + }, + { + "epoch": 1.1724601497983484, + "grad_norm": 35.68716812133789, + "learning_rate": 4.729597331363203e-06, + "loss": 0.396, + "num_input_tokens_seen": 57639744, + "step": 18315 + }, + { + "epoch": 1.1727802317393252, + "grad_norm": 19.514575958251953, + "learning_rate": 4.7293445675818635e-06, + "loss": 0.2308, + "num_input_tokens_seen": 57654912, + "step": 18320 + }, + { + "epoch": 1.1731003136803022, + "grad_norm": 14.364127159118652, + "learning_rate": 4.729091692478668e-06, + "loss": 0.2958, + "num_input_tokens_seen": 57669760, + "step": 18325 + }, + { + "epoch": 1.173420395621279, + "grad_norm": 10.304557800292969, + "learning_rate": 4.728838706066245e-06, + "loss": 0.3218, + "num_input_tokens_seen": 57685888, + "step": 18330 + }, + { + "epoch": 1.173740477562256, + "grad_norm": 33.08051681518555, + "learning_rate": 4.728585608357226e-06, + "loss": 0.2418, + "num_input_tokens_seen": 57702080, + "step": 18335 + }, + { + "epoch": 1.1740605595032327, + "grad_norm": 24.3721981048584, + "learning_rate": 4.728332399364251e-06, + "loss": 0.3131, + "num_input_tokens_seen": 57716992, + "step": 18340 + }, + { + "epoch": 1.1743806414442097, + "grad_norm": 36.07631301879883, + "learning_rate": 4.728079079099962e-06, + "loss": 0.2745, + "num_input_tokens_seen": 57733248, + "step": 18345 + }, + { + "epoch": 1.1747007233851865, + "grad_norm": 38.79359817504883, + "learning_rate": 4.727825647577009e-06, + "loss": 0.2247, + "num_input_tokens_seen": 57748288, + "step": 18350 + }, + { + "epoch": 1.1750208053261635, + "grad_norm": 21.174264907836914, + "learning_rate": 4.727572104808049e-06, + "loss": 0.3643, + "num_input_tokens_seen": 57764480, + "step": 18355 + }, + { + "epoch": 1.1753408872671405, + "grad_norm": 29.208715438842773, + "learning_rate": 4.72731845080574e-06, + "loss": 0.536, + "num_input_tokens_seen": 57780160, + "step": 18360 + }, + { + "epoch": 1.1756609692081172, + "grad_norm": 22.50992774963379, + "learning_rate": 4.72706468558275e-06, + "loss": 0.331, + "num_input_tokens_seen": 57794560, + "step": 18365 + }, + { + "epoch": 1.1759810511490942, + "grad_norm": 23.504533767700195, + "learning_rate": 4.726810809151749e-06, + "loss": 0.4336, + "num_input_tokens_seen": 57810176, + "step": 18370 + }, + { + "epoch": 1.176301133090071, + "grad_norm": 26.033355712890625, + "learning_rate": 4.726556821525415e-06, + "loss": 0.4832, + "num_input_tokens_seen": 57826240, + "step": 18375 + }, + { + "epoch": 1.176621215031048, + "grad_norm": 16.434282302856445, + "learning_rate": 4.726302722716433e-06, + "loss": 0.274, + "num_input_tokens_seen": 57842560, + "step": 18380 + }, + { + "epoch": 1.1769412969720248, + "grad_norm": 9.878183364868164, + "learning_rate": 4.726048512737488e-06, + "loss": 0.3844, + "num_input_tokens_seen": 57858176, + "step": 18385 + }, + { + "epoch": 1.1772613789130018, + "grad_norm": 20.873783111572266, + "learning_rate": 4.725794191601275e-06, + "loss": 0.2736, + "num_input_tokens_seen": 57874432, + "step": 18390 + }, + { + "epoch": 1.1775814608539785, + "grad_norm": 32.77096176147461, + "learning_rate": 4.725539759320494e-06, + "loss": 0.3454, + "num_input_tokens_seen": 57890496, + "step": 18395 + }, + { + "epoch": 1.1779015427949555, + "grad_norm": 22.18227767944336, + "learning_rate": 4.72528521590785e-06, + "loss": 0.2779, + "num_input_tokens_seen": 57905856, + "step": 18400 + }, + { + "epoch": 1.1782216247359325, + "grad_norm": 23.856708526611328, + "learning_rate": 4.725030561376054e-06, + "loss": 0.4571, + "num_input_tokens_seen": 57922112, + "step": 18405 + }, + { + "epoch": 1.1785417066769093, + "grad_norm": 46.64455795288086, + "learning_rate": 4.724775795737821e-06, + "loss": 0.3497, + "num_input_tokens_seen": 57937152, + "step": 18410 + }, + { + "epoch": 1.1788617886178863, + "grad_norm": 9.414470672607422, + "learning_rate": 4.724520919005873e-06, + "loss": 0.2677, + "num_input_tokens_seen": 57952832, + "step": 18415 + }, + { + "epoch": 1.179181870558863, + "grad_norm": 34.32206726074219, + "learning_rate": 4.724265931192938e-06, + "loss": 0.2739, + "num_input_tokens_seen": 57968448, + "step": 18420 + }, + { + "epoch": 1.17950195249984, + "grad_norm": 24.75952911376953, + "learning_rate": 4.724010832311747e-06, + "loss": 0.333, + "num_input_tokens_seen": 57984192, + "step": 18425 + }, + { + "epoch": 1.1798220344408168, + "grad_norm": 14.251849174499512, + "learning_rate": 4.723755622375041e-06, + "loss": 0.2662, + "num_input_tokens_seen": 57999744, + "step": 18430 + }, + { + "epoch": 1.1801421163817938, + "grad_norm": 48.04781723022461, + "learning_rate": 4.723500301395562e-06, + "loss": 0.3882, + "num_input_tokens_seen": 58014976, + "step": 18435 + }, + { + "epoch": 1.1804621983227706, + "grad_norm": 32.248416900634766, + "learning_rate": 4.723244869386059e-06, + "loss": 0.3419, + "num_input_tokens_seen": 58030464, + "step": 18440 + }, + { + "epoch": 1.1807822802637475, + "grad_norm": 31.138734817504883, + "learning_rate": 4.7229893263592885e-06, + "loss": 0.3195, + "num_input_tokens_seen": 58045760, + "step": 18445 + }, + { + "epoch": 1.1811023622047245, + "grad_norm": 9.666783332824707, + "learning_rate": 4.722733672328009e-06, + "loss": 0.276, + "num_input_tokens_seen": 58061184, + "step": 18450 + }, + { + "epoch": 1.1814224441457013, + "grad_norm": 38.26860427856445, + "learning_rate": 4.722477907304988e-06, + "loss": 0.2063, + "num_input_tokens_seen": 58076288, + "step": 18455 + }, + { + "epoch": 1.181742526086678, + "grad_norm": 6.198585510253906, + "learning_rate": 4.722222031302998e-06, + "loss": 0.338, + "num_input_tokens_seen": 58091072, + "step": 18460 + }, + { + "epoch": 1.182062608027655, + "grad_norm": 26.80122184753418, + "learning_rate": 4.721966044334814e-06, + "loss": 0.415, + "num_input_tokens_seen": 58105984, + "step": 18465 + }, + { + "epoch": 1.182382689968632, + "grad_norm": 15.703590393066406, + "learning_rate": 4.72170994641322e-06, + "loss": 0.3179, + "num_input_tokens_seen": 58122176, + "step": 18470 + }, + { + "epoch": 1.1827027719096088, + "grad_norm": 13.793859481811523, + "learning_rate": 4.721453737551003e-06, + "loss": 0.3703, + "num_input_tokens_seen": 58137280, + "step": 18475 + }, + { + "epoch": 1.1830228538505858, + "grad_norm": 40.98777770996094, + "learning_rate": 4.721197417760958e-06, + "loss": 0.2988, + "num_input_tokens_seen": 58153216, + "step": 18480 + }, + { + "epoch": 1.1833429357915626, + "grad_norm": 10.4723482131958, + "learning_rate": 4.720940987055884e-06, + "loss": 0.3089, + "num_input_tokens_seen": 58170304, + "step": 18485 + }, + { + "epoch": 1.1836630177325396, + "grad_norm": 27.42559814453125, + "learning_rate": 4.720684445448585e-06, + "loss": 0.2705, + "num_input_tokens_seen": 58187328, + "step": 18490 + }, + { + "epoch": 1.1839830996735163, + "grad_norm": 35.62530517578125, + "learning_rate": 4.7204277929518725e-06, + "loss": 0.3306, + "num_input_tokens_seen": 58203008, + "step": 18495 + }, + { + "epoch": 1.1843031816144933, + "grad_norm": 26.57889747619629, + "learning_rate": 4.720171029578561e-06, + "loss": 0.2484, + "num_input_tokens_seen": 58220288, + "step": 18500 + }, + { + "epoch": 1.18462326355547, + "grad_norm": 25.909589767456055, + "learning_rate": 4.719914155341473e-06, + "loss": 0.4562, + "num_input_tokens_seen": 58235264, + "step": 18505 + }, + { + "epoch": 1.184943345496447, + "grad_norm": 31.718360900878906, + "learning_rate": 4.719657170253436e-06, + "loss": 0.367, + "num_input_tokens_seen": 58250176, + "step": 18510 + }, + { + "epoch": 1.185263427437424, + "grad_norm": 39.758384704589844, + "learning_rate": 4.71940007432728e-06, + "loss": 0.3158, + "num_input_tokens_seen": 58266432, + "step": 18515 + }, + { + "epoch": 1.1855835093784008, + "grad_norm": 30.775436401367188, + "learning_rate": 4.719142867575847e-06, + "loss": 0.347, + "num_input_tokens_seen": 58282368, + "step": 18520 + }, + { + "epoch": 1.1859035913193778, + "grad_norm": 8.909859657287598, + "learning_rate": 4.718885550011977e-06, + "loss": 0.1971, + "num_input_tokens_seen": 58297856, + "step": 18525 + }, + { + "epoch": 1.1862236732603546, + "grad_norm": 33.578773498535156, + "learning_rate": 4.71862812164852e-06, + "loss": 0.372, + "num_input_tokens_seen": 58313344, + "step": 18530 + }, + { + "epoch": 1.1865437552013316, + "grad_norm": 29.584659576416016, + "learning_rate": 4.718370582498331e-06, + "loss": 0.38, + "num_input_tokens_seen": 58329024, + "step": 18535 + }, + { + "epoch": 1.1868638371423084, + "grad_norm": 55.64480972290039, + "learning_rate": 4.718112932574271e-06, + "loss": 0.5988, + "num_input_tokens_seen": 58344320, + "step": 18540 + }, + { + "epoch": 1.1871839190832854, + "grad_norm": 9.809324264526367, + "learning_rate": 4.7178551718892045e-06, + "loss": 0.3548, + "num_input_tokens_seen": 58362432, + "step": 18545 + }, + { + "epoch": 1.1875040010242621, + "grad_norm": 47.05046463012695, + "learning_rate": 4.717597300456003e-06, + "loss": 0.3206, + "num_input_tokens_seen": 58377536, + "step": 18550 + }, + { + "epoch": 1.1878240829652391, + "grad_norm": 62.21199035644531, + "learning_rate": 4.717339318287543e-06, + "loss": 0.4159, + "num_input_tokens_seen": 58393088, + "step": 18555 + }, + { + "epoch": 1.188144164906216, + "grad_norm": 12.271576881408691, + "learning_rate": 4.717081225396708e-06, + "loss": 0.5395, + "num_input_tokens_seen": 58408000, + "step": 18560 + }, + { + "epoch": 1.1884642468471929, + "grad_norm": 22.90167236328125, + "learning_rate": 4.716823021796385e-06, + "loss": 0.2805, + "num_input_tokens_seen": 58423168, + "step": 18565 + }, + { + "epoch": 1.1887843287881699, + "grad_norm": 37.14637756347656, + "learning_rate": 4.716564707499467e-06, + "loss": 0.4687, + "num_input_tokens_seen": 58437952, + "step": 18570 + }, + { + "epoch": 1.1891044107291466, + "grad_norm": 27.129735946655273, + "learning_rate": 4.716306282518852e-06, + "loss": 0.2759, + "num_input_tokens_seen": 58454784, + "step": 18575 + }, + { + "epoch": 1.1894244926701236, + "grad_norm": 25.98549461364746, + "learning_rate": 4.716047746867447e-06, + "loss": 0.4194, + "num_input_tokens_seen": 58470592, + "step": 18580 + }, + { + "epoch": 1.1897445746111004, + "grad_norm": 20.43709945678711, + "learning_rate": 4.7157891005581605e-06, + "loss": 0.2592, + "num_input_tokens_seen": 58486080, + "step": 18585 + }, + { + "epoch": 1.1900646565520774, + "grad_norm": 39.80817413330078, + "learning_rate": 4.715530343603907e-06, + "loss": 0.4028, + "num_input_tokens_seen": 58502016, + "step": 18590 + }, + { + "epoch": 1.1903847384930542, + "grad_norm": 32.82676315307617, + "learning_rate": 4.71527147601761e-06, + "loss": 0.372, + "num_input_tokens_seen": 58521280, + "step": 18595 + }, + { + "epoch": 1.1907048204340311, + "grad_norm": 72.57610321044922, + "learning_rate": 4.7150124978121924e-06, + "loss": 0.5035, + "num_input_tokens_seen": 58536960, + "step": 18600 + }, + { + "epoch": 1.191024902375008, + "grad_norm": 23.30989646911621, + "learning_rate": 4.7147534090005896e-06, + "loss": 0.3701, + "num_input_tokens_seen": 58552448, + "step": 18605 + }, + { + "epoch": 1.191344984315985, + "grad_norm": 20.721412658691406, + "learning_rate": 4.714494209595738e-06, + "loss": 0.4091, + "num_input_tokens_seen": 58567424, + "step": 18610 + }, + { + "epoch": 1.1916650662569617, + "grad_norm": 21.73589515686035, + "learning_rate": 4.714234899610579e-06, + "loss": 0.2487, + "num_input_tokens_seen": 58583296, + "step": 18615 + }, + { + "epoch": 1.1919851481979387, + "grad_norm": 12.265676498413086, + "learning_rate": 4.713975479058064e-06, + "loss": 0.2477, + "num_input_tokens_seen": 58598976, + "step": 18620 + }, + { + "epoch": 1.1923052301389157, + "grad_norm": 31.363983154296875, + "learning_rate": 4.713715947951145e-06, + "loss": 0.3458, + "num_input_tokens_seen": 58615872, + "step": 18625 + }, + { + "epoch": 1.1926253120798924, + "grad_norm": 21.557554244995117, + "learning_rate": 4.7134563063027825e-06, + "loss": 0.2194, + "num_input_tokens_seen": 58630912, + "step": 18630 + }, + { + "epoch": 1.1929453940208694, + "grad_norm": 30.480670928955078, + "learning_rate": 4.713196554125942e-06, + "loss": 0.4626, + "num_input_tokens_seen": 58646144, + "step": 18635 + }, + { + "epoch": 1.1932654759618462, + "grad_norm": 31.754995346069336, + "learning_rate": 4.712936691433593e-06, + "loss": 0.3525, + "num_input_tokens_seen": 58661248, + "step": 18640 + }, + { + "epoch": 1.1935855579028232, + "grad_norm": 25.952096939086914, + "learning_rate": 4.712676718238714e-06, + "loss": 0.3118, + "num_input_tokens_seen": 58677824, + "step": 18645 + }, + { + "epoch": 1.1939056398438, + "grad_norm": 20.935260772705078, + "learning_rate": 4.712416634554283e-06, + "loss": 0.253, + "num_input_tokens_seen": 58692544, + "step": 18650 + }, + { + "epoch": 1.194225721784777, + "grad_norm": 25.57931137084961, + "learning_rate": 4.71215644039329e-06, + "loss": 0.311, + "num_input_tokens_seen": 58709248, + "step": 18655 + }, + { + "epoch": 1.1945458037257537, + "grad_norm": 45.26525115966797, + "learning_rate": 4.711896135768728e-06, + "loss": 0.306, + "num_input_tokens_seen": 58724416, + "step": 18660 + }, + { + "epoch": 1.1948658856667307, + "grad_norm": 21.907739639282227, + "learning_rate": 4.711635720693593e-06, + "loss": 0.326, + "num_input_tokens_seen": 58739392, + "step": 18665 + }, + { + "epoch": 1.1951859676077077, + "grad_norm": 18.516231536865234, + "learning_rate": 4.711375195180891e-06, + "loss": 0.298, + "num_input_tokens_seen": 58755072, + "step": 18670 + }, + { + "epoch": 1.1955060495486844, + "grad_norm": 27.7014102935791, + "learning_rate": 4.71111455924363e-06, + "loss": 0.415, + "num_input_tokens_seen": 58771840, + "step": 18675 + }, + { + "epoch": 1.1958261314896614, + "grad_norm": 15.65793514251709, + "learning_rate": 4.710853812894825e-06, + "loss": 0.3134, + "num_input_tokens_seen": 58787648, + "step": 18680 + }, + { + "epoch": 1.1961462134306382, + "grad_norm": 37.254066467285156, + "learning_rate": 4.710592956147497e-06, + "loss": 0.1993, + "num_input_tokens_seen": 58803520, + "step": 18685 + }, + { + "epoch": 1.1964662953716152, + "grad_norm": 37.50849914550781, + "learning_rate": 4.710331989014671e-06, + "loss": 0.3489, + "num_input_tokens_seen": 58820352, + "step": 18690 + }, + { + "epoch": 1.196786377312592, + "grad_norm": 13.680181503295898, + "learning_rate": 4.710070911509379e-06, + "loss": 0.3614, + "num_input_tokens_seen": 58836032, + "step": 18695 + }, + { + "epoch": 1.197106459253569, + "grad_norm": 24.067977905273438, + "learning_rate": 4.709809723644657e-06, + "loss": 0.3157, + "num_input_tokens_seen": 58852096, + "step": 18700 + }, + { + "epoch": 1.1974265411945457, + "grad_norm": 12.619222640991211, + "learning_rate": 4.709548425433548e-06, + "loss": 0.3826, + "num_input_tokens_seen": 58867776, + "step": 18705 + }, + { + "epoch": 1.1977466231355227, + "grad_norm": 12.64548110961914, + "learning_rate": 4.7092870168891e-06, + "loss": 0.3949, + "num_input_tokens_seen": 58882112, + "step": 18710 + }, + { + "epoch": 1.1980667050764997, + "grad_norm": 17.309431076049805, + "learning_rate": 4.709025498024367e-06, + "loss": 0.3101, + "num_input_tokens_seen": 58897792, + "step": 18715 + }, + { + "epoch": 1.1983867870174765, + "grad_norm": 20.689693450927734, + "learning_rate": 4.708763868852405e-06, + "loss": 0.3016, + "num_input_tokens_seen": 58912576, + "step": 18720 + }, + { + "epoch": 1.1987068689584532, + "grad_norm": 26.340726852416992, + "learning_rate": 4.708502129386282e-06, + "loss": 0.2631, + "num_input_tokens_seen": 58928704, + "step": 18725 + }, + { + "epoch": 1.1990269508994302, + "grad_norm": 35.72788619995117, + "learning_rate": 4.708240279639066e-06, + "loss": 0.3297, + "num_input_tokens_seen": 58946624, + "step": 18730 + }, + { + "epoch": 1.1993470328404072, + "grad_norm": 23.788005828857422, + "learning_rate": 4.7079783196238324e-06, + "loss": 0.3274, + "num_input_tokens_seen": 58962496, + "step": 18735 + }, + { + "epoch": 1.199667114781384, + "grad_norm": 47.66315460205078, + "learning_rate": 4.707716249353662e-06, + "loss": 0.3828, + "num_input_tokens_seen": 58978048, + "step": 18740 + }, + { + "epoch": 1.199987196722361, + "grad_norm": 39.25474166870117, + "learning_rate": 4.7074540688416425e-06, + "loss": 0.2367, + "num_input_tokens_seen": 58994688, + "step": 18745 + }, + { + "epoch": 1.2003072786633378, + "grad_norm": 34.8856086730957, + "learning_rate": 4.707191778100865e-06, + "loss": 0.2651, + "num_input_tokens_seen": 59010240, + "step": 18750 + }, + { + "epoch": 1.2006273606043147, + "grad_norm": 53.86892318725586, + "learning_rate": 4.706929377144427e-06, + "loss": 0.3063, + "num_input_tokens_seen": 59026368, + "step": 18755 + }, + { + "epoch": 1.2009474425452915, + "grad_norm": 9.659152030944824, + "learning_rate": 4.706666865985431e-06, + "loss": 0.3671, + "num_input_tokens_seen": 59041920, + "step": 18760 + }, + { + "epoch": 1.2012675244862685, + "grad_norm": 24.514175415039062, + "learning_rate": 4.706404244636986e-06, + "loss": 0.3089, + "num_input_tokens_seen": 59057600, + "step": 18765 + }, + { + "epoch": 1.2015876064272453, + "grad_norm": 13.80402660369873, + "learning_rate": 4.7061415131122055e-06, + "loss": 0.2474, + "num_input_tokens_seen": 59074304, + "step": 18770 + }, + { + "epoch": 1.2019076883682223, + "grad_norm": 13.538579940795898, + "learning_rate": 4.70587867142421e-06, + "loss": 0.4899, + "num_input_tokens_seen": 59089472, + "step": 18775 + }, + { + "epoch": 1.2022277703091993, + "grad_norm": 24.059415817260742, + "learning_rate": 4.705615719586123e-06, + "loss": 0.2991, + "num_input_tokens_seen": 59105664, + "step": 18780 + }, + { + "epoch": 1.202547852250176, + "grad_norm": 21.40675163269043, + "learning_rate": 4.705352657611075e-06, + "loss": 0.2843, + "num_input_tokens_seen": 59121408, + "step": 18785 + }, + { + "epoch": 1.202867934191153, + "grad_norm": 8.37218952178955, + "learning_rate": 4.705089485512203e-06, + "loss": 0.331, + "num_input_tokens_seen": 59137088, + "step": 18790 + }, + { + "epoch": 1.2031880161321298, + "grad_norm": 13.719682693481445, + "learning_rate": 4.704826203302649e-06, + "loss": 0.3433, + "num_input_tokens_seen": 59153344, + "step": 18795 + }, + { + "epoch": 1.2035080980731068, + "grad_norm": 35.47945785522461, + "learning_rate": 4.704562810995558e-06, + "loss": 0.3083, + "num_input_tokens_seen": 59169536, + "step": 18800 + }, + { + "epoch": 1.2038281800140835, + "grad_norm": 52.5627326965332, + "learning_rate": 4.704299308604083e-06, + "loss": 0.4914, + "num_input_tokens_seen": 59185216, + "step": 18805 + }, + { + "epoch": 1.2041482619550605, + "grad_norm": 14.947468757629395, + "learning_rate": 4.704035696141383e-06, + "loss": 0.3957, + "num_input_tokens_seen": 59200960, + "step": 18810 + }, + { + "epoch": 1.2044683438960373, + "grad_norm": 18.580204010009766, + "learning_rate": 4.703771973620621e-06, + "loss": 0.3012, + "num_input_tokens_seen": 59216192, + "step": 18815 + }, + { + "epoch": 1.2047884258370143, + "grad_norm": 42.10118103027344, + "learning_rate": 4.7035081410549645e-06, + "loss": 0.3132, + "num_input_tokens_seen": 59232576, + "step": 18820 + }, + { + "epoch": 1.2051085077779913, + "grad_norm": 22.137310028076172, + "learning_rate": 4.703244198457591e-06, + "loss": 0.2981, + "num_input_tokens_seen": 59247424, + "step": 18825 + }, + { + "epoch": 1.205428589718968, + "grad_norm": 27.21042251586914, + "learning_rate": 4.702980145841677e-06, + "loss": 0.297, + "num_input_tokens_seen": 59262848, + "step": 18830 + }, + { + "epoch": 1.205748671659945, + "grad_norm": 22.315021514892578, + "learning_rate": 4.70271598322041e-06, + "loss": 0.2949, + "num_input_tokens_seen": 59278208, + "step": 18835 + }, + { + "epoch": 1.2060687536009218, + "grad_norm": 14.942492485046387, + "learning_rate": 4.7024517106069785e-06, + "loss": 0.3256, + "num_input_tokens_seen": 59293504, + "step": 18840 + }, + { + "epoch": 1.2063888355418988, + "grad_norm": 16.58744239807129, + "learning_rate": 4.702187328014583e-06, + "loss": 0.4049, + "num_input_tokens_seen": 59308416, + "step": 18845 + }, + { + "epoch": 1.2067089174828756, + "grad_norm": 12.619394302368164, + "learning_rate": 4.701922835456423e-06, + "loss": 0.3353, + "num_input_tokens_seen": 59323264, + "step": 18850 + }, + { + "epoch": 1.2070289994238526, + "grad_norm": 33.1295166015625, + "learning_rate": 4.701658232945705e-06, + "loss": 0.2967, + "num_input_tokens_seen": 59339584, + "step": 18855 + }, + { + "epoch": 1.2073490813648293, + "grad_norm": 59.919673919677734, + "learning_rate": 4.701393520495643e-06, + "loss": 0.3586, + "num_input_tokens_seen": 59354880, + "step": 18860 + }, + { + "epoch": 1.2076691633058063, + "grad_norm": 20.0297794342041, + "learning_rate": 4.701128698119456e-06, + "loss": 0.4013, + "num_input_tokens_seen": 59374464, + "step": 18865 + }, + { + "epoch": 1.207989245246783, + "grad_norm": 26.89152717590332, + "learning_rate": 4.700863765830366e-06, + "loss": 0.3701, + "num_input_tokens_seen": 59389760, + "step": 18870 + }, + { + "epoch": 1.20830932718776, + "grad_norm": 28.875944137573242, + "learning_rate": 4.700598723641604e-06, + "loss": 0.2765, + "num_input_tokens_seen": 59403968, + "step": 18875 + }, + { + "epoch": 1.2086294091287368, + "grad_norm": 12.868134498596191, + "learning_rate": 4.700333571566405e-06, + "loss": 0.3123, + "num_input_tokens_seen": 59419520, + "step": 18880 + }, + { + "epoch": 1.2089494910697138, + "grad_norm": 52.555519104003906, + "learning_rate": 4.7000683096180065e-06, + "loss": 0.3644, + "num_input_tokens_seen": 59435136, + "step": 18885 + }, + { + "epoch": 1.2092695730106908, + "grad_norm": 15.992376327514648, + "learning_rate": 4.699802937809658e-06, + "loss": 0.3423, + "num_input_tokens_seen": 59450944, + "step": 18890 + }, + { + "epoch": 1.2095896549516676, + "grad_norm": 38.563926696777344, + "learning_rate": 4.699537456154609e-06, + "loss": 0.2624, + "num_input_tokens_seen": 59467840, + "step": 18895 + }, + { + "epoch": 1.2099097368926446, + "grad_norm": 38.09050369262695, + "learning_rate": 4.6992718646661165e-06, + "loss": 0.2928, + "num_input_tokens_seen": 59483584, + "step": 18900 + }, + { + "epoch": 1.2102298188336214, + "grad_norm": 9.326614379882812, + "learning_rate": 4.699006163357443e-06, + "loss": 0.4202, + "num_input_tokens_seen": 59499072, + "step": 18905 + }, + { + "epoch": 1.2105499007745983, + "grad_norm": 24.08602523803711, + "learning_rate": 4.698740352241854e-06, + "loss": 0.2843, + "num_input_tokens_seen": 59514176, + "step": 18910 + }, + { + "epoch": 1.210869982715575, + "grad_norm": 33.48146057128906, + "learning_rate": 4.698474431332626e-06, + "loss": 0.3067, + "num_input_tokens_seen": 59530560, + "step": 18915 + }, + { + "epoch": 1.211190064656552, + "grad_norm": 66.30277252197266, + "learning_rate": 4.698208400643036e-06, + "loss": 0.2967, + "num_input_tokens_seen": 59546304, + "step": 18920 + }, + { + "epoch": 1.2115101465975289, + "grad_norm": 10.764317512512207, + "learning_rate": 4.697942260186369e-06, + "loss": 0.3578, + "num_input_tokens_seen": 59562048, + "step": 18925 + }, + { + "epoch": 1.2118302285385059, + "grad_norm": 17.45640754699707, + "learning_rate": 4.697676009975914e-06, + "loss": 0.3102, + "num_input_tokens_seen": 59578304, + "step": 18930 + }, + { + "epoch": 1.2121503104794829, + "grad_norm": 16.084854125976562, + "learning_rate": 4.6974096500249665e-06, + "loss": 0.3362, + "num_input_tokens_seen": 59595200, + "step": 18935 + }, + { + "epoch": 1.2124703924204596, + "grad_norm": 16.16868782043457, + "learning_rate": 4.697143180346827e-06, + "loss": 0.2218, + "num_input_tokens_seen": 59611968, + "step": 18940 + }, + { + "epoch": 1.2127904743614366, + "grad_norm": 36.7818717956543, + "learning_rate": 4.696876600954801e-06, + "loss": 0.3227, + "num_input_tokens_seen": 59627648, + "step": 18945 + }, + { + "epoch": 1.2131105563024134, + "grad_norm": 21.108985900878906, + "learning_rate": 4.696609911862201e-06, + "loss": 0.3606, + "num_input_tokens_seen": 59643712, + "step": 18950 + }, + { + "epoch": 1.2134306382433904, + "grad_norm": 54.14109802246094, + "learning_rate": 4.696343113082344e-06, + "loss": 0.4345, + "num_input_tokens_seen": 59658560, + "step": 18955 + }, + { + "epoch": 1.2137507201843671, + "grad_norm": 35.20137405395508, + "learning_rate": 4.696076204628551e-06, + "loss": 0.3038, + "num_input_tokens_seen": 59674176, + "step": 18960 + }, + { + "epoch": 1.2140708021253441, + "grad_norm": 21.232759475708008, + "learning_rate": 4.6958091865141525e-06, + "loss": 0.3547, + "num_input_tokens_seen": 59688704, + "step": 18965 + }, + { + "epoch": 1.214390884066321, + "grad_norm": 37.539310455322266, + "learning_rate": 4.6955420587524804e-06, + "loss": 0.438, + "num_input_tokens_seen": 59705600, + "step": 18970 + }, + { + "epoch": 1.2147109660072979, + "grad_norm": 25.433605194091797, + "learning_rate": 4.695274821356874e-06, + "loss": 0.2542, + "num_input_tokens_seen": 59720896, + "step": 18975 + }, + { + "epoch": 1.2150310479482749, + "grad_norm": 55.720115661621094, + "learning_rate": 4.695007474340678e-06, + "loss": 0.3752, + "num_input_tokens_seen": 59736000, + "step": 18980 + }, + { + "epoch": 1.2153511298892516, + "grad_norm": 32.39884948730469, + "learning_rate": 4.694740017717243e-06, + "loss": 0.3307, + "num_input_tokens_seen": 59751168, + "step": 18985 + }, + { + "epoch": 1.2156712118302284, + "grad_norm": 12.805998802185059, + "learning_rate": 4.694472451499922e-06, + "loss": 0.3134, + "num_input_tokens_seen": 59766144, + "step": 18990 + }, + { + "epoch": 1.2159912937712054, + "grad_norm": 12.24770450592041, + "learning_rate": 4.6942047757020784e-06, + "loss": 0.248, + "num_input_tokens_seen": 59781632, + "step": 18995 + }, + { + "epoch": 1.2163113757121824, + "grad_norm": 17.59039306640625, + "learning_rate": 4.6939369903370765e-06, + "loss": 0.3223, + "num_input_tokens_seen": 59797632, + "step": 19000 + }, + { + "epoch": 1.2166314576531592, + "grad_norm": 29.481853485107422, + "learning_rate": 4.693669095418289e-06, + "loss": 0.2876, + "num_input_tokens_seen": 59813632, + "step": 19005 + }, + { + "epoch": 1.2169515395941362, + "grad_norm": 39.6058235168457, + "learning_rate": 4.693401090959095e-06, + "loss": 0.4526, + "num_input_tokens_seen": 59829056, + "step": 19010 + }, + { + "epoch": 1.217271621535113, + "grad_norm": 14.963493347167969, + "learning_rate": 4.693132976972874e-06, + "loss": 0.327, + "num_input_tokens_seen": 59844224, + "step": 19015 + }, + { + "epoch": 1.21759170347609, + "grad_norm": 26.577106475830078, + "learning_rate": 4.692864753473016e-06, + "loss": 0.3835, + "num_input_tokens_seen": 59859776, + "step": 19020 + }, + { + "epoch": 1.2179117854170667, + "grad_norm": 21.89593505859375, + "learning_rate": 4.6925964204729145e-06, + "loss": 0.455, + "num_input_tokens_seen": 59875456, + "step": 19025 + }, + { + "epoch": 1.2182318673580437, + "grad_norm": 14.466358184814453, + "learning_rate": 4.6923279779859685e-06, + "loss": 0.3682, + "num_input_tokens_seen": 59891840, + "step": 19030 + }, + { + "epoch": 1.2185519492990204, + "grad_norm": 24.67951774597168, + "learning_rate": 4.692059426025583e-06, + "loss": 0.3199, + "num_input_tokens_seen": 59907904, + "step": 19035 + }, + { + "epoch": 1.2188720312399974, + "grad_norm": 24.823122024536133, + "learning_rate": 4.691790764605168e-06, + "loss": 0.3169, + "num_input_tokens_seen": 59924672, + "step": 19040 + }, + { + "epoch": 1.2191921131809744, + "grad_norm": 40.144203186035156, + "learning_rate": 4.691521993738139e-06, + "loss": 0.3917, + "num_input_tokens_seen": 59940736, + "step": 19045 + }, + { + "epoch": 1.2195121951219512, + "grad_norm": 5.951833724975586, + "learning_rate": 4.691253113437916e-06, + "loss": 0.2466, + "num_input_tokens_seen": 59956992, + "step": 19050 + }, + { + "epoch": 1.2198322770629282, + "grad_norm": 26.025407791137695, + "learning_rate": 4.690984123717926e-06, + "loss": 0.5251, + "num_input_tokens_seen": 59972800, + "step": 19055 + }, + { + "epoch": 1.220152359003905, + "grad_norm": 15.221487045288086, + "learning_rate": 4.690715024591603e-06, + "loss": 0.3085, + "num_input_tokens_seen": 59989120, + "step": 19060 + }, + { + "epoch": 1.220472440944882, + "grad_norm": 22.482152938842773, + "learning_rate": 4.690445816072381e-06, + "loss": 0.3967, + "num_input_tokens_seen": 60004928, + "step": 19065 + }, + { + "epoch": 1.2207925228858587, + "grad_norm": 25.357664108276367, + "learning_rate": 4.690176498173705e-06, + "loss": 0.27, + "num_input_tokens_seen": 60020352, + "step": 19070 + }, + { + "epoch": 1.2211126048268357, + "grad_norm": 32.7623405456543, + "learning_rate": 4.689907070909024e-06, + "loss": 0.4531, + "num_input_tokens_seen": 60035456, + "step": 19075 + }, + { + "epoch": 1.2214326867678125, + "grad_norm": 9.303071022033691, + "learning_rate": 4.68963753429179e-06, + "loss": 0.3457, + "num_input_tokens_seen": 60050752, + "step": 19080 + }, + { + "epoch": 1.2217527687087895, + "grad_norm": 35.341209411621094, + "learning_rate": 4.689367888335462e-06, + "loss": 0.298, + "num_input_tokens_seen": 60066496, + "step": 19085 + }, + { + "epoch": 1.2220728506497665, + "grad_norm": 18.1331729888916, + "learning_rate": 4.689098133053507e-06, + "loss": 0.3365, + "num_input_tokens_seen": 60081728, + "step": 19090 + }, + { + "epoch": 1.2223929325907432, + "grad_norm": 7.743076324462891, + "learning_rate": 4.688828268459393e-06, + "loss": 0.2386, + "num_input_tokens_seen": 60098048, + "step": 19095 + }, + { + "epoch": 1.2227130145317202, + "grad_norm": 26.174863815307617, + "learning_rate": 4.6885582945665964e-06, + "loss": 0.4498, + "num_input_tokens_seen": 60112640, + "step": 19100 + }, + { + "epoch": 1.223033096472697, + "grad_norm": 11.846506118774414, + "learning_rate": 4.688288211388599e-06, + "loss": 0.2333, + "num_input_tokens_seen": 60128384, + "step": 19105 + }, + { + "epoch": 1.223353178413674, + "grad_norm": 12.731735229492188, + "learning_rate": 4.688018018938887e-06, + "loss": 0.3721, + "num_input_tokens_seen": 60144448, + "step": 19110 + }, + { + "epoch": 1.2236732603546507, + "grad_norm": 53.14432907104492, + "learning_rate": 4.6877477172309505e-06, + "loss": 0.4246, + "num_input_tokens_seen": 60159936, + "step": 19115 + }, + { + "epoch": 1.2239933422956277, + "grad_norm": 17.43817901611328, + "learning_rate": 4.687477306278289e-06, + "loss": 0.4009, + "num_input_tokens_seen": 60175808, + "step": 19120 + }, + { + "epoch": 1.2243134242366045, + "grad_norm": 12.340170860290527, + "learning_rate": 4.687206786094406e-06, + "loss": 0.3246, + "num_input_tokens_seen": 60191808, + "step": 19125 + }, + { + "epoch": 1.2246335061775815, + "grad_norm": 20.69961929321289, + "learning_rate": 4.686936156692809e-06, + "loss": 0.3218, + "num_input_tokens_seen": 60208192, + "step": 19130 + }, + { + "epoch": 1.2249535881185583, + "grad_norm": 19.660924911499023, + "learning_rate": 4.686665418087011e-06, + "loss": 0.3329, + "num_input_tokens_seen": 60222848, + "step": 19135 + }, + { + "epoch": 1.2252736700595352, + "grad_norm": 26.10528564453125, + "learning_rate": 4.686394570290532e-06, + "loss": 0.3869, + "num_input_tokens_seen": 60238336, + "step": 19140 + }, + { + "epoch": 1.225593752000512, + "grad_norm": 27.559946060180664, + "learning_rate": 4.686123613316896e-06, + "loss": 0.2233, + "num_input_tokens_seen": 60253376, + "step": 19145 + }, + { + "epoch": 1.225913833941489, + "grad_norm": 15.935603141784668, + "learning_rate": 4.685852547179635e-06, + "loss": 0.4171, + "num_input_tokens_seen": 60269056, + "step": 19150 + }, + { + "epoch": 1.226233915882466, + "grad_norm": 23.05735969543457, + "learning_rate": 4.685581371892283e-06, + "loss": 0.1714, + "num_input_tokens_seen": 60288704, + "step": 19155 + }, + { + "epoch": 1.2265539978234428, + "grad_norm": 22.555015563964844, + "learning_rate": 4.685310087468381e-06, + "loss": 0.317, + "num_input_tokens_seen": 60305152, + "step": 19160 + }, + { + "epoch": 1.2268740797644198, + "grad_norm": 11.968162536621094, + "learning_rate": 4.685038693921477e-06, + "loss": 0.3379, + "num_input_tokens_seen": 60319680, + "step": 19165 + }, + { + "epoch": 1.2271941617053965, + "grad_norm": 15.480086326599121, + "learning_rate": 4.684767191265122e-06, + "loss": 0.243, + "num_input_tokens_seen": 60334528, + "step": 19170 + }, + { + "epoch": 1.2275142436463735, + "grad_norm": 8.971840858459473, + "learning_rate": 4.684495579512873e-06, + "loss": 0.2693, + "num_input_tokens_seen": 60349824, + "step": 19175 + }, + { + "epoch": 1.2278343255873503, + "grad_norm": 47.13580322265625, + "learning_rate": 4.684223858678294e-06, + "loss": 0.4104, + "num_input_tokens_seen": 60364544, + "step": 19180 + }, + { + "epoch": 1.2281544075283273, + "grad_norm": 29.741819381713867, + "learning_rate": 4.683952028774952e-06, + "loss": 0.443, + "num_input_tokens_seen": 60380672, + "step": 19185 + }, + { + "epoch": 1.228474489469304, + "grad_norm": 69.77448272705078, + "learning_rate": 4.683680089816422e-06, + "loss": 0.3621, + "num_input_tokens_seen": 60396096, + "step": 19190 + }, + { + "epoch": 1.228794571410281, + "grad_norm": 20.486854553222656, + "learning_rate": 4.683408041816282e-06, + "loss": 0.336, + "num_input_tokens_seen": 60412224, + "step": 19195 + }, + { + "epoch": 1.229114653351258, + "grad_norm": 26.46690559387207, + "learning_rate": 4.683135884788118e-06, + "loss": 0.2713, + "num_input_tokens_seen": 60428160, + "step": 19200 + }, + { + "epoch": 1.2294347352922348, + "grad_norm": 14.917972564697266, + "learning_rate": 4.682863618745519e-06, + "loss": 0.2679, + "num_input_tokens_seen": 60444288, + "step": 19205 + }, + { + "epoch": 1.2297548172332118, + "grad_norm": 13.669499397277832, + "learning_rate": 4.6825912437020814e-06, + "loss": 0.2873, + "num_input_tokens_seen": 60459840, + "step": 19210 + }, + { + "epoch": 1.2300748991741886, + "grad_norm": 100.46513366699219, + "learning_rate": 4.682318759671406e-06, + "loss": 0.404, + "num_input_tokens_seen": 60474752, + "step": 19215 + }, + { + "epoch": 1.2303949811151655, + "grad_norm": 14.477781295776367, + "learning_rate": 4.682046166667098e-06, + "loss": 0.1633, + "num_input_tokens_seen": 60489408, + "step": 19220 + }, + { + "epoch": 1.2307150630561423, + "grad_norm": 41.72959518432617, + "learning_rate": 4.6817734647027715e-06, + "loss": 0.363, + "num_input_tokens_seen": 60504704, + "step": 19225 + }, + { + "epoch": 1.2310351449971193, + "grad_norm": 31.17050552368164, + "learning_rate": 4.6815006537920424e-06, + "loss": 0.2782, + "num_input_tokens_seen": 60520960, + "step": 19230 + }, + { + "epoch": 1.231355226938096, + "grad_norm": 45.50294876098633, + "learning_rate": 4.681227733948533e-06, + "loss": 0.4007, + "num_input_tokens_seen": 60535424, + "step": 19235 + }, + { + "epoch": 1.231675308879073, + "grad_norm": 14.450515747070312, + "learning_rate": 4.680954705185873e-06, + "loss": 0.2642, + "num_input_tokens_seen": 60550976, + "step": 19240 + }, + { + "epoch": 1.23199539082005, + "grad_norm": 16.786252975463867, + "learning_rate": 4.680681567517694e-06, + "loss": 0.2975, + "num_input_tokens_seen": 60566784, + "step": 19245 + }, + { + "epoch": 1.2323154727610268, + "grad_norm": 41.08578109741211, + "learning_rate": 4.6804083209576375e-06, + "loss": 0.3432, + "num_input_tokens_seen": 60582400, + "step": 19250 + }, + { + "epoch": 1.2326355547020036, + "grad_norm": 22.572046279907227, + "learning_rate": 4.680134965519346e-06, + "loss": 0.3691, + "num_input_tokens_seen": 60597056, + "step": 19255 + }, + { + "epoch": 1.2329556366429806, + "grad_norm": 11.293978691101074, + "learning_rate": 4.67986150121647e-06, + "loss": 0.2932, + "num_input_tokens_seen": 60612864, + "step": 19260 + }, + { + "epoch": 1.2332757185839576, + "grad_norm": 58.63357925415039, + "learning_rate": 4.679587928062666e-06, + "loss": 0.326, + "num_input_tokens_seen": 60627968, + "step": 19265 + }, + { + "epoch": 1.2335958005249343, + "grad_norm": 37.763648986816406, + "learning_rate": 4.679314246071594e-06, + "loss": 0.4103, + "num_input_tokens_seen": 60644160, + "step": 19270 + }, + { + "epoch": 1.2339158824659113, + "grad_norm": 23.682357788085938, + "learning_rate": 4.67904045525692e-06, + "loss": 0.3367, + "num_input_tokens_seen": 60659648, + "step": 19275 + }, + { + "epoch": 1.234235964406888, + "grad_norm": 37.54637908935547, + "learning_rate": 4.678766555632315e-06, + "loss": 0.4156, + "num_input_tokens_seen": 60675712, + "step": 19280 + }, + { + "epoch": 1.234556046347865, + "grad_norm": 9.498174667358398, + "learning_rate": 4.678492547211459e-06, + "loss": 0.2761, + "num_input_tokens_seen": 60691456, + "step": 19285 + }, + { + "epoch": 1.2348761282888419, + "grad_norm": 8.001338958740234, + "learning_rate": 4.67821843000803e-06, + "loss": 0.3125, + "num_input_tokens_seen": 60708096, + "step": 19290 + }, + { + "epoch": 1.2351962102298188, + "grad_norm": 29.50577735900879, + "learning_rate": 4.67794420403572e-06, + "loss": 0.5368, + "num_input_tokens_seen": 60724352, + "step": 19295 + }, + { + "epoch": 1.2355162921707956, + "grad_norm": 31.80756378173828, + "learning_rate": 4.677669869308221e-06, + "loss": 0.3617, + "num_input_tokens_seen": 60739968, + "step": 19300 + }, + { + "epoch": 1.2358363741117726, + "grad_norm": 13.274550437927246, + "learning_rate": 4.677395425839231e-06, + "loss": 0.3916, + "num_input_tokens_seen": 60754240, + "step": 19305 + }, + { + "epoch": 1.2361564560527496, + "grad_norm": 47.49919891357422, + "learning_rate": 4.677120873642455e-06, + "loss": 0.5291, + "num_input_tokens_seen": 60769728, + "step": 19310 + }, + { + "epoch": 1.2364765379937264, + "grad_norm": 16.534587860107422, + "learning_rate": 4.676846212731604e-06, + "loss": 0.3187, + "num_input_tokens_seen": 60785408, + "step": 19315 + }, + { + "epoch": 1.2367966199347034, + "grad_norm": 42.33103942871094, + "learning_rate": 4.6765714431203914e-06, + "loss": 0.3089, + "num_input_tokens_seen": 60801152, + "step": 19320 + }, + { + "epoch": 1.2371167018756801, + "grad_norm": 28.033870697021484, + "learning_rate": 4.676296564822538e-06, + "loss": 0.2893, + "num_input_tokens_seen": 60817088, + "step": 19325 + }, + { + "epoch": 1.2374367838166571, + "grad_norm": 8.320382118225098, + "learning_rate": 4.67602157785177e-06, + "loss": 0.3399, + "num_input_tokens_seen": 60833088, + "step": 19330 + }, + { + "epoch": 1.2377568657576339, + "grad_norm": 17.512697219848633, + "learning_rate": 4.675746482221819e-06, + "loss": 0.2608, + "num_input_tokens_seen": 60850496, + "step": 19335 + }, + { + "epoch": 1.2380769476986109, + "grad_norm": 18.1157283782959, + "learning_rate": 4.6754712779464215e-06, + "loss": 0.3079, + "num_input_tokens_seen": 60867200, + "step": 19340 + }, + { + "epoch": 1.2383970296395876, + "grad_norm": 28.63307762145996, + "learning_rate": 4.675195965039321e-06, + "loss": 0.4516, + "num_input_tokens_seen": 60883072, + "step": 19345 + }, + { + "epoch": 1.2387171115805646, + "grad_norm": 10.280823707580566, + "learning_rate": 4.674920543514263e-06, + "loss": 0.3426, + "num_input_tokens_seen": 60899392, + "step": 19350 + }, + { + "epoch": 1.2390371935215416, + "grad_norm": 25.340667724609375, + "learning_rate": 4.674645013385002e-06, + "loss": 0.2972, + "num_input_tokens_seen": 60914560, + "step": 19355 + }, + { + "epoch": 1.2393572754625184, + "grad_norm": 18.750410079956055, + "learning_rate": 4.674369374665297e-06, + "loss": 0.4091, + "num_input_tokens_seen": 60929088, + "step": 19360 + }, + { + "epoch": 1.2396773574034954, + "grad_norm": 29.218711853027344, + "learning_rate": 4.6740936273689105e-06, + "loss": 0.3021, + "num_input_tokens_seen": 60944576, + "step": 19365 + }, + { + "epoch": 1.2399974393444722, + "grad_norm": 27.181718826293945, + "learning_rate": 4.673817771509613e-06, + "loss": 0.4317, + "num_input_tokens_seen": 60959808, + "step": 19370 + }, + { + "epoch": 1.2403175212854491, + "grad_norm": 16.304479598999023, + "learning_rate": 4.67354180710118e-06, + "loss": 0.2599, + "num_input_tokens_seen": 60975488, + "step": 19375 + }, + { + "epoch": 1.240637603226426, + "grad_norm": 23.996826171875, + "learning_rate": 4.673265734157389e-06, + "loss": 0.2838, + "num_input_tokens_seen": 60991168, + "step": 19380 + }, + { + "epoch": 1.240957685167403, + "grad_norm": 30.391830444335938, + "learning_rate": 4.672989552692029e-06, + "loss": 0.3576, + "num_input_tokens_seen": 61006912, + "step": 19385 + }, + { + "epoch": 1.2412777671083797, + "grad_norm": 55.03622817993164, + "learning_rate": 4.672713262718888e-06, + "loss": 0.3117, + "num_input_tokens_seen": 61022528, + "step": 19390 + }, + { + "epoch": 1.2415978490493567, + "grad_norm": 46.5276985168457, + "learning_rate": 4.672436864251765e-06, + "loss": 0.4646, + "num_input_tokens_seen": 61037056, + "step": 19395 + }, + { + "epoch": 1.2419179309903334, + "grad_norm": 34.59336471557617, + "learning_rate": 4.67216035730446e-06, + "loss": 0.2904, + "num_input_tokens_seen": 61052288, + "step": 19400 + }, + { + "epoch": 1.2422380129313104, + "grad_norm": 11.25224781036377, + "learning_rate": 4.671883741890782e-06, + "loss": 0.3429, + "num_input_tokens_seen": 61069248, + "step": 19405 + }, + { + "epoch": 1.2425580948722872, + "grad_norm": 20.135583877563477, + "learning_rate": 4.671607018024541e-06, + "loss": 0.3064, + "num_input_tokens_seen": 61084096, + "step": 19410 + }, + { + "epoch": 1.2428781768132642, + "grad_norm": 11.300013542175293, + "learning_rate": 4.671330185719559e-06, + "loss": 0.2906, + "num_input_tokens_seen": 61098752, + "step": 19415 + }, + { + "epoch": 1.2431982587542412, + "grad_norm": 20.281906127929688, + "learning_rate": 4.6710532449896575e-06, + "loss": 0.2736, + "num_input_tokens_seen": 61114560, + "step": 19420 + }, + { + "epoch": 1.243518340695218, + "grad_norm": 14.538592338562012, + "learning_rate": 4.670776195848664e-06, + "loss": 0.2584, + "num_input_tokens_seen": 61130176, + "step": 19425 + }, + { + "epoch": 1.243838422636195, + "grad_norm": 34.056976318359375, + "learning_rate": 4.670499038310416e-06, + "loss": 0.3111, + "num_input_tokens_seen": 61145472, + "step": 19430 + }, + { + "epoch": 1.2441585045771717, + "grad_norm": 39.83113098144531, + "learning_rate": 4.670221772388751e-06, + "loss": 0.3217, + "num_input_tokens_seen": 61161408, + "step": 19435 + }, + { + "epoch": 1.2444785865181487, + "grad_norm": 8.270475387573242, + "learning_rate": 4.669944398097515e-06, + "loss": 0.2143, + "num_input_tokens_seen": 61176576, + "step": 19440 + }, + { + "epoch": 1.2447986684591255, + "grad_norm": 13.598193168640137, + "learning_rate": 4.669666915450559e-06, + "loss": 0.471, + "num_input_tokens_seen": 61191616, + "step": 19445 + }, + { + "epoch": 1.2451187504001024, + "grad_norm": 13.40571403503418, + "learning_rate": 4.669389324461738e-06, + "loss": 0.3311, + "num_input_tokens_seen": 61207168, + "step": 19450 + }, + { + "epoch": 1.2454388323410792, + "grad_norm": 38.58952713012695, + "learning_rate": 4.669111625144914e-06, + "loss": 0.4144, + "num_input_tokens_seen": 61223872, + "step": 19455 + }, + { + "epoch": 1.2457589142820562, + "grad_norm": 20.198057174682617, + "learning_rate": 4.6688338175139544e-06, + "loss": 0.2513, + "num_input_tokens_seen": 61239680, + "step": 19460 + }, + { + "epoch": 1.2460789962230332, + "grad_norm": 67.97029113769531, + "learning_rate": 4.668555901582731e-06, + "loss": 0.285, + "num_input_tokens_seen": 61255616, + "step": 19465 + }, + { + "epoch": 1.24639907816401, + "grad_norm": 12.698660850524902, + "learning_rate": 4.668277877365122e-06, + "loss": 0.4098, + "num_input_tokens_seen": 61270976, + "step": 19470 + }, + { + "epoch": 1.246719160104987, + "grad_norm": 6.026050090789795, + "learning_rate": 4.6679997448750095e-06, + "loss": 0.2872, + "num_input_tokens_seen": 61285824, + "step": 19475 + }, + { + "epoch": 1.2470392420459637, + "grad_norm": 35.6258659362793, + "learning_rate": 4.667721504126281e-06, + "loss": 0.4148, + "num_input_tokens_seen": 61300672, + "step": 19480 + }, + { + "epoch": 1.2473593239869407, + "grad_norm": 30.864606857299805, + "learning_rate": 4.667443155132834e-06, + "loss": 0.3055, + "num_input_tokens_seen": 61317760, + "step": 19485 + }, + { + "epoch": 1.2476794059279175, + "grad_norm": 31.25807762145996, + "learning_rate": 4.667164697908564e-06, + "loss": 0.4127, + "num_input_tokens_seen": 61333888, + "step": 19490 + }, + { + "epoch": 1.2479994878688945, + "grad_norm": 18.759681701660156, + "learning_rate": 4.666886132467379e-06, + "loss": 0.4217, + "num_input_tokens_seen": 61350080, + "step": 19495 + }, + { + "epoch": 1.2483195698098712, + "grad_norm": 25.128324508666992, + "learning_rate": 4.666607458823186e-06, + "loss": 0.4635, + "num_input_tokens_seen": 61366912, + "step": 19500 + }, + { + "epoch": 1.2486396517508482, + "grad_norm": 16.606714248657227, + "learning_rate": 4.666328676989903e-06, + "loss": 0.322, + "num_input_tokens_seen": 61384256, + "step": 19505 + }, + { + "epoch": 1.2489597336918252, + "grad_norm": 36.82194137573242, + "learning_rate": 4.666049786981449e-06, + "loss": 0.2763, + "num_input_tokens_seen": 61399040, + "step": 19510 + }, + { + "epoch": 1.249279815632802, + "grad_norm": 18.196887969970703, + "learning_rate": 4.665770788811751e-06, + "loss": 0.2761, + "num_input_tokens_seen": 61416832, + "step": 19515 + }, + { + "epoch": 1.249599897573779, + "grad_norm": 45.46992492675781, + "learning_rate": 4.6654916824947415e-06, + "loss": 0.3629, + "num_input_tokens_seen": 61433152, + "step": 19520 + }, + { + "epoch": 1.2499199795147558, + "grad_norm": 20.093456268310547, + "learning_rate": 4.665212468044356e-06, + "loss": 0.3424, + "num_input_tokens_seen": 61450176, + "step": 19525 + }, + { + "epoch": 1.2502400614557327, + "grad_norm": 8.08393383026123, + "learning_rate": 4.66493314547454e-06, + "loss": 0.265, + "num_input_tokens_seen": 61465280, + "step": 19530 + }, + { + "epoch": 1.2502400614557327, + "eval_loss": 0.5115891098976135, + "eval_runtime": 51.1599, + "eval_samples_per_second": 271.423, + "eval_steps_per_second": 33.933, + "num_input_tokens_seen": 61465280, + "step": 19530 + }, + { + "epoch": 1.2505601433967095, + "grad_norm": 28.651538848876953, + "learning_rate": 4.664653714799239e-06, + "loss": 0.3263, + "num_input_tokens_seen": 61480960, + "step": 19535 + }, + { + "epoch": 1.2508802253376865, + "grad_norm": 18.552165985107422, + "learning_rate": 4.664374176032406e-06, + "loss": 0.3483, + "num_input_tokens_seen": 61496064, + "step": 19540 + }, + { + "epoch": 1.2512003072786633, + "grad_norm": 18.215551376342773, + "learning_rate": 4.6640945291880006e-06, + "loss": 0.3428, + "num_input_tokens_seen": 61512000, + "step": 19545 + }, + { + "epoch": 1.2515203892196403, + "grad_norm": 52.37979507446289, + "learning_rate": 4.6638147742799865e-06, + "loss": 0.3861, + "num_input_tokens_seen": 61527552, + "step": 19550 + }, + { + "epoch": 1.2518404711606173, + "grad_norm": 27.910350799560547, + "learning_rate": 4.663534911322334e-06, + "loss": 0.3321, + "num_input_tokens_seen": 61542656, + "step": 19555 + }, + { + "epoch": 1.252160553101594, + "grad_norm": 30.659748077392578, + "learning_rate": 4.663254940329017e-06, + "loss": 0.3228, + "num_input_tokens_seen": 61557568, + "step": 19560 + }, + { + "epoch": 1.2524806350425708, + "grad_norm": 45.084712982177734, + "learning_rate": 4.662974861314016e-06, + "loss": 0.3364, + "num_input_tokens_seen": 61574464, + "step": 19565 + }, + { + "epoch": 1.2528007169835478, + "grad_norm": 21.224836349487305, + "learning_rate": 4.662694674291318e-06, + "loss": 0.294, + "num_input_tokens_seen": 61589568, + "step": 19570 + }, + { + "epoch": 1.2531207989245248, + "grad_norm": 16.121063232421875, + "learning_rate": 4.662414379274912e-06, + "loss": 0.4253, + "num_input_tokens_seen": 61605440, + "step": 19575 + }, + { + "epoch": 1.2534408808655015, + "grad_norm": 33.78435134887695, + "learning_rate": 4.662133976278796e-06, + "loss": 0.3526, + "num_input_tokens_seen": 61621056, + "step": 19580 + }, + { + "epoch": 1.2537609628064785, + "grad_norm": 21.26799964904785, + "learning_rate": 4.661853465316971e-06, + "loss": 0.2846, + "num_input_tokens_seen": 61636736, + "step": 19585 + }, + { + "epoch": 1.2540810447474553, + "grad_norm": 15.626419067382812, + "learning_rate": 4.6615728464034455e-06, + "loss": 0.2533, + "num_input_tokens_seen": 61654208, + "step": 19590 + }, + { + "epoch": 1.2544011266884323, + "grad_norm": 17.935726165771484, + "learning_rate": 4.66129211955223e-06, + "loss": 0.2326, + "num_input_tokens_seen": 61670464, + "step": 19595 + }, + { + "epoch": 1.254721208629409, + "grad_norm": 11.352901458740234, + "learning_rate": 4.661011284777345e-06, + "loss": 0.3779, + "num_input_tokens_seen": 61686208, + "step": 19600 + }, + { + "epoch": 1.255041290570386, + "grad_norm": 19.753170013427734, + "learning_rate": 4.660730342092812e-06, + "loss": 0.275, + "num_input_tokens_seen": 61703232, + "step": 19605 + }, + { + "epoch": 1.2553613725113628, + "grad_norm": 20.883886337280273, + "learning_rate": 4.660449291512661e-06, + "loss": 0.2649, + "num_input_tokens_seen": 61719680, + "step": 19610 + }, + { + "epoch": 1.2556814544523398, + "grad_norm": 12.436278343200684, + "learning_rate": 4.6601681330509255e-06, + "loss": 0.2393, + "num_input_tokens_seen": 61734336, + "step": 19615 + }, + { + "epoch": 1.2560015363933168, + "grad_norm": 50.89828872680664, + "learning_rate": 4.6598868667216454e-06, + "loss": 0.4221, + "num_input_tokens_seen": 61749120, + "step": 19620 + }, + { + "epoch": 1.2563216183342936, + "grad_norm": 23.075458526611328, + "learning_rate": 4.659605492538866e-06, + "loss": 0.3938, + "num_input_tokens_seen": 61763072, + "step": 19625 + }, + { + "epoch": 1.2566417002752703, + "grad_norm": 51.307830810546875, + "learning_rate": 4.659324010516637e-06, + "loss": 0.524, + "num_input_tokens_seen": 61778432, + "step": 19630 + }, + { + "epoch": 1.2569617822162473, + "grad_norm": 20.365407943725586, + "learning_rate": 4.6590424206690145e-06, + "loss": 0.3252, + "num_input_tokens_seen": 61793088, + "step": 19635 + }, + { + "epoch": 1.2572818641572243, + "grad_norm": 33.19477844238281, + "learning_rate": 4.658760723010059e-06, + "loss": 0.4921, + "num_input_tokens_seen": 61810496, + "step": 19640 + }, + { + "epoch": 1.257601946098201, + "grad_norm": 10.974283218383789, + "learning_rate": 4.6584789175538384e-06, + "loss": 0.2689, + "num_input_tokens_seen": 61826240, + "step": 19645 + }, + { + "epoch": 1.257922028039178, + "grad_norm": 20.995267868041992, + "learning_rate": 4.658197004314423e-06, + "loss": 0.3473, + "num_input_tokens_seen": 61842624, + "step": 19650 + }, + { + "epoch": 1.2582421099801548, + "grad_norm": 10.768928527832031, + "learning_rate": 4.657914983305891e-06, + "loss": 0.347, + "num_input_tokens_seen": 61858240, + "step": 19655 + }, + { + "epoch": 1.2585621919211318, + "grad_norm": 41.289512634277344, + "learning_rate": 4.657632854542325e-06, + "loss": 0.3243, + "num_input_tokens_seen": 61873856, + "step": 19660 + }, + { + "epoch": 1.2588822738621088, + "grad_norm": 13.97238826751709, + "learning_rate": 4.657350618037813e-06, + "loss": 0.3672, + "num_input_tokens_seen": 61891008, + "step": 19665 + }, + { + "epoch": 1.2592023558030856, + "grad_norm": 19.737703323364258, + "learning_rate": 4.6570682738064485e-06, + "loss": 0.2677, + "num_input_tokens_seen": 61907200, + "step": 19670 + }, + { + "epoch": 1.2595224377440624, + "grad_norm": 4.77448844909668, + "learning_rate": 4.65678582186233e-06, + "loss": 0.1919, + "num_input_tokens_seen": 61922432, + "step": 19675 + }, + { + "epoch": 1.2598425196850394, + "grad_norm": 36.350887298583984, + "learning_rate": 4.6565032622195615e-06, + "loss": 0.4322, + "num_input_tokens_seen": 61936576, + "step": 19680 + }, + { + "epoch": 1.2601626016260163, + "grad_norm": 24.465900421142578, + "learning_rate": 4.656220594892253e-06, + "loss": 0.3127, + "num_input_tokens_seen": 61952064, + "step": 19685 + }, + { + "epoch": 1.2604826835669931, + "grad_norm": 28.81222152709961, + "learning_rate": 4.655937819894521e-06, + "loss": 0.4121, + "num_input_tokens_seen": 61968256, + "step": 19690 + }, + { + "epoch": 1.26080276550797, + "grad_norm": 9.118790626525879, + "learning_rate": 4.655654937240482e-06, + "loss": 0.2803, + "num_input_tokens_seen": 61984192, + "step": 19695 + }, + { + "epoch": 1.2611228474489469, + "grad_norm": 48.97355651855469, + "learning_rate": 4.655371946944265e-06, + "loss": 0.3284, + "num_input_tokens_seen": 61999872, + "step": 19700 + }, + { + "epoch": 1.2614429293899239, + "grad_norm": 14.885171890258789, + "learning_rate": 4.655088849019999e-06, + "loss": 0.3631, + "num_input_tokens_seen": 62015424, + "step": 19705 + }, + { + "epoch": 1.2617630113309006, + "grad_norm": 42.127357482910156, + "learning_rate": 4.654805643481822e-06, + "loss": 0.5244, + "num_input_tokens_seen": 62030912, + "step": 19710 + }, + { + "epoch": 1.2620830932718776, + "grad_norm": 36.18693161010742, + "learning_rate": 4.654522330343875e-06, + "loss": 0.273, + "num_input_tokens_seen": 62045696, + "step": 19715 + }, + { + "epoch": 1.2624031752128544, + "grad_norm": 39.11583709716797, + "learning_rate": 4.654238909620305e-06, + "loss": 0.3918, + "num_input_tokens_seen": 62060288, + "step": 19720 + }, + { + "epoch": 1.2627232571538314, + "grad_norm": 17.434024810791016, + "learning_rate": 4.6539553813252645e-06, + "loss": 0.2186, + "num_input_tokens_seen": 62077888, + "step": 19725 + }, + { + "epoch": 1.2630433390948084, + "grad_norm": 20.91417121887207, + "learning_rate": 4.653671745472913e-06, + "loss": 0.3008, + "num_input_tokens_seen": 62094336, + "step": 19730 + }, + { + "epoch": 1.2633634210357851, + "grad_norm": 58.459259033203125, + "learning_rate": 4.6533880020774126e-06, + "loss": 0.3185, + "num_input_tokens_seen": 62110400, + "step": 19735 + }, + { + "epoch": 1.2636835029767621, + "grad_norm": 27.879777908325195, + "learning_rate": 4.653104151152931e-06, + "loss": 0.3232, + "num_input_tokens_seen": 62126720, + "step": 19740 + }, + { + "epoch": 1.264003584917739, + "grad_norm": 16.264875411987305, + "learning_rate": 4.652820192713643e-06, + "loss": 0.3325, + "num_input_tokens_seen": 62142976, + "step": 19745 + }, + { + "epoch": 1.2643236668587159, + "grad_norm": 23.936487197875977, + "learning_rate": 4.652536126773729e-06, + "loss": 0.2464, + "num_input_tokens_seen": 62158080, + "step": 19750 + }, + { + "epoch": 1.2646437487996927, + "grad_norm": 8.6255521774292, + "learning_rate": 4.652251953347373e-06, + "loss": 0.2626, + "num_input_tokens_seen": 62173824, + "step": 19755 + }, + { + "epoch": 1.2649638307406696, + "grad_norm": 7.688754558563232, + "learning_rate": 4.651967672448765e-06, + "loss": 0.4023, + "num_input_tokens_seen": 62189248, + "step": 19760 + }, + { + "epoch": 1.2652839126816464, + "grad_norm": 23.51131820678711, + "learning_rate": 4.6516832840921e-06, + "loss": 0.4716, + "num_input_tokens_seen": 62205568, + "step": 19765 + }, + { + "epoch": 1.2656039946226234, + "grad_norm": 36.77297592163086, + "learning_rate": 4.65139878829158e-06, + "loss": 0.4309, + "num_input_tokens_seen": 62220992, + "step": 19770 + }, + { + "epoch": 1.2659240765636004, + "grad_norm": 25.424097061157227, + "learning_rate": 4.651114185061412e-06, + "loss": 0.3571, + "num_input_tokens_seen": 62235968, + "step": 19775 + }, + { + "epoch": 1.2662441585045772, + "grad_norm": 21.15170669555664, + "learning_rate": 4.650829474415805e-06, + "loss": 0.3261, + "num_input_tokens_seen": 62251136, + "step": 19780 + }, + { + "epoch": 1.266564240445554, + "grad_norm": 11.696151733398438, + "learning_rate": 4.650544656368977e-06, + "loss": 0.2417, + "num_input_tokens_seen": 62267264, + "step": 19785 + }, + { + "epoch": 1.266884322386531, + "grad_norm": 13.480581283569336, + "learning_rate": 4.65025973093515e-06, + "loss": 0.3685, + "num_input_tokens_seen": 62282112, + "step": 19790 + }, + { + "epoch": 1.267204404327508, + "grad_norm": 35.071449279785156, + "learning_rate": 4.649974698128554e-06, + "loss": 0.2891, + "num_input_tokens_seen": 62297408, + "step": 19795 + }, + { + "epoch": 1.2675244862684847, + "grad_norm": 14.930333137512207, + "learning_rate": 4.64968955796342e-06, + "loss": 0.3548, + "num_input_tokens_seen": 62314304, + "step": 19800 + }, + { + "epoch": 1.2678445682094617, + "grad_norm": 25.318819046020508, + "learning_rate": 4.6494043104539864e-06, + "loss": 0.3924, + "num_input_tokens_seen": 62329600, + "step": 19805 + }, + { + "epoch": 1.2681646501504384, + "grad_norm": 19.79739761352539, + "learning_rate": 4.649118955614498e-06, + "loss": 0.2952, + "num_input_tokens_seen": 62346880, + "step": 19810 + }, + { + "epoch": 1.2684847320914154, + "grad_norm": 32.45429611206055, + "learning_rate": 4.6488334934592025e-06, + "loss": 0.2824, + "num_input_tokens_seen": 62363392, + "step": 19815 + }, + { + "epoch": 1.2688048140323924, + "grad_norm": 46.96654510498047, + "learning_rate": 4.648547924002356e-06, + "loss": 0.3602, + "num_input_tokens_seen": 62379584, + "step": 19820 + }, + { + "epoch": 1.2691248959733692, + "grad_norm": 23.420373916625977, + "learning_rate": 4.648262247258217e-06, + "loss": 0.3378, + "num_input_tokens_seen": 62394880, + "step": 19825 + }, + { + "epoch": 1.269444977914346, + "grad_norm": 23.08353614807129, + "learning_rate": 4.647976463241051e-06, + "loss": 0.2125, + "num_input_tokens_seen": 62409600, + "step": 19830 + }, + { + "epoch": 1.269765059855323, + "grad_norm": 67.60214233398438, + "learning_rate": 4.64769057196513e-06, + "loss": 0.351, + "num_input_tokens_seen": 62424960, + "step": 19835 + }, + { + "epoch": 1.2700851417963, + "grad_norm": 11.70783519744873, + "learning_rate": 4.647404573444727e-06, + "loss": 0.4485, + "num_input_tokens_seen": 62439936, + "step": 19840 + }, + { + "epoch": 1.2704052237372767, + "grad_norm": 21.553773880004883, + "learning_rate": 4.647118467694126e-06, + "loss": 0.4064, + "num_input_tokens_seen": 62455360, + "step": 19845 + }, + { + "epoch": 1.2707253056782537, + "grad_norm": 34.44068145751953, + "learning_rate": 4.646832254727614e-06, + "loss": 0.4465, + "num_input_tokens_seen": 62472128, + "step": 19850 + }, + { + "epoch": 1.2710453876192305, + "grad_norm": 43.742950439453125, + "learning_rate": 4.646545934559479e-06, + "loss": 0.4335, + "num_input_tokens_seen": 62487936, + "step": 19855 + }, + { + "epoch": 1.2713654695602075, + "grad_norm": 22.047273635864258, + "learning_rate": 4.646259507204023e-06, + "loss": 0.3685, + "num_input_tokens_seen": 62502848, + "step": 19860 + }, + { + "epoch": 1.2716855515011842, + "grad_norm": 85.31450653076172, + "learning_rate": 4.645972972675545e-06, + "loss": 0.5468, + "num_input_tokens_seen": 62517888, + "step": 19865 + }, + { + "epoch": 1.2720056334421612, + "grad_norm": 9.466475486755371, + "learning_rate": 4.645686330988356e-06, + "loss": 0.319, + "num_input_tokens_seen": 62533312, + "step": 19870 + }, + { + "epoch": 1.272325715383138, + "grad_norm": 27.357406616210938, + "learning_rate": 4.645399582156768e-06, + "loss": 0.328, + "num_input_tokens_seen": 62548160, + "step": 19875 + }, + { + "epoch": 1.272645797324115, + "grad_norm": 24.68719482421875, + "learning_rate": 4.645112726195099e-06, + "loss": 0.2244, + "num_input_tokens_seen": 62564480, + "step": 19880 + }, + { + "epoch": 1.272965879265092, + "grad_norm": 25.575443267822266, + "learning_rate": 4.644825763117675e-06, + "loss": 0.2637, + "num_input_tokens_seen": 62580288, + "step": 19885 + }, + { + "epoch": 1.2732859612060687, + "grad_norm": 21.686996459960938, + "learning_rate": 4.644538692938824e-06, + "loss": 0.3284, + "num_input_tokens_seen": 62595392, + "step": 19890 + }, + { + "epoch": 1.2736060431470455, + "grad_norm": 42.343692779541016, + "learning_rate": 4.6442515156728806e-06, + "loss": 0.3854, + "num_input_tokens_seen": 62609792, + "step": 19895 + }, + { + "epoch": 1.2739261250880225, + "grad_norm": 14.073848724365234, + "learning_rate": 4.643964231334186e-06, + "loss": 0.2897, + "num_input_tokens_seen": 62624576, + "step": 19900 + }, + { + "epoch": 1.2742462070289995, + "grad_norm": 33.02923583984375, + "learning_rate": 4.643676839937086e-06, + "loss": 0.2879, + "num_input_tokens_seen": 62641664, + "step": 19905 + }, + { + "epoch": 1.2745662889699763, + "grad_norm": 26.535064697265625, + "learning_rate": 4.643389341495929e-06, + "loss": 0.3125, + "num_input_tokens_seen": 62656448, + "step": 19910 + }, + { + "epoch": 1.2748863709109532, + "grad_norm": 32.02016830444336, + "learning_rate": 4.643101736025074e-06, + "loss": 0.3907, + "num_input_tokens_seen": 62671936, + "step": 19915 + }, + { + "epoch": 1.27520645285193, + "grad_norm": 26.56601333618164, + "learning_rate": 4.642814023538881e-06, + "loss": 0.2957, + "num_input_tokens_seen": 62687168, + "step": 19920 + }, + { + "epoch": 1.275526534792907, + "grad_norm": 15.151817321777344, + "learning_rate": 4.642526204051717e-06, + "loss": 0.2726, + "num_input_tokens_seen": 62702400, + "step": 19925 + }, + { + "epoch": 1.275846616733884, + "grad_norm": 27.25908088684082, + "learning_rate": 4.642238277577954e-06, + "loss": 0.3796, + "num_input_tokens_seen": 62717824, + "step": 19930 + }, + { + "epoch": 1.2761666986748608, + "grad_norm": 17.396102905273438, + "learning_rate": 4.64195024413197e-06, + "loss": 0.309, + "num_input_tokens_seen": 62735040, + "step": 19935 + }, + { + "epoch": 1.2764867806158375, + "grad_norm": 44.2664909362793, + "learning_rate": 4.641662103728149e-06, + "loss": 0.3371, + "num_input_tokens_seen": 62750144, + "step": 19940 + }, + { + "epoch": 1.2768068625568145, + "grad_norm": 15.426281929016113, + "learning_rate": 4.641373856380878e-06, + "loss": 0.3251, + "num_input_tokens_seen": 62765824, + "step": 19945 + }, + { + "epoch": 1.2771269444977915, + "grad_norm": 28.618907928466797, + "learning_rate": 4.641085502104549e-06, + "loss": 0.3829, + "num_input_tokens_seen": 62782464, + "step": 19950 + }, + { + "epoch": 1.2774470264387683, + "grad_norm": 45.78504180908203, + "learning_rate": 4.640797040913563e-06, + "loss": 0.3555, + "num_input_tokens_seen": 62799040, + "step": 19955 + }, + { + "epoch": 1.2777671083797453, + "grad_norm": 15.075507164001465, + "learning_rate": 4.640508472822324e-06, + "loss": 0.3871, + "num_input_tokens_seen": 62813952, + "step": 19960 + }, + { + "epoch": 1.278087190320722, + "grad_norm": 20.63251304626465, + "learning_rate": 4.640219797845242e-06, + "loss": 0.2359, + "num_input_tokens_seen": 62829056, + "step": 19965 + }, + { + "epoch": 1.278407272261699, + "grad_norm": 27.320077896118164, + "learning_rate": 4.6399310159967306e-06, + "loss": 0.315, + "num_input_tokens_seen": 62844288, + "step": 19970 + }, + { + "epoch": 1.2787273542026758, + "grad_norm": 15.874052047729492, + "learning_rate": 4.639642127291211e-06, + "loss": 0.3672, + "num_input_tokens_seen": 62859584, + "step": 19975 + }, + { + "epoch": 1.2790474361436528, + "grad_norm": 31.600866317749023, + "learning_rate": 4.639353131743108e-06, + "loss": 0.4307, + "num_input_tokens_seen": 62874432, + "step": 19980 + }, + { + "epoch": 1.2793675180846296, + "grad_norm": 24.037071228027344, + "learning_rate": 4.639064029366854e-06, + "loss": 0.2447, + "num_input_tokens_seen": 62890112, + "step": 19985 + }, + { + "epoch": 1.2796876000256066, + "grad_norm": 66.49239349365234, + "learning_rate": 4.638774820176884e-06, + "loss": 0.2688, + "num_input_tokens_seen": 62906432, + "step": 19990 + }, + { + "epoch": 1.2800076819665835, + "grad_norm": 48.30184555053711, + "learning_rate": 4.63848550418764e-06, + "loss": 0.3082, + "num_input_tokens_seen": 62924288, + "step": 19995 + }, + { + "epoch": 1.2803277639075603, + "grad_norm": 28.229482650756836, + "learning_rate": 4.638196081413569e-06, + "loss": 0.3916, + "num_input_tokens_seen": 62939712, + "step": 20000 + }, + { + "epoch": 1.2806478458485373, + "grad_norm": 14.611214637756348, + "learning_rate": 4.637906551869123e-06, + "loss": 0.3147, + "num_input_tokens_seen": 62954816, + "step": 20005 + }, + { + "epoch": 1.280967927789514, + "grad_norm": 28.183094024658203, + "learning_rate": 4.63761691556876e-06, + "loss": 0.4312, + "num_input_tokens_seen": 62971008, + "step": 20010 + }, + { + "epoch": 1.281288009730491, + "grad_norm": 14.22891616821289, + "learning_rate": 4.637327172526942e-06, + "loss": 0.3879, + "num_input_tokens_seen": 62986240, + "step": 20015 + }, + { + "epoch": 1.2816080916714678, + "grad_norm": 31.083349227905273, + "learning_rate": 4.637037322758139e-06, + "loss": 0.4595, + "num_input_tokens_seen": 63000768, + "step": 20020 + }, + { + "epoch": 1.2819281736124448, + "grad_norm": 42.897823333740234, + "learning_rate": 4.636747366276822e-06, + "loss": 0.287, + "num_input_tokens_seen": 63017344, + "step": 20025 + }, + { + "epoch": 1.2822482555534216, + "grad_norm": 36.379756927490234, + "learning_rate": 4.6364573030974725e-06, + "loss": 0.3764, + "num_input_tokens_seen": 63031872, + "step": 20030 + }, + { + "epoch": 1.2825683374943986, + "grad_norm": 15.947949409484863, + "learning_rate": 4.636167133234574e-06, + "loss": 0.3154, + "num_input_tokens_seen": 63047936, + "step": 20035 + }, + { + "epoch": 1.2828884194353756, + "grad_norm": 57.88821792602539, + "learning_rate": 4.6358768567026145e-06, + "loss": 0.5253, + "num_input_tokens_seen": 63062144, + "step": 20040 + }, + { + "epoch": 1.2832085013763523, + "grad_norm": 10.90529727935791, + "learning_rate": 4.635586473516092e-06, + "loss": 0.3548, + "num_input_tokens_seen": 63077376, + "step": 20045 + }, + { + "epoch": 1.283528583317329, + "grad_norm": 16.653974533081055, + "learning_rate": 4.6352959836895024e-06, + "loss": 0.3181, + "num_input_tokens_seen": 63092928, + "step": 20050 + }, + { + "epoch": 1.283848665258306, + "grad_norm": 28.51357650756836, + "learning_rate": 4.635005387237355e-06, + "loss": 0.4008, + "num_input_tokens_seen": 63108224, + "step": 20055 + }, + { + "epoch": 1.284168747199283, + "grad_norm": 27.81861114501953, + "learning_rate": 4.63471468417416e-06, + "loss": 0.4035, + "num_input_tokens_seen": 63123584, + "step": 20060 + }, + { + "epoch": 1.2844888291402599, + "grad_norm": 36.65446853637695, + "learning_rate": 4.634423874514433e-06, + "loss": 0.3448, + "num_input_tokens_seen": 63138432, + "step": 20065 + }, + { + "epoch": 1.2848089110812368, + "grad_norm": 43.14455032348633, + "learning_rate": 4.634132958272694e-06, + "loss": 0.3615, + "num_input_tokens_seen": 63153600, + "step": 20070 + }, + { + "epoch": 1.2851289930222136, + "grad_norm": 49.184024810791016, + "learning_rate": 4.633841935463473e-06, + "loss": 0.3817, + "num_input_tokens_seen": 63167936, + "step": 20075 + }, + { + "epoch": 1.2854490749631906, + "grad_norm": 17.90545654296875, + "learning_rate": 4.6335508061013005e-06, + "loss": 0.2778, + "num_input_tokens_seen": 63183680, + "step": 20080 + }, + { + "epoch": 1.2857691569041676, + "grad_norm": 82.61245727539062, + "learning_rate": 4.633259570200713e-06, + "loss": 0.39, + "num_input_tokens_seen": 63199552, + "step": 20085 + }, + { + "epoch": 1.2860892388451444, + "grad_norm": 39.22623062133789, + "learning_rate": 4.6329682277762544e-06, + "loss": 0.397, + "num_input_tokens_seen": 63214848, + "step": 20090 + }, + { + "epoch": 1.2864093207861211, + "grad_norm": 9.228119850158691, + "learning_rate": 4.632676778842473e-06, + "loss": 0.3408, + "num_input_tokens_seen": 63230272, + "step": 20095 + }, + { + "epoch": 1.2867294027270981, + "grad_norm": 21.30014991760254, + "learning_rate": 4.632385223413922e-06, + "loss": 0.2205, + "num_input_tokens_seen": 63247424, + "step": 20100 + }, + { + "epoch": 1.2870494846680751, + "grad_norm": 26.81897735595703, + "learning_rate": 4.63209356150516e-06, + "loss": 0.3853, + "num_input_tokens_seen": 63263360, + "step": 20105 + }, + { + "epoch": 1.2873695666090519, + "grad_norm": 16.13037109375, + "learning_rate": 4.631801793130751e-06, + "loss": 0.2982, + "num_input_tokens_seen": 63278784, + "step": 20110 + }, + { + "epoch": 1.2876896485500289, + "grad_norm": 21.939165115356445, + "learning_rate": 4.631509918305266e-06, + "loss": 0.2606, + "num_input_tokens_seen": 63294784, + "step": 20115 + }, + { + "epoch": 1.2880097304910056, + "grad_norm": 14.802593231201172, + "learning_rate": 4.6312179370432766e-06, + "loss": 0.3204, + "num_input_tokens_seen": 63309760, + "step": 20120 + }, + { + "epoch": 1.2883298124319826, + "grad_norm": 68.96308898925781, + "learning_rate": 4.630925849359366e-06, + "loss": 0.4828, + "num_input_tokens_seen": 63325312, + "step": 20125 + }, + { + "epoch": 1.2886498943729594, + "grad_norm": 13.89660358428955, + "learning_rate": 4.630633655268116e-06, + "loss": 0.2336, + "num_input_tokens_seen": 63340416, + "step": 20130 + }, + { + "epoch": 1.2889699763139364, + "grad_norm": 83.94876861572266, + "learning_rate": 4.6303413547841215e-06, + "loss": 0.4746, + "num_input_tokens_seen": 63356352, + "step": 20135 + }, + { + "epoch": 1.2892900582549132, + "grad_norm": 14.173920631408691, + "learning_rate": 4.6300489479219745e-06, + "loss": 0.3209, + "num_input_tokens_seen": 63371904, + "step": 20140 + }, + { + "epoch": 1.2896101401958902, + "grad_norm": 81.1184310913086, + "learning_rate": 4.62975643469628e-06, + "loss": 0.2791, + "num_input_tokens_seen": 63387712, + "step": 20145 + }, + { + "epoch": 1.2899302221368671, + "grad_norm": 16.402090072631836, + "learning_rate": 4.629463815121641e-06, + "loss": 0.2382, + "num_input_tokens_seen": 63403392, + "step": 20150 + }, + { + "epoch": 1.290250304077844, + "grad_norm": 58.21274948120117, + "learning_rate": 4.6291710892126715e-06, + "loss": 0.4451, + "num_input_tokens_seen": 63418496, + "step": 20155 + }, + { + "epoch": 1.2905703860188207, + "grad_norm": 39.637786865234375, + "learning_rate": 4.628878256983989e-06, + "loss": 0.4002, + "num_input_tokens_seen": 63434432, + "step": 20160 + }, + { + "epoch": 1.2908904679597977, + "grad_norm": 9.19649600982666, + "learning_rate": 4.628585318450214e-06, + "loss": 0.3181, + "num_input_tokens_seen": 63450752, + "step": 20165 + }, + { + "epoch": 1.2912105499007747, + "grad_norm": 17.429899215698242, + "learning_rate": 4.6282922736259764e-06, + "loss": 0.4293, + "num_input_tokens_seen": 63466176, + "step": 20170 + }, + { + "epoch": 1.2915306318417514, + "grad_norm": 26.097366333007812, + "learning_rate": 4.627999122525908e-06, + "loss": 0.3507, + "num_input_tokens_seen": 63481280, + "step": 20175 + }, + { + "epoch": 1.2918507137827284, + "grad_norm": 26.40875816345215, + "learning_rate": 4.627705865164648e-06, + "loss": 0.4395, + "num_input_tokens_seen": 63496640, + "step": 20180 + }, + { + "epoch": 1.2921707957237052, + "grad_norm": 13.566640853881836, + "learning_rate": 4.627412501556841e-06, + "loss": 0.2385, + "num_input_tokens_seen": 63513280, + "step": 20185 + }, + { + "epoch": 1.2924908776646822, + "grad_norm": 11.376802444458008, + "learning_rate": 4.627119031717133e-06, + "loss": 0.2578, + "num_input_tokens_seen": 63529472, + "step": 20190 + }, + { + "epoch": 1.2928109596056592, + "grad_norm": 54.76176834106445, + "learning_rate": 4.626825455660182e-06, + "loss": 0.2907, + "num_input_tokens_seen": 63545088, + "step": 20195 + }, + { + "epoch": 1.293131041546636, + "grad_norm": 22.424129486083984, + "learning_rate": 4.6265317734006445e-06, + "loss": 0.4634, + "num_input_tokens_seen": 63560064, + "step": 20200 + }, + { + "epoch": 1.2934511234876127, + "grad_norm": 42.5429573059082, + "learning_rate": 4.626237984953188e-06, + "loss": 0.463, + "num_input_tokens_seen": 63576064, + "step": 20205 + }, + { + "epoch": 1.2937712054285897, + "grad_norm": 19.08965301513672, + "learning_rate": 4.625944090332481e-06, + "loss": 0.4143, + "num_input_tokens_seen": 63590976, + "step": 20210 + }, + { + "epoch": 1.2940912873695667, + "grad_norm": 11.460601806640625, + "learning_rate": 4.625650089553201e-06, + "loss": 0.3367, + "num_input_tokens_seen": 63607552, + "step": 20215 + }, + { + "epoch": 1.2944113693105435, + "grad_norm": 16.990629196166992, + "learning_rate": 4.625355982630027e-06, + "loss": 0.3755, + "num_input_tokens_seen": 63624896, + "step": 20220 + }, + { + "epoch": 1.2947314512515204, + "grad_norm": 8.15034294128418, + "learning_rate": 4.625061769577645e-06, + "loss": 0.2755, + "num_input_tokens_seen": 63640576, + "step": 20225 + }, + { + "epoch": 1.2950515331924972, + "grad_norm": 38.72111129760742, + "learning_rate": 4.624767450410748e-06, + "loss": 0.3411, + "num_input_tokens_seen": 63655552, + "step": 20230 + }, + { + "epoch": 1.2953716151334742, + "grad_norm": 15.985675811767578, + "learning_rate": 4.624473025144031e-06, + "loss": 0.4274, + "num_input_tokens_seen": 63671296, + "step": 20235 + }, + { + "epoch": 1.295691697074451, + "grad_norm": 12.228888511657715, + "learning_rate": 4.624178493792199e-06, + "loss": 0.2648, + "num_input_tokens_seen": 63685632, + "step": 20240 + }, + { + "epoch": 1.296011779015428, + "grad_norm": 31.010648727416992, + "learning_rate": 4.623883856369956e-06, + "loss": 0.3499, + "num_input_tokens_seen": 63701184, + "step": 20245 + }, + { + "epoch": 1.2963318609564047, + "grad_norm": 29.322511672973633, + "learning_rate": 4.623589112892017e-06, + "loss": 0.334, + "num_input_tokens_seen": 63716992, + "step": 20250 + }, + { + "epoch": 1.2966519428973817, + "grad_norm": 16.258861541748047, + "learning_rate": 4.623294263373098e-06, + "loss": 0.3291, + "num_input_tokens_seen": 63733184, + "step": 20255 + }, + { + "epoch": 1.2969720248383587, + "grad_norm": 23.598955154418945, + "learning_rate": 4.6229993078279245e-06, + "loss": 0.3076, + "num_input_tokens_seen": 63747904, + "step": 20260 + }, + { + "epoch": 1.2972921067793355, + "grad_norm": 23.401613235473633, + "learning_rate": 4.6227042462712235e-06, + "loss": 0.2991, + "num_input_tokens_seen": 63763904, + "step": 20265 + }, + { + "epoch": 1.2976121887203125, + "grad_norm": 17.207134246826172, + "learning_rate": 4.622409078717729e-06, + "loss": 0.272, + "num_input_tokens_seen": 63779648, + "step": 20270 + }, + { + "epoch": 1.2979322706612892, + "grad_norm": 60.13539123535156, + "learning_rate": 4.62211380518218e-06, + "loss": 0.4574, + "num_input_tokens_seen": 63795008, + "step": 20275 + }, + { + "epoch": 1.2982523526022662, + "grad_norm": 26.674650192260742, + "learning_rate": 4.621818425679322e-06, + "loss": 0.2077, + "num_input_tokens_seen": 63810112, + "step": 20280 + }, + { + "epoch": 1.298572434543243, + "grad_norm": 36.215415954589844, + "learning_rate": 4.621522940223903e-06, + "loss": 0.3095, + "num_input_tokens_seen": 63827264, + "step": 20285 + }, + { + "epoch": 1.29889251648422, + "grad_norm": 77.99750518798828, + "learning_rate": 4.6212273488306806e-06, + "loss": 0.5271, + "num_input_tokens_seen": 63843520, + "step": 20290 + }, + { + "epoch": 1.2992125984251968, + "grad_norm": 35.3199462890625, + "learning_rate": 4.620931651514412e-06, + "loss": 0.6525, + "num_input_tokens_seen": 63859264, + "step": 20295 + }, + { + "epoch": 1.2995326803661738, + "grad_norm": 14.762571334838867, + "learning_rate": 4.620635848289865e-06, + "loss": 0.4714, + "num_input_tokens_seen": 63874816, + "step": 20300 + }, + { + "epoch": 1.2998527623071507, + "grad_norm": 12.285124778747559, + "learning_rate": 4.620339939171809e-06, + "loss": 0.2975, + "num_input_tokens_seen": 63889728, + "step": 20305 + }, + { + "epoch": 1.3001728442481275, + "grad_norm": 20.751035690307617, + "learning_rate": 4.620043924175022e-06, + "loss": 0.3383, + "num_input_tokens_seen": 63905472, + "step": 20310 + }, + { + "epoch": 1.3004929261891043, + "grad_norm": 43.181697845458984, + "learning_rate": 4.619747803314284e-06, + "loss": 0.2967, + "num_input_tokens_seen": 63920320, + "step": 20315 + }, + { + "epoch": 1.3008130081300813, + "grad_norm": 24.89830780029297, + "learning_rate": 4.619451576604381e-06, + "loss": 0.293, + "num_input_tokens_seen": 63935488, + "step": 20320 + }, + { + "epoch": 1.3011330900710583, + "grad_norm": 30.811887741088867, + "learning_rate": 4.619155244060107e-06, + "loss": 0.2501, + "num_input_tokens_seen": 63951616, + "step": 20325 + }, + { + "epoch": 1.301453172012035, + "grad_norm": 19.581472396850586, + "learning_rate": 4.618858805696258e-06, + "loss": 0.4311, + "num_input_tokens_seen": 63965952, + "step": 20330 + }, + { + "epoch": 1.301773253953012, + "grad_norm": 28.384902954101562, + "learning_rate": 4.6185622615276375e-06, + "loss": 0.3449, + "num_input_tokens_seen": 63981312, + "step": 20335 + }, + { + "epoch": 1.3020933358939888, + "grad_norm": 49.75076675415039, + "learning_rate": 4.618265611569052e-06, + "loss": 0.4485, + "num_input_tokens_seen": 63995584, + "step": 20340 + }, + { + "epoch": 1.3024134178349658, + "grad_norm": 52.955692291259766, + "learning_rate": 4.617968855835317e-06, + "loss": 0.3986, + "num_input_tokens_seen": 64012608, + "step": 20345 + }, + { + "epoch": 1.3027334997759428, + "grad_norm": 23.118093490600586, + "learning_rate": 4.617671994341248e-06, + "loss": 0.2306, + "num_input_tokens_seen": 64028992, + "step": 20350 + }, + { + "epoch": 1.3030535817169195, + "grad_norm": 9.825486183166504, + "learning_rate": 4.61737502710167e-06, + "loss": 0.3203, + "num_input_tokens_seen": 64045056, + "step": 20355 + }, + { + "epoch": 1.3033736636578963, + "grad_norm": 12.913497924804688, + "learning_rate": 4.6170779541314134e-06, + "loss": 0.4393, + "num_input_tokens_seen": 64059840, + "step": 20360 + }, + { + "epoch": 1.3036937455988733, + "grad_norm": 8.421270370483398, + "learning_rate": 4.616780775445311e-06, + "loss": 0.3302, + "num_input_tokens_seen": 64076224, + "step": 20365 + }, + { + "epoch": 1.3040138275398503, + "grad_norm": 56.48768615722656, + "learning_rate": 4.6164834910582014e-06, + "loss": 0.3779, + "num_input_tokens_seen": 64091328, + "step": 20370 + }, + { + "epoch": 1.304333909480827, + "grad_norm": 18.82474708557129, + "learning_rate": 4.6161861009849315e-06, + "loss": 0.4574, + "num_input_tokens_seen": 64105472, + "step": 20375 + }, + { + "epoch": 1.304653991421804, + "grad_norm": 19.7633056640625, + "learning_rate": 4.615888605240351e-06, + "loss": 0.3478, + "num_input_tokens_seen": 64120256, + "step": 20380 + }, + { + "epoch": 1.3049740733627808, + "grad_norm": 22.578678131103516, + "learning_rate": 4.615591003839314e-06, + "loss": 0.3416, + "num_input_tokens_seen": 64135680, + "step": 20385 + }, + { + "epoch": 1.3052941553037578, + "grad_norm": 39.34196090698242, + "learning_rate": 4.615293296796683e-06, + "loss": 0.3507, + "num_input_tokens_seen": 64150592, + "step": 20390 + }, + { + "epoch": 1.3056142372447346, + "grad_norm": 18.163753509521484, + "learning_rate": 4.614995484127322e-06, + "loss": 0.3035, + "num_input_tokens_seen": 64166080, + "step": 20395 + }, + { + "epoch": 1.3059343191857116, + "grad_norm": 20.400644302368164, + "learning_rate": 4.6146975658461025e-06, + "loss": 0.3247, + "num_input_tokens_seen": 64181632, + "step": 20400 + }, + { + "epoch": 1.3062544011266883, + "grad_norm": 32.202781677246094, + "learning_rate": 4.614399541967902e-06, + "loss": 0.2535, + "num_input_tokens_seen": 64197760, + "step": 20405 + }, + { + "epoch": 1.3065744830676653, + "grad_norm": 9.473235130310059, + "learning_rate": 4.614101412507602e-06, + "loss": 0.3006, + "num_input_tokens_seen": 64212416, + "step": 20410 + }, + { + "epoch": 1.3068945650086423, + "grad_norm": 20.453170776367188, + "learning_rate": 4.61380317748009e-06, + "loss": 0.2959, + "num_input_tokens_seen": 64228032, + "step": 20415 + }, + { + "epoch": 1.307214646949619, + "grad_norm": 11.073919296264648, + "learning_rate": 4.613504836900258e-06, + "loss": 0.2968, + "num_input_tokens_seen": 64243008, + "step": 20420 + }, + { + "epoch": 1.3075347288905959, + "grad_norm": 19.764598846435547, + "learning_rate": 4.613206390783003e-06, + "loss": 0.4151, + "num_input_tokens_seen": 64257920, + "step": 20425 + }, + { + "epoch": 1.3078548108315728, + "grad_norm": 30.293262481689453, + "learning_rate": 4.612907839143227e-06, + "loss": 0.3256, + "num_input_tokens_seen": 64273664, + "step": 20430 + }, + { + "epoch": 1.3081748927725498, + "grad_norm": 10.972074508666992, + "learning_rate": 4.61260918199584e-06, + "loss": 0.2772, + "num_input_tokens_seen": 64288704, + "step": 20435 + }, + { + "epoch": 1.3084949747135266, + "grad_norm": 46.75788116455078, + "learning_rate": 4.612310419355754e-06, + "loss": 0.3003, + "num_input_tokens_seen": 64304896, + "step": 20440 + }, + { + "epoch": 1.3088150566545036, + "grad_norm": 28.557538986206055, + "learning_rate": 4.61201155123789e-06, + "loss": 0.2842, + "num_input_tokens_seen": 64321664, + "step": 20445 + }, + { + "epoch": 1.3091351385954804, + "grad_norm": 27.84176254272461, + "learning_rate": 4.611712577657169e-06, + "loss": 0.3828, + "num_input_tokens_seen": 64337152, + "step": 20450 + }, + { + "epoch": 1.3094552205364574, + "grad_norm": 43.67847442626953, + "learning_rate": 4.611413498628521e-06, + "loss": 0.3349, + "num_input_tokens_seen": 64352320, + "step": 20455 + }, + { + "epoch": 1.3097753024774343, + "grad_norm": 27.865806579589844, + "learning_rate": 4.611114314166881e-06, + "loss": 0.2901, + "num_input_tokens_seen": 64368256, + "step": 20460 + }, + { + "epoch": 1.3100953844184111, + "grad_norm": 19.1796932220459, + "learning_rate": 4.610815024287189e-06, + "loss": 0.297, + "num_input_tokens_seen": 64382848, + "step": 20465 + }, + { + "epoch": 1.3104154663593879, + "grad_norm": 13.69913387298584, + "learning_rate": 4.610515629004391e-06, + "loss": 0.3665, + "num_input_tokens_seen": 64398336, + "step": 20470 + }, + { + "epoch": 1.3107355483003649, + "grad_norm": 9.677964210510254, + "learning_rate": 4.610216128333433e-06, + "loss": 0.3125, + "num_input_tokens_seen": 64413632, + "step": 20475 + }, + { + "epoch": 1.3110556302413419, + "grad_norm": 23.32699203491211, + "learning_rate": 4.6099165222892746e-06, + "loss": 0.3254, + "num_input_tokens_seen": 64428480, + "step": 20480 + }, + { + "epoch": 1.3113757121823186, + "grad_norm": 18.01469612121582, + "learning_rate": 4.609616810886875e-06, + "loss": 0.382, + "num_input_tokens_seen": 64443904, + "step": 20485 + }, + { + "epoch": 1.3116957941232956, + "grad_norm": 24.625574111938477, + "learning_rate": 4.609316994141201e-06, + "loss": 0.3723, + "num_input_tokens_seen": 64461056, + "step": 20490 + }, + { + "epoch": 1.3120158760642724, + "grad_norm": 23.407428741455078, + "learning_rate": 4.6090170720672234e-06, + "loss": 0.3374, + "num_input_tokens_seen": 64476416, + "step": 20495 + }, + { + "epoch": 1.3123359580052494, + "grad_norm": 31.30431365966797, + "learning_rate": 4.608717044679918e-06, + "loss": 0.2786, + "num_input_tokens_seen": 64492544, + "step": 20500 + }, + { + "epoch": 1.3126560399462264, + "grad_norm": 31.056259155273438, + "learning_rate": 4.6084169119942674e-06, + "loss": 0.3677, + "num_input_tokens_seen": 64507456, + "step": 20505 + }, + { + "epoch": 1.3129761218872031, + "grad_norm": 39.42869567871094, + "learning_rate": 4.608116674025259e-06, + "loss": 0.3362, + "num_input_tokens_seen": 64523392, + "step": 20510 + }, + { + "epoch": 1.31329620382818, + "grad_norm": 24.08234977722168, + "learning_rate": 4.6078163307878845e-06, + "loss": 0.2361, + "num_input_tokens_seen": 64539968, + "step": 20515 + }, + { + "epoch": 1.313616285769157, + "grad_norm": 14.783381462097168, + "learning_rate": 4.607515882297141e-06, + "loss": 0.2773, + "num_input_tokens_seen": 64555200, + "step": 20520 + }, + { + "epoch": 1.313936367710134, + "grad_norm": 20.633743286132812, + "learning_rate": 4.607215328568032e-06, + "loss": 0.4187, + "num_input_tokens_seen": 64570944, + "step": 20525 + }, + { + "epoch": 1.3142564496511107, + "grad_norm": 20.731861114501953, + "learning_rate": 4.6069146696155664e-06, + "loss": 0.3009, + "num_input_tokens_seen": 64585984, + "step": 20530 + }, + { + "epoch": 1.3145765315920876, + "grad_norm": 21.85526466369629, + "learning_rate": 4.606613905454757e-06, + "loss": 0.4433, + "num_input_tokens_seen": 64601216, + "step": 20535 + }, + { + "epoch": 1.3148966135330644, + "grad_norm": 46.98306655883789, + "learning_rate": 4.606313036100621e-06, + "loss": 0.4154, + "num_input_tokens_seen": 64617344, + "step": 20540 + }, + { + "epoch": 1.3152166954740414, + "grad_norm": 26.9970760345459, + "learning_rate": 4.606012061568184e-06, + "loss": 0.3504, + "num_input_tokens_seen": 64632576, + "step": 20545 + }, + { + "epoch": 1.3155367774150182, + "grad_norm": 7.700857639312744, + "learning_rate": 4.605710981872474e-06, + "loss": 0.3389, + "num_input_tokens_seen": 64647808, + "step": 20550 + }, + { + "epoch": 1.3158568593559952, + "grad_norm": 15.133967399597168, + "learning_rate": 4.605409797028526e-06, + "loss": 0.4269, + "num_input_tokens_seen": 64664960, + "step": 20555 + }, + { + "epoch": 1.316176941296972, + "grad_norm": 18.136316299438477, + "learning_rate": 4.60510850705138e-06, + "loss": 0.3313, + "num_input_tokens_seen": 64681152, + "step": 20560 + }, + { + "epoch": 1.316497023237949, + "grad_norm": 14.406203269958496, + "learning_rate": 4.60480711195608e-06, + "loss": 0.2658, + "num_input_tokens_seen": 64696576, + "step": 20565 + }, + { + "epoch": 1.316817105178926, + "grad_norm": 10.819345474243164, + "learning_rate": 4.604505611757676e-06, + "loss": 0.2836, + "num_input_tokens_seen": 64711552, + "step": 20570 + }, + { + "epoch": 1.3171371871199027, + "grad_norm": 24.241270065307617, + "learning_rate": 4.604204006471223e-06, + "loss": 0.527, + "num_input_tokens_seen": 64726720, + "step": 20575 + }, + { + "epoch": 1.3174572690608795, + "grad_norm": 14.869595527648926, + "learning_rate": 4.603902296111784e-06, + "loss": 0.3877, + "num_input_tokens_seen": 64741760, + "step": 20580 + }, + { + "epoch": 1.3177773510018564, + "grad_norm": 35.174251556396484, + "learning_rate": 4.6036004806944225e-06, + "loss": 0.366, + "num_input_tokens_seen": 64757696, + "step": 20585 + }, + { + "epoch": 1.3180974329428334, + "grad_norm": 49.6598014831543, + "learning_rate": 4.603298560234209e-06, + "loss": 0.4845, + "num_input_tokens_seen": 64773056, + "step": 20590 + }, + { + "epoch": 1.3184175148838102, + "grad_norm": 8.953691482543945, + "learning_rate": 4.602996534746223e-06, + "loss": 0.2912, + "num_input_tokens_seen": 64787904, + "step": 20595 + }, + { + "epoch": 1.3187375968247872, + "grad_norm": 15.064729690551758, + "learning_rate": 4.6026944042455434e-06, + "loss": 0.3269, + "num_input_tokens_seen": 64803712, + "step": 20600 + }, + { + "epoch": 1.319057678765764, + "grad_norm": 9.096686363220215, + "learning_rate": 4.602392168747258e-06, + "loss": 0.2514, + "num_input_tokens_seen": 64819264, + "step": 20605 + }, + { + "epoch": 1.319377760706741, + "grad_norm": 15.889388084411621, + "learning_rate": 4.602089828266458e-06, + "loss": 0.4147, + "num_input_tokens_seen": 64835136, + "step": 20610 + }, + { + "epoch": 1.319697842647718, + "grad_norm": 20.57215690612793, + "learning_rate": 4.601787382818242e-06, + "loss": 0.2751, + "num_input_tokens_seen": 64850560, + "step": 20615 + }, + { + "epoch": 1.3200179245886947, + "grad_norm": 33.1170539855957, + "learning_rate": 4.601484832417712e-06, + "loss": 0.2697, + "num_input_tokens_seen": 64865920, + "step": 20620 + }, + { + "epoch": 1.3203380065296715, + "grad_norm": 8.83367919921875, + "learning_rate": 4.6011821770799744e-06, + "loss": 0.3781, + "num_input_tokens_seen": 64883520, + "step": 20625 + }, + { + "epoch": 1.3206580884706485, + "grad_norm": 8.451212882995605, + "learning_rate": 4.600879416820146e-06, + "loss": 0.2386, + "num_input_tokens_seen": 64898816, + "step": 20630 + }, + { + "epoch": 1.3209781704116255, + "grad_norm": 16.89657974243164, + "learning_rate": 4.60057655165334e-06, + "loss": 0.3511, + "num_input_tokens_seen": 64915392, + "step": 20635 + }, + { + "epoch": 1.3212982523526022, + "grad_norm": 11.312844276428223, + "learning_rate": 4.6002735815946845e-06, + "loss": 0.3353, + "num_input_tokens_seen": 64930176, + "step": 20640 + }, + { + "epoch": 1.3216183342935792, + "grad_norm": 55.320159912109375, + "learning_rate": 4.5999705066593046e-06, + "loss": 0.3782, + "num_input_tokens_seen": 64945600, + "step": 20645 + }, + { + "epoch": 1.321938416234556, + "grad_norm": 11.158468246459961, + "learning_rate": 4.599667326862337e-06, + "loss": 0.2715, + "num_input_tokens_seen": 64960704, + "step": 20650 + }, + { + "epoch": 1.322258498175533, + "grad_norm": 56.21776580810547, + "learning_rate": 4.599364042218919e-06, + "loss": 0.3153, + "num_input_tokens_seen": 64976832, + "step": 20655 + }, + { + "epoch": 1.3225785801165097, + "grad_norm": 16.644861221313477, + "learning_rate": 4.599060652744197e-06, + "loss": 0.2554, + "num_input_tokens_seen": 64992256, + "step": 20660 + }, + { + "epoch": 1.3228986620574867, + "grad_norm": 15.701600074768066, + "learning_rate": 4.598757158453319e-06, + "loss": 0.2524, + "num_input_tokens_seen": 65008256, + "step": 20665 + }, + { + "epoch": 1.3232187439984635, + "grad_norm": 12.045296669006348, + "learning_rate": 4.598453559361441e-06, + "loss": 0.2878, + "num_input_tokens_seen": 65022720, + "step": 20670 + }, + { + "epoch": 1.3235388259394405, + "grad_norm": 7.479244709014893, + "learning_rate": 4.598149855483722e-06, + "loss": 0.1734, + "num_input_tokens_seen": 65038400, + "step": 20675 + }, + { + "epoch": 1.3238589078804175, + "grad_norm": 20.919757843017578, + "learning_rate": 4.597846046835329e-06, + "loss": 0.411, + "num_input_tokens_seen": 65054272, + "step": 20680 + }, + { + "epoch": 1.3241789898213943, + "grad_norm": 15.529032707214355, + "learning_rate": 4.597542133431431e-06, + "loss": 0.4788, + "num_input_tokens_seen": 65071488, + "step": 20685 + }, + { + "epoch": 1.3244990717623712, + "grad_norm": 19.615203857421875, + "learning_rate": 4.597238115287204e-06, + "loss": 0.3476, + "num_input_tokens_seen": 65088512, + "step": 20690 + }, + { + "epoch": 1.324819153703348, + "grad_norm": 33.618099212646484, + "learning_rate": 4.596933992417831e-06, + "loss": 0.4639, + "num_input_tokens_seen": 65104512, + "step": 20695 + }, + { + "epoch": 1.325139235644325, + "grad_norm": 11.18836784362793, + "learning_rate": 4.596629764838496e-06, + "loss": 0.2843, + "num_input_tokens_seen": 65120256, + "step": 20700 + }, + { + "epoch": 1.3254593175853018, + "grad_norm": 17.669795989990234, + "learning_rate": 4.596325432564392e-06, + "loss": 0.4256, + "num_input_tokens_seen": 65134720, + "step": 20705 + }, + { + "epoch": 1.3257793995262788, + "grad_norm": 8.678454399108887, + "learning_rate": 4.596020995610715e-06, + "loss": 0.3155, + "num_input_tokens_seen": 65153344, + "step": 20710 + }, + { + "epoch": 1.3260994814672555, + "grad_norm": 27.456127166748047, + "learning_rate": 4.595716453992668e-06, + "loss": 0.3079, + "num_input_tokens_seen": 65169280, + "step": 20715 + }, + { + "epoch": 1.3264195634082325, + "grad_norm": 71.40352630615234, + "learning_rate": 4.595411807725456e-06, + "loss": 0.4038, + "num_input_tokens_seen": 65184640, + "step": 20720 + }, + { + "epoch": 1.3267396453492095, + "grad_norm": 30.375797271728516, + "learning_rate": 4.595107056824294e-06, + "loss": 0.4733, + "num_input_tokens_seen": 65200128, + "step": 20725 + }, + { + "epoch": 1.3270597272901863, + "grad_norm": 17.422107696533203, + "learning_rate": 4.594802201304398e-06, + "loss": 0.3263, + "num_input_tokens_seen": 65216256, + "step": 20730 + }, + { + "epoch": 1.327379809231163, + "grad_norm": 25.107969284057617, + "learning_rate": 4.594497241180992e-06, + "loss": 0.4066, + "num_input_tokens_seen": 65231936, + "step": 20735 + }, + { + "epoch": 1.32769989117214, + "grad_norm": 18.608951568603516, + "learning_rate": 4.594192176469303e-06, + "loss": 0.318, + "num_input_tokens_seen": 65247872, + "step": 20740 + }, + { + "epoch": 1.328019973113117, + "grad_norm": 32.1141357421875, + "learning_rate": 4.593887007184565e-06, + "loss": 0.4635, + "num_input_tokens_seen": 65262976, + "step": 20745 + }, + { + "epoch": 1.3283400550540938, + "grad_norm": 17.290620803833008, + "learning_rate": 4.5935817333420176e-06, + "loss": 0.2729, + "num_input_tokens_seen": 65278336, + "step": 20750 + }, + { + "epoch": 1.3286601369950708, + "grad_norm": 13.826888084411621, + "learning_rate": 4.5932763549569026e-06, + "loss": 0.3281, + "num_input_tokens_seen": 65294912, + "step": 20755 + }, + { + "epoch": 1.3289802189360476, + "grad_norm": 15.912928581237793, + "learning_rate": 4.59297087204447e-06, + "loss": 0.2953, + "num_input_tokens_seen": 65312000, + "step": 20760 + }, + { + "epoch": 1.3293003008770246, + "grad_norm": 34.10730743408203, + "learning_rate": 4.592665284619975e-06, + "loss": 0.2865, + "num_input_tokens_seen": 65327360, + "step": 20765 + }, + { + "epoch": 1.3296203828180015, + "grad_norm": 14.994406700134277, + "learning_rate": 4.5923595926986745e-06, + "loss": 0.387, + "num_input_tokens_seen": 65344832, + "step": 20770 + }, + { + "epoch": 1.3299404647589783, + "grad_norm": 26.318946838378906, + "learning_rate": 4.592053796295835e-06, + "loss": 0.344, + "num_input_tokens_seen": 65362304, + "step": 20775 + }, + { + "epoch": 1.330260546699955, + "grad_norm": 23.536701202392578, + "learning_rate": 4.591747895426726e-06, + "loss": 0.3077, + "num_input_tokens_seen": 65378304, + "step": 20780 + }, + { + "epoch": 1.330580628640932, + "grad_norm": 9.558457374572754, + "learning_rate": 4.591441890106623e-06, + "loss": 0.2716, + "num_input_tokens_seen": 65393344, + "step": 20785 + }, + { + "epoch": 1.330900710581909, + "grad_norm": 28.58504867553711, + "learning_rate": 4.591135780350805e-06, + "loss": 0.315, + "num_input_tokens_seen": 65409408, + "step": 20790 + }, + { + "epoch": 1.3312207925228858, + "grad_norm": 71.49117279052734, + "learning_rate": 4.590829566174559e-06, + "loss": 0.4502, + "num_input_tokens_seen": 65424832, + "step": 20795 + }, + { + "epoch": 1.3315408744638628, + "grad_norm": 21.724933624267578, + "learning_rate": 4.590523247593176e-06, + "loss": 0.3672, + "num_input_tokens_seen": 65441216, + "step": 20800 + }, + { + "epoch": 1.3318609564048396, + "grad_norm": 35.63298797607422, + "learning_rate": 4.590216824621951e-06, + "loss": 0.4188, + "num_input_tokens_seen": 65457600, + "step": 20805 + }, + { + "epoch": 1.3321810383458166, + "grad_norm": 28.17648696899414, + "learning_rate": 4.589910297276185e-06, + "loss": 0.4023, + "num_input_tokens_seen": 65473728, + "step": 20810 + }, + { + "epoch": 1.3325011202867934, + "grad_norm": 24.369714736938477, + "learning_rate": 4.589603665571184e-06, + "loss": 0.2281, + "num_input_tokens_seen": 65489728, + "step": 20815 + }, + { + "epoch": 1.3328212022277703, + "grad_norm": 25.105648040771484, + "learning_rate": 4.5892969295222615e-06, + "loss": 0.2818, + "num_input_tokens_seen": 65506176, + "step": 20820 + }, + { + "epoch": 1.333141284168747, + "grad_norm": 31.157155990600586, + "learning_rate": 4.588990089144731e-06, + "loss": 0.3487, + "num_input_tokens_seen": 65521728, + "step": 20825 + }, + { + "epoch": 1.333461366109724, + "grad_norm": 15.469901084899902, + "learning_rate": 4.5886831444539195e-06, + "loss": 0.2838, + "num_input_tokens_seen": 65536832, + "step": 20830 + }, + { + "epoch": 1.333781448050701, + "grad_norm": 19.556093215942383, + "learning_rate": 4.588376095465149e-06, + "loss": 0.3904, + "num_input_tokens_seen": 65555776, + "step": 20835 + }, + { + "epoch": 1.3341015299916779, + "grad_norm": 18.439790725708008, + "learning_rate": 4.588068942193755e-06, + "loss": 0.3607, + "num_input_tokens_seen": 65570944, + "step": 20840 + }, + { + "epoch": 1.3344216119326546, + "grad_norm": 33.653411865234375, + "learning_rate": 4.587761684655075e-06, + "loss": 0.284, + "num_input_tokens_seen": 65586048, + "step": 20845 + }, + { + "epoch": 1.3347416938736316, + "grad_norm": 21.861356735229492, + "learning_rate": 4.587454322864451e-06, + "loss": 0.4453, + "num_input_tokens_seen": 65601408, + "step": 20850 + }, + { + "epoch": 1.3350617758146086, + "grad_norm": 26.64455223083496, + "learning_rate": 4.5871468568372305e-06, + "loss": 0.2531, + "num_input_tokens_seen": 65616256, + "step": 20855 + }, + { + "epoch": 1.3353818577555854, + "grad_norm": 22.370277404785156, + "learning_rate": 4.586839286588768e-06, + "loss": 0.2526, + "num_input_tokens_seen": 65631936, + "step": 20860 + }, + { + "epoch": 1.3357019396965624, + "grad_norm": 15.608209609985352, + "learning_rate": 4.5865316121344215e-06, + "loss": 0.3054, + "num_input_tokens_seen": 65647488, + "step": 20865 + }, + { + "epoch": 1.3360220216375391, + "grad_norm": 26.505685806274414, + "learning_rate": 4.586223833489555e-06, + "loss": 0.2884, + "num_input_tokens_seen": 65664704, + "step": 20870 + }, + { + "epoch": 1.3363421035785161, + "grad_norm": 28.008808135986328, + "learning_rate": 4.585915950669536e-06, + "loss": 0.4319, + "num_input_tokens_seen": 65680256, + "step": 20875 + }, + { + "epoch": 1.3366621855194931, + "grad_norm": 15.790971755981445, + "learning_rate": 4.585607963689741e-06, + "loss": 0.1895, + "num_input_tokens_seen": 65696000, + "step": 20880 + }, + { + "epoch": 1.3369822674604699, + "grad_norm": 11.296832084655762, + "learning_rate": 4.5852998725655465e-06, + "loss": 0.2608, + "num_input_tokens_seen": 65712640, + "step": 20885 + }, + { + "epoch": 1.3373023494014467, + "grad_norm": 31.53887176513672, + "learning_rate": 4.584991677312339e-06, + "loss": 0.4189, + "num_input_tokens_seen": 65728064, + "step": 20890 + }, + { + "epoch": 1.3376224313424236, + "grad_norm": 26.809690475463867, + "learning_rate": 4.584683377945507e-06, + "loss": 0.2957, + "num_input_tokens_seen": 65746048, + "step": 20895 + }, + { + "epoch": 1.3379425132834006, + "grad_norm": 15.59483528137207, + "learning_rate": 4.584374974480445e-06, + "loss": 0.2619, + "num_input_tokens_seen": 65761344, + "step": 20900 + }, + { + "epoch": 1.3382625952243774, + "grad_norm": 32.04833984375, + "learning_rate": 4.584066466932556e-06, + "loss": 0.2377, + "num_input_tokens_seen": 65777088, + "step": 20905 + }, + { + "epoch": 1.3385826771653544, + "grad_norm": 9.680489540100098, + "learning_rate": 4.583757855317242e-06, + "loss": 0.322, + "num_input_tokens_seen": 65793024, + "step": 20910 + }, + { + "epoch": 1.3389027591063312, + "grad_norm": 65.40319061279297, + "learning_rate": 4.5834491396499145e-06, + "loss": 0.3514, + "num_input_tokens_seen": 65807744, + "step": 20915 + }, + { + "epoch": 1.3392228410473082, + "grad_norm": 22.336626052856445, + "learning_rate": 4.58314031994599e-06, + "loss": 0.3269, + "num_input_tokens_seen": 65824000, + "step": 20920 + }, + { + "epoch": 1.339542922988285, + "grad_norm": 44.92398452758789, + "learning_rate": 4.582831396220888e-06, + "loss": 0.3618, + "num_input_tokens_seen": 65840064, + "step": 20925 + }, + { + "epoch": 1.339863004929262, + "grad_norm": 20.358598709106445, + "learning_rate": 4.582522368490035e-06, + "loss": 0.3187, + "num_input_tokens_seen": 65855552, + "step": 20930 + }, + { + "epoch": 1.3401830868702387, + "grad_norm": 11.153732299804688, + "learning_rate": 4.582213236768863e-06, + "loss": 0.2784, + "num_input_tokens_seen": 65872448, + "step": 20935 + }, + { + "epoch": 1.3405031688112157, + "grad_norm": 51.880313873291016, + "learning_rate": 4.5819040010728064e-06, + "loss": 0.3654, + "num_input_tokens_seen": 65888448, + "step": 20940 + }, + { + "epoch": 1.3408232507521927, + "grad_norm": 13.079294204711914, + "learning_rate": 4.58159466141731e-06, + "loss": 0.3229, + "num_input_tokens_seen": 65902912, + "step": 20945 + }, + { + "epoch": 1.3411433326931694, + "grad_norm": 36.79719543457031, + "learning_rate": 4.581285217817818e-06, + "loss": 0.4098, + "num_input_tokens_seen": 65918016, + "step": 20950 + }, + { + "epoch": 1.3414634146341464, + "grad_norm": 30.867626190185547, + "learning_rate": 4.580975670289783e-06, + "loss": 0.3622, + "num_input_tokens_seen": 65934656, + "step": 20955 + }, + { + "epoch": 1.3417834965751232, + "grad_norm": 8.882917404174805, + "learning_rate": 4.580666018848663e-06, + "loss": 0.3221, + "num_input_tokens_seen": 65950080, + "step": 20960 + }, + { + "epoch": 1.3421035785161002, + "grad_norm": 11.687495231628418, + "learning_rate": 4.580356263509919e-06, + "loss": 0.3835, + "num_input_tokens_seen": 65965568, + "step": 20965 + }, + { + "epoch": 1.342423660457077, + "grad_norm": 29.041046142578125, + "learning_rate": 4.58004640428902e-06, + "loss": 0.406, + "num_input_tokens_seen": 65980672, + "step": 20970 + }, + { + "epoch": 1.342743742398054, + "grad_norm": 24.370420455932617, + "learning_rate": 4.579736441201439e-06, + "loss": 0.4468, + "num_input_tokens_seen": 65996096, + "step": 20975 + }, + { + "epoch": 1.3430638243390307, + "grad_norm": 16.883878707885742, + "learning_rate": 4.579426374262652e-06, + "loss": 0.388, + "num_input_tokens_seen": 66013120, + "step": 20980 + }, + { + "epoch": 1.3433839062800077, + "grad_norm": 53.8387451171875, + "learning_rate": 4.579116203488143e-06, + "loss": 0.4042, + "num_input_tokens_seen": 66028160, + "step": 20985 + }, + { + "epoch": 1.3437039882209847, + "grad_norm": 35.17137908935547, + "learning_rate": 4.578805928893401e-06, + "loss": 0.4532, + "num_input_tokens_seen": 66043904, + "step": 20990 + }, + { + "epoch": 1.3440240701619615, + "grad_norm": 9.83181095123291, + "learning_rate": 4.578495550493919e-06, + "loss": 0.2902, + "num_input_tokens_seen": 66058624, + "step": 20995 + }, + { + "epoch": 1.3443441521029382, + "grad_norm": 24.405336380004883, + "learning_rate": 4.578185068305195e-06, + "loss": 0.3911, + "num_input_tokens_seen": 66074816, + "step": 21000 + }, + { + "epoch": 1.3446642340439152, + "grad_norm": 5.3024468421936035, + "learning_rate": 4.577874482342734e-06, + "loss": 0.2529, + "num_input_tokens_seen": 66091584, + "step": 21005 + }, + { + "epoch": 1.3449843159848922, + "grad_norm": 25.161151885986328, + "learning_rate": 4.577563792622046e-06, + "loss": 0.3473, + "num_input_tokens_seen": 66106752, + "step": 21010 + }, + { + "epoch": 1.345304397925869, + "grad_norm": 15.724794387817383, + "learning_rate": 4.577252999158642e-06, + "loss": 0.3127, + "num_input_tokens_seen": 66122880, + "step": 21015 + }, + { + "epoch": 1.345624479866846, + "grad_norm": 11.288260459899902, + "learning_rate": 4.576942101968044e-06, + "loss": 0.3402, + "num_input_tokens_seen": 66138368, + "step": 21020 + }, + { + "epoch": 1.3459445618078227, + "grad_norm": 38.135498046875, + "learning_rate": 4.576631101065776e-06, + "loss": 0.3136, + "num_input_tokens_seen": 66154112, + "step": 21025 + }, + { + "epoch": 1.3462646437487997, + "grad_norm": 20.698209762573242, + "learning_rate": 4.576319996467366e-06, + "loss": 0.2815, + "num_input_tokens_seen": 66169408, + "step": 21030 + }, + { + "epoch": 1.3465847256897767, + "grad_norm": 25.618375778198242, + "learning_rate": 4.576008788188353e-06, + "loss": 0.3331, + "num_input_tokens_seen": 66184512, + "step": 21035 + }, + { + "epoch": 1.3469048076307535, + "grad_norm": 34.97414779663086, + "learning_rate": 4.575697476244273e-06, + "loss": 0.3088, + "num_input_tokens_seen": 66200512, + "step": 21040 + }, + { + "epoch": 1.3472248895717303, + "grad_norm": 41.438961029052734, + "learning_rate": 4.575386060650673e-06, + "loss": 0.2739, + "num_input_tokens_seen": 66216512, + "step": 21045 + }, + { + "epoch": 1.3475449715127072, + "grad_norm": 20.297529220581055, + "learning_rate": 4.575074541423104e-06, + "loss": 0.2862, + "num_input_tokens_seen": 66232576, + "step": 21050 + }, + { + "epoch": 1.3478650534536842, + "grad_norm": 17.718713760375977, + "learning_rate": 4.574762918577121e-06, + "loss": 0.2462, + "num_input_tokens_seen": 66247232, + "step": 21055 + }, + { + "epoch": 1.348185135394661, + "grad_norm": 26.563922882080078, + "learning_rate": 4.574451192128286e-06, + "loss": 0.449, + "num_input_tokens_seen": 66262784, + "step": 21060 + }, + { + "epoch": 1.348505217335638, + "grad_norm": 20.45589828491211, + "learning_rate": 4.574139362092162e-06, + "loss": 0.3133, + "num_input_tokens_seen": 66278272, + "step": 21065 + }, + { + "epoch": 1.3488252992766148, + "grad_norm": 30.89104652404785, + "learning_rate": 4.573827428484322e-06, + "loss": 0.3309, + "num_input_tokens_seen": 66294528, + "step": 21070 + }, + { + "epoch": 1.3491453812175918, + "grad_norm": 13.189988136291504, + "learning_rate": 4.5735153913203436e-06, + "loss": 0.4058, + "num_input_tokens_seen": 66309760, + "step": 21075 + }, + { + "epoch": 1.3494654631585685, + "grad_norm": 22.10744857788086, + "learning_rate": 4.573203250615807e-06, + "loss": 0.3035, + "num_input_tokens_seen": 66324800, + "step": 21080 + }, + { + "epoch": 1.3497855450995455, + "grad_norm": 20.13356590270996, + "learning_rate": 4.572891006386298e-06, + "loss": 0.2789, + "num_input_tokens_seen": 66340160, + "step": 21085 + }, + { + "epoch": 1.3501056270405223, + "grad_norm": 19.696134567260742, + "learning_rate": 4.5725786586474105e-06, + "loss": 0.339, + "num_input_tokens_seen": 66355584, + "step": 21090 + }, + { + "epoch": 1.3504257089814993, + "grad_norm": 36.97060012817383, + "learning_rate": 4.57226620741474e-06, + "loss": 0.4497, + "num_input_tokens_seen": 66370624, + "step": 21095 + }, + { + "epoch": 1.3507457909224763, + "grad_norm": 21.022720336914062, + "learning_rate": 4.571953652703889e-06, + "loss": 0.3005, + "num_input_tokens_seen": 66385728, + "step": 21100 + }, + { + "epoch": 1.351065872863453, + "grad_norm": 12.025588035583496, + "learning_rate": 4.571640994530465e-06, + "loss": 0.3838, + "num_input_tokens_seen": 66401600, + "step": 21105 + }, + { + "epoch": 1.3513859548044298, + "grad_norm": 17.91087532043457, + "learning_rate": 4.571328232910081e-06, + "loss": 0.3371, + "num_input_tokens_seen": 66416896, + "step": 21110 + }, + { + "epoch": 1.3517060367454068, + "grad_norm": 40.42356872558594, + "learning_rate": 4.571015367858354e-06, + "loss": 0.4107, + "num_input_tokens_seen": 66432000, + "step": 21115 + }, + { + "epoch": 1.3520261186863838, + "grad_norm": 16.580280303955078, + "learning_rate": 4.570702399390906e-06, + "loss": 0.3327, + "num_input_tokens_seen": 66447104, + "step": 21120 + }, + { + "epoch": 1.3523462006273606, + "grad_norm": 42.303550720214844, + "learning_rate": 4.570389327523367e-06, + "loss": 0.4573, + "num_input_tokens_seen": 66462080, + "step": 21125 + }, + { + "epoch": 1.3526662825683375, + "grad_norm": 29.794218063354492, + "learning_rate": 4.5700761522713694e-06, + "loss": 0.3239, + "num_input_tokens_seen": 66478720, + "step": 21130 + }, + { + "epoch": 1.3529863645093143, + "grad_norm": 24.782896041870117, + "learning_rate": 4.569762873650551e-06, + "loss": 0.2951, + "num_input_tokens_seen": 66492992, + "step": 21135 + }, + { + "epoch": 1.3533064464502913, + "grad_norm": 27.507070541381836, + "learning_rate": 4.5694494916765564e-06, + "loss": 0.2879, + "num_input_tokens_seen": 66508608, + "step": 21140 + }, + { + "epoch": 1.3536265283912683, + "grad_norm": 10.91601848602295, + "learning_rate": 4.569136006365033e-06, + "loss": 0.3293, + "num_input_tokens_seen": 66524608, + "step": 21145 + }, + { + "epoch": 1.353946610332245, + "grad_norm": 32.416603088378906, + "learning_rate": 4.568822417731635e-06, + "loss": 0.2751, + "num_input_tokens_seen": 66539328, + "step": 21150 + }, + { + "epoch": 1.3542666922732218, + "grad_norm": 28.05717658996582, + "learning_rate": 4.568508725792022e-06, + "loss": 0.2727, + "num_input_tokens_seen": 66555776, + "step": 21155 + }, + { + "epoch": 1.3545867742141988, + "grad_norm": 50.21076965332031, + "learning_rate": 4.568194930561857e-06, + "loss": 0.3236, + "num_input_tokens_seen": 66570816, + "step": 21160 + }, + { + "epoch": 1.3549068561551758, + "grad_norm": 24.72370719909668, + "learning_rate": 4.567881032056811e-06, + "loss": 0.3128, + "num_input_tokens_seen": 66586560, + "step": 21165 + }, + { + "epoch": 1.3552269380961526, + "grad_norm": 32.95760726928711, + "learning_rate": 4.567567030292557e-06, + "loss": 0.4138, + "num_input_tokens_seen": 66603840, + "step": 21170 + }, + { + "epoch": 1.3555470200371296, + "grad_norm": 19.285249710083008, + "learning_rate": 4.567252925284775e-06, + "loss": 0.3511, + "num_input_tokens_seen": 66619072, + "step": 21175 + }, + { + "epoch": 1.3558671019781063, + "grad_norm": 11.325016021728516, + "learning_rate": 4.56693871704915e-06, + "loss": 0.3001, + "num_input_tokens_seen": 66634560, + "step": 21180 + }, + { + "epoch": 1.3561871839190833, + "grad_norm": 26.243927001953125, + "learning_rate": 4.5666244056013705e-06, + "loss": 0.3181, + "num_input_tokens_seen": 66650112, + "step": 21185 + }, + { + "epoch": 1.35650726586006, + "grad_norm": 48.58623504638672, + "learning_rate": 4.566309990957134e-06, + "loss": 0.3988, + "num_input_tokens_seen": 66664832, + "step": 21190 + }, + { + "epoch": 1.356827347801037, + "grad_norm": 18.637304306030273, + "learning_rate": 4.5659954731321405e-06, + "loss": 0.3962, + "num_input_tokens_seen": 66680000, + "step": 21195 + }, + { + "epoch": 1.3571474297420139, + "grad_norm": 66.02689361572266, + "learning_rate": 4.565680852142093e-06, + "loss": 0.3252, + "num_input_tokens_seen": 66695296, + "step": 21200 + }, + { + "epoch": 1.3574675116829908, + "grad_norm": 32.469017028808594, + "learning_rate": 4.565366128002704e-06, + "loss": 0.4093, + "num_input_tokens_seen": 66711040, + "step": 21205 + }, + { + "epoch": 1.3577875936239678, + "grad_norm": 53.09552764892578, + "learning_rate": 4.565051300729689e-06, + "loss": 0.3274, + "num_input_tokens_seen": 66726720, + "step": 21210 + }, + { + "epoch": 1.3581076755649446, + "grad_norm": 25.76593017578125, + "learning_rate": 4.564736370338768e-06, + "loss": 0.3752, + "num_input_tokens_seen": 66743360, + "step": 21215 + }, + { + "epoch": 1.3584277575059216, + "grad_norm": 13.00086498260498, + "learning_rate": 4.564421336845668e-06, + "loss": 0.2026, + "num_input_tokens_seen": 66758912, + "step": 21220 + }, + { + "epoch": 1.3587478394468984, + "grad_norm": 25.950016021728516, + "learning_rate": 4.564106200266119e-06, + "loss": 0.3119, + "num_input_tokens_seen": 66774144, + "step": 21225 + }, + { + "epoch": 1.3590679213878754, + "grad_norm": 22.492929458618164, + "learning_rate": 4.563790960615858e-06, + "loss": 0.3391, + "num_input_tokens_seen": 66790080, + "step": 21230 + }, + { + "epoch": 1.3593880033288521, + "grad_norm": 14.839498519897461, + "learning_rate": 4.563475617910627e-06, + "loss": 0.3171, + "num_input_tokens_seen": 66805376, + "step": 21235 + }, + { + "epoch": 1.3597080852698291, + "grad_norm": 12.400606155395508, + "learning_rate": 4.5631601721661715e-06, + "loss": 0.2565, + "num_input_tokens_seen": 66823040, + "step": 21240 + }, + { + "epoch": 1.3600281672108059, + "grad_norm": 36.89653778076172, + "learning_rate": 4.562844623398242e-06, + "loss": 0.3125, + "num_input_tokens_seen": 66838144, + "step": 21245 + }, + { + "epoch": 1.3603482491517829, + "grad_norm": 55.07625961303711, + "learning_rate": 4.5625289716226e-06, + "loss": 0.4704, + "num_input_tokens_seen": 66854144, + "step": 21250 + }, + { + "epoch": 1.3606683310927599, + "grad_norm": 19.583057403564453, + "learning_rate": 4.5622132168550025e-06, + "loss": 0.4002, + "num_input_tokens_seen": 66869568, + "step": 21255 + }, + { + "epoch": 1.3609884130337366, + "grad_norm": 16.703632354736328, + "learning_rate": 4.5618973591112186e-06, + "loss": 0.3586, + "num_input_tokens_seen": 66885184, + "step": 21260 + }, + { + "epoch": 1.3613084949747134, + "grad_norm": 19.205238342285156, + "learning_rate": 4.5615813984070215e-06, + "loss": 0.393, + "num_input_tokens_seen": 66900480, + "step": 21265 + }, + { + "epoch": 1.3616285769156904, + "grad_norm": 33.04966354370117, + "learning_rate": 4.561265334758187e-06, + "loss": 0.2858, + "num_input_tokens_seen": 66918144, + "step": 21270 + }, + { + "epoch": 1.3619486588566674, + "grad_norm": 6.430321216583252, + "learning_rate": 4.560949168180499e-06, + "loss": 0.3018, + "num_input_tokens_seen": 66933376, + "step": 21275 + }, + { + "epoch": 1.3622687407976442, + "grad_norm": 42.24263381958008, + "learning_rate": 4.560632898689744e-06, + "loss": 0.3694, + "num_input_tokens_seen": 66949248, + "step": 21280 + }, + { + "epoch": 1.3625888227386211, + "grad_norm": 29.584157943725586, + "learning_rate": 4.560316526301716e-06, + "loss": 0.4479, + "num_input_tokens_seen": 66964224, + "step": 21285 + }, + { + "epoch": 1.362908904679598, + "grad_norm": 25.649242401123047, + "learning_rate": 4.560000051032212e-06, + "loss": 0.3103, + "num_input_tokens_seen": 66979264, + "step": 21290 + }, + { + "epoch": 1.363228986620575, + "grad_norm": 54.8951301574707, + "learning_rate": 4.559683472897037e-06, + "loss": 0.4098, + "num_input_tokens_seen": 66994176, + "step": 21295 + }, + { + "epoch": 1.363549068561552, + "grad_norm": 38.284828186035156, + "learning_rate": 4.5593667919119956e-06, + "loss": 0.3061, + "num_input_tokens_seen": 67009856, + "step": 21300 + }, + { + "epoch": 1.3638691505025287, + "grad_norm": 28.125364303588867, + "learning_rate": 4.559050008092904e-06, + "loss": 0.2881, + "num_input_tokens_seen": 67026304, + "step": 21305 + }, + { + "epoch": 1.3641892324435054, + "grad_norm": 28.090803146362305, + "learning_rate": 4.558733121455582e-06, + "loss": 0.3375, + "num_input_tokens_seen": 67041984, + "step": 21310 + }, + { + "epoch": 1.3645093143844824, + "grad_norm": 8.138267517089844, + "learning_rate": 4.55841613201585e-06, + "loss": 0.4684, + "num_input_tokens_seen": 67057344, + "step": 21315 + }, + { + "epoch": 1.3648293963254594, + "grad_norm": 120.21456146240234, + "learning_rate": 4.558099039789539e-06, + "loss": 0.3709, + "num_input_tokens_seen": 67072512, + "step": 21320 + }, + { + "epoch": 1.3651494782664362, + "grad_norm": 19.639848709106445, + "learning_rate": 4.557781844792482e-06, + "loss": 0.3174, + "num_input_tokens_seen": 67088384, + "step": 21325 + }, + { + "epoch": 1.3654695602074132, + "grad_norm": 21.77197265625, + "learning_rate": 4.557464547040517e-06, + "loss": 0.3135, + "num_input_tokens_seen": 67105024, + "step": 21330 + }, + { + "epoch": 1.36578964214839, + "grad_norm": 14.337247848510742, + "learning_rate": 4.557147146549491e-06, + "loss": 0.4033, + "num_input_tokens_seen": 67134528, + "step": 21335 + }, + { + "epoch": 1.366109724089367, + "grad_norm": 34.869964599609375, + "learning_rate": 4.556829643335251e-06, + "loss": 0.3901, + "num_input_tokens_seen": 67149760, + "step": 21340 + }, + { + "epoch": 1.3664298060303437, + "grad_norm": 26.091344833374023, + "learning_rate": 4.556512037413653e-06, + "loss": 0.2852, + "num_input_tokens_seen": 67164800, + "step": 21345 + }, + { + "epoch": 1.3667498879713207, + "grad_norm": 29.926584243774414, + "learning_rate": 4.556194328800555e-06, + "loss": 0.3968, + "num_input_tokens_seen": 67180416, + "step": 21350 + }, + { + "epoch": 1.3670699699122975, + "grad_norm": 26.98785400390625, + "learning_rate": 4.555876517511822e-06, + "loss": 0.3061, + "num_input_tokens_seen": 67195840, + "step": 21355 + }, + { + "epoch": 1.3673900518532744, + "grad_norm": 20.19057846069336, + "learning_rate": 4.555558603563326e-06, + "loss": 0.2871, + "num_input_tokens_seen": 67211328, + "step": 21360 + }, + { + "epoch": 1.3677101337942514, + "grad_norm": 34.207977294921875, + "learning_rate": 4.55524058697094e-06, + "loss": 0.3657, + "num_input_tokens_seen": 67225920, + "step": 21365 + }, + { + "epoch": 1.3680302157352282, + "grad_norm": 10.01500415802002, + "learning_rate": 4.554922467750544e-06, + "loss": 0.3366, + "num_input_tokens_seen": 67240832, + "step": 21370 + }, + { + "epoch": 1.368350297676205, + "grad_norm": 8.111189842224121, + "learning_rate": 4.554604245918024e-06, + "loss": 0.4125, + "num_input_tokens_seen": 67256128, + "step": 21375 + }, + { + "epoch": 1.368670379617182, + "grad_norm": 21.10219955444336, + "learning_rate": 4.55428592148927e-06, + "loss": 0.3582, + "num_input_tokens_seen": 67272512, + "step": 21380 + }, + { + "epoch": 1.368990461558159, + "grad_norm": 25.50391387939453, + "learning_rate": 4.553967494480177e-06, + "loss": 0.2941, + "num_input_tokens_seen": 67288064, + "step": 21385 + }, + { + "epoch": 1.3693105434991357, + "grad_norm": 11.970836639404297, + "learning_rate": 4.553648964906646e-06, + "loss": 0.2946, + "num_input_tokens_seen": 67304128, + "step": 21390 + }, + { + "epoch": 1.3696306254401127, + "grad_norm": 34.97956848144531, + "learning_rate": 4.553330332784583e-06, + "loss": 0.352, + "num_input_tokens_seen": 67319232, + "step": 21395 + }, + { + "epoch": 1.3699507073810895, + "grad_norm": 24.59123420715332, + "learning_rate": 4.553011598129899e-06, + "loss": 0.3625, + "num_input_tokens_seen": 67335296, + "step": 21400 + }, + { + "epoch": 1.3702707893220665, + "grad_norm": 12.119502067565918, + "learning_rate": 4.552692760958509e-06, + "loss": 0.3375, + "num_input_tokens_seen": 67349760, + "step": 21405 + }, + { + "epoch": 1.3705908712630435, + "grad_norm": 10.310080528259277, + "learning_rate": 4.552373821286334e-06, + "loss": 0.3049, + "num_input_tokens_seen": 67364672, + "step": 21410 + }, + { + "epoch": 1.3709109532040202, + "grad_norm": 13.700472831726074, + "learning_rate": 4.5520547791293025e-06, + "loss": 0.3813, + "num_input_tokens_seen": 67379648, + "step": 21415 + }, + { + "epoch": 1.371231035144997, + "grad_norm": 22.170907974243164, + "learning_rate": 4.551735634503343e-06, + "loss": 0.2384, + "num_input_tokens_seen": 67394432, + "step": 21420 + }, + { + "epoch": 1.371551117085974, + "grad_norm": 41.851158142089844, + "learning_rate": 4.551416387424393e-06, + "loss": 0.3446, + "num_input_tokens_seen": 67409536, + "step": 21425 + }, + { + "epoch": 1.371871199026951, + "grad_norm": 14.323034286499023, + "learning_rate": 4.551097037908394e-06, + "loss": 0.3889, + "num_input_tokens_seen": 67425664, + "step": 21430 + }, + { + "epoch": 1.3721912809679278, + "grad_norm": 10.534730911254883, + "learning_rate": 4.5507775859712935e-06, + "loss": 0.3429, + "num_input_tokens_seen": 67440960, + "step": 21435 + }, + { + "epoch": 1.3725113629089047, + "grad_norm": 10.54249095916748, + "learning_rate": 4.550458031629041e-06, + "loss": 0.3046, + "num_input_tokens_seen": 67455744, + "step": 21440 + }, + { + "epoch": 1.3728314448498815, + "grad_norm": 19.287057876586914, + "learning_rate": 4.550138374897596e-06, + "loss": 0.3499, + "num_input_tokens_seen": 67471104, + "step": 21445 + }, + { + "epoch": 1.3731515267908585, + "grad_norm": 40.035457611083984, + "learning_rate": 4.549818615792919e-06, + "loss": 0.4008, + "num_input_tokens_seen": 67486272, + "step": 21450 + }, + { + "epoch": 1.3734716087318353, + "grad_norm": 38.43830490112305, + "learning_rate": 4.549498754330978e-06, + "loss": 0.3321, + "num_input_tokens_seen": 67502144, + "step": 21455 + }, + { + "epoch": 1.3737916906728123, + "grad_norm": 25.099674224853516, + "learning_rate": 4.549178790527744e-06, + "loss": 0.285, + "num_input_tokens_seen": 67517248, + "step": 21460 + }, + { + "epoch": 1.374111772613789, + "grad_norm": 19.73263168334961, + "learning_rate": 4.548858724399195e-06, + "loss": 0.3678, + "num_input_tokens_seen": 67532864, + "step": 21465 + }, + { + "epoch": 1.374431854554766, + "grad_norm": 16.38426971435547, + "learning_rate": 4.548538555961314e-06, + "loss": 0.2855, + "num_input_tokens_seen": 67548608, + "step": 21470 + }, + { + "epoch": 1.374751936495743, + "grad_norm": 23.93389892578125, + "learning_rate": 4.548218285230087e-06, + "loss": 0.3305, + "num_input_tokens_seen": 67564544, + "step": 21475 + }, + { + "epoch": 1.3750720184367198, + "grad_norm": 21.625276565551758, + "learning_rate": 4.547897912221509e-06, + "loss": 0.3284, + "num_input_tokens_seen": 67579776, + "step": 21480 + }, + { + "epoch": 1.3753921003776968, + "grad_norm": 25.98988914489746, + "learning_rate": 4.547577436951575e-06, + "loss": 0.4116, + "num_input_tokens_seen": 67596096, + "step": 21485 + }, + { + "epoch": 1.3757121823186735, + "grad_norm": 22.144550323486328, + "learning_rate": 4.547256859436289e-06, + "loss": 0.3156, + "num_input_tokens_seen": 67611136, + "step": 21490 + }, + { + "epoch": 1.3760322642596505, + "grad_norm": 32.53596878051758, + "learning_rate": 4.54693617969166e-06, + "loss": 0.3529, + "num_input_tokens_seen": 67627840, + "step": 21495 + }, + { + "epoch": 1.3763523462006273, + "grad_norm": 28.568029403686523, + "learning_rate": 4.5466153977337004e-06, + "loss": 0.348, + "num_input_tokens_seen": 67643456, + "step": 21500 + }, + { + "epoch": 1.3766724281416043, + "grad_norm": 16.21377182006836, + "learning_rate": 4.5462945135784285e-06, + "loss": 0.4476, + "num_input_tokens_seen": 67659840, + "step": 21505 + }, + { + "epoch": 1.376992510082581, + "grad_norm": 16.59497833251953, + "learning_rate": 4.545973527241867e-06, + "loss": 0.357, + "num_input_tokens_seen": 67674624, + "step": 21510 + }, + { + "epoch": 1.377312592023558, + "grad_norm": 29.669082641601562, + "learning_rate": 4.545652438740045e-06, + "loss": 0.4121, + "num_input_tokens_seen": 67689664, + "step": 21515 + }, + { + "epoch": 1.377632673964535, + "grad_norm": 32.38674545288086, + "learning_rate": 4.5453312480889946e-06, + "loss": 0.3684, + "num_input_tokens_seen": 67705280, + "step": 21520 + }, + { + "epoch": 1.3779527559055118, + "grad_norm": 16.485950469970703, + "learning_rate": 4.545009955304756e-06, + "loss": 0.2759, + "num_input_tokens_seen": 67720640, + "step": 21525 + }, + { + "epoch": 1.3782728378464886, + "grad_norm": 17.955957412719727, + "learning_rate": 4.544688560403372e-06, + "loss": 0.3539, + "num_input_tokens_seen": 67735424, + "step": 21530 + }, + { + "epoch": 1.3785929197874656, + "grad_norm": 23.42761993408203, + "learning_rate": 4.544367063400893e-06, + "loss": 0.287, + "num_input_tokens_seen": 67750528, + "step": 21535 + }, + { + "epoch": 1.3789130017284426, + "grad_norm": 34.223594665527344, + "learning_rate": 4.544045464313371e-06, + "loss": 0.2853, + "num_input_tokens_seen": 67766464, + "step": 21540 + }, + { + "epoch": 1.3792330836694193, + "grad_norm": 22.9965763092041, + "learning_rate": 4.543723763156864e-06, + "loss": 0.2795, + "num_input_tokens_seen": 67782144, + "step": 21545 + }, + { + "epoch": 1.3795531656103963, + "grad_norm": 36.429168701171875, + "learning_rate": 4.543401959947439e-06, + "loss": 0.3607, + "num_input_tokens_seen": 67798208, + "step": 21550 + }, + { + "epoch": 1.379873247551373, + "grad_norm": 13.410932540893555, + "learning_rate": 4.543080054701164e-06, + "loss": 0.278, + "num_input_tokens_seen": 67813504, + "step": 21555 + }, + { + "epoch": 1.38019332949235, + "grad_norm": 32.94371795654297, + "learning_rate": 4.542758047434113e-06, + "loss": 0.3043, + "num_input_tokens_seen": 67829248, + "step": 21560 + }, + { + "epoch": 1.380513411433327, + "grad_norm": 60.808258056640625, + "learning_rate": 4.542435938162365e-06, + "loss": 0.3572, + "num_input_tokens_seen": 67844544, + "step": 21565 + }, + { + "epoch": 1.3808334933743038, + "grad_norm": 10.637088775634766, + "learning_rate": 4.542113726902005e-06, + "loss": 0.2984, + "num_input_tokens_seen": 67860032, + "step": 21570 + }, + { + "epoch": 1.3811535753152806, + "grad_norm": 24.152820587158203, + "learning_rate": 4.5417914136691225e-06, + "loss": 0.4087, + "num_input_tokens_seen": 67875584, + "step": 21575 + }, + { + "epoch": 1.3814736572562576, + "grad_norm": 26.2590274810791, + "learning_rate": 4.541468998479812e-06, + "loss": 0.3223, + "num_input_tokens_seen": 67891584, + "step": 21580 + }, + { + "epoch": 1.3817937391972346, + "grad_norm": 7.221618175506592, + "learning_rate": 4.5411464813501736e-06, + "loss": 0.296, + "num_input_tokens_seen": 67909120, + "step": 21585 + }, + { + "epoch": 1.3821138211382114, + "grad_norm": 42.458106994628906, + "learning_rate": 4.540823862296311e-06, + "loss": 0.2323, + "num_input_tokens_seen": 67925952, + "step": 21590 + }, + { + "epoch": 1.3824339030791883, + "grad_norm": 22.516460418701172, + "learning_rate": 4.5405011413343355e-06, + "loss": 0.3601, + "num_input_tokens_seen": 67941120, + "step": 21595 + }, + { + "epoch": 1.382753985020165, + "grad_norm": 31.999347686767578, + "learning_rate": 4.540178318480362e-06, + "loss": 0.3833, + "num_input_tokens_seen": 67957120, + "step": 21600 + }, + { + "epoch": 1.383074066961142, + "grad_norm": 33.22263717651367, + "learning_rate": 4.53985539375051e-06, + "loss": 0.3215, + "num_input_tokens_seen": 67972288, + "step": 21605 + }, + { + "epoch": 1.3833941489021189, + "grad_norm": 25.71856689453125, + "learning_rate": 4.539532367160905e-06, + "loss": 0.4772, + "num_input_tokens_seen": 67988544, + "step": 21610 + }, + { + "epoch": 1.3837142308430959, + "grad_norm": 18.04705047607422, + "learning_rate": 4.539209238727677e-06, + "loss": 0.2616, + "num_input_tokens_seen": 68003200, + "step": 21615 + }, + { + "epoch": 1.3840343127840726, + "grad_norm": 22.712167739868164, + "learning_rate": 4.5388860084669616e-06, + "loss": 0.2397, + "num_input_tokens_seen": 68019392, + "step": 21620 + }, + { + "epoch": 1.3843543947250496, + "grad_norm": 50.4032096862793, + "learning_rate": 4.538562676394898e-06, + "loss": 0.2829, + "num_input_tokens_seen": 68035328, + "step": 21625 + }, + { + "epoch": 1.3846744766660266, + "grad_norm": 28.549089431762695, + "learning_rate": 4.5382392425276345e-06, + "loss": 0.3202, + "num_input_tokens_seen": 68051136, + "step": 21630 + }, + { + "epoch": 1.3849945586070034, + "grad_norm": 25.48466682434082, + "learning_rate": 4.537915706881319e-06, + "loss": 0.3868, + "num_input_tokens_seen": 68066176, + "step": 21635 + }, + { + "epoch": 1.3853146405479801, + "grad_norm": 21.95659828186035, + "learning_rate": 4.5375920694721085e-06, + "loss": 0.2949, + "num_input_tokens_seen": 68081536, + "step": 21640 + }, + { + "epoch": 1.3856347224889571, + "grad_norm": 14.85952377319336, + "learning_rate": 4.537268330316163e-06, + "loss": 0.3576, + "num_input_tokens_seen": 68097472, + "step": 21645 + }, + { + "epoch": 1.3859548044299341, + "grad_norm": 64.99490356445312, + "learning_rate": 4.53694448942965e-06, + "loss": 0.4295, + "num_input_tokens_seen": 68113728, + "step": 21650 + }, + { + "epoch": 1.386274886370911, + "grad_norm": 13.988936424255371, + "learning_rate": 4.536620546828738e-06, + "loss": 0.3789, + "num_input_tokens_seen": 68129920, + "step": 21655 + }, + { + "epoch": 1.3865949683118879, + "grad_norm": 18.651737213134766, + "learning_rate": 4.536296502529605e-06, + "loss": 0.3784, + "num_input_tokens_seen": 68144832, + "step": 21660 + }, + { + "epoch": 1.3869150502528647, + "grad_norm": 14.036263465881348, + "learning_rate": 4.535972356548431e-06, + "loss": 0.2829, + "num_input_tokens_seen": 68161728, + "step": 21665 + }, + { + "epoch": 1.3872351321938416, + "grad_norm": 58.661720275878906, + "learning_rate": 4.535648108901403e-06, + "loss": 0.3346, + "num_input_tokens_seen": 68177280, + "step": 21670 + }, + { + "epoch": 1.3875552141348186, + "grad_norm": 43.36638641357422, + "learning_rate": 4.535323759604712e-06, + "loss": 0.3269, + "num_input_tokens_seen": 68192640, + "step": 21675 + }, + { + "epoch": 1.3878752960757954, + "grad_norm": 76.3670654296875, + "learning_rate": 4.534999308674553e-06, + "loss": 0.2403, + "num_input_tokens_seen": 68208960, + "step": 21680 + }, + { + "epoch": 1.3881953780167722, + "grad_norm": 15.19135856628418, + "learning_rate": 4.534674756127129e-06, + "loss": 0.3585, + "num_input_tokens_seen": 68224192, + "step": 21685 + }, + { + "epoch": 1.3885154599577492, + "grad_norm": 11.139825820922852, + "learning_rate": 4.534350101978646e-06, + "loss": 0.231, + "num_input_tokens_seen": 68240128, + "step": 21690 + }, + { + "epoch": 1.3888355418987262, + "grad_norm": 20.596439361572266, + "learning_rate": 4.5340253462453155e-06, + "loss": 0.2199, + "num_input_tokens_seen": 68257408, + "step": 21695 + }, + { + "epoch": 1.389155623839703, + "grad_norm": 31.215574264526367, + "learning_rate": 4.533700488943354e-06, + "loss": 0.2658, + "num_input_tokens_seen": 68273408, + "step": 21700 + }, + { + "epoch": 1.38947570578068, + "grad_norm": 39.857547760009766, + "learning_rate": 4.533375530088983e-06, + "loss": 0.3838, + "num_input_tokens_seen": 68290688, + "step": 21705 + }, + { + "epoch": 1.3897957877216567, + "grad_norm": 34.296504974365234, + "learning_rate": 4.533050469698429e-06, + "loss": 0.2756, + "num_input_tokens_seen": 68306560, + "step": 21710 + }, + { + "epoch": 1.3901158696626337, + "grad_norm": 31.093183517456055, + "learning_rate": 4.5327253077879255e-06, + "loss": 0.2777, + "num_input_tokens_seen": 68321472, + "step": 21715 + }, + { + "epoch": 1.3904359516036104, + "grad_norm": 14.919479370117188, + "learning_rate": 4.532400044373707e-06, + "loss": 0.3467, + "num_input_tokens_seen": 68336640, + "step": 21720 + }, + { + "epoch": 1.3907560335445874, + "grad_norm": 14.325254440307617, + "learning_rate": 4.532074679472017e-06, + "loss": 0.4255, + "num_input_tokens_seen": 68354880, + "step": 21725 + }, + { + "epoch": 1.3910761154855642, + "grad_norm": 36.78325271606445, + "learning_rate": 4.531749213099103e-06, + "loss": 0.2911, + "num_input_tokens_seen": 68370944, + "step": 21730 + }, + { + "epoch": 1.3913961974265412, + "grad_norm": 28.2026309967041, + "learning_rate": 4.531423645271215e-06, + "loss": 0.414, + "num_input_tokens_seen": 68387264, + "step": 21735 + }, + { + "epoch": 1.3917162793675182, + "grad_norm": 11.24506950378418, + "learning_rate": 4.531097976004613e-06, + "loss": 0.3035, + "num_input_tokens_seen": 68403968, + "step": 21740 + }, + { + "epoch": 1.392036361308495, + "grad_norm": 42.759220123291016, + "learning_rate": 4.530772205315557e-06, + "loss": 0.4344, + "num_input_tokens_seen": 68419200, + "step": 21745 + }, + { + "epoch": 1.392356443249472, + "grad_norm": 16.450014114379883, + "learning_rate": 4.530446333220314e-06, + "loss": 0.2462, + "num_input_tokens_seen": 68434816, + "step": 21750 + }, + { + "epoch": 1.3926765251904487, + "grad_norm": 35.73378372192383, + "learning_rate": 4.530120359735158e-06, + "loss": 0.3194, + "num_input_tokens_seen": 68451200, + "step": 21755 + }, + { + "epoch": 1.3929966071314257, + "grad_norm": 13.111351013183594, + "learning_rate": 4.529794284876367e-06, + "loss": 0.2332, + "num_input_tokens_seen": 68468480, + "step": 21760 + }, + { + "epoch": 1.3933166890724025, + "grad_norm": 25.366077423095703, + "learning_rate": 4.529468108660221e-06, + "loss": 0.3941, + "num_input_tokens_seen": 68484928, + "step": 21765 + }, + { + "epoch": 1.3936367710133795, + "grad_norm": 67.05518341064453, + "learning_rate": 4.529141831103009e-06, + "loss": 0.4212, + "num_input_tokens_seen": 68499648, + "step": 21770 + }, + { + "epoch": 1.3939568529543562, + "grad_norm": 26.96265411376953, + "learning_rate": 4.528815452221023e-06, + "loss": 0.2763, + "num_input_tokens_seen": 68515072, + "step": 21775 + }, + { + "epoch": 1.3942769348953332, + "grad_norm": 27.347036361694336, + "learning_rate": 4.528488972030561e-06, + "loss": 0.3863, + "num_input_tokens_seen": 68530688, + "step": 21780 + }, + { + "epoch": 1.3945970168363102, + "grad_norm": 11.994495391845703, + "learning_rate": 4.528162390547926e-06, + "loss": 0.3287, + "num_input_tokens_seen": 68545536, + "step": 21785 + }, + { + "epoch": 1.394917098777287, + "grad_norm": 18.374771118164062, + "learning_rate": 4.527835707789426e-06, + "loss": 0.3477, + "num_input_tokens_seen": 68559680, + "step": 21790 + }, + { + "epoch": 1.3952371807182637, + "grad_norm": 13.018945693969727, + "learning_rate": 4.527508923771373e-06, + "loss": 0.3721, + "num_input_tokens_seen": 68574784, + "step": 21795 + }, + { + "epoch": 1.3955572626592407, + "grad_norm": 38.96711349487305, + "learning_rate": 4.527182038510085e-06, + "loss": 0.3482, + "num_input_tokens_seen": 68590336, + "step": 21800 + }, + { + "epoch": 1.3958773446002177, + "grad_norm": 21.017677307128906, + "learning_rate": 4.5268550520218854e-06, + "loss": 0.3791, + "num_input_tokens_seen": 68606592, + "step": 21805 + }, + { + "epoch": 1.3961974265411945, + "grad_norm": 29.231115341186523, + "learning_rate": 4.526527964323102e-06, + "loss": 0.2334, + "num_input_tokens_seen": 68621376, + "step": 21810 + }, + { + "epoch": 1.3965175084821715, + "grad_norm": 28.933732986450195, + "learning_rate": 4.526200775430068e-06, + "loss": 0.494, + "num_input_tokens_seen": 68638464, + "step": 21815 + }, + { + "epoch": 1.3968375904231483, + "grad_norm": 14.406923294067383, + "learning_rate": 4.525873485359121e-06, + "loss": 0.3872, + "num_input_tokens_seen": 68653824, + "step": 21820 + }, + { + "epoch": 1.3971576723641252, + "grad_norm": 15.28013801574707, + "learning_rate": 4.525546094126606e-06, + "loss": 0.3141, + "num_input_tokens_seen": 68670144, + "step": 21825 + }, + { + "epoch": 1.3974777543051022, + "grad_norm": 15.290909767150879, + "learning_rate": 4.525218601748867e-06, + "loss": 0.2614, + "num_input_tokens_seen": 68685824, + "step": 21830 + }, + { + "epoch": 1.397797836246079, + "grad_norm": 12.591951370239258, + "learning_rate": 4.5248910082422625e-06, + "loss": 0.3664, + "num_input_tokens_seen": 68701120, + "step": 21835 + }, + { + "epoch": 1.3981179181870558, + "grad_norm": 68.44755554199219, + "learning_rate": 4.524563313623147e-06, + "loss": 0.3722, + "num_input_tokens_seen": 68716416, + "step": 21840 + }, + { + "epoch": 1.3984380001280328, + "grad_norm": 22.399410247802734, + "learning_rate": 4.524235517907885e-06, + "loss": 0.3258, + "num_input_tokens_seen": 68731648, + "step": 21845 + }, + { + "epoch": 1.3987580820690098, + "grad_norm": 30.020721435546875, + "learning_rate": 4.523907621112846e-06, + "loss": 0.2361, + "num_input_tokens_seen": 68746560, + "step": 21850 + }, + { + "epoch": 1.3990781640099865, + "grad_norm": 30.099180221557617, + "learning_rate": 4.523579623254403e-06, + "loss": 0.2422, + "num_input_tokens_seen": 68764352, + "step": 21855 + }, + { + "epoch": 1.3993982459509635, + "grad_norm": 11.114237785339355, + "learning_rate": 4.523251524348933e-06, + "loss": 0.3823, + "num_input_tokens_seen": 68779584, + "step": 21860 + }, + { + "epoch": 1.3997183278919403, + "grad_norm": 25.757980346679688, + "learning_rate": 4.522923324412821e-06, + "loss": 0.5307, + "num_input_tokens_seen": 68795584, + "step": 21865 + }, + { + "epoch": 1.4000384098329173, + "grad_norm": 22.946334838867188, + "learning_rate": 4.5225950234624545e-06, + "loss": 0.4641, + "num_input_tokens_seen": 68811648, + "step": 21870 + }, + { + "epoch": 1.400358491773894, + "grad_norm": 12.625066757202148, + "learning_rate": 4.5222666215142284e-06, + "loss": 0.3552, + "num_input_tokens_seen": 68826816, + "step": 21875 + }, + { + "epoch": 1.400678573714871, + "grad_norm": 33.04092788696289, + "learning_rate": 4.521938118584541e-06, + "loss": 0.4762, + "num_input_tokens_seen": 68842432, + "step": 21880 + }, + { + "epoch": 1.4009986556558478, + "grad_norm": 14.166179656982422, + "learning_rate": 4.521609514689796e-06, + "loss": 0.3016, + "num_input_tokens_seen": 68859456, + "step": 21885 + }, + { + "epoch": 1.4013187375968248, + "grad_norm": 43.724483489990234, + "learning_rate": 4.5212808098464015e-06, + "loss": 0.416, + "num_input_tokens_seen": 68874816, + "step": 21890 + }, + { + "epoch": 1.4016388195378018, + "grad_norm": 13.636518478393555, + "learning_rate": 4.5209520040707725e-06, + "loss": 0.2954, + "num_input_tokens_seen": 68890752, + "step": 21895 + }, + { + "epoch": 1.4019589014787786, + "grad_norm": 37.66740798950195, + "learning_rate": 4.5206230973793266e-06, + "loss": 0.428, + "num_input_tokens_seen": 68906304, + "step": 21900 + }, + { + "epoch": 1.4022789834197553, + "grad_norm": 39.363182067871094, + "learning_rate": 4.520294089788488e-06, + "loss": 0.3513, + "num_input_tokens_seen": 68922752, + "step": 21905 + }, + { + "epoch": 1.4025990653607323, + "grad_norm": 22.39931869506836, + "learning_rate": 4.519964981314686e-06, + "loss": 0.3209, + "num_input_tokens_seen": 68938624, + "step": 21910 + }, + { + "epoch": 1.4029191473017093, + "grad_norm": 28.42327308654785, + "learning_rate": 4.519635771974355e-06, + "loss": 0.364, + "num_input_tokens_seen": 68955008, + "step": 21915 + }, + { + "epoch": 1.403239229242686, + "grad_norm": 21.12906265258789, + "learning_rate": 4.519306461783933e-06, + "loss": 0.3084, + "num_input_tokens_seen": 68971840, + "step": 21920 + }, + { + "epoch": 1.403559311183663, + "grad_norm": 23.013168334960938, + "learning_rate": 4.5189770507598655e-06, + "loss": 0.3228, + "num_input_tokens_seen": 68987968, + "step": 21925 + }, + { + "epoch": 1.4038793931246398, + "grad_norm": 28.71949577331543, + "learning_rate": 4.518647538918599e-06, + "loss": 0.484, + "num_input_tokens_seen": 69003136, + "step": 21930 + }, + { + "epoch": 1.4041994750656168, + "grad_norm": 44.591102600097656, + "learning_rate": 4.51831792627659e-06, + "loss": 0.4617, + "num_input_tokens_seen": 69019072, + "step": 21935 + }, + { + "epoch": 1.4045195570065938, + "grad_norm": 11.718499183654785, + "learning_rate": 4.517988212850296e-06, + "loss": 0.2423, + "num_input_tokens_seen": 69033728, + "step": 21940 + }, + { + "epoch": 1.4048396389475706, + "grad_norm": 14.455977439880371, + "learning_rate": 4.517658398656183e-06, + "loss": 0.294, + "num_input_tokens_seen": 69049920, + "step": 21945 + }, + { + "epoch": 1.4051597208885473, + "grad_norm": 22.65013313293457, + "learning_rate": 4.517328483710719e-06, + "loss": 0.3204, + "num_input_tokens_seen": 69065280, + "step": 21950 + }, + { + "epoch": 1.4054798028295243, + "grad_norm": 37.209197998046875, + "learning_rate": 4.516998468030378e-06, + "loss": 0.2705, + "num_input_tokens_seen": 69080704, + "step": 21955 + }, + { + "epoch": 1.4057998847705013, + "grad_norm": 39.38862228393555, + "learning_rate": 4.5166683516316405e-06, + "loss": 0.3338, + "num_input_tokens_seen": 69096128, + "step": 21960 + }, + { + "epoch": 1.406119966711478, + "grad_norm": 24.814102172851562, + "learning_rate": 4.516338134530989e-06, + "loss": 0.3733, + "num_input_tokens_seen": 69110976, + "step": 21965 + }, + { + "epoch": 1.406440048652455, + "grad_norm": 17.890779495239258, + "learning_rate": 4.516007816744914e-06, + "loss": 0.3231, + "num_input_tokens_seen": 69126080, + "step": 21970 + }, + { + "epoch": 1.4067601305934319, + "grad_norm": 43.896942138671875, + "learning_rate": 4.515677398289909e-06, + "loss": 0.2929, + "num_input_tokens_seen": 69143552, + "step": 21975 + }, + { + "epoch": 1.4070802125344088, + "grad_norm": 24.841569900512695, + "learning_rate": 4.515346879182475e-06, + "loss": 0.2825, + "num_input_tokens_seen": 69159424, + "step": 21980 + }, + { + "epoch": 1.4074002944753856, + "grad_norm": 77.52684783935547, + "learning_rate": 4.515016259439115e-06, + "loss": 0.348, + "num_input_tokens_seen": 69175744, + "step": 21985 + }, + { + "epoch": 1.4077203764163626, + "grad_norm": 35.73418426513672, + "learning_rate": 4.514685539076339e-06, + "loss": 0.376, + "num_input_tokens_seen": 69190784, + "step": 21990 + }, + { + "epoch": 1.4080404583573394, + "grad_norm": 20.744461059570312, + "learning_rate": 4.514354718110662e-06, + "loss": 0.3178, + "num_input_tokens_seen": 69205888, + "step": 21995 + }, + { + "epoch": 1.4083605402983164, + "grad_norm": 12.869234085083008, + "learning_rate": 4.514023796558601e-06, + "loss": 0.4727, + "num_input_tokens_seen": 69220352, + "step": 22000 + }, + { + "epoch": 1.4086806222392934, + "grad_norm": 17.249074935913086, + "learning_rate": 4.5136927744366835e-06, + "loss": 0.2668, + "num_input_tokens_seen": 69236992, + "step": 22005 + }, + { + "epoch": 1.4090007041802701, + "grad_norm": 16.35548973083496, + "learning_rate": 4.5133616517614375e-06, + "loss": 0.2625, + "num_input_tokens_seen": 69253376, + "step": 22010 + }, + { + "epoch": 1.4093207861212471, + "grad_norm": 19.130306243896484, + "learning_rate": 4.513030428549398e-06, + "loss": 0.3506, + "num_input_tokens_seen": 69268480, + "step": 22015 + }, + { + "epoch": 1.4096408680622239, + "grad_norm": 28.03226661682129, + "learning_rate": 4.512699104817104e-06, + "loss": 0.3796, + "num_input_tokens_seen": 69283456, + "step": 22020 + }, + { + "epoch": 1.4099609500032009, + "grad_norm": 15.457958221435547, + "learning_rate": 4.512367680581101e-06, + "loss": 0.5586, + "num_input_tokens_seen": 69298432, + "step": 22025 + }, + { + "epoch": 1.4102810319441776, + "grad_norm": 39.028987884521484, + "learning_rate": 4.512036155857939e-06, + "loss": 0.4369, + "num_input_tokens_seen": 69312960, + "step": 22030 + }, + { + "epoch": 1.4106011138851546, + "grad_norm": 16.53969383239746, + "learning_rate": 4.511704530664169e-06, + "loss": 0.248, + "num_input_tokens_seen": 69327488, + "step": 22035 + }, + { + "epoch": 1.4109211958261314, + "grad_norm": 9.322970390319824, + "learning_rate": 4.511372805016355e-06, + "loss": 0.227, + "num_input_tokens_seen": 69342912, + "step": 22040 + }, + { + "epoch": 1.4112412777671084, + "grad_norm": 30.253870010375977, + "learning_rate": 4.51104097893106e-06, + "loss": 0.2542, + "num_input_tokens_seen": 69358592, + "step": 22045 + }, + { + "epoch": 1.4115613597080854, + "grad_norm": 8.545331001281738, + "learning_rate": 4.510709052424854e-06, + "loss": 0.336, + "num_input_tokens_seen": 69373632, + "step": 22050 + }, + { + "epoch": 1.4118814416490622, + "grad_norm": 12.178878784179688, + "learning_rate": 4.510377025514311e-06, + "loss": 0.2679, + "num_input_tokens_seen": 69390720, + "step": 22055 + }, + { + "epoch": 1.412201523590039, + "grad_norm": 41.098472595214844, + "learning_rate": 4.51004489821601e-06, + "loss": 0.3682, + "num_input_tokens_seen": 69406528, + "step": 22060 + }, + { + "epoch": 1.412521605531016, + "grad_norm": 40.25080108642578, + "learning_rate": 4.509712670546538e-06, + "loss": 0.4413, + "num_input_tokens_seen": 69422976, + "step": 22065 + }, + { + "epoch": 1.412841687471993, + "grad_norm": 12.519380569458008, + "learning_rate": 4.509380342522483e-06, + "loss": 0.2929, + "num_input_tokens_seen": 69440448, + "step": 22070 + }, + { + "epoch": 1.4131617694129697, + "grad_norm": 35.840614318847656, + "learning_rate": 4.50904791416044e-06, + "loss": 0.4309, + "num_input_tokens_seen": 69455744, + "step": 22075 + }, + { + "epoch": 1.4134818513539467, + "grad_norm": 27.672832489013672, + "learning_rate": 4.508715385477009e-06, + "loss": 0.2869, + "num_input_tokens_seen": 69471168, + "step": 22080 + }, + { + "epoch": 1.4138019332949234, + "grad_norm": 28.890657424926758, + "learning_rate": 4.508382756488795e-06, + "loss": 0.4174, + "num_input_tokens_seen": 69486976, + "step": 22085 + }, + { + "epoch": 1.4141220152359004, + "grad_norm": 29.163206100463867, + "learning_rate": 4.508050027212407e-06, + "loss": 0.4435, + "num_input_tokens_seen": 69503616, + "step": 22090 + }, + { + "epoch": 1.4144420971768774, + "grad_norm": 31.730255126953125, + "learning_rate": 4.507717197664461e-06, + "loss": 0.2814, + "num_input_tokens_seen": 69519744, + "step": 22095 + }, + { + "epoch": 1.4147621791178542, + "grad_norm": 17.173011779785156, + "learning_rate": 4.507384267861576e-06, + "loss": 0.3086, + "num_input_tokens_seen": 69536256, + "step": 22100 + }, + { + "epoch": 1.415082261058831, + "grad_norm": 19.882030487060547, + "learning_rate": 4.507051237820375e-06, + "loss": 0.4418, + "num_input_tokens_seen": 69552384, + "step": 22105 + }, + { + "epoch": 1.415402342999808, + "grad_norm": 10.702885627746582, + "learning_rate": 4.506718107557491e-06, + "loss": 0.2333, + "num_input_tokens_seen": 69568064, + "step": 22110 + }, + { + "epoch": 1.415722424940785, + "grad_norm": 11.814531326293945, + "learning_rate": 4.506384877089557e-06, + "loss": 0.4221, + "num_input_tokens_seen": 69583040, + "step": 22115 + }, + { + "epoch": 1.4160425068817617, + "grad_norm": 23.633350372314453, + "learning_rate": 4.506051546433213e-06, + "loss": 0.5593, + "num_input_tokens_seen": 69598016, + "step": 22120 + }, + { + "epoch": 1.4163625888227387, + "grad_norm": 18.983642578125, + "learning_rate": 4.505718115605103e-06, + "loss": 0.2703, + "num_input_tokens_seen": 69612736, + "step": 22125 + }, + { + "epoch": 1.4166826707637155, + "grad_norm": 8.659831047058105, + "learning_rate": 4.505384584621879e-06, + "loss": 0.2639, + "num_input_tokens_seen": 69628544, + "step": 22130 + }, + { + "epoch": 1.4170027527046924, + "grad_norm": 28.523704528808594, + "learning_rate": 4.505050953500194e-06, + "loss": 0.312, + "num_input_tokens_seen": 69643904, + "step": 22135 + }, + { + "epoch": 1.4173228346456692, + "grad_norm": 22.4547119140625, + "learning_rate": 4.50471722225671e-06, + "loss": 0.3547, + "num_input_tokens_seen": 69659200, + "step": 22140 + }, + { + "epoch": 1.4176429165866462, + "grad_norm": 36.01512908935547, + "learning_rate": 4.504383390908088e-06, + "loss": 0.3342, + "num_input_tokens_seen": 69676800, + "step": 22145 + }, + { + "epoch": 1.417962998527623, + "grad_norm": 26.936813354492188, + "learning_rate": 4.504049459471e-06, + "loss": 0.3124, + "num_input_tokens_seen": 69692544, + "step": 22150 + }, + { + "epoch": 1.4182830804686, + "grad_norm": 20.587594985961914, + "learning_rate": 4.503715427962121e-06, + "loss": 0.2454, + "num_input_tokens_seen": 69708096, + "step": 22155 + }, + { + "epoch": 1.418603162409577, + "grad_norm": 34.89826965332031, + "learning_rate": 4.503381296398131e-06, + "loss": 0.361, + "num_input_tokens_seen": 69722944, + "step": 22160 + }, + { + "epoch": 1.4189232443505537, + "grad_norm": 18.561140060424805, + "learning_rate": 4.503047064795713e-06, + "loss": 0.2242, + "num_input_tokens_seen": 69738048, + "step": 22165 + }, + { + "epoch": 1.4192433262915305, + "grad_norm": 41.24684524536133, + "learning_rate": 4.50271273317156e-06, + "loss": 0.4147, + "num_input_tokens_seen": 69753728, + "step": 22170 + }, + { + "epoch": 1.4195634082325075, + "grad_norm": 32.71208572387695, + "learning_rate": 4.502378301542364e-06, + "loss": 0.3756, + "num_input_tokens_seen": 69769728, + "step": 22175 + }, + { + "epoch": 1.4198834901734845, + "grad_norm": 15.580161094665527, + "learning_rate": 4.502043769924825e-06, + "loss": 0.3585, + "num_input_tokens_seen": 69784448, + "step": 22180 + }, + { + "epoch": 1.4202035721144612, + "grad_norm": 29.735445022583008, + "learning_rate": 4.501709138335649e-06, + "loss": 0.4064, + "num_input_tokens_seen": 69800704, + "step": 22185 + }, + { + "epoch": 1.4205236540554382, + "grad_norm": 24.4270076751709, + "learning_rate": 4.501374406791546e-06, + "loss": 0.4838, + "num_input_tokens_seen": 69817152, + "step": 22190 + }, + { + "epoch": 1.420843735996415, + "grad_norm": 27.397811889648438, + "learning_rate": 4.501039575309229e-06, + "loss": 0.3046, + "num_input_tokens_seen": 69833088, + "step": 22195 + }, + { + "epoch": 1.421163817937392, + "grad_norm": 16.23682975769043, + "learning_rate": 4.500704643905418e-06, + "loss": 0.3752, + "num_input_tokens_seen": 69847936, + "step": 22200 + }, + { + "epoch": 1.421483899878369, + "grad_norm": 27.552778244018555, + "learning_rate": 4.500369612596839e-06, + "loss": 0.4038, + "num_input_tokens_seen": 69865408, + "step": 22205 + }, + { + "epoch": 1.4218039818193458, + "grad_norm": 27.64942741394043, + "learning_rate": 4.5000344814002215e-06, + "loss": 0.3607, + "num_input_tokens_seen": 69881216, + "step": 22210 + }, + { + "epoch": 1.4221240637603225, + "grad_norm": 40.23762512207031, + "learning_rate": 4.499699250332299e-06, + "loss": 0.4551, + "num_input_tokens_seen": 69899520, + "step": 22215 + }, + { + "epoch": 1.4224441457012995, + "grad_norm": 15.623388290405273, + "learning_rate": 4.499363919409813e-06, + "loss": 0.3379, + "num_input_tokens_seen": 69915904, + "step": 22220 + }, + { + "epoch": 1.4227642276422765, + "grad_norm": 36.69139862060547, + "learning_rate": 4.499028488649506e-06, + "loss": 0.2545, + "num_input_tokens_seen": 69932416, + "step": 22225 + }, + { + "epoch": 1.4230843095832533, + "grad_norm": 26.389129638671875, + "learning_rate": 4.49869295806813e-06, + "loss": 0.3255, + "num_input_tokens_seen": 69947328, + "step": 22230 + }, + { + "epoch": 1.4234043915242303, + "grad_norm": 39.668949127197266, + "learning_rate": 4.498357327682437e-06, + "loss": 0.4659, + "num_input_tokens_seen": 69962048, + "step": 22235 + }, + { + "epoch": 1.423724473465207, + "grad_norm": 51.76716995239258, + "learning_rate": 4.49802159750919e-06, + "loss": 0.346, + "num_input_tokens_seen": 69976320, + "step": 22240 + }, + { + "epoch": 1.424044555406184, + "grad_norm": 13.755658149719238, + "learning_rate": 4.497685767565151e-06, + "loss": 0.2972, + "num_input_tokens_seen": 69991744, + "step": 22245 + }, + { + "epoch": 1.424364637347161, + "grad_norm": 46.2292366027832, + "learning_rate": 4.49734983786709e-06, + "loss": 0.3883, + "num_input_tokens_seen": 70006912, + "step": 22250 + }, + { + "epoch": 1.4246847192881378, + "grad_norm": 33.19850540161133, + "learning_rate": 4.497013808431781e-06, + "loss": 0.3055, + "num_input_tokens_seen": 70023360, + "step": 22255 + }, + { + "epoch": 1.4250048012291145, + "grad_norm": 21.509008407592773, + "learning_rate": 4.496677679276006e-06, + "loss": 0.272, + "num_input_tokens_seen": 70039232, + "step": 22260 + }, + { + "epoch": 1.4253248831700915, + "grad_norm": 25.152254104614258, + "learning_rate": 4.496341450416548e-06, + "loss": 0.3403, + "num_input_tokens_seen": 70055552, + "step": 22265 + }, + { + "epoch": 1.4256449651110685, + "grad_norm": 10.271512031555176, + "learning_rate": 4.496005121870196e-06, + "loss": 0.4385, + "num_input_tokens_seen": 70071360, + "step": 22270 + }, + { + "epoch": 1.4259650470520453, + "grad_norm": 12.75533390045166, + "learning_rate": 4.495668693653745e-06, + "loss": 0.3796, + "num_input_tokens_seen": 70086720, + "step": 22275 + }, + { + "epoch": 1.4262851289930223, + "grad_norm": 10.896594047546387, + "learning_rate": 4.495332165783995e-06, + "loss": 0.3569, + "num_input_tokens_seen": 70102336, + "step": 22280 + }, + { + "epoch": 1.426605210933999, + "grad_norm": 53.16889190673828, + "learning_rate": 4.494995538277749e-06, + "loss": 0.4547, + "num_input_tokens_seen": 70117504, + "step": 22285 + }, + { + "epoch": 1.426925292874976, + "grad_norm": 8.817838668823242, + "learning_rate": 4.494658811151817e-06, + "loss": 0.258, + "num_input_tokens_seen": 70133120, + "step": 22290 + }, + { + "epoch": 1.4272453748159528, + "grad_norm": 24.79494857788086, + "learning_rate": 4.4943219844230155e-06, + "loss": 0.2478, + "num_input_tokens_seen": 70149120, + "step": 22295 + }, + { + "epoch": 1.4275654567569298, + "grad_norm": 19.95254135131836, + "learning_rate": 4.493985058108161e-06, + "loss": 0.3174, + "num_input_tokens_seen": 70165056, + "step": 22300 + }, + { + "epoch": 1.4278855386979066, + "grad_norm": 25.1198673248291, + "learning_rate": 4.493648032224079e-06, + "loss": 0.2791, + "num_input_tokens_seen": 70180032, + "step": 22305 + }, + { + "epoch": 1.4282056206388836, + "grad_norm": 22.85353660583496, + "learning_rate": 4.493310906787599e-06, + "loss": 0.341, + "num_input_tokens_seen": 70195456, + "step": 22310 + }, + { + "epoch": 1.4285257025798606, + "grad_norm": 28.3323917388916, + "learning_rate": 4.492973681815555e-06, + "loss": 0.2998, + "num_input_tokens_seen": 70211264, + "step": 22315 + }, + { + "epoch": 1.4288457845208373, + "grad_norm": 32.573848724365234, + "learning_rate": 4.492636357324787e-06, + "loss": 0.3334, + "num_input_tokens_seen": 70226176, + "step": 22320 + }, + { + "epoch": 1.429165866461814, + "grad_norm": 11.648639678955078, + "learning_rate": 4.492298933332138e-06, + "loss": 0.2868, + "num_input_tokens_seen": 70242432, + "step": 22325 + }, + { + "epoch": 1.429485948402791, + "grad_norm": 19.818023681640625, + "learning_rate": 4.4919614098544585e-06, + "loss": 0.3106, + "num_input_tokens_seen": 70258560, + "step": 22330 + }, + { + "epoch": 1.429806030343768, + "grad_norm": 29.83259391784668, + "learning_rate": 4.491623786908601e-06, + "loss": 0.333, + "num_input_tokens_seen": 70274944, + "step": 22335 + }, + { + "epoch": 1.4301261122847448, + "grad_norm": 69.61201477050781, + "learning_rate": 4.491286064511426e-06, + "loss": 0.3755, + "num_input_tokens_seen": 70290560, + "step": 22340 + }, + { + "epoch": 1.4304461942257218, + "grad_norm": 11.828417778015137, + "learning_rate": 4.490948242679796e-06, + "loss": 0.283, + "num_input_tokens_seen": 70306880, + "step": 22345 + }, + { + "epoch": 1.4307662761666986, + "grad_norm": 14.692072868347168, + "learning_rate": 4.490610321430583e-06, + "loss": 0.2891, + "num_input_tokens_seen": 70322496, + "step": 22350 + }, + { + "epoch": 1.4310863581076756, + "grad_norm": 13.314473152160645, + "learning_rate": 4.4902723007806585e-06, + "loss": 0.2829, + "num_input_tokens_seen": 70338496, + "step": 22355 + }, + { + "epoch": 1.4314064400486526, + "grad_norm": 11.266854286193848, + "learning_rate": 4.4899341807469025e-06, + "loss": 0.2031, + "num_input_tokens_seen": 70354176, + "step": 22360 + }, + { + "epoch": 1.4317265219896294, + "grad_norm": 17.0103702545166, + "learning_rate": 4.489595961346198e-06, + "loss": 0.4652, + "num_input_tokens_seen": 70370304, + "step": 22365 + }, + { + "epoch": 1.4320466039306061, + "grad_norm": 8.401859283447266, + "learning_rate": 4.489257642595436e-06, + "loss": 0.257, + "num_input_tokens_seen": 70385856, + "step": 22370 + }, + { + "epoch": 1.4323666858715831, + "grad_norm": 34.84402847290039, + "learning_rate": 4.488919224511508e-06, + "loss": 0.299, + "num_input_tokens_seen": 70401344, + "step": 22375 + }, + { + "epoch": 1.43268676781256, + "grad_norm": 47.016441345214844, + "learning_rate": 4.488580707111314e-06, + "loss": 0.3748, + "num_input_tokens_seen": 70416384, + "step": 22380 + }, + { + "epoch": 1.4330068497535369, + "grad_norm": 13.774107933044434, + "learning_rate": 4.488242090411758e-06, + "loss": 0.3671, + "num_input_tokens_seen": 70431552, + "step": 22385 + }, + { + "epoch": 1.4333269316945139, + "grad_norm": 20.268983840942383, + "learning_rate": 4.487903374429748e-06, + "loss": 0.3749, + "num_input_tokens_seen": 70448192, + "step": 22390 + }, + { + "epoch": 1.4336470136354906, + "grad_norm": 28.726158142089844, + "learning_rate": 4.487564559182197e-06, + "loss": 0.3445, + "num_input_tokens_seen": 70464128, + "step": 22395 + }, + { + "epoch": 1.4339670955764676, + "grad_norm": 10.031693458557129, + "learning_rate": 4.487225644686027e-06, + "loss": 0.3425, + "num_input_tokens_seen": 70480192, + "step": 22400 + }, + { + "epoch": 1.4342871775174444, + "grad_norm": 15.069178581237793, + "learning_rate": 4.486886630958157e-06, + "loss": 0.3538, + "num_input_tokens_seen": 70495360, + "step": 22405 + }, + { + "epoch": 1.4346072594584214, + "grad_norm": 53.24327087402344, + "learning_rate": 4.486547518015519e-06, + "loss": 0.3122, + "num_input_tokens_seen": 70511232, + "step": 22410 + }, + { + "epoch": 1.4349273413993981, + "grad_norm": 15.3602933883667, + "learning_rate": 4.486208305875046e-06, + "loss": 0.2743, + "num_input_tokens_seen": 70527488, + "step": 22415 + }, + { + "epoch": 1.4352474233403751, + "grad_norm": 52.56821823120117, + "learning_rate": 4.4858689945536755e-06, + "loss": 0.3068, + "num_input_tokens_seen": 70542656, + "step": 22420 + }, + { + "epoch": 1.4355675052813521, + "grad_norm": 23.82716178894043, + "learning_rate": 4.4855295840683516e-06, + "loss": 0.2907, + "num_input_tokens_seen": 70557568, + "step": 22425 + }, + { + "epoch": 1.435887587222329, + "grad_norm": 33.698516845703125, + "learning_rate": 4.485190074436022e-06, + "loss": 0.2776, + "num_input_tokens_seen": 70574592, + "step": 22430 + }, + { + "epoch": 1.4362076691633059, + "grad_norm": 36.4338493347168, + "learning_rate": 4.484850465673641e-06, + "loss": 0.2727, + "num_input_tokens_seen": 70590784, + "step": 22435 + }, + { + "epoch": 1.4365277511042827, + "grad_norm": 11.60348892211914, + "learning_rate": 4.4845107577981676e-06, + "loss": 0.2752, + "num_input_tokens_seen": 70605248, + "step": 22440 + }, + { + "epoch": 1.4368478330452596, + "grad_norm": 44.71347427368164, + "learning_rate": 4.484170950826562e-06, + "loss": 0.4739, + "num_input_tokens_seen": 70622208, + "step": 22445 + }, + { + "epoch": 1.4371679149862364, + "grad_norm": 32.887996673583984, + "learning_rate": 4.483831044775795e-06, + "loss": 0.3522, + "num_input_tokens_seen": 70639680, + "step": 22450 + }, + { + "epoch": 1.4374879969272134, + "grad_norm": 19.21990394592285, + "learning_rate": 4.4834910396628394e-06, + "loss": 0.3858, + "num_input_tokens_seen": 70655296, + "step": 22455 + }, + { + "epoch": 1.4378080788681902, + "grad_norm": 20.281904220581055, + "learning_rate": 4.483150935504672e-06, + "loss": 0.3428, + "num_input_tokens_seen": 70671424, + "step": 22460 + }, + { + "epoch": 1.4381281608091672, + "grad_norm": 10.571027755737305, + "learning_rate": 4.482810732318278e-06, + "loss": 0.3458, + "num_input_tokens_seen": 70686528, + "step": 22465 + }, + { + "epoch": 1.4384482427501442, + "grad_norm": 55.29423522949219, + "learning_rate": 4.482470430120643e-06, + "loss": 0.4523, + "num_input_tokens_seen": 70703360, + "step": 22470 + }, + { + "epoch": 1.438768324691121, + "grad_norm": 63.38310241699219, + "learning_rate": 4.482130028928761e-06, + "loss": 0.4637, + "num_input_tokens_seen": 70718784, + "step": 22475 + }, + { + "epoch": 1.4390884066320977, + "grad_norm": 57.71052169799805, + "learning_rate": 4.481789528759631e-06, + "loss": 0.3014, + "num_input_tokens_seen": 70734976, + "step": 22480 + }, + { + "epoch": 1.4394084885730747, + "grad_norm": 19.281742095947266, + "learning_rate": 4.481448929630254e-06, + "loss": 0.2595, + "num_input_tokens_seen": 70752064, + "step": 22485 + }, + { + "epoch": 1.4397285705140517, + "grad_norm": 16.368343353271484, + "learning_rate": 4.481108231557639e-06, + "loss": 0.3057, + "num_input_tokens_seen": 70767040, + "step": 22490 + }, + { + "epoch": 1.4400486524550284, + "grad_norm": 15.885059356689453, + "learning_rate": 4.480767434558797e-06, + "loss": 0.3347, + "num_input_tokens_seen": 70782272, + "step": 22495 + }, + { + "epoch": 1.4403687343960054, + "grad_norm": 13.067845344543457, + "learning_rate": 4.480426538650747e-06, + "loss": 0.3547, + "num_input_tokens_seen": 70797760, + "step": 22500 + }, + { + "epoch": 1.4406888163369822, + "grad_norm": 26.655366897583008, + "learning_rate": 4.480085543850512e-06, + "loss": 0.1942, + "num_input_tokens_seen": 70813440, + "step": 22505 + }, + { + "epoch": 1.4410088982779592, + "grad_norm": 17.192930221557617, + "learning_rate": 4.479744450175119e-06, + "loss": 0.3961, + "num_input_tokens_seen": 70829248, + "step": 22510 + }, + { + "epoch": 1.4413289802189362, + "grad_norm": 48.81412124633789, + "learning_rate": 4.4794032576416005e-06, + "loss": 0.4574, + "num_input_tokens_seen": 70844800, + "step": 22515 + }, + { + "epoch": 1.441649062159913, + "grad_norm": 25.69742202758789, + "learning_rate": 4.479061966266992e-06, + "loss": 0.3219, + "num_input_tokens_seen": 70861056, + "step": 22520 + }, + { + "epoch": 1.4419691441008897, + "grad_norm": 33.81061935424805, + "learning_rate": 4.478720576068339e-06, + "loss": 0.3847, + "num_input_tokens_seen": 70876480, + "step": 22525 + }, + { + "epoch": 1.4422892260418667, + "grad_norm": 41.02249526977539, + "learning_rate": 4.478379087062687e-06, + "loss": 0.2795, + "num_input_tokens_seen": 70891840, + "step": 22530 + }, + { + "epoch": 1.4426093079828437, + "grad_norm": 79.7904052734375, + "learning_rate": 4.478037499267087e-06, + "loss": 0.4677, + "num_input_tokens_seen": 70907584, + "step": 22535 + }, + { + "epoch": 1.4429293899238205, + "grad_norm": 47.760780334472656, + "learning_rate": 4.477695812698599e-06, + "loss": 0.3079, + "num_input_tokens_seen": 70922496, + "step": 22540 + }, + { + "epoch": 1.4432494718647975, + "grad_norm": 58.69429397583008, + "learning_rate": 4.477354027374283e-06, + "loss": 0.3876, + "num_input_tokens_seen": 70938304, + "step": 22545 + }, + { + "epoch": 1.4435695538057742, + "grad_norm": 33.31800079345703, + "learning_rate": 4.477012143311207e-06, + "loss": 0.423, + "num_input_tokens_seen": 70953600, + "step": 22550 + }, + { + "epoch": 1.4438896357467512, + "grad_norm": 13.293316841125488, + "learning_rate": 4.476670160526442e-06, + "loss": 0.3551, + "num_input_tokens_seen": 70970240, + "step": 22555 + }, + { + "epoch": 1.444209717687728, + "grad_norm": 32.315067291259766, + "learning_rate": 4.476328079037065e-06, + "loss": 0.3449, + "num_input_tokens_seen": 70984512, + "step": 22560 + }, + { + "epoch": 1.444529799628705, + "grad_norm": 18.33697509765625, + "learning_rate": 4.475985898860158e-06, + "loss": 0.3704, + "num_input_tokens_seen": 70998976, + "step": 22565 + }, + { + "epoch": 1.4448498815696817, + "grad_norm": 12.786323547363281, + "learning_rate": 4.475643620012808e-06, + "loss": 0.3395, + "num_input_tokens_seen": 71014976, + "step": 22570 + }, + { + "epoch": 1.4451699635106587, + "grad_norm": 69.33946228027344, + "learning_rate": 4.475301242512107e-06, + "loss": 0.3526, + "num_input_tokens_seen": 71030464, + "step": 22575 + }, + { + "epoch": 1.4454900454516357, + "grad_norm": 33.68419647216797, + "learning_rate": 4.47495876637515e-06, + "loss": 0.3768, + "num_input_tokens_seen": 71045888, + "step": 22580 + }, + { + "epoch": 1.4458101273926125, + "grad_norm": 18.985923767089844, + "learning_rate": 4.474616191619041e-06, + "loss": 0.4105, + "num_input_tokens_seen": 71061248, + "step": 22585 + }, + { + "epoch": 1.4461302093335893, + "grad_norm": 41.770198822021484, + "learning_rate": 4.474273518260883e-06, + "loss": 0.3056, + "num_input_tokens_seen": 71076096, + "step": 22590 + }, + { + "epoch": 1.4464502912745663, + "grad_norm": 6.311476707458496, + "learning_rate": 4.47393074631779e-06, + "loss": 0.2822, + "num_input_tokens_seen": 71091776, + "step": 22595 + }, + { + "epoch": 1.4467703732155432, + "grad_norm": 11.181997299194336, + "learning_rate": 4.473587875806878e-06, + "loss": 0.3365, + "num_input_tokens_seen": 71108096, + "step": 22600 + }, + { + "epoch": 1.44709045515652, + "grad_norm": 10.327377319335938, + "learning_rate": 4.473244906745265e-06, + "loss": 0.3015, + "num_input_tokens_seen": 71123584, + "step": 22605 + }, + { + "epoch": 1.447410537097497, + "grad_norm": 13.253120422363281, + "learning_rate": 4.472901839150082e-06, + "loss": 0.3567, + "num_input_tokens_seen": 71140352, + "step": 22610 + }, + { + "epoch": 1.4477306190384738, + "grad_norm": 18.6685733795166, + "learning_rate": 4.472558673038457e-06, + "loss": 0.4354, + "num_input_tokens_seen": 71157376, + "step": 22615 + }, + { + "epoch": 1.4480507009794508, + "grad_norm": 36.22529983520508, + "learning_rate": 4.472215408427526e-06, + "loss": 0.3941, + "num_input_tokens_seen": 71173120, + "step": 22620 + }, + { + "epoch": 1.4483707829204278, + "grad_norm": 79.22407531738281, + "learning_rate": 4.471872045334431e-06, + "loss": 0.4154, + "num_input_tokens_seen": 71189632, + "step": 22625 + }, + { + "epoch": 1.4486908648614045, + "grad_norm": 27.364892959594727, + "learning_rate": 4.471528583776317e-06, + "loss": 0.4497, + "num_input_tokens_seen": 71205952, + "step": 22630 + }, + { + "epoch": 1.4490109468023813, + "grad_norm": 25.960596084594727, + "learning_rate": 4.471185023770335e-06, + "loss": 0.5939, + "num_input_tokens_seen": 71221696, + "step": 22635 + }, + { + "epoch": 1.4493310287433583, + "grad_norm": 12.009482383728027, + "learning_rate": 4.47084136533364e-06, + "loss": 0.3606, + "num_input_tokens_seen": 71236352, + "step": 22640 + }, + { + "epoch": 1.4496511106843353, + "grad_norm": 5.330964088439941, + "learning_rate": 4.470497608483393e-06, + "loss": 0.3271, + "num_input_tokens_seen": 71251136, + "step": 22645 + }, + { + "epoch": 1.449971192625312, + "grad_norm": 32.287696838378906, + "learning_rate": 4.47015375323676e-06, + "loss": 0.3462, + "num_input_tokens_seen": 71267008, + "step": 22650 + }, + { + "epoch": 1.450291274566289, + "grad_norm": 33.29390335083008, + "learning_rate": 4.46980979961091e-06, + "loss": 0.5195, + "num_input_tokens_seen": 71281536, + "step": 22655 + }, + { + "epoch": 1.4506113565072658, + "grad_norm": 13.032526969909668, + "learning_rate": 4.4694657476230195e-06, + "loss": 0.4385, + "num_input_tokens_seen": 71296768, + "step": 22660 + }, + { + "epoch": 1.4509314384482428, + "grad_norm": 11.154670715332031, + "learning_rate": 4.469121597290268e-06, + "loss": 0.346, + "num_input_tokens_seen": 71313216, + "step": 22665 + }, + { + "epoch": 1.4512515203892196, + "grad_norm": 28.21047019958496, + "learning_rate": 4.46877734862984e-06, + "loss": 0.3773, + "num_input_tokens_seen": 71328640, + "step": 22670 + }, + { + "epoch": 1.4515716023301966, + "grad_norm": 33.39168167114258, + "learning_rate": 4.468433001658927e-06, + "loss": 0.4061, + "num_input_tokens_seen": 71344000, + "step": 22675 + }, + { + "epoch": 1.4518916842711733, + "grad_norm": 37.27350616455078, + "learning_rate": 4.468088556394723e-06, + "loss": 0.2994, + "num_input_tokens_seen": 71360768, + "step": 22680 + }, + { + "epoch": 1.4522117662121503, + "grad_norm": 11.822672843933105, + "learning_rate": 4.467744012854428e-06, + "loss": 0.31, + "num_input_tokens_seen": 71376576, + "step": 22685 + }, + { + "epoch": 1.4525318481531273, + "grad_norm": 19.628067016601562, + "learning_rate": 4.467399371055246e-06, + "loss": 0.4804, + "num_input_tokens_seen": 71391488, + "step": 22690 + }, + { + "epoch": 1.452851930094104, + "grad_norm": 11.551011085510254, + "learning_rate": 4.467054631014388e-06, + "loss": 0.4239, + "num_input_tokens_seen": 71406912, + "step": 22695 + }, + { + "epoch": 1.453172012035081, + "grad_norm": 11.649160385131836, + "learning_rate": 4.4667097927490675e-06, + "loss": 0.3706, + "num_input_tokens_seen": 71422144, + "step": 22700 + }, + { + "epoch": 1.4534920939760578, + "grad_norm": 21.440401077270508, + "learning_rate": 4.4663648562765034e-06, + "loss": 0.4073, + "num_input_tokens_seen": 71438080, + "step": 22705 + }, + { + "epoch": 1.4538121759170348, + "grad_norm": 17.449464797973633, + "learning_rate": 4.466019821613923e-06, + "loss": 0.3904, + "num_input_tokens_seen": 71453312, + "step": 22710 + }, + { + "epoch": 1.4541322578580116, + "grad_norm": 9.899881362915039, + "learning_rate": 4.46567468877855e-06, + "loss": 0.302, + "num_input_tokens_seen": 71467776, + "step": 22715 + }, + { + "epoch": 1.4544523397989886, + "grad_norm": 10.382338523864746, + "learning_rate": 4.4653294577876245e-06, + "loss": 0.2381, + "num_input_tokens_seen": 71482496, + "step": 22720 + }, + { + "epoch": 1.4547724217399653, + "grad_norm": 19.170644760131836, + "learning_rate": 4.464984128658382e-06, + "loss": 0.3746, + "num_input_tokens_seen": 71496960, + "step": 22725 + }, + { + "epoch": 1.4550925036809423, + "grad_norm": 34.654239654541016, + "learning_rate": 4.4646387014080675e-06, + "loss": 0.4394, + "num_input_tokens_seen": 71512768, + "step": 22730 + }, + { + "epoch": 1.4554125856219193, + "grad_norm": 18.298328399658203, + "learning_rate": 4.46429317605393e-06, + "loss": 0.3005, + "num_input_tokens_seen": 71529344, + "step": 22735 + }, + { + "epoch": 1.455732667562896, + "grad_norm": 17.372774124145508, + "learning_rate": 4.463947552613222e-06, + "loss": 0.3661, + "num_input_tokens_seen": 71544576, + "step": 22740 + }, + { + "epoch": 1.4560527495038729, + "grad_norm": 12.262580871582031, + "learning_rate": 4.463601831103204e-06, + "loss": 0.2361, + "num_input_tokens_seen": 71559488, + "step": 22745 + }, + { + "epoch": 1.4563728314448499, + "grad_norm": 34.42842102050781, + "learning_rate": 4.463256011541138e-06, + "loss": 0.3353, + "num_input_tokens_seen": 71575424, + "step": 22750 + }, + { + "epoch": 1.4566929133858268, + "grad_norm": 16.364269256591797, + "learning_rate": 4.462910093944293e-06, + "loss": 0.2346, + "num_input_tokens_seen": 71590720, + "step": 22755 + }, + { + "epoch": 1.4570129953268036, + "grad_norm": 28.059993743896484, + "learning_rate": 4.462564078329942e-06, + "loss": 0.6359, + "num_input_tokens_seen": 71605696, + "step": 22760 + }, + { + "epoch": 1.4573330772677806, + "grad_norm": 15.500569343566895, + "learning_rate": 4.462217964715364e-06, + "loss": 0.4651, + "num_input_tokens_seen": 71620672, + "step": 22765 + }, + { + "epoch": 1.4576531592087574, + "grad_norm": 17.291946411132812, + "learning_rate": 4.4618717531178425e-06, + "loss": 0.3109, + "num_input_tokens_seen": 71637184, + "step": 22770 + }, + { + "epoch": 1.4579732411497344, + "grad_norm": 42.98705291748047, + "learning_rate": 4.461525443554663e-06, + "loss": 0.2394, + "num_input_tokens_seen": 71652480, + "step": 22775 + }, + { + "epoch": 1.4582933230907114, + "grad_norm": 13.084677696228027, + "learning_rate": 4.461179036043122e-06, + "loss": 0.2355, + "num_input_tokens_seen": 71668096, + "step": 22780 + }, + { + "epoch": 1.4586134050316881, + "grad_norm": 15.360008239746094, + "learning_rate": 4.460832530600514e-06, + "loss": 0.4601, + "num_input_tokens_seen": 71682560, + "step": 22785 + }, + { + "epoch": 1.458933486972665, + "grad_norm": 31.329017639160156, + "learning_rate": 4.460485927244142e-06, + "loss": 0.2997, + "num_input_tokens_seen": 71697920, + "step": 22790 + }, + { + "epoch": 1.4592535689136419, + "grad_norm": 46.12474822998047, + "learning_rate": 4.460139225991317e-06, + "loss": 0.2865, + "num_input_tokens_seen": 71713280, + "step": 22795 + }, + { + "epoch": 1.4595736508546189, + "grad_norm": 17.709980010986328, + "learning_rate": 4.4597924268593474e-06, + "loss": 0.3617, + "num_input_tokens_seen": 71729088, + "step": 22800 + }, + { + "epoch": 1.4598937327955956, + "grad_norm": 11.281773567199707, + "learning_rate": 4.459445529865553e-06, + "loss": 0.3602, + "num_input_tokens_seen": 71744896, + "step": 22805 + }, + { + "epoch": 1.4602138147365726, + "grad_norm": 40.30482482910156, + "learning_rate": 4.4590985350272555e-06, + "loss": 0.329, + "num_input_tokens_seen": 71759936, + "step": 22810 + }, + { + "epoch": 1.4605338966775494, + "grad_norm": 19.596086502075195, + "learning_rate": 4.458751442361781e-06, + "loss": 0.354, + "num_input_tokens_seen": 71775552, + "step": 22815 + }, + { + "epoch": 1.4608539786185264, + "grad_norm": 35.25666046142578, + "learning_rate": 4.458404251886462e-06, + "loss": 0.2664, + "num_input_tokens_seen": 71791936, + "step": 22820 + }, + { + "epoch": 1.4611740605595032, + "grad_norm": 26.760454177856445, + "learning_rate": 4.458056963618636e-06, + "loss": 0.4964, + "num_input_tokens_seen": 71807552, + "step": 22825 + }, + { + "epoch": 1.4614941425004802, + "grad_norm": 18.172645568847656, + "learning_rate": 4.4577095775756445e-06, + "loss": 0.3495, + "num_input_tokens_seen": 71823360, + "step": 22830 + }, + { + "epoch": 1.461814224441457, + "grad_norm": 22.10906410217285, + "learning_rate": 4.457362093774834e-06, + "loss": 0.2641, + "num_input_tokens_seen": 71839104, + "step": 22835 + }, + { + "epoch": 1.462134306382434, + "grad_norm": 16.465015411376953, + "learning_rate": 4.457014512233557e-06, + "loss": 0.2915, + "num_input_tokens_seen": 71854592, + "step": 22840 + }, + { + "epoch": 1.462454388323411, + "grad_norm": 63.256507873535156, + "learning_rate": 4.456666832969167e-06, + "loss": 0.2737, + "num_input_tokens_seen": 71870592, + "step": 22845 + }, + { + "epoch": 1.4627744702643877, + "grad_norm": 25.856060028076172, + "learning_rate": 4.456319055999027e-06, + "loss": 0.4396, + "num_input_tokens_seen": 71888832, + "step": 22850 + }, + { + "epoch": 1.4630945522053644, + "grad_norm": 24.5388126373291, + "learning_rate": 4.455971181340506e-06, + "loss": 0.3411, + "num_input_tokens_seen": 71903808, + "step": 22855 + }, + { + "epoch": 1.4634146341463414, + "grad_norm": 16.45661735534668, + "learning_rate": 4.455623209010971e-06, + "loss": 0.4472, + "num_input_tokens_seen": 71921792, + "step": 22860 + }, + { + "epoch": 1.4637347160873184, + "grad_norm": 26.06262969970703, + "learning_rate": 4.4552751390278e-06, + "loss": 0.4584, + "num_input_tokens_seen": 71937088, + "step": 22865 + }, + { + "epoch": 1.4640547980282952, + "grad_norm": 10.5672607421875, + "learning_rate": 4.454926971408372e-06, + "loss": 0.2597, + "num_input_tokens_seen": 71952256, + "step": 22870 + }, + { + "epoch": 1.4643748799692722, + "grad_norm": 21.144912719726562, + "learning_rate": 4.454578706170075e-06, + "loss": 0.3663, + "num_input_tokens_seen": 71967872, + "step": 22875 + }, + { + "epoch": 1.464694961910249, + "grad_norm": 33.33611297607422, + "learning_rate": 4.454230343330298e-06, + "loss": 0.231, + "num_input_tokens_seen": 71982912, + "step": 22880 + }, + { + "epoch": 1.465015043851226, + "grad_norm": 18.130268096923828, + "learning_rate": 4.453881882906437e-06, + "loss": 0.3808, + "num_input_tokens_seen": 71999424, + "step": 22885 + }, + { + "epoch": 1.465335125792203, + "grad_norm": 28.929317474365234, + "learning_rate": 4.453533324915892e-06, + "loss": 0.3723, + "num_input_tokens_seen": 72016512, + "step": 22890 + }, + { + "epoch": 1.4656552077331797, + "grad_norm": 48.82442855834961, + "learning_rate": 4.453184669376068e-06, + "loss": 0.3125, + "num_input_tokens_seen": 72031744, + "step": 22895 + }, + { + "epoch": 1.4659752896741565, + "grad_norm": 11.38783073425293, + "learning_rate": 4.4528359163043764e-06, + "loss": 0.2324, + "num_input_tokens_seen": 72047488, + "step": 22900 + }, + { + "epoch": 1.4662953716151335, + "grad_norm": 20.664701461791992, + "learning_rate": 4.452487065718231e-06, + "loss": 0.3736, + "num_input_tokens_seen": 72063296, + "step": 22905 + }, + { + "epoch": 1.4666154535561104, + "grad_norm": 11.229755401611328, + "learning_rate": 4.452138117635051e-06, + "loss": 0.438, + "num_input_tokens_seen": 72079104, + "step": 22910 + }, + { + "epoch": 1.4669355354970872, + "grad_norm": 11.823142051696777, + "learning_rate": 4.4517890720722625e-06, + "loss": 0.3682, + "num_input_tokens_seen": 72094400, + "step": 22915 + }, + { + "epoch": 1.4672556174380642, + "grad_norm": 71.6689453125, + "learning_rate": 4.451439929047293e-06, + "loss": 0.4157, + "num_input_tokens_seen": 72109760, + "step": 22920 + }, + { + "epoch": 1.467575699379041, + "grad_norm": 40.437225341796875, + "learning_rate": 4.451090688577579e-06, + "loss": 0.4526, + "num_input_tokens_seen": 72126784, + "step": 22925 + }, + { + "epoch": 1.467895781320018, + "grad_norm": 40.49624252319336, + "learning_rate": 4.450741350680559e-06, + "loss": 0.2457, + "num_input_tokens_seen": 72142272, + "step": 22930 + }, + { + "epoch": 1.4682158632609947, + "grad_norm": 20.223604202270508, + "learning_rate": 4.450391915373676e-06, + "loss": 0.4563, + "num_input_tokens_seen": 72158208, + "step": 22935 + }, + { + "epoch": 1.4685359452019717, + "grad_norm": 9.3389310836792, + "learning_rate": 4.450042382674381e-06, + "loss": 0.3258, + "num_input_tokens_seen": 72173376, + "step": 22940 + }, + { + "epoch": 1.4688560271429485, + "grad_norm": 20.708282470703125, + "learning_rate": 4.449692752600127e-06, + "loss": 0.2837, + "num_input_tokens_seen": 72188480, + "step": 22945 + }, + { + "epoch": 1.4691761090839255, + "grad_norm": 49.738975524902344, + "learning_rate": 4.449343025168371e-06, + "loss": 0.3976, + "num_input_tokens_seen": 72205056, + "step": 22950 + }, + { + "epoch": 1.4694961910249025, + "grad_norm": 25.993412017822266, + "learning_rate": 4.44899320039658e-06, + "loss": 0.3797, + "num_input_tokens_seen": 72220160, + "step": 22955 + }, + { + "epoch": 1.4698162729658792, + "grad_norm": 20.504215240478516, + "learning_rate": 4.448643278302219e-06, + "loss": 0.3993, + "num_input_tokens_seen": 72234688, + "step": 22960 + }, + { + "epoch": 1.4701363549068562, + "grad_norm": 12.066117286682129, + "learning_rate": 4.448293258902764e-06, + "loss": 0.34, + "num_input_tokens_seen": 72249856, + "step": 22965 + }, + { + "epoch": 1.470456436847833, + "grad_norm": 31.7047176361084, + "learning_rate": 4.44794314221569e-06, + "loss": 0.2964, + "num_input_tokens_seen": 72266048, + "step": 22970 + }, + { + "epoch": 1.47077651878881, + "grad_norm": 21.611604690551758, + "learning_rate": 4.447592928258484e-06, + "loss": 0.3189, + "num_input_tokens_seen": 72280960, + "step": 22975 + }, + { + "epoch": 1.4710966007297868, + "grad_norm": 5.169671535491943, + "learning_rate": 4.44724261704863e-06, + "loss": 0.3027, + "num_input_tokens_seen": 72298304, + "step": 22980 + }, + { + "epoch": 1.4714166826707638, + "grad_norm": 22.978710174560547, + "learning_rate": 4.446892208603625e-06, + "loss": 0.4437, + "num_input_tokens_seen": 72314944, + "step": 22985 + }, + { + "epoch": 1.4717367646117405, + "grad_norm": 29.39580535888672, + "learning_rate": 4.446541702940962e-06, + "loss": 0.3845, + "num_input_tokens_seen": 72330880, + "step": 22990 + }, + { + "epoch": 1.4720568465527175, + "grad_norm": 43.408809661865234, + "learning_rate": 4.446191100078147e-06, + "loss": 0.3411, + "num_input_tokens_seen": 72345536, + "step": 22995 + }, + { + "epoch": 1.4723769284936945, + "grad_norm": 18.36666488647461, + "learning_rate": 4.445840400032684e-06, + "loss": 0.2296, + "num_input_tokens_seen": 72361408, + "step": 23000 + }, + { + "epoch": 1.4726970104346713, + "grad_norm": 25.42845344543457, + "learning_rate": 4.445489602822088e-06, + "loss": 0.4182, + "num_input_tokens_seen": 72377216, + "step": 23005 + }, + { + "epoch": 1.473017092375648, + "grad_norm": 9.104423522949219, + "learning_rate": 4.445138708463875e-06, + "loss": 0.3062, + "num_input_tokens_seen": 72393536, + "step": 23010 + }, + { + "epoch": 1.473337174316625, + "grad_norm": 31.65671157836914, + "learning_rate": 4.444787716975567e-06, + "loss": 0.2834, + "num_input_tokens_seen": 72408896, + "step": 23015 + }, + { + "epoch": 1.473657256257602, + "grad_norm": 26.524961471557617, + "learning_rate": 4.44443662837469e-06, + "loss": 0.4575, + "num_input_tokens_seen": 72424896, + "step": 23020 + }, + { + "epoch": 1.4739773381985788, + "grad_norm": 66.15751647949219, + "learning_rate": 4.444085442678776e-06, + "loss": 0.3044, + "num_input_tokens_seen": 72440384, + "step": 23025 + }, + { + "epoch": 1.4742974201395558, + "grad_norm": 30.888790130615234, + "learning_rate": 4.443734159905361e-06, + "loss": 0.3461, + "num_input_tokens_seen": 72455040, + "step": 23030 + }, + { + "epoch": 1.4746175020805325, + "grad_norm": 31.540544509887695, + "learning_rate": 4.443382780071987e-06, + "loss": 0.3132, + "num_input_tokens_seen": 72470912, + "step": 23035 + }, + { + "epoch": 1.4749375840215095, + "grad_norm": 32.862972259521484, + "learning_rate": 4.443031303196199e-06, + "loss": 0.4875, + "num_input_tokens_seen": 72486464, + "step": 23040 + }, + { + "epoch": 1.4752576659624865, + "grad_norm": 21.321603775024414, + "learning_rate": 4.4426797292955504e-06, + "loss": 0.3707, + "num_input_tokens_seen": 72501440, + "step": 23045 + }, + { + "epoch": 1.4755777479034633, + "grad_norm": 12.592437744140625, + "learning_rate": 4.4423280583875935e-06, + "loss": 0.2885, + "num_input_tokens_seen": 72516480, + "step": 23050 + }, + { + "epoch": 1.47589782984444, + "grad_norm": 32.07666015625, + "learning_rate": 4.441976290489891e-06, + "loss": 0.42, + "num_input_tokens_seen": 72532608, + "step": 23055 + }, + { + "epoch": 1.476217911785417, + "grad_norm": 21.141199111938477, + "learning_rate": 4.441624425620008e-06, + "loss": 0.3192, + "num_input_tokens_seen": 72548480, + "step": 23060 + }, + { + "epoch": 1.476537993726394, + "grad_norm": 26.900009155273438, + "learning_rate": 4.4412724637955154e-06, + "loss": 0.3426, + "num_input_tokens_seen": 72564800, + "step": 23065 + }, + { + "epoch": 1.4768580756673708, + "grad_norm": 32.66264343261719, + "learning_rate": 4.440920405033987e-06, + "loss": 0.3251, + "num_input_tokens_seen": 72580672, + "step": 23070 + }, + { + "epoch": 1.4771781576083478, + "grad_norm": 13.83929443359375, + "learning_rate": 4.440568249353003e-06, + "loss": 0.2693, + "num_input_tokens_seen": 72596736, + "step": 23075 + }, + { + "epoch": 1.4774982395493246, + "grad_norm": 13.587257385253906, + "learning_rate": 4.44021599677015e-06, + "loss": 0.2994, + "num_input_tokens_seen": 72615296, + "step": 23080 + }, + { + "epoch": 1.4778183214903016, + "grad_norm": 17.42547607421875, + "learning_rate": 4.439863647303015e-06, + "loss": 0.3802, + "num_input_tokens_seen": 72630144, + "step": 23085 + }, + { + "epoch": 1.4781384034312783, + "grad_norm": 20.96609878540039, + "learning_rate": 4.439511200969194e-06, + "loss": 0.2283, + "num_input_tokens_seen": 72645440, + "step": 23090 + }, + { + "epoch": 1.4784584853722553, + "grad_norm": 44.32087326049805, + "learning_rate": 4.439158657786286e-06, + "loss": 0.4254, + "num_input_tokens_seen": 72660480, + "step": 23095 + }, + { + "epoch": 1.478778567313232, + "grad_norm": 12.046208381652832, + "learning_rate": 4.438806017771897e-06, + "loss": 0.3099, + "num_input_tokens_seen": 72677696, + "step": 23100 + }, + { + "epoch": 1.479098649254209, + "grad_norm": 12.516480445861816, + "learning_rate": 4.438453280943633e-06, + "loss": 0.3439, + "num_input_tokens_seen": 72694912, + "step": 23105 + }, + { + "epoch": 1.479418731195186, + "grad_norm": 34.57258605957031, + "learning_rate": 4.438100447319109e-06, + "loss": 0.4268, + "num_input_tokens_seen": 72709440, + "step": 23110 + }, + { + "epoch": 1.4797388131361628, + "grad_norm": 16.114316940307617, + "learning_rate": 4.437747516915945e-06, + "loss": 0.3321, + "num_input_tokens_seen": 72724416, + "step": 23115 + }, + { + "epoch": 1.4800588950771396, + "grad_norm": 31.17903709411621, + "learning_rate": 4.437394489751762e-06, + "loss": 0.3163, + "num_input_tokens_seen": 72738560, + "step": 23120 + }, + { + "epoch": 1.4803789770181166, + "grad_norm": 9.3411865234375, + "learning_rate": 4.43704136584419e-06, + "loss": 0.29, + "num_input_tokens_seen": 72753536, + "step": 23125 + }, + { + "epoch": 1.4806990589590936, + "grad_norm": 17.809537887573242, + "learning_rate": 4.436688145210862e-06, + "loss": 0.4311, + "num_input_tokens_seen": 72769728, + "step": 23130 + }, + { + "epoch": 1.4810191409000704, + "grad_norm": 11.883620262145996, + "learning_rate": 4.436334827869416e-06, + "loss": 0.2633, + "num_input_tokens_seen": 72784448, + "step": 23135 + }, + { + "epoch": 1.4813392228410474, + "grad_norm": 17.93930435180664, + "learning_rate": 4.435981413837495e-06, + "loss": 0.3225, + "num_input_tokens_seen": 72800384, + "step": 23140 + }, + { + "epoch": 1.4816593047820241, + "grad_norm": 30.481740951538086, + "learning_rate": 4.435627903132747e-06, + "loss": 0.3205, + "num_input_tokens_seen": 72816064, + "step": 23145 + }, + { + "epoch": 1.4819793867230011, + "grad_norm": 17.1138916015625, + "learning_rate": 4.435274295772822e-06, + "loss": 0.3254, + "num_input_tokens_seen": 72832960, + "step": 23150 + }, + { + "epoch": 1.482299468663978, + "grad_norm": 51.17075729370117, + "learning_rate": 4.43492059177538e-06, + "loss": 0.4106, + "num_input_tokens_seen": 72847424, + "step": 23155 + }, + { + "epoch": 1.4826195506049549, + "grad_norm": 23.02770233154297, + "learning_rate": 4.434566791158083e-06, + "loss": 0.4817, + "num_input_tokens_seen": 72863168, + "step": 23160 + }, + { + "epoch": 1.4829396325459316, + "grad_norm": 30.340328216552734, + "learning_rate": 4.434212893938596e-06, + "loss": 0.2532, + "num_input_tokens_seen": 72878592, + "step": 23165 + }, + { + "epoch": 1.4832597144869086, + "grad_norm": 5.69491720199585, + "learning_rate": 4.433858900134593e-06, + "loss": 0.3743, + "num_input_tokens_seen": 72894016, + "step": 23170 + }, + { + "epoch": 1.4835797964278856, + "grad_norm": 14.190735816955566, + "learning_rate": 4.433504809763749e-06, + "loss": 0.4044, + "num_input_tokens_seen": 72912000, + "step": 23175 + }, + { + "epoch": 1.4838998783688624, + "grad_norm": 25.580305099487305, + "learning_rate": 4.4331506228437465e-06, + "loss": 0.2907, + "num_input_tokens_seen": 72928128, + "step": 23180 + }, + { + "epoch": 1.4842199603098394, + "grad_norm": 40.71619415283203, + "learning_rate": 4.432796339392272e-06, + "loss": 0.3626, + "num_input_tokens_seen": 72945024, + "step": 23185 + }, + { + "epoch": 1.4845400422508161, + "grad_norm": 9.838669776916504, + "learning_rate": 4.432441959427014e-06, + "loss": 0.3614, + "num_input_tokens_seen": 72961216, + "step": 23190 + }, + { + "epoch": 1.4848601241917931, + "grad_norm": 16.866193771362305, + "learning_rate": 4.432087482965671e-06, + "loss": 0.333, + "num_input_tokens_seen": 72976640, + "step": 23195 + }, + { + "epoch": 1.48518020613277, + "grad_norm": 45.02706527709961, + "learning_rate": 4.431732910025943e-06, + "loss": 0.2421, + "num_input_tokens_seen": 72992256, + "step": 23200 + }, + { + "epoch": 1.485500288073747, + "grad_norm": 32.59799575805664, + "learning_rate": 4.431378240625537e-06, + "loss": 0.3324, + "num_input_tokens_seen": 73007936, + "step": 23205 + }, + { + "epoch": 1.4858203700147237, + "grad_norm": 36.981441497802734, + "learning_rate": 4.43102347478216e-06, + "loss": 0.3451, + "num_input_tokens_seen": 73023808, + "step": 23210 + }, + { + "epoch": 1.4861404519557007, + "grad_norm": 7.7920966148376465, + "learning_rate": 4.4306686125135305e-06, + "loss": 0.2459, + "num_input_tokens_seen": 73038464, + "step": 23215 + }, + { + "epoch": 1.4864605338966776, + "grad_norm": 36.0333366394043, + "learning_rate": 4.430313653837365e-06, + "loss": 0.2915, + "num_input_tokens_seen": 73055104, + "step": 23220 + }, + { + "epoch": 1.4867806158376544, + "grad_norm": 30.05455207824707, + "learning_rate": 4.429958598771392e-06, + "loss": 0.3593, + "num_input_tokens_seen": 73070336, + "step": 23225 + }, + { + "epoch": 1.4871006977786314, + "grad_norm": 22.463314056396484, + "learning_rate": 4.429603447333338e-06, + "loss": 0.2923, + "num_input_tokens_seen": 73086912, + "step": 23230 + }, + { + "epoch": 1.4874207797196082, + "grad_norm": 17.10938835144043, + "learning_rate": 4.42924819954094e-06, + "loss": 0.3545, + "num_input_tokens_seen": 73103616, + "step": 23235 + }, + { + "epoch": 1.4877408616605852, + "grad_norm": 50.807777404785156, + "learning_rate": 4.428892855411935e-06, + "loss": 0.3, + "num_input_tokens_seen": 73119872, + "step": 23240 + }, + { + "epoch": 1.488060943601562, + "grad_norm": 11.861859321594238, + "learning_rate": 4.428537414964069e-06, + "loss": 0.3219, + "num_input_tokens_seen": 73136192, + "step": 23245 + }, + { + "epoch": 1.488381025542539, + "grad_norm": 20.309370040893555, + "learning_rate": 4.4281818782150895e-06, + "loss": 0.3172, + "num_input_tokens_seen": 73151744, + "step": 23250 + }, + { + "epoch": 1.4887011074835157, + "grad_norm": 23.42339324951172, + "learning_rate": 4.42782624518275e-06, + "loss": 0.3102, + "num_input_tokens_seen": 73166656, + "step": 23255 + }, + { + "epoch": 1.4890211894244927, + "grad_norm": 20.636646270751953, + "learning_rate": 4.42747051588481e-06, + "loss": 0.2958, + "num_input_tokens_seen": 73181248, + "step": 23260 + }, + { + "epoch": 1.4893412713654697, + "grad_norm": 22.604646682739258, + "learning_rate": 4.427114690339032e-06, + "loss": 0.3511, + "num_input_tokens_seen": 73196608, + "step": 23265 + }, + { + "epoch": 1.4896613533064464, + "grad_norm": 11.145572662353516, + "learning_rate": 4.4267587685631855e-06, + "loss": 0.3076, + "num_input_tokens_seen": 73212096, + "step": 23270 + }, + { + "epoch": 1.4899814352474232, + "grad_norm": 17.48297882080078, + "learning_rate": 4.426402750575041e-06, + "loss": 0.3852, + "num_input_tokens_seen": 73229376, + "step": 23275 + }, + { + "epoch": 1.4903015171884002, + "grad_norm": 11.029443740844727, + "learning_rate": 4.426046636392377e-06, + "loss": 0.292, + "num_input_tokens_seen": 73244608, + "step": 23280 + }, + { + "epoch": 1.4906215991293772, + "grad_norm": 11.027935028076172, + "learning_rate": 4.425690426032977e-06, + "loss": 0.3003, + "num_input_tokens_seen": 73260864, + "step": 23285 + }, + { + "epoch": 1.490941681070354, + "grad_norm": 53.00083541870117, + "learning_rate": 4.425334119514628e-06, + "loss": 0.3578, + "num_input_tokens_seen": 73277568, + "step": 23290 + }, + { + "epoch": 1.491261763011331, + "grad_norm": 21.839048385620117, + "learning_rate": 4.424977716855121e-06, + "loss": 0.3293, + "num_input_tokens_seen": 73294208, + "step": 23295 + }, + { + "epoch": 1.4915818449523077, + "grad_norm": 22.235116958618164, + "learning_rate": 4.424621218072255e-06, + "loss": 0.42, + "num_input_tokens_seen": 73309120, + "step": 23300 + }, + { + "epoch": 1.4919019268932847, + "grad_norm": 19.320228576660156, + "learning_rate": 4.424264623183829e-06, + "loss": 0.4557, + "num_input_tokens_seen": 73324288, + "step": 23305 + }, + { + "epoch": 1.4922220088342617, + "grad_norm": 10.38984489440918, + "learning_rate": 4.4239079322076515e-06, + "loss": 0.3435, + "num_input_tokens_seen": 73340736, + "step": 23310 + }, + { + "epoch": 1.4925420907752385, + "grad_norm": 20.57088279724121, + "learning_rate": 4.423551145161533e-06, + "loss": 0.2707, + "num_input_tokens_seen": 73355456, + "step": 23315 + }, + { + "epoch": 1.4928621727162152, + "grad_norm": 33.98820114135742, + "learning_rate": 4.4231942620632895e-06, + "loss": 0.2235, + "num_input_tokens_seen": 73370816, + "step": 23320 + }, + { + "epoch": 1.4931822546571922, + "grad_norm": 16.234567642211914, + "learning_rate": 4.422837282930743e-06, + "loss": 0.2683, + "num_input_tokens_seen": 73385536, + "step": 23325 + }, + { + "epoch": 1.4935023365981692, + "grad_norm": 8.324563026428223, + "learning_rate": 4.422480207781718e-06, + "loss": 0.3443, + "num_input_tokens_seen": 73400576, + "step": 23330 + }, + { + "epoch": 1.493822418539146, + "grad_norm": 19.88330841064453, + "learning_rate": 4.422123036634044e-06, + "loss": 0.3525, + "num_input_tokens_seen": 73415296, + "step": 23335 + }, + { + "epoch": 1.494142500480123, + "grad_norm": 18.48143196105957, + "learning_rate": 4.421765769505559e-06, + "loss": 0.3324, + "num_input_tokens_seen": 73431232, + "step": 23340 + }, + { + "epoch": 1.4944625824210998, + "grad_norm": 20.27999496459961, + "learning_rate": 4.421408406414101e-06, + "loss": 0.3936, + "num_input_tokens_seen": 73446336, + "step": 23345 + }, + { + "epoch": 1.4947826643620767, + "grad_norm": 65.13218688964844, + "learning_rate": 4.421050947377515e-06, + "loss": 0.4062, + "num_input_tokens_seen": 73461440, + "step": 23350 + }, + { + "epoch": 1.4951027463030535, + "grad_norm": 25.740787506103516, + "learning_rate": 4.4206933924136515e-06, + "loss": 0.3053, + "num_input_tokens_seen": 73476992, + "step": 23355 + }, + { + "epoch": 1.4954228282440305, + "grad_norm": 28.882522583007812, + "learning_rate": 4.4203357415403645e-06, + "loss": 0.395, + "num_input_tokens_seen": 73494208, + "step": 23360 + }, + { + "epoch": 1.4957429101850073, + "grad_norm": 48.68393325805664, + "learning_rate": 4.419977994775513e-06, + "loss": 0.3168, + "num_input_tokens_seen": 73509760, + "step": 23365 + }, + { + "epoch": 1.4960629921259843, + "grad_norm": 15.120891571044922, + "learning_rate": 4.419620152136962e-06, + "loss": 0.211, + "num_input_tokens_seen": 73525568, + "step": 23370 + }, + { + "epoch": 1.4963830740669612, + "grad_norm": 14.928274154663086, + "learning_rate": 4.419262213642579e-06, + "loss": 0.3046, + "num_input_tokens_seen": 73541824, + "step": 23375 + }, + { + "epoch": 1.496703156007938, + "grad_norm": 17.355607986450195, + "learning_rate": 4.418904179310238e-06, + "loss": 0.299, + "num_input_tokens_seen": 73558080, + "step": 23380 + }, + { + "epoch": 1.4970232379489148, + "grad_norm": 23.99725341796875, + "learning_rate": 4.418546049157817e-06, + "loss": 0.3257, + "num_input_tokens_seen": 73575232, + "step": 23385 + }, + { + "epoch": 1.4973433198898918, + "grad_norm": 23.53554916381836, + "learning_rate": 4.4181878232032005e-06, + "loss": 0.3447, + "num_input_tokens_seen": 73590720, + "step": 23390 + }, + { + "epoch": 1.4976634018308688, + "grad_norm": 39.822689056396484, + "learning_rate": 4.417829501464276e-06, + "loss": 0.513, + "num_input_tokens_seen": 73606336, + "step": 23395 + }, + { + "epoch": 1.4979834837718455, + "grad_norm": 20.32220458984375, + "learning_rate": 4.417471083958935e-06, + "loss": 0.4223, + "num_input_tokens_seen": 73622336, + "step": 23400 + }, + { + "epoch": 1.4983035657128225, + "grad_norm": 81.96637725830078, + "learning_rate": 4.417112570705075e-06, + "loss": 0.471, + "num_input_tokens_seen": 73640320, + "step": 23405 + }, + { + "epoch": 1.4986236476537993, + "grad_norm": 50.47869873046875, + "learning_rate": 4.4167539617206005e-06, + "loss": 0.4929, + "num_input_tokens_seen": 73656832, + "step": 23410 + }, + { + "epoch": 1.4989437295947763, + "grad_norm": 10.903700828552246, + "learning_rate": 4.416395257023417e-06, + "loss": 0.3796, + "num_input_tokens_seen": 73672448, + "step": 23415 + }, + { + "epoch": 1.4992638115357533, + "grad_norm": 25.99533462524414, + "learning_rate": 4.416036456631436e-06, + "loss": 0.3427, + "num_input_tokens_seen": 73689600, + "step": 23420 + }, + { + "epoch": 1.49958389347673, + "grad_norm": 34.742637634277344, + "learning_rate": 4.415677560562575e-06, + "loss": 0.3204, + "num_input_tokens_seen": 73705152, + "step": 23425 + }, + { + "epoch": 1.4999039754177068, + "grad_norm": 24.663631439208984, + "learning_rate": 4.4153185688347555e-06, + "loss": 0.3287, + "num_input_tokens_seen": 73720512, + "step": 23430 + }, + { + "epoch": 1.5002240573586838, + "grad_norm": 8.575324058532715, + "learning_rate": 4.414959481465902e-06, + "loss": 0.2897, + "num_input_tokens_seen": 73736704, + "step": 23435 + }, + { + "epoch": 1.5002880737468791, + "eval_loss": 0.4805953800678253, + "eval_runtime": 51.0043, + "eval_samples_per_second": 272.252, + "eval_steps_per_second": 34.036, + "num_input_tokens_seen": 73739776, + "step": 23436 + }, + { + "epoch": 1.5005441392996608, + "grad_norm": 28.710079193115234, + "learning_rate": 4.414600298473947e-06, + "loss": 0.4865, + "num_input_tokens_seen": 73752704, + "step": 23440 + }, + { + "epoch": 1.5008642212406376, + "grad_norm": 46.61126708984375, + "learning_rate": 4.414241019876826e-06, + "loss": 0.3438, + "num_input_tokens_seen": 73768896, + "step": 23445 + }, + { + "epoch": 1.5011843031816143, + "grad_norm": 16.880874633789062, + "learning_rate": 4.4138816456924795e-06, + "loss": 0.3562, + "num_input_tokens_seen": 73783744, + "step": 23450 + }, + { + "epoch": 1.5015043851225913, + "grad_norm": 19.13260841369629, + "learning_rate": 4.413522175938854e-06, + "loss": 0.2767, + "num_input_tokens_seen": 73798784, + "step": 23455 + }, + { + "epoch": 1.5018244670635683, + "grad_norm": 17.25163459777832, + "learning_rate": 4.413162610633896e-06, + "loss": 0.2411, + "num_input_tokens_seen": 73815424, + "step": 23460 + }, + { + "epoch": 1.5021445490045453, + "grad_norm": 50.61468505859375, + "learning_rate": 4.412802949795563e-06, + "loss": 0.2666, + "num_input_tokens_seen": 73831488, + "step": 23465 + }, + { + "epoch": 1.502464630945522, + "grad_norm": 40.52401351928711, + "learning_rate": 4.412443193441815e-06, + "loss": 0.3185, + "num_input_tokens_seen": 73846976, + "step": 23470 + }, + { + "epoch": 1.5027847128864988, + "grad_norm": 10.49886703491211, + "learning_rate": 4.412083341590615e-06, + "loss": 0.1718, + "num_input_tokens_seen": 73863168, + "step": 23475 + }, + { + "epoch": 1.5031047948274758, + "grad_norm": 24.236894607543945, + "learning_rate": 4.411723394259933e-06, + "loss": 0.2745, + "num_input_tokens_seen": 73878912, + "step": 23480 + }, + { + "epoch": 1.5034248767684528, + "grad_norm": 14.496673583984375, + "learning_rate": 4.411363351467742e-06, + "loss": 0.3519, + "num_input_tokens_seen": 73895104, + "step": 23485 + }, + { + "epoch": 1.5037449587094296, + "grad_norm": 17.39808464050293, + "learning_rate": 4.411003213232021e-06, + "loss": 0.2996, + "num_input_tokens_seen": 73909568, + "step": 23490 + }, + { + "epoch": 1.5040650406504064, + "grad_norm": 33.80690002441406, + "learning_rate": 4.410642979570754e-06, + "loss": 0.3208, + "num_input_tokens_seen": 73927744, + "step": 23495 + }, + { + "epoch": 1.5043851225913834, + "grad_norm": 17.70661735534668, + "learning_rate": 4.410282650501928e-06, + "loss": 0.4484, + "num_input_tokens_seen": 73943040, + "step": 23500 + }, + { + "epoch": 1.5047052045323603, + "grad_norm": 29.48416519165039, + "learning_rate": 4.409922226043537e-06, + "loss": 0.3276, + "num_input_tokens_seen": 73959360, + "step": 23505 + }, + { + "epoch": 1.5050252864733373, + "grad_norm": 9.57868766784668, + "learning_rate": 4.40956170621358e-06, + "loss": 0.2826, + "num_input_tokens_seen": 73974144, + "step": 23510 + }, + { + "epoch": 1.505345368414314, + "grad_norm": 48.720462799072266, + "learning_rate": 4.409201091030057e-06, + "loss": 0.3449, + "num_input_tokens_seen": 73988736, + "step": 23515 + }, + { + "epoch": 1.5056654503552909, + "grad_norm": 39.342872619628906, + "learning_rate": 4.408840380510975e-06, + "loss": 0.2782, + "num_input_tokens_seen": 74004928, + "step": 23520 + }, + { + "epoch": 1.5059855322962679, + "grad_norm": 11.810346603393555, + "learning_rate": 4.408479574674348e-06, + "loss": 0.2831, + "num_input_tokens_seen": 74020544, + "step": 23525 + }, + { + "epoch": 1.5063056142372448, + "grad_norm": 26.952028274536133, + "learning_rate": 4.408118673538192e-06, + "loss": 0.3303, + "num_input_tokens_seen": 74035648, + "step": 23530 + }, + { + "epoch": 1.5066256961782216, + "grad_norm": 14.448351860046387, + "learning_rate": 4.407757677120529e-06, + "loss": 0.375, + "num_input_tokens_seen": 74051008, + "step": 23535 + }, + { + "epoch": 1.5069457781191984, + "grad_norm": 23.93194007873535, + "learning_rate": 4.407396585439384e-06, + "loss": 0.4906, + "num_input_tokens_seen": 74066368, + "step": 23540 + }, + { + "epoch": 1.5072658600601754, + "grad_norm": 36.863651275634766, + "learning_rate": 4.407035398512789e-06, + "loss": 0.3316, + "num_input_tokens_seen": 74081344, + "step": 23545 + }, + { + "epoch": 1.5075859420011524, + "grad_norm": 40.983055114746094, + "learning_rate": 4.40667411635878e-06, + "loss": 0.4379, + "num_input_tokens_seen": 74098624, + "step": 23550 + }, + { + "epoch": 1.5079060239421291, + "grad_norm": 21.0656681060791, + "learning_rate": 4.406312738995397e-06, + "loss": 0.2507, + "num_input_tokens_seen": 74115072, + "step": 23555 + }, + { + "epoch": 1.5082261058831061, + "grad_norm": 7.729379653930664, + "learning_rate": 4.405951266440685e-06, + "loss": 0.4069, + "num_input_tokens_seen": 74130112, + "step": 23560 + }, + { + "epoch": 1.508546187824083, + "grad_norm": 22.496562957763672, + "learning_rate": 4.405589698712695e-06, + "loss": 0.5056, + "num_input_tokens_seen": 74146432, + "step": 23565 + }, + { + "epoch": 1.5088662697650599, + "grad_norm": 46.54859924316406, + "learning_rate": 4.40522803582948e-06, + "loss": 0.3095, + "num_input_tokens_seen": 74161664, + "step": 23570 + }, + { + "epoch": 1.5091863517060369, + "grad_norm": 17.437280654907227, + "learning_rate": 4.404866277809102e-06, + "loss": 0.3228, + "num_input_tokens_seen": 74177408, + "step": 23575 + }, + { + "epoch": 1.5095064336470136, + "grad_norm": 8.453001022338867, + "learning_rate": 4.404504424669624e-06, + "loss": 0.3015, + "num_input_tokens_seen": 74193088, + "step": 23580 + }, + { + "epoch": 1.5098265155879904, + "grad_norm": 12.729294776916504, + "learning_rate": 4.404142476429116e-06, + "loss": 0.4241, + "num_input_tokens_seen": 74208640, + "step": 23585 + }, + { + "epoch": 1.5101465975289674, + "grad_norm": 22.69521713256836, + "learning_rate": 4.40378043310565e-06, + "loss": 0.2578, + "num_input_tokens_seen": 74224320, + "step": 23590 + }, + { + "epoch": 1.5104666794699444, + "grad_norm": 40.31283187866211, + "learning_rate": 4.403418294717305e-06, + "loss": 0.4185, + "num_input_tokens_seen": 74240192, + "step": 23595 + }, + { + "epoch": 1.5107867614109212, + "grad_norm": 19.648014068603516, + "learning_rate": 4.403056061282166e-06, + "loss": 0.3921, + "num_input_tokens_seen": 74254976, + "step": 23600 + }, + { + "epoch": 1.511106843351898, + "grad_norm": 24.397380828857422, + "learning_rate": 4.40269373281832e-06, + "loss": 0.2923, + "num_input_tokens_seen": 74270208, + "step": 23605 + }, + { + "epoch": 1.511426925292875, + "grad_norm": 18.028200149536133, + "learning_rate": 4.40233130934386e-06, + "loss": 0.3144, + "num_input_tokens_seen": 74285184, + "step": 23610 + }, + { + "epoch": 1.511747007233852, + "grad_norm": 28.99266815185547, + "learning_rate": 4.401968790876882e-06, + "loss": 0.3244, + "num_input_tokens_seen": 74300992, + "step": 23615 + }, + { + "epoch": 1.512067089174829, + "grad_norm": 39.51895523071289, + "learning_rate": 4.40160617743549e-06, + "loss": 0.5305, + "num_input_tokens_seen": 74317504, + "step": 23620 + }, + { + "epoch": 1.5123871711158057, + "grad_norm": 11.146486282348633, + "learning_rate": 4.4012434690377916e-06, + "loss": 0.2176, + "num_input_tokens_seen": 74332928, + "step": 23625 + }, + { + "epoch": 1.5127072530567824, + "grad_norm": 24.47403335571289, + "learning_rate": 4.400880665701897e-06, + "loss": 0.3203, + "num_input_tokens_seen": 74349760, + "step": 23630 + }, + { + "epoch": 1.5130273349977594, + "grad_norm": 10.839372634887695, + "learning_rate": 4.400517767445923e-06, + "loss": 0.2868, + "num_input_tokens_seen": 74364544, + "step": 23635 + }, + { + "epoch": 1.5133474169387364, + "grad_norm": 21.15325927734375, + "learning_rate": 4.4001547742879915e-06, + "loss": 0.2648, + "num_input_tokens_seen": 74380736, + "step": 23640 + }, + { + "epoch": 1.5136674988797132, + "grad_norm": 38.47336196899414, + "learning_rate": 4.3997916862462286e-06, + "loss": 0.3199, + "num_input_tokens_seen": 74396032, + "step": 23645 + }, + { + "epoch": 1.51398758082069, + "grad_norm": 30.250276565551758, + "learning_rate": 4.3994285033387635e-06, + "loss": 0.3758, + "num_input_tokens_seen": 74411264, + "step": 23650 + }, + { + "epoch": 1.514307662761667, + "grad_norm": 9.546077728271484, + "learning_rate": 4.399065225583733e-06, + "loss": 0.2341, + "num_input_tokens_seen": 74425984, + "step": 23655 + }, + { + "epoch": 1.514627744702644, + "grad_norm": 28.52737045288086, + "learning_rate": 4.398701852999277e-06, + "loss": 0.3427, + "num_input_tokens_seen": 74441984, + "step": 23660 + }, + { + "epoch": 1.514947826643621, + "grad_norm": 12.49371337890625, + "learning_rate": 4.398338385603541e-06, + "loss": 0.2755, + "num_input_tokens_seen": 74459200, + "step": 23665 + }, + { + "epoch": 1.5152679085845977, + "grad_norm": 24.897626876831055, + "learning_rate": 4.397974823414673e-06, + "loss": 0.2266, + "num_input_tokens_seen": 74475776, + "step": 23670 + }, + { + "epoch": 1.5155879905255745, + "grad_norm": 7.554750919342041, + "learning_rate": 4.39761116645083e-06, + "loss": 0.303, + "num_input_tokens_seen": 74491712, + "step": 23675 + }, + { + "epoch": 1.5159080724665515, + "grad_norm": 12.613805770874023, + "learning_rate": 4.397247414730169e-06, + "loss": 0.333, + "num_input_tokens_seen": 74508224, + "step": 23680 + }, + { + "epoch": 1.5162281544075285, + "grad_norm": 31.53989601135254, + "learning_rate": 4.396883568270855e-06, + "loss": 0.363, + "num_input_tokens_seen": 74523968, + "step": 23685 + }, + { + "epoch": 1.5165482363485052, + "grad_norm": 20.331317901611328, + "learning_rate": 4.396519627091055e-06, + "loss": 0.211, + "num_input_tokens_seen": 74540416, + "step": 23690 + }, + { + "epoch": 1.516868318289482, + "grad_norm": 56.031341552734375, + "learning_rate": 4.3961555912089434e-06, + "loss": 0.2272, + "num_input_tokens_seen": 74555072, + "step": 23695 + }, + { + "epoch": 1.517188400230459, + "grad_norm": 35.831756591796875, + "learning_rate": 4.395791460642698e-06, + "loss": 0.2657, + "num_input_tokens_seen": 74570240, + "step": 23700 + }, + { + "epoch": 1.517508482171436, + "grad_norm": 49.27998733520508, + "learning_rate": 4.395427235410504e-06, + "loss": 0.3457, + "num_input_tokens_seen": 74585280, + "step": 23705 + }, + { + "epoch": 1.5178285641124127, + "grad_norm": 18.847856521606445, + "learning_rate": 4.395062915530545e-06, + "loss": 0.4364, + "num_input_tokens_seen": 74600576, + "step": 23710 + }, + { + "epoch": 1.5181486460533895, + "grad_norm": 41.95393753051758, + "learning_rate": 4.394698501021015e-06, + "loss": 0.2814, + "num_input_tokens_seen": 74616000, + "step": 23715 + }, + { + "epoch": 1.5184687279943665, + "grad_norm": 27.7684383392334, + "learning_rate": 4.394333991900111e-06, + "loss": 0.2533, + "num_input_tokens_seen": 74631360, + "step": 23720 + }, + { + "epoch": 1.5187888099353435, + "grad_norm": 26.397388458251953, + "learning_rate": 4.393969388186036e-06, + "loss": 0.4002, + "num_input_tokens_seen": 74647680, + "step": 23725 + }, + { + "epoch": 1.5191088918763205, + "grad_norm": 66.21383666992188, + "learning_rate": 4.3936046898969945e-06, + "loss": 0.507, + "num_input_tokens_seen": 74662912, + "step": 23730 + }, + { + "epoch": 1.5194289738172972, + "grad_norm": 89.50996398925781, + "learning_rate": 4.393239897051197e-06, + "loss": 0.3759, + "num_input_tokens_seen": 74679744, + "step": 23735 + }, + { + "epoch": 1.519749055758274, + "grad_norm": 24.352741241455078, + "learning_rate": 4.392875009666862e-06, + "loss": 0.4432, + "num_input_tokens_seen": 74696768, + "step": 23740 + }, + { + "epoch": 1.520069137699251, + "grad_norm": 38.65349197387695, + "learning_rate": 4.392510027762208e-06, + "loss": 0.463, + "num_input_tokens_seen": 74711936, + "step": 23745 + }, + { + "epoch": 1.520389219640228, + "grad_norm": 20.428781509399414, + "learning_rate": 4.392144951355461e-06, + "loss": 0.3643, + "num_input_tokens_seen": 74727552, + "step": 23750 + }, + { + "epoch": 1.5207093015812048, + "grad_norm": 27.703521728515625, + "learning_rate": 4.391779780464851e-06, + "loss": 0.3228, + "num_input_tokens_seen": 74742592, + "step": 23755 + }, + { + "epoch": 1.5210293835221815, + "grad_norm": 16.619956970214844, + "learning_rate": 4.391414515108613e-06, + "loss": 0.2362, + "num_input_tokens_seen": 74758400, + "step": 23760 + }, + { + "epoch": 1.5213494654631585, + "grad_norm": 30.047691345214844, + "learning_rate": 4.391049155304986e-06, + "loss": 0.3775, + "num_input_tokens_seen": 74774784, + "step": 23765 + }, + { + "epoch": 1.5216695474041355, + "grad_norm": 30.234725952148438, + "learning_rate": 4.390683701072214e-06, + "loss": 0.4361, + "num_input_tokens_seen": 74789888, + "step": 23770 + }, + { + "epoch": 1.5219896293451125, + "grad_norm": 27.002870559692383, + "learning_rate": 4.390318152428546e-06, + "loss": 0.3499, + "num_input_tokens_seen": 74805696, + "step": 23775 + }, + { + "epoch": 1.5223097112860893, + "grad_norm": 13.445267677307129, + "learning_rate": 4.3899525093922354e-06, + "loss": 0.3708, + "num_input_tokens_seen": 74820864, + "step": 23780 + }, + { + "epoch": 1.522629793227066, + "grad_norm": 6.1280341148376465, + "learning_rate": 4.3895867719815425e-06, + "loss": 0.2826, + "num_input_tokens_seen": 74837312, + "step": 23785 + }, + { + "epoch": 1.522949875168043, + "grad_norm": 47.51890563964844, + "learning_rate": 4.3892209402147275e-06, + "loss": 0.2652, + "num_input_tokens_seen": 74852736, + "step": 23790 + }, + { + "epoch": 1.52326995710902, + "grad_norm": 13.43519115447998, + "learning_rate": 4.388855014110059e-06, + "loss": 0.231, + "num_input_tokens_seen": 74867840, + "step": 23795 + }, + { + "epoch": 1.5235900390499968, + "grad_norm": 18.548791885375977, + "learning_rate": 4.388488993685811e-06, + "loss": 0.2718, + "num_input_tokens_seen": 74882368, + "step": 23800 + }, + { + "epoch": 1.5239101209909736, + "grad_norm": 34.155052185058594, + "learning_rate": 4.388122878960259e-06, + "loss": 0.3443, + "num_input_tokens_seen": 74896576, + "step": 23805 + }, + { + "epoch": 1.5242302029319506, + "grad_norm": 22.72471046447754, + "learning_rate": 4.387756669951685e-06, + "loss": 0.2606, + "num_input_tokens_seen": 74913792, + "step": 23810 + }, + { + "epoch": 1.5245502848729275, + "grad_norm": 8.391359329223633, + "learning_rate": 4.387390366678376e-06, + "loss": 0.3876, + "num_input_tokens_seen": 74929152, + "step": 23815 + }, + { + "epoch": 1.5248703668139043, + "grad_norm": 24.178741455078125, + "learning_rate": 4.387023969158623e-06, + "loss": 0.2947, + "num_input_tokens_seen": 74945792, + "step": 23820 + }, + { + "epoch": 1.5251904487548813, + "grad_norm": 4.594621658325195, + "learning_rate": 4.3866574774107226e-06, + "loss": 0.423, + "num_input_tokens_seen": 74959872, + "step": 23825 + }, + { + "epoch": 1.525510530695858, + "grad_norm": 35.693817138671875, + "learning_rate": 4.3862908914529745e-06, + "loss": 0.321, + "num_input_tokens_seen": 74976064, + "step": 23830 + }, + { + "epoch": 1.525830612636835, + "grad_norm": 34.058937072753906, + "learning_rate": 4.385924211303685e-06, + "loss": 0.3477, + "num_input_tokens_seen": 74990848, + "step": 23835 + }, + { + "epoch": 1.526150694577812, + "grad_norm": 38.157371520996094, + "learning_rate": 4.385557436981164e-06, + "loss": 0.5634, + "num_input_tokens_seen": 75007232, + "step": 23840 + }, + { + "epoch": 1.5264707765187888, + "grad_norm": 21.181427001953125, + "learning_rate": 4.385190568503725e-06, + "loss": 0.3472, + "num_input_tokens_seen": 75024192, + "step": 23845 + }, + { + "epoch": 1.5267908584597656, + "grad_norm": 18.166810989379883, + "learning_rate": 4.3848236058896894e-06, + "loss": 0.244, + "num_input_tokens_seen": 75039104, + "step": 23850 + }, + { + "epoch": 1.5271109404007426, + "grad_norm": 56.10631561279297, + "learning_rate": 4.38445654915738e-06, + "loss": 0.2927, + "num_input_tokens_seen": 75054464, + "step": 23855 + }, + { + "epoch": 1.5274310223417196, + "grad_norm": 16.792884826660156, + "learning_rate": 4.384089398325128e-06, + "loss": 0.2484, + "num_input_tokens_seen": 75070400, + "step": 23860 + }, + { + "epoch": 1.5277511042826963, + "grad_norm": 16.005483627319336, + "learning_rate": 4.3837221534112636e-06, + "loss": 0.3555, + "num_input_tokens_seen": 75086528, + "step": 23865 + }, + { + "epoch": 1.528071186223673, + "grad_norm": 24.789714813232422, + "learning_rate": 4.383354814434127e-06, + "loss": 0.2417, + "num_input_tokens_seen": 75101760, + "step": 23870 + }, + { + "epoch": 1.52839126816465, + "grad_norm": 6.6551408767700195, + "learning_rate": 4.382987381412061e-06, + "loss": 0.2892, + "num_input_tokens_seen": 75118400, + "step": 23875 + }, + { + "epoch": 1.528711350105627, + "grad_norm": 56.646759033203125, + "learning_rate": 4.382619854363414e-06, + "loss": 0.4187, + "num_input_tokens_seen": 75134336, + "step": 23880 + }, + { + "epoch": 1.529031432046604, + "grad_norm": 15.572362899780273, + "learning_rate": 4.382252233306537e-06, + "loss": 0.2502, + "num_input_tokens_seen": 75153280, + "step": 23885 + }, + { + "epoch": 1.5293515139875808, + "grad_norm": 30.551311492919922, + "learning_rate": 4.381884518259789e-06, + "loss": 0.4253, + "num_input_tokens_seen": 75167808, + "step": 23890 + }, + { + "epoch": 1.5296715959285576, + "grad_norm": 11.767267227172852, + "learning_rate": 4.381516709241529e-06, + "loss": 0.2872, + "num_input_tokens_seen": 75183104, + "step": 23895 + }, + { + "epoch": 1.5299916778695346, + "grad_norm": 25.206804275512695, + "learning_rate": 4.381148806270126e-06, + "loss": 0.2429, + "num_input_tokens_seen": 75198912, + "step": 23900 + }, + { + "epoch": 1.5303117598105116, + "grad_norm": 8.662335395812988, + "learning_rate": 4.3807808093639495e-06, + "loss": 0.3455, + "num_input_tokens_seen": 75214784, + "step": 23905 + }, + { + "epoch": 1.5306318417514884, + "grad_norm": 23.148353576660156, + "learning_rate": 4.380412718541376e-06, + "loss": 0.2963, + "num_input_tokens_seen": 75230592, + "step": 23910 + }, + { + "epoch": 1.5309519236924651, + "grad_norm": 14.20794677734375, + "learning_rate": 4.380044533820787e-06, + "loss": 0.3013, + "num_input_tokens_seen": 75247872, + "step": 23915 + }, + { + "epoch": 1.5312720056334421, + "grad_norm": 25.40830421447754, + "learning_rate": 4.379676255220566e-06, + "loss": 0.3078, + "num_input_tokens_seen": 75264064, + "step": 23920 + }, + { + "epoch": 1.5315920875744191, + "grad_norm": 28.115848541259766, + "learning_rate": 4.379307882759104e-06, + "loss": 0.286, + "num_input_tokens_seen": 75279424, + "step": 23925 + }, + { + "epoch": 1.531912169515396, + "grad_norm": 58.643646240234375, + "learning_rate": 4.378939416454795e-06, + "loss": 0.405, + "num_input_tokens_seen": 75297024, + "step": 23930 + }, + { + "epoch": 1.5322322514563729, + "grad_norm": 26.95443344116211, + "learning_rate": 4.378570856326039e-06, + "loss": 0.3025, + "num_input_tokens_seen": 75312768, + "step": 23935 + }, + { + "epoch": 1.5325523333973496, + "grad_norm": 18.54712677001953, + "learning_rate": 4.3782022023912394e-06, + "loss": 0.3311, + "num_input_tokens_seen": 75328896, + "step": 23940 + }, + { + "epoch": 1.5328724153383266, + "grad_norm": 19.04827117919922, + "learning_rate": 4.377833454668805e-06, + "loss": 0.3668, + "num_input_tokens_seen": 75344640, + "step": 23945 + }, + { + "epoch": 1.5331924972793036, + "grad_norm": 22.893333435058594, + "learning_rate": 4.377464613177148e-06, + "loss": 0.2484, + "num_input_tokens_seen": 75359232, + "step": 23950 + }, + { + "epoch": 1.5335125792202804, + "grad_norm": 16.490989685058594, + "learning_rate": 4.377095677934689e-06, + "loss": 0.3305, + "num_input_tokens_seen": 75375488, + "step": 23955 + }, + { + "epoch": 1.5338326611612572, + "grad_norm": 38.79986572265625, + "learning_rate": 4.376726648959849e-06, + "loss": 0.3971, + "num_input_tokens_seen": 75391168, + "step": 23960 + }, + { + "epoch": 1.5341527431022342, + "grad_norm": 30.69098663330078, + "learning_rate": 4.376357526271055e-06, + "loss": 0.3744, + "num_input_tokens_seen": 75407232, + "step": 23965 + }, + { + "epoch": 1.5344728250432111, + "grad_norm": 21.478321075439453, + "learning_rate": 4.375988309886741e-06, + "loss": 0.2905, + "num_input_tokens_seen": 75422656, + "step": 23970 + }, + { + "epoch": 1.534792906984188, + "grad_norm": 26.414714813232422, + "learning_rate": 4.375618999825341e-06, + "loss": 0.3688, + "num_input_tokens_seen": 75437632, + "step": 23975 + }, + { + "epoch": 1.5351129889251647, + "grad_norm": 23.289535522460938, + "learning_rate": 4.375249596105299e-06, + "loss": 0.3125, + "num_input_tokens_seen": 75454464, + "step": 23980 + }, + { + "epoch": 1.5354330708661417, + "grad_norm": 13.34011173248291, + "learning_rate": 4.37488009874506e-06, + "loss": 0.3745, + "num_input_tokens_seen": 75469696, + "step": 23985 + }, + { + "epoch": 1.5357531528071187, + "grad_norm": 7.886302471160889, + "learning_rate": 4.3745105077630745e-06, + "loss": 0.4366, + "num_input_tokens_seen": 75485696, + "step": 23990 + }, + { + "epoch": 1.5360732347480957, + "grad_norm": 22.867015838623047, + "learning_rate": 4.374140823177798e-06, + "loss": 0.4604, + "num_input_tokens_seen": 75501632, + "step": 23995 + }, + { + "epoch": 1.5363933166890724, + "grad_norm": 71.62651062011719, + "learning_rate": 4.3737710450076905e-06, + "loss": 0.3418, + "num_input_tokens_seen": 75517696, + "step": 24000 + }, + { + "epoch": 1.5367133986300492, + "grad_norm": 20.998682022094727, + "learning_rate": 4.373401173271218e-06, + "loss": 0.3233, + "num_input_tokens_seen": 75533056, + "step": 24005 + }, + { + "epoch": 1.5370334805710262, + "grad_norm": 40.19245147705078, + "learning_rate": 4.373031207986849e-06, + "loss": 0.4352, + "num_input_tokens_seen": 75547904, + "step": 24010 + }, + { + "epoch": 1.5373535625120032, + "grad_norm": 23.767013549804688, + "learning_rate": 4.3726611491730565e-06, + "loss": 0.5884, + "num_input_tokens_seen": 75563584, + "step": 24015 + }, + { + "epoch": 1.53767364445298, + "grad_norm": 41.88764572143555, + "learning_rate": 4.372290996848322e-06, + "loss": 0.3698, + "num_input_tokens_seen": 75578688, + "step": 24020 + }, + { + "epoch": 1.5379937263939567, + "grad_norm": 28.80424690246582, + "learning_rate": 4.371920751031127e-06, + "loss": 0.3471, + "num_input_tokens_seen": 75596672, + "step": 24025 + }, + { + "epoch": 1.5383138083349337, + "grad_norm": 23.23693084716797, + "learning_rate": 4.37155041173996e-06, + "loss": 0.2921, + "num_input_tokens_seen": 75612544, + "step": 24030 + }, + { + "epoch": 1.5386338902759107, + "grad_norm": 20.13777732849121, + "learning_rate": 4.3711799789933144e-06, + "loss": 0.2571, + "num_input_tokens_seen": 75629504, + "step": 24035 + }, + { + "epoch": 1.5389539722168877, + "grad_norm": 11.139585494995117, + "learning_rate": 4.370809452809687e-06, + "loss": 0.2932, + "num_input_tokens_seen": 75647424, + "step": 24040 + }, + { + "epoch": 1.5392740541578644, + "grad_norm": 16.52922821044922, + "learning_rate": 4.37043883320758e-06, + "loss": 0.4218, + "num_input_tokens_seen": 75662528, + "step": 24045 + }, + { + "epoch": 1.5395941360988412, + "grad_norm": 54.664005279541016, + "learning_rate": 4.3700681202055e-06, + "loss": 0.4146, + "num_input_tokens_seen": 75677824, + "step": 24050 + }, + { + "epoch": 1.5399142180398182, + "grad_norm": 44.30727005004883, + "learning_rate": 4.36969731382196e-06, + "loss": 0.2612, + "num_input_tokens_seen": 75693056, + "step": 24055 + }, + { + "epoch": 1.5402342999807952, + "grad_norm": 21.03883934020996, + "learning_rate": 4.369326414075474e-06, + "loss": 0.2937, + "num_input_tokens_seen": 75709184, + "step": 24060 + }, + { + "epoch": 1.540554381921772, + "grad_norm": 33.391727447509766, + "learning_rate": 4.3689554209845645e-06, + "loss": 0.3962, + "num_input_tokens_seen": 75724928, + "step": 24065 + }, + { + "epoch": 1.5408744638627487, + "grad_norm": 15.583953857421875, + "learning_rate": 4.368584334567757e-06, + "loss": 0.3195, + "num_input_tokens_seen": 75740800, + "step": 24070 + }, + { + "epoch": 1.5411945458037257, + "grad_norm": 20.50568199157715, + "learning_rate": 4.36821315484358e-06, + "loss": 0.3436, + "num_input_tokens_seen": 75756672, + "step": 24075 + }, + { + "epoch": 1.5415146277447027, + "grad_norm": 19.350980758666992, + "learning_rate": 4.367841881830569e-06, + "loss": 0.2751, + "num_input_tokens_seen": 75773440, + "step": 24080 + }, + { + "epoch": 1.5418347096856795, + "grad_norm": 42.66481018066406, + "learning_rate": 4.367470515547264e-06, + "loss": 0.2876, + "num_input_tokens_seen": 75793280, + "step": 24085 + }, + { + "epoch": 1.5421547916266565, + "grad_norm": 9.723962783813477, + "learning_rate": 4.367099056012209e-06, + "loss": 0.1805, + "num_input_tokens_seen": 75808384, + "step": 24090 + }, + { + "epoch": 1.5424748735676332, + "grad_norm": 10.043252944946289, + "learning_rate": 4.366727503243953e-06, + "loss": 0.2682, + "num_input_tokens_seen": 75824768, + "step": 24095 + }, + { + "epoch": 1.5427949555086102, + "grad_norm": 22.751169204711914, + "learning_rate": 4.366355857261048e-06, + "loss": 0.3056, + "num_input_tokens_seen": 75842240, + "step": 24100 + }, + { + "epoch": 1.5431150374495872, + "grad_norm": 4.751477241516113, + "learning_rate": 4.3659841180820525e-06, + "loss": 0.1769, + "num_input_tokens_seen": 75857344, + "step": 24105 + }, + { + "epoch": 1.543435119390564, + "grad_norm": 14.579075813293457, + "learning_rate": 4.365612285725531e-06, + "loss": 0.2045, + "num_input_tokens_seen": 75873024, + "step": 24110 + }, + { + "epoch": 1.5437552013315408, + "grad_norm": 26.374895095825195, + "learning_rate": 4.365240360210048e-06, + "loss": 0.2491, + "num_input_tokens_seen": 75888256, + "step": 24115 + }, + { + "epoch": 1.5440752832725178, + "grad_norm": 15.270223617553711, + "learning_rate": 4.364868341554179e-06, + "loss": 0.3277, + "num_input_tokens_seen": 75905728, + "step": 24120 + }, + { + "epoch": 1.5443953652134947, + "grad_norm": 12.513550758361816, + "learning_rate": 4.364496229776497e-06, + "loss": 0.2696, + "num_input_tokens_seen": 75921024, + "step": 24125 + }, + { + "epoch": 1.5447154471544715, + "grad_norm": 33.47792434692383, + "learning_rate": 4.364124024895586e-06, + "loss": 0.283, + "num_input_tokens_seen": 75935616, + "step": 24130 + }, + { + "epoch": 1.5450355290954483, + "grad_norm": 31.541732788085938, + "learning_rate": 4.363751726930031e-06, + "loss": 0.2721, + "num_input_tokens_seen": 75951296, + "step": 24135 + }, + { + "epoch": 1.5453556110364253, + "grad_norm": 22.773714065551758, + "learning_rate": 4.363379335898423e-06, + "loss": 0.3857, + "num_input_tokens_seen": 75966272, + "step": 24140 + }, + { + "epoch": 1.5456756929774023, + "grad_norm": 36.134918212890625, + "learning_rate": 4.363006851819357e-06, + "loss": 0.4448, + "num_input_tokens_seen": 75982080, + "step": 24145 + }, + { + "epoch": 1.5459957749183793, + "grad_norm": 12.584202766418457, + "learning_rate": 4.362634274711432e-06, + "loss": 0.3493, + "num_input_tokens_seen": 75997760, + "step": 24150 + }, + { + "epoch": 1.546315856859356, + "grad_norm": 34.13473892211914, + "learning_rate": 4.362261604593254e-06, + "loss": 0.372, + "num_input_tokens_seen": 76014528, + "step": 24155 + }, + { + "epoch": 1.5466359388003328, + "grad_norm": 31.546199798583984, + "learning_rate": 4.3618888414834315e-06, + "loss": 0.4029, + "num_input_tokens_seen": 76030656, + "step": 24160 + }, + { + "epoch": 1.5469560207413098, + "grad_norm": 29.551467895507812, + "learning_rate": 4.361515985400578e-06, + "loss": 0.328, + "num_input_tokens_seen": 76045248, + "step": 24165 + }, + { + "epoch": 1.5472761026822868, + "grad_norm": 38.8585205078125, + "learning_rate": 4.361143036363313e-06, + "loss": 0.3907, + "num_input_tokens_seen": 76061440, + "step": 24170 + }, + { + "epoch": 1.5475961846232635, + "grad_norm": 28.232147216796875, + "learning_rate": 4.3607699943902594e-06, + "loss": 0.3093, + "num_input_tokens_seen": 76077504, + "step": 24175 + }, + { + "epoch": 1.5479162665642403, + "grad_norm": 17.80506706237793, + "learning_rate": 4.3603968595000446e-06, + "loss": 0.3813, + "num_input_tokens_seen": 76092160, + "step": 24180 + }, + { + "epoch": 1.5482363485052173, + "grad_norm": 22.550106048583984, + "learning_rate": 4.360023631711301e-06, + "loss": 0.5046, + "num_input_tokens_seen": 76108352, + "step": 24185 + }, + { + "epoch": 1.5485564304461943, + "grad_norm": 12.506699562072754, + "learning_rate": 4.359650311042666e-06, + "loss": 0.3662, + "num_input_tokens_seen": 76125184, + "step": 24190 + }, + { + "epoch": 1.5488765123871713, + "grad_norm": 22.6369686126709, + "learning_rate": 4.3592768975127805e-06, + "loss": 0.3409, + "num_input_tokens_seen": 76141504, + "step": 24195 + }, + { + "epoch": 1.549196594328148, + "grad_norm": 15.648088455200195, + "learning_rate": 4.3589033911402915e-06, + "loss": 0.27, + "num_input_tokens_seen": 76157440, + "step": 24200 + }, + { + "epoch": 1.5495166762691248, + "grad_norm": 29.53404998779297, + "learning_rate": 4.35852979194385e-06, + "loss": 0.3269, + "num_input_tokens_seen": 76173120, + "step": 24205 + }, + { + "epoch": 1.5498367582101018, + "grad_norm": 13.987563133239746, + "learning_rate": 4.358156099942112e-06, + "loss": 0.4056, + "num_input_tokens_seen": 76188096, + "step": 24210 + }, + { + "epoch": 1.5501568401510788, + "grad_norm": 15.202272415161133, + "learning_rate": 4.357782315153736e-06, + "loss": 0.3998, + "num_input_tokens_seen": 76205312, + "step": 24215 + }, + { + "epoch": 1.5504769220920556, + "grad_norm": 16.5585994720459, + "learning_rate": 4.357408437597388e-06, + "loss": 0.3335, + "num_input_tokens_seen": 76220672, + "step": 24220 + }, + { + "epoch": 1.5507970040330323, + "grad_norm": 69.80833435058594, + "learning_rate": 4.357034467291737e-06, + "loss": 0.3525, + "num_input_tokens_seen": 76235392, + "step": 24225 + }, + { + "epoch": 1.5511170859740093, + "grad_norm": 8.110321044921875, + "learning_rate": 4.356660404255459e-06, + "loss": 0.3526, + "num_input_tokens_seen": 76251520, + "step": 24230 + }, + { + "epoch": 1.5514371679149863, + "grad_norm": 22.32573890686035, + "learning_rate": 4.356286248507231e-06, + "loss": 0.3537, + "num_input_tokens_seen": 76267904, + "step": 24235 + }, + { + "epoch": 1.551757249855963, + "grad_norm": 15.953940391540527, + "learning_rate": 4.355912000065737e-06, + "loss": 0.4103, + "num_input_tokens_seen": 76283008, + "step": 24240 + }, + { + "epoch": 1.5520773317969399, + "grad_norm": 12.97615909576416, + "learning_rate": 4.355537658949665e-06, + "loss": 0.3751, + "num_input_tokens_seen": 76297728, + "step": 24245 + }, + { + "epoch": 1.5523974137379168, + "grad_norm": 15.941609382629395, + "learning_rate": 4.355163225177707e-06, + "loss": 0.3512, + "num_input_tokens_seen": 76312384, + "step": 24250 + }, + { + "epoch": 1.5527174956788938, + "grad_norm": 22.420753479003906, + "learning_rate": 4.354788698768561e-06, + "loss": 0.2696, + "num_input_tokens_seen": 76328320, + "step": 24255 + }, + { + "epoch": 1.5530375776198708, + "grad_norm": 32.80766296386719, + "learning_rate": 4.354414079740928e-06, + "loss": 0.3207, + "num_input_tokens_seen": 76344320, + "step": 24260 + }, + { + "epoch": 1.5533576595608476, + "grad_norm": 22.29963493347168, + "learning_rate": 4.354039368113515e-06, + "loss": 0.3513, + "num_input_tokens_seen": 76360256, + "step": 24265 + }, + { + "epoch": 1.5536777415018244, + "grad_norm": 36.12166213989258, + "learning_rate": 4.353664563905034e-06, + "loss": 0.3803, + "num_input_tokens_seen": 76374848, + "step": 24270 + }, + { + "epoch": 1.5539978234428014, + "grad_norm": 12.180557250976562, + "learning_rate": 4.353289667134201e-06, + "loss": 0.41, + "num_input_tokens_seen": 76390912, + "step": 24275 + }, + { + "epoch": 1.5543179053837783, + "grad_norm": 7.872690200805664, + "learning_rate": 4.352914677819733e-06, + "loss": 0.2854, + "num_input_tokens_seen": 76406528, + "step": 24280 + }, + { + "epoch": 1.554637987324755, + "grad_norm": 18.1420955657959, + "learning_rate": 4.352539595980359e-06, + "loss": 0.3075, + "num_input_tokens_seen": 76421760, + "step": 24285 + }, + { + "epoch": 1.5549580692657319, + "grad_norm": 10.484529495239258, + "learning_rate": 4.352164421634807e-06, + "loss": 0.3739, + "num_input_tokens_seen": 76436224, + "step": 24290 + }, + { + "epoch": 1.5552781512067089, + "grad_norm": 25.15521812438965, + "learning_rate": 4.351789154801811e-06, + "loss": 0.341, + "num_input_tokens_seen": 76452224, + "step": 24295 + }, + { + "epoch": 1.5555982331476859, + "grad_norm": 17.77726173400879, + "learning_rate": 4.351413795500111e-06, + "loss": 0.3821, + "num_input_tokens_seen": 76466496, + "step": 24300 + }, + { + "epoch": 1.5559183150886629, + "grad_norm": 16.781747817993164, + "learning_rate": 4.3510383437484495e-06, + "loss": 0.3833, + "num_input_tokens_seen": 76481408, + "step": 24305 + }, + { + "epoch": 1.5562383970296396, + "grad_norm": 12.264725685119629, + "learning_rate": 4.350662799565574e-06, + "loss": 0.2321, + "num_input_tokens_seen": 76499136, + "step": 24310 + }, + { + "epoch": 1.5565584789706164, + "grad_norm": 35.792972564697266, + "learning_rate": 4.350287162970239e-06, + "loss": 0.5105, + "num_input_tokens_seen": 76515968, + "step": 24315 + }, + { + "epoch": 1.5568785609115934, + "grad_norm": 14.040206909179688, + "learning_rate": 4.3499114339812e-06, + "loss": 0.4383, + "num_input_tokens_seen": 76532928, + "step": 24320 + }, + { + "epoch": 1.5571986428525704, + "grad_norm": 53.14059829711914, + "learning_rate": 4.349535612617221e-06, + "loss": 0.4107, + "num_input_tokens_seen": 76547776, + "step": 24325 + }, + { + "epoch": 1.5575187247935471, + "grad_norm": 4.561825275421143, + "learning_rate": 4.349159698897066e-06, + "loss": 0.1995, + "num_input_tokens_seen": 76562304, + "step": 24330 + }, + { + "epoch": 1.557838806734524, + "grad_norm": 11.3154935836792, + "learning_rate": 4.348783692839509e-06, + "loss": 0.3399, + "num_input_tokens_seen": 76578816, + "step": 24335 + }, + { + "epoch": 1.558158888675501, + "grad_norm": 42.48887634277344, + "learning_rate": 4.348407594463323e-06, + "loss": 0.3594, + "num_input_tokens_seen": 76595136, + "step": 24340 + }, + { + "epoch": 1.5584789706164779, + "grad_norm": 42.74595260620117, + "learning_rate": 4.34803140378729e-06, + "loss": 0.382, + "num_input_tokens_seen": 76609792, + "step": 24345 + }, + { + "epoch": 1.5587990525574547, + "grad_norm": 55.92294692993164, + "learning_rate": 4.347655120830195e-06, + "loss": 0.4403, + "num_input_tokens_seen": 76624512, + "step": 24350 + }, + { + "epoch": 1.5591191344984316, + "grad_norm": 40.95530319213867, + "learning_rate": 4.347278745610828e-06, + "loss": 0.4182, + "num_input_tokens_seen": 76640256, + "step": 24355 + }, + { + "epoch": 1.5594392164394084, + "grad_norm": 54.59288787841797, + "learning_rate": 4.346902278147982e-06, + "loss": 0.3146, + "num_input_tokens_seen": 76656640, + "step": 24360 + }, + { + "epoch": 1.5597592983803854, + "grad_norm": 95.78502655029297, + "learning_rate": 4.346525718460456e-06, + "loss": 0.3663, + "num_input_tokens_seen": 76672192, + "step": 24365 + }, + { + "epoch": 1.5600793803213624, + "grad_norm": 10.340946197509766, + "learning_rate": 4.346149066567054e-06, + "loss": 0.3104, + "num_input_tokens_seen": 76688064, + "step": 24370 + }, + { + "epoch": 1.5603994622623392, + "grad_norm": 10.632868766784668, + "learning_rate": 4.345772322486584e-06, + "loss": 0.3194, + "num_input_tokens_seen": 76702208, + "step": 24375 + }, + { + "epoch": 1.560719544203316, + "grad_norm": 43.72793960571289, + "learning_rate": 4.3453954862378586e-06, + "loss": 0.3413, + "num_input_tokens_seen": 76718464, + "step": 24380 + }, + { + "epoch": 1.561039626144293, + "grad_norm": 21.931926727294922, + "learning_rate": 4.345018557839695e-06, + "loss": 0.3397, + "num_input_tokens_seen": 76734400, + "step": 24385 + }, + { + "epoch": 1.56135970808527, + "grad_norm": 19.835464477539062, + "learning_rate": 4.344641537310915e-06, + "loss": 0.3075, + "num_input_tokens_seen": 76750720, + "step": 24390 + }, + { + "epoch": 1.5616797900262467, + "grad_norm": 12.65018367767334, + "learning_rate": 4.344264424670345e-06, + "loss": 0.2896, + "num_input_tokens_seen": 76768320, + "step": 24395 + }, + { + "epoch": 1.5619998719672235, + "grad_norm": 14.006153106689453, + "learning_rate": 4.343887219936815e-06, + "loss": 0.3499, + "num_input_tokens_seen": 76783936, + "step": 24400 + }, + { + "epoch": 1.5623199539082004, + "grad_norm": 20.71320343017578, + "learning_rate": 4.343509923129163e-06, + "loss": 0.246, + "num_input_tokens_seen": 76800256, + "step": 24405 + }, + { + "epoch": 1.5626400358491774, + "grad_norm": 18.914705276489258, + "learning_rate": 4.343132534266228e-06, + "loss": 0.4125, + "num_input_tokens_seen": 76815616, + "step": 24410 + }, + { + "epoch": 1.5629601177901544, + "grad_norm": 27.68150520324707, + "learning_rate": 4.342755053366854e-06, + "loss": 0.2967, + "num_input_tokens_seen": 76830080, + "step": 24415 + }, + { + "epoch": 1.5632801997311312, + "grad_norm": 31.061939239501953, + "learning_rate": 4.342377480449892e-06, + "loss": 0.4358, + "num_input_tokens_seen": 76846656, + "step": 24420 + }, + { + "epoch": 1.563600281672108, + "grad_norm": 54.922264099121094, + "learning_rate": 4.341999815534194e-06, + "loss": 0.3601, + "num_input_tokens_seen": 76861888, + "step": 24425 + }, + { + "epoch": 1.563920363613085, + "grad_norm": 16.778377532958984, + "learning_rate": 4.341622058638621e-06, + "loss": 0.3554, + "num_input_tokens_seen": 76879360, + "step": 24430 + }, + { + "epoch": 1.564240445554062, + "grad_norm": 71.87621307373047, + "learning_rate": 4.341244209782035e-06, + "loss": 0.4199, + "num_input_tokens_seen": 76894208, + "step": 24435 + }, + { + "epoch": 1.5645605274950387, + "grad_norm": 31.972810745239258, + "learning_rate": 4.340866268983303e-06, + "loss": 0.382, + "num_input_tokens_seen": 76910016, + "step": 24440 + }, + { + "epoch": 1.5648806094360155, + "grad_norm": 37.825809478759766, + "learning_rate": 4.340488236261299e-06, + "loss": 0.2945, + "num_input_tokens_seen": 76926592, + "step": 24445 + }, + { + "epoch": 1.5652006913769925, + "grad_norm": 68.82051849365234, + "learning_rate": 4.340110111634899e-06, + "loss": 0.3547, + "num_input_tokens_seen": 76942464, + "step": 24450 + }, + { + "epoch": 1.5655207733179695, + "grad_norm": 31.337331771850586, + "learning_rate": 4.339731895122984e-06, + "loss": 0.4236, + "num_input_tokens_seen": 76957568, + "step": 24455 + }, + { + "epoch": 1.5658408552589465, + "grad_norm": 14.995266914367676, + "learning_rate": 4.339353586744442e-06, + "loss": 0.2503, + "num_input_tokens_seen": 76974336, + "step": 24460 + }, + { + "epoch": 1.5661609371999232, + "grad_norm": 22.49114227294922, + "learning_rate": 4.338975186518162e-06, + "loss": 0.2646, + "num_input_tokens_seen": 76990208, + "step": 24465 + }, + { + "epoch": 1.5664810191409, + "grad_norm": 15.615687370300293, + "learning_rate": 4.338596694463041e-06, + "loss": 0.2416, + "num_input_tokens_seen": 77005120, + "step": 24470 + }, + { + "epoch": 1.566801101081877, + "grad_norm": 36.48321533203125, + "learning_rate": 4.338218110597977e-06, + "loss": 0.3828, + "num_input_tokens_seen": 77019968, + "step": 24475 + }, + { + "epoch": 1.567121183022854, + "grad_norm": 36.9209098815918, + "learning_rate": 4.337839434941876e-06, + "loss": 0.4541, + "num_input_tokens_seen": 77038336, + "step": 24480 + }, + { + "epoch": 1.5674412649638307, + "grad_norm": 23.02457046508789, + "learning_rate": 4.337460667513647e-06, + "loss": 0.2851, + "num_input_tokens_seen": 77053760, + "step": 24485 + }, + { + "epoch": 1.5677613469048075, + "grad_norm": 8.124351501464844, + "learning_rate": 4.337081808332203e-06, + "loss": 0.2864, + "num_input_tokens_seen": 77069632, + "step": 24490 + }, + { + "epoch": 1.5680814288457845, + "grad_norm": 14.400324821472168, + "learning_rate": 4.3367028574164615e-06, + "loss": 0.2691, + "num_input_tokens_seen": 77084992, + "step": 24495 + }, + { + "epoch": 1.5684015107867615, + "grad_norm": 15.716480255126953, + "learning_rate": 4.3363238147853474e-06, + "loss": 0.2951, + "num_input_tokens_seen": 77099840, + "step": 24500 + }, + { + "epoch": 1.5687215927277383, + "grad_norm": 16.41078758239746, + "learning_rate": 4.335944680457787e-06, + "loss": 0.291, + "num_input_tokens_seen": 77115712, + "step": 24505 + }, + { + "epoch": 1.569041674668715, + "grad_norm": 16.457252502441406, + "learning_rate": 4.335565454452713e-06, + "loss": 0.3405, + "num_input_tokens_seen": 77131392, + "step": 24510 + }, + { + "epoch": 1.569361756609692, + "grad_norm": 17.106796264648438, + "learning_rate": 4.33518613678906e-06, + "loss": 0.294, + "num_input_tokens_seen": 77146944, + "step": 24515 + }, + { + "epoch": 1.569681838550669, + "grad_norm": 22.862133026123047, + "learning_rate": 4.334806727485771e-06, + "loss": 0.3917, + "num_input_tokens_seen": 77162240, + "step": 24520 + }, + { + "epoch": 1.570001920491646, + "grad_norm": 26.640756607055664, + "learning_rate": 4.33442722656179e-06, + "loss": 0.387, + "num_input_tokens_seen": 77178304, + "step": 24525 + }, + { + "epoch": 1.5703220024326228, + "grad_norm": 33.263038635253906, + "learning_rate": 4.33404763403607e-06, + "loss": 0.3978, + "num_input_tokens_seen": 77197056, + "step": 24530 + }, + { + "epoch": 1.5706420843735995, + "grad_norm": 16.518217086791992, + "learning_rate": 4.333667949927564e-06, + "loss": 0.3741, + "num_input_tokens_seen": 77213120, + "step": 24535 + }, + { + "epoch": 1.5709621663145765, + "grad_norm": 13.496274948120117, + "learning_rate": 4.3332881742552315e-06, + "loss": 0.2273, + "num_input_tokens_seen": 77229376, + "step": 24540 + }, + { + "epoch": 1.5712822482555535, + "grad_norm": 31.721763610839844, + "learning_rate": 4.332908307038037e-06, + "loss": 0.2782, + "num_input_tokens_seen": 77244800, + "step": 24545 + }, + { + "epoch": 1.5716023301965303, + "grad_norm": 42.596309661865234, + "learning_rate": 4.33252834829495e-06, + "loss": 0.435, + "num_input_tokens_seen": 77260864, + "step": 24550 + }, + { + "epoch": 1.571922412137507, + "grad_norm": 45.62870407104492, + "learning_rate": 4.332148298044941e-06, + "loss": 0.378, + "num_input_tokens_seen": 77276480, + "step": 24555 + }, + { + "epoch": 1.572242494078484, + "grad_norm": 16.79443359375, + "learning_rate": 4.33176815630699e-06, + "loss": 0.3058, + "num_input_tokens_seen": 77291328, + "step": 24560 + }, + { + "epoch": 1.572562576019461, + "grad_norm": 21.735595703125, + "learning_rate": 4.331387923100078e-06, + "loss": 0.2797, + "num_input_tokens_seen": 77306240, + "step": 24565 + }, + { + "epoch": 1.572882657960438, + "grad_norm": 11.658112525939941, + "learning_rate": 4.331007598443193e-06, + "loss": 0.3244, + "num_input_tokens_seen": 77321856, + "step": 24570 + }, + { + "epoch": 1.5732027399014148, + "grad_norm": 35.68001937866211, + "learning_rate": 4.3306271823553255e-06, + "loss": 0.3209, + "num_input_tokens_seen": 77337344, + "step": 24575 + }, + { + "epoch": 1.5735228218423916, + "grad_norm": 19.74411392211914, + "learning_rate": 4.330246674855473e-06, + "loss": 0.2657, + "num_input_tokens_seen": 77353216, + "step": 24580 + }, + { + "epoch": 1.5738429037833686, + "grad_norm": 28.069578170776367, + "learning_rate": 4.329866075962634e-06, + "loss": 0.3113, + "num_input_tokens_seen": 77369152, + "step": 24585 + }, + { + "epoch": 1.5741629857243455, + "grad_norm": 31.616621017456055, + "learning_rate": 4.329485385695815e-06, + "loss": 0.4957, + "num_input_tokens_seen": 77384192, + "step": 24590 + }, + { + "epoch": 1.5744830676653223, + "grad_norm": 10.135931015014648, + "learning_rate": 4.329104604074025e-06, + "loss": 0.3878, + "num_input_tokens_seen": 77400192, + "step": 24595 + }, + { + "epoch": 1.574803149606299, + "grad_norm": 15.00749397277832, + "learning_rate": 4.3287237311162785e-06, + "loss": 0.3286, + "num_input_tokens_seen": 77417472, + "step": 24600 + }, + { + "epoch": 1.575123231547276, + "grad_norm": 35.22787857055664, + "learning_rate": 4.328342766841594e-06, + "loss": 0.3442, + "num_input_tokens_seen": 77432896, + "step": 24605 + }, + { + "epoch": 1.575443313488253, + "grad_norm": 47.992557525634766, + "learning_rate": 4.327961711268996e-06, + "loss": 0.3132, + "num_input_tokens_seen": 77448064, + "step": 24610 + }, + { + "epoch": 1.5757633954292298, + "grad_norm": 15.145438194274902, + "learning_rate": 4.327580564417512e-06, + "loss": 0.3277, + "num_input_tokens_seen": 77464448, + "step": 24615 + }, + { + "epoch": 1.5760834773702068, + "grad_norm": 22.665569305419922, + "learning_rate": 4.327199326306173e-06, + "loss": 0.2517, + "num_input_tokens_seen": 77479936, + "step": 24620 + }, + { + "epoch": 1.5764035593111836, + "grad_norm": 38.296974182128906, + "learning_rate": 4.326817996954018e-06, + "loss": 0.3847, + "num_input_tokens_seen": 77495552, + "step": 24625 + }, + { + "epoch": 1.5767236412521606, + "grad_norm": 46.643890380859375, + "learning_rate": 4.326436576380087e-06, + "loss": 0.4093, + "num_input_tokens_seen": 77511552, + "step": 24630 + }, + { + "epoch": 1.5770437231931376, + "grad_norm": 32.062713623046875, + "learning_rate": 4.3260550646034276e-06, + "loss": 0.3874, + "num_input_tokens_seen": 77527232, + "step": 24635 + }, + { + "epoch": 1.5773638051341143, + "grad_norm": 19.84436798095703, + "learning_rate": 4.3256734616430896e-06, + "loss": 0.2757, + "num_input_tokens_seen": 77542208, + "step": 24640 + }, + { + "epoch": 1.577683887075091, + "grad_norm": 28.325950622558594, + "learning_rate": 4.3252917675181286e-06, + "loss": 0.3221, + "num_input_tokens_seen": 77557888, + "step": 24645 + }, + { + "epoch": 1.578003969016068, + "grad_norm": 13.6900634765625, + "learning_rate": 4.324909982247604e-06, + "loss": 0.3624, + "num_input_tokens_seen": 77573056, + "step": 24650 + }, + { + "epoch": 1.578324050957045, + "grad_norm": 35.359657287597656, + "learning_rate": 4.324528105850581e-06, + "loss": 0.3257, + "num_input_tokens_seen": 77589056, + "step": 24655 + }, + { + "epoch": 1.5786441328980219, + "grad_norm": 18.696849822998047, + "learning_rate": 4.324146138346127e-06, + "loss": 0.2694, + "num_input_tokens_seen": 77606016, + "step": 24660 + }, + { + "epoch": 1.5789642148389986, + "grad_norm": 19.074344635009766, + "learning_rate": 4.323764079753318e-06, + "loss": 0.4408, + "num_input_tokens_seen": 77621440, + "step": 24665 + }, + { + "epoch": 1.5792842967799756, + "grad_norm": 28.3300724029541, + "learning_rate": 4.323381930091229e-06, + "loss": 0.3611, + "num_input_tokens_seen": 77637184, + "step": 24670 + }, + { + "epoch": 1.5796043787209526, + "grad_norm": 32.75640106201172, + "learning_rate": 4.322999689378945e-06, + "loss": 0.4302, + "num_input_tokens_seen": 77653760, + "step": 24675 + }, + { + "epoch": 1.5799244606619296, + "grad_norm": 68.41522979736328, + "learning_rate": 4.322617357635553e-06, + "loss": 0.5172, + "num_input_tokens_seen": 77668480, + "step": 24680 + }, + { + "epoch": 1.5802445426029064, + "grad_norm": 18.477384567260742, + "learning_rate": 4.3222349348801415e-06, + "loss": 0.2008, + "num_input_tokens_seen": 77685632, + "step": 24685 + }, + { + "epoch": 1.5805646245438831, + "grad_norm": 45.01219940185547, + "learning_rate": 4.321852421131811e-06, + "loss": 0.3615, + "num_input_tokens_seen": 77701952, + "step": 24690 + }, + { + "epoch": 1.5808847064848601, + "grad_norm": 10.710610389709473, + "learning_rate": 4.321469816409659e-06, + "loss": 0.1978, + "num_input_tokens_seen": 77717056, + "step": 24695 + }, + { + "epoch": 1.5812047884258371, + "grad_norm": 44.466827392578125, + "learning_rate": 4.3210871207327935e-06, + "loss": 0.4455, + "num_input_tokens_seen": 77732800, + "step": 24700 + }, + { + "epoch": 1.5815248703668139, + "grad_norm": 11.767294883728027, + "learning_rate": 4.320704334120322e-06, + "loss": 0.3804, + "num_input_tokens_seen": 77748224, + "step": 24705 + }, + { + "epoch": 1.5818449523077907, + "grad_norm": 27.711101531982422, + "learning_rate": 4.320321456591359e-06, + "loss": 0.3229, + "num_input_tokens_seen": 77764928, + "step": 24710 + }, + { + "epoch": 1.5821650342487676, + "grad_norm": 33.71489334106445, + "learning_rate": 4.3199384881650255e-06, + "loss": 0.3176, + "num_input_tokens_seen": 77781184, + "step": 24715 + }, + { + "epoch": 1.5824851161897446, + "grad_norm": 7.69612455368042, + "learning_rate": 4.319555428860443e-06, + "loss": 0.216, + "num_input_tokens_seen": 77797888, + "step": 24720 + }, + { + "epoch": 1.5828051981307216, + "grad_norm": 16.273603439331055, + "learning_rate": 4.31917227869674e-06, + "loss": 0.2957, + "num_input_tokens_seen": 77813888, + "step": 24725 + }, + { + "epoch": 1.5831252800716984, + "grad_norm": 14.75451374053955, + "learning_rate": 4.31878903769305e-06, + "loss": 0.2514, + "num_input_tokens_seen": 77829632, + "step": 24730 + }, + { + "epoch": 1.5834453620126752, + "grad_norm": 11.8508882522583, + "learning_rate": 4.318405705868508e-06, + "loss": 0.5077, + "num_input_tokens_seen": 77845504, + "step": 24735 + }, + { + "epoch": 1.5837654439536522, + "grad_norm": 41.16022872924805, + "learning_rate": 4.318022283242257e-06, + "loss": 0.285, + "num_input_tokens_seen": 77860608, + "step": 24740 + }, + { + "epoch": 1.5840855258946291, + "grad_norm": 17.18816375732422, + "learning_rate": 4.3176387698334435e-06, + "loss": 0.3475, + "num_input_tokens_seen": 77876736, + "step": 24745 + }, + { + "epoch": 1.584405607835606, + "grad_norm": 19.365997314453125, + "learning_rate": 4.317255165661217e-06, + "loss": 0.3145, + "num_input_tokens_seen": 77894016, + "step": 24750 + }, + { + "epoch": 1.5847256897765827, + "grad_norm": 35.02308654785156, + "learning_rate": 4.3168714707447336e-06, + "loss": 0.2956, + "num_input_tokens_seen": 77909504, + "step": 24755 + }, + { + "epoch": 1.5850457717175597, + "grad_norm": 37.732513427734375, + "learning_rate": 4.316487685103153e-06, + "loss": 0.3287, + "num_input_tokens_seen": 77925568, + "step": 24760 + }, + { + "epoch": 1.5853658536585367, + "grad_norm": 32.63956069946289, + "learning_rate": 4.316103808755638e-06, + "loss": 0.4362, + "num_input_tokens_seen": 77941632, + "step": 24765 + }, + { + "epoch": 1.5856859355995134, + "grad_norm": 18.19765853881836, + "learning_rate": 4.31571984172136e-06, + "loss": 0.2431, + "num_input_tokens_seen": 77958208, + "step": 24770 + }, + { + "epoch": 1.5860060175404904, + "grad_norm": 29.257877349853516, + "learning_rate": 4.31533578401949e-06, + "loss": 0.45, + "num_input_tokens_seen": 77973312, + "step": 24775 + }, + { + "epoch": 1.5863260994814672, + "grad_norm": 12.058619499206543, + "learning_rate": 4.314951635669207e-06, + "loss": 0.377, + "num_input_tokens_seen": 77989120, + "step": 24780 + }, + { + "epoch": 1.5866461814224442, + "grad_norm": 16.88835906982422, + "learning_rate": 4.314567396689692e-06, + "loss": 0.4339, + "num_input_tokens_seen": 78004672, + "step": 24785 + }, + { + "epoch": 1.5869662633634212, + "grad_norm": 16.916996002197266, + "learning_rate": 4.3141830671001335e-06, + "loss": 0.3368, + "num_input_tokens_seen": 78022208, + "step": 24790 + }, + { + "epoch": 1.587286345304398, + "grad_norm": 50.2310791015625, + "learning_rate": 4.313798646919723e-06, + "loss": 0.4114, + "num_input_tokens_seen": 78038400, + "step": 24795 + }, + { + "epoch": 1.5876064272453747, + "grad_norm": 14.532931327819824, + "learning_rate": 4.3134141361676555e-06, + "loss": 0.3207, + "num_input_tokens_seen": 78053440, + "step": 24800 + }, + { + "epoch": 1.5879265091863517, + "grad_norm": 15.326508522033691, + "learning_rate": 4.3130295348631315e-06, + "loss": 0.276, + "num_input_tokens_seen": 78069312, + "step": 24805 + }, + { + "epoch": 1.5882465911273287, + "grad_norm": 25.281360626220703, + "learning_rate": 4.312644843025356e-06, + "loss": 0.3519, + "num_input_tokens_seen": 78085504, + "step": 24810 + }, + { + "epoch": 1.5885666730683055, + "grad_norm": 8.011087417602539, + "learning_rate": 4.31226006067354e-06, + "loss": 0.2728, + "num_input_tokens_seen": 78101312, + "step": 24815 + }, + { + "epoch": 1.5888867550092822, + "grad_norm": 8.961567878723145, + "learning_rate": 4.3118751878268965e-06, + "loss": 0.2978, + "num_input_tokens_seen": 78118144, + "step": 24820 + }, + { + "epoch": 1.5892068369502592, + "grad_norm": 29.31011390686035, + "learning_rate": 4.311490224504642e-06, + "loss": 0.5495, + "num_input_tokens_seen": 78133312, + "step": 24825 + }, + { + "epoch": 1.5895269188912362, + "grad_norm": 19.626998901367188, + "learning_rate": 4.311105170726002e-06, + "loss": 0.315, + "num_input_tokens_seen": 78149120, + "step": 24830 + }, + { + "epoch": 1.5898470008322132, + "grad_norm": 37.425559997558594, + "learning_rate": 4.310720026510204e-06, + "loss": 0.3602, + "num_input_tokens_seen": 78164480, + "step": 24835 + }, + { + "epoch": 1.59016708277319, + "grad_norm": 13.538053512573242, + "learning_rate": 4.310334791876479e-06, + "loss": 0.3107, + "num_input_tokens_seen": 78179200, + "step": 24840 + }, + { + "epoch": 1.5904871647141667, + "grad_norm": 13.041277885437012, + "learning_rate": 4.309949466844065e-06, + "loss": 0.3701, + "num_input_tokens_seen": 78193920, + "step": 24845 + }, + { + "epoch": 1.5908072466551437, + "grad_norm": 12.32272720336914, + "learning_rate": 4.3095640514322015e-06, + "loss": 0.3104, + "num_input_tokens_seen": 78210112, + "step": 24850 + }, + { + "epoch": 1.5911273285961207, + "grad_norm": 13.483980178833008, + "learning_rate": 4.309178545660136e-06, + "loss": 0.3044, + "num_input_tokens_seen": 78225664, + "step": 24855 + }, + { + "epoch": 1.5914474105370975, + "grad_norm": 10.147761344909668, + "learning_rate": 4.308792949547116e-06, + "loss": 0.3001, + "num_input_tokens_seen": 78241728, + "step": 24860 + }, + { + "epoch": 1.5917674924780743, + "grad_norm": 11.629502296447754, + "learning_rate": 4.308407263112399e-06, + "loss": 0.21, + "num_input_tokens_seen": 78258048, + "step": 24865 + }, + { + "epoch": 1.5920875744190512, + "grad_norm": 29.411590576171875, + "learning_rate": 4.308021486375243e-06, + "loss": 0.3303, + "num_input_tokens_seen": 78273088, + "step": 24870 + }, + { + "epoch": 1.5924076563600282, + "grad_norm": 26.58646011352539, + "learning_rate": 4.307635619354911e-06, + "loss": 0.3772, + "num_input_tokens_seen": 78289472, + "step": 24875 + }, + { + "epoch": 1.592727738301005, + "grad_norm": 14.349428176879883, + "learning_rate": 4.307249662070671e-06, + "loss": 0.4343, + "num_input_tokens_seen": 78303552, + "step": 24880 + }, + { + "epoch": 1.593047820241982, + "grad_norm": 31.915117263793945, + "learning_rate": 4.3068636145417984e-06, + "loss": 0.255, + "num_input_tokens_seen": 78319488, + "step": 24885 + }, + { + "epoch": 1.5933679021829588, + "grad_norm": 69.87520599365234, + "learning_rate": 4.306477476787567e-06, + "loss": 0.5656, + "num_input_tokens_seen": 78335296, + "step": 24890 + }, + { + "epoch": 1.5936879841239358, + "grad_norm": 29.06096839904785, + "learning_rate": 4.30609124882726e-06, + "loss": 0.331, + "num_input_tokens_seen": 78349888, + "step": 24895 + }, + { + "epoch": 1.5940080660649127, + "grad_norm": 16.350854873657227, + "learning_rate": 4.305704930680165e-06, + "loss": 0.4926, + "num_input_tokens_seen": 78365120, + "step": 24900 + }, + { + "epoch": 1.5943281480058895, + "grad_norm": 12.772039413452148, + "learning_rate": 4.30531852236557e-06, + "loss": 0.3521, + "num_input_tokens_seen": 78379456, + "step": 24905 + }, + { + "epoch": 1.5946482299468663, + "grad_norm": 18.989025115966797, + "learning_rate": 4.304932023902773e-06, + "loss": 0.3758, + "num_input_tokens_seen": 78395520, + "step": 24910 + }, + { + "epoch": 1.5949683118878433, + "grad_norm": 14.5812406539917, + "learning_rate": 4.30454543531107e-06, + "loss": 0.3558, + "num_input_tokens_seen": 78412224, + "step": 24915 + }, + { + "epoch": 1.5952883938288203, + "grad_norm": 40.004703521728516, + "learning_rate": 4.30415875660977e-06, + "loss": 0.4059, + "num_input_tokens_seen": 78428224, + "step": 24920 + }, + { + "epoch": 1.595608475769797, + "grad_norm": 19.65005111694336, + "learning_rate": 4.303771987818177e-06, + "loss": 0.3417, + "num_input_tokens_seen": 78445632, + "step": 24925 + }, + { + "epoch": 1.5959285577107738, + "grad_norm": 16.79155158996582, + "learning_rate": 4.303385128955609e-06, + "loss": 0.3598, + "num_input_tokens_seen": 78462464, + "step": 24930 + }, + { + "epoch": 1.5962486396517508, + "grad_norm": 16.672237396240234, + "learning_rate": 4.302998180041379e-06, + "loss": 0.262, + "num_input_tokens_seen": 78476864, + "step": 24935 + }, + { + "epoch": 1.5965687215927278, + "grad_norm": 45.81962585449219, + "learning_rate": 4.3026111410948115e-06, + "loss": 0.3377, + "num_input_tokens_seen": 78492800, + "step": 24940 + }, + { + "epoch": 1.5968888035337048, + "grad_norm": 35.0191650390625, + "learning_rate": 4.302224012135233e-06, + "loss": 0.3009, + "num_input_tokens_seen": 78508288, + "step": 24945 + }, + { + "epoch": 1.5972088854746815, + "grad_norm": 35.971527099609375, + "learning_rate": 4.301836793181976e-06, + "loss": 0.4052, + "num_input_tokens_seen": 78524544, + "step": 24950 + }, + { + "epoch": 1.5975289674156583, + "grad_norm": 7.245255947113037, + "learning_rate": 4.3014494842543745e-06, + "loss": 0.265, + "num_input_tokens_seen": 78540416, + "step": 24955 + }, + { + "epoch": 1.5978490493566353, + "grad_norm": 28.823291778564453, + "learning_rate": 4.3010620853717685e-06, + "loss": 0.4615, + "num_input_tokens_seen": 78555712, + "step": 24960 + }, + { + "epoch": 1.5981691312976123, + "grad_norm": 10.139888763427734, + "learning_rate": 4.300674596553504e-06, + "loss": 0.3191, + "num_input_tokens_seen": 78571200, + "step": 24965 + }, + { + "epoch": 1.598489213238589, + "grad_norm": 28.4096622467041, + "learning_rate": 4.300287017818929e-06, + "loss": 0.3562, + "num_input_tokens_seen": 78586944, + "step": 24970 + }, + { + "epoch": 1.5988092951795658, + "grad_norm": 12.705214500427246, + "learning_rate": 4.299899349187399e-06, + "loss": 0.235, + "num_input_tokens_seen": 78601984, + "step": 24975 + }, + { + "epoch": 1.5991293771205428, + "grad_norm": 34.62396240234375, + "learning_rate": 4.299511590678269e-06, + "loss": 0.31, + "num_input_tokens_seen": 78617920, + "step": 24980 + }, + { + "epoch": 1.5994494590615198, + "grad_norm": 17.621429443359375, + "learning_rate": 4.299123742310904e-06, + "loss": 0.3823, + "num_input_tokens_seen": 78634496, + "step": 24985 + }, + { + "epoch": 1.5997695410024968, + "grad_norm": 18.30769920349121, + "learning_rate": 4.298735804104672e-06, + "loss": 0.2959, + "num_input_tokens_seen": 78648512, + "step": 24990 + }, + { + "epoch": 1.6000896229434736, + "grad_norm": 15.075571060180664, + "learning_rate": 4.2983477760789414e-06, + "loss": 0.3299, + "num_input_tokens_seen": 78665472, + "step": 24995 + }, + { + "epoch": 1.6004097048844503, + "grad_norm": 14.421113967895508, + "learning_rate": 4.297959658253091e-06, + "loss": 0.2835, + "num_input_tokens_seen": 78680512, + "step": 25000 + }, + { + "epoch": 1.6007297868254273, + "grad_norm": 23.0529842376709, + "learning_rate": 4.2975714506465e-06, + "loss": 0.349, + "num_input_tokens_seen": 78694784, + "step": 25005 + }, + { + "epoch": 1.6010498687664043, + "grad_norm": 26.659860610961914, + "learning_rate": 4.297183153278555e-06, + "loss": 0.3617, + "num_input_tokens_seen": 78710464, + "step": 25010 + }, + { + "epoch": 1.601369950707381, + "grad_norm": 40.88199234008789, + "learning_rate": 4.296794766168643e-06, + "loss": 0.2122, + "num_input_tokens_seen": 78726336, + "step": 25015 + }, + { + "epoch": 1.6016900326483579, + "grad_norm": 19.015878677368164, + "learning_rate": 4.2964062893361614e-06, + "loss": 0.3485, + "num_input_tokens_seen": 78741440, + "step": 25020 + }, + { + "epoch": 1.6020101145893348, + "grad_norm": 16.002981185913086, + "learning_rate": 4.296017722800505e-06, + "loss": 0.2863, + "num_input_tokens_seen": 78756480, + "step": 25025 + }, + { + "epoch": 1.6023301965303118, + "grad_norm": 48.447635650634766, + "learning_rate": 4.29562906658108e-06, + "loss": 0.2454, + "num_input_tokens_seen": 78771520, + "step": 25030 + }, + { + "epoch": 1.6026502784712886, + "grad_norm": 28.024662017822266, + "learning_rate": 4.295240320697292e-06, + "loss": 0.41, + "num_input_tokens_seen": 78787136, + "step": 25035 + }, + { + "epoch": 1.6029703604122656, + "grad_norm": 9.63082504272461, + "learning_rate": 4.294851485168553e-06, + "loss": 0.2647, + "num_input_tokens_seen": 78802688, + "step": 25040 + }, + { + "epoch": 1.6032904423532424, + "grad_norm": 8.913525581359863, + "learning_rate": 4.294462560014281e-06, + "loss": 0.282, + "num_input_tokens_seen": 78819072, + "step": 25045 + }, + { + "epoch": 1.6036105242942194, + "grad_norm": 10.371607780456543, + "learning_rate": 4.294073545253895e-06, + "loss": 0.1755, + "num_input_tokens_seen": 78834624, + "step": 25050 + }, + { + "epoch": 1.6039306062351963, + "grad_norm": 25.23495101928711, + "learning_rate": 4.293684440906821e-06, + "loss": 0.3974, + "num_input_tokens_seen": 78850688, + "step": 25055 + }, + { + "epoch": 1.6042506881761731, + "grad_norm": 66.35067749023438, + "learning_rate": 4.29329524699249e-06, + "loss": 0.3474, + "num_input_tokens_seen": 78866816, + "step": 25060 + }, + { + "epoch": 1.6045707701171499, + "grad_norm": 25.756515502929688, + "learning_rate": 4.292905963530334e-06, + "loss": 0.2823, + "num_input_tokens_seen": 78881472, + "step": 25065 + }, + { + "epoch": 1.6048908520581269, + "grad_norm": 15.102428436279297, + "learning_rate": 4.292516590539793e-06, + "loss": 0.2369, + "num_input_tokens_seen": 78896704, + "step": 25070 + }, + { + "epoch": 1.6052109339991039, + "grad_norm": 7.240452289581299, + "learning_rate": 4.292127128040311e-06, + "loss": 0.2956, + "num_input_tokens_seen": 78912000, + "step": 25075 + }, + { + "epoch": 1.6055310159400806, + "grad_norm": 13.153940200805664, + "learning_rate": 4.291737576051335e-06, + "loss": 0.2587, + "num_input_tokens_seen": 78927488, + "step": 25080 + }, + { + "epoch": 1.6058510978810574, + "grad_norm": 39.535858154296875, + "learning_rate": 4.291347934592317e-06, + "loss": 0.2982, + "num_input_tokens_seen": 78943424, + "step": 25085 + }, + { + "epoch": 1.6061711798220344, + "grad_norm": 36.7844352722168, + "learning_rate": 4.290958203682715e-06, + "loss": 0.3259, + "num_input_tokens_seen": 78958784, + "step": 25090 + }, + { + "epoch": 1.6064912617630114, + "grad_norm": 13.693374633789062, + "learning_rate": 4.290568383341987e-06, + "loss": 0.3878, + "num_input_tokens_seen": 78974272, + "step": 25095 + }, + { + "epoch": 1.6068113437039884, + "grad_norm": 26.94315528869629, + "learning_rate": 4.290178473589602e-06, + "loss": 0.3547, + "num_input_tokens_seen": 78989440, + "step": 25100 + }, + { + "epoch": 1.6071314256449651, + "grad_norm": 80.75464630126953, + "learning_rate": 4.289788474445029e-06, + "loss": 0.335, + "num_input_tokens_seen": 79005056, + "step": 25105 + }, + { + "epoch": 1.607451507585942, + "grad_norm": 25.323110580444336, + "learning_rate": 4.289398385927742e-06, + "loss": 0.3209, + "num_input_tokens_seen": 79020864, + "step": 25110 + }, + { + "epoch": 1.607771589526919, + "grad_norm": 43.946495056152344, + "learning_rate": 4.2890082080572205e-06, + "loss": 0.3117, + "num_input_tokens_seen": 79037056, + "step": 25115 + }, + { + "epoch": 1.608091671467896, + "grad_norm": 18.590801239013672, + "learning_rate": 4.288617940852947e-06, + "loss": 0.3472, + "num_input_tokens_seen": 79051648, + "step": 25120 + }, + { + "epoch": 1.6084117534088727, + "grad_norm": 29.057497024536133, + "learning_rate": 4.28822758433441e-06, + "loss": 0.3045, + "num_input_tokens_seen": 79069120, + "step": 25125 + }, + { + "epoch": 1.6087318353498494, + "grad_norm": 32.39029312133789, + "learning_rate": 4.287837138521103e-06, + "loss": 0.2427, + "num_input_tokens_seen": 79085440, + "step": 25130 + }, + { + "epoch": 1.6090519172908264, + "grad_norm": 28.998680114746094, + "learning_rate": 4.287446603432522e-06, + "loss": 0.4127, + "num_input_tokens_seen": 79100672, + "step": 25135 + }, + { + "epoch": 1.6093719992318034, + "grad_norm": 52.007686614990234, + "learning_rate": 4.2870559790881665e-06, + "loss": 0.4773, + "num_input_tokens_seen": 79115904, + "step": 25140 + }, + { + "epoch": 1.6096920811727804, + "grad_norm": 23.19314193725586, + "learning_rate": 4.286665265507544e-06, + "loss": 0.4103, + "num_input_tokens_seen": 79130816, + "step": 25145 + }, + { + "epoch": 1.6100121631137572, + "grad_norm": 16.291603088378906, + "learning_rate": 4.286274462710166e-06, + "loss": 0.2558, + "num_input_tokens_seen": 79145792, + "step": 25150 + }, + { + "epoch": 1.610332245054734, + "grad_norm": 15.338079452514648, + "learning_rate": 4.285883570715545e-06, + "loss": 0.2833, + "num_input_tokens_seen": 79161920, + "step": 25155 + }, + { + "epoch": 1.610652326995711, + "grad_norm": 42.53609848022461, + "learning_rate": 4.2854925895432005e-06, + "loss": 0.3262, + "num_input_tokens_seen": 79177536, + "step": 25160 + }, + { + "epoch": 1.610972408936688, + "grad_norm": 36.535064697265625, + "learning_rate": 4.285101519212657e-06, + "loss": 0.4285, + "num_input_tokens_seen": 79193088, + "step": 25165 + }, + { + "epoch": 1.6112924908776647, + "grad_norm": 25.760665893554688, + "learning_rate": 4.2847103597434415e-06, + "loss": 0.3316, + "num_input_tokens_seen": 79209536, + "step": 25170 + }, + { + "epoch": 1.6116125728186415, + "grad_norm": 11.016261100769043, + "learning_rate": 4.284319111155086e-06, + "loss": 0.2112, + "num_input_tokens_seen": 79225344, + "step": 25175 + }, + { + "epoch": 1.6119326547596184, + "grad_norm": 8.913002014160156, + "learning_rate": 4.28392777346713e-06, + "loss": 0.4177, + "num_input_tokens_seen": 79242304, + "step": 25180 + }, + { + "epoch": 1.6122527367005954, + "grad_norm": 14.04858112335205, + "learning_rate": 4.283536346699112e-06, + "loss": 0.2604, + "num_input_tokens_seen": 79256832, + "step": 25185 + }, + { + "epoch": 1.6125728186415722, + "grad_norm": 6.739816188812256, + "learning_rate": 4.2831448308705795e-06, + "loss": 0.4069, + "num_input_tokens_seen": 79272448, + "step": 25190 + }, + { + "epoch": 1.612892900582549, + "grad_norm": 9.703689575195312, + "learning_rate": 4.282753226001082e-06, + "loss": 0.2348, + "num_input_tokens_seen": 79288832, + "step": 25195 + }, + { + "epoch": 1.613212982523526, + "grad_norm": 35.46308517456055, + "learning_rate": 4.282361532110174e-06, + "loss": 0.4387, + "num_input_tokens_seen": 79305408, + "step": 25200 + }, + { + "epoch": 1.613533064464503, + "grad_norm": 74.20321655273438, + "learning_rate": 4.281969749217415e-06, + "loss": 0.3998, + "num_input_tokens_seen": 79320960, + "step": 25205 + }, + { + "epoch": 1.61385314640548, + "grad_norm": 8.675493240356445, + "learning_rate": 4.281577877342369e-06, + "loss": 0.3832, + "num_input_tokens_seen": 79336064, + "step": 25210 + }, + { + "epoch": 1.6141732283464567, + "grad_norm": 26.989145278930664, + "learning_rate": 4.281185916504604e-06, + "loss": 0.3635, + "num_input_tokens_seen": 79351808, + "step": 25215 + }, + { + "epoch": 1.6144933102874335, + "grad_norm": 123.59730529785156, + "learning_rate": 4.2807938667236915e-06, + "loss": 0.3179, + "num_input_tokens_seen": 79366144, + "step": 25220 + }, + { + "epoch": 1.6148133922284105, + "grad_norm": 43.23637771606445, + "learning_rate": 4.28040172801921e-06, + "loss": 0.3024, + "num_input_tokens_seen": 79381632, + "step": 25225 + }, + { + "epoch": 1.6151334741693875, + "grad_norm": 19.414710998535156, + "learning_rate": 4.28000950041074e-06, + "loss": 0.2843, + "num_input_tokens_seen": 79397248, + "step": 25230 + }, + { + "epoch": 1.6154535561103642, + "grad_norm": 33.55038833618164, + "learning_rate": 4.279617183917866e-06, + "loss": 0.2988, + "num_input_tokens_seen": 79412992, + "step": 25235 + }, + { + "epoch": 1.615773638051341, + "grad_norm": 14.230480194091797, + "learning_rate": 4.2792247785601805e-06, + "loss": 0.3663, + "num_input_tokens_seen": 79428736, + "step": 25240 + }, + { + "epoch": 1.616093719992318, + "grad_norm": 11.359092712402344, + "learning_rate": 4.278832284357277e-06, + "loss": 0.2814, + "num_input_tokens_seen": 79444032, + "step": 25245 + }, + { + "epoch": 1.616413801933295, + "grad_norm": 13.559510231018066, + "learning_rate": 4.278439701328755e-06, + "loss": 0.2383, + "num_input_tokens_seen": 79459904, + "step": 25250 + }, + { + "epoch": 1.616733883874272, + "grad_norm": 5.4502387046813965, + "learning_rate": 4.278047029494218e-06, + "loss": 0.2475, + "num_input_tokens_seen": 79476480, + "step": 25255 + }, + { + "epoch": 1.6170539658152487, + "grad_norm": 21.027463912963867, + "learning_rate": 4.2776542688732734e-06, + "loss": 0.2758, + "num_input_tokens_seen": 79492864, + "step": 25260 + }, + { + "epoch": 1.6173740477562255, + "grad_norm": 16.450471878051758, + "learning_rate": 4.277261419485534e-06, + "loss": 0.4661, + "num_input_tokens_seen": 79509056, + "step": 25265 + }, + { + "epoch": 1.6176941296972025, + "grad_norm": 17.20603370666504, + "learning_rate": 4.2768684813506166e-06, + "loss": 0.1872, + "num_input_tokens_seen": 79526016, + "step": 25270 + }, + { + "epoch": 1.6180142116381795, + "grad_norm": 32.66105651855469, + "learning_rate": 4.276475454488143e-06, + "loss": 0.2831, + "num_input_tokens_seen": 79541248, + "step": 25275 + }, + { + "epoch": 1.6183342935791563, + "grad_norm": 32.00093078613281, + "learning_rate": 4.276082338917739e-06, + "loss": 0.3685, + "num_input_tokens_seen": 79556736, + "step": 25280 + }, + { + "epoch": 1.618654375520133, + "grad_norm": 40.719303131103516, + "learning_rate": 4.275689134659033e-06, + "loss": 0.3198, + "num_input_tokens_seen": 79571648, + "step": 25285 + }, + { + "epoch": 1.61897445746111, + "grad_norm": 17.554288864135742, + "learning_rate": 4.275295841731663e-06, + "loss": 0.2793, + "num_input_tokens_seen": 79586304, + "step": 25290 + }, + { + "epoch": 1.619294539402087, + "grad_norm": 14.823515892028809, + "learning_rate": 4.274902460155263e-06, + "loss": 0.2783, + "num_input_tokens_seen": 79601408, + "step": 25295 + }, + { + "epoch": 1.6196146213430638, + "grad_norm": 22.06226921081543, + "learning_rate": 4.274508989949482e-06, + "loss": 0.3915, + "num_input_tokens_seen": 79616768, + "step": 25300 + }, + { + "epoch": 1.6199347032840408, + "grad_norm": 23.068525314331055, + "learning_rate": 4.274115431133963e-06, + "loss": 0.3715, + "num_input_tokens_seen": 79632128, + "step": 25305 + }, + { + "epoch": 1.6202547852250175, + "grad_norm": 6.807242393493652, + "learning_rate": 4.273721783728362e-06, + "loss": 0.2483, + "num_input_tokens_seen": 79646592, + "step": 25310 + }, + { + "epoch": 1.6205748671659945, + "grad_norm": 19.38157081604004, + "learning_rate": 4.273328047752333e-06, + "loss": 0.3975, + "num_input_tokens_seen": 79662080, + "step": 25315 + }, + { + "epoch": 1.6208949491069715, + "grad_norm": 23.107149124145508, + "learning_rate": 4.272934223225539e-06, + "loss": 0.2074, + "num_input_tokens_seen": 79676992, + "step": 25320 + }, + { + "epoch": 1.6212150310479483, + "grad_norm": 33.02793502807617, + "learning_rate": 4.272540310167644e-06, + "loss": 0.2971, + "num_input_tokens_seen": 79692224, + "step": 25325 + }, + { + "epoch": 1.621535112988925, + "grad_norm": 23.348764419555664, + "learning_rate": 4.272146308598319e-06, + "loss": 0.3212, + "num_input_tokens_seen": 79708672, + "step": 25330 + }, + { + "epoch": 1.621855194929902, + "grad_norm": 18.9488582611084, + "learning_rate": 4.271752218537239e-06, + "loss": 0.3313, + "num_input_tokens_seen": 79724544, + "step": 25335 + }, + { + "epoch": 1.622175276870879, + "grad_norm": 7.90954065322876, + "learning_rate": 4.27135804000408e-06, + "loss": 0.1934, + "num_input_tokens_seen": 79739456, + "step": 25340 + }, + { + "epoch": 1.6224953588118558, + "grad_norm": 20.113399505615234, + "learning_rate": 4.270963773018528e-06, + "loss": 0.297, + "num_input_tokens_seen": 79755648, + "step": 25345 + }, + { + "epoch": 1.6228154407528326, + "grad_norm": 48.55184555053711, + "learning_rate": 4.270569417600271e-06, + "loss": 0.4037, + "num_input_tokens_seen": 79771456, + "step": 25350 + }, + { + "epoch": 1.6231355226938096, + "grad_norm": 12.166651725769043, + "learning_rate": 4.270174973768998e-06, + "loss": 0.3551, + "num_input_tokens_seen": 79786624, + "step": 25355 + }, + { + "epoch": 1.6234556046347866, + "grad_norm": 43.555213928222656, + "learning_rate": 4.269780441544409e-06, + "loss": 0.2503, + "num_input_tokens_seen": 79801536, + "step": 25360 + }, + { + "epoch": 1.6237756865757635, + "grad_norm": 34.917423248291016, + "learning_rate": 4.269385820946203e-06, + "loss": 0.4006, + "num_input_tokens_seen": 79818368, + "step": 25365 + }, + { + "epoch": 1.6240957685167403, + "grad_norm": 22.296300888061523, + "learning_rate": 4.268991111994084e-06, + "loss": 0.2708, + "num_input_tokens_seen": 79834240, + "step": 25370 + }, + { + "epoch": 1.624415850457717, + "grad_norm": 20.341846466064453, + "learning_rate": 4.268596314707764e-06, + "loss": 0.369, + "num_input_tokens_seen": 79849792, + "step": 25375 + }, + { + "epoch": 1.624735932398694, + "grad_norm": 52.605224609375, + "learning_rate": 4.268201429106957e-06, + "loss": 0.4254, + "num_input_tokens_seen": 79865472, + "step": 25380 + }, + { + "epoch": 1.625056014339671, + "grad_norm": 40.09370422363281, + "learning_rate": 4.26780645521138e-06, + "loss": 0.3663, + "num_input_tokens_seen": 79881792, + "step": 25385 + }, + { + "epoch": 1.6253760962806478, + "grad_norm": 9.43689250946045, + "learning_rate": 4.267411393040757e-06, + "loss": 0.3271, + "num_input_tokens_seen": 79897920, + "step": 25390 + }, + { + "epoch": 1.6256961782216246, + "grad_norm": 47.67231750488281, + "learning_rate": 4.267016242614816e-06, + "loss": 0.3354, + "num_input_tokens_seen": 79912640, + "step": 25395 + }, + { + "epoch": 1.6260162601626016, + "grad_norm": 14.865822792053223, + "learning_rate": 4.2666210039532865e-06, + "loss": 0.2257, + "num_input_tokens_seen": 79927936, + "step": 25400 + }, + { + "epoch": 1.6263363421035786, + "grad_norm": 59.64122772216797, + "learning_rate": 4.266225677075907e-06, + "loss": 0.3853, + "num_input_tokens_seen": 79943744, + "step": 25405 + }, + { + "epoch": 1.6266564240445556, + "grad_norm": 51.1346321105957, + "learning_rate": 4.265830262002416e-06, + "loss": 0.2559, + "num_input_tokens_seen": 79958720, + "step": 25410 + }, + { + "epoch": 1.6269765059855323, + "grad_norm": 24.2857723236084, + "learning_rate": 4.265434758752561e-06, + "loss": 0.3398, + "num_input_tokens_seen": 79974912, + "step": 25415 + }, + { + "epoch": 1.627296587926509, + "grad_norm": 24.933820724487305, + "learning_rate": 4.265039167346089e-06, + "loss": 0.3683, + "num_input_tokens_seen": 79990976, + "step": 25420 + }, + { + "epoch": 1.627616669867486, + "grad_norm": 28.04987144470215, + "learning_rate": 4.264643487802756e-06, + "loss": 0.3442, + "num_input_tokens_seen": 80007040, + "step": 25425 + }, + { + "epoch": 1.627936751808463, + "grad_norm": 18.745986938476562, + "learning_rate": 4.264247720142317e-06, + "loss": 0.3008, + "num_input_tokens_seen": 80022144, + "step": 25430 + }, + { + "epoch": 1.6282568337494399, + "grad_norm": 8.126128196716309, + "learning_rate": 4.2638518643845375e-06, + "loss": 0.3936, + "num_input_tokens_seen": 80038272, + "step": 25435 + }, + { + "epoch": 1.6285769156904166, + "grad_norm": 72.2948989868164, + "learning_rate": 4.263455920549184e-06, + "loss": 0.3108, + "num_input_tokens_seen": 80054592, + "step": 25440 + }, + { + "epoch": 1.6288969976313936, + "grad_norm": 74.48081970214844, + "learning_rate": 4.263059888656026e-06, + "loss": 0.3196, + "num_input_tokens_seen": 80069824, + "step": 25445 + }, + { + "epoch": 1.6292170795723706, + "grad_norm": 27.195220947265625, + "learning_rate": 4.262663768724841e-06, + "loss": 0.5387, + "num_input_tokens_seen": 80085568, + "step": 25450 + }, + { + "epoch": 1.6295371615133474, + "grad_norm": 9.407700538635254, + "learning_rate": 4.26226756077541e-06, + "loss": 0.3174, + "num_input_tokens_seen": 80100096, + "step": 25455 + }, + { + "epoch": 1.6298572434543241, + "grad_norm": 19.00909423828125, + "learning_rate": 4.261871264827515e-06, + "loss": 0.3078, + "num_input_tokens_seen": 80115712, + "step": 25460 + }, + { + "epoch": 1.6301773253953011, + "grad_norm": 18.284774780273438, + "learning_rate": 4.2614748809009476e-06, + "loss": 0.436, + "num_input_tokens_seen": 80131264, + "step": 25465 + }, + { + "epoch": 1.6304974073362781, + "grad_norm": 49.232215881347656, + "learning_rate": 4.261078409015499e-06, + "loss": 0.4129, + "num_input_tokens_seen": 80146432, + "step": 25470 + }, + { + "epoch": 1.6308174892772551, + "grad_norm": 10.951411247253418, + "learning_rate": 4.260681849190967e-06, + "loss": 0.3428, + "num_input_tokens_seen": 80162112, + "step": 25475 + }, + { + "epoch": 1.6311375712182319, + "grad_norm": 22.214418411254883, + "learning_rate": 4.260285201447156e-06, + "loss": 0.4165, + "num_input_tokens_seen": 80177472, + "step": 25480 + }, + { + "epoch": 1.6314576531592087, + "grad_norm": 30.43051528930664, + "learning_rate": 4.25988846580387e-06, + "loss": 0.3656, + "num_input_tokens_seen": 80193984, + "step": 25485 + }, + { + "epoch": 1.6317777351001856, + "grad_norm": 13.750630378723145, + "learning_rate": 4.2594916422809214e-06, + "loss": 0.2376, + "num_input_tokens_seen": 80209344, + "step": 25490 + }, + { + "epoch": 1.6320978170411626, + "grad_norm": 7.608510971069336, + "learning_rate": 4.259094730898125e-06, + "loss": 0.2847, + "num_input_tokens_seen": 80224448, + "step": 25495 + }, + { + "epoch": 1.6324178989821394, + "grad_norm": 24.895280838012695, + "learning_rate": 4.2586977316753e-06, + "loss": 0.4157, + "num_input_tokens_seen": 80240064, + "step": 25500 + }, + { + "epoch": 1.6327379809231162, + "grad_norm": 26.71196746826172, + "learning_rate": 4.258300644632272e-06, + "loss": 0.3217, + "num_input_tokens_seen": 80255104, + "step": 25505 + }, + { + "epoch": 1.6330580628640932, + "grad_norm": 8.395367622375488, + "learning_rate": 4.257903469788867e-06, + "loss": 0.2862, + "num_input_tokens_seen": 80270912, + "step": 25510 + }, + { + "epoch": 1.6333781448050702, + "grad_norm": 41.500885009765625, + "learning_rate": 4.257506207164921e-06, + "loss": 0.2993, + "num_input_tokens_seen": 80285760, + "step": 25515 + }, + { + "epoch": 1.6336982267460471, + "grad_norm": 27.035844802856445, + "learning_rate": 4.257108856780268e-06, + "loss": 0.4181, + "num_input_tokens_seen": 80301376, + "step": 25520 + }, + { + "epoch": 1.634018308687024, + "grad_norm": 14.743849754333496, + "learning_rate": 4.256711418654753e-06, + "loss": 0.3956, + "num_input_tokens_seen": 80316992, + "step": 25525 + }, + { + "epoch": 1.6343383906280007, + "grad_norm": 25.548978805541992, + "learning_rate": 4.256313892808219e-06, + "loss": 0.247, + "num_input_tokens_seen": 80331904, + "step": 25530 + }, + { + "epoch": 1.6346584725689777, + "grad_norm": 23.047643661499023, + "learning_rate": 4.255916279260517e-06, + "loss": 0.2341, + "num_input_tokens_seen": 80346944, + "step": 25535 + }, + { + "epoch": 1.6349785545099547, + "grad_norm": 12.490935325622559, + "learning_rate": 4.255518578031503e-06, + "loss": 0.2443, + "num_input_tokens_seen": 80363648, + "step": 25540 + }, + { + "epoch": 1.6352986364509314, + "grad_norm": 14.686007499694824, + "learning_rate": 4.255120789141035e-06, + "loss": 0.303, + "num_input_tokens_seen": 80378496, + "step": 25545 + }, + { + "epoch": 1.6356187183919082, + "grad_norm": 13.617271423339844, + "learning_rate": 4.254722912608977e-06, + "loss": 0.3314, + "num_input_tokens_seen": 80394112, + "step": 25550 + }, + { + "epoch": 1.6359388003328852, + "grad_norm": 8.049337387084961, + "learning_rate": 4.2543249484551976e-06, + "loss": 0.4088, + "num_input_tokens_seen": 80409984, + "step": 25555 + }, + { + "epoch": 1.6362588822738622, + "grad_norm": 59.976261138916016, + "learning_rate": 4.253926896699566e-06, + "loss": 0.3809, + "num_input_tokens_seen": 80426048, + "step": 25560 + }, + { + "epoch": 1.636578964214839, + "grad_norm": 19.964906692504883, + "learning_rate": 4.2535287573619635e-06, + "loss": 0.3026, + "num_input_tokens_seen": 80442112, + "step": 25565 + }, + { + "epoch": 1.636899046155816, + "grad_norm": 15.281968116760254, + "learning_rate": 4.253130530462267e-06, + "loss": 0.2828, + "num_input_tokens_seen": 80457216, + "step": 25570 + }, + { + "epoch": 1.6372191280967927, + "grad_norm": 18.296005249023438, + "learning_rate": 4.252732216020364e-06, + "loss": 0.3377, + "num_input_tokens_seen": 80473280, + "step": 25575 + }, + { + "epoch": 1.6375392100377697, + "grad_norm": 43.06759262084961, + "learning_rate": 4.252333814056145e-06, + "loss": 0.4095, + "num_input_tokens_seen": 80488320, + "step": 25580 + }, + { + "epoch": 1.6378592919787467, + "grad_norm": 11.447834014892578, + "learning_rate": 4.251935324589502e-06, + "loss": 0.3433, + "num_input_tokens_seen": 80504640, + "step": 25585 + }, + { + "epoch": 1.6381793739197235, + "grad_norm": 49.22285079956055, + "learning_rate": 4.2515367476403335e-06, + "loss": 0.4683, + "num_input_tokens_seen": 80519232, + "step": 25590 + }, + { + "epoch": 1.6384994558607002, + "grad_norm": 8.591958999633789, + "learning_rate": 4.251138083228544e-06, + "loss": 0.2173, + "num_input_tokens_seen": 80534720, + "step": 25595 + }, + { + "epoch": 1.6388195378016772, + "grad_norm": 61.80414962768555, + "learning_rate": 4.25073933137404e-06, + "loss": 0.4885, + "num_input_tokens_seen": 80551040, + "step": 25600 + }, + { + "epoch": 1.6391396197426542, + "grad_norm": 30.91948699951172, + "learning_rate": 4.2503404920967326e-06, + "loss": 0.2751, + "num_input_tokens_seen": 80566848, + "step": 25605 + }, + { + "epoch": 1.639459701683631, + "grad_norm": 22.24249267578125, + "learning_rate": 4.249941565416539e-06, + "loss": 0.3141, + "num_input_tokens_seen": 80583040, + "step": 25610 + }, + { + "epoch": 1.6397797836246077, + "grad_norm": 39.87586975097656, + "learning_rate": 4.249542551353378e-06, + "loss": 0.3434, + "num_input_tokens_seen": 80598144, + "step": 25615 + }, + { + "epoch": 1.6400998655655847, + "grad_norm": 20.844751358032227, + "learning_rate": 4.249143449927176e-06, + "loss": 0.2803, + "num_input_tokens_seen": 80614080, + "step": 25620 + }, + { + "epoch": 1.6404199475065617, + "grad_norm": 54.91630935668945, + "learning_rate": 4.24874426115786e-06, + "loss": 0.4398, + "num_input_tokens_seen": 80630144, + "step": 25625 + }, + { + "epoch": 1.6407400294475387, + "grad_norm": 15.633999824523926, + "learning_rate": 4.248344985065364e-06, + "loss": 0.2719, + "num_input_tokens_seen": 80645568, + "step": 25630 + }, + { + "epoch": 1.6410601113885155, + "grad_norm": 22.253223419189453, + "learning_rate": 4.247945621669628e-06, + "loss": 0.3412, + "num_input_tokens_seen": 80663872, + "step": 25635 + }, + { + "epoch": 1.6413801933294923, + "grad_norm": 37.88339614868164, + "learning_rate": 4.2475461709905915e-06, + "loss": 0.3643, + "num_input_tokens_seen": 80679872, + "step": 25640 + }, + { + "epoch": 1.6417002752704692, + "grad_norm": 34.185245513916016, + "learning_rate": 4.247146633048202e-06, + "loss": 0.3802, + "num_input_tokens_seen": 80695488, + "step": 25645 + }, + { + "epoch": 1.6420203572114462, + "grad_norm": 12.654478073120117, + "learning_rate": 4.24674700786241e-06, + "loss": 0.2717, + "num_input_tokens_seen": 80711360, + "step": 25650 + }, + { + "epoch": 1.642340439152423, + "grad_norm": 13.950393676757812, + "learning_rate": 4.246347295453171e-06, + "loss": 0.3598, + "num_input_tokens_seen": 80727808, + "step": 25655 + }, + { + "epoch": 1.6426605210933998, + "grad_norm": 21.323755264282227, + "learning_rate": 4.245947495840444e-06, + "loss": 0.2513, + "num_input_tokens_seen": 80744512, + "step": 25660 + }, + { + "epoch": 1.6429806030343768, + "grad_norm": 17.997859954833984, + "learning_rate": 4.245547609044194e-06, + "loss": 0.408, + "num_input_tokens_seen": 80761280, + "step": 25665 + }, + { + "epoch": 1.6433006849753538, + "grad_norm": 20.47334861755371, + "learning_rate": 4.2451476350843885e-06, + "loss": 0.4443, + "num_input_tokens_seen": 80776320, + "step": 25670 + }, + { + "epoch": 1.6436207669163307, + "grad_norm": 44.61897659301758, + "learning_rate": 4.244747573981e-06, + "loss": 0.3911, + "num_input_tokens_seen": 80792256, + "step": 25675 + }, + { + "epoch": 1.6439408488573075, + "grad_norm": 15.590322494506836, + "learning_rate": 4.2443474257540064e-06, + "loss": 0.3401, + "num_input_tokens_seen": 80807744, + "step": 25680 + }, + { + "epoch": 1.6442609307982843, + "grad_norm": 37.856746673583984, + "learning_rate": 4.243947190423387e-06, + "loss": 0.2824, + "num_input_tokens_seen": 80823360, + "step": 25685 + }, + { + "epoch": 1.6445810127392613, + "grad_norm": 19.123516082763672, + "learning_rate": 4.2435468680091305e-06, + "loss": 0.4377, + "num_input_tokens_seen": 80839424, + "step": 25690 + }, + { + "epoch": 1.6449010946802383, + "grad_norm": 36.35848617553711, + "learning_rate": 4.243146458531224e-06, + "loss": 0.4255, + "num_input_tokens_seen": 80855232, + "step": 25695 + }, + { + "epoch": 1.645221176621215, + "grad_norm": 12.588894844055176, + "learning_rate": 4.2427459620096635e-06, + "loss": 0.3378, + "num_input_tokens_seen": 80870592, + "step": 25700 + }, + { + "epoch": 1.6455412585621918, + "grad_norm": 18.52828025817871, + "learning_rate": 4.242345378464448e-06, + "loss": 0.3124, + "num_input_tokens_seen": 80884544, + "step": 25705 + }, + { + "epoch": 1.6458613405031688, + "grad_norm": 28.199880599975586, + "learning_rate": 4.241944707915579e-06, + "loss": 0.4361, + "num_input_tokens_seen": 80900416, + "step": 25710 + }, + { + "epoch": 1.6461814224441458, + "grad_norm": 25.98511505126953, + "learning_rate": 4.241543950383063e-06, + "loss": 0.3588, + "num_input_tokens_seen": 80915648, + "step": 25715 + }, + { + "epoch": 1.6465015043851225, + "grad_norm": 16.96280860900879, + "learning_rate": 4.241143105886916e-06, + "loss": 0.2621, + "num_input_tokens_seen": 80930944, + "step": 25720 + }, + { + "epoch": 1.6468215863260993, + "grad_norm": 25.299951553344727, + "learning_rate": 4.240742174447151e-06, + "loss": 0.4127, + "num_input_tokens_seen": 80945600, + "step": 25725 + }, + { + "epoch": 1.6471416682670763, + "grad_norm": 45.278507232666016, + "learning_rate": 4.240341156083789e-06, + "loss": 0.3735, + "num_input_tokens_seen": 80961856, + "step": 25730 + }, + { + "epoch": 1.6474617502080533, + "grad_norm": 11.49785327911377, + "learning_rate": 4.239940050816854e-06, + "loss": 0.2615, + "num_input_tokens_seen": 80976640, + "step": 25735 + }, + { + "epoch": 1.6477818321490303, + "grad_norm": 17.920808792114258, + "learning_rate": 4.239538858666377e-06, + "loss": 0.3408, + "num_input_tokens_seen": 80991552, + "step": 25740 + }, + { + "epoch": 1.648101914090007, + "grad_norm": 13.467994689941406, + "learning_rate": 4.23913757965239e-06, + "loss": 0.2641, + "num_input_tokens_seen": 81007168, + "step": 25745 + }, + { + "epoch": 1.6484219960309838, + "grad_norm": 22.213748931884766, + "learning_rate": 4.238736213794931e-06, + "loss": 0.4058, + "num_input_tokens_seen": 81022400, + "step": 25750 + }, + { + "epoch": 1.6487420779719608, + "grad_norm": 12.297605514526367, + "learning_rate": 4.238334761114042e-06, + "loss": 0.2596, + "num_input_tokens_seen": 81037888, + "step": 25755 + }, + { + "epoch": 1.6490621599129378, + "grad_norm": 55.03055953979492, + "learning_rate": 4.23793322162977e-06, + "loss": 0.347, + "num_input_tokens_seen": 81052352, + "step": 25760 + }, + { + "epoch": 1.6493822418539146, + "grad_norm": 24.33421516418457, + "learning_rate": 4.237531595362165e-06, + "loss": 0.4247, + "num_input_tokens_seen": 81068032, + "step": 25765 + }, + { + "epoch": 1.6497023237948913, + "grad_norm": 21.311723709106445, + "learning_rate": 4.237129882331283e-06, + "loss": 0.407, + "num_input_tokens_seen": 81083328, + "step": 25770 + }, + { + "epoch": 1.6500224057358683, + "grad_norm": 27.222871780395508, + "learning_rate": 4.236728082557183e-06, + "loss": 0.2753, + "num_input_tokens_seen": 81099712, + "step": 25775 + }, + { + "epoch": 1.6503424876768453, + "grad_norm": 25.031089782714844, + "learning_rate": 4.236326196059929e-06, + "loss": 0.404, + "num_input_tokens_seen": 81115968, + "step": 25780 + }, + { + "epoch": 1.6506625696178223, + "grad_norm": 29.428110122680664, + "learning_rate": 4.235924222859589e-06, + "loss": 0.4052, + "num_input_tokens_seen": 81130880, + "step": 25785 + }, + { + "epoch": 1.650982651558799, + "grad_norm": 8.493541717529297, + "learning_rate": 4.235522162976234e-06, + "loss": 0.3187, + "num_input_tokens_seen": 81146176, + "step": 25790 + }, + { + "epoch": 1.6513027334997759, + "grad_norm": 23.880329132080078, + "learning_rate": 4.235120016429945e-06, + "loss": 0.2374, + "num_input_tokens_seen": 81161472, + "step": 25795 + }, + { + "epoch": 1.6516228154407528, + "grad_norm": 15.570444107055664, + "learning_rate": 4.234717783240798e-06, + "loss": 0.3276, + "num_input_tokens_seen": 81177152, + "step": 25800 + }, + { + "epoch": 1.6519428973817298, + "grad_norm": 17.82489776611328, + "learning_rate": 4.234315463428881e-06, + "loss": 0.2702, + "num_input_tokens_seen": 81194688, + "step": 25805 + }, + { + "epoch": 1.6522629793227066, + "grad_norm": 28.69721794128418, + "learning_rate": 4.233913057014284e-06, + "loss": 0.2992, + "num_input_tokens_seen": 81210944, + "step": 25810 + }, + { + "epoch": 1.6525830612636834, + "grad_norm": 25.486574172973633, + "learning_rate": 4.233510564017101e-06, + "loss": 0.3213, + "num_input_tokens_seen": 81226624, + "step": 25815 + }, + { + "epoch": 1.6529031432046604, + "grad_norm": 33.60140609741211, + "learning_rate": 4.2331079844574295e-06, + "loss": 0.3132, + "num_input_tokens_seen": 81244224, + "step": 25820 + }, + { + "epoch": 1.6532232251456374, + "grad_norm": 12.84620189666748, + "learning_rate": 4.232705318355372e-06, + "loss": 0.2685, + "num_input_tokens_seen": 81259072, + "step": 25825 + }, + { + "epoch": 1.6535433070866141, + "grad_norm": 21.518829345703125, + "learning_rate": 4.232302565731037e-06, + "loss": 0.241, + "num_input_tokens_seen": 81273856, + "step": 25830 + }, + { + "epoch": 1.6538633890275911, + "grad_norm": 27.325992584228516, + "learning_rate": 4.231899726604534e-06, + "loss": 0.39, + "num_input_tokens_seen": 81288896, + "step": 25835 + }, + { + "epoch": 1.6541834709685679, + "grad_norm": 43.06864547729492, + "learning_rate": 4.2314968009959815e-06, + "loss": 0.3298, + "num_input_tokens_seen": 81304448, + "step": 25840 + }, + { + "epoch": 1.6545035529095449, + "grad_norm": 15.89332103729248, + "learning_rate": 4.231093788925497e-06, + "loss": 0.3232, + "num_input_tokens_seen": 81320576, + "step": 25845 + }, + { + "epoch": 1.6548236348505219, + "grad_norm": 17.195018768310547, + "learning_rate": 4.2306906904132054e-06, + "loss": 0.4215, + "num_input_tokens_seen": 81337152, + "step": 25850 + }, + { + "epoch": 1.6551437167914986, + "grad_norm": 15.840431213378906, + "learning_rate": 4.230287505479236e-06, + "loss": 0.3967, + "num_input_tokens_seen": 81352896, + "step": 25855 + }, + { + "epoch": 1.6554637987324754, + "grad_norm": 8.246745109558105, + "learning_rate": 4.2298842341437204e-06, + "loss": 0.3677, + "num_input_tokens_seen": 81367488, + "step": 25860 + }, + { + "epoch": 1.6557838806734524, + "grad_norm": 25.386472702026367, + "learning_rate": 4.229480876426798e-06, + "loss": 0.3216, + "num_input_tokens_seen": 81382528, + "step": 25865 + }, + { + "epoch": 1.6561039626144294, + "grad_norm": 25.602479934692383, + "learning_rate": 4.2290774323486084e-06, + "loss": 0.325, + "num_input_tokens_seen": 81398272, + "step": 25870 + }, + { + "epoch": 1.6564240445554061, + "grad_norm": 8.033842086791992, + "learning_rate": 4.228673901929298e-06, + "loss": 0.2839, + "num_input_tokens_seen": 81412544, + "step": 25875 + }, + { + "epoch": 1.656744126496383, + "grad_norm": 20.786035537719727, + "learning_rate": 4.228270285189019e-06, + "loss": 0.2743, + "num_input_tokens_seen": 81427968, + "step": 25880 + }, + { + "epoch": 1.65706420843736, + "grad_norm": 13.435007095336914, + "learning_rate": 4.227866582147922e-06, + "loss": 0.4283, + "num_input_tokens_seen": 81442368, + "step": 25885 + }, + { + "epoch": 1.657384290378337, + "grad_norm": 21.54793930053711, + "learning_rate": 4.22746279282617e-06, + "loss": 0.3572, + "num_input_tokens_seen": 81457280, + "step": 25890 + }, + { + "epoch": 1.657704372319314, + "grad_norm": 15.598650932312012, + "learning_rate": 4.227058917243922e-06, + "loss": 0.5254, + "num_input_tokens_seen": 81473280, + "step": 25895 + }, + { + "epoch": 1.6580244542602907, + "grad_norm": 22.29108238220215, + "learning_rate": 4.226654955421349e-06, + "loss": 0.3787, + "num_input_tokens_seen": 81489600, + "step": 25900 + }, + { + "epoch": 1.6583445362012674, + "grad_norm": 12.237943649291992, + "learning_rate": 4.226250907378622e-06, + "loss": 0.4178, + "num_input_tokens_seen": 81505792, + "step": 25905 + }, + { + "epoch": 1.6586646181422444, + "grad_norm": 12.91377067565918, + "learning_rate": 4.2258467731359145e-06, + "loss": 0.3319, + "num_input_tokens_seen": 81521536, + "step": 25910 + }, + { + "epoch": 1.6589847000832214, + "grad_norm": 21.505874633789062, + "learning_rate": 4.2254425527134105e-06, + "loss": 0.3064, + "num_input_tokens_seen": 81537024, + "step": 25915 + }, + { + "epoch": 1.6593047820241982, + "grad_norm": 21.334518432617188, + "learning_rate": 4.225038246131292e-06, + "loss": 0.4137, + "num_input_tokens_seen": 81552448, + "step": 25920 + }, + { + "epoch": 1.659624863965175, + "grad_norm": 27.068391799926758, + "learning_rate": 4.224633853409749e-06, + "loss": 0.2729, + "num_input_tokens_seen": 81567808, + "step": 25925 + }, + { + "epoch": 1.659944945906152, + "grad_norm": 31.802915573120117, + "learning_rate": 4.224229374568974e-06, + "loss": 0.4086, + "num_input_tokens_seen": 81584320, + "step": 25930 + }, + { + "epoch": 1.660265027847129, + "grad_norm": 26.450586318969727, + "learning_rate": 4.2238248096291666e-06, + "loss": 0.3573, + "num_input_tokens_seen": 81599168, + "step": 25935 + }, + { + "epoch": 1.660585109788106, + "grad_norm": 9.814793586730957, + "learning_rate": 4.223420158610526e-06, + "loss": 0.2321, + "num_input_tokens_seen": 81614720, + "step": 25940 + }, + { + "epoch": 1.6609051917290827, + "grad_norm": 19.72450065612793, + "learning_rate": 4.223015421533261e-06, + "loss": 0.4207, + "num_input_tokens_seen": 81629888, + "step": 25945 + }, + { + "epoch": 1.6612252736700595, + "grad_norm": 28.520124435424805, + "learning_rate": 4.222610598417579e-06, + "loss": 0.526, + "num_input_tokens_seen": 81645184, + "step": 25950 + }, + { + "epoch": 1.6615453556110364, + "grad_norm": 21.88719940185547, + "learning_rate": 4.222205689283698e-06, + "loss": 0.289, + "num_input_tokens_seen": 81660928, + "step": 25955 + }, + { + "epoch": 1.6618654375520134, + "grad_norm": 52.30413055419922, + "learning_rate": 4.221800694151835e-06, + "loss": 0.3843, + "num_input_tokens_seen": 81677312, + "step": 25960 + }, + { + "epoch": 1.6621855194929902, + "grad_norm": 17.123769760131836, + "learning_rate": 4.221395613042214e-06, + "loss": 0.4712, + "num_input_tokens_seen": 81692928, + "step": 25965 + }, + { + "epoch": 1.662505601433967, + "grad_norm": 14.3340482711792, + "learning_rate": 4.220990445975062e-06, + "loss": 0.3219, + "num_input_tokens_seen": 81708800, + "step": 25970 + }, + { + "epoch": 1.662825683374944, + "grad_norm": 23.998071670532227, + "learning_rate": 4.220585192970611e-06, + "loss": 0.3933, + "num_input_tokens_seen": 81724224, + "step": 25975 + }, + { + "epoch": 1.663145765315921, + "grad_norm": 26.793546676635742, + "learning_rate": 4.220179854049099e-06, + "loss": 0.4215, + "num_input_tokens_seen": 81740160, + "step": 25980 + }, + { + "epoch": 1.6634658472568977, + "grad_norm": 58.40319061279297, + "learning_rate": 4.219774429230765e-06, + "loss": 0.3602, + "num_input_tokens_seen": 81754560, + "step": 25985 + }, + { + "epoch": 1.6637859291978745, + "grad_norm": 17.825092315673828, + "learning_rate": 4.219368918535853e-06, + "loss": 0.3129, + "num_input_tokens_seen": 81769856, + "step": 25990 + }, + { + "epoch": 1.6641060111388515, + "grad_norm": 23.91872787475586, + "learning_rate": 4.218963321984614e-06, + "loss": 0.2341, + "num_input_tokens_seen": 81785344, + "step": 25995 + }, + { + "epoch": 1.6644260930798285, + "grad_norm": 49.86982727050781, + "learning_rate": 4.2185576395973e-06, + "loss": 0.4718, + "num_input_tokens_seen": 81801920, + "step": 26000 + }, + { + "epoch": 1.6647461750208055, + "grad_norm": 14.090886116027832, + "learning_rate": 4.2181518713941694e-06, + "loss": 0.2502, + "num_input_tokens_seen": 81818368, + "step": 26005 + }, + { + "epoch": 1.6650662569617822, + "grad_norm": 10.523778915405273, + "learning_rate": 4.217746017395483e-06, + "loss": 0.3281, + "num_input_tokens_seen": 81833728, + "step": 26010 + }, + { + "epoch": 1.665386338902759, + "grad_norm": 16.00431251525879, + "learning_rate": 4.217340077621509e-06, + "loss": 0.2023, + "num_input_tokens_seen": 81848960, + "step": 26015 + }, + { + "epoch": 1.665706420843736, + "grad_norm": 14.722018241882324, + "learning_rate": 4.216934052092516e-06, + "loss": 0.348, + "num_input_tokens_seen": 81865280, + "step": 26020 + }, + { + "epoch": 1.666026502784713, + "grad_norm": 8.791991233825684, + "learning_rate": 4.2165279408287806e-06, + "loss": 0.3051, + "num_input_tokens_seen": 81880896, + "step": 26025 + }, + { + "epoch": 1.6663465847256898, + "grad_norm": 25.679933547973633, + "learning_rate": 4.2161217438505794e-06, + "loss": 0.3356, + "num_input_tokens_seen": 81896320, + "step": 26030 + }, + { + "epoch": 1.6666666666666665, + "grad_norm": 13.850444793701172, + "learning_rate": 4.215715461178199e-06, + "loss": 0.3459, + "num_input_tokens_seen": 81912320, + "step": 26035 + }, + { + "epoch": 1.6669867486076435, + "grad_norm": 7.36842679977417, + "learning_rate": 4.215309092831925e-06, + "loss": 0.2861, + "num_input_tokens_seen": 81928448, + "step": 26040 + }, + { + "epoch": 1.6673068305486205, + "grad_norm": 30.720096588134766, + "learning_rate": 4.214902638832049e-06, + "loss": 0.3841, + "num_input_tokens_seen": 81944000, + "step": 26045 + }, + { + "epoch": 1.6676269124895975, + "grad_norm": 38.260414123535156, + "learning_rate": 4.214496099198868e-06, + "loss": 0.277, + "num_input_tokens_seen": 81961216, + "step": 26050 + }, + { + "epoch": 1.6679469944305743, + "grad_norm": 33.32408142089844, + "learning_rate": 4.214089473952682e-06, + "loss": 0.3328, + "num_input_tokens_seen": 81977024, + "step": 26055 + }, + { + "epoch": 1.668267076371551, + "grad_norm": 15.771416664123535, + "learning_rate": 4.213682763113796e-06, + "loss": 0.2846, + "num_input_tokens_seen": 81992768, + "step": 26060 + }, + { + "epoch": 1.668587158312528, + "grad_norm": 18.19325828552246, + "learning_rate": 4.213275966702519e-06, + "loss": 0.2839, + "num_input_tokens_seen": 82009856, + "step": 26065 + }, + { + "epoch": 1.668907240253505, + "grad_norm": 67.19104766845703, + "learning_rate": 4.212869084739165e-06, + "loss": 0.3912, + "num_input_tokens_seen": 82024960, + "step": 26070 + }, + { + "epoch": 1.6692273221944818, + "grad_norm": 15.484397888183594, + "learning_rate": 4.21246211724405e-06, + "loss": 0.3148, + "num_input_tokens_seen": 82040960, + "step": 26075 + }, + { + "epoch": 1.6695474041354585, + "grad_norm": 23.803321838378906, + "learning_rate": 4.212055064237498e-06, + "loss": 0.3879, + "num_input_tokens_seen": 82056896, + "step": 26080 + }, + { + "epoch": 1.6698674860764355, + "grad_norm": 16.28951072692871, + "learning_rate": 4.211647925739834e-06, + "loss": 0.3053, + "num_input_tokens_seen": 82073600, + "step": 26085 + }, + { + "epoch": 1.6701875680174125, + "grad_norm": 13.28773021697998, + "learning_rate": 4.211240701771387e-06, + "loss": 0.2234, + "num_input_tokens_seen": 82088640, + "step": 26090 + }, + { + "epoch": 1.6705076499583893, + "grad_norm": 28.871356964111328, + "learning_rate": 4.210833392352493e-06, + "loss": 0.3423, + "num_input_tokens_seen": 82103744, + "step": 26095 + }, + { + "epoch": 1.6708277318993663, + "grad_norm": 45.5228157043457, + "learning_rate": 4.210425997503491e-06, + "loss": 0.3086, + "num_input_tokens_seen": 82119040, + "step": 26100 + }, + { + "epoch": 1.671147813840343, + "grad_norm": 11.523247718811035, + "learning_rate": 4.2100185172447236e-06, + "loss": 0.3197, + "num_input_tokens_seen": 82137408, + "step": 26105 + }, + { + "epoch": 1.67146789578132, + "grad_norm": 15.444009780883789, + "learning_rate": 4.20961095159654e-06, + "loss": 0.4827, + "num_input_tokens_seen": 82153152, + "step": 26110 + }, + { + "epoch": 1.671787977722297, + "grad_norm": 21.888280868530273, + "learning_rate": 4.209203300579289e-06, + "loss": 0.2823, + "num_input_tokens_seen": 82169024, + "step": 26115 + }, + { + "epoch": 1.6721080596632738, + "grad_norm": 31.689908981323242, + "learning_rate": 4.208795564213329e-06, + "loss": 0.4166, + "num_input_tokens_seen": 82184576, + "step": 26120 + }, + { + "epoch": 1.6724281416042506, + "grad_norm": 17.169593811035156, + "learning_rate": 4.208387742519019e-06, + "loss": 0.3562, + "num_input_tokens_seen": 82199232, + "step": 26125 + }, + { + "epoch": 1.6727482235452276, + "grad_norm": 53.02042007446289, + "learning_rate": 4.207979835516724e-06, + "loss": 0.4136, + "num_input_tokens_seen": 82215744, + "step": 26130 + }, + { + "epoch": 1.6730683054862046, + "grad_norm": 8.976370811462402, + "learning_rate": 4.207571843226812e-06, + "loss": 0.3964, + "num_input_tokens_seen": 82230720, + "step": 26135 + }, + { + "epoch": 1.6733883874271813, + "grad_norm": 11.07219409942627, + "learning_rate": 4.207163765669657e-06, + "loss": 0.2845, + "num_input_tokens_seen": 82248256, + "step": 26140 + }, + { + "epoch": 1.673708469368158, + "grad_norm": 11.372210502624512, + "learning_rate": 4.206755602865635e-06, + "loss": 0.3653, + "num_input_tokens_seen": 82263488, + "step": 26145 + }, + { + "epoch": 1.674028551309135, + "grad_norm": 22.791460037231445, + "learning_rate": 4.20634735483513e-06, + "loss": 0.3548, + "num_input_tokens_seen": 82279360, + "step": 26150 + }, + { + "epoch": 1.674348633250112, + "grad_norm": 12.849616050720215, + "learning_rate": 4.205939021598525e-06, + "loss": 0.295, + "num_input_tokens_seen": 82295104, + "step": 26155 + }, + { + "epoch": 1.674668715191089, + "grad_norm": 9.313014030456543, + "learning_rate": 4.205530603176212e-06, + "loss": 0.2918, + "num_input_tokens_seen": 82311104, + "step": 26160 + }, + { + "epoch": 1.6749887971320658, + "grad_norm": 51.18553924560547, + "learning_rate": 4.205122099588583e-06, + "loss": 0.313, + "num_input_tokens_seen": 82326656, + "step": 26165 + }, + { + "epoch": 1.6753088790730426, + "grad_norm": 16.979562759399414, + "learning_rate": 4.204713510856039e-06, + "loss": 0.2993, + "num_input_tokens_seen": 82343168, + "step": 26170 + }, + { + "epoch": 1.6756289610140196, + "grad_norm": 24.123416900634766, + "learning_rate": 4.204304836998983e-06, + "loss": 0.3222, + "num_input_tokens_seen": 82358208, + "step": 26175 + }, + { + "epoch": 1.6759490429549966, + "grad_norm": 18.77113151550293, + "learning_rate": 4.203896078037819e-06, + "loss": 0.3878, + "num_input_tokens_seen": 82372928, + "step": 26180 + }, + { + "epoch": 1.6762691248959734, + "grad_norm": 17.467958450317383, + "learning_rate": 4.203487233992961e-06, + "loss": 0.3604, + "num_input_tokens_seen": 82388992, + "step": 26185 + }, + { + "epoch": 1.6765892068369501, + "grad_norm": 26.776355743408203, + "learning_rate": 4.203078304884823e-06, + "loss": 0.2632, + "num_input_tokens_seen": 82404416, + "step": 26190 + }, + { + "epoch": 1.676909288777927, + "grad_norm": 23.157114028930664, + "learning_rate": 4.202669290733825e-06, + "loss": 0.3213, + "num_input_tokens_seen": 82419072, + "step": 26195 + }, + { + "epoch": 1.677229370718904, + "grad_norm": 28.059236526489258, + "learning_rate": 4.202260191560393e-06, + "loss": 0.4798, + "num_input_tokens_seen": 82435200, + "step": 26200 + }, + { + "epoch": 1.677549452659881, + "grad_norm": 50.19104766845703, + "learning_rate": 4.201851007384953e-06, + "loss": 0.4395, + "num_input_tokens_seen": 82450752, + "step": 26205 + }, + { + "epoch": 1.6778695346008579, + "grad_norm": 11.711919784545898, + "learning_rate": 4.201441738227938e-06, + "loss": 0.3904, + "num_input_tokens_seen": 82465792, + "step": 26210 + }, + { + "epoch": 1.6781896165418346, + "grad_norm": 31.402544021606445, + "learning_rate": 4.2010323841097855e-06, + "loss": 0.2622, + "num_input_tokens_seen": 82481216, + "step": 26215 + }, + { + "epoch": 1.6785096984828116, + "grad_norm": 23.526926040649414, + "learning_rate": 4.200622945050936e-06, + "loss": 0.3196, + "num_input_tokens_seen": 82496640, + "step": 26220 + }, + { + "epoch": 1.6788297804237886, + "grad_norm": 12.272912979125977, + "learning_rate": 4.200213421071836e-06, + "loss": 0.3211, + "num_input_tokens_seen": 82513600, + "step": 26225 + }, + { + "epoch": 1.6791498623647654, + "grad_norm": 48.38984680175781, + "learning_rate": 4.199803812192932e-06, + "loss": 0.313, + "num_input_tokens_seen": 82528384, + "step": 26230 + }, + { + "epoch": 1.6794699443057421, + "grad_norm": 23.239788055419922, + "learning_rate": 4.199394118434681e-06, + "loss": 0.3087, + "num_input_tokens_seen": 82543552, + "step": 26235 + }, + { + "epoch": 1.6797900262467191, + "grad_norm": 17.303709030151367, + "learning_rate": 4.198984339817538e-06, + "loss": 0.4927, + "num_input_tokens_seen": 82560256, + "step": 26240 + }, + { + "epoch": 1.6801101081876961, + "grad_norm": 12.297698974609375, + "learning_rate": 4.198574476361969e-06, + "loss": 0.3095, + "num_input_tokens_seen": 82576320, + "step": 26245 + }, + { + "epoch": 1.680430190128673, + "grad_norm": 29.94466209411621, + "learning_rate": 4.198164528088436e-06, + "loss": 0.3584, + "num_input_tokens_seen": 82593280, + "step": 26250 + }, + { + "epoch": 1.6807502720696497, + "grad_norm": 18.83812713623047, + "learning_rate": 4.197754495017413e-06, + "loss": 0.2621, + "num_input_tokens_seen": 82608832, + "step": 26255 + }, + { + "epoch": 1.6810703540106267, + "grad_norm": 16.112049102783203, + "learning_rate": 4.1973443771693735e-06, + "loss": 0.2594, + "num_input_tokens_seen": 82625024, + "step": 26260 + }, + { + "epoch": 1.6813904359516036, + "grad_norm": 10.749310493469238, + "learning_rate": 4.1969341745647966e-06, + "loss": 0.3151, + "num_input_tokens_seen": 82640896, + "step": 26265 + }, + { + "epoch": 1.6817105178925806, + "grad_norm": 35.34461975097656, + "learning_rate": 4.196523887224167e-06, + "loss": 0.4024, + "num_input_tokens_seen": 82656960, + "step": 26270 + }, + { + "epoch": 1.6820305998335574, + "grad_norm": 39.62909698486328, + "learning_rate": 4.1961135151679695e-06, + "loss": 0.3083, + "num_input_tokens_seen": 82673024, + "step": 26275 + }, + { + "epoch": 1.6823506817745342, + "grad_norm": 17.992687225341797, + "learning_rate": 4.195703058416701e-06, + "loss": 0.4133, + "num_input_tokens_seen": 82689920, + "step": 26280 + }, + { + "epoch": 1.6826707637155112, + "grad_norm": 37.68491744995117, + "learning_rate": 4.195292516990851e-06, + "loss": 0.4182, + "num_input_tokens_seen": 82705152, + "step": 26285 + }, + { + "epoch": 1.6829908456564882, + "grad_norm": 23.18842315673828, + "learning_rate": 4.194881890910924e-06, + "loss": 0.3226, + "num_input_tokens_seen": 82720448, + "step": 26290 + }, + { + "epoch": 1.683310927597465, + "grad_norm": 16.857284545898438, + "learning_rate": 4.194471180197425e-06, + "loss": 0.3272, + "num_input_tokens_seen": 82737536, + "step": 26295 + }, + { + "epoch": 1.6836310095384417, + "grad_norm": 27.90546989440918, + "learning_rate": 4.1940603848708605e-06, + "loss": 0.5357, + "num_input_tokens_seen": 82752896, + "step": 26300 + }, + { + "epoch": 1.6839510914794187, + "grad_norm": 28.47435760498047, + "learning_rate": 4.193649504951745e-06, + "loss": 0.3715, + "num_input_tokens_seen": 82769920, + "step": 26305 + }, + { + "epoch": 1.6842711734203957, + "grad_norm": 40.33957290649414, + "learning_rate": 4.193238540460595e-06, + "loss": 0.3821, + "num_input_tokens_seen": 82785728, + "step": 26310 + }, + { + "epoch": 1.6845912553613727, + "grad_norm": 20.447261810302734, + "learning_rate": 4.192827491417931e-06, + "loss": 0.306, + "num_input_tokens_seen": 82801920, + "step": 26315 + }, + { + "epoch": 1.6849113373023494, + "grad_norm": 10.764215469360352, + "learning_rate": 4.192416357844281e-06, + "loss": 0.3343, + "num_input_tokens_seen": 82817216, + "step": 26320 + }, + { + "epoch": 1.6852314192433262, + "grad_norm": 26.77023696899414, + "learning_rate": 4.192005139760172e-06, + "loss": 0.2941, + "num_input_tokens_seen": 82832576, + "step": 26325 + }, + { + "epoch": 1.6855515011843032, + "grad_norm": 31.505266189575195, + "learning_rate": 4.191593837186142e-06, + "loss": 0.3261, + "num_input_tokens_seen": 82848448, + "step": 26330 + }, + { + "epoch": 1.6858715831252802, + "grad_norm": 13.514432907104492, + "learning_rate": 4.191182450142725e-06, + "loss": 0.3725, + "num_input_tokens_seen": 82863936, + "step": 26335 + }, + { + "epoch": 1.686191665066257, + "grad_norm": 39.746395111083984, + "learning_rate": 4.190770978650466e-06, + "loss": 0.3422, + "num_input_tokens_seen": 82880192, + "step": 26340 + }, + { + "epoch": 1.6865117470072337, + "grad_norm": 22.8271427154541, + "learning_rate": 4.190359422729911e-06, + "loss": 0.2793, + "num_input_tokens_seen": 82894656, + "step": 26345 + }, + { + "epoch": 1.6868318289482107, + "grad_norm": 13.536922454833984, + "learning_rate": 4.189947782401612e-06, + "loss": 0.2528, + "num_input_tokens_seen": 82910016, + "step": 26350 + }, + { + "epoch": 1.6871519108891877, + "grad_norm": 6.9963459968566895, + "learning_rate": 4.189536057686123e-06, + "loss": 0.3624, + "num_input_tokens_seen": 82925120, + "step": 26355 + }, + { + "epoch": 1.6874719928301645, + "grad_norm": 23.580345153808594, + "learning_rate": 4.189124248604004e-06, + "loss": 0.3493, + "num_input_tokens_seen": 82941184, + "step": 26360 + }, + { + "epoch": 1.6877920747711415, + "grad_norm": 29.82427978515625, + "learning_rate": 4.188712355175818e-06, + "loss": 0.3695, + "num_input_tokens_seen": 82956736, + "step": 26365 + }, + { + "epoch": 1.6881121567121182, + "grad_norm": 24.74789047241211, + "learning_rate": 4.1883003774221335e-06, + "loss": 0.2668, + "num_input_tokens_seen": 82971200, + "step": 26370 + }, + { + "epoch": 1.6884322386530952, + "grad_norm": 24.564922332763672, + "learning_rate": 4.1878883153635205e-06, + "loss": 0.3116, + "num_input_tokens_seen": 82987136, + "step": 26375 + }, + { + "epoch": 1.6887523205940722, + "grad_norm": 16.606468200683594, + "learning_rate": 4.187476169020559e-06, + "loss": 0.3113, + "num_input_tokens_seen": 83003392, + "step": 26380 + }, + { + "epoch": 1.689072402535049, + "grad_norm": 19.94938087463379, + "learning_rate": 4.187063938413827e-06, + "loss": 0.4604, + "num_input_tokens_seen": 83018624, + "step": 26385 + }, + { + "epoch": 1.6893924844760257, + "grad_norm": 5.1412458419799805, + "learning_rate": 4.186651623563908e-06, + "loss": 0.3172, + "num_input_tokens_seen": 83033408, + "step": 26390 + }, + { + "epoch": 1.6897125664170027, + "grad_norm": 32.25971603393555, + "learning_rate": 4.186239224491394e-06, + "loss": 0.3299, + "num_input_tokens_seen": 83051520, + "step": 26395 + }, + { + "epoch": 1.6900326483579797, + "grad_norm": 50.52603530883789, + "learning_rate": 4.185826741216876e-06, + "loss": 0.2201, + "num_input_tokens_seen": 83066816, + "step": 26400 + }, + { + "epoch": 1.6903527302989565, + "grad_norm": 21.98740005493164, + "learning_rate": 4.185414173760952e-06, + "loss": 0.3626, + "num_input_tokens_seen": 83083136, + "step": 26405 + }, + { + "epoch": 1.6906728122399333, + "grad_norm": 12.290205001831055, + "learning_rate": 4.185001522144223e-06, + "loss": 0.3798, + "num_input_tokens_seen": 83097728, + "step": 26410 + }, + { + "epoch": 1.6909928941809103, + "grad_norm": 26.083904266357422, + "learning_rate": 4.184588786387295e-06, + "loss": 0.3527, + "num_input_tokens_seen": 83112704, + "step": 26415 + }, + { + "epoch": 1.6913129761218872, + "grad_norm": 29.745601654052734, + "learning_rate": 4.184175966510777e-06, + "loss": 0.2633, + "num_input_tokens_seen": 83128064, + "step": 26420 + }, + { + "epoch": 1.6916330580628642, + "grad_norm": 51.212642669677734, + "learning_rate": 4.183763062535285e-06, + "loss": 0.5295, + "num_input_tokens_seen": 83144384, + "step": 26425 + }, + { + "epoch": 1.691953140003841, + "grad_norm": 94.00310516357422, + "learning_rate": 4.183350074481434e-06, + "loss": 0.4544, + "num_input_tokens_seen": 83159168, + "step": 26430 + }, + { + "epoch": 1.6922732219448178, + "grad_norm": 42.81989288330078, + "learning_rate": 4.18293700236985e-06, + "loss": 0.292, + "num_input_tokens_seen": 83174720, + "step": 26435 + }, + { + "epoch": 1.6925933038857948, + "grad_norm": 11.408416748046875, + "learning_rate": 4.182523846221158e-06, + "loss": 0.3355, + "num_input_tokens_seen": 83189504, + "step": 26440 + }, + { + "epoch": 1.6929133858267718, + "grad_norm": 28.265365600585938, + "learning_rate": 4.182110606055989e-06, + "loss": 0.444, + "num_input_tokens_seen": 83204992, + "step": 26445 + }, + { + "epoch": 1.6932334677677485, + "grad_norm": 16.701330184936523, + "learning_rate": 4.181697281894978e-06, + "loss": 0.3017, + "num_input_tokens_seen": 83221056, + "step": 26450 + }, + { + "epoch": 1.6935535497087253, + "grad_norm": 36.87248229980469, + "learning_rate": 4.181283873758765e-06, + "loss": 0.4095, + "num_input_tokens_seen": 83237568, + "step": 26455 + }, + { + "epoch": 1.6938736316497023, + "grad_norm": 15.564862251281738, + "learning_rate": 4.1808703816679915e-06, + "loss": 0.3218, + "num_input_tokens_seen": 83254144, + "step": 26460 + }, + { + "epoch": 1.6941937135906793, + "grad_norm": 18.38157081604004, + "learning_rate": 4.180456805643308e-06, + "loss": 0.3484, + "num_input_tokens_seen": 83269056, + "step": 26465 + }, + { + "epoch": 1.6945137955316563, + "grad_norm": 37.14979553222656, + "learning_rate": 4.180043145705363e-06, + "loss": 0.4254, + "num_input_tokens_seen": 83285504, + "step": 26470 + }, + { + "epoch": 1.694833877472633, + "grad_norm": 20.796449661254883, + "learning_rate": 4.1796294018748165e-06, + "loss": 0.3334, + "num_input_tokens_seen": 83301248, + "step": 26475 + }, + { + "epoch": 1.6951539594136098, + "grad_norm": 39.576351165771484, + "learning_rate": 4.179215574172325e-06, + "loss": 0.4201, + "num_input_tokens_seen": 83316864, + "step": 26480 + }, + { + "epoch": 1.6954740413545868, + "grad_norm": 40.08574676513672, + "learning_rate": 4.178801662618555e-06, + "loss": 0.3501, + "num_input_tokens_seen": 83332480, + "step": 26485 + }, + { + "epoch": 1.6957941232955638, + "grad_norm": 30.685523986816406, + "learning_rate": 4.178387667234174e-06, + "loss": 0.311, + "num_input_tokens_seen": 83348224, + "step": 26490 + }, + { + "epoch": 1.6961142052365406, + "grad_norm": 18.653594970703125, + "learning_rate": 4.177973588039856e-06, + "loss": 0.4381, + "num_input_tokens_seen": 83363776, + "step": 26495 + }, + { + "epoch": 1.6964342871775173, + "grad_norm": 57.900909423828125, + "learning_rate": 4.177559425056277e-06, + "loss": 0.3925, + "num_input_tokens_seen": 83380800, + "step": 26500 + }, + { + "epoch": 1.6967543691184943, + "grad_norm": 14.424958229064941, + "learning_rate": 4.177145178304118e-06, + "loss": 0.4818, + "num_input_tokens_seen": 83397632, + "step": 26505 + }, + { + "epoch": 1.6970744510594713, + "grad_norm": 71.64070129394531, + "learning_rate": 4.1767308478040655e-06, + "loss": 0.4123, + "num_input_tokens_seen": 83412352, + "step": 26510 + }, + { + "epoch": 1.697394533000448, + "grad_norm": 13.00489616394043, + "learning_rate": 4.176316433576808e-06, + "loss": 0.3334, + "num_input_tokens_seen": 83427584, + "step": 26515 + }, + { + "epoch": 1.697714614941425, + "grad_norm": 10.613883972167969, + "learning_rate": 4.1759019356430395e-06, + "loss": 0.2709, + "num_input_tokens_seen": 83442240, + "step": 26520 + }, + { + "epoch": 1.6980346968824018, + "grad_norm": 29.012603759765625, + "learning_rate": 4.1754873540234575e-06, + "loss": 0.355, + "num_input_tokens_seen": 83456640, + "step": 26525 + }, + { + "epoch": 1.6983547788233788, + "grad_norm": 21.956510543823242, + "learning_rate": 4.175072688738765e-06, + "loss": 0.2774, + "num_input_tokens_seen": 83472512, + "step": 26530 + }, + { + "epoch": 1.6986748607643558, + "grad_norm": 10.439676284790039, + "learning_rate": 4.174657939809666e-06, + "loss": 0.3193, + "num_input_tokens_seen": 83488768, + "step": 26535 + }, + { + "epoch": 1.6989949427053326, + "grad_norm": 11.812192916870117, + "learning_rate": 4.174243107256874e-06, + "loss": 0.3276, + "num_input_tokens_seen": 83504512, + "step": 26540 + }, + { + "epoch": 1.6993150246463093, + "grad_norm": 15.986824989318848, + "learning_rate": 4.173828191101101e-06, + "loss": 0.3254, + "num_input_tokens_seen": 83519808, + "step": 26545 + }, + { + "epoch": 1.6996351065872863, + "grad_norm": 19.138526916503906, + "learning_rate": 4.173413191363068e-06, + "loss": 0.4312, + "num_input_tokens_seen": 83535104, + "step": 26550 + }, + { + "epoch": 1.6999551885282633, + "grad_norm": 10.587177276611328, + "learning_rate": 4.172998108063495e-06, + "loss": 0.3073, + "num_input_tokens_seen": 83550784, + "step": 26555 + }, + { + "epoch": 1.70027527046924, + "grad_norm": 52.70855712890625, + "learning_rate": 4.172582941223112e-06, + "loss": 0.3819, + "num_input_tokens_seen": 83567296, + "step": 26560 + }, + { + "epoch": 1.7005953524102169, + "grad_norm": 21.115962982177734, + "learning_rate": 4.17216769086265e-06, + "loss": 0.35, + "num_input_tokens_seen": 83582144, + "step": 26565 + }, + { + "epoch": 1.7009154343511939, + "grad_norm": 24.974767684936523, + "learning_rate": 4.171752357002842e-06, + "loss": 0.2856, + "num_input_tokens_seen": 83597952, + "step": 26570 + }, + { + "epoch": 1.7012355162921708, + "grad_norm": 26.90996551513672, + "learning_rate": 4.171336939664429e-06, + "loss": 0.4236, + "num_input_tokens_seen": 83614080, + "step": 26575 + }, + { + "epoch": 1.7015555982331478, + "grad_norm": 17.232328414916992, + "learning_rate": 4.1709214388681565e-06, + "loss": 0.2606, + "num_input_tokens_seen": 83629568, + "step": 26580 + }, + { + "epoch": 1.7018756801741246, + "grad_norm": 31.720050811767578, + "learning_rate": 4.1705058546347694e-06, + "loss": 0.4471, + "num_input_tokens_seen": 83645440, + "step": 26585 + }, + { + "epoch": 1.7021957621151014, + "grad_norm": 22.65837860107422, + "learning_rate": 4.170090186985022e-06, + "loss": 0.4335, + "num_input_tokens_seen": 83660864, + "step": 26590 + }, + { + "epoch": 1.7025158440560784, + "grad_norm": 22.20380210876465, + "learning_rate": 4.169674435939669e-06, + "loss": 0.3545, + "num_input_tokens_seen": 83675840, + "step": 26595 + }, + { + "epoch": 1.7028359259970554, + "grad_norm": 16.663606643676758, + "learning_rate": 4.169258601519473e-06, + "loss": 0.43, + "num_input_tokens_seen": 83692672, + "step": 26600 + }, + { + "epoch": 1.7031560079380321, + "grad_norm": 14.191535949707031, + "learning_rate": 4.168842683745196e-06, + "loss": 0.2911, + "num_input_tokens_seen": 83709376, + "step": 26605 + }, + { + "epoch": 1.703476089879009, + "grad_norm": 58.174652099609375, + "learning_rate": 4.168426682637609e-06, + "loss": 0.3119, + "num_input_tokens_seen": 83726976, + "step": 26610 + }, + { + "epoch": 1.7037961718199859, + "grad_norm": 12.234491348266602, + "learning_rate": 4.168010598217482e-06, + "loss": 0.2443, + "num_input_tokens_seen": 83741888, + "step": 26615 + }, + { + "epoch": 1.7041162537609629, + "grad_norm": 41.86488342285156, + "learning_rate": 4.167594430505596e-06, + "loss": 0.4303, + "num_input_tokens_seen": 83757056, + "step": 26620 + }, + { + "epoch": 1.7044363357019396, + "grad_norm": 67.34346771240234, + "learning_rate": 4.1671781795227305e-06, + "loss": 0.3313, + "num_input_tokens_seen": 83773632, + "step": 26625 + }, + { + "epoch": 1.7047564176429166, + "grad_norm": 6.887158393859863, + "learning_rate": 4.16676184528967e-06, + "loss": 0.3153, + "num_input_tokens_seen": 83789312, + "step": 26630 + }, + { + "epoch": 1.7050764995838934, + "grad_norm": 13.813220977783203, + "learning_rate": 4.1663454278272056e-06, + "loss": 0.4139, + "num_input_tokens_seen": 83805312, + "step": 26635 + }, + { + "epoch": 1.7053965815248704, + "grad_norm": 42.362274169921875, + "learning_rate": 4.165928927156129e-06, + "loss": 0.3763, + "num_input_tokens_seen": 83821504, + "step": 26640 + }, + { + "epoch": 1.7057166634658474, + "grad_norm": 13.856204986572266, + "learning_rate": 4.16551234329724e-06, + "loss": 0.3256, + "num_input_tokens_seen": 83837440, + "step": 26645 + }, + { + "epoch": 1.7060367454068242, + "grad_norm": 40.01100540161133, + "learning_rate": 4.1650956762713395e-06, + "loss": 0.3067, + "num_input_tokens_seen": 83853760, + "step": 26650 + }, + { + "epoch": 1.706356827347801, + "grad_norm": 34.98249435424805, + "learning_rate": 4.164678926099234e-06, + "loss": 0.3456, + "num_input_tokens_seen": 83870080, + "step": 26655 + }, + { + "epoch": 1.706676909288778, + "grad_norm": 44.84291076660156, + "learning_rate": 4.164262092801734e-06, + "loss": 0.4066, + "num_input_tokens_seen": 83885632, + "step": 26660 + }, + { + "epoch": 1.706996991229755, + "grad_norm": 56.852413177490234, + "learning_rate": 4.163845176399655e-06, + "loss": 0.3023, + "num_input_tokens_seen": 83900800, + "step": 26665 + }, + { + "epoch": 1.7073170731707317, + "grad_norm": 10.865196228027344, + "learning_rate": 4.163428176913813e-06, + "loss": 0.1733, + "num_input_tokens_seen": 83915840, + "step": 26670 + }, + { + "epoch": 1.7076371551117084, + "grad_norm": 17.866641998291016, + "learning_rate": 4.1630110943650335e-06, + "loss": 0.4431, + "num_input_tokens_seen": 83931008, + "step": 26675 + }, + { + "epoch": 1.7079572370526854, + "grad_norm": 14.88655948638916, + "learning_rate": 4.162593928774142e-06, + "loss": 0.2354, + "num_input_tokens_seen": 83947648, + "step": 26680 + }, + { + "epoch": 1.7082773189936624, + "grad_norm": 33.48491668701172, + "learning_rate": 4.1621766801619695e-06, + "loss": 0.3764, + "num_input_tokens_seen": 83963008, + "step": 26685 + }, + { + "epoch": 1.7085974009346394, + "grad_norm": 32.843997955322266, + "learning_rate": 4.161759348549352e-06, + "loss": 0.2491, + "num_input_tokens_seen": 83980672, + "step": 26690 + }, + { + "epoch": 1.7089174828756162, + "grad_norm": 8.669686317443848, + "learning_rate": 4.1613419339571284e-06, + "loss": 0.3438, + "num_input_tokens_seen": 83996160, + "step": 26695 + }, + { + "epoch": 1.709237564816593, + "grad_norm": 38.673187255859375, + "learning_rate": 4.160924436406142e-06, + "loss": 0.3834, + "num_input_tokens_seen": 84013824, + "step": 26700 + }, + { + "epoch": 1.70955764675757, + "grad_norm": 23.688697814941406, + "learning_rate": 4.16050685591724e-06, + "loss": 0.3901, + "num_input_tokens_seen": 84030336, + "step": 26705 + }, + { + "epoch": 1.709877728698547, + "grad_norm": 41.02920913696289, + "learning_rate": 4.160089192511276e-06, + "loss": 0.3067, + "num_input_tokens_seen": 84045824, + "step": 26710 + }, + { + "epoch": 1.7101978106395237, + "grad_norm": 47.26680374145508, + "learning_rate": 4.159671446209104e-06, + "loss": 0.2405, + "num_input_tokens_seen": 84061568, + "step": 26715 + }, + { + "epoch": 1.7105178925805005, + "grad_norm": 55.608001708984375, + "learning_rate": 4.159253617031585e-06, + "loss": 0.3842, + "num_input_tokens_seen": 84078080, + "step": 26720 + }, + { + "epoch": 1.7108379745214775, + "grad_norm": 35.996097564697266, + "learning_rate": 4.158835704999583e-06, + "loss": 0.3621, + "num_input_tokens_seen": 84093248, + "step": 26725 + }, + { + "epoch": 1.7111580564624544, + "grad_norm": 21.782812118530273, + "learning_rate": 4.1584177101339675e-06, + "loss": 0.2942, + "num_input_tokens_seen": 84110080, + "step": 26730 + }, + { + "epoch": 1.7114781384034314, + "grad_norm": 71.1346664428711, + "learning_rate": 4.157999632455609e-06, + "loss": 0.3206, + "num_input_tokens_seen": 84125248, + "step": 26735 + }, + { + "epoch": 1.7117982203444082, + "grad_norm": 37.4521369934082, + "learning_rate": 4.157581471985386e-06, + "loss": 0.4036, + "num_input_tokens_seen": 84141824, + "step": 26740 + }, + { + "epoch": 1.712118302285385, + "grad_norm": 39.19282150268555, + "learning_rate": 4.157163228744177e-06, + "loss": 0.3949, + "num_input_tokens_seen": 84157056, + "step": 26745 + }, + { + "epoch": 1.712438384226362, + "grad_norm": 31.21284294128418, + "learning_rate": 4.156744902752868e-06, + "loss": 0.2899, + "num_input_tokens_seen": 84171648, + "step": 26750 + }, + { + "epoch": 1.712758466167339, + "grad_norm": 9.939478874206543, + "learning_rate": 4.15632649403235e-06, + "loss": 0.3951, + "num_input_tokens_seen": 84187136, + "step": 26755 + }, + { + "epoch": 1.7130785481083157, + "grad_norm": 58.72734451293945, + "learning_rate": 4.155908002603513e-06, + "loss": 0.4079, + "num_input_tokens_seen": 84203392, + "step": 26760 + }, + { + "epoch": 1.7133986300492925, + "grad_norm": 35.82041549682617, + "learning_rate": 4.155489428487256e-06, + "loss": 0.4703, + "num_input_tokens_seen": 84219136, + "step": 26765 + }, + { + "epoch": 1.7137187119902695, + "grad_norm": 18.423566818237305, + "learning_rate": 4.15507077170448e-06, + "loss": 0.4732, + "num_input_tokens_seen": 84234304, + "step": 26770 + }, + { + "epoch": 1.7140387939312465, + "grad_norm": 10.843998908996582, + "learning_rate": 4.154652032276091e-06, + "loss": 0.3154, + "num_input_tokens_seen": 84249920, + "step": 26775 + }, + { + "epoch": 1.7143588758722232, + "grad_norm": 11.398558616638184, + "learning_rate": 4.1542332102229975e-06, + "loss": 0.2278, + "num_input_tokens_seen": 84265600, + "step": 26780 + }, + { + "epoch": 1.7146789578132002, + "grad_norm": 10.762773513793945, + "learning_rate": 4.153814305566115e-06, + "loss": 0.3779, + "num_input_tokens_seen": 84283072, + "step": 26785 + }, + { + "epoch": 1.714999039754177, + "grad_norm": 37.26520919799805, + "learning_rate": 4.15339531832636e-06, + "loss": 0.4423, + "num_input_tokens_seen": 84299200, + "step": 26790 + }, + { + "epoch": 1.715319121695154, + "grad_norm": 11.54057788848877, + "learning_rate": 4.152976248524655e-06, + "loss": 0.329, + "num_input_tokens_seen": 84315840, + "step": 26795 + }, + { + "epoch": 1.715639203636131, + "grad_norm": 12.364130973815918, + "learning_rate": 4.152557096181927e-06, + "loss": 0.3311, + "num_input_tokens_seen": 84330240, + "step": 26800 + }, + { + "epoch": 1.7159592855771078, + "grad_norm": 43.96879959106445, + "learning_rate": 4.152137861319104e-06, + "loss": 0.4372, + "num_input_tokens_seen": 84345344, + "step": 26805 + }, + { + "epoch": 1.7162793675180845, + "grad_norm": 16.873699188232422, + "learning_rate": 4.151718543957123e-06, + "loss": 0.4629, + "num_input_tokens_seen": 84360448, + "step": 26810 + }, + { + "epoch": 1.7165994494590615, + "grad_norm": 42.06140899658203, + "learning_rate": 4.151299144116922e-06, + "loss": 0.3464, + "num_input_tokens_seen": 84376256, + "step": 26815 + }, + { + "epoch": 1.7169195314000385, + "grad_norm": 54.6971321105957, + "learning_rate": 4.150879661819441e-06, + "loss": 0.3006, + "num_input_tokens_seen": 84391168, + "step": 26820 + }, + { + "epoch": 1.7172396133410153, + "grad_norm": 31.05072021484375, + "learning_rate": 4.15046009708563e-06, + "loss": 0.3412, + "num_input_tokens_seen": 84406336, + "step": 26825 + }, + { + "epoch": 1.717559695281992, + "grad_norm": 35.16817855834961, + "learning_rate": 4.150040449936439e-06, + "loss": 0.3759, + "num_input_tokens_seen": 84422272, + "step": 26830 + }, + { + "epoch": 1.717879777222969, + "grad_norm": 36.55738067626953, + "learning_rate": 4.1496207203928215e-06, + "loss": 0.2669, + "num_input_tokens_seen": 84436928, + "step": 26835 + }, + { + "epoch": 1.718199859163946, + "grad_norm": 14.055779457092285, + "learning_rate": 4.149200908475739e-06, + "loss": 0.3133, + "num_input_tokens_seen": 84452608, + "step": 26840 + }, + { + "epoch": 1.718519941104923, + "grad_norm": 46.5838737487793, + "learning_rate": 4.1487810142061525e-06, + "loss": 0.37, + "num_input_tokens_seen": 84468736, + "step": 26845 + }, + { + "epoch": 1.7188400230458998, + "grad_norm": 24.1795654296875, + "learning_rate": 4.148361037605031e-06, + "loss": 0.3054, + "num_input_tokens_seen": 84483840, + "step": 26850 + }, + { + "epoch": 1.7191601049868765, + "grad_norm": 43.558292388916016, + "learning_rate": 4.1479409786933455e-06, + "loss": 0.3229, + "num_input_tokens_seen": 84499648, + "step": 26855 + }, + { + "epoch": 1.7194801869278535, + "grad_norm": 40.365108489990234, + "learning_rate": 4.147520837492071e-06, + "loss": 0.383, + "num_input_tokens_seen": 84516224, + "step": 26860 + }, + { + "epoch": 1.7198002688688305, + "grad_norm": 32.10752868652344, + "learning_rate": 4.1471006140221876e-06, + "loss": 0.328, + "num_input_tokens_seen": 84532288, + "step": 26865 + }, + { + "epoch": 1.7201203508098073, + "grad_norm": 49.353328704833984, + "learning_rate": 4.146680308304679e-06, + "loss": 0.2931, + "num_input_tokens_seen": 84547584, + "step": 26870 + }, + { + "epoch": 1.720440432750784, + "grad_norm": 28.869348526000977, + "learning_rate": 4.146259920360533e-06, + "loss": 0.342, + "num_input_tokens_seen": 84562432, + "step": 26875 + }, + { + "epoch": 1.720760514691761, + "grad_norm": 11.988903999328613, + "learning_rate": 4.145839450210741e-06, + "loss": 0.3926, + "num_input_tokens_seen": 84578176, + "step": 26880 + }, + { + "epoch": 1.721080596632738, + "grad_norm": 25.395315170288086, + "learning_rate": 4.145418897876301e-06, + "loss": 0.2463, + "num_input_tokens_seen": 84592448, + "step": 26885 + }, + { + "epoch": 1.721400678573715, + "grad_norm": 30.260265350341797, + "learning_rate": 4.144998263378211e-06, + "loss": 0.4445, + "num_input_tokens_seen": 84607360, + "step": 26890 + }, + { + "epoch": 1.7217207605146918, + "grad_norm": 34.3101692199707, + "learning_rate": 4.144577546737476e-06, + "loss": 0.2856, + "num_input_tokens_seen": 84622528, + "step": 26895 + }, + { + "epoch": 1.7220408424556686, + "grad_norm": 17.822147369384766, + "learning_rate": 4.144156747975105e-06, + "loss": 0.2865, + "num_input_tokens_seen": 84637504, + "step": 26900 + }, + { + "epoch": 1.7223609243966456, + "grad_norm": 20.616968154907227, + "learning_rate": 4.1437358671121095e-06, + "loss": 0.2745, + "num_input_tokens_seen": 84653120, + "step": 26905 + }, + { + "epoch": 1.7226810063376226, + "grad_norm": 20.06969451904297, + "learning_rate": 4.143314904169508e-06, + "loss": 0.2456, + "num_input_tokens_seen": 84668416, + "step": 26910 + }, + { + "epoch": 1.7230010882785993, + "grad_norm": 18.622150421142578, + "learning_rate": 4.142893859168319e-06, + "loss": 0.2846, + "num_input_tokens_seen": 84683584, + "step": 26915 + }, + { + "epoch": 1.723321170219576, + "grad_norm": 36.68124771118164, + "learning_rate": 4.1424727321295684e-06, + "loss": 0.4768, + "num_input_tokens_seen": 84699328, + "step": 26920 + }, + { + "epoch": 1.723641252160553, + "grad_norm": 25.45926284790039, + "learning_rate": 4.142051523074285e-06, + "loss": 0.3569, + "num_input_tokens_seen": 84714688, + "step": 26925 + }, + { + "epoch": 1.72396133410153, + "grad_norm": 41.35776901245117, + "learning_rate": 4.141630232023502e-06, + "loss": 0.4428, + "num_input_tokens_seen": 84730432, + "step": 26930 + }, + { + "epoch": 1.7242814160425068, + "grad_norm": 15.552155494689941, + "learning_rate": 4.1412088589982554e-06, + "loss": 0.2943, + "num_input_tokens_seen": 84746496, + "step": 26935 + }, + { + "epoch": 1.7246014979834836, + "grad_norm": 31.079816818237305, + "learning_rate": 4.1407874040195875e-06, + "loss": 0.3809, + "num_input_tokens_seen": 84762880, + "step": 26940 + }, + { + "epoch": 1.7249215799244606, + "grad_norm": 13.966938018798828, + "learning_rate": 4.140365867108543e-06, + "loss": 0.2378, + "num_input_tokens_seen": 84777984, + "step": 26945 + }, + { + "epoch": 1.7252416618654376, + "grad_norm": 28.99648094177246, + "learning_rate": 4.139944248286172e-06, + "loss": 0.3827, + "num_input_tokens_seen": 84793216, + "step": 26950 + }, + { + "epoch": 1.7255617438064146, + "grad_norm": 36.94422912597656, + "learning_rate": 4.1395225475735265e-06, + "loss": 0.4733, + "num_input_tokens_seen": 84808512, + "step": 26955 + }, + { + "epoch": 1.7258818257473914, + "grad_norm": 28.142749786376953, + "learning_rate": 4.139100764991665e-06, + "loss": 0.3325, + "num_input_tokens_seen": 84825216, + "step": 26960 + }, + { + "epoch": 1.7262019076883681, + "grad_norm": 8.597607612609863, + "learning_rate": 4.13867890056165e-06, + "loss": 0.269, + "num_input_tokens_seen": 84841216, + "step": 26965 + }, + { + "epoch": 1.726521989629345, + "grad_norm": 31.56741714477539, + "learning_rate": 4.138256954304546e-06, + "loss": 0.3949, + "num_input_tokens_seen": 84857024, + "step": 26970 + }, + { + "epoch": 1.726842071570322, + "grad_norm": 16.70409393310547, + "learning_rate": 4.137834926241423e-06, + "loss": 0.3604, + "num_input_tokens_seen": 84872448, + "step": 26975 + }, + { + "epoch": 1.7271621535112989, + "grad_norm": 11.671104431152344, + "learning_rate": 4.137412816393355e-06, + "loss": 0.2039, + "num_input_tokens_seen": 84886976, + "step": 26980 + }, + { + "epoch": 1.7274822354522756, + "grad_norm": 55.725830078125, + "learning_rate": 4.1369906247814195e-06, + "loss": 0.3646, + "num_input_tokens_seen": 84902144, + "step": 26985 + }, + { + "epoch": 1.7278023173932526, + "grad_norm": 27.157140731811523, + "learning_rate": 4.136568351426698e-06, + "loss": 0.3085, + "num_input_tokens_seen": 84916800, + "step": 26990 + }, + { + "epoch": 1.7281223993342296, + "grad_norm": 11.690436363220215, + "learning_rate": 4.13614599635028e-06, + "loss": 0.3572, + "num_input_tokens_seen": 84931904, + "step": 26995 + }, + { + "epoch": 1.7284424812752066, + "grad_norm": 21.6969051361084, + "learning_rate": 4.135723559573251e-06, + "loss": 0.2276, + "num_input_tokens_seen": 84946880, + "step": 27000 + }, + { + "epoch": 1.7287625632161834, + "grad_norm": 50.063472747802734, + "learning_rate": 4.135301041116709e-06, + "loss": 0.385, + "num_input_tokens_seen": 84962752, + "step": 27005 + }, + { + "epoch": 1.7290826451571601, + "grad_norm": 12.270825386047363, + "learning_rate": 4.134878441001751e-06, + "loss": 0.28, + "num_input_tokens_seen": 84977920, + "step": 27010 + }, + { + "epoch": 1.7294027270981371, + "grad_norm": 24.904953002929688, + "learning_rate": 4.134455759249479e-06, + "loss": 0.3346, + "num_input_tokens_seen": 84994048, + "step": 27015 + }, + { + "epoch": 1.7297228090391141, + "grad_norm": 14.835880279541016, + "learning_rate": 4.134032995881e-06, + "loss": 0.3173, + "num_input_tokens_seen": 85011648, + "step": 27020 + }, + { + "epoch": 1.730042890980091, + "grad_norm": 12.656000137329102, + "learning_rate": 4.133610150917425e-06, + "loss": 0.3429, + "num_input_tokens_seen": 85028096, + "step": 27025 + }, + { + "epoch": 1.7303629729210677, + "grad_norm": 12.906412124633789, + "learning_rate": 4.133187224379868e-06, + "loss": 0.1905, + "num_input_tokens_seen": 85043200, + "step": 27030 + }, + { + "epoch": 1.7306830548620447, + "grad_norm": 17.996959686279297, + "learning_rate": 4.1327642162894475e-06, + "loss": 0.3828, + "num_input_tokens_seen": 85059264, + "step": 27035 + }, + { + "epoch": 1.7310031368030216, + "grad_norm": 7.560395240783691, + "learning_rate": 4.132341126667287e-06, + "loss": 0.2137, + "num_input_tokens_seen": 85074624, + "step": 27040 + }, + { + "epoch": 1.7313232187439984, + "grad_norm": 30.698101043701172, + "learning_rate": 4.131917955534514e-06, + "loss": 0.2352, + "num_input_tokens_seen": 85090432, + "step": 27045 + }, + { + "epoch": 1.7316433006849754, + "grad_norm": 18.467771530151367, + "learning_rate": 4.131494702912258e-06, + "loss": 0.4471, + "num_input_tokens_seen": 85105472, + "step": 27050 + }, + { + "epoch": 1.7319633826259522, + "grad_norm": 50.60645294189453, + "learning_rate": 4.131071368821654e-06, + "loss": 0.346, + "num_input_tokens_seen": 85120832, + "step": 27055 + }, + { + "epoch": 1.7322834645669292, + "grad_norm": 18.418245315551758, + "learning_rate": 4.130647953283843e-06, + "loss": 0.3021, + "num_input_tokens_seen": 85136128, + "step": 27060 + }, + { + "epoch": 1.7326035465079062, + "grad_norm": 24.473770141601562, + "learning_rate": 4.130224456319966e-06, + "loss": 0.3896, + "num_input_tokens_seen": 85151744, + "step": 27065 + }, + { + "epoch": 1.732923628448883, + "grad_norm": 13.599064826965332, + "learning_rate": 4.129800877951171e-06, + "loss": 0.4597, + "num_input_tokens_seen": 85167104, + "step": 27070 + }, + { + "epoch": 1.7332437103898597, + "grad_norm": 15.808960914611816, + "learning_rate": 4.1293772181986105e-06, + "loss": 0.3382, + "num_input_tokens_seen": 85182592, + "step": 27075 + }, + { + "epoch": 1.7335637923308367, + "grad_norm": 14.693058013916016, + "learning_rate": 4.128953477083438e-06, + "loss": 0.4818, + "num_input_tokens_seen": 85197440, + "step": 27080 + }, + { + "epoch": 1.7338838742718137, + "grad_norm": 35.3278694152832, + "learning_rate": 4.128529654626814e-06, + "loss": 0.4165, + "num_input_tokens_seen": 85212992, + "step": 27085 + }, + { + "epoch": 1.7342039562127904, + "grad_norm": 49.84886169433594, + "learning_rate": 4.128105750849902e-06, + "loss": 0.3688, + "num_input_tokens_seen": 85227776, + "step": 27090 + }, + { + "epoch": 1.7345240381537672, + "grad_norm": 18.21675682067871, + "learning_rate": 4.12768176577387e-06, + "loss": 0.3282, + "num_input_tokens_seen": 85243648, + "step": 27095 + }, + { + "epoch": 1.7348441200947442, + "grad_norm": 24.86083984375, + "learning_rate": 4.127257699419888e-06, + "loss": 0.4574, + "num_input_tokens_seen": 85259584, + "step": 27100 + }, + { + "epoch": 1.7351642020357212, + "grad_norm": 19.87482261657715, + "learning_rate": 4.126833551809132e-06, + "loss": 0.366, + "num_input_tokens_seen": 85275392, + "step": 27105 + }, + { + "epoch": 1.7354842839766982, + "grad_norm": 9.600282669067383, + "learning_rate": 4.126409322962783e-06, + "loss": 0.3598, + "num_input_tokens_seen": 85290240, + "step": 27110 + }, + { + "epoch": 1.735804365917675, + "grad_norm": 47.184322357177734, + "learning_rate": 4.125985012902024e-06, + "loss": 0.3588, + "num_input_tokens_seen": 85305536, + "step": 27115 + }, + { + "epoch": 1.7361244478586517, + "grad_norm": 12.751972198486328, + "learning_rate": 4.125560621648043e-06, + "loss": 0.3226, + "num_input_tokens_seen": 85321216, + "step": 27120 + }, + { + "epoch": 1.7364445297996287, + "grad_norm": 22.647369384765625, + "learning_rate": 4.125136149222032e-06, + "loss": 0.3335, + "num_input_tokens_seen": 85336640, + "step": 27125 + }, + { + "epoch": 1.7367646117406057, + "grad_norm": 24.37010955810547, + "learning_rate": 4.124711595645186e-06, + "loss": 0.3804, + "num_input_tokens_seen": 85352896, + "step": 27130 + }, + { + "epoch": 1.7370846936815825, + "grad_norm": 24.875688552856445, + "learning_rate": 4.124286960938706e-06, + "loss": 0.3106, + "num_input_tokens_seen": 85367488, + "step": 27135 + }, + { + "epoch": 1.7374047756225592, + "grad_norm": 17.916580200195312, + "learning_rate": 4.123862245123796e-06, + "loss": 0.305, + "num_input_tokens_seen": 85384768, + "step": 27140 + }, + { + "epoch": 1.7377248575635362, + "grad_norm": 10.87287712097168, + "learning_rate": 4.123437448221664e-06, + "loss": 0.404, + "num_input_tokens_seen": 85399552, + "step": 27145 + }, + { + "epoch": 1.7380449395045132, + "grad_norm": 30.80158233642578, + "learning_rate": 4.123012570253522e-06, + "loss": 0.273, + "num_input_tokens_seen": 85414272, + "step": 27150 + }, + { + "epoch": 1.7383650214454902, + "grad_norm": 21.333940505981445, + "learning_rate": 4.122587611240586e-06, + "loss": 0.1753, + "num_input_tokens_seen": 85429376, + "step": 27155 + }, + { + "epoch": 1.738685103386467, + "grad_norm": 116.52115631103516, + "learning_rate": 4.122162571204076e-06, + "loss": 0.3394, + "num_input_tokens_seen": 85444800, + "step": 27160 + }, + { + "epoch": 1.7390051853274437, + "grad_norm": 35.731201171875, + "learning_rate": 4.121737450165217e-06, + "loss": 0.3662, + "num_input_tokens_seen": 85460928, + "step": 27165 + }, + { + "epoch": 1.7393252672684207, + "grad_norm": 25.583879470825195, + "learning_rate": 4.1213122481452376e-06, + "loss": 0.3124, + "num_input_tokens_seen": 85475328, + "step": 27170 + }, + { + "epoch": 1.7396453492093977, + "grad_norm": 40.27971649169922, + "learning_rate": 4.1208869651653695e-06, + "loss": 0.305, + "num_input_tokens_seen": 85491136, + "step": 27175 + }, + { + "epoch": 1.7399654311503745, + "grad_norm": 7.226621150970459, + "learning_rate": 4.120461601246849e-06, + "loss": 0.4092, + "num_input_tokens_seen": 85507520, + "step": 27180 + }, + { + "epoch": 1.7402855130913513, + "grad_norm": 50.3693962097168, + "learning_rate": 4.120036156410917e-06, + "loss": 0.456, + "num_input_tokens_seen": 85522368, + "step": 27185 + }, + { + "epoch": 1.7406055950323283, + "grad_norm": 26.054298400878906, + "learning_rate": 4.1196106306788174e-06, + "loss": 0.3396, + "num_input_tokens_seen": 85538944, + "step": 27190 + }, + { + "epoch": 1.7409256769733052, + "grad_norm": 19.220678329467773, + "learning_rate": 4.119185024071799e-06, + "loss": 0.2535, + "num_input_tokens_seen": 85554368, + "step": 27195 + }, + { + "epoch": 1.741245758914282, + "grad_norm": 47.10236740112305, + "learning_rate": 4.118759336611116e-06, + "loss": 0.361, + "num_input_tokens_seen": 85569408, + "step": 27200 + }, + { + "epoch": 1.7415658408552588, + "grad_norm": 19.559104919433594, + "learning_rate": 4.118333568318022e-06, + "loss": 0.4347, + "num_input_tokens_seen": 85585792, + "step": 27205 + }, + { + "epoch": 1.7418859227962358, + "grad_norm": 38.30542755126953, + "learning_rate": 4.11790771921378e-06, + "loss": 0.462, + "num_input_tokens_seen": 85603200, + "step": 27210 + }, + { + "epoch": 1.7422060047372128, + "grad_norm": 37.52350997924805, + "learning_rate": 4.117481789319653e-06, + "loss": 0.3331, + "num_input_tokens_seen": 85617984, + "step": 27215 + }, + { + "epoch": 1.7425260866781898, + "grad_norm": 13.844328880310059, + "learning_rate": 4.117055778656911e-06, + "loss": 0.2692, + "num_input_tokens_seen": 85633664, + "step": 27220 + }, + { + "epoch": 1.7428461686191665, + "grad_norm": 41.98745346069336, + "learning_rate": 4.116629687246827e-06, + "loss": 0.247, + "num_input_tokens_seen": 85650496, + "step": 27225 + }, + { + "epoch": 1.7431662505601433, + "grad_norm": 46.07705307006836, + "learning_rate": 4.116203515110676e-06, + "loss": 0.3757, + "num_input_tokens_seen": 85665408, + "step": 27230 + }, + { + "epoch": 1.7434863325011203, + "grad_norm": 6.6382246017456055, + "learning_rate": 4.115777262269741e-06, + "loss": 0.1952, + "num_input_tokens_seen": 85681408, + "step": 27235 + }, + { + "epoch": 1.7438064144420973, + "grad_norm": 27.78245735168457, + "learning_rate": 4.1153509287453054e-06, + "loss": 0.3594, + "num_input_tokens_seen": 85697472, + "step": 27240 + }, + { + "epoch": 1.744126496383074, + "grad_norm": 23.90266227722168, + "learning_rate": 4.114924514558658e-06, + "loss": 0.6312, + "num_input_tokens_seen": 85714048, + "step": 27245 + }, + { + "epoch": 1.7444465783240508, + "grad_norm": 9.633004188537598, + "learning_rate": 4.114498019731093e-06, + "loss": 0.3488, + "num_input_tokens_seen": 85728896, + "step": 27250 + }, + { + "epoch": 1.7447666602650278, + "grad_norm": 13.85496711730957, + "learning_rate": 4.114071444283905e-06, + "loss": 0.3511, + "num_input_tokens_seen": 85744128, + "step": 27255 + }, + { + "epoch": 1.7450867422060048, + "grad_norm": 40.0048828125, + "learning_rate": 4.113644788238398e-06, + "loss": 0.2834, + "num_input_tokens_seen": 85760896, + "step": 27260 + }, + { + "epoch": 1.7454068241469818, + "grad_norm": 11.943017959594727, + "learning_rate": 4.113218051615875e-06, + "loss": 0.348, + "num_input_tokens_seen": 85777600, + "step": 27265 + }, + { + "epoch": 1.7457269060879586, + "grad_norm": 15.35317325592041, + "learning_rate": 4.112791234437647e-06, + "loss": 0.3729, + "num_input_tokens_seen": 85792832, + "step": 27270 + }, + { + "epoch": 1.7460469880289353, + "grad_norm": 21.40904426574707, + "learning_rate": 4.112364336725023e-06, + "loss": 0.4245, + "num_input_tokens_seen": 85807424, + "step": 27275 + }, + { + "epoch": 1.7463670699699123, + "grad_norm": 17.876480102539062, + "learning_rate": 4.111937358499324e-06, + "loss": 0.4008, + "num_input_tokens_seen": 85822656, + "step": 27280 + }, + { + "epoch": 1.7466871519108893, + "grad_norm": 18.26160430908203, + "learning_rate": 4.111510299781869e-06, + "loss": 0.2852, + "num_input_tokens_seen": 85837632, + "step": 27285 + }, + { + "epoch": 1.747007233851866, + "grad_norm": 23.566173553466797, + "learning_rate": 4.111083160593983e-06, + "loss": 0.4014, + "num_input_tokens_seen": 85853376, + "step": 27290 + }, + { + "epoch": 1.7473273157928428, + "grad_norm": 15.280135154724121, + "learning_rate": 4.110655940956997e-06, + "loss": 0.2782, + "num_input_tokens_seen": 85868480, + "step": 27295 + }, + { + "epoch": 1.7476473977338198, + "grad_norm": 15.065322875976562, + "learning_rate": 4.110228640892242e-06, + "loss": 0.2557, + "num_input_tokens_seen": 85884608, + "step": 27300 + }, + { + "epoch": 1.7479674796747968, + "grad_norm": 11.977608680725098, + "learning_rate": 4.109801260421057e-06, + "loss": 0.2501, + "num_input_tokens_seen": 85899776, + "step": 27305 + }, + { + "epoch": 1.7482875616157736, + "grad_norm": 44.45491027832031, + "learning_rate": 4.109373799564782e-06, + "loss": 0.2386, + "num_input_tokens_seen": 85915904, + "step": 27310 + }, + { + "epoch": 1.7486076435567506, + "grad_norm": 14.444845199584961, + "learning_rate": 4.1089462583447615e-06, + "loss": 0.4077, + "num_input_tokens_seen": 85930688, + "step": 27315 + }, + { + "epoch": 1.7489277254977273, + "grad_norm": 16.931425094604492, + "learning_rate": 4.108518636782346e-06, + "loss": 0.3356, + "num_input_tokens_seen": 85946304, + "step": 27320 + }, + { + "epoch": 1.7492478074387043, + "grad_norm": 24.713665008544922, + "learning_rate": 4.108090934898888e-06, + "loss": 0.34, + "num_input_tokens_seen": 85961856, + "step": 27325 + }, + { + "epoch": 1.7495678893796813, + "grad_norm": 9.232853889465332, + "learning_rate": 4.107663152715746e-06, + "loss": 0.4829, + "num_input_tokens_seen": 85978048, + "step": 27330 + }, + { + "epoch": 1.749887971320658, + "grad_norm": 16.075883865356445, + "learning_rate": 4.107235290254279e-06, + "loss": 0.3376, + "num_input_tokens_seen": 85994176, + "step": 27335 + }, + { + "epoch": 1.7502080532616349, + "grad_norm": 35.523929595947266, + "learning_rate": 4.106807347535854e-06, + "loss": 0.2995, + "num_input_tokens_seen": 86010048, + "step": 27340 + }, + { + "epoch": 1.7503360860380257, + "eval_loss": 0.477387011051178, + "eval_runtime": 51.0165, + "eval_samples_per_second": 272.186, + "eval_steps_per_second": 34.028, + "num_input_tokens_seen": 86015936, + "step": 27342 + }, + { + "epoch": 1.7505281352026119, + "grad_norm": 21.216514587402344, + "learning_rate": 4.10637932458184e-06, + "loss": 0.5011, + "num_input_tokens_seen": 86026496, + "step": 27345 + }, + { + "epoch": 1.7508482171435888, + "grad_norm": 11.697061538696289, + "learning_rate": 4.105951221413609e-06, + "loss": 0.4033, + "num_input_tokens_seen": 86041792, + "step": 27350 + }, + { + "epoch": 1.7511682990845656, + "grad_norm": 24.948867797851562, + "learning_rate": 4.10552303805254e-06, + "loss": 0.4054, + "num_input_tokens_seen": 86058112, + "step": 27355 + }, + { + "epoch": 1.7514883810255424, + "grad_norm": 38.33189010620117, + "learning_rate": 4.105094774520012e-06, + "loss": 0.5139, + "num_input_tokens_seen": 86076608, + "step": 27360 + }, + { + "epoch": 1.7518084629665194, + "grad_norm": 39.77824401855469, + "learning_rate": 4.104666430837413e-06, + "loss": 0.2469, + "num_input_tokens_seen": 86092992, + "step": 27365 + }, + { + "epoch": 1.7521285449074964, + "grad_norm": 20.823680877685547, + "learning_rate": 4.10423800702613e-06, + "loss": 0.3058, + "num_input_tokens_seen": 86107904, + "step": 27370 + }, + { + "epoch": 1.7524486268484734, + "grad_norm": 27.99875259399414, + "learning_rate": 4.103809503107557e-06, + "loss": 0.3834, + "num_input_tokens_seen": 86124800, + "step": 27375 + }, + { + "epoch": 1.7527687087894501, + "grad_norm": 29.536258697509766, + "learning_rate": 4.103380919103092e-06, + "loss": 0.2621, + "num_input_tokens_seen": 86140096, + "step": 27380 + }, + { + "epoch": 1.753088790730427, + "grad_norm": 36.58606719970703, + "learning_rate": 4.102952255034135e-06, + "loss": 0.3195, + "num_input_tokens_seen": 86155008, + "step": 27385 + }, + { + "epoch": 1.7534088726714039, + "grad_norm": 14.053570747375488, + "learning_rate": 4.102523510922092e-06, + "loss": 0.5096, + "num_input_tokens_seen": 86170496, + "step": 27390 + }, + { + "epoch": 1.7537289546123809, + "grad_norm": 31.369468688964844, + "learning_rate": 4.102094686788373e-06, + "loss": 0.3476, + "num_input_tokens_seen": 86185472, + "step": 27395 + }, + { + "epoch": 1.7540490365533576, + "grad_norm": 18.17263412475586, + "learning_rate": 4.1016657826543895e-06, + "loss": 0.4326, + "num_input_tokens_seen": 86201216, + "step": 27400 + }, + { + "epoch": 1.7543691184943344, + "grad_norm": 11.556639671325684, + "learning_rate": 4.1012367985415595e-06, + "loss": 0.2136, + "num_input_tokens_seen": 86215040, + "step": 27405 + }, + { + "epoch": 1.7546892004353114, + "grad_norm": 34.19783401489258, + "learning_rate": 4.1008077344713045e-06, + "loss": 0.4943, + "num_input_tokens_seen": 86229632, + "step": 27410 + }, + { + "epoch": 1.7550092823762884, + "grad_norm": 21.374292373657227, + "learning_rate": 4.100378590465049e-06, + "loss": 0.3653, + "num_input_tokens_seen": 86243840, + "step": 27415 + }, + { + "epoch": 1.7553293643172654, + "grad_norm": 31.039283752441406, + "learning_rate": 4.099949366544224e-06, + "loss": 0.2443, + "num_input_tokens_seen": 86258368, + "step": 27420 + }, + { + "epoch": 1.7556494462582422, + "grad_norm": 70.57805633544922, + "learning_rate": 4.09952006273026e-06, + "loss": 0.4385, + "num_input_tokens_seen": 86274304, + "step": 27425 + }, + { + "epoch": 1.755969528199219, + "grad_norm": 13.128028869628906, + "learning_rate": 4.099090679044597e-06, + "loss": 0.4115, + "num_input_tokens_seen": 86290304, + "step": 27430 + }, + { + "epoch": 1.756289610140196, + "grad_norm": 18.25042724609375, + "learning_rate": 4.0986612155086745e-06, + "loss": 0.3621, + "num_input_tokens_seen": 86305408, + "step": 27435 + }, + { + "epoch": 1.756609692081173, + "grad_norm": 17.212814331054688, + "learning_rate": 4.098231672143938e-06, + "loss": 0.6266, + "num_input_tokens_seen": 86334336, + "step": 27440 + }, + { + "epoch": 1.7569297740221497, + "grad_norm": 57.49274444580078, + "learning_rate": 4.097802048971836e-06, + "loss": 0.3024, + "num_input_tokens_seen": 86351296, + "step": 27445 + }, + { + "epoch": 1.7572498559631264, + "grad_norm": 43.4875373840332, + "learning_rate": 4.097372346013823e-06, + "loss": 0.357, + "num_input_tokens_seen": 86369920, + "step": 27450 + }, + { + "epoch": 1.7575699379041034, + "grad_norm": 24.51875877380371, + "learning_rate": 4.096942563291355e-06, + "loss": 0.3462, + "num_input_tokens_seen": 86384960, + "step": 27455 + }, + { + "epoch": 1.7578900198450804, + "grad_norm": 29.898351669311523, + "learning_rate": 4.096512700825894e-06, + "loss": 0.358, + "num_input_tokens_seen": 86399808, + "step": 27460 + }, + { + "epoch": 1.7582101017860572, + "grad_norm": 20.3073787689209, + "learning_rate": 4.096082758638904e-06, + "loss": 0.4086, + "num_input_tokens_seen": 86415616, + "step": 27465 + }, + { + "epoch": 1.758530183727034, + "grad_norm": 81.76880645751953, + "learning_rate": 4.095652736751856e-06, + "loss": 0.3551, + "num_input_tokens_seen": 86432128, + "step": 27470 + }, + { + "epoch": 1.758850265668011, + "grad_norm": 25.516443252563477, + "learning_rate": 4.0952226351862204e-06, + "loss": 0.415, + "num_input_tokens_seen": 86446336, + "step": 27475 + }, + { + "epoch": 1.759170347608988, + "grad_norm": 44.249267578125, + "learning_rate": 4.094792453963476e-06, + "loss": 0.2837, + "num_input_tokens_seen": 86461056, + "step": 27480 + }, + { + "epoch": 1.759490429549965, + "grad_norm": 11.963187217712402, + "learning_rate": 4.094362193105104e-06, + "loss": 0.4098, + "num_input_tokens_seen": 86476800, + "step": 27485 + }, + { + "epoch": 1.7598105114909417, + "grad_norm": 30.947101593017578, + "learning_rate": 4.093931852632588e-06, + "loss": 0.3081, + "num_input_tokens_seen": 86493376, + "step": 27490 + }, + { + "epoch": 1.7601305934319185, + "grad_norm": 23.812999725341797, + "learning_rate": 4.093501432567418e-06, + "loss": 0.3491, + "num_input_tokens_seen": 86508544, + "step": 27495 + }, + { + "epoch": 1.7604506753728955, + "grad_norm": 26.261764526367188, + "learning_rate": 4.093070932931087e-06, + "loss": 0.3194, + "num_input_tokens_seen": 86524480, + "step": 27500 + }, + { + "epoch": 1.7607707573138724, + "grad_norm": 6.244785308837891, + "learning_rate": 4.092640353745092e-06, + "loss": 0.2945, + "num_input_tokens_seen": 86539136, + "step": 27505 + }, + { + "epoch": 1.7610908392548492, + "grad_norm": 40.54521179199219, + "learning_rate": 4.092209695030933e-06, + "loss": 0.3528, + "num_input_tokens_seen": 86555328, + "step": 27510 + }, + { + "epoch": 1.761410921195826, + "grad_norm": 8.0313081741333, + "learning_rate": 4.091778956810115e-06, + "loss": 0.2385, + "num_input_tokens_seen": 86570624, + "step": 27515 + }, + { + "epoch": 1.761731003136803, + "grad_norm": 23.870288848876953, + "learning_rate": 4.091348139104147e-06, + "loss": 0.3079, + "num_input_tokens_seen": 86585600, + "step": 27520 + }, + { + "epoch": 1.76205108507778, + "grad_norm": 4.837952136993408, + "learning_rate": 4.0909172419345436e-06, + "loss": 0.4022, + "num_input_tokens_seen": 86601216, + "step": 27525 + }, + { + "epoch": 1.762371167018757, + "grad_norm": 10.197012901306152, + "learning_rate": 4.090486265322818e-06, + "loss": 0.2999, + "num_input_tokens_seen": 86617216, + "step": 27530 + }, + { + "epoch": 1.7626912489597337, + "grad_norm": 29.105348587036133, + "learning_rate": 4.090055209290494e-06, + "loss": 0.2846, + "num_input_tokens_seen": 86632960, + "step": 27535 + }, + { + "epoch": 1.7630113309007105, + "grad_norm": 19.415145874023438, + "learning_rate": 4.089624073859095e-06, + "loss": 0.3345, + "num_input_tokens_seen": 86648576, + "step": 27540 + }, + { + "epoch": 1.7633314128416875, + "grad_norm": 30.581438064575195, + "learning_rate": 4.08919285905015e-06, + "loss": 0.2538, + "num_input_tokens_seen": 86663232, + "step": 27545 + }, + { + "epoch": 1.7636514947826645, + "grad_norm": 10.128600120544434, + "learning_rate": 4.0887615648851906e-06, + "loss": 0.2743, + "num_input_tokens_seen": 86679104, + "step": 27550 + }, + { + "epoch": 1.7639715767236412, + "grad_norm": 14.885442733764648, + "learning_rate": 4.088330191385754e-06, + "loss": 0.3117, + "num_input_tokens_seen": 86695872, + "step": 27555 + }, + { + "epoch": 1.764291658664618, + "grad_norm": 62.028282165527344, + "learning_rate": 4.087898738573382e-06, + "loss": 0.4026, + "num_input_tokens_seen": 86712064, + "step": 27560 + }, + { + "epoch": 1.764611740605595, + "grad_norm": 10.643742561340332, + "learning_rate": 4.087467206469617e-06, + "loss": 0.3008, + "num_input_tokens_seen": 86728832, + "step": 27565 + }, + { + "epoch": 1.764931822546572, + "grad_norm": 17.172273635864258, + "learning_rate": 4.087035595096009e-06, + "loss": 0.3186, + "num_input_tokens_seen": 86743872, + "step": 27570 + }, + { + "epoch": 1.7652519044875488, + "grad_norm": 8.515186309814453, + "learning_rate": 4.08660390447411e-06, + "loss": 0.2668, + "num_input_tokens_seen": 86758720, + "step": 27575 + }, + { + "epoch": 1.7655719864285258, + "grad_norm": 63.66660690307617, + "learning_rate": 4.086172134625477e-06, + "loss": 0.4228, + "num_input_tokens_seen": 86774464, + "step": 27580 + }, + { + "epoch": 1.7658920683695025, + "grad_norm": 10.59974479675293, + "learning_rate": 4.08574028557167e-06, + "loss": 0.2753, + "num_input_tokens_seen": 86790528, + "step": 27585 + }, + { + "epoch": 1.7662121503104795, + "grad_norm": 14.904664993286133, + "learning_rate": 4.085308357334251e-06, + "loss": 0.2646, + "num_input_tokens_seen": 86806016, + "step": 27590 + }, + { + "epoch": 1.7665322322514565, + "grad_norm": 24.46119499206543, + "learning_rate": 4.084876349934792e-06, + "loss": 0.258, + "num_input_tokens_seen": 86821312, + "step": 27595 + }, + { + "epoch": 1.7668523141924333, + "grad_norm": 32.23527145385742, + "learning_rate": 4.084444263394863e-06, + "loss": 0.3119, + "num_input_tokens_seen": 86836800, + "step": 27600 + }, + { + "epoch": 1.76717239613341, + "grad_norm": 30.562053680419922, + "learning_rate": 4.084012097736039e-06, + "loss": 0.4817, + "num_input_tokens_seen": 86852160, + "step": 27605 + }, + { + "epoch": 1.767492478074387, + "grad_norm": 50.89196014404297, + "learning_rate": 4.083579852979903e-06, + "loss": 0.3468, + "num_input_tokens_seen": 86868032, + "step": 27610 + }, + { + "epoch": 1.767812560015364, + "grad_norm": 22.773326873779297, + "learning_rate": 4.083147529148038e-06, + "loss": 0.4362, + "num_input_tokens_seen": 86883456, + "step": 27615 + }, + { + "epoch": 1.7681326419563408, + "grad_norm": 12.961448669433594, + "learning_rate": 4.082715126262031e-06, + "loss": 0.3842, + "num_input_tokens_seen": 86898688, + "step": 27620 + }, + { + "epoch": 1.7684527238973176, + "grad_norm": 19.75737953186035, + "learning_rate": 4.082282644343475e-06, + "loss": 0.2753, + "num_input_tokens_seen": 86915392, + "step": 27625 + }, + { + "epoch": 1.7687728058382945, + "grad_norm": 22.07662010192871, + "learning_rate": 4.0818500834139655e-06, + "loss": 0.3564, + "num_input_tokens_seen": 86931520, + "step": 27630 + }, + { + "epoch": 1.7690928877792715, + "grad_norm": 20.112125396728516, + "learning_rate": 4.081417443495103e-06, + "loss": 0.3086, + "num_input_tokens_seen": 86948608, + "step": 27635 + }, + { + "epoch": 1.7694129697202485, + "grad_norm": 5.3670573234558105, + "learning_rate": 4.08098472460849e-06, + "loss": 0.246, + "num_input_tokens_seen": 86964928, + "step": 27640 + }, + { + "epoch": 1.7697330516612253, + "grad_norm": 32.386043548583984, + "learning_rate": 4.080551926775735e-06, + "loss": 0.3752, + "num_input_tokens_seen": 86981440, + "step": 27645 + }, + { + "epoch": 1.770053133602202, + "grad_norm": 31.11688232421875, + "learning_rate": 4.08011905001845e-06, + "loss": 0.3438, + "num_input_tokens_seen": 86997888, + "step": 27650 + }, + { + "epoch": 1.770373215543179, + "grad_norm": 28.411100387573242, + "learning_rate": 4.079686094358248e-06, + "loss": 0.3245, + "num_input_tokens_seen": 87012288, + "step": 27655 + }, + { + "epoch": 1.770693297484156, + "grad_norm": 11.329375267028809, + "learning_rate": 4.079253059816753e-06, + "loss": 0.2806, + "num_input_tokens_seen": 87028160, + "step": 27660 + }, + { + "epoch": 1.7710133794251328, + "grad_norm": 30.024066925048828, + "learning_rate": 4.078819946415586e-06, + "loss": 0.4045, + "num_input_tokens_seen": 87043968, + "step": 27665 + }, + { + "epoch": 1.7713334613661096, + "grad_norm": 28.671140670776367, + "learning_rate": 4.078386754176374e-06, + "loss": 0.4153, + "num_input_tokens_seen": 87059712, + "step": 27670 + }, + { + "epoch": 1.7716535433070866, + "grad_norm": 18.92298126220703, + "learning_rate": 4.077953483120749e-06, + "loss": 0.3821, + "num_input_tokens_seen": 87077504, + "step": 27675 + }, + { + "epoch": 1.7719736252480636, + "grad_norm": 15.026651382446289, + "learning_rate": 4.077520133270347e-06, + "loss": 0.449, + "num_input_tokens_seen": 87092544, + "step": 27680 + }, + { + "epoch": 1.7722937071890406, + "grad_norm": 16.52621078491211, + "learning_rate": 4.077086704646807e-06, + "loss": 0.4026, + "num_input_tokens_seen": 87108800, + "step": 27685 + }, + { + "epoch": 1.7726137891300173, + "grad_norm": 47.19370651245117, + "learning_rate": 4.07665319727177e-06, + "loss": 0.486, + "num_input_tokens_seen": 87123776, + "step": 27690 + }, + { + "epoch": 1.772933871070994, + "grad_norm": 36.48838806152344, + "learning_rate": 4.076219611166886e-06, + "loss": 0.4676, + "num_input_tokens_seen": 87140544, + "step": 27695 + }, + { + "epoch": 1.773253953011971, + "grad_norm": 27.588045120239258, + "learning_rate": 4.075785946353805e-06, + "loss": 0.4814, + "num_input_tokens_seen": 87157440, + "step": 27700 + }, + { + "epoch": 1.773574034952948, + "grad_norm": 13.914806365966797, + "learning_rate": 4.075352202854181e-06, + "loss": 0.3255, + "num_input_tokens_seen": 87172608, + "step": 27705 + }, + { + "epoch": 1.7738941168939248, + "grad_norm": 26.279155731201172, + "learning_rate": 4.0749183806896745e-06, + "loss": 0.296, + "num_input_tokens_seen": 87189888, + "step": 27710 + }, + { + "epoch": 1.7742141988349016, + "grad_norm": 17.46876335144043, + "learning_rate": 4.074484479881947e-06, + "loss": 0.3075, + "num_input_tokens_seen": 87207680, + "step": 27715 + }, + { + "epoch": 1.7745342807758786, + "grad_norm": 48.7274284362793, + "learning_rate": 4.0740505004526655e-06, + "loss": 0.2536, + "num_input_tokens_seen": 87223936, + "step": 27720 + }, + { + "epoch": 1.7748543627168556, + "grad_norm": 14.351666450500488, + "learning_rate": 4.073616442423502e-06, + "loss": 0.3287, + "num_input_tokens_seen": 87238656, + "step": 27725 + }, + { + "epoch": 1.7751744446578324, + "grad_norm": 15.3496732711792, + "learning_rate": 4.073182305816129e-06, + "loss": 0.2594, + "num_input_tokens_seen": 87254784, + "step": 27730 + }, + { + "epoch": 1.7754945265988091, + "grad_norm": 17.530698776245117, + "learning_rate": 4.072748090652226e-06, + "loss": 0.2666, + "num_input_tokens_seen": 87271424, + "step": 27735 + }, + { + "epoch": 1.7758146085397861, + "grad_norm": 21.08891487121582, + "learning_rate": 4.072313796953476e-06, + "loss": 0.4311, + "num_input_tokens_seen": 87287488, + "step": 27740 + }, + { + "epoch": 1.7761346904807631, + "grad_norm": 23.26708221435547, + "learning_rate": 4.071879424741565e-06, + "loss": 0.2449, + "num_input_tokens_seen": 87302784, + "step": 27745 + }, + { + "epoch": 1.77645477242174, + "grad_norm": 37.56011199951172, + "learning_rate": 4.071444974038182e-06, + "loss": 0.2954, + "num_input_tokens_seen": 87318848, + "step": 27750 + }, + { + "epoch": 1.7767748543627169, + "grad_norm": 65.18070983886719, + "learning_rate": 4.071010444865024e-06, + "loss": 0.3946, + "num_input_tokens_seen": 87334080, + "step": 27755 + }, + { + "epoch": 1.7770949363036936, + "grad_norm": 17.65541648864746, + "learning_rate": 4.070575837243786e-06, + "loss": 0.3141, + "num_input_tokens_seen": 87349504, + "step": 27760 + }, + { + "epoch": 1.7774150182446706, + "grad_norm": 31.266559600830078, + "learning_rate": 4.070141151196173e-06, + "loss": 0.3426, + "num_input_tokens_seen": 87365632, + "step": 27765 + }, + { + "epoch": 1.7777351001856476, + "grad_norm": 19.51246452331543, + "learning_rate": 4.069706386743888e-06, + "loss": 0.391, + "num_input_tokens_seen": 87381248, + "step": 27770 + }, + { + "epoch": 1.7780551821266244, + "grad_norm": 11.216083526611328, + "learning_rate": 4.0692715439086435e-06, + "loss": 0.3247, + "num_input_tokens_seen": 87396608, + "step": 27775 + }, + { + "epoch": 1.7783752640676012, + "grad_norm": 16.02535629272461, + "learning_rate": 4.068836622712151e-06, + "loss": 0.3074, + "num_input_tokens_seen": 87411840, + "step": 27780 + }, + { + "epoch": 1.7786953460085781, + "grad_norm": 60.132240295410156, + "learning_rate": 4.068401623176131e-06, + "loss": 0.3801, + "num_input_tokens_seen": 87428416, + "step": 27785 + }, + { + "epoch": 1.7790154279495551, + "grad_norm": 32.767635345458984, + "learning_rate": 4.067966545322302e-06, + "loss": 0.2938, + "num_input_tokens_seen": 87443776, + "step": 27790 + }, + { + "epoch": 1.7793355098905321, + "grad_norm": 23.81377601623535, + "learning_rate": 4.067531389172392e-06, + "loss": 0.4359, + "num_input_tokens_seen": 87461056, + "step": 27795 + }, + { + "epoch": 1.779655591831509, + "grad_norm": 16.59333610534668, + "learning_rate": 4.067096154748129e-06, + "loss": 0.2891, + "num_input_tokens_seen": 87477248, + "step": 27800 + }, + { + "epoch": 1.7799756737724857, + "grad_norm": 24.692707061767578, + "learning_rate": 4.066660842071246e-06, + "loss": 0.3999, + "num_input_tokens_seen": 87494912, + "step": 27805 + }, + { + "epoch": 1.7802957557134627, + "grad_norm": 6.401570796966553, + "learning_rate": 4.066225451163482e-06, + "loss": 0.3158, + "num_input_tokens_seen": 87511872, + "step": 27810 + }, + { + "epoch": 1.7806158376544396, + "grad_norm": 19.483034133911133, + "learning_rate": 4.065789982046576e-06, + "loss": 0.4262, + "num_input_tokens_seen": 87527488, + "step": 27815 + }, + { + "epoch": 1.7809359195954164, + "grad_norm": 43.863929748535156, + "learning_rate": 4.065354434742276e-06, + "loss": 0.4085, + "num_input_tokens_seen": 87543168, + "step": 27820 + }, + { + "epoch": 1.7812560015363932, + "grad_norm": 10.259968757629395, + "learning_rate": 4.064918809272328e-06, + "loss": 0.2856, + "num_input_tokens_seen": 87559424, + "step": 27825 + }, + { + "epoch": 1.7815760834773702, + "grad_norm": 46.2078857421875, + "learning_rate": 4.064483105658486e-06, + "loss": 0.4188, + "num_input_tokens_seen": 87575360, + "step": 27830 + }, + { + "epoch": 1.7818961654183472, + "grad_norm": 24.626148223876953, + "learning_rate": 4.064047323922506e-06, + "loss": 0.3875, + "num_input_tokens_seen": 87591680, + "step": 27835 + }, + { + "epoch": 1.782216247359324, + "grad_norm": 18.032398223876953, + "learning_rate": 4.063611464086151e-06, + "loss": 0.4363, + "num_input_tokens_seen": 87607168, + "step": 27840 + }, + { + "epoch": 1.782536329300301, + "grad_norm": 27.909940719604492, + "learning_rate": 4.0631755261711835e-06, + "loss": 0.3222, + "num_input_tokens_seen": 87622080, + "step": 27845 + }, + { + "epoch": 1.7828564112412777, + "grad_norm": 25.95849609375, + "learning_rate": 4.0627395101993725e-06, + "loss": 0.4569, + "num_input_tokens_seen": 87638464, + "step": 27850 + }, + { + "epoch": 1.7831764931822547, + "grad_norm": 22.312503814697266, + "learning_rate": 4.0623034161924905e-06, + "loss": 0.2934, + "num_input_tokens_seen": 87654784, + "step": 27855 + }, + { + "epoch": 1.7834965751232317, + "grad_norm": 21.915075302124023, + "learning_rate": 4.061867244172313e-06, + "loss": 0.4392, + "num_input_tokens_seen": 87669248, + "step": 27860 + }, + { + "epoch": 1.7838166570642084, + "grad_norm": 13.810422897338867, + "learning_rate": 4.061430994160621e-06, + "loss": 0.3091, + "num_input_tokens_seen": 87684864, + "step": 27865 + }, + { + "epoch": 1.7841367390051852, + "grad_norm": 38.66695785522461, + "learning_rate": 4.060994666179199e-06, + "loss": 0.3996, + "num_input_tokens_seen": 87700480, + "step": 27870 + }, + { + "epoch": 1.7844568209461622, + "grad_norm": 23.197307586669922, + "learning_rate": 4.0605582602498336e-06, + "loss": 0.4457, + "num_input_tokens_seen": 87716160, + "step": 27875 + }, + { + "epoch": 1.7847769028871392, + "grad_norm": 19.393077850341797, + "learning_rate": 4.060121776394318e-06, + "loss": 0.2791, + "num_input_tokens_seen": 87732160, + "step": 27880 + }, + { + "epoch": 1.785096984828116, + "grad_norm": 14.584737777709961, + "learning_rate": 4.0596852146344465e-06, + "loss": 0.4283, + "num_input_tokens_seen": 87747968, + "step": 27885 + }, + { + "epoch": 1.7854170667690927, + "grad_norm": 58.81187438964844, + "learning_rate": 4.05924857499202e-06, + "loss": 0.3028, + "num_input_tokens_seen": 87763456, + "step": 27890 + }, + { + "epoch": 1.7857371487100697, + "grad_norm": 22.30742073059082, + "learning_rate": 4.0588118574888415e-06, + "loss": 0.226, + "num_input_tokens_seen": 87778752, + "step": 27895 + }, + { + "epoch": 1.7860572306510467, + "grad_norm": 9.75301742553711, + "learning_rate": 4.0583750621467175e-06, + "loss": 0.2787, + "num_input_tokens_seen": 87794496, + "step": 27900 + }, + { + "epoch": 1.7863773125920237, + "grad_norm": 40.78770446777344, + "learning_rate": 4.0579381889874615e-06, + "loss": 0.243, + "num_input_tokens_seen": 87810368, + "step": 27905 + }, + { + "epoch": 1.7866973945330005, + "grad_norm": 15.780549049377441, + "learning_rate": 4.057501238032886e-06, + "loss": 0.3199, + "num_input_tokens_seen": 87827136, + "step": 27910 + }, + { + "epoch": 1.7870174764739772, + "grad_norm": 30.68370246887207, + "learning_rate": 4.057064209304813e-06, + "loss": 0.3267, + "num_input_tokens_seen": 87844800, + "step": 27915 + }, + { + "epoch": 1.7873375584149542, + "grad_norm": 14.805810928344727, + "learning_rate": 4.056627102825062e-06, + "loss": 0.2677, + "num_input_tokens_seen": 87873472, + "step": 27920 + }, + { + "epoch": 1.7876576403559312, + "grad_norm": 13.36874008178711, + "learning_rate": 4.056189918615464e-06, + "loss": 0.2775, + "num_input_tokens_seen": 87888448, + "step": 27925 + }, + { + "epoch": 1.787977722296908, + "grad_norm": 15.583983421325684, + "learning_rate": 4.0557526566978454e-06, + "loss": 0.3773, + "num_input_tokens_seen": 87904128, + "step": 27930 + }, + { + "epoch": 1.7882978042378848, + "grad_norm": 33.32191848754883, + "learning_rate": 4.055315317094044e-06, + "loss": 0.3589, + "num_input_tokens_seen": 87920384, + "step": 27935 + }, + { + "epoch": 1.7886178861788617, + "grad_norm": 19.261018753051758, + "learning_rate": 4.054877899825896e-06, + "loss": 0.2476, + "num_input_tokens_seen": 87935616, + "step": 27940 + }, + { + "epoch": 1.7889379681198387, + "grad_norm": 10.580381393432617, + "learning_rate": 4.054440404915245e-06, + "loss": 0.3289, + "num_input_tokens_seen": 87951424, + "step": 27945 + }, + { + "epoch": 1.7892580500608157, + "grad_norm": 4.949400424957275, + "learning_rate": 4.054002832383936e-06, + "loss": 0.2972, + "num_input_tokens_seen": 87967296, + "step": 27950 + }, + { + "epoch": 1.7895781320017925, + "grad_norm": 24.958356857299805, + "learning_rate": 4.053565182253822e-06, + "loss": 0.5008, + "num_input_tokens_seen": 87982144, + "step": 27955 + }, + { + "epoch": 1.7898982139427693, + "grad_norm": 36.779563903808594, + "learning_rate": 4.053127454546755e-06, + "loss": 0.4279, + "num_input_tokens_seen": 87997120, + "step": 27960 + }, + { + "epoch": 1.7902182958837463, + "grad_norm": 17.087129592895508, + "learning_rate": 4.0526896492845914e-06, + "loss": 0.3353, + "num_input_tokens_seen": 88012288, + "step": 27965 + }, + { + "epoch": 1.7905383778247232, + "grad_norm": 29.888309478759766, + "learning_rate": 4.0522517664891944e-06, + "loss": 0.3642, + "num_input_tokens_seen": 88027776, + "step": 27970 + }, + { + "epoch": 1.7908584597657, + "grad_norm": 7.331301212310791, + "learning_rate": 4.051813806182431e-06, + "loss": 0.2299, + "num_input_tokens_seen": 88042816, + "step": 27975 + }, + { + "epoch": 1.7911785417066768, + "grad_norm": 23.06802749633789, + "learning_rate": 4.051375768386168e-06, + "loss": 0.2514, + "num_input_tokens_seen": 88058112, + "step": 27980 + }, + { + "epoch": 1.7914986236476538, + "grad_norm": 17.613189697265625, + "learning_rate": 4.050937653122281e-06, + "loss": 0.323, + "num_input_tokens_seen": 88074496, + "step": 27985 + }, + { + "epoch": 1.7918187055886308, + "grad_norm": 34.484718322753906, + "learning_rate": 4.0504994604126446e-06, + "loss": 0.3806, + "num_input_tokens_seen": 88090496, + "step": 27990 + }, + { + "epoch": 1.7921387875296075, + "grad_norm": 35.75886535644531, + "learning_rate": 4.0500611902791435e-06, + "loss": 0.4742, + "num_input_tokens_seen": 88105920, + "step": 27995 + }, + { + "epoch": 1.7924588694705843, + "grad_norm": 38.22193908691406, + "learning_rate": 4.049622842743659e-06, + "loss": 0.5227, + "num_input_tokens_seen": 88121600, + "step": 28000 + }, + { + "epoch": 1.7927789514115613, + "grad_norm": 34.935401916503906, + "learning_rate": 4.049184417828081e-06, + "loss": 0.3019, + "num_input_tokens_seen": 88137216, + "step": 28005 + }, + { + "epoch": 1.7930990333525383, + "grad_norm": 27.286128997802734, + "learning_rate": 4.048745915554303e-06, + "loss": 0.346, + "num_input_tokens_seen": 88152832, + "step": 28010 + }, + { + "epoch": 1.7934191152935153, + "grad_norm": 13.56497859954834, + "learning_rate": 4.048307335944221e-06, + "loss": 0.4016, + "num_input_tokens_seen": 88169024, + "step": 28015 + }, + { + "epoch": 1.793739197234492, + "grad_norm": 20.252788543701172, + "learning_rate": 4.047868679019736e-06, + "loss": 0.2495, + "num_input_tokens_seen": 88184064, + "step": 28020 + }, + { + "epoch": 1.7940592791754688, + "grad_norm": 31.058427810668945, + "learning_rate": 4.047429944802752e-06, + "loss": 0.4761, + "num_input_tokens_seen": 88198848, + "step": 28025 + }, + { + "epoch": 1.7943793611164458, + "grad_norm": 39.854000091552734, + "learning_rate": 4.046991133315177e-06, + "loss": 0.4764, + "num_input_tokens_seen": 88214144, + "step": 28030 + }, + { + "epoch": 1.7946994430574228, + "grad_norm": 30.000093460083008, + "learning_rate": 4.046552244578922e-06, + "loss": 0.316, + "num_input_tokens_seen": 88230208, + "step": 28035 + }, + { + "epoch": 1.7950195249983996, + "grad_norm": 10.37868881225586, + "learning_rate": 4.046113278615904e-06, + "loss": 0.2648, + "num_input_tokens_seen": 88245440, + "step": 28040 + }, + { + "epoch": 1.7953396069393763, + "grad_norm": 37.23588180541992, + "learning_rate": 4.045674235448042e-06, + "loss": 0.298, + "num_input_tokens_seen": 88260224, + "step": 28045 + }, + { + "epoch": 1.7956596888803533, + "grad_norm": 9.831892967224121, + "learning_rate": 4.04523511509726e-06, + "loss": 0.3008, + "num_input_tokens_seen": 88276032, + "step": 28050 + }, + { + "epoch": 1.7959797708213303, + "grad_norm": 13.49539566040039, + "learning_rate": 4.044795917585485e-06, + "loss": 0.2457, + "num_input_tokens_seen": 88290816, + "step": 28055 + }, + { + "epoch": 1.7962998527623073, + "grad_norm": 19.91155433654785, + "learning_rate": 4.044356642934649e-06, + "loss": 0.3286, + "num_input_tokens_seen": 88306112, + "step": 28060 + }, + { + "epoch": 1.796619934703284, + "grad_norm": 25.6905574798584, + "learning_rate": 4.043917291166686e-06, + "loss": 0.2843, + "num_input_tokens_seen": 88322112, + "step": 28065 + }, + { + "epoch": 1.7969400166442608, + "grad_norm": 15.080331802368164, + "learning_rate": 4.043477862303535e-06, + "loss": 0.4775, + "num_input_tokens_seen": 88338688, + "step": 28070 + }, + { + "epoch": 1.7972600985852378, + "grad_norm": 9.564716339111328, + "learning_rate": 4.0430383563671395e-06, + "loss": 0.3853, + "num_input_tokens_seen": 88355584, + "step": 28075 + }, + { + "epoch": 1.7975801805262148, + "grad_norm": 25.240707397460938, + "learning_rate": 4.042598773379447e-06, + "loss": 0.4064, + "num_input_tokens_seen": 88371328, + "step": 28080 + }, + { + "epoch": 1.7979002624671916, + "grad_norm": 24.323575973510742, + "learning_rate": 4.042159113362406e-06, + "loss": 0.358, + "num_input_tokens_seen": 88388608, + "step": 28085 + }, + { + "epoch": 1.7982203444081684, + "grad_norm": 10.930630683898926, + "learning_rate": 4.041719376337971e-06, + "loss": 0.369, + "num_input_tokens_seen": 88404096, + "step": 28090 + }, + { + "epoch": 1.7985404263491453, + "grad_norm": 37.451332092285156, + "learning_rate": 4.041279562328102e-06, + "loss": 0.276, + "num_input_tokens_seen": 88418752, + "step": 28095 + }, + { + "epoch": 1.7988605082901223, + "grad_norm": 15.34428596496582, + "learning_rate": 4.040839671354759e-06, + "loss": 0.4765, + "num_input_tokens_seen": 88434752, + "step": 28100 + }, + { + "epoch": 1.799180590231099, + "grad_norm": 35.265193939208984, + "learning_rate": 4.04039970343991e-06, + "loss": 0.3977, + "num_input_tokens_seen": 88450304, + "step": 28105 + }, + { + "epoch": 1.799500672172076, + "grad_norm": 13.182930946350098, + "learning_rate": 4.039959658605522e-06, + "loss": 0.3332, + "num_input_tokens_seen": 88465984, + "step": 28110 + }, + { + "epoch": 1.7998207541130529, + "grad_norm": 13.034355163574219, + "learning_rate": 4.039519536873571e-06, + "loss": 0.264, + "num_input_tokens_seen": 88480576, + "step": 28115 + }, + { + "epoch": 1.8001408360540299, + "grad_norm": 34.20721435546875, + "learning_rate": 4.039079338266033e-06, + "loss": 0.2865, + "num_input_tokens_seen": 88497024, + "step": 28120 + }, + { + "epoch": 1.8004609179950068, + "grad_norm": 39.85515594482422, + "learning_rate": 4.038639062804889e-06, + "loss": 0.3221, + "num_input_tokens_seen": 88512448, + "step": 28125 + }, + { + "epoch": 1.8007809999359836, + "grad_norm": 58.54035568237305, + "learning_rate": 4.038198710512126e-06, + "loss": 0.2948, + "num_input_tokens_seen": 88528192, + "step": 28130 + }, + { + "epoch": 1.8011010818769604, + "grad_norm": 13.02176570892334, + "learning_rate": 4.0377582814097305e-06, + "loss": 0.2623, + "num_input_tokens_seen": 88543424, + "step": 28135 + }, + { + "epoch": 1.8014211638179374, + "grad_norm": 9.444266319274902, + "learning_rate": 4.037317775519697e-06, + "loss": 0.2644, + "num_input_tokens_seen": 88558784, + "step": 28140 + }, + { + "epoch": 1.8017412457589144, + "grad_norm": 29.200315475463867, + "learning_rate": 4.036877192864021e-06, + "loss": 0.2883, + "num_input_tokens_seen": 88573376, + "step": 28145 + }, + { + "epoch": 1.8020613276998911, + "grad_norm": 52.529090881347656, + "learning_rate": 4.036436533464703e-06, + "loss": 0.3588, + "num_input_tokens_seen": 88589504, + "step": 28150 + }, + { + "epoch": 1.802381409640868, + "grad_norm": 70.46646118164062, + "learning_rate": 4.035995797343748e-06, + "loss": 0.4704, + "num_input_tokens_seen": 88605184, + "step": 28155 + }, + { + "epoch": 1.802701491581845, + "grad_norm": 47.94511795043945, + "learning_rate": 4.035554984523163e-06, + "loss": 0.3376, + "num_input_tokens_seen": 88620928, + "step": 28160 + }, + { + "epoch": 1.8030215735228219, + "grad_norm": 31.352428436279297, + "learning_rate": 4.035114095024963e-06, + "loss": 0.4225, + "num_input_tokens_seen": 88637568, + "step": 28165 + }, + { + "epoch": 1.8033416554637989, + "grad_norm": 24.748289108276367, + "learning_rate": 4.034673128871159e-06, + "loss": 0.257, + "num_input_tokens_seen": 88656128, + "step": 28170 + }, + { + "epoch": 1.8036617374047756, + "grad_norm": 47.50680923461914, + "learning_rate": 4.034232086083772e-06, + "loss": 0.4035, + "num_input_tokens_seen": 88671360, + "step": 28175 + }, + { + "epoch": 1.8039818193457524, + "grad_norm": 17.42681121826172, + "learning_rate": 4.033790966684828e-06, + "loss": 0.3485, + "num_input_tokens_seen": 88686784, + "step": 28180 + }, + { + "epoch": 1.8043019012867294, + "grad_norm": 26.209980010986328, + "learning_rate": 4.033349770696351e-06, + "loss": 0.3891, + "num_input_tokens_seen": 88703616, + "step": 28185 + }, + { + "epoch": 1.8046219832277064, + "grad_norm": 23.495330810546875, + "learning_rate": 4.032908498140373e-06, + "loss": 0.451, + "num_input_tokens_seen": 88719168, + "step": 28190 + }, + { + "epoch": 1.8049420651686832, + "grad_norm": 12.793549537658691, + "learning_rate": 4.0324671490389306e-06, + "loss": 0.3382, + "num_input_tokens_seen": 88734080, + "step": 28195 + }, + { + "epoch": 1.80526214710966, + "grad_norm": 10.539767265319824, + "learning_rate": 4.03202572341406e-06, + "loss": 0.2028, + "num_input_tokens_seen": 88750464, + "step": 28200 + }, + { + "epoch": 1.805582229050637, + "grad_norm": 52.643165588378906, + "learning_rate": 4.031584221287806e-06, + "loss": 0.4104, + "num_input_tokens_seen": 88767616, + "step": 28205 + }, + { + "epoch": 1.805902310991614, + "grad_norm": 33.14971160888672, + "learning_rate": 4.031142642682213e-06, + "loss": 0.2934, + "num_input_tokens_seen": 88783552, + "step": 28210 + }, + { + "epoch": 1.806222392932591, + "grad_norm": 41.08024978637695, + "learning_rate": 4.030700987619332e-06, + "loss": 0.4233, + "num_input_tokens_seen": 88798464, + "step": 28215 + }, + { + "epoch": 1.8065424748735677, + "grad_norm": 23.958480834960938, + "learning_rate": 4.030259256121217e-06, + "loss": 0.2509, + "num_input_tokens_seen": 88813376, + "step": 28220 + }, + { + "epoch": 1.8068625568145444, + "grad_norm": 19.694509506225586, + "learning_rate": 4.029817448209926e-06, + "loss": 0.3985, + "num_input_tokens_seen": 88829184, + "step": 28225 + }, + { + "epoch": 1.8071826387555214, + "grad_norm": 15.799253463745117, + "learning_rate": 4.029375563907519e-06, + "loss": 0.2694, + "num_input_tokens_seen": 88844160, + "step": 28230 + }, + { + "epoch": 1.8075027206964984, + "grad_norm": 22.500669479370117, + "learning_rate": 4.0289336032360635e-06, + "loss": 0.2542, + "num_input_tokens_seen": 88860288, + "step": 28235 + }, + { + "epoch": 1.8078228026374752, + "grad_norm": 12.030638694763184, + "learning_rate": 4.028491566217626e-06, + "loss": 0.2856, + "num_input_tokens_seen": 88875264, + "step": 28240 + }, + { + "epoch": 1.808142884578452, + "grad_norm": 28.203163146972656, + "learning_rate": 4.028049452874283e-06, + "loss": 0.4487, + "num_input_tokens_seen": 88890816, + "step": 28245 + }, + { + "epoch": 1.808462966519429, + "grad_norm": 19.704008102416992, + "learning_rate": 4.027607263228109e-06, + "loss": 0.2714, + "num_input_tokens_seen": 88906816, + "step": 28250 + }, + { + "epoch": 1.808783048460406, + "grad_norm": 16.956132888793945, + "learning_rate": 4.0271649973011864e-06, + "loss": 0.2811, + "num_input_tokens_seen": 88922496, + "step": 28255 + }, + { + "epoch": 1.8091031304013827, + "grad_norm": 73.02269744873047, + "learning_rate": 4.026722655115598e-06, + "loss": 0.4157, + "num_input_tokens_seen": 88939072, + "step": 28260 + }, + { + "epoch": 1.8094232123423597, + "grad_norm": 15.289286613464355, + "learning_rate": 4.026280236693433e-06, + "loss": 0.2673, + "num_input_tokens_seen": 88954432, + "step": 28265 + }, + { + "epoch": 1.8097432942833365, + "grad_norm": 18.797353744506836, + "learning_rate": 4.025837742056782e-06, + "loss": 0.3804, + "num_input_tokens_seen": 88969408, + "step": 28270 + }, + { + "epoch": 1.8100633762243135, + "grad_norm": 38.830528259277344, + "learning_rate": 4.025395171227742e-06, + "loss": 0.3138, + "num_input_tokens_seen": 88985280, + "step": 28275 + }, + { + "epoch": 1.8103834581652904, + "grad_norm": 6.7725958824157715, + "learning_rate": 4.024952524228413e-06, + "loss": 0.1757, + "num_input_tokens_seen": 89001664, + "step": 28280 + }, + { + "epoch": 1.8107035401062672, + "grad_norm": 24.007905960083008, + "learning_rate": 4.024509801080899e-06, + "loss": 0.2732, + "num_input_tokens_seen": 89016320, + "step": 28285 + }, + { + "epoch": 1.811023622047244, + "grad_norm": 16.503129959106445, + "learning_rate": 4.024067001807305e-06, + "loss": 0.384, + "num_input_tokens_seen": 89032000, + "step": 28290 + }, + { + "epoch": 1.811343703988221, + "grad_norm": 13.807546615600586, + "learning_rate": 4.023624126429743e-06, + "loss": 0.2746, + "num_input_tokens_seen": 89047104, + "step": 28295 + }, + { + "epoch": 1.811663785929198, + "grad_norm": 21.832841873168945, + "learning_rate": 4.02318117497033e-06, + "loss": 0.3309, + "num_input_tokens_seen": 89063552, + "step": 28300 + }, + { + "epoch": 1.8119838678701747, + "grad_norm": 37.03150939941406, + "learning_rate": 4.022738147451183e-06, + "loss": 0.2325, + "num_input_tokens_seen": 89080320, + "step": 28305 + }, + { + "epoch": 1.8123039498111515, + "grad_norm": 12.773541450500488, + "learning_rate": 4.022295043894424e-06, + "loss": 0.453, + "num_input_tokens_seen": 89098112, + "step": 28310 + }, + { + "epoch": 1.8126240317521285, + "grad_norm": 16.973220825195312, + "learning_rate": 4.02185186432218e-06, + "loss": 0.263, + "num_input_tokens_seen": 89113088, + "step": 28315 + }, + { + "epoch": 1.8129441136931055, + "grad_norm": 11.53625202178955, + "learning_rate": 4.021408608756581e-06, + "loss": 0.3333, + "num_input_tokens_seen": 89128192, + "step": 28320 + }, + { + "epoch": 1.8132641956340825, + "grad_norm": 31.077009201049805, + "learning_rate": 4.020965277219761e-06, + "loss": 0.3234, + "num_input_tokens_seen": 89144256, + "step": 28325 + }, + { + "epoch": 1.8135842775750592, + "grad_norm": 40.910621643066406, + "learning_rate": 4.020521869733858e-06, + "loss": 0.3372, + "num_input_tokens_seen": 89159744, + "step": 28330 + }, + { + "epoch": 1.813904359516036, + "grad_norm": 30.39272689819336, + "learning_rate": 4.020078386321011e-06, + "loss": 0.3835, + "num_input_tokens_seen": 89175424, + "step": 28335 + }, + { + "epoch": 1.814224441457013, + "grad_norm": 12.92762279510498, + "learning_rate": 4.019634827003369e-06, + "loss": 0.4467, + "num_input_tokens_seen": 89191552, + "step": 28340 + }, + { + "epoch": 1.81454452339799, + "grad_norm": 13.77588939666748, + "learning_rate": 4.019191191803078e-06, + "loss": 0.3148, + "num_input_tokens_seen": 89208384, + "step": 28345 + }, + { + "epoch": 1.8148646053389668, + "grad_norm": 13.55179500579834, + "learning_rate": 4.0187474807422935e-06, + "loss": 0.3173, + "num_input_tokens_seen": 89223488, + "step": 28350 + }, + { + "epoch": 1.8151846872799435, + "grad_norm": 12.959922790527344, + "learning_rate": 4.0183036938431695e-06, + "loss": 0.2967, + "num_input_tokens_seen": 89238528, + "step": 28355 + }, + { + "epoch": 1.8155047692209205, + "grad_norm": 19.177244186401367, + "learning_rate": 4.017859831127868e-06, + "loss": 0.3506, + "num_input_tokens_seen": 89254592, + "step": 28360 + }, + { + "epoch": 1.8158248511618975, + "grad_norm": 17.2382755279541, + "learning_rate": 4.017415892618552e-06, + "loss": 0.2924, + "num_input_tokens_seen": 89269824, + "step": 28365 + }, + { + "epoch": 1.8161449331028743, + "grad_norm": 7.87879753112793, + "learning_rate": 4.0169718783373915e-06, + "loss": 0.3246, + "num_input_tokens_seen": 89285632, + "step": 28370 + }, + { + "epoch": 1.8164650150438513, + "grad_norm": 23.242881774902344, + "learning_rate": 4.0165277883065565e-06, + "loss": 0.3985, + "num_input_tokens_seen": 89301248, + "step": 28375 + }, + { + "epoch": 1.816785096984828, + "grad_norm": 42.98371124267578, + "learning_rate": 4.016083622548222e-06, + "loss": 0.4508, + "num_input_tokens_seen": 89317504, + "step": 28380 + }, + { + "epoch": 1.817105178925805, + "grad_norm": 32.904144287109375, + "learning_rate": 4.01563938108457e-06, + "loss": 0.457, + "num_input_tokens_seen": 89333632, + "step": 28385 + }, + { + "epoch": 1.817425260866782, + "grad_norm": 18.097871780395508, + "learning_rate": 4.015195063937781e-06, + "loss": 0.2177, + "num_input_tokens_seen": 89349184, + "step": 28390 + }, + { + "epoch": 1.8177453428077588, + "grad_norm": 58.93098449707031, + "learning_rate": 4.014750671130044e-06, + "loss": 0.3487, + "num_input_tokens_seen": 89365248, + "step": 28395 + }, + { + "epoch": 1.8180654247487356, + "grad_norm": 5.067666053771973, + "learning_rate": 4.014306202683548e-06, + "loss": 0.2793, + "num_input_tokens_seen": 89381056, + "step": 28400 + }, + { + "epoch": 1.8183855066897125, + "grad_norm": 45.062095642089844, + "learning_rate": 4.013861658620487e-06, + "loss": 0.2725, + "num_input_tokens_seen": 89396800, + "step": 28405 + }, + { + "epoch": 1.8187055886306895, + "grad_norm": 17.073637008666992, + "learning_rate": 4.013417038963061e-06, + "loss": 0.403, + "num_input_tokens_seen": 89412992, + "step": 28410 + }, + { + "epoch": 1.8190256705716663, + "grad_norm": 25.158449172973633, + "learning_rate": 4.012972343733471e-06, + "loss": 0.2654, + "num_input_tokens_seen": 89429184, + "step": 28415 + }, + { + "epoch": 1.819345752512643, + "grad_norm": 18.825443267822266, + "learning_rate": 4.012527572953923e-06, + "loss": 0.2509, + "num_input_tokens_seen": 89445312, + "step": 28420 + }, + { + "epoch": 1.81966583445362, + "grad_norm": 60.708587646484375, + "learning_rate": 4.012082726646627e-06, + "loss": 0.3218, + "num_input_tokens_seen": 89460160, + "step": 28425 + }, + { + "epoch": 1.819985916394597, + "grad_norm": 15.377226829528809, + "learning_rate": 4.011637804833795e-06, + "loss": 0.333, + "num_input_tokens_seen": 89475200, + "step": 28430 + }, + { + "epoch": 1.820305998335574, + "grad_norm": 15.964266777038574, + "learning_rate": 4.011192807537645e-06, + "loss": 0.4607, + "num_input_tokens_seen": 89490944, + "step": 28435 + }, + { + "epoch": 1.8206260802765508, + "grad_norm": 17.669593811035156, + "learning_rate": 4.010747734780398e-06, + "loss": 0.3291, + "num_input_tokens_seen": 89505600, + "step": 28440 + }, + { + "epoch": 1.8209461622175276, + "grad_norm": 31.187267303466797, + "learning_rate": 4.0103025865842785e-06, + "loss": 0.348, + "num_input_tokens_seen": 89520704, + "step": 28445 + }, + { + "epoch": 1.8212662441585046, + "grad_norm": 12.855758666992188, + "learning_rate": 4.009857362971514e-06, + "loss": 0.3568, + "num_input_tokens_seen": 89537152, + "step": 28450 + }, + { + "epoch": 1.8215863260994816, + "grad_norm": 18.53982925415039, + "learning_rate": 4.009412063964338e-06, + "loss": 0.31, + "num_input_tokens_seen": 89556032, + "step": 28455 + }, + { + "epoch": 1.8219064080404583, + "grad_norm": 11.837178230285645, + "learning_rate": 4.008966689584985e-06, + "loss": 0.2255, + "num_input_tokens_seen": 89570880, + "step": 28460 + }, + { + "epoch": 1.822226489981435, + "grad_norm": 31.08254051208496, + "learning_rate": 4.008521239855697e-06, + "loss": 0.3559, + "num_input_tokens_seen": 89586048, + "step": 28465 + }, + { + "epoch": 1.822546571922412, + "grad_norm": 41.60906982421875, + "learning_rate": 4.0080757147987135e-06, + "loss": 0.277, + "num_input_tokens_seen": 89602240, + "step": 28470 + }, + { + "epoch": 1.822866653863389, + "grad_norm": 37.99907302856445, + "learning_rate": 4.0076301144362865e-06, + "loss": 0.3515, + "num_input_tokens_seen": 89617600, + "step": 28475 + }, + { + "epoch": 1.823186735804366, + "grad_norm": 49.28627395629883, + "learning_rate": 4.007184438790663e-06, + "loss": 0.371, + "num_input_tokens_seen": 89633344, + "step": 28480 + }, + { + "epoch": 1.8235068177453428, + "grad_norm": 20.201560974121094, + "learning_rate": 4.0067386878840995e-06, + "loss": 0.2706, + "num_input_tokens_seen": 89648832, + "step": 28485 + }, + { + "epoch": 1.8238268996863196, + "grad_norm": 13.831769943237305, + "learning_rate": 4.006292861738855e-06, + "loss": 0.3214, + "num_input_tokens_seen": 89664256, + "step": 28490 + }, + { + "epoch": 1.8241469816272966, + "grad_norm": 11.644999504089355, + "learning_rate": 4.00584696037719e-06, + "loss": 0.3532, + "num_input_tokens_seen": 89679488, + "step": 28495 + }, + { + "epoch": 1.8244670635682736, + "grad_norm": 37.967140197753906, + "learning_rate": 4.005400983821373e-06, + "loss": 0.4889, + "num_input_tokens_seen": 89695488, + "step": 28500 + }, + { + "epoch": 1.8247871455092504, + "grad_norm": 35.70683670043945, + "learning_rate": 4.00495493209367e-06, + "loss": 0.3353, + "num_input_tokens_seen": 89710592, + "step": 28505 + }, + { + "epoch": 1.8251072274502271, + "grad_norm": 13.85402774810791, + "learning_rate": 4.0045088052163585e-06, + "loss": 0.2648, + "num_input_tokens_seen": 89725760, + "step": 28510 + }, + { + "epoch": 1.8254273093912041, + "grad_norm": 18.567853927612305, + "learning_rate": 4.004062603211714e-06, + "loss": 0.3814, + "num_input_tokens_seen": 89741696, + "step": 28515 + }, + { + "epoch": 1.8257473913321811, + "grad_norm": 15.55744457244873, + "learning_rate": 4.003616326102015e-06, + "loss": 0.2767, + "num_input_tokens_seen": 89757376, + "step": 28520 + }, + { + "epoch": 1.8260674732731579, + "grad_norm": 21.2593994140625, + "learning_rate": 4.003169973909551e-06, + "loss": 0.4397, + "num_input_tokens_seen": 89774592, + "step": 28525 + }, + { + "epoch": 1.8263875552141349, + "grad_norm": 21.259733200073242, + "learning_rate": 4.002723546656608e-06, + "loss": 0.2701, + "num_input_tokens_seen": 89790784, + "step": 28530 + }, + { + "epoch": 1.8267076371551116, + "grad_norm": 33.85911178588867, + "learning_rate": 4.002277044365478e-06, + "loss": 0.2188, + "num_input_tokens_seen": 89806208, + "step": 28535 + }, + { + "epoch": 1.8270277190960886, + "grad_norm": 12.770758628845215, + "learning_rate": 4.001830467058458e-06, + "loss": 0.2906, + "num_input_tokens_seen": 89822592, + "step": 28540 + }, + { + "epoch": 1.8273478010370656, + "grad_norm": 13.489777565002441, + "learning_rate": 4.001383814757847e-06, + "loss": 0.494, + "num_input_tokens_seen": 89838656, + "step": 28545 + }, + { + "epoch": 1.8276678829780424, + "grad_norm": 23.053556442260742, + "learning_rate": 4.000937087485948e-06, + "loss": 0.2168, + "num_input_tokens_seen": 89854272, + "step": 28550 + }, + { + "epoch": 1.8279879649190192, + "grad_norm": 45.380653381347656, + "learning_rate": 4.00049028526507e-06, + "loss": 0.3996, + "num_input_tokens_seen": 89869504, + "step": 28555 + }, + { + "epoch": 1.8283080468599961, + "grad_norm": 16.76576042175293, + "learning_rate": 4.000043408117523e-06, + "loss": 0.3058, + "num_input_tokens_seen": 89884928, + "step": 28560 + }, + { + "epoch": 1.8286281288009731, + "grad_norm": 27.784191131591797, + "learning_rate": 3.999596456065621e-06, + "loss": 0.3295, + "num_input_tokens_seen": 89901056, + "step": 28565 + }, + { + "epoch": 1.82894821074195, + "grad_norm": 13.905599594116211, + "learning_rate": 3.999149429131683e-06, + "loss": 0.371, + "num_input_tokens_seen": 89917952, + "step": 28570 + }, + { + "epoch": 1.8292682926829267, + "grad_norm": 13.120275497436523, + "learning_rate": 3.998702327338031e-06, + "loss": 0.3834, + "num_input_tokens_seen": 89932992, + "step": 28575 + }, + { + "epoch": 1.8295883746239037, + "grad_norm": 12.456917762756348, + "learning_rate": 3.998255150706993e-06, + "loss": 0.31, + "num_input_tokens_seen": 89948096, + "step": 28580 + }, + { + "epoch": 1.8299084565648807, + "grad_norm": 17.415735244750977, + "learning_rate": 3.9978078992608955e-06, + "loss": 0.2606, + "num_input_tokens_seen": 89963776, + "step": 28585 + }, + { + "epoch": 1.8302285385058576, + "grad_norm": 43.488800048828125, + "learning_rate": 3.997360573022073e-06, + "loss": 0.3278, + "num_input_tokens_seen": 89978368, + "step": 28590 + }, + { + "epoch": 1.8305486204468344, + "grad_norm": 41.87416076660156, + "learning_rate": 3.996913172012864e-06, + "loss": 0.2976, + "num_input_tokens_seen": 89993792, + "step": 28595 + }, + { + "epoch": 1.8308687023878112, + "grad_norm": 18.32713508605957, + "learning_rate": 3.996465696255607e-06, + "loss": 0.3669, + "num_input_tokens_seen": 90009088, + "step": 28600 + }, + { + "epoch": 1.8311887843287882, + "grad_norm": 12.628074645996094, + "learning_rate": 3.996018145772649e-06, + "loss": 0.3186, + "num_input_tokens_seen": 90024576, + "step": 28605 + }, + { + "epoch": 1.8315088662697652, + "grad_norm": 21.69766616821289, + "learning_rate": 3.9955705205863375e-06, + "loss": 0.385, + "num_input_tokens_seen": 90040064, + "step": 28610 + }, + { + "epoch": 1.831828948210742, + "grad_norm": 15.185138702392578, + "learning_rate": 3.995122820719023e-06, + "loss": 0.3319, + "num_input_tokens_seen": 90056384, + "step": 28615 + }, + { + "epoch": 1.8321490301517187, + "grad_norm": 58.90837478637695, + "learning_rate": 3.994675046193064e-06, + "loss": 0.334, + "num_input_tokens_seen": 90072192, + "step": 28620 + }, + { + "epoch": 1.8324691120926957, + "grad_norm": 60.09852600097656, + "learning_rate": 3.994227197030818e-06, + "loss": 0.3245, + "num_input_tokens_seen": 90087936, + "step": 28625 + }, + { + "epoch": 1.8327891940336727, + "grad_norm": 16.578474044799805, + "learning_rate": 3.993779273254649e-06, + "loss": 0.3607, + "num_input_tokens_seen": 90103616, + "step": 28630 + }, + { + "epoch": 1.8331092759746497, + "grad_norm": 69.27417755126953, + "learning_rate": 3.993331274886923e-06, + "loss": 0.2726, + "num_input_tokens_seen": 90118656, + "step": 28635 + }, + { + "epoch": 1.8334293579156264, + "grad_norm": 36.61022186279297, + "learning_rate": 3.992883201950013e-06, + "loss": 0.2995, + "num_input_tokens_seen": 90136384, + "step": 28640 + }, + { + "epoch": 1.8337494398566032, + "grad_norm": 19.886735916137695, + "learning_rate": 3.992435054466291e-06, + "loss": 0.3335, + "num_input_tokens_seen": 90151424, + "step": 28645 + }, + { + "epoch": 1.8340695217975802, + "grad_norm": 36.22454833984375, + "learning_rate": 3.991986832458138e-06, + "loss": 0.4017, + "num_input_tokens_seen": 90167360, + "step": 28650 + }, + { + "epoch": 1.8343896037385572, + "grad_norm": 30.07575225830078, + "learning_rate": 3.991538535947933e-06, + "loss": 0.4695, + "num_input_tokens_seen": 90183296, + "step": 28655 + }, + { + "epoch": 1.834709685679534, + "grad_norm": 18.708986282348633, + "learning_rate": 3.991090164958062e-06, + "loss": 0.2858, + "num_input_tokens_seen": 90199040, + "step": 28660 + }, + { + "epoch": 1.8350297676205107, + "grad_norm": 66.23686218261719, + "learning_rate": 3.990641719510916e-06, + "loss": 0.4025, + "num_input_tokens_seen": 90214656, + "step": 28665 + }, + { + "epoch": 1.8353498495614877, + "grad_norm": 11.62290096282959, + "learning_rate": 3.990193199628886e-06, + "loss": 0.3675, + "num_input_tokens_seen": 90230336, + "step": 28670 + }, + { + "epoch": 1.8356699315024647, + "grad_norm": 12.562758445739746, + "learning_rate": 3.9897446053343705e-06, + "loss": 0.2516, + "num_input_tokens_seen": 90245248, + "step": 28675 + }, + { + "epoch": 1.8359900134434415, + "grad_norm": 18.54178237915039, + "learning_rate": 3.989295936649769e-06, + "loss": 0.3023, + "num_input_tokens_seen": 90260608, + "step": 28680 + }, + { + "epoch": 1.8363100953844183, + "grad_norm": 14.94848346710205, + "learning_rate": 3.988847193597486e-06, + "loss": 0.3535, + "num_input_tokens_seen": 90276672, + "step": 28685 + }, + { + "epoch": 1.8366301773253952, + "grad_norm": 31.02399253845215, + "learning_rate": 3.988398376199929e-06, + "loss": 0.5439, + "num_input_tokens_seen": 90291840, + "step": 28690 + }, + { + "epoch": 1.8369502592663722, + "grad_norm": 22.078397750854492, + "learning_rate": 3.9879494844795095e-06, + "loss": 0.2631, + "num_input_tokens_seen": 90308352, + "step": 28695 + }, + { + "epoch": 1.8372703412073492, + "grad_norm": 4.443678855895996, + "learning_rate": 3.987500518458644e-06, + "loss": 0.2444, + "num_input_tokens_seen": 90323840, + "step": 28700 + }, + { + "epoch": 1.837590423148326, + "grad_norm": 27.736433029174805, + "learning_rate": 3.98705147815975e-06, + "loss": 0.3744, + "num_input_tokens_seen": 90339968, + "step": 28705 + }, + { + "epoch": 1.8379105050893028, + "grad_norm": 9.993854522705078, + "learning_rate": 3.986602363605251e-06, + "loss": 0.359, + "num_input_tokens_seen": 90355328, + "step": 28710 + }, + { + "epoch": 1.8382305870302798, + "grad_norm": 12.782987594604492, + "learning_rate": 3.986153174817574e-06, + "loss": 0.2048, + "num_input_tokens_seen": 90370240, + "step": 28715 + }, + { + "epoch": 1.8385506689712567, + "grad_norm": 10.760567665100098, + "learning_rate": 3.985703911819149e-06, + "loss": 0.3128, + "num_input_tokens_seen": 90389696, + "step": 28720 + }, + { + "epoch": 1.8388707509122335, + "grad_norm": 8.431180000305176, + "learning_rate": 3.985254574632408e-06, + "loss": 0.3714, + "num_input_tokens_seen": 90405184, + "step": 28725 + }, + { + "epoch": 1.8391908328532103, + "grad_norm": 9.162860870361328, + "learning_rate": 3.984805163279791e-06, + "loss": 0.3571, + "num_input_tokens_seen": 90422528, + "step": 28730 + }, + { + "epoch": 1.8395109147941873, + "grad_norm": 12.956842422485352, + "learning_rate": 3.984355677783738e-06, + "loss": 0.2416, + "num_input_tokens_seen": 90439552, + "step": 28735 + }, + { + "epoch": 1.8398309967351643, + "grad_norm": 50.86716842651367, + "learning_rate": 3.983906118166694e-06, + "loss": 0.4798, + "num_input_tokens_seen": 90454336, + "step": 28740 + }, + { + "epoch": 1.8401510786761412, + "grad_norm": 19.431400299072266, + "learning_rate": 3.983456484451108e-06, + "loss": 0.2643, + "num_input_tokens_seen": 90470336, + "step": 28745 + }, + { + "epoch": 1.840471160617118, + "grad_norm": 14.62239933013916, + "learning_rate": 3.983006776659432e-06, + "loss": 0.3677, + "num_input_tokens_seen": 90486144, + "step": 28750 + }, + { + "epoch": 1.8407912425580948, + "grad_norm": 14.016789436340332, + "learning_rate": 3.982556994814122e-06, + "loss": 0.3843, + "num_input_tokens_seen": 90501504, + "step": 28755 + }, + { + "epoch": 1.8411113244990718, + "grad_norm": 22.032974243164062, + "learning_rate": 3.982107138937638e-06, + "loss": 0.366, + "num_input_tokens_seen": 90517632, + "step": 28760 + }, + { + "epoch": 1.8414314064400488, + "grad_norm": 54.81047439575195, + "learning_rate": 3.9816572090524445e-06, + "loss": 0.4233, + "num_input_tokens_seen": 90533440, + "step": 28765 + }, + { + "epoch": 1.8417514883810255, + "grad_norm": 35.19789123535156, + "learning_rate": 3.981207205181006e-06, + "loss": 0.4106, + "num_input_tokens_seen": 90547840, + "step": 28770 + }, + { + "epoch": 1.8420715703220023, + "grad_norm": 41.02235794067383, + "learning_rate": 3.980757127345796e-06, + "loss": 0.3703, + "num_input_tokens_seen": 90562560, + "step": 28775 + }, + { + "epoch": 1.8423916522629793, + "grad_norm": 25.0679988861084, + "learning_rate": 3.980306975569288e-06, + "loss": 0.2585, + "num_input_tokens_seen": 90577664, + "step": 28780 + }, + { + "epoch": 1.8427117342039563, + "grad_norm": 71.28292083740234, + "learning_rate": 3.9798567498739605e-06, + "loss": 0.3285, + "num_input_tokens_seen": 90593024, + "step": 28785 + }, + { + "epoch": 1.843031816144933, + "grad_norm": 9.616785049438477, + "learning_rate": 3.979406450282295e-06, + "loss": 0.3128, + "num_input_tokens_seen": 90607616, + "step": 28790 + }, + { + "epoch": 1.84335189808591, + "grad_norm": 11.211156845092773, + "learning_rate": 3.978956076816778e-06, + "loss": 0.2937, + "num_input_tokens_seen": 90622528, + "step": 28795 + }, + { + "epoch": 1.8436719800268868, + "grad_norm": 38.873435974121094, + "learning_rate": 3.978505629499897e-06, + "loss": 0.5434, + "num_input_tokens_seen": 90638272, + "step": 28800 + }, + { + "epoch": 1.8439920619678638, + "grad_norm": 16.6673583984375, + "learning_rate": 3.9780551083541465e-06, + "loss": 0.2492, + "num_input_tokens_seen": 90654080, + "step": 28805 + }, + { + "epoch": 1.8443121439088408, + "grad_norm": 19.177444458007812, + "learning_rate": 3.9776045134020234e-06, + "loss": 0.3155, + "num_input_tokens_seen": 90668992, + "step": 28810 + }, + { + "epoch": 1.8446322258498176, + "grad_norm": 21.22713279724121, + "learning_rate": 3.977153844666026e-06, + "loss": 0.3145, + "num_input_tokens_seen": 90684736, + "step": 28815 + }, + { + "epoch": 1.8449523077907943, + "grad_norm": 37.32114791870117, + "learning_rate": 3.976703102168661e-06, + "loss": 0.3716, + "num_input_tokens_seen": 90700416, + "step": 28820 + }, + { + "epoch": 1.8452723897317713, + "grad_norm": 18.37027359008789, + "learning_rate": 3.976252285932435e-06, + "loss": 0.3012, + "num_input_tokens_seen": 90715648, + "step": 28825 + }, + { + "epoch": 1.8455924716727483, + "grad_norm": 36.66377258300781, + "learning_rate": 3.975801395979859e-06, + "loss": 0.3135, + "num_input_tokens_seen": 90730432, + "step": 28830 + }, + { + "epoch": 1.845912553613725, + "grad_norm": 27.66805648803711, + "learning_rate": 3.975350432333449e-06, + "loss": 0.4524, + "num_input_tokens_seen": 90746240, + "step": 28835 + }, + { + "epoch": 1.8462326355547019, + "grad_norm": 54.457847595214844, + "learning_rate": 3.974899395015722e-06, + "loss": 0.301, + "num_input_tokens_seen": 90760768, + "step": 28840 + }, + { + "epoch": 1.8465527174956788, + "grad_norm": 15.923336029052734, + "learning_rate": 3.974448284049202e-06, + "loss": 0.3463, + "num_input_tokens_seen": 90776896, + "step": 28845 + }, + { + "epoch": 1.8468727994366558, + "grad_norm": 22.233701705932617, + "learning_rate": 3.973997099456416e-06, + "loss": 0.2872, + "num_input_tokens_seen": 90792512, + "step": 28850 + }, + { + "epoch": 1.8471928813776328, + "grad_norm": 35.170692443847656, + "learning_rate": 3.973545841259892e-06, + "loss": 0.272, + "num_input_tokens_seen": 90808960, + "step": 28855 + }, + { + "epoch": 1.8475129633186096, + "grad_norm": 15.061385154724121, + "learning_rate": 3.973094509482164e-06, + "loss": 0.2062, + "num_input_tokens_seen": 90823744, + "step": 28860 + }, + { + "epoch": 1.8478330452595864, + "grad_norm": 13.476611137390137, + "learning_rate": 3.972643104145769e-06, + "loss": 0.3953, + "num_input_tokens_seen": 90840576, + "step": 28865 + }, + { + "epoch": 1.8481531272005634, + "grad_norm": 13.706339836120605, + "learning_rate": 3.972191625273248e-06, + "loss": 0.2605, + "num_input_tokens_seen": 90855872, + "step": 28870 + }, + { + "epoch": 1.8484732091415403, + "grad_norm": 35.71235656738281, + "learning_rate": 3.971740072887146e-06, + "loss": 0.3846, + "num_input_tokens_seen": 90871040, + "step": 28875 + }, + { + "epoch": 1.848793291082517, + "grad_norm": 45.633705139160156, + "learning_rate": 3.971288447010011e-06, + "loss": 0.239, + "num_input_tokens_seen": 90887360, + "step": 28880 + }, + { + "epoch": 1.8491133730234939, + "grad_norm": 31.753225326538086, + "learning_rate": 3.970836747664394e-06, + "loss": 0.3381, + "num_input_tokens_seen": 90902464, + "step": 28885 + }, + { + "epoch": 1.8494334549644709, + "grad_norm": 17.96687889099121, + "learning_rate": 3.970384974872851e-06, + "loss": 0.33, + "num_input_tokens_seen": 90917056, + "step": 28890 + }, + { + "epoch": 1.8497535369054479, + "grad_norm": 42.486576080322266, + "learning_rate": 3.969933128657942e-06, + "loss": 0.4705, + "num_input_tokens_seen": 90932160, + "step": 28895 + }, + { + "epoch": 1.8500736188464248, + "grad_norm": 30.307218551635742, + "learning_rate": 3.9694812090422294e-06, + "loss": 0.3002, + "num_input_tokens_seen": 90947328, + "step": 28900 + }, + { + "epoch": 1.8503937007874016, + "grad_norm": 44.39156723022461, + "learning_rate": 3.969029216048279e-06, + "loss": 0.3778, + "num_input_tokens_seen": 90962880, + "step": 28905 + }, + { + "epoch": 1.8507137827283784, + "grad_norm": 20.6695499420166, + "learning_rate": 3.968577149698661e-06, + "loss": 0.313, + "num_input_tokens_seen": 90978944, + "step": 28910 + }, + { + "epoch": 1.8510338646693554, + "grad_norm": 47.58138656616211, + "learning_rate": 3.9681250100159505e-06, + "loss": 0.3712, + "num_input_tokens_seen": 90994368, + "step": 28915 + }, + { + "epoch": 1.8513539466103324, + "grad_norm": 24.702781677246094, + "learning_rate": 3.967672797022724e-06, + "loss": 0.4533, + "num_input_tokens_seen": 91011072, + "step": 28920 + }, + { + "epoch": 1.8516740285513091, + "grad_norm": 23.113719940185547, + "learning_rate": 3.967220510741562e-06, + "loss": 0.2506, + "num_input_tokens_seen": 91026688, + "step": 28925 + }, + { + "epoch": 1.851994110492286, + "grad_norm": 25.764646530151367, + "learning_rate": 3.966768151195051e-06, + "loss": 0.3414, + "num_input_tokens_seen": 91042176, + "step": 28930 + }, + { + "epoch": 1.852314192433263, + "grad_norm": 14.516716003417969, + "learning_rate": 3.966315718405779e-06, + "loss": 0.2907, + "num_input_tokens_seen": 91057600, + "step": 28935 + }, + { + "epoch": 1.8526342743742399, + "grad_norm": 14.688582420349121, + "learning_rate": 3.965863212396337e-06, + "loss": 0.3664, + "num_input_tokens_seen": 91073152, + "step": 28940 + }, + { + "epoch": 1.8529543563152167, + "grad_norm": 29.789827346801758, + "learning_rate": 3.965410633189321e-06, + "loss": 0.3997, + "num_input_tokens_seen": 91088896, + "step": 28945 + }, + { + "epoch": 1.8532744382561934, + "grad_norm": 24.808185577392578, + "learning_rate": 3.964957980807332e-06, + "loss": 0.4065, + "num_input_tokens_seen": 91104256, + "step": 28950 + }, + { + "epoch": 1.8535945201971704, + "grad_norm": 34.87112808227539, + "learning_rate": 3.9645052552729725e-06, + "loss": 0.3937, + "num_input_tokens_seen": 91119488, + "step": 28955 + }, + { + "epoch": 1.8539146021381474, + "grad_norm": 16.967491149902344, + "learning_rate": 3.964052456608848e-06, + "loss": 0.3684, + "num_input_tokens_seen": 91134848, + "step": 28960 + }, + { + "epoch": 1.8542346840791244, + "grad_norm": 16.952314376831055, + "learning_rate": 3.9635995848375706e-06, + "loss": 0.2936, + "num_input_tokens_seen": 91151488, + "step": 28965 + }, + { + "epoch": 1.8545547660201012, + "grad_norm": 26.562171936035156, + "learning_rate": 3.963146639981753e-06, + "loss": 0.3816, + "num_input_tokens_seen": 91166912, + "step": 28970 + }, + { + "epoch": 1.854874847961078, + "grad_norm": 14.705999374389648, + "learning_rate": 3.962693622064013e-06, + "loss": 0.3236, + "num_input_tokens_seen": 91183360, + "step": 28975 + }, + { + "epoch": 1.855194929902055, + "grad_norm": 48.26199722290039, + "learning_rate": 3.962240531106973e-06, + "loss": 0.4398, + "num_input_tokens_seen": 91198592, + "step": 28980 + }, + { + "epoch": 1.855515011843032, + "grad_norm": 9.569680213928223, + "learning_rate": 3.961787367133258e-06, + "loss": 0.2922, + "num_input_tokens_seen": 91213696, + "step": 28985 + }, + { + "epoch": 1.8558350937840087, + "grad_norm": 15.099367141723633, + "learning_rate": 3.9613341301654954e-06, + "loss": 0.2227, + "num_input_tokens_seen": 91228672, + "step": 28990 + }, + { + "epoch": 1.8561551757249855, + "grad_norm": 38.288387298583984, + "learning_rate": 3.960880820226318e-06, + "loss": 0.3278, + "num_input_tokens_seen": 91244160, + "step": 28995 + }, + { + "epoch": 1.8564752576659624, + "grad_norm": 44.33543014526367, + "learning_rate": 3.960427437338362e-06, + "loss": 0.2866, + "num_input_tokens_seen": 91259904, + "step": 29000 + }, + { + "epoch": 1.8567953396069394, + "grad_norm": 13.27837085723877, + "learning_rate": 3.9599739815242665e-06, + "loss": 0.2409, + "num_input_tokens_seen": 91274816, + "step": 29005 + }, + { + "epoch": 1.8571154215479164, + "grad_norm": 35.33314514160156, + "learning_rate": 3.959520452806675e-06, + "loss": 0.3317, + "num_input_tokens_seen": 91290624, + "step": 29010 + }, + { + "epoch": 1.8574355034888932, + "grad_norm": 11.981253623962402, + "learning_rate": 3.9590668512082355e-06, + "loss": 0.2801, + "num_input_tokens_seen": 91307264, + "step": 29015 + }, + { + "epoch": 1.85775558542987, + "grad_norm": 29.393232345581055, + "learning_rate": 3.958613176751597e-06, + "loss": 0.3701, + "num_input_tokens_seen": 91322112, + "step": 29020 + }, + { + "epoch": 1.858075667370847, + "grad_norm": 16.0595645904541, + "learning_rate": 3.9581594294594135e-06, + "loss": 0.3043, + "num_input_tokens_seen": 91338112, + "step": 29025 + }, + { + "epoch": 1.858395749311824, + "grad_norm": 46.18486404418945, + "learning_rate": 3.957705609354345e-06, + "loss": 0.4275, + "num_input_tokens_seen": 91353792, + "step": 29030 + }, + { + "epoch": 1.8587158312528007, + "grad_norm": 26.759286880493164, + "learning_rate": 3.9572517164590495e-06, + "loss": 0.5796, + "num_input_tokens_seen": 91369216, + "step": 29035 + }, + { + "epoch": 1.8590359131937775, + "grad_norm": 49.33650207519531, + "learning_rate": 3.956797750796195e-06, + "loss": 0.3269, + "num_input_tokens_seen": 91384000, + "step": 29040 + }, + { + "epoch": 1.8593559951347545, + "grad_norm": 6.868922710418701, + "learning_rate": 3.956343712388448e-06, + "loss": 0.3778, + "num_input_tokens_seen": 91400064, + "step": 29045 + }, + { + "epoch": 1.8596760770757315, + "grad_norm": 19.381458282470703, + "learning_rate": 3.955889601258483e-06, + "loss": 0.3251, + "num_input_tokens_seen": 91415616, + "step": 29050 + }, + { + "epoch": 1.8599961590167082, + "grad_norm": 19.5499210357666, + "learning_rate": 3.955435417428973e-06, + "loss": 0.3249, + "num_input_tokens_seen": 91432192, + "step": 29055 + }, + { + "epoch": 1.8603162409576852, + "grad_norm": 37.81060028076172, + "learning_rate": 3.954981160922601e-06, + "loss": 0.483, + "num_input_tokens_seen": 91447808, + "step": 29060 + }, + { + "epoch": 1.860636322898662, + "grad_norm": 42.51133346557617, + "learning_rate": 3.954526831762048e-06, + "loss": 0.2738, + "num_input_tokens_seen": 91462912, + "step": 29065 + }, + { + "epoch": 1.860956404839639, + "grad_norm": 33.341217041015625, + "learning_rate": 3.954072429970002e-06, + "loss": 0.3437, + "num_input_tokens_seen": 91478656, + "step": 29070 + }, + { + "epoch": 1.861276486780616, + "grad_norm": 15.93057632446289, + "learning_rate": 3.953617955569151e-06, + "loss": 0.2974, + "num_input_tokens_seen": 91493824, + "step": 29075 + }, + { + "epoch": 1.8615965687215927, + "grad_norm": 65.57926177978516, + "learning_rate": 3.953163408582193e-06, + "loss": 0.5277, + "num_input_tokens_seen": 91509184, + "step": 29080 + }, + { + "epoch": 1.8619166506625695, + "grad_norm": 92.5763931274414, + "learning_rate": 3.952708789031822e-06, + "loss": 0.2569, + "num_input_tokens_seen": 91524992, + "step": 29085 + }, + { + "epoch": 1.8622367326035465, + "grad_norm": 14.650162696838379, + "learning_rate": 3.952254096940742e-06, + "loss": 0.3735, + "num_input_tokens_seen": 91540800, + "step": 29090 + }, + { + "epoch": 1.8625568145445235, + "grad_norm": 16.783449172973633, + "learning_rate": 3.951799332331656e-06, + "loss": 0.2988, + "num_input_tokens_seen": 91555904, + "step": 29095 + }, + { + "epoch": 1.8628768964855003, + "grad_norm": 22.363670349121094, + "learning_rate": 3.951344495227275e-06, + "loss": 0.3661, + "num_input_tokens_seen": 91570944, + "step": 29100 + }, + { + "epoch": 1.863196978426477, + "grad_norm": 12.960387229919434, + "learning_rate": 3.950889585650308e-06, + "loss": 0.3106, + "num_input_tokens_seen": 91586432, + "step": 29105 + }, + { + "epoch": 1.863517060367454, + "grad_norm": 11.946305274963379, + "learning_rate": 3.950434603623474e-06, + "loss": 0.3557, + "num_input_tokens_seen": 91601216, + "step": 29110 + }, + { + "epoch": 1.863837142308431, + "grad_norm": 8.680253028869629, + "learning_rate": 3.949979549169489e-06, + "loss": 0.3512, + "num_input_tokens_seen": 91617792, + "step": 29115 + }, + { + "epoch": 1.864157224249408, + "grad_norm": 43.44010543823242, + "learning_rate": 3.9495244223110795e-06, + "loss": 0.2518, + "num_input_tokens_seen": 91633600, + "step": 29120 + }, + { + "epoch": 1.8644773061903848, + "grad_norm": 16.352243423461914, + "learning_rate": 3.94906922307097e-06, + "loss": 0.2783, + "num_input_tokens_seen": 91649408, + "step": 29125 + }, + { + "epoch": 1.8647973881313615, + "grad_norm": 20.241741180419922, + "learning_rate": 3.948613951471892e-06, + "loss": 0.2948, + "num_input_tokens_seen": 91665280, + "step": 29130 + }, + { + "epoch": 1.8651174700723385, + "grad_norm": 21.04620933532715, + "learning_rate": 3.948158607536579e-06, + "loss": 0.2225, + "num_input_tokens_seen": 91681152, + "step": 29135 + }, + { + "epoch": 1.8654375520133155, + "grad_norm": 19.05590057373047, + "learning_rate": 3.947703191287768e-06, + "loss": 0.3353, + "num_input_tokens_seen": 91696768, + "step": 29140 + }, + { + "epoch": 1.8657576339542923, + "grad_norm": 23.882911682128906, + "learning_rate": 3.9472477027482e-06, + "loss": 0.4083, + "num_input_tokens_seen": 91713344, + "step": 29145 + }, + { + "epoch": 1.866077715895269, + "grad_norm": 32.692718505859375, + "learning_rate": 3.946792141940621e-06, + "loss": 0.3764, + "num_input_tokens_seen": 91728768, + "step": 29150 + }, + { + "epoch": 1.866397797836246, + "grad_norm": 9.343303680419922, + "learning_rate": 3.946336508887778e-06, + "loss": 0.1963, + "num_input_tokens_seen": 91744320, + "step": 29155 + }, + { + "epoch": 1.866717879777223, + "grad_norm": 32.22968292236328, + "learning_rate": 3.945880803612423e-06, + "loss": 0.4213, + "num_input_tokens_seen": 91759616, + "step": 29160 + }, + { + "epoch": 1.8670379617182, + "grad_norm": 21.59407615661621, + "learning_rate": 3.945425026137313e-06, + "loss": 0.3482, + "num_input_tokens_seen": 91775040, + "step": 29165 + }, + { + "epoch": 1.8673580436591768, + "grad_norm": 5.827695369720459, + "learning_rate": 3.9449691764852045e-06, + "loss": 0.3791, + "num_input_tokens_seen": 91789568, + "step": 29170 + }, + { + "epoch": 1.8676781256001536, + "grad_norm": 20.636701583862305, + "learning_rate": 3.944513254678863e-06, + "loss": 0.3363, + "num_input_tokens_seen": 91807232, + "step": 29175 + }, + { + "epoch": 1.8679982075411306, + "grad_norm": 18.30698585510254, + "learning_rate": 3.944057260741054e-06, + "loss": 0.3427, + "num_input_tokens_seen": 91822848, + "step": 29180 + }, + { + "epoch": 1.8683182894821075, + "grad_norm": 19.487810134887695, + "learning_rate": 3.943601194694547e-06, + "loss": 0.3246, + "num_input_tokens_seen": 91838080, + "step": 29185 + }, + { + "epoch": 1.8686383714230843, + "grad_norm": 62.39342498779297, + "learning_rate": 3.943145056562116e-06, + "loss": 0.3411, + "num_input_tokens_seen": 91852672, + "step": 29190 + }, + { + "epoch": 1.868958453364061, + "grad_norm": 8.430472373962402, + "learning_rate": 3.942688846366537e-06, + "loss": 0.2123, + "num_input_tokens_seen": 91867520, + "step": 29195 + }, + { + "epoch": 1.869278535305038, + "grad_norm": 25.58384132385254, + "learning_rate": 3.942232564130592e-06, + "loss": 0.411, + "num_input_tokens_seen": 91882368, + "step": 29200 + }, + { + "epoch": 1.869598617246015, + "grad_norm": 15.405624389648438, + "learning_rate": 3.941776209877066e-06, + "loss": 0.2532, + "num_input_tokens_seen": 91897216, + "step": 29205 + }, + { + "epoch": 1.8699186991869918, + "grad_norm": 10.146136283874512, + "learning_rate": 3.941319783628745e-06, + "loss": 0.2938, + "num_input_tokens_seen": 91913088, + "step": 29210 + }, + { + "epoch": 1.8702387811279686, + "grad_norm": 29.276355743408203, + "learning_rate": 3.940863285408423e-06, + "loss": 0.3412, + "num_input_tokens_seen": 91929152, + "step": 29215 + }, + { + "epoch": 1.8705588630689456, + "grad_norm": 8.702577590942383, + "learning_rate": 3.940406715238892e-06, + "loss": 0.2966, + "num_input_tokens_seen": 91944256, + "step": 29220 + }, + { + "epoch": 1.8708789450099226, + "grad_norm": 49.134544372558594, + "learning_rate": 3.939950073142954e-06, + "loss": 0.2381, + "num_input_tokens_seen": 91961216, + "step": 29225 + }, + { + "epoch": 1.8711990269508996, + "grad_norm": 29.56614112854004, + "learning_rate": 3.939493359143408e-06, + "loss": 0.4507, + "num_input_tokens_seen": 91976384, + "step": 29230 + }, + { + "epoch": 1.8715191088918763, + "grad_norm": 9.751029014587402, + "learning_rate": 3.939036573263063e-06, + "loss": 0.2847, + "num_input_tokens_seen": 91992512, + "step": 29235 + }, + { + "epoch": 1.871839190832853, + "grad_norm": 40.614749908447266, + "learning_rate": 3.938579715524726e-06, + "loss": 0.2979, + "num_input_tokens_seen": 92007360, + "step": 29240 + }, + { + "epoch": 1.87215927277383, + "grad_norm": 31.275676727294922, + "learning_rate": 3.9381227859512125e-06, + "loss": 0.2641, + "num_input_tokens_seen": 92023040, + "step": 29245 + }, + { + "epoch": 1.872479354714807, + "grad_norm": 13.967216491699219, + "learning_rate": 3.9376657845653386e-06, + "loss": 0.3321, + "num_input_tokens_seen": 92038912, + "step": 29250 + }, + { + "epoch": 1.8727994366557839, + "grad_norm": 24.062429428100586, + "learning_rate": 3.9372087113899224e-06, + "loss": 0.2838, + "num_input_tokens_seen": 92054080, + "step": 29255 + }, + { + "epoch": 1.8731195185967606, + "grad_norm": 19.111379623413086, + "learning_rate": 3.93675156644779e-06, + "loss": 0.2504, + "num_input_tokens_seen": 92069312, + "step": 29260 + }, + { + "epoch": 1.8734396005377376, + "grad_norm": 10.02426815032959, + "learning_rate": 3.936294349761768e-06, + "loss": 0.3596, + "num_input_tokens_seen": 92086528, + "step": 29265 + }, + { + "epoch": 1.8737596824787146, + "grad_norm": 46.70328903198242, + "learning_rate": 3.935837061354687e-06, + "loss": 0.4362, + "num_input_tokens_seen": 92102336, + "step": 29270 + }, + { + "epoch": 1.8740797644196916, + "grad_norm": 23.468820571899414, + "learning_rate": 3.935379701249383e-06, + "loss": 0.5054, + "num_input_tokens_seen": 92121344, + "step": 29275 + }, + { + "epoch": 1.8743998463606684, + "grad_norm": 23.966312408447266, + "learning_rate": 3.9349222694686926e-06, + "loss": 0.2932, + "num_input_tokens_seen": 92136896, + "step": 29280 + }, + { + "epoch": 1.8747199283016451, + "grad_norm": 15.729668617248535, + "learning_rate": 3.93446476603546e-06, + "loss": 0.3226, + "num_input_tokens_seen": 92151168, + "step": 29285 + }, + { + "epoch": 1.8750400102426221, + "grad_norm": 5.310672283172607, + "learning_rate": 3.934007190972527e-06, + "loss": 0.2129, + "num_input_tokens_seen": 92166848, + "step": 29290 + }, + { + "epoch": 1.8753600921835991, + "grad_norm": 12.005672454833984, + "learning_rate": 3.933549544302745e-06, + "loss": 0.3391, + "num_input_tokens_seen": 92182400, + "step": 29295 + }, + { + "epoch": 1.8756801741245759, + "grad_norm": 31.59393882751465, + "learning_rate": 3.9330918260489656e-06, + "loss": 0.3735, + "num_input_tokens_seen": 92197824, + "step": 29300 + }, + { + "epoch": 1.8760002560655527, + "grad_norm": 23.457990646362305, + "learning_rate": 3.932634036234045e-06, + "loss": 0.3093, + "num_input_tokens_seen": 92212992, + "step": 29305 + }, + { + "epoch": 1.8763203380065296, + "grad_norm": 17.77989387512207, + "learning_rate": 3.932176174880843e-06, + "loss": 0.3101, + "num_input_tokens_seen": 92228864, + "step": 29310 + }, + { + "epoch": 1.8766404199475066, + "grad_norm": 44.58913040161133, + "learning_rate": 3.931718242012223e-06, + "loss": 0.3471, + "num_input_tokens_seen": 92244800, + "step": 29315 + }, + { + "epoch": 1.8769605018884834, + "grad_norm": 22.149150848388672, + "learning_rate": 3.931260237651052e-06, + "loss": 0.2309, + "num_input_tokens_seen": 92259648, + "step": 29320 + }, + { + "epoch": 1.8772805838294604, + "grad_norm": 71.32801818847656, + "learning_rate": 3.930802161820199e-06, + "loss": 0.332, + "num_input_tokens_seen": 92275136, + "step": 29325 + }, + { + "epoch": 1.8776006657704372, + "grad_norm": 47.53898620605469, + "learning_rate": 3.93034401454254e-06, + "loss": 0.4056, + "num_input_tokens_seen": 92290752, + "step": 29330 + }, + { + "epoch": 1.8779207477114142, + "grad_norm": 36.481529235839844, + "learning_rate": 3.92988579584095e-06, + "loss": 0.3318, + "num_input_tokens_seen": 92305728, + "step": 29335 + }, + { + "epoch": 1.8782408296523911, + "grad_norm": 7.916237831115723, + "learning_rate": 3.929427505738312e-06, + "loss": 0.338, + "num_input_tokens_seen": 92321088, + "step": 29340 + }, + { + "epoch": 1.878560911593368, + "grad_norm": 14.059556007385254, + "learning_rate": 3.92896914425751e-06, + "loss": 0.359, + "num_input_tokens_seen": 92336512, + "step": 29345 + }, + { + "epoch": 1.8788809935343447, + "grad_norm": 28.55406951904297, + "learning_rate": 3.928510711421431e-06, + "loss": 0.437, + "num_input_tokens_seen": 92352128, + "step": 29350 + }, + { + "epoch": 1.8792010754753217, + "grad_norm": 23.137226104736328, + "learning_rate": 3.9280522072529694e-06, + "loss": 0.3809, + "num_input_tokens_seen": 92367616, + "step": 29355 + }, + { + "epoch": 1.8795211574162987, + "grad_norm": 41.7763557434082, + "learning_rate": 3.927593631775019e-06, + "loss": 0.3917, + "num_input_tokens_seen": 92383104, + "step": 29360 + }, + { + "epoch": 1.8798412393572754, + "grad_norm": 34.17869186401367, + "learning_rate": 3.927134985010478e-06, + "loss": 0.3139, + "num_input_tokens_seen": 92399552, + "step": 29365 + }, + { + "epoch": 1.8801613212982522, + "grad_norm": 17.134414672851562, + "learning_rate": 3.92667626698225e-06, + "loss": 0.292, + "num_input_tokens_seen": 92414272, + "step": 29370 + }, + { + "epoch": 1.8804814032392292, + "grad_norm": 15.340600967407227, + "learning_rate": 3.92621747771324e-06, + "loss": 0.4266, + "num_input_tokens_seen": 92430464, + "step": 29375 + }, + { + "epoch": 1.8808014851802062, + "grad_norm": 7.173458099365234, + "learning_rate": 3.925758617226358e-06, + "loss": 0.2903, + "num_input_tokens_seen": 92445248, + "step": 29380 + }, + { + "epoch": 1.8811215671211832, + "grad_norm": 13.543390274047852, + "learning_rate": 3.925299685544518e-06, + "loss": 0.3035, + "num_input_tokens_seen": 92461376, + "step": 29385 + }, + { + "epoch": 1.88144164906216, + "grad_norm": 20.04826545715332, + "learning_rate": 3.924840682690636e-06, + "loss": 0.2456, + "num_input_tokens_seen": 92476736, + "step": 29390 + }, + { + "epoch": 1.8817617310031367, + "grad_norm": 47.620384216308594, + "learning_rate": 3.924381608687631e-06, + "loss": 0.3066, + "num_input_tokens_seen": 92491392, + "step": 29395 + }, + { + "epoch": 1.8820818129441137, + "grad_norm": 18.37364387512207, + "learning_rate": 3.923922463558428e-06, + "loss": 0.4081, + "num_input_tokens_seen": 92506240, + "step": 29400 + }, + { + "epoch": 1.8824018948850907, + "grad_norm": 17.911943435668945, + "learning_rate": 3.9234632473259535e-06, + "loss": 0.3408, + "num_input_tokens_seen": 92522240, + "step": 29405 + }, + { + "epoch": 1.8827219768260675, + "grad_norm": 35.70653533935547, + "learning_rate": 3.92300396001314e-06, + "loss": 0.3953, + "num_input_tokens_seen": 92538688, + "step": 29410 + }, + { + "epoch": 1.8830420587670442, + "grad_norm": 12.55550479888916, + "learning_rate": 3.922544601642921e-06, + "loss": 0.2775, + "num_input_tokens_seen": 92553664, + "step": 29415 + }, + { + "epoch": 1.8833621407080212, + "grad_norm": 12.551745414733887, + "learning_rate": 3.922085172238234e-06, + "loss": 0.3444, + "num_input_tokens_seen": 92569152, + "step": 29420 + }, + { + "epoch": 1.8836822226489982, + "grad_norm": 39.581626892089844, + "learning_rate": 3.921625671822021e-06, + "loss": 0.3184, + "num_input_tokens_seen": 92584320, + "step": 29425 + }, + { + "epoch": 1.8840023045899752, + "grad_norm": 9.208744049072266, + "learning_rate": 3.9211661004172265e-06, + "loss": 0.3453, + "num_input_tokens_seen": 92600576, + "step": 29430 + }, + { + "epoch": 1.884322386530952, + "grad_norm": 19.816028594970703, + "learning_rate": 3.9207064580468e-06, + "loss": 0.3237, + "num_input_tokens_seen": 92616384, + "step": 29435 + }, + { + "epoch": 1.8846424684719287, + "grad_norm": 16.266124725341797, + "learning_rate": 3.9202467447336935e-06, + "loss": 0.3214, + "num_input_tokens_seen": 92633216, + "step": 29440 + }, + { + "epoch": 1.8849625504129057, + "grad_norm": 23.292312622070312, + "learning_rate": 3.919786960500862e-06, + "loss": 0.4061, + "num_input_tokens_seen": 92650368, + "step": 29445 + }, + { + "epoch": 1.8852826323538827, + "grad_norm": 21.99002456665039, + "learning_rate": 3.919327105371264e-06, + "loss": 0.3836, + "num_input_tokens_seen": 92667520, + "step": 29450 + }, + { + "epoch": 1.8856027142948595, + "grad_norm": 39.6856803894043, + "learning_rate": 3.918867179367865e-06, + "loss": 0.3477, + "num_input_tokens_seen": 92683520, + "step": 29455 + }, + { + "epoch": 1.8859227962358363, + "grad_norm": 12.141905784606934, + "learning_rate": 3.918407182513629e-06, + "loss": 0.3697, + "num_input_tokens_seen": 92700928, + "step": 29460 + }, + { + "epoch": 1.8862428781768132, + "grad_norm": 79.67517852783203, + "learning_rate": 3.917947114831526e-06, + "loss": 0.3807, + "num_input_tokens_seen": 92716672, + "step": 29465 + }, + { + "epoch": 1.8865629601177902, + "grad_norm": 20.970195770263672, + "learning_rate": 3.91748697634453e-06, + "loss": 0.2563, + "num_input_tokens_seen": 92732928, + "step": 29470 + }, + { + "epoch": 1.886883042058767, + "grad_norm": 32.1429328918457, + "learning_rate": 3.917026767075619e-06, + "loss": 0.3888, + "num_input_tokens_seen": 92748224, + "step": 29475 + }, + { + "epoch": 1.8872031239997438, + "grad_norm": 8.232988357543945, + "learning_rate": 3.916566487047772e-06, + "loss": 0.3139, + "num_input_tokens_seen": 92764224, + "step": 29480 + }, + { + "epoch": 1.8875232059407208, + "grad_norm": 40.09192657470703, + "learning_rate": 3.916106136283973e-06, + "loss": 0.3486, + "num_input_tokens_seen": 92779776, + "step": 29485 + }, + { + "epoch": 1.8878432878816978, + "grad_norm": 10.644286155700684, + "learning_rate": 3.915645714807209e-06, + "loss": 0.3538, + "num_input_tokens_seen": 92795072, + "step": 29490 + }, + { + "epoch": 1.8881633698226747, + "grad_norm": 10.119404792785645, + "learning_rate": 3.9151852226404714e-06, + "loss": 0.1942, + "num_input_tokens_seen": 92810432, + "step": 29495 + }, + { + "epoch": 1.8884834517636515, + "grad_norm": 79.31651306152344, + "learning_rate": 3.914724659806757e-06, + "loss": 0.3684, + "num_input_tokens_seen": 92826048, + "step": 29500 + }, + { + "epoch": 1.8888035337046283, + "grad_norm": 25.840091705322266, + "learning_rate": 3.914264026329062e-06, + "loss": 0.3988, + "num_input_tokens_seen": 92842624, + "step": 29505 + }, + { + "epoch": 1.8891236156456053, + "grad_norm": 81.29267883300781, + "learning_rate": 3.9138033222303876e-06, + "loss": 0.4192, + "num_input_tokens_seen": 92859008, + "step": 29510 + }, + { + "epoch": 1.8894436975865823, + "grad_norm": 18.999387741088867, + "learning_rate": 3.913342547533739e-06, + "loss": 0.2912, + "num_input_tokens_seen": 92874560, + "step": 29515 + }, + { + "epoch": 1.889763779527559, + "grad_norm": 54.768646240234375, + "learning_rate": 3.9128817022621255e-06, + "loss": 0.3068, + "num_input_tokens_seen": 92889472, + "step": 29520 + }, + { + "epoch": 1.8900838614685358, + "grad_norm": 20.519433975219727, + "learning_rate": 3.91242078643856e-06, + "loss": 0.2897, + "num_input_tokens_seen": 92904640, + "step": 29525 + }, + { + "epoch": 1.8904039434095128, + "grad_norm": 33.78892135620117, + "learning_rate": 3.911959800086057e-06, + "loss": 0.5306, + "num_input_tokens_seen": 92919296, + "step": 29530 + }, + { + "epoch": 1.8907240253504898, + "grad_norm": 19.686851501464844, + "learning_rate": 3.911498743227636e-06, + "loss": 0.3709, + "num_input_tokens_seen": 92934656, + "step": 29535 + }, + { + "epoch": 1.8910441072914668, + "grad_norm": 51.32371139526367, + "learning_rate": 3.911037615886319e-06, + "loss": 0.3673, + "num_input_tokens_seen": 92950336, + "step": 29540 + }, + { + "epoch": 1.8913641892324435, + "grad_norm": 18.397260665893555, + "learning_rate": 3.910576418085135e-06, + "loss": 0.3713, + "num_input_tokens_seen": 92965696, + "step": 29545 + }, + { + "epoch": 1.8916842711734203, + "grad_norm": 10.494402885437012, + "learning_rate": 3.910115149847112e-06, + "loss": 0.2579, + "num_input_tokens_seen": 92981184, + "step": 29550 + }, + { + "epoch": 1.8920043531143973, + "grad_norm": 22.70030975341797, + "learning_rate": 3.909653811195283e-06, + "loss": 0.2935, + "num_input_tokens_seen": 92997056, + "step": 29555 + }, + { + "epoch": 1.8923244350553743, + "grad_norm": 30.219820022583008, + "learning_rate": 3.909192402152685e-06, + "loss": 0.4026, + "num_input_tokens_seen": 93012992, + "step": 29560 + }, + { + "epoch": 1.892644516996351, + "grad_norm": 11.638113021850586, + "learning_rate": 3.90873092274236e-06, + "loss": 0.3713, + "num_input_tokens_seen": 93030016, + "step": 29565 + }, + { + "epoch": 1.8929645989373278, + "grad_norm": 30.498342514038086, + "learning_rate": 3.908269372987349e-06, + "loss": 0.4132, + "num_input_tokens_seen": 93046592, + "step": 29570 + }, + { + "epoch": 1.8932846808783048, + "grad_norm": 10.540436744689941, + "learning_rate": 3.907807752910701e-06, + "loss": 0.3479, + "num_input_tokens_seen": 93062528, + "step": 29575 + }, + { + "epoch": 1.8936047628192818, + "grad_norm": 32.65711212158203, + "learning_rate": 3.907346062535467e-06, + "loss": 0.3162, + "num_input_tokens_seen": 93078464, + "step": 29580 + }, + { + "epoch": 1.8939248447602586, + "grad_norm": 6.1508097648620605, + "learning_rate": 3.906884301884702e-06, + "loss": 0.2174, + "num_input_tokens_seen": 93092928, + "step": 29585 + }, + { + "epoch": 1.8942449267012356, + "grad_norm": 10.945369720458984, + "learning_rate": 3.906422470981462e-06, + "loss": 0.2567, + "num_input_tokens_seen": 93107648, + "step": 29590 + }, + { + "epoch": 1.8945650086422123, + "grad_norm": 9.898181915283203, + "learning_rate": 3.905960569848811e-06, + "loss": 0.3699, + "num_input_tokens_seen": 93123584, + "step": 29595 + }, + { + "epoch": 1.8948850905831893, + "grad_norm": 27.358169555664062, + "learning_rate": 3.905498598509812e-06, + "loss": 0.3734, + "num_input_tokens_seen": 93139328, + "step": 29600 + }, + { + "epoch": 1.8952051725241663, + "grad_norm": 20.474933624267578, + "learning_rate": 3.905036556987534e-06, + "loss": 0.2258, + "num_input_tokens_seen": 93154304, + "step": 29605 + }, + { + "epoch": 1.895525254465143, + "grad_norm": 13.454569816589355, + "learning_rate": 3.904574445305048e-06, + "loss": 0.3358, + "num_input_tokens_seen": 93169344, + "step": 29610 + }, + { + "epoch": 1.8958453364061199, + "grad_norm": 13.251181602478027, + "learning_rate": 3.904112263485431e-06, + "loss": 0.2756, + "num_input_tokens_seen": 93184704, + "step": 29615 + }, + { + "epoch": 1.8961654183470968, + "grad_norm": 23.368593215942383, + "learning_rate": 3.903650011551762e-06, + "loss": 0.3054, + "num_input_tokens_seen": 93199232, + "step": 29620 + }, + { + "epoch": 1.8964855002880738, + "grad_norm": 18.486364364624023, + "learning_rate": 3.903187689527121e-06, + "loss": 0.2537, + "num_input_tokens_seen": 93215552, + "step": 29625 + }, + { + "epoch": 1.8968055822290506, + "grad_norm": 37.99721145629883, + "learning_rate": 3.902725297434598e-06, + "loss": 0.3309, + "num_input_tokens_seen": 93231232, + "step": 29630 + }, + { + "epoch": 1.8971256641700274, + "grad_norm": 33.505760192871094, + "learning_rate": 3.902262835297278e-06, + "loss": 0.3343, + "num_input_tokens_seen": 93246464, + "step": 29635 + }, + { + "epoch": 1.8974457461110044, + "grad_norm": 22.465791702270508, + "learning_rate": 3.9018003031382565e-06, + "loss": 0.4265, + "num_input_tokens_seen": 93262976, + "step": 29640 + }, + { + "epoch": 1.8977658280519814, + "grad_norm": 34.54280090332031, + "learning_rate": 3.9013377009806306e-06, + "loss": 0.41, + "num_input_tokens_seen": 93278208, + "step": 29645 + }, + { + "epoch": 1.8980859099929583, + "grad_norm": 92.16565704345703, + "learning_rate": 3.900875028847499e-06, + "loss": 0.3079, + "num_input_tokens_seen": 93295744, + "step": 29650 + }, + { + "epoch": 1.898405991933935, + "grad_norm": 27.569040298461914, + "learning_rate": 3.900412286761965e-06, + "loss": 0.2987, + "num_input_tokens_seen": 93310016, + "step": 29655 + }, + { + "epoch": 1.8987260738749119, + "grad_norm": 11.401784896850586, + "learning_rate": 3.899949474747136e-06, + "loss": 0.2464, + "num_input_tokens_seen": 93326016, + "step": 29660 + }, + { + "epoch": 1.8990461558158889, + "grad_norm": 27.257919311523438, + "learning_rate": 3.899486592826122e-06, + "loss": 0.3691, + "num_input_tokens_seen": 93342272, + "step": 29665 + }, + { + "epoch": 1.8993662377568659, + "grad_norm": 22.20341682434082, + "learning_rate": 3.899023641022038e-06, + "loss": 0.2123, + "num_input_tokens_seen": 93357248, + "step": 29670 + }, + { + "epoch": 1.8996863196978426, + "grad_norm": 13.766783714294434, + "learning_rate": 3.898560619358001e-06, + "loss": 0.2229, + "num_input_tokens_seen": 93372480, + "step": 29675 + }, + { + "epoch": 1.9000064016388194, + "grad_norm": 29.87504005432129, + "learning_rate": 3.89809752785713e-06, + "loss": 0.3734, + "num_input_tokens_seen": 93389376, + "step": 29680 + }, + { + "epoch": 1.9003264835797964, + "grad_norm": 59.26504135131836, + "learning_rate": 3.897634366542552e-06, + "loss": 0.2921, + "num_input_tokens_seen": 93405568, + "step": 29685 + }, + { + "epoch": 1.9006465655207734, + "grad_norm": 16.29066276550293, + "learning_rate": 3.897171135437393e-06, + "loss": 0.2887, + "num_input_tokens_seen": 93420224, + "step": 29690 + }, + { + "epoch": 1.9009666474617504, + "grad_norm": 14.609169960021973, + "learning_rate": 3.896707834564785e-06, + "loss": 0.3296, + "num_input_tokens_seen": 93435904, + "step": 29695 + }, + { + "epoch": 1.9012867294027271, + "grad_norm": 63.62397766113281, + "learning_rate": 3.896244463947863e-06, + "loss": 0.3328, + "num_input_tokens_seen": 93450240, + "step": 29700 + }, + { + "epoch": 1.901606811343704, + "grad_norm": 15.487939834594727, + "learning_rate": 3.895781023609766e-06, + "loss": 0.2082, + "num_input_tokens_seen": 93465536, + "step": 29705 + }, + { + "epoch": 1.901926893284681, + "grad_norm": 33.196571350097656, + "learning_rate": 3.8953175135736345e-06, + "loss": 0.3424, + "num_input_tokens_seen": 93480704, + "step": 29710 + }, + { + "epoch": 1.9022469752256579, + "grad_norm": 29.24186134338379, + "learning_rate": 3.894853933862614e-06, + "loss": 0.4787, + "num_input_tokens_seen": 93496128, + "step": 29715 + }, + { + "epoch": 1.9025670571666347, + "grad_norm": 17.64666748046875, + "learning_rate": 3.894390284499855e-06, + "loss": 0.2776, + "num_input_tokens_seen": 93511232, + "step": 29720 + }, + { + "epoch": 1.9028871391076114, + "grad_norm": 17.350887298583984, + "learning_rate": 3.893926565508507e-06, + "loss": 0.4588, + "num_input_tokens_seen": 93526848, + "step": 29725 + }, + { + "epoch": 1.9032072210485884, + "grad_norm": 16.57300567626953, + "learning_rate": 3.893462776911727e-06, + "loss": 0.3266, + "num_input_tokens_seen": 93541504, + "step": 29730 + }, + { + "epoch": 1.9035273029895654, + "grad_norm": 23.138904571533203, + "learning_rate": 3.8929989187326745e-06, + "loss": 0.3062, + "num_input_tokens_seen": 93557056, + "step": 29735 + }, + { + "epoch": 1.9038473849305422, + "grad_norm": 38.111854553222656, + "learning_rate": 3.892534990994512e-06, + "loss": 0.4412, + "num_input_tokens_seen": 93574016, + "step": 29740 + }, + { + "epoch": 1.904167466871519, + "grad_norm": 19.729839324951172, + "learning_rate": 3.892070993720405e-06, + "loss": 0.3652, + "num_input_tokens_seen": 93590208, + "step": 29745 + }, + { + "epoch": 1.904487548812496, + "grad_norm": 19.37326431274414, + "learning_rate": 3.891606926933524e-06, + "loss": 0.3777, + "num_input_tokens_seen": 93606592, + "step": 29750 + }, + { + "epoch": 1.904807630753473, + "grad_norm": 13.211954116821289, + "learning_rate": 3.891142790657041e-06, + "loss": 0.3252, + "num_input_tokens_seen": 93621632, + "step": 29755 + }, + { + "epoch": 1.90512771269445, + "grad_norm": 23.084537506103516, + "learning_rate": 3.890678584914134e-06, + "loss": 0.2774, + "num_input_tokens_seen": 93638912, + "step": 29760 + }, + { + "epoch": 1.9054477946354267, + "grad_norm": 34.55193328857422, + "learning_rate": 3.890214309727983e-06, + "loss": 0.4059, + "num_input_tokens_seen": 93655552, + "step": 29765 + }, + { + "epoch": 1.9057678765764035, + "grad_norm": 15.052336692810059, + "learning_rate": 3.8897499651217695e-06, + "loss": 0.3385, + "num_input_tokens_seen": 93670400, + "step": 29770 + }, + { + "epoch": 1.9060879585173804, + "grad_norm": 19.69241714477539, + "learning_rate": 3.889285551118682e-06, + "loss": 0.3391, + "num_input_tokens_seen": 93686016, + "step": 29775 + }, + { + "epoch": 1.9064080404583574, + "grad_norm": 14.42460823059082, + "learning_rate": 3.8888210677419105e-06, + "loss": 0.2084, + "num_input_tokens_seen": 93699840, + "step": 29780 + }, + { + "epoch": 1.9067281223993342, + "grad_norm": 8.323235511779785, + "learning_rate": 3.888356515014649e-06, + "loss": 0.2909, + "num_input_tokens_seen": 93715456, + "step": 29785 + }, + { + "epoch": 1.907048204340311, + "grad_norm": 57.39909744262695, + "learning_rate": 3.887891892960095e-06, + "loss": 0.436, + "num_input_tokens_seen": 93730368, + "step": 29790 + }, + { + "epoch": 1.907368286281288, + "grad_norm": 18.87969970703125, + "learning_rate": 3.8874272016014494e-06, + "loss": 0.3454, + "num_input_tokens_seen": 93745536, + "step": 29795 + }, + { + "epoch": 1.907688368222265, + "grad_norm": 22.118165969848633, + "learning_rate": 3.886962440961916e-06, + "loss": 0.3915, + "num_input_tokens_seen": 93761664, + "step": 29800 + }, + { + "epoch": 1.908008450163242, + "grad_norm": 15.14316463470459, + "learning_rate": 3.886497611064703e-06, + "loss": 0.2676, + "num_input_tokens_seen": 93778880, + "step": 29805 + }, + { + "epoch": 1.9083285321042187, + "grad_norm": 45.5562858581543, + "learning_rate": 3.886032711933021e-06, + "loss": 0.313, + "num_input_tokens_seen": 93795456, + "step": 29810 + }, + { + "epoch": 1.9086486140451955, + "grad_norm": 42.75628662109375, + "learning_rate": 3.885567743590084e-06, + "loss": 0.3767, + "num_input_tokens_seen": 93810944, + "step": 29815 + }, + { + "epoch": 1.9089686959861725, + "grad_norm": 52.64173126220703, + "learning_rate": 3.885102706059112e-06, + "loss": 0.3712, + "num_input_tokens_seen": 93826432, + "step": 29820 + }, + { + "epoch": 1.9092887779271495, + "grad_norm": 56.94313430786133, + "learning_rate": 3.884637599363325e-06, + "loss": 0.4187, + "num_input_tokens_seen": 93843200, + "step": 29825 + }, + { + "epoch": 1.9096088598681262, + "grad_norm": 16.836727142333984, + "learning_rate": 3.8841724235259495e-06, + "loss": 0.4812, + "num_input_tokens_seen": 93861760, + "step": 29830 + }, + { + "epoch": 1.909928941809103, + "grad_norm": 42.56732940673828, + "learning_rate": 3.883707178570213e-06, + "loss": 0.3444, + "num_input_tokens_seen": 93877440, + "step": 29835 + }, + { + "epoch": 1.91024902375008, + "grad_norm": 37.15359878540039, + "learning_rate": 3.883241864519347e-06, + "loss": 0.3317, + "num_input_tokens_seen": 93892672, + "step": 29840 + }, + { + "epoch": 1.910569105691057, + "grad_norm": 33.81910705566406, + "learning_rate": 3.882776481396588e-06, + "loss": 0.2844, + "num_input_tokens_seen": 93907776, + "step": 29845 + }, + { + "epoch": 1.9108891876320337, + "grad_norm": 29.613794326782227, + "learning_rate": 3.882311029225173e-06, + "loss": 0.4965, + "num_input_tokens_seen": 93922880, + "step": 29850 + }, + { + "epoch": 1.9112092695730107, + "grad_norm": 18.618301391601562, + "learning_rate": 3.881845508028346e-06, + "loss": 0.4514, + "num_input_tokens_seen": 93938752, + "step": 29855 + }, + { + "epoch": 1.9115293515139875, + "grad_norm": 27.215669631958008, + "learning_rate": 3.8813799178293525e-06, + "loss": 0.4591, + "num_input_tokens_seen": 93954752, + "step": 29860 + }, + { + "epoch": 1.9118494334549645, + "grad_norm": 20.74531364440918, + "learning_rate": 3.8809142586514406e-06, + "loss": 0.3295, + "num_input_tokens_seen": 93970240, + "step": 29865 + }, + { + "epoch": 1.9121695153959415, + "grad_norm": 7.659165382385254, + "learning_rate": 3.880448530517864e-06, + "loss": 0.2836, + "num_input_tokens_seen": 93985152, + "step": 29870 + }, + { + "epoch": 1.9124895973369183, + "grad_norm": 36.19165802001953, + "learning_rate": 3.879982733451878e-06, + "loss": 0.5027, + "num_input_tokens_seen": 94000832, + "step": 29875 + }, + { + "epoch": 1.912809679277895, + "grad_norm": 27.32901382446289, + "learning_rate": 3.8795168674767425e-06, + "loss": 0.2764, + "num_input_tokens_seen": 94016640, + "step": 29880 + }, + { + "epoch": 1.913129761218872, + "grad_norm": 53.5356559753418, + "learning_rate": 3.87905093261572e-06, + "loss": 0.3356, + "num_input_tokens_seen": 94033472, + "step": 29885 + }, + { + "epoch": 1.913449843159849, + "grad_norm": 22.318286895751953, + "learning_rate": 3.878584928892077e-06, + "loss": 0.3717, + "num_input_tokens_seen": 94050432, + "step": 29890 + }, + { + "epoch": 1.9137699251008258, + "grad_norm": 16.608745574951172, + "learning_rate": 3.878118856329084e-06, + "loss": 0.3477, + "num_input_tokens_seen": 94065536, + "step": 29895 + }, + { + "epoch": 1.9140900070418025, + "grad_norm": 46.75202941894531, + "learning_rate": 3.877652714950014e-06, + "loss": 0.3671, + "num_input_tokens_seen": 94080256, + "step": 29900 + }, + { + "epoch": 1.9144100889827795, + "grad_norm": 15.693512916564941, + "learning_rate": 3.877186504778143e-06, + "loss": 0.2348, + "num_input_tokens_seen": 94095424, + "step": 29905 + }, + { + "epoch": 1.9147301709237565, + "grad_norm": 10.201492309570312, + "learning_rate": 3.876720225836751e-06, + "loss": 0.2735, + "num_input_tokens_seen": 94110464, + "step": 29910 + }, + { + "epoch": 1.9150502528647335, + "grad_norm": 44.97652053833008, + "learning_rate": 3.876253878149122e-06, + "loss": 0.2435, + "num_input_tokens_seen": 94126976, + "step": 29915 + }, + { + "epoch": 1.9153703348057103, + "grad_norm": 22.677419662475586, + "learning_rate": 3.875787461738544e-06, + "loss": 0.2175, + "num_input_tokens_seen": 94143616, + "step": 29920 + }, + { + "epoch": 1.915690416746687, + "grad_norm": 20.90260124206543, + "learning_rate": 3.875320976628305e-06, + "loss": 0.2355, + "num_input_tokens_seen": 94159744, + "step": 29925 + }, + { + "epoch": 1.916010498687664, + "grad_norm": 30.106414794921875, + "learning_rate": 3.874854422841701e-06, + "loss": 0.3534, + "num_input_tokens_seen": 94176064, + "step": 29930 + }, + { + "epoch": 1.916330580628641, + "grad_norm": 35.717586517333984, + "learning_rate": 3.874387800402028e-06, + "loss": 0.3364, + "num_input_tokens_seen": 94191424, + "step": 29935 + }, + { + "epoch": 1.9166506625696178, + "grad_norm": 31.004621505737305, + "learning_rate": 3.873921109332587e-06, + "loss": 0.3102, + "num_input_tokens_seen": 94207552, + "step": 29940 + }, + { + "epoch": 1.9169707445105946, + "grad_norm": 36.03857421875, + "learning_rate": 3.873454349656683e-06, + "loss": 0.3507, + "num_input_tokens_seen": 94222784, + "step": 29945 + }, + { + "epoch": 1.9172908264515716, + "grad_norm": 14.426321983337402, + "learning_rate": 3.8729875213976235e-06, + "loss": 0.3056, + "num_input_tokens_seen": 94238784, + "step": 29950 + }, + { + "epoch": 1.9176109083925486, + "grad_norm": 11.526430130004883, + "learning_rate": 3.8725206245787175e-06, + "loss": 0.2813, + "num_input_tokens_seen": 94254912, + "step": 29955 + }, + { + "epoch": 1.9179309903335255, + "grad_norm": 12.72860336303711, + "learning_rate": 3.872053659223281e-06, + "loss": 0.3429, + "num_input_tokens_seen": 94270912, + "step": 29960 + }, + { + "epoch": 1.9182510722745023, + "grad_norm": 24.346471786499023, + "learning_rate": 3.871586625354632e-06, + "loss": 0.2434, + "num_input_tokens_seen": 94287296, + "step": 29965 + }, + { + "epoch": 1.918571154215479, + "grad_norm": 6.944890975952148, + "learning_rate": 3.871119522996091e-06, + "loss": 0.3745, + "num_input_tokens_seen": 94303744, + "step": 29970 + }, + { + "epoch": 1.918891236156456, + "grad_norm": 26.432146072387695, + "learning_rate": 3.870652352170983e-06, + "loss": 0.3355, + "num_input_tokens_seen": 94319552, + "step": 29975 + }, + { + "epoch": 1.919211318097433, + "grad_norm": 37.98194885253906, + "learning_rate": 3.870185112902636e-06, + "loss": 0.3076, + "num_input_tokens_seen": 94335616, + "step": 29980 + }, + { + "epoch": 1.9195314000384098, + "grad_norm": 58.83201217651367, + "learning_rate": 3.869717805214381e-06, + "loss": 0.2829, + "num_input_tokens_seen": 94351168, + "step": 29985 + }, + { + "epoch": 1.9198514819793866, + "grad_norm": 23.142181396484375, + "learning_rate": 3.869250429129553e-06, + "loss": 0.3977, + "num_input_tokens_seen": 94367104, + "step": 29990 + }, + { + "epoch": 1.9201715639203636, + "grad_norm": 15.757255554199219, + "learning_rate": 3.8687829846714915e-06, + "loss": 0.2625, + "num_input_tokens_seen": 94382720, + "step": 29995 + }, + { + "epoch": 1.9204916458613406, + "grad_norm": 44.703983306884766, + "learning_rate": 3.868315471863537e-06, + "loss": 0.3499, + "num_input_tokens_seen": 94398528, + "step": 30000 + }, + { + "epoch": 1.9208117278023173, + "grad_norm": 23.929405212402344, + "learning_rate": 3.8678478907290365e-06, + "loss": 0.353, + "num_input_tokens_seen": 94413568, + "step": 30005 + }, + { + "epoch": 1.9211318097432943, + "grad_norm": 37.6884765625, + "learning_rate": 3.8673802412913355e-06, + "loss": 0.295, + "num_input_tokens_seen": 94429376, + "step": 30010 + }, + { + "epoch": 1.921451891684271, + "grad_norm": 38.98127746582031, + "learning_rate": 3.8669125235737894e-06, + "loss": 0.3622, + "num_input_tokens_seen": 94444608, + "step": 30015 + }, + { + "epoch": 1.921771973625248, + "grad_norm": 61.917301177978516, + "learning_rate": 3.86644473759975e-06, + "loss": 0.2796, + "num_input_tokens_seen": 94460224, + "step": 30020 + }, + { + "epoch": 1.922092055566225, + "grad_norm": 10.108414649963379, + "learning_rate": 3.8659768833925795e-06, + "loss": 0.3065, + "num_input_tokens_seen": 94476032, + "step": 30025 + }, + { + "epoch": 1.9224121375072019, + "grad_norm": 8.93319320678711, + "learning_rate": 3.865508960975638e-06, + "loss": 0.2967, + "num_input_tokens_seen": 94491264, + "step": 30030 + }, + { + "epoch": 1.9227322194481786, + "grad_norm": 13.493293762207031, + "learning_rate": 3.865040970372291e-06, + "loss": 0.4575, + "num_input_tokens_seen": 94505920, + "step": 30035 + }, + { + "epoch": 1.9230523013891556, + "grad_norm": 67.70994567871094, + "learning_rate": 3.86457291160591e-06, + "loss": 0.3527, + "num_input_tokens_seen": 94521216, + "step": 30040 + }, + { + "epoch": 1.9233723833301326, + "grad_norm": 12.19276237487793, + "learning_rate": 3.864104784699864e-06, + "loss": 0.4377, + "num_input_tokens_seen": 94536064, + "step": 30045 + }, + { + "epoch": 1.9236924652711094, + "grad_norm": 30.80713653564453, + "learning_rate": 3.86363658967753e-06, + "loss": 0.3294, + "num_input_tokens_seen": 94551808, + "step": 30050 + }, + { + "epoch": 1.9240125472120861, + "grad_norm": 19.791534423828125, + "learning_rate": 3.863168326562289e-06, + "loss": 0.2252, + "num_input_tokens_seen": 94567104, + "step": 30055 + }, + { + "epoch": 1.9243326291530631, + "grad_norm": 12.44153118133545, + "learning_rate": 3.862699995377521e-06, + "loss": 0.3768, + "num_input_tokens_seen": 94585536, + "step": 30060 + }, + { + "epoch": 1.9246527110940401, + "grad_norm": 29.16981315612793, + "learning_rate": 3.862231596146614e-06, + "loss": 0.296, + "num_input_tokens_seen": 94600448, + "step": 30065 + }, + { + "epoch": 1.9249727930350171, + "grad_norm": 14.304576873779297, + "learning_rate": 3.861763128892957e-06, + "loss": 0.42, + "num_input_tokens_seen": 94615872, + "step": 30070 + }, + { + "epoch": 1.9252928749759939, + "grad_norm": 26.049257278442383, + "learning_rate": 3.861294593639942e-06, + "loss": 0.4731, + "num_input_tokens_seen": 94631744, + "step": 30075 + }, + { + "epoch": 1.9256129569169707, + "grad_norm": 18.290403366088867, + "learning_rate": 3.860825990410966e-06, + "loss": 0.3117, + "num_input_tokens_seen": 94647552, + "step": 30080 + }, + { + "epoch": 1.9259330388579476, + "grad_norm": 15.824028968811035, + "learning_rate": 3.860357319229428e-06, + "loss": 0.2447, + "num_input_tokens_seen": 94664384, + "step": 30085 + }, + { + "epoch": 1.9262531207989246, + "grad_norm": 30.10344886779785, + "learning_rate": 3.859888580118732e-06, + "loss": 0.3645, + "num_input_tokens_seen": 94679296, + "step": 30090 + }, + { + "epoch": 1.9265732027399014, + "grad_norm": 8.01262092590332, + "learning_rate": 3.859419773102283e-06, + "loss": 0.303, + "num_input_tokens_seen": 94695424, + "step": 30095 + }, + { + "epoch": 1.9268932846808782, + "grad_norm": 12.178086280822754, + "learning_rate": 3.858950898203491e-06, + "loss": 0.3463, + "num_input_tokens_seen": 94711872, + "step": 30100 + }, + { + "epoch": 1.9272133666218552, + "grad_norm": 20.894227981567383, + "learning_rate": 3.858481955445771e-06, + "loss": 0.2347, + "num_input_tokens_seen": 94728320, + "step": 30105 + }, + { + "epoch": 1.9275334485628322, + "grad_norm": 15.461017608642578, + "learning_rate": 3.8580129448525375e-06, + "loss": 0.4611, + "num_input_tokens_seen": 94743488, + "step": 30110 + }, + { + "epoch": 1.927853530503809, + "grad_norm": 8.366080284118652, + "learning_rate": 3.857543866447211e-06, + "loss": 0.2032, + "num_input_tokens_seen": 94758912, + "step": 30115 + }, + { + "epoch": 1.928173612444786, + "grad_norm": 10.39750862121582, + "learning_rate": 3.857074720253215e-06, + "loss": 0.267, + "num_input_tokens_seen": 94774720, + "step": 30120 + }, + { + "epoch": 1.9284936943857627, + "grad_norm": 14.799546241760254, + "learning_rate": 3.856605506293977e-06, + "loss": 0.2482, + "num_input_tokens_seen": 94791296, + "step": 30125 + }, + { + "epoch": 1.9288137763267397, + "grad_norm": 49.20902633666992, + "learning_rate": 3.856136224592926e-06, + "loss": 0.4299, + "num_input_tokens_seen": 94808320, + "step": 30130 + }, + { + "epoch": 1.9291338582677167, + "grad_norm": 8.991806030273438, + "learning_rate": 3.855666875173496e-06, + "loss": 0.3139, + "num_input_tokens_seen": 94824192, + "step": 30135 + }, + { + "epoch": 1.9294539402086934, + "grad_norm": 100.75064086914062, + "learning_rate": 3.855197458059122e-06, + "loss": 0.3475, + "num_input_tokens_seen": 94839808, + "step": 30140 + }, + { + "epoch": 1.9297740221496702, + "grad_norm": 39.4422607421875, + "learning_rate": 3.854727973273248e-06, + "loss": 0.2858, + "num_input_tokens_seen": 94855424, + "step": 30145 + }, + { + "epoch": 1.9300941040906472, + "grad_norm": 21.575857162475586, + "learning_rate": 3.854258420839315e-06, + "loss": 0.2617, + "num_input_tokens_seen": 94872000, + "step": 30150 + }, + { + "epoch": 1.9304141860316242, + "grad_norm": 27.164756774902344, + "learning_rate": 3.85378880078077e-06, + "loss": 0.3139, + "num_input_tokens_seen": 94888512, + "step": 30155 + }, + { + "epoch": 1.930734267972601, + "grad_norm": 11.279132843017578, + "learning_rate": 3.853319113121064e-06, + "loss": 0.3184, + "num_input_tokens_seen": 94904640, + "step": 30160 + }, + { + "epoch": 1.9310543499135777, + "grad_norm": 37.40410614013672, + "learning_rate": 3.85284935788365e-06, + "loss": 0.3605, + "num_input_tokens_seen": 94920384, + "step": 30165 + }, + { + "epoch": 1.9313744318545547, + "grad_norm": 51.10989761352539, + "learning_rate": 3.852379535091987e-06, + "loss": 0.3403, + "num_input_tokens_seen": 94935424, + "step": 30170 + }, + { + "epoch": 1.9316945137955317, + "grad_norm": 30.94226837158203, + "learning_rate": 3.851909644769534e-06, + "loss": 0.4861, + "num_input_tokens_seen": 94950656, + "step": 30175 + }, + { + "epoch": 1.9320145957365087, + "grad_norm": 27.780899047851562, + "learning_rate": 3.851439686939755e-06, + "loss": 0.2875, + "num_input_tokens_seen": 94966976, + "step": 30180 + }, + { + "epoch": 1.9323346776774855, + "grad_norm": 43.48771286010742, + "learning_rate": 3.8509696616261175e-06, + "loss": 0.273, + "num_input_tokens_seen": 94982336, + "step": 30185 + }, + { + "epoch": 1.9326547596184622, + "grad_norm": 14.761856079101562, + "learning_rate": 3.850499568852092e-06, + "loss": 0.4421, + "num_input_tokens_seen": 94997568, + "step": 30190 + }, + { + "epoch": 1.9329748415594392, + "grad_norm": 27.51034164428711, + "learning_rate": 3.850029408641153e-06, + "loss": 0.384, + "num_input_tokens_seen": 95012608, + "step": 30195 + }, + { + "epoch": 1.9332949235004162, + "grad_norm": 42.56759262084961, + "learning_rate": 3.849559181016777e-06, + "loss": 0.423, + "num_input_tokens_seen": 95028544, + "step": 30200 + }, + { + "epoch": 1.933615005441393, + "grad_norm": 12.147018432617188, + "learning_rate": 3.849088886002445e-06, + "loss": 0.3546, + "num_input_tokens_seen": 95043648, + "step": 30205 + }, + { + "epoch": 1.9339350873823697, + "grad_norm": 14.512138366699219, + "learning_rate": 3.848618523621642e-06, + "loss": 0.4762, + "num_input_tokens_seen": 95059136, + "step": 30210 + }, + { + "epoch": 1.9342551693233467, + "grad_norm": 15.788080215454102, + "learning_rate": 3.8481480938978536e-06, + "loss": 0.3033, + "num_input_tokens_seen": 95075456, + "step": 30215 + }, + { + "epoch": 1.9345752512643237, + "grad_norm": 9.742542266845703, + "learning_rate": 3.847677596854572e-06, + "loss": 0.2361, + "num_input_tokens_seen": 95090944, + "step": 30220 + }, + { + "epoch": 1.9348953332053007, + "grad_norm": 13.378199577331543, + "learning_rate": 3.84720703251529e-06, + "loss": 0.4519, + "num_input_tokens_seen": 95106560, + "step": 30225 + }, + { + "epoch": 1.9352154151462775, + "grad_norm": 4.8459577560424805, + "learning_rate": 3.846736400903507e-06, + "loss": 0.2382, + "num_input_tokens_seen": 95122560, + "step": 30230 + }, + { + "epoch": 1.9355354970872543, + "grad_norm": 5.868607521057129, + "learning_rate": 3.846265702042724e-06, + "loss": 0.2277, + "num_input_tokens_seen": 95137344, + "step": 30235 + }, + { + "epoch": 1.9358555790282312, + "grad_norm": 24.63622283935547, + "learning_rate": 3.845794935956443e-06, + "loss": 0.3356, + "num_input_tokens_seen": 95153216, + "step": 30240 + }, + { + "epoch": 1.9361756609692082, + "grad_norm": 72.40505981445312, + "learning_rate": 3.845324102668173e-06, + "loss": 0.2712, + "num_input_tokens_seen": 95169088, + "step": 30245 + }, + { + "epoch": 1.936495742910185, + "grad_norm": 12.404093742370605, + "learning_rate": 3.844853202201425e-06, + "loss": 0.3282, + "num_input_tokens_seen": 95184256, + "step": 30250 + }, + { + "epoch": 1.9368158248511618, + "grad_norm": 19.639427185058594, + "learning_rate": 3.844382234579714e-06, + "loss": 0.3786, + "num_input_tokens_seen": 95200640, + "step": 30255 + }, + { + "epoch": 1.9371359067921388, + "grad_norm": 17.794626235961914, + "learning_rate": 3.843911199826556e-06, + "loss": 0.2359, + "num_input_tokens_seen": 95215232, + "step": 30260 + }, + { + "epoch": 1.9374559887331158, + "grad_norm": 11.361255645751953, + "learning_rate": 3.843440097965473e-06, + "loss": 0.3188, + "num_input_tokens_seen": 95230208, + "step": 30265 + }, + { + "epoch": 1.9377760706740925, + "grad_norm": 11.430974960327148, + "learning_rate": 3.84296892901999e-06, + "loss": 0.4323, + "num_input_tokens_seen": 95245056, + "step": 30270 + }, + { + "epoch": 1.9380961526150695, + "grad_norm": 13.340152740478516, + "learning_rate": 3.842497693013632e-06, + "loss": 0.278, + "num_input_tokens_seen": 95260992, + "step": 30275 + }, + { + "epoch": 1.9384162345560463, + "grad_norm": 8.194086074829102, + "learning_rate": 3.842026389969933e-06, + "loss": 0.2226, + "num_input_tokens_seen": 95277696, + "step": 30280 + }, + { + "epoch": 1.9387363164970233, + "grad_norm": 59.20631790161133, + "learning_rate": 3.841555019912427e-06, + "loss": 0.2805, + "num_input_tokens_seen": 95292352, + "step": 30285 + }, + { + "epoch": 1.9390563984380003, + "grad_norm": 8.895296096801758, + "learning_rate": 3.841083582864651e-06, + "loss": 0.4224, + "num_input_tokens_seen": 95308224, + "step": 30290 + }, + { + "epoch": 1.939376480378977, + "grad_norm": 7.677918910980225, + "learning_rate": 3.840612078850146e-06, + "loss": 0.2287, + "num_input_tokens_seen": 95323840, + "step": 30295 + }, + { + "epoch": 1.9396965623199538, + "grad_norm": 32.369346618652344, + "learning_rate": 3.840140507892457e-06, + "loss": 0.3425, + "num_input_tokens_seen": 95338624, + "step": 30300 + }, + { + "epoch": 1.9400166442609308, + "grad_norm": 10.547171592712402, + "learning_rate": 3.839668870015131e-06, + "loss": 0.2687, + "num_input_tokens_seen": 95353920, + "step": 30305 + }, + { + "epoch": 1.9403367262019078, + "grad_norm": 21.136274337768555, + "learning_rate": 3.8391971652417206e-06, + "loss": 0.2283, + "num_input_tokens_seen": 95368704, + "step": 30310 + }, + { + "epoch": 1.9406568081428845, + "grad_norm": 31.921951293945312, + "learning_rate": 3.838725393595779e-06, + "loss": 0.2701, + "num_input_tokens_seen": 95384192, + "step": 30315 + }, + { + "epoch": 1.9409768900838613, + "grad_norm": 7.160539627075195, + "learning_rate": 3.838253555100865e-06, + "loss": 0.3183, + "num_input_tokens_seen": 95399040, + "step": 30320 + }, + { + "epoch": 1.9412969720248383, + "grad_norm": 27.60106658935547, + "learning_rate": 3.837781649780539e-06, + "loss": 0.353, + "num_input_tokens_seen": 95414784, + "step": 30325 + }, + { + "epoch": 1.9416170539658153, + "grad_norm": 15.067522048950195, + "learning_rate": 3.837309677658365e-06, + "loss": 0.3793, + "num_input_tokens_seen": 95431552, + "step": 30330 + }, + { + "epoch": 1.9419371359067923, + "grad_norm": 23.533763885498047, + "learning_rate": 3.836837638757911e-06, + "loss": 0.3414, + "num_input_tokens_seen": 95450112, + "step": 30335 + }, + { + "epoch": 1.942257217847769, + "grad_norm": 24.867034912109375, + "learning_rate": 3.83636553310275e-06, + "loss": 0.4655, + "num_input_tokens_seen": 95465344, + "step": 30340 + }, + { + "epoch": 1.9425772997887458, + "grad_norm": 34.734561920166016, + "learning_rate": 3.835893360716454e-06, + "loss": 0.269, + "num_input_tokens_seen": 95480256, + "step": 30345 + }, + { + "epoch": 1.9428973817297228, + "grad_norm": 20.621980667114258, + "learning_rate": 3.835421121622603e-06, + "loss": 0.2237, + "num_input_tokens_seen": 95495744, + "step": 30350 + }, + { + "epoch": 1.9432174636706998, + "grad_norm": 14.323284149169922, + "learning_rate": 3.8349488158447765e-06, + "loss": 0.3931, + "num_input_tokens_seen": 95512704, + "step": 30355 + }, + { + "epoch": 1.9435375456116766, + "grad_norm": 52.423789978027344, + "learning_rate": 3.83447644340656e-06, + "loss": 0.3496, + "num_input_tokens_seen": 95528832, + "step": 30360 + }, + { + "epoch": 1.9438576275526533, + "grad_norm": 12.560113906860352, + "learning_rate": 3.834004004331541e-06, + "loss": 0.3195, + "num_input_tokens_seen": 95543360, + "step": 30365 + }, + { + "epoch": 1.9441777094936303, + "grad_norm": 43.0251350402832, + "learning_rate": 3.83353149864331e-06, + "loss": 0.3097, + "num_input_tokens_seen": 95558720, + "step": 30370 + }, + { + "epoch": 1.9444977914346073, + "grad_norm": 48.5806884765625, + "learning_rate": 3.833058926365463e-06, + "loss": 0.3635, + "num_input_tokens_seen": 95574592, + "step": 30375 + }, + { + "epoch": 1.944817873375584, + "grad_norm": 9.256945610046387, + "learning_rate": 3.8325862875215956e-06, + "loss": 0.3275, + "num_input_tokens_seen": 95589952, + "step": 30380 + }, + { + "epoch": 1.945137955316561, + "grad_norm": 30.985546112060547, + "learning_rate": 3.8321135821353114e-06, + "loss": 0.3504, + "num_input_tokens_seen": 95606784, + "step": 30385 + }, + { + "epoch": 1.9454580372575379, + "grad_norm": 36.493961334228516, + "learning_rate": 3.831640810230213e-06, + "loss": 0.4305, + "num_input_tokens_seen": 95622464, + "step": 30390 + }, + { + "epoch": 1.9457781191985148, + "grad_norm": 6.907671928405762, + "learning_rate": 3.831167971829909e-06, + "loss": 0.2161, + "num_input_tokens_seen": 95639744, + "step": 30395 + }, + { + "epoch": 1.9460982011394918, + "grad_norm": 6.466375827789307, + "learning_rate": 3.8306950669580105e-06, + "loss": 0.3016, + "num_input_tokens_seen": 95656320, + "step": 30400 + }, + { + "epoch": 1.9464182830804686, + "grad_norm": 11.337971687316895, + "learning_rate": 3.8302220956381305e-06, + "loss": 0.2999, + "num_input_tokens_seen": 95671552, + "step": 30405 + }, + { + "epoch": 1.9467383650214454, + "grad_norm": 27.663776397705078, + "learning_rate": 3.829749057893889e-06, + "loss": 0.3643, + "num_input_tokens_seen": 95687360, + "step": 30410 + }, + { + "epoch": 1.9470584469624224, + "grad_norm": 19.744413375854492, + "learning_rate": 3.829275953748906e-06, + "loss": 0.3055, + "num_input_tokens_seen": 95702848, + "step": 30415 + }, + { + "epoch": 1.9473785289033994, + "grad_norm": 27.882213592529297, + "learning_rate": 3.828802783226805e-06, + "loss": 0.2595, + "num_input_tokens_seen": 95719040, + "step": 30420 + }, + { + "epoch": 1.9476986108443761, + "grad_norm": 26.309751510620117, + "learning_rate": 3.828329546351216e-06, + "loss": 0.3429, + "num_input_tokens_seen": 95735360, + "step": 30425 + }, + { + "epoch": 1.948018692785353, + "grad_norm": 36.453556060791016, + "learning_rate": 3.827856243145768e-06, + "loss": 0.3531, + "num_input_tokens_seen": 95751616, + "step": 30430 + }, + { + "epoch": 1.9483387747263299, + "grad_norm": 28.783851623535156, + "learning_rate": 3.827382873634095e-06, + "loss": 0.5013, + "num_input_tokens_seen": 95767168, + "step": 30435 + }, + { + "epoch": 1.9486588566673069, + "grad_norm": 24.61430549621582, + "learning_rate": 3.826909437839836e-06, + "loss": 0.372, + "num_input_tokens_seen": 95782464, + "step": 30440 + }, + { + "epoch": 1.9489789386082839, + "grad_norm": 72.19210815429688, + "learning_rate": 3.82643593578663e-06, + "loss": 0.399, + "num_input_tokens_seen": 95798400, + "step": 30445 + }, + { + "epoch": 1.9492990205492606, + "grad_norm": 35.27503967285156, + "learning_rate": 3.825962367498124e-06, + "loss": 0.3005, + "num_input_tokens_seen": 95814912, + "step": 30450 + }, + { + "epoch": 1.9496191024902374, + "grad_norm": 47.66834259033203, + "learning_rate": 3.825488732997963e-06, + "loss": 0.2929, + "num_input_tokens_seen": 95831232, + "step": 30455 + }, + { + "epoch": 1.9499391844312144, + "grad_norm": 21.076704025268555, + "learning_rate": 3.8250150323098e-06, + "loss": 0.3082, + "num_input_tokens_seen": 95847232, + "step": 30460 + }, + { + "epoch": 1.9502592663721914, + "grad_norm": 11.819143295288086, + "learning_rate": 3.824541265457287e-06, + "loss": 0.223, + "num_input_tokens_seen": 95863040, + "step": 30465 + }, + { + "epoch": 1.9505793483131681, + "grad_norm": 31.22002601623535, + "learning_rate": 3.824067432464083e-06, + "loss": 0.3478, + "num_input_tokens_seen": 95879040, + "step": 30470 + }, + { + "epoch": 1.950899430254145, + "grad_norm": 58.79093933105469, + "learning_rate": 3.823593533353849e-06, + "loss": 0.3021, + "num_input_tokens_seen": 95895360, + "step": 30475 + }, + { + "epoch": 1.951219512195122, + "grad_norm": 26.115089416503906, + "learning_rate": 3.823119568150247e-06, + "loss": 0.3454, + "num_input_tokens_seen": 95911872, + "step": 30480 + }, + { + "epoch": 1.951539594136099, + "grad_norm": 53.935054779052734, + "learning_rate": 3.822645536876946e-06, + "loss": 0.4653, + "num_input_tokens_seen": 95928256, + "step": 30485 + }, + { + "epoch": 1.951859676077076, + "grad_norm": 18.109477996826172, + "learning_rate": 3.822171439557617e-06, + "loss": 0.3173, + "num_input_tokens_seen": 95946560, + "step": 30490 + }, + { + "epoch": 1.9521797580180527, + "grad_norm": 16.25759506225586, + "learning_rate": 3.8216972762159325e-06, + "loss": 0.3329, + "num_input_tokens_seen": 95963520, + "step": 30495 + }, + { + "epoch": 1.9524998399590294, + "grad_norm": 22.89107894897461, + "learning_rate": 3.821223046875571e-06, + "loss": 0.2395, + "num_input_tokens_seen": 95979200, + "step": 30500 + }, + { + "epoch": 1.9528199219000064, + "grad_norm": 10.361407279968262, + "learning_rate": 3.8207487515602115e-06, + "loss": 0.228, + "num_input_tokens_seen": 95994368, + "step": 30505 + }, + { + "epoch": 1.9531400038409834, + "grad_norm": 27.21394920349121, + "learning_rate": 3.82027439029354e-06, + "loss": 0.3221, + "num_input_tokens_seen": 96012992, + "step": 30510 + }, + { + "epoch": 1.9534600857819602, + "grad_norm": 13.665699005126953, + "learning_rate": 3.819799963099242e-06, + "loss": 0.4123, + "num_input_tokens_seen": 96028288, + "step": 30515 + }, + { + "epoch": 1.953780167722937, + "grad_norm": 20.0784969329834, + "learning_rate": 3.819325470001008e-06, + "loss": 0.4322, + "num_input_tokens_seen": 96044992, + "step": 30520 + }, + { + "epoch": 1.954100249663914, + "grad_norm": 17.235925674438477, + "learning_rate": 3.818850911022534e-06, + "loss": 0.291, + "num_input_tokens_seen": 96060544, + "step": 30525 + }, + { + "epoch": 1.954420331604891, + "grad_norm": 16.806333541870117, + "learning_rate": 3.818376286187513e-06, + "loss": 0.3012, + "num_input_tokens_seen": 96076416, + "step": 30530 + }, + { + "epoch": 1.9547404135458677, + "grad_norm": 42.42023468017578, + "learning_rate": 3.817901595519648e-06, + "loss": 0.3354, + "num_input_tokens_seen": 96091136, + "step": 30535 + }, + { + "epoch": 1.9550604954868447, + "grad_norm": 22.003559112548828, + "learning_rate": 3.817426839042643e-06, + "loss": 0.2579, + "num_input_tokens_seen": 96106304, + "step": 30540 + }, + { + "epoch": 1.9553805774278215, + "grad_norm": 18.12207794189453, + "learning_rate": 3.816952016780203e-06, + "loss": 0.2388, + "num_input_tokens_seen": 96122624, + "step": 30545 + }, + { + "epoch": 1.9557006593687984, + "grad_norm": 27.35410499572754, + "learning_rate": 3.8164771287560394e-06, + "loss": 0.3569, + "num_input_tokens_seen": 96138688, + "step": 30550 + }, + { + "epoch": 1.9560207413097754, + "grad_norm": 9.292060852050781, + "learning_rate": 3.816002174993865e-06, + "loss": 0.3053, + "num_input_tokens_seen": 96154560, + "step": 30555 + }, + { + "epoch": 1.9563408232507522, + "grad_norm": 15.812111854553223, + "learning_rate": 3.815527155517397e-06, + "loss": 0.2488, + "num_input_tokens_seen": 96170624, + "step": 30560 + }, + { + "epoch": 1.956660905191729, + "grad_norm": 34.775901794433594, + "learning_rate": 3.815052070350355e-06, + "loss": 0.4322, + "num_input_tokens_seen": 96185984, + "step": 30565 + }, + { + "epoch": 1.956980987132706, + "grad_norm": 71.4795913696289, + "learning_rate": 3.8145769195164628e-06, + "loss": 0.3333, + "num_input_tokens_seen": 96202368, + "step": 30570 + }, + { + "epoch": 1.957301069073683, + "grad_norm": 13.75683879852295, + "learning_rate": 3.8141017030394454e-06, + "loss": 0.2573, + "num_input_tokens_seen": 96218816, + "step": 30575 + }, + { + "epoch": 1.9576211510146597, + "grad_norm": 51.31011962890625, + "learning_rate": 3.813626420943035e-06, + "loss": 0.3146, + "num_input_tokens_seen": 96233472, + "step": 30580 + }, + { + "epoch": 1.9579412329556365, + "grad_norm": 55.18775177001953, + "learning_rate": 3.8131510732509636e-06, + "loss": 0.3778, + "num_input_tokens_seen": 96248128, + "step": 30585 + }, + { + "epoch": 1.9582613148966135, + "grad_norm": 16.885168075561523, + "learning_rate": 3.8126756599869673e-06, + "loss": 0.3476, + "num_input_tokens_seen": 96263296, + "step": 30590 + }, + { + "epoch": 1.9585813968375905, + "grad_norm": 21.844058990478516, + "learning_rate": 3.812200181174786e-06, + "loss": 0.2856, + "num_input_tokens_seen": 96278400, + "step": 30595 + }, + { + "epoch": 1.9589014787785675, + "grad_norm": 45.27677917480469, + "learning_rate": 3.8117246368381627e-06, + "loss": 0.3761, + "num_input_tokens_seen": 96294080, + "step": 30600 + }, + { + "epoch": 1.9592215607195442, + "grad_norm": 9.842395782470703, + "learning_rate": 3.811249027000844e-06, + "loss": 0.271, + "num_input_tokens_seen": 96308416, + "step": 30605 + }, + { + "epoch": 1.959541642660521, + "grad_norm": 11.360607147216797, + "learning_rate": 3.8107733516865787e-06, + "loss": 0.2707, + "num_input_tokens_seen": 96324672, + "step": 30610 + }, + { + "epoch": 1.959861724601498, + "grad_norm": 61.10039138793945, + "learning_rate": 3.810297610919119e-06, + "loss": 0.5187, + "num_input_tokens_seen": 96340416, + "step": 30615 + }, + { + "epoch": 1.960181806542475, + "grad_norm": 51.26926040649414, + "learning_rate": 3.8098218047222225e-06, + "loss": 0.3842, + "num_input_tokens_seen": 96355584, + "step": 30620 + }, + { + "epoch": 1.9605018884834517, + "grad_norm": 25.956745147705078, + "learning_rate": 3.8093459331196474e-06, + "loss": 0.4289, + "num_input_tokens_seen": 96370752, + "step": 30625 + }, + { + "epoch": 1.9608219704244285, + "grad_norm": 27.066604614257812, + "learning_rate": 3.8088699961351573e-06, + "loss": 0.3577, + "num_input_tokens_seen": 96387136, + "step": 30630 + }, + { + "epoch": 1.9611420523654055, + "grad_norm": 9.790367126464844, + "learning_rate": 3.8083939937925157e-06, + "loss": 0.3082, + "num_input_tokens_seen": 96402560, + "step": 30635 + }, + { + "epoch": 1.9614621343063825, + "grad_norm": 16.28690528869629, + "learning_rate": 3.8079179261154942e-06, + "loss": 0.3179, + "num_input_tokens_seen": 96420288, + "step": 30640 + }, + { + "epoch": 1.9617822162473595, + "grad_norm": 66.59683227539062, + "learning_rate": 3.807441793127864e-06, + "loss": 0.2698, + "num_input_tokens_seen": 96436800, + "step": 30645 + }, + { + "epoch": 1.9621022981883363, + "grad_norm": 50.72922897338867, + "learning_rate": 3.8069655948534006e-06, + "loss": 0.2348, + "num_input_tokens_seen": 96451968, + "step": 30650 + }, + { + "epoch": 1.962422380129313, + "grad_norm": 16.57595443725586, + "learning_rate": 3.8064893313158834e-06, + "loss": 0.4277, + "num_input_tokens_seen": 96467840, + "step": 30655 + }, + { + "epoch": 1.96274246207029, + "grad_norm": 33.22600555419922, + "learning_rate": 3.8060130025390946e-06, + "loss": 0.5217, + "num_input_tokens_seen": 96483392, + "step": 30660 + }, + { + "epoch": 1.963062544011267, + "grad_norm": 20.93801498413086, + "learning_rate": 3.805536608546819e-06, + "loss": 0.2882, + "num_input_tokens_seen": 96500032, + "step": 30665 + }, + { + "epoch": 1.9633826259522438, + "grad_norm": 18.29781150817871, + "learning_rate": 3.805060149362846e-06, + "loss": 0.5469, + "num_input_tokens_seen": 96515904, + "step": 30670 + }, + { + "epoch": 1.9637027078932205, + "grad_norm": 28.22369956970215, + "learning_rate": 3.804583625010966e-06, + "loss": 0.2334, + "num_input_tokens_seen": 96531264, + "step": 30675 + }, + { + "epoch": 1.9640227898341975, + "grad_norm": 26.079090118408203, + "learning_rate": 3.804107035514975e-06, + "loss": 0.3117, + "num_input_tokens_seen": 96546816, + "step": 30680 + }, + { + "epoch": 1.9643428717751745, + "grad_norm": 30.013263702392578, + "learning_rate": 3.803630380898672e-06, + "loss": 0.3487, + "num_input_tokens_seen": 96561408, + "step": 30685 + }, + { + "epoch": 1.9646629537161513, + "grad_norm": 13.698871612548828, + "learning_rate": 3.803153661185858e-06, + "loss": 0.3256, + "num_input_tokens_seen": 96577152, + "step": 30690 + }, + { + "epoch": 1.964983035657128, + "grad_norm": 69.84667205810547, + "learning_rate": 3.802676876400338e-06, + "loss": 0.3654, + "num_input_tokens_seen": 96592960, + "step": 30695 + }, + { + "epoch": 1.965303117598105, + "grad_norm": 33.49753952026367, + "learning_rate": 3.802200026565921e-06, + "loss": 0.5017, + "num_input_tokens_seen": 96608192, + "step": 30700 + }, + { + "epoch": 1.965623199539082, + "grad_norm": 21.07265853881836, + "learning_rate": 3.8017231117064166e-06, + "loss": 0.284, + "num_input_tokens_seen": 96623680, + "step": 30705 + }, + { + "epoch": 1.965943281480059, + "grad_norm": 23.167863845825195, + "learning_rate": 3.8012461318456407e-06, + "loss": 0.3915, + "num_input_tokens_seen": 96638912, + "step": 30710 + }, + { + "epoch": 1.9662633634210358, + "grad_norm": 21.608043670654297, + "learning_rate": 3.800769087007411e-06, + "loss": 0.3317, + "num_input_tokens_seen": 96655744, + "step": 30715 + }, + { + "epoch": 1.9665834453620126, + "grad_norm": 14.791343688964844, + "learning_rate": 3.8002919772155477e-06, + "loss": 0.2268, + "num_input_tokens_seen": 96671616, + "step": 30720 + }, + { + "epoch": 1.9669035273029896, + "grad_norm": 27.24882698059082, + "learning_rate": 3.7998148024938775e-06, + "loss": 0.3062, + "num_input_tokens_seen": 96686976, + "step": 30725 + }, + { + "epoch": 1.9672236092439666, + "grad_norm": 9.983153343200684, + "learning_rate": 3.799337562866226e-06, + "loss": 0.3562, + "num_input_tokens_seen": 96703744, + "step": 30730 + }, + { + "epoch": 1.9675436911849433, + "grad_norm": 16.748476028442383, + "learning_rate": 3.7988602583564236e-06, + "loss": 0.4155, + "num_input_tokens_seen": 96719488, + "step": 30735 + }, + { + "epoch": 1.96786377312592, + "grad_norm": 16.784862518310547, + "learning_rate": 3.7983828889883067e-06, + "loss": 0.3041, + "num_input_tokens_seen": 96736448, + "step": 30740 + }, + { + "epoch": 1.968183855066897, + "grad_norm": 13.717694282531738, + "learning_rate": 3.797905454785711e-06, + "loss": 0.3438, + "num_input_tokens_seen": 96752960, + "step": 30745 + }, + { + "epoch": 1.968503937007874, + "grad_norm": 33.41716003417969, + "learning_rate": 3.797427955772477e-06, + "loss": 0.4202, + "num_input_tokens_seen": 96768192, + "step": 30750 + }, + { + "epoch": 1.968824018948851, + "grad_norm": 13.633380889892578, + "learning_rate": 3.7969503919724493e-06, + "loss": 0.2469, + "num_input_tokens_seen": 96783296, + "step": 30755 + }, + { + "epoch": 1.9691441008898278, + "grad_norm": 8.971928596496582, + "learning_rate": 3.796472763409475e-06, + "loss": 0.3581, + "num_input_tokens_seen": 96798976, + "step": 30760 + }, + { + "epoch": 1.9694641828308046, + "grad_norm": 17.810617446899414, + "learning_rate": 3.795995070107403e-06, + "loss": 0.2204, + "num_input_tokens_seen": 96814464, + "step": 30765 + }, + { + "epoch": 1.9697842647717816, + "grad_norm": 10.616767883300781, + "learning_rate": 3.795517312090089e-06, + "loss": 0.2773, + "num_input_tokens_seen": 96829120, + "step": 30770 + }, + { + "epoch": 1.9701043467127586, + "grad_norm": 17.60805892944336, + "learning_rate": 3.795039489381388e-06, + "loss": 0.3547, + "num_input_tokens_seen": 96844672, + "step": 30775 + }, + { + "epoch": 1.9704244286537353, + "grad_norm": 39.75784683227539, + "learning_rate": 3.79456160200516e-06, + "loss": 0.3898, + "num_input_tokens_seen": 96860544, + "step": 30780 + }, + { + "epoch": 1.9707445105947121, + "grad_norm": 59.90557861328125, + "learning_rate": 3.7940836499852697e-06, + "loss": 0.4881, + "num_input_tokens_seen": 96875968, + "step": 30785 + }, + { + "epoch": 1.971064592535689, + "grad_norm": 32.086875915527344, + "learning_rate": 3.793605633345582e-06, + "loss": 0.4323, + "num_input_tokens_seen": 96891904, + "step": 30790 + }, + { + "epoch": 1.971384674476666, + "grad_norm": 12.32679271697998, + "learning_rate": 3.793127552109966e-06, + "loss": 0.2951, + "num_input_tokens_seen": 96907840, + "step": 30795 + }, + { + "epoch": 1.9717047564176429, + "grad_norm": 74.86787414550781, + "learning_rate": 3.7926494063022978e-06, + "loss": 0.5639, + "num_input_tokens_seen": 96923072, + "step": 30800 + }, + { + "epoch": 1.9720248383586199, + "grad_norm": 12.47989273071289, + "learning_rate": 3.79217119594645e-06, + "loss": 0.441, + "num_input_tokens_seen": 96937792, + "step": 30805 + }, + { + "epoch": 1.9723449202995966, + "grad_norm": 11.432453155517578, + "learning_rate": 3.7916929210663037e-06, + "loss": 0.2818, + "num_input_tokens_seen": 96952960, + "step": 30810 + }, + { + "epoch": 1.9726650022405736, + "grad_norm": 18.60009765625, + "learning_rate": 3.7912145816857414e-06, + "loss": 0.3657, + "num_input_tokens_seen": 96969792, + "step": 30815 + }, + { + "epoch": 1.9729850841815506, + "grad_norm": 48.22722244262695, + "learning_rate": 3.790736177828648e-06, + "loss": 0.269, + "num_input_tokens_seen": 96987072, + "step": 30820 + }, + { + "epoch": 1.9733051661225274, + "grad_norm": 31.56644630432129, + "learning_rate": 3.7902577095189137e-06, + "loss": 0.2769, + "num_input_tokens_seen": 97003968, + "step": 30825 + }, + { + "epoch": 1.9736252480635041, + "grad_norm": 16.03586769104004, + "learning_rate": 3.78977917678043e-06, + "loss": 0.213, + "num_input_tokens_seen": 97019456, + "step": 30830 + }, + { + "epoch": 1.9739453300044811, + "grad_norm": 28.29656219482422, + "learning_rate": 3.7893005796370924e-06, + "loss": 0.2732, + "num_input_tokens_seen": 97034432, + "step": 30835 + }, + { + "epoch": 1.9742654119454581, + "grad_norm": 42.621009826660156, + "learning_rate": 3.7888219181127995e-06, + "loss": 0.2402, + "num_input_tokens_seen": 97051392, + "step": 30840 + }, + { + "epoch": 1.974585493886435, + "grad_norm": 23.488405227661133, + "learning_rate": 3.7883431922314532e-06, + "loss": 0.3779, + "num_input_tokens_seen": 97067648, + "step": 30845 + }, + { + "epoch": 1.9749055758274117, + "grad_norm": 30.491270065307617, + "learning_rate": 3.7878644020169585e-06, + "loss": 0.4301, + "num_input_tokens_seen": 97082624, + "step": 30850 + }, + { + "epoch": 1.9752256577683887, + "grad_norm": 10.290331840515137, + "learning_rate": 3.787385547493224e-06, + "loss": 0.423, + "num_input_tokens_seen": 97098240, + "step": 30855 + }, + { + "epoch": 1.9755457397093656, + "grad_norm": 11.379972457885742, + "learning_rate": 3.7869066286841612e-06, + "loss": 0.4389, + "num_input_tokens_seen": 97115584, + "step": 30860 + }, + { + "epoch": 1.9758658216503426, + "grad_norm": 27.1103458404541, + "learning_rate": 3.786427645613684e-06, + "loss": 0.2542, + "num_input_tokens_seen": 97130432, + "step": 30865 + }, + { + "epoch": 1.9761859035913194, + "grad_norm": 5.516606330871582, + "learning_rate": 3.785948598305711e-06, + "loss": 0.3285, + "num_input_tokens_seen": 97145728, + "step": 30870 + }, + { + "epoch": 1.9765059855322962, + "grad_norm": 45.51311111450195, + "learning_rate": 3.7854694867841637e-06, + "loss": 0.3806, + "num_input_tokens_seen": 97161152, + "step": 30875 + }, + { + "epoch": 1.9768260674732732, + "grad_norm": 13.79338264465332, + "learning_rate": 3.784990311072966e-06, + "loss": 0.2684, + "num_input_tokens_seen": 97176128, + "step": 30880 + }, + { + "epoch": 1.9771461494142502, + "grad_norm": 19.46444320678711, + "learning_rate": 3.784511071196045e-06, + "loss": 0.3563, + "num_input_tokens_seen": 97190912, + "step": 30885 + }, + { + "epoch": 1.977466231355227, + "grad_norm": 36.86937713623047, + "learning_rate": 3.784031767177332e-06, + "loss": 0.2885, + "num_input_tokens_seen": 97208128, + "step": 30890 + }, + { + "epoch": 1.9777863132962037, + "grad_norm": 15.705076217651367, + "learning_rate": 3.783552399040761e-06, + "loss": 0.3819, + "num_input_tokens_seen": 97223744, + "step": 30895 + }, + { + "epoch": 1.9781063952371807, + "grad_norm": 12.987893104553223, + "learning_rate": 3.7830729668102694e-06, + "loss": 0.3393, + "num_input_tokens_seen": 97239872, + "step": 30900 + }, + { + "epoch": 1.9784264771781577, + "grad_norm": 35.42729187011719, + "learning_rate": 3.782593470509796e-06, + "loss": 0.5342, + "num_input_tokens_seen": 97255936, + "step": 30905 + }, + { + "epoch": 1.9787465591191347, + "grad_norm": 33.104801177978516, + "learning_rate": 3.7821139101632864e-06, + "loss": 0.2545, + "num_input_tokens_seen": 97271424, + "step": 30910 + }, + { + "epoch": 1.9790666410601114, + "grad_norm": 20.708372116088867, + "learning_rate": 3.7816342857946864e-06, + "loss": 0.4111, + "num_input_tokens_seen": 97286144, + "step": 30915 + }, + { + "epoch": 1.9793867230010882, + "grad_norm": 19.263338088989258, + "learning_rate": 3.7811545974279454e-06, + "loss": 0.4032, + "num_input_tokens_seen": 97300160, + "step": 30920 + }, + { + "epoch": 1.9797068049420652, + "grad_norm": 18.009191513061523, + "learning_rate": 3.780674845087017e-06, + "loss": 0.3373, + "num_input_tokens_seen": 97314816, + "step": 30925 + }, + { + "epoch": 1.9800268868830422, + "grad_norm": 19.856718063354492, + "learning_rate": 3.780195028795858e-06, + "loss": 0.3002, + "num_input_tokens_seen": 97329984, + "step": 30930 + }, + { + "epoch": 1.980346968824019, + "grad_norm": 38.57408142089844, + "learning_rate": 3.7797151485784277e-06, + "loss": 0.311, + "num_input_tokens_seen": 97345792, + "step": 30935 + }, + { + "epoch": 1.9806670507649957, + "grad_norm": 7.878316402435303, + "learning_rate": 3.779235204458689e-06, + "loss": 0.322, + "num_input_tokens_seen": 97361536, + "step": 30940 + }, + { + "epoch": 1.9809871327059727, + "grad_norm": 32.10776901245117, + "learning_rate": 3.7787551964606066e-06, + "loss": 0.2427, + "num_input_tokens_seen": 97377472, + "step": 30945 + }, + { + "epoch": 1.9813072146469497, + "grad_norm": 48.43342208862305, + "learning_rate": 3.7782751246081513e-06, + "loss": 0.4856, + "num_input_tokens_seen": 97395200, + "step": 30950 + }, + { + "epoch": 1.9816272965879265, + "grad_norm": 26.27881622314453, + "learning_rate": 3.7777949889252942e-06, + "loss": 0.3891, + "num_input_tokens_seen": 97410560, + "step": 30955 + }, + { + "epoch": 1.9819473785289032, + "grad_norm": 10.576167106628418, + "learning_rate": 3.7773147894360106e-06, + "loss": 0.2833, + "num_input_tokens_seen": 97424960, + "step": 30960 + }, + { + "epoch": 1.9822674604698802, + "grad_norm": 18.220823287963867, + "learning_rate": 3.7768345261642804e-06, + "loss": 0.3072, + "num_input_tokens_seen": 97440768, + "step": 30965 + }, + { + "epoch": 1.9825875424108572, + "grad_norm": 12.356149673461914, + "learning_rate": 3.776354199134085e-06, + "loss": 0.3888, + "num_input_tokens_seen": 97456960, + "step": 30970 + }, + { + "epoch": 1.9829076243518342, + "grad_norm": 14.19937801361084, + "learning_rate": 3.7758738083694084e-06, + "loss": 0.2775, + "num_input_tokens_seen": 97471424, + "step": 30975 + }, + { + "epoch": 1.983227706292811, + "grad_norm": 10.318498611450195, + "learning_rate": 3.77539335389424e-06, + "loss": 0.3476, + "num_input_tokens_seen": 97486784, + "step": 30980 + }, + { + "epoch": 1.9835477882337877, + "grad_norm": 39.32685852050781, + "learning_rate": 3.7749128357325706e-06, + "loss": 0.2606, + "num_input_tokens_seen": 97503104, + "step": 30985 + }, + { + "epoch": 1.9838678701747647, + "grad_norm": 19.00457763671875, + "learning_rate": 3.7744322539083956e-06, + "loss": 0.4286, + "num_input_tokens_seen": 97519296, + "step": 30990 + }, + { + "epoch": 1.9841879521157417, + "grad_norm": 12.107366561889648, + "learning_rate": 3.7739516084457104e-06, + "loss": 0.3691, + "num_input_tokens_seen": 97534656, + "step": 30995 + }, + { + "epoch": 1.9845080340567185, + "grad_norm": 31.22132682800293, + "learning_rate": 3.7734708993685194e-06, + "loss": 0.3158, + "num_input_tokens_seen": 97564672, + "step": 31000 + }, + { + "epoch": 1.9848281159976953, + "grad_norm": 28.73276710510254, + "learning_rate": 3.7729901267008246e-06, + "loss": 0.3386, + "num_input_tokens_seen": 97579904, + "step": 31005 + }, + { + "epoch": 1.9851481979386723, + "grad_norm": 23.043888092041016, + "learning_rate": 3.7725092904666337e-06, + "loss": 0.2765, + "num_input_tokens_seen": 97595520, + "step": 31010 + }, + { + "epoch": 1.9854682798796492, + "grad_norm": 15.768203735351562, + "learning_rate": 3.772028390689957e-06, + "loss": 0.2711, + "num_input_tokens_seen": 97611392, + "step": 31015 + }, + { + "epoch": 1.9857883618206262, + "grad_norm": 12.00822639465332, + "learning_rate": 3.771547427394807e-06, + "loss": 0.4288, + "num_input_tokens_seen": 97627328, + "step": 31020 + }, + { + "epoch": 1.986108443761603, + "grad_norm": 27.42289161682129, + "learning_rate": 3.771066400605203e-06, + "loss": 0.3723, + "num_input_tokens_seen": 97641984, + "step": 31025 + }, + { + "epoch": 1.9864285257025798, + "grad_norm": 9.228874206542969, + "learning_rate": 3.770585310345164e-06, + "loss": 0.3042, + "num_input_tokens_seen": 97657856, + "step": 31030 + }, + { + "epoch": 1.9867486076435568, + "grad_norm": 7.694231986999512, + "learning_rate": 3.7701041566387123e-06, + "loss": 0.2281, + "num_input_tokens_seen": 97673792, + "step": 31035 + }, + { + "epoch": 1.9870686895845338, + "grad_norm": 30.21535301208496, + "learning_rate": 3.769622939509875e-06, + "loss": 0.3068, + "num_input_tokens_seen": 97688768, + "step": 31040 + }, + { + "epoch": 1.9873887715255105, + "grad_norm": 32.29559326171875, + "learning_rate": 3.769141658982681e-06, + "loss": 0.2365, + "num_input_tokens_seen": 97704768, + "step": 31045 + }, + { + "epoch": 1.9877088534664873, + "grad_norm": 19.18822479248047, + "learning_rate": 3.768660315081163e-06, + "loss": 0.3298, + "num_input_tokens_seen": 97719360, + "step": 31050 + }, + { + "epoch": 1.9880289354074643, + "grad_norm": 25.264446258544922, + "learning_rate": 3.7681789078293575e-06, + "loss": 0.3928, + "num_input_tokens_seen": 97734528, + "step": 31055 + }, + { + "epoch": 1.9883490173484413, + "grad_norm": 32.582332611083984, + "learning_rate": 3.767697437251303e-06, + "loss": 0.3649, + "num_input_tokens_seen": 97749760, + "step": 31060 + }, + { + "epoch": 1.988669099289418, + "grad_norm": 36.41999435424805, + "learning_rate": 3.7672159033710422e-06, + "loss": 0.4059, + "num_input_tokens_seen": 97766144, + "step": 31065 + }, + { + "epoch": 1.988989181230395, + "grad_norm": 15.462432861328125, + "learning_rate": 3.7667343062126193e-06, + "loss": 0.2611, + "num_input_tokens_seen": 97783104, + "step": 31070 + }, + { + "epoch": 1.9893092631713718, + "grad_norm": 18.08184051513672, + "learning_rate": 3.766252645800083e-06, + "loss": 0.3792, + "num_input_tokens_seen": 97798528, + "step": 31075 + }, + { + "epoch": 1.9896293451123488, + "grad_norm": 7.0601115226745605, + "learning_rate": 3.7657709221574856e-06, + "loss": 0.2394, + "num_input_tokens_seen": 97813376, + "step": 31080 + }, + { + "epoch": 1.9899494270533258, + "grad_norm": 10.215871810913086, + "learning_rate": 3.7652891353088814e-06, + "loss": 0.2987, + "num_input_tokens_seen": 97828480, + "step": 31085 + }, + { + "epoch": 1.9902695089943025, + "grad_norm": 56.16816329956055, + "learning_rate": 3.764807285278329e-06, + "loss": 0.3523, + "num_input_tokens_seen": 97844864, + "step": 31090 + }, + { + "epoch": 1.9905895909352793, + "grad_norm": 32.15388870239258, + "learning_rate": 3.7643253720898882e-06, + "loss": 0.3876, + "num_input_tokens_seen": 97859648, + "step": 31095 + }, + { + "epoch": 1.9909096728762563, + "grad_norm": 18.873207092285156, + "learning_rate": 3.763843395767624e-06, + "loss": 0.4626, + "num_input_tokens_seen": 97875136, + "step": 31100 + }, + { + "epoch": 1.9912297548172333, + "grad_norm": 13.042328834533691, + "learning_rate": 3.763361356335604e-06, + "loss": 0.2729, + "num_input_tokens_seen": 97892352, + "step": 31105 + }, + { + "epoch": 1.99154983675821, + "grad_norm": 16.48528480529785, + "learning_rate": 3.762879253817898e-06, + "loss": 0.286, + "num_input_tokens_seen": 97907712, + "step": 31110 + }, + { + "epoch": 1.9918699186991868, + "grad_norm": 11.140523910522461, + "learning_rate": 3.7623970882385806e-06, + "loss": 0.3811, + "num_input_tokens_seen": 97923456, + "step": 31115 + }, + { + "epoch": 1.9921900006401638, + "grad_norm": 38.279197692871094, + "learning_rate": 3.7619148596217283e-06, + "loss": 0.4266, + "num_input_tokens_seen": 97939136, + "step": 31120 + }, + { + "epoch": 1.9925100825811408, + "grad_norm": 30.822389602661133, + "learning_rate": 3.761432567991421e-06, + "loss": 0.227, + "num_input_tokens_seen": 97954368, + "step": 31125 + }, + { + "epoch": 1.9928301645221178, + "grad_norm": 7.610524654388428, + "learning_rate": 3.760950213371742e-06, + "loss": 0.2321, + "num_input_tokens_seen": 97970240, + "step": 31130 + }, + { + "epoch": 1.9931502464630946, + "grad_norm": 20.473766326904297, + "learning_rate": 3.7604677957867763e-06, + "loss": 0.3824, + "num_input_tokens_seen": 97986304, + "step": 31135 + }, + { + "epoch": 1.9934703284040713, + "grad_norm": 14.924949645996094, + "learning_rate": 3.7599853152606157e-06, + "loss": 0.3487, + "num_input_tokens_seen": 98002880, + "step": 31140 + }, + { + "epoch": 1.9937904103450483, + "grad_norm": 7.047386646270752, + "learning_rate": 3.759502771817351e-06, + "loss": 0.245, + "num_input_tokens_seen": 98017920, + "step": 31145 + }, + { + "epoch": 1.9941104922860253, + "grad_norm": 21.109487533569336, + "learning_rate": 3.759020165481079e-06, + "loss": 0.3172, + "num_input_tokens_seen": 98033792, + "step": 31150 + }, + { + "epoch": 1.994430574227002, + "grad_norm": 45.460269927978516, + "learning_rate": 3.758537496275897e-06, + "loss": 0.5168, + "num_input_tokens_seen": 98049920, + "step": 31155 + }, + { + "epoch": 1.9947506561679789, + "grad_norm": 31.373950958251953, + "learning_rate": 3.758054764225908e-06, + "loss": 0.4145, + "num_input_tokens_seen": 98065536, + "step": 31160 + }, + { + "epoch": 1.9950707381089559, + "grad_norm": 8.323851585388184, + "learning_rate": 3.7575719693552165e-06, + "loss": 0.3843, + "num_input_tokens_seen": 98080832, + "step": 31165 + }, + { + "epoch": 1.9953908200499328, + "grad_norm": 25.942720413208008, + "learning_rate": 3.7570891116879326e-06, + "loss": 0.3953, + "num_input_tokens_seen": 98096192, + "step": 31170 + }, + { + "epoch": 1.9957109019909098, + "grad_norm": 14.968273162841797, + "learning_rate": 3.7566061912481657e-06, + "loss": 0.2898, + "num_input_tokens_seen": 98111232, + "step": 31175 + }, + { + "epoch": 1.9960309839318866, + "grad_norm": 15.001967430114746, + "learning_rate": 3.756123208060031e-06, + "loss": 0.2436, + "num_input_tokens_seen": 98126208, + "step": 31180 + }, + { + "epoch": 1.9963510658728634, + "grad_norm": 8.742140769958496, + "learning_rate": 3.7556401621476466e-06, + "loss": 0.2302, + "num_input_tokens_seen": 98142272, + "step": 31185 + }, + { + "epoch": 1.9966711478138404, + "grad_norm": 47.3300895690918, + "learning_rate": 3.7551570535351334e-06, + "loss": 0.2753, + "num_input_tokens_seen": 98157120, + "step": 31190 + }, + { + "epoch": 1.9969912297548174, + "grad_norm": 36.16213607788086, + "learning_rate": 3.7546738822466134e-06, + "loss": 0.2842, + "num_input_tokens_seen": 98173248, + "step": 31195 + }, + { + "epoch": 1.9973113116957941, + "grad_norm": 6.575536727905273, + "learning_rate": 3.754190648306216e-06, + "loss": 0.2516, + "num_input_tokens_seen": 98190080, + "step": 31200 + }, + { + "epoch": 1.997631393636771, + "grad_norm": 16.61985206604004, + "learning_rate": 3.75370735173807e-06, + "loss": 0.3013, + "num_input_tokens_seen": 98206400, + "step": 31205 + }, + { + "epoch": 1.9979514755777479, + "grad_norm": 29.525646209716797, + "learning_rate": 3.7532239925663094e-06, + "loss": 0.3755, + "num_input_tokens_seen": 98221312, + "step": 31210 + }, + { + "epoch": 1.9982715575187249, + "grad_norm": 56.64884567260742, + "learning_rate": 3.7527405708150707e-06, + "loss": 0.3245, + "num_input_tokens_seen": 98236672, + "step": 31215 + }, + { + "epoch": 1.9985916394597016, + "grad_norm": 21.97316551208496, + "learning_rate": 3.752257086508493e-06, + "loss": 0.3046, + "num_input_tokens_seen": 98252224, + "step": 31220 + }, + { + "epoch": 1.9989117214006784, + "grad_norm": 28.858659744262695, + "learning_rate": 3.7517735396707184e-06, + "loss": 0.3517, + "num_input_tokens_seen": 98267776, + "step": 31225 + }, + { + "epoch": 1.9992318033416554, + "grad_norm": 15.696198463439941, + "learning_rate": 3.7512899303258943e-06, + "loss": 0.2957, + "num_input_tokens_seen": 98284672, + "step": 31230 + }, + { + "epoch": 1.9995518852826324, + "grad_norm": 9.730144500732422, + "learning_rate": 3.7508062584981686e-06, + "loss": 0.3742, + "num_input_tokens_seen": 98301696, + "step": 31235 + }, + { + "epoch": 1.9998719672236094, + "grad_norm": 27.34050941467285, + "learning_rate": 3.7503225242116937e-06, + "loss": 0.3667, + "num_input_tokens_seen": 98317056, + "step": 31240 + }, + { + "epoch": 2.000192049164586, + "grad_norm": 6.792014122009277, + "learning_rate": 3.7498387274906253e-06, + "loss": 0.2658, + "num_input_tokens_seen": 98332416, + "step": 31245 + }, + { + "epoch": 2.0003840983291723, + "eval_loss": 0.4481422007083893, + "eval_runtime": 51.0153, + "eval_samples_per_second": 272.193, + "eval_steps_per_second": 34.029, + "num_input_tokens_seen": 98341056, + "step": 31248 + }, + { + "epoch": 2.000512131105563, + "grad_norm": 18.30593490600586, + "learning_rate": 3.7493548683591198e-06, + "loss": 0.1909, + "num_input_tokens_seen": 98346752, + "step": 31250 + }, + { + "epoch": 2.00083221304654, + "grad_norm": 22.62354278564453, + "learning_rate": 3.7488709468413405e-06, + "loss": 0.2203, + "num_input_tokens_seen": 98362944, + "step": 31255 + }, + { + "epoch": 2.001152294987517, + "grad_norm": 8.886079788208008, + "learning_rate": 3.748386962961451e-06, + "loss": 0.2375, + "num_input_tokens_seen": 98378112, + "step": 31260 + }, + { + "epoch": 2.001472376928494, + "grad_norm": 8.806995391845703, + "learning_rate": 3.7479029167436193e-06, + "loss": 0.1562, + "num_input_tokens_seen": 98393536, + "step": 31265 + }, + { + "epoch": 2.0017924588694704, + "grad_norm": 12.830883979797363, + "learning_rate": 3.747418808212016e-06, + "loss": 0.2236, + "num_input_tokens_seen": 98409088, + "step": 31270 + }, + { + "epoch": 2.0021125408104474, + "grad_norm": 8.249821662902832, + "learning_rate": 3.7469346373908145e-06, + "loss": 0.1775, + "num_input_tokens_seen": 98424768, + "step": 31275 + }, + { + "epoch": 2.0024326227514244, + "grad_norm": 29.100061416625977, + "learning_rate": 3.7464504043041925e-06, + "loss": 0.1839, + "num_input_tokens_seen": 98439872, + "step": 31280 + }, + { + "epoch": 2.0027527046924014, + "grad_norm": 9.943705558776855, + "learning_rate": 3.74596610897633e-06, + "loss": 0.167, + "num_input_tokens_seen": 98455296, + "step": 31285 + }, + { + "epoch": 2.003072786633378, + "grad_norm": 10.390896797180176, + "learning_rate": 3.74548175143141e-06, + "loss": 0.211, + "num_input_tokens_seen": 98471360, + "step": 31290 + }, + { + "epoch": 2.003392868574355, + "grad_norm": 5.428808689117432, + "learning_rate": 3.7449973316936195e-06, + "loss": 0.1803, + "num_input_tokens_seen": 98487360, + "step": 31295 + }, + { + "epoch": 2.003712950515332, + "grad_norm": 15.837689399719238, + "learning_rate": 3.7445128497871463e-06, + "loss": 0.2111, + "num_input_tokens_seen": 98503104, + "step": 31300 + }, + { + "epoch": 2.004033032456309, + "grad_norm": 11.82539176940918, + "learning_rate": 3.7440283057361844e-06, + "loss": 0.1779, + "num_input_tokens_seen": 98519744, + "step": 31305 + }, + { + "epoch": 2.004353114397286, + "grad_norm": 17.060874938964844, + "learning_rate": 3.7435436995649282e-06, + "loss": 0.176, + "num_input_tokens_seen": 98537408, + "step": 31310 + }, + { + "epoch": 2.0046731963382625, + "grad_norm": 9.664280891418457, + "learning_rate": 3.7430590312975774e-06, + "loss": 0.1705, + "num_input_tokens_seen": 98553728, + "step": 31315 + }, + { + "epoch": 2.0049932782792395, + "grad_norm": 5.091222763061523, + "learning_rate": 3.742574300958334e-06, + "loss": 0.2459, + "num_input_tokens_seen": 98570432, + "step": 31320 + }, + { + "epoch": 2.0053133602202164, + "grad_norm": 9.67857551574707, + "learning_rate": 3.7420895085714014e-06, + "loss": 0.2012, + "num_input_tokens_seen": 98586816, + "step": 31325 + }, + { + "epoch": 2.0056334421611934, + "grad_norm": 30.380226135253906, + "learning_rate": 3.7416046541609892e-06, + "loss": 0.2318, + "num_input_tokens_seen": 98602432, + "step": 31330 + }, + { + "epoch": 2.00595352410217, + "grad_norm": 5.336231708526611, + "learning_rate": 3.741119737751307e-06, + "loss": 0.1981, + "num_input_tokens_seen": 98617664, + "step": 31335 + }, + { + "epoch": 2.006273606043147, + "grad_norm": 9.214975357055664, + "learning_rate": 3.74063475936657e-06, + "loss": 0.2127, + "num_input_tokens_seen": 98632896, + "step": 31340 + }, + { + "epoch": 2.006593687984124, + "grad_norm": 9.22287368774414, + "learning_rate": 3.7401497190309955e-06, + "loss": 0.1451, + "num_input_tokens_seen": 98648768, + "step": 31345 + }, + { + "epoch": 2.006913769925101, + "grad_norm": 70.18025207519531, + "learning_rate": 3.7396646167688043e-06, + "loss": 0.144, + "num_input_tokens_seen": 98664000, + "step": 31350 + }, + { + "epoch": 2.0072338518660775, + "grad_norm": 17.804231643676758, + "learning_rate": 3.7391794526042192e-06, + "loss": 0.3766, + "num_input_tokens_seen": 98678912, + "step": 31355 + }, + { + "epoch": 2.0075539338070545, + "grad_norm": 7.520683765411377, + "learning_rate": 3.7386942265614667e-06, + "loss": 0.2094, + "num_input_tokens_seen": 98694144, + "step": 31360 + }, + { + "epoch": 2.0078740157480315, + "grad_norm": 13.408977508544922, + "learning_rate": 3.738208938664776e-06, + "loss": 0.2508, + "num_input_tokens_seen": 98709376, + "step": 31365 + }, + { + "epoch": 2.0081940976890085, + "grad_norm": 23.664573669433594, + "learning_rate": 3.737723588938381e-06, + "loss": 0.1977, + "num_input_tokens_seen": 98724864, + "step": 31370 + }, + { + "epoch": 2.0085141796299855, + "grad_norm": 5.740665435791016, + "learning_rate": 3.737238177406518e-06, + "loss": 0.1377, + "num_input_tokens_seen": 98740160, + "step": 31375 + }, + { + "epoch": 2.008834261570962, + "grad_norm": 15.011392593383789, + "learning_rate": 3.736752704093424e-06, + "loss": 0.1949, + "num_input_tokens_seen": 98755456, + "step": 31380 + }, + { + "epoch": 2.009154343511939, + "grad_norm": 11.414643287658691, + "learning_rate": 3.736267169023342e-06, + "loss": 0.2044, + "num_input_tokens_seen": 98773504, + "step": 31385 + }, + { + "epoch": 2.009474425452916, + "grad_norm": 11.1210355758667, + "learning_rate": 3.735781572220517e-06, + "loss": 0.1996, + "num_input_tokens_seen": 98789120, + "step": 31390 + }, + { + "epoch": 2.009794507393893, + "grad_norm": 41.15389633178711, + "learning_rate": 3.735295913709197e-06, + "loss": 0.2712, + "num_input_tokens_seen": 98805440, + "step": 31395 + }, + { + "epoch": 2.0101145893348695, + "grad_norm": 21.766992568969727, + "learning_rate": 3.734810193513634e-06, + "loss": 0.1854, + "num_input_tokens_seen": 98821824, + "step": 31400 + }, + { + "epoch": 2.0104346712758465, + "grad_norm": 12.251710891723633, + "learning_rate": 3.7343244116580816e-06, + "loss": 0.2738, + "num_input_tokens_seen": 98837184, + "step": 31405 + }, + { + "epoch": 2.0107547532168235, + "grad_norm": 13.790022850036621, + "learning_rate": 3.7338385681667976e-06, + "loss": 0.2292, + "num_input_tokens_seen": 98852800, + "step": 31410 + }, + { + "epoch": 2.0110748351578005, + "grad_norm": 3.8673017024993896, + "learning_rate": 3.7333526630640425e-06, + "loss": 0.2003, + "num_input_tokens_seen": 98868480, + "step": 31415 + }, + { + "epoch": 2.0113949170987775, + "grad_norm": 6.530966281890869, + "learning_rate": 3.732866696374079e-06, + "loss": 0.1576, + "num_input_tokens_seen": 98883072, + "step": 31420 + }, + { + "epoch": 2.011714999039754, + "grad_norm": 9.77780818939209, + "learning_rate": 3.732380668121175e-06, + "loss": 0.1872, + "num_input_tokens_seen": 98898560, + "step": 31425 + }, + { + "epoch": 2.012035080980731, + "grad_norm": 51.7783088684082, + "learning_rate": 3.7318945783296002e-06, + "loss": 0.2457, + "num_input_tokens_seen": 98914112, + "step": 31430 + }, + { + "epoch": 2.012355162921708, + "grad_norm": 31.83150291442871, + "learning_rate": 3.731408427023626e-06, + "loss": 0.1851, + "num_input_tokens_seen": 98930752, + "step": 31435 + }, + { + "epoch": 2.012675244862685, + "grad_norm": 9.19583511352539, + "learning_rate": 3.730922214227529e-06, + "loss": 0.1586, + "num_input_tokens_seen": 98945920, + "step": 31440 + }, + { + "epoch": 2.0129953268036616, + "grad_norm": 5.062085151672363, + "learning_rate": 3.730435939965589e-06, + "loss": 0.1758, + "num_input_tokens_seen": 98962560, + "step": 31445 + }, + { + "epoch": 2.0133154087446385, + "grad_norm": 5.372337818145752, + "learning_rate": 3.729949604262087e-06, + "loss": 0.1546, + "num_input_tokens_seen": 98978560, + "step": 31450 + }, + { + "epoch": 2.0136354906856155, + "grad_norm": 7.389028072357178, + "learning_rate": 3.7294632071413076e-06, + "loss": 0.1061, + "num_input_tokens_seen": 98993408, + "step": 31455 + }, + { + "epoch": 2.0139555726265925, + "grad_norm": 5.481876850128174, + "learning_rate": 3.7289767486275406e-06, + "loss": 0.123, + "num_input_tokens_seen": 99010112, + "step": 31460 + }, + { + "epoch": 2.0142756545675695, + "grad_norm": 95.7530517578125, + "learning_rate": 3.7284902287450765e-06, + "loss": 0.2356, + "num_input_tokens_seen": 99026880, + "step": 31465 + }, + { + "epoch": 2.014595736508546, + "grad_norm": 12.938974380493164, + "learning_rate": 3.7280036475182083e-06, + "loss": 0.1587, + "num_input_tokens_seen": 99043328, + "step": 31470 + }, + { + "epoch": 2.014915818449523, + "grad_norm": 9.863133430480957, + "learning_rate": 3.7275170049712352e-06, + "loss": 0.2923, + "num_input_tokens_seen": 99060544, + "step": 31475 + }, + { + "epoch": 2.0152359003905, + "grad_norm": 78.82652282714844, + "learning_rate": 3.7270303011284557e-06, + "loss": 0.2537, + "num_input_tokens_seen": 99075136, + "step": 31480 + }, + { + "epoch": 2.015555982331477, + "grad_norm": 54.07355499267578, + "learning_rate": 3.7265435360141757e-06, + "loss": 0.2203, + "num_input_tokens_seen": 99091200, + "step": 31485 + }, + { + "epoch": 2.0158760642724536, + "grad_norm": 20.5142822265625, + "learning_rate": 3.7260567096527e-06, + "loss": 0.0922, + "num_input_tokens_seen": 99106880, + "step": 31490 + }, + { + "epoch": 2.0161961462134306, + "grad_norm": 9.629528999328613, + "learning_rate": 3.7255698220683386e-06, + "loss": 0.2122, + "num_input_tokens_seen": 99122624, + "step": 31495 + }, + { + "epoch": 2.0165162281544076, + "grad_norm": 26.5911808013916, + "learning_rate": 3.7250828732854037e-06, + "loss": 0.2027, + "num_input_tokens_seen": 99137088, + "step": 31500 + }, + { + "epoch": 2.0168363100953846, + "grad_norm": 21.393993377685547, + "learning_rate": 3.7245958633282107e-06, + "loss": 0.2087, + "num_input_tokens_seen": 99152448, + "step": 31505 + }, + { + "epoch": 2.017156392036361, + "grad_norm": 6.850471496582031, + "learning_rate": 3.7241087922210796e-06, + "loss": 0.2042, + "num_input_tokens_seen": 99168128, + "step": 31510 + }, + { + "epoch": 2.017476473977338, + "grad_norm": 8.637240409851074, + "learning_rate": 3.7236216599883317e-06, + "loss": 0.218, + "num_input_tokens_seen": 99183808, + "step": 31515 + }, + { + "epoch": 2.017796555918315, + "grad_norm": 38.11037826538086, + "learning_rate": 3.7231344666542917e-06, + "loss": 0.1838, + "num_input_tokens_seen": 99198528, + "step": 31520 + }, + { + "epoch": 2.018116637859292, + "grad_norm": 9.2385892868042, + "learning_rate": 3.7226472122432877e-06, + "loss": 0.2434, + "num_input_tokens_seen": 99212864, + "step": 31525 + }, + { + "epoch": 2.018436719800269, + "grad_norm": 52.43635177612305, + "learning_rate": 3.72215989677965e-06, + "loss": 0.209, + "num_input_tokens_seen": 99228224, + "step": 31530 + }, + { + "epoch": 2.0187568017412456, + "grad_norm": 6.482731819152832, + "learning_rate": 3.721672520287713e-06, + "loss": 0.193, + "num_input_tokens_seen": 99244736, + "step": 31535 + }, + { + "epoch": 2.0190768836822226, + "grad_norm": 81.34902954101562, + "learning_rate": 3.721185082791814e-06, + "loss": 0.264, + "num_input_tokens_seen": 99260224, + "step": 31540 + }, + { + "epoch": 2.0193969656231996, + "grad_norm": 8.693144798278809, + "learning_rate": 3.720697584316293e-06, + "loss": 0.1281, + "num_input_tokens_seen": 99275968, + "step": 31545 + }, + { + "epoch": 2.0197170475641766, + "grad_norm": 12.498528480529785, + "learning_rate": 3.7202100248854932e-06, + "loss": 0.2161, + "num_input_tokens_seen": 99292032, + "step": 31550 + }, + { + "epoch": 2.020037129505153, + "grad_norm": 17.701993942260742, + "learning_rate": 3.719722404523761e-06, + "loss": 0.1193, + "num_input_tokens_seen": 99308288, + "step": 31555 + }, + { + "epoch": 2.02035721144613, + "grad_norm": 21.759164810180664, + "learning_rate": 3.719234723255444e-06, + "loss": 0.1895, + "num_input_tokens_seen": 99324480, + "step": 31560 + }, + { + "epoch": 2.020677293387107, + "grad_norm": 38.65891647338867, + "learning_rate": 3.718746981104896e-06, + "loss": 0.1933, + "num_input_tokens_seen": 99341696, + "step": 31565 + }, + { + "epoch": 2.020997375328084, + "grad_norm": 13.26035213470459, + "learning_rate": 3.7182591780964715e-06, + "loss": 0.1834, + "num_input_tokens_seen": 99356992, + "step": 31570 + }, + { + "epoch": 2.021317457269061, + "grad_norm": 9.50526237487793, + "learning_rate": 3.7177713142545306e-06, + "loss": 0.1159, + "num_input_tokens_seen": 99373248, + "step": 31575 + }, + { + "epoch": 2.0216375392100376, + "grad_norm": 6.374725818634033, + "learning_rate": 3.7172833896034332e-06, + "loss": 0.1897, + "num_input_tokens_seen": 99388160, + "step": 31580 + }, + { + "epoch": 2.0219576211510146, + "grad_norm": 53.19056701660156, + "learning_rate": 3.7167954041675435e-06, + "loss": 0.1918, + "num_input_tokens_seen": 99404800, + "step": 31585 + }, + { + "epoch": 2.0222777030919916, + "grad_norm": 6.648632526397705, + "learning_rate": 3.7163073579712294e-06, + "loss": 0.2138, + "num_input_tokens_seen": 99420736, + "step": 31590 + }, + { + "epoch": 2.0225977850329686, + "grad_norm": 10.460993766784668, + "learning_rate": 3.715819251038861e-06, + "loss": 0.247, + "num_input_tokens_seen": 99436288, + "step": 31595 + }, + { + "epoch": 2.022917866973945, + "grad_norm": 20.93423080444336, + "learning_rate": 3.715331083394813e-06, + "loss": 0.229, + "num_input_tokens_seen": 99451968, + "step": 31600 + }, + { + "epoch": 2.023237948914922, + "grad_norm": 12.920645713806152, + "learning_rate": 3.7148428550634607e-06, + "loss": 0.219, + "num_input_tokens_seen": 99467648, + "step": 31605 + }, + { + "epoch": 2.023558030855899, + "grad_norm": 15.648971557617188, + "learning_rate": 3.7143545660691845e-06, + "loss": 0.174, + "num_input_tokens_seen": 99482624, + "step": 31610 + }, + { + "epoch": 2.023878112796876, + "grad_norm": 7.402578830718994, + "learning_rate": 3.713866216436366e-06, + "loss": 0.316, + "num_input_tokens_seen": 99498240, + "step": 31615 + }, + { + "epoch": 2.0241981947378527, + "grad_norm": 7.495685577392578, + "learning_rate": 3.7133778061893915e-06, + "loss": 0.2307, + "num_input_tokens_seen": 99513536, + "step": 31620 + }, + { + "epoch": 2.0245182766788297, + "grad_norm": 23.52496337890625, + "learning_rate": 3.7128893353526495e-06, + "loss": 0.193, + "num_input_tokens_seen": 99530816, + "step": 31625 + }, + { + "epoch": 2.0248383586198067, + "grad_norm": 75.79849243164062, + "learning_rate": 3.712400803950532e-06, + "loss": 0.224, + "num_input_tokens_seen": 99546688, + "step": 31630 + }, + { + "epoch": 2.0251584405607836, + "grad_norm": 79.76666259765625, + "learning_rate": 3.7119122120074334e-06, + "loss": 0.1821, + "num_input_tokens_seen": 99563072, + "step": 31635 + }, + { + "epoch": 2.0254785225017606, + "grad_norm": 19.802461624145508, + "learning_rate": 3.7114235595477522e-06, + "loss": 0.172, + "num_input_tokens_seen": 99578432, + "step": 31640 + }, + { + "epoch": 2.025798604442737, + "grad_norm": 39.66547393798828, + "learning_rate": 3.7109348465958884e-06, + "loss": 0.2526, + "num_input_tokens_seen": 99594048, + "step": 31645 + }, + { + "epoch": 2.026118686383714, + "grad_norm": 10.36485481262207, + "learning_rate": 3.710446073176245e-06, + "loss": 0.1652, + "num_input_tokens_seen": 99610496, + "step": 31650 + }, + { + "epoch": 2.026438768324691, + "grad_norm": 63.92506790161133, + "learning_rate": 3.7099572393132303e-06, + "loss": 0.2609, + "num_input_tokens_seen": 99626432, + "step": 31655 + }, + { + "epoch": 2.026758850265668, + "grad_norm": 4.559746265411377, + "learning_rate": 3.709468345031254e-06, + "loss": 0.1763, + "num_input_tokens_seen": 99641088, + "step": 31660 + }, + { + "epoch": 2.0270789322066447, + "grad_norm": 12.730093002319336, + "learning_rate": 3.7089793903547276e-06, + "loss": 0.2441, + "num_input_tokens_seen": 99656448, + "step": 31665 + }, + { + "epoch": 2.0273990141476217, + "grad_norm": 8.44605827331543, + "learning_rate": 3.708490375308068e-06, + "loss": 0.2303, + "num_input_tokens_seen": 99671424, + "step": 31670 + }, + { + "epoch": 2.0277190960885987, + "grad_norm": 13.058996200561523, + "learning_rate": 3.7080012999156943e-06, + "loss": 0.2227, + "num_input_tokens_seen": 99685952, + "step": 31675 + }, + { + "epoch": 2.0280391780295757, + "grad_norm": 6.022151947021484, + "learning_rate": 3.7075121642020273e-06, + "loss": 0.1612, + "num_input_tokens_seen": 99701760, + "step": 31680 + }, + { + "epoch": 2.0283592599705527, + "grad_norm": 8.362464904785156, + "learning_rate": 3.7070229681914927e-06, + "loss": 0.1307, + "num_input_tokens_seen": 99717056, + "step": 31685 + }, + { + "epoch": 2.028679341911529, + "grad_norm": 7.398426055908203, + "learning_rate": 3.7065337119085182e-06, + "loss": 0.2032, + "num_input_tokens_seen": 99733184, + "step": 31690 + }, + { + "epoch": 2.028999423852506, + "grad_norm": 138.2696075439453, + "learning_rate": 3.706044395377535e-06, + "loss": 0.2394, + "num_input_tokens_seen": 99750464, + "step": 31695 + }, + { + "epoch": 2.029319505793483, + "grad_norm": 9.964588165283203, + "learning_rate": 3.7055550186229765e-06, + "loss": 0.2861, + "num_input_tokens_seen": 99765504, + "step": 31700 + }, + { + "epoch": 2.02963958773446, + "grad_norm": 15.346993446350098, + "learning_rate": 3.7050655816692804e-06, + "loss": 0.26, + "num_input_tokens_seen": 99781376, + "step": 31705 + }, + { + "epoch": 2.0299596696754367, + "grad_norm": 12.238480567932129, + "learning_rate": 3.7045760845408853e-06, + "loss": 0.2029, + "num_input_tokens_seen": 99798528, + "step": 31710 + }, + { + "epoch": 2.0302797516164137, + "grad_norm": 6.684248447418213, + "learning_rate": 3.7040865272622352e-06, + "loss": 0.2184, + "num_input_tokens_seen": 99813952, + "step": 31715 + }, + { + "epoch": 2.0305998335573907, + "grad_norm": 8.350740432739258, + "learning_rate": 3.7035969098577764e-06, + "loss": 0.2437, + "num_input_tokens_seen": 99828480, + "step": 31720 + }, + { + "epoch": 2.0309199154983677, + "grad_norm": 12.255497932434082, + "learning_rate": 3.7031072323519567e-06, + "loss": 0.2714, + "num_input_tokens_seen": 99844608, + "step": 31725 + }, + { + "epoch": 2.0312399974393447, + "grad_norm": 9.92345142364502, + "learning_rate": 3.7026174947692284e-06, + "loss": 0.2792, + "num_input_tokens_seen": 99860608, + "step": 31730 + }, + { + "epoch": 2.0315600793803212, + "grad_norm": 31.20443344116211, + "learning_rate": 3.702127697134047e-06, + "loss": 0.2103, + "num_input_tokens_seen": 99877248, + "step": 31735 + }, + { + "epoch": 2.0318801613212982, + "grad_norm": 18.597299575805664, + "learning_rate": 3.7016378394708694e-06, + "loss": 0.2149, + "num_input_tokens_seen": 99894016, + "step": 31740 + }, + { + "epoch": 2.032200243262275, + "grad_norm": 9.636820793151855, + "learning_rate": 3.701147921804158e-06, + "loss": 0.2266, + "num_input_tokens_seen": 99909888, + "step": 31745 + }, + { + "epoch": 2.032520325203252, + "grad_norm": 35.8570671081543, + "learning_rate": 3.7006579441583756e-06, + "loss": 0.1853, + "num_input_tokens_seen": 99926272, + "step": 31750 + }, + { + "epoch": 2.0328404071442288, + "grad_norm": 16.92479133605957, + "learning_rate": 3.7001679065579893e-06, + "loss": 0.2258, + "num_input_tokens_seen": 99942464, + "step": 31755 + }, + { + "epoch": 2.0331604890852057, + "grad_norm": 18.906003952026367, + "learning_rate": 3.69967780902747e-06, + "loss": 0.2136, + "num_input_tokens_seen": 99956544, + "step": 31760 + }, + { + "epoch": 2.0334805710261827, + "grad_norm": 9.982441902160645, + "learning_rate": 3.699187651591288e-06, + "loss": 0.1381, + "num_input_tokens_seen": 99972032, + "step": 31765 + }, + { + "epoch": 2.0338006529671597, + "grad_norm": 16.048145294189453, + "learning_rate": 3.698697434273923e-06, + "loss": 0.2557, + "num_input_tokens_seen": 99987520, + "step": 31770 + }, + { + "epoch": 2.0341207349081363, + "grad_norm": 27.292301177978516, + "learning_rate": 3.6982071570998512e-06, + "loss": 0.2953, + "num_input_tokens_seen": 100003008, + "step": 31775 + }, + { + "epoch": 2.0344408168491133, + "grad_norm": 7.143673896789551, + "learning_rate": 3.6977168200935554e-06, + "loss": 0.1572, + "num_input_tokens_seen": 100019136, + "step": 31780 + }, + { + "epoch": 2.0347608987900903, + "grad_norm": 6.098036766052246, + "learning_rate": 3.6972264232795198e-06, + "loss": 0.2309, + "num_input_tokens_seen": 100034688, + "step": 31785 + }, + { + "epoch": 2.0350809807310672, + "grad_norm": 3.621169328689575, + "learning_rate": 3.6967359666822333e-06, + "loss": 0.2005, + "num_input_tokens_seen": 100049536, + "step": 31790 + }, + { + "epoch": 2.0354010626720442, + "grad_norm": 7.0114946365356445, + "learning_rate": 3.696245450326186e-06, + "loss": 0.1672, + "num_input_tokens_seen": 100064768, + "step": 31795 + }, + { + "epoch": 2.035721144613021, + "grad_norm": 7.041891574859619, + "learning_rate": 3.6957548742358706e-06, + "loss": 0.1763, + "num_input_tokens_seen": 100080448, + "step": 31800 + }, + { + "epoch": 2.0360412265539978, + "grad_norm": 7.263533592224121, + "learning_rate": 3.6952642384357867e-06, + "loss": 0.1021, + "num_input_tokens_seen": 100095296, + "step": 31805 + }, + { + "epoch": 2.0363613084949748, + "grad_norm": 19.61225128173828, + "learning_rate": 3.694773542950433e-06, + "loss": 0.1704, + "num_input_tokens_seen": 100111936, + "step": 31810 + }, + { + "epoch": 2.0366813904359518, + "grad_norm": 4.729818820953369, + "learning_rate": 3.694282787804311e-06, + "loss": 0.1592, + "num_input_tokens_seen": 100127296, + "step": 31815 + }, + { + "epoch": 2.0370014723769283, + "grad_norm": 11.538938522338867, + "learning_rate": 3.693791973021928e-06, + "loss": 0.256, + "num_input_tokens_seen": 100142464, + "step": 31820 + }, + { + "epoch": 2.0373215543179053, + "grad_norm": 9.754230499267578, + "learning_rate": 3.693301098627791e-06, + "loss": 0.2572, + "num_input_tokens_seen": 100157952, + "step": 31825 + }, + { + "epoch": 2.0376416362588823, + "grad_norm": 15.75890064239502, + "learning_rate": 3.692810164646414e-06, + "loss": 0.1787, + "num_input_tokens_seen": 100173312, + "step": 31830 + }, + { + "epoch": 2.0379617181998593, + "grad_norm": 26.702373504638672, + "learning_rate": 3.6923191711023097e-06, + "loss": 0.2623, + "num_input_tokens_seen": 100188352, + "step": 31835 + }, + { + "epoch": 2.0382818001408363, + "grad_norm": 11.657258987426758, + "learning_rate": 3.6918281180199977e-06, + "loss": 0.2514, + "num_input_tokens_seen": 100204032, + "step": 31840 + }, + { + "epoch": 2.038601882081813, + "grad_norm": 31.518159866333008, + "learning_rate": 3.6913370054239967e-06, + "loss": 0.1957, + "num_input_tokens_seen": 100219008, + "step": 31845 + }, + { + "epoch": 2.03892196402279, + "grad_norm": 5.756220817565918, + "learning_rate": 3.690845833338831e-06, + "loss": 0.2899, + "num_input_tokens_seen": 100233856, + "step": 31850 + }, + { + "epoch": 2.039242045963767, + "grad_norm": 23.58654022216797, + "learning_rate": 3.6903546017890275e-06, + "loss": 0.2437, + "num_input_tokens_seen": 100248256, + "step": 31855 + }, + { + "epoch": 2.039562127904744, + "grad_norm": 6.71699857711792, + "learning_rate": 3.6898633107991143e-06, + "loss": 0.1458, + "num_input_tokens_seen": 100263616, + "step": 31860 + }, + { + "epoch": 2.0398822098457203, + "grad_norm": 4.603450775146484, + "learning_rate": 3.689371960393627e-06, + "loss": 0.1635, + "num_input_tokens_seen": 100279744, + "step": 31865 + }, + { + "epoch": 2.0402022917866973, + "grad_norm": 7.007498264312744, + "learning_rate": 3.688880550597098e-06, + "loss": 0.1918, + "num_input_tokens_seen": 100295552, + "step": 31870 + }, + { + "epoch": 2.0405223737276743, + "grad_norm": 8.12877368927002, + "learning_rate": 3.6883890814340678e-06, + "loss": 0.2051, + "num_input_tokens_seen": 100310912, + "step": 31875 + }, + { + "epoch": 2.0408424556686513, + "grad_norm": 8.770915985107422, + "learning_rate": 3.687897552929076e-06, + "loss": 0.2058, + "num_input_tokens_seen": 100326464, + "step": 31880 + }, + { + "epoch": 2.041162537609628, + "grad_norm": 5.729369640350342, + "learning_rate": 3.687405965106669e-06, + "loss": 0.2004, + "num_input_tokens_seen": 100341568, + "step": 31885 + }, + { + "epoch": 2.041482619550605, + "grad_norm": 6.300750255584717, + "learning_rate": 3.6869143179913923e-06, + "loss": 0.1228, + "num_input_tokens_seen": 100357696, + "step": 31890 + }, + { + "epoch": 2.041802701491582, + "grad_norm": 4.117056369781494, + "learning_rate": 3.6864226116077977e-06, + "loss": 0.2019, + "num_input_tokens_seen": 100373248, + "step": 31895 + }, + { + "epoch": 2.042122783432559, + "grad_norm": 6.200119495391846, + "learning_rate": 3.685930845980438e-06, + "loss": 0.1433, + "num_input_tokens_seen": 100388672, + "step": 31900 + }, + { + "epoch": 2.042442865373536, + "grad_norm": 7.8876800537109375, + "learning_rate": 3.685439021133868e-06, + "loss": 0.2607, + "num_input_tokens_seen": 100405376, + "step": 31905 + }, + { + "epoch": 2.0427629473145124, + "grad_norm": 10.182713508605957, + "learning_rate": 3.6849471370926483e-06, + "loss": 0.248, + "num_input_tokens_seen": 100421376, + "step": 31910 + }, + { + "epoch": 2.0430830292554893, + "grad_norm": 26.95984649658203, + "learning_rate": 3.68445519388134e-06, + "loss": 0.1678, + "num_input_tokens_seen": 100436096, + "step": 31915 + }, + { + "epoch": 2.0434031111964663, + "grad_norm": 12.574654579162598, + "learning_rate": 3.68396319152451e-06, + "loss": 0.2534, + "num_input_tokens_seen": 100450752, + "step": 31920 + }, + { + "epoch": 2.0437231931374433, + "grad_norm": 14.061174392700195, + "learning_rate": 3.6834711300467254e-06, + "loss": 0.302, + "num_input_tokens_seen": 100467392, + "step": 31925 + }, + { + "epoch": 2.04404327507842, + "grad_norm": 11.90701675415039, + "learning_rate": 3.682979009472557e-06, + "loss": 0.1205, + "num_input_tokens_seen": 100483712, + "step": 31930 + }, + { + "epoch": 2.044363357019397, + "grad_norm": 10.487199783325195, + "learning_rate": 3.6824868298265782e-06, + "loss": 0.269, + "num_input_tokens_seen": 100499648, + "step": 31935 + }, + { + "epoch": 2.044683438960374, + "grad_norm": 9.061062812805176, + "learning_rate": 3.6819945911333666e-06, + "loss": 0.2446, + "num_input_tokens_seen": 100515008, + "step": 31940 + }, + { + "epoch": 2.045003520901351, + "grad_norm": 7.368435859680176, + "learning_rate": 3.681502293417502e-06, + "loss": 0.1921, + "num_input_tokens_seen": 100529664, + "step": 31945 + }, + { + "epoch": 2.045323602842328, + "grad_norm": 11.183034896850586, + "learning_rate": 3.681009936703567e-06, + "loss": 0.2747, + "num_input_tokens_seen": 100545088, + "step": 31950 + }, + { + "epoch": 2.0456436847833044, + "grad_norm": 4.403030872344971, + "learning_rate": 3.680517521016148e-06, + "loss": 0.209, + "num_input_tokens_seen": 100560576, + "step": 31955 + }, + { + "epoch": 2.0459637667242814, + "grad_norm": 16.821407318115234, + "learning_rate": 3.680025046379833e-06, + "loss": 0.2114, + "num_input_tokens_seen": 100576640, + "step": 31960 + }, + { + "epoch": 2.0462838486652584, + "grad_norm": 35.166316986083984, + "learning_rate": 3.679532512819213e-06, + "loss": 0.2232, + "num_input_tokens_seen": 100591360, + "step": 31965 + }, + { + "epoch": 2.0466039306062354, + "grad_norm": 6.6109209060668945, + "learning_rate": 3.6790399203588834e-06, + "loss": 0.1649, + "num_input_tokens_seen": 100606912, + "step": 31970 + }, + { + "epoch": 2.046924012547212, + "grad_norm": 10.415719985961914, + "learning_rate": 3.6785472690234415e-06, + "loss": 0.2078, + "num_input_tokens_seen": 100623552, + "step": 31975 + }, + { + "epoch": 2.047244094488189, + "grad_norm": 4.198312759399414, + "learning_rate": 3.6780545588374883e-06, + "loss": 0.2281, + "num_input_tokens_seen": 100638720, + "step": 31980 + }, + { + "epoch": 2.047564176429166, + "grad_norm": 33.428707122802734, + "learning_rate": 3.6775617898256267e-06, + "loss": 0.2779, + "num_input_tokens_seen": 100654592, + "step": 31985 + }, + { + "epoch": 2.047884258370143, + "grad_norm": 10.219470024108887, + "learning_rate": 3.6770689620124626e-06, + "loss": 0.2415, + "num_input_tokens_seen": 100670336, + "step": 31990 + }, + { + "epoch": 2.04820434031112, + "grad_norm": 13.0357084274292, + "learning_rate": 3.6765760754226053e-06, + "loss": 0.1341, + "num_input_tokens_seen": 100685696, + "step": 31995 + }, + { + "epoch": 2.0485244222520964, + "grad_norm": 23.49104118347168, + "learning_rate": 3.6760831300806687e-06, + "loss": 0.1803, + "num_input_tokens_seen": 100700352, + "step": 32000 + }, + { + "epoch": 2.0488445041930734, + "grad_norm": 12.037748336791992, + "learning_rate": 3.675590126011266e-06, + "loss": 0.2749, + "num_input_tokens_seen": 100716544, + "step": 32005 + }, + { + "epoch": 2.0491645861340504, + "grad_norm": 14.110736846923828, + "learning_rate": 3.6750970632390156e-06, + "loss": 0.2017, + "num_input_tokens_seen": 100732096, + "step": 32010 + }, + { + "epoch": 2.0494846680750274, + "grad_norm": 5.19609260559082, + "learning_rate": 3.6746039417885387e-06, + "loss": 0.2598, + "num_input_tokens_seen": 100747968, + "step": 32015 + }, + { + "epoch": 2.049804750016004, + "grad_norm": 4.594327449798584, + "learning_rate": 3.674110761684459e-06, + "loss": 0.1603, + "num_input_tokens_seen": 100763648, + "step": 32020 + }, + { + "epoch": 2.050124831956981, + "grad_norm": 61.543426513671875, + "learning_rate": 3.6736175229514036e-06, + "loss": 0.2008, + "num_input_tokens_seen": 100783488, + "step": 32025 + }, + { + "epoch": 2.050444913897958, + "grad_norm": 26.84408950805664, + "learning_rate": 3.6731242256140033e-06, + "loss": 0.2693, + "num_input_tokens_seen": 100799552, + "step": 32030 + }, + { + "epoch": 2.050764995838935, + "grad_norm": 39.33397674560547, + "learning_rate": 3.672630869696888e-06, + "loss": 0.273, + "num_input_tokens_seen": 100816192, + "step": 32035 + }, + { + "epoch": 2.0510850777799114, + "grad_norm": 29.31500244140625, + "learning_rate": 3.6721374552246962e-06, + "loss": 0.1789, + "num_input_tokens_seen": 100831680, + "step": 32040 + }, + { + "epoch": 2.0514051597208884, + "grad_norm": 26.08116912841797, + "learning_rate": 3.6716439822220656e-06, + "loss": 0.2565, + "num_input_tokens_seen": 100846400, + "step": 32045 + }, + { + "epoch": 2.0517252416618654, + "grad_norm": 20.268341064453125, + "learning_rate": 3.671150450713637e-06, + "loss": 0.1367, + "num_input_tokens_seen": 100862400, + "step": 32050 + }, + { + "epoch": 2.0520453236028424, + "grad_norm": 9.696059226989746, + "learning_rate": 3.670656860724055e-06, + "loss": 0.2691, + "num_input_tokens_seen": 100877312, + "step": 32055 + }, + { + "epoch": 2.0523654055438194, + "grad_norm": 176.3709259033203, + "learning_rate": 3.6701632122779674e-06, + "loss": 0.1851, + "num_input_tokens_seen": 100892416, + "step": 32060 + }, + { + "epoch": 2.052685487484796, + "grad_norm": 4.976164817810059, + "learning_rate": 3.669669505400024e-06, + "loss": 0.1442, + "num_input_tokens_seen": 100908032, + "step": 32065 + }, + { + "epoch": 2.053005569425773, + "grad_norm": 13.827176094055176, + "learning_rate": 3.6691757401148785e-06, + "loss": 0.2362, + "num_input_tokens_seen": 100924352, + "step": 32070 + }, + { + "epoch": 2.05332565136675, + "grad_norm": 4.688620567321777, + "learning_rate": 3.668681916447186e-06, + "loss": 0.1598, + "num_input_tokens_seen": 100940032, + "step": 32075 + }, + { + "epoch": 2.053645733307727, + "grad_norm": 82.8245620727539, + "learning_rate": 3.668188034421606e-06, + "loss": 0.2386, + "num_input_tokens_seen": 100957376, + "step": 32080 + }, + { + "epoch": 2.0539658152487035, + "grad_norm": 8.894316673278809, + "learning_rate": 3.6676940940628007e-06, + "loss": 0.2209, + "num_input_tokens_seen": 100972544, + "step": 32085 + }, + { + "epoch": 2.0542858971896805, + "grad_norm": 7.109041213989258, + "learning_rate": 3.6672000953954346e-06, + "loss": 0.1616, + "num_input_tokens_seen": 100988224, + "step": 32090 + }, + { + "epoch": 2.0546059791306575, + "grad_norm": 21.683687210083008, + "learning_rate": 3.666706038444175e-06, + "loss": 0.1884, + "num_input_tokens_seen": 101003968, + "step": 32095 + }, + { + "epoch": 2.0549260610716344, + "grad_norm": 42.486534118652344, + "learning_rate": 3.6662119232336933e-06, + "loss": 0.3679, + "num_input_tokens_seen": 101018368, + "step": 32100 + }, + { + "epoch": 2.0552461430126114, + "grad_norm": 10.531460762023926, + "learning_rate": 3.6657177497886633e-06, + "loss": 0.1868, + "num_input_tokens_seen": 101032896, + "step": 32105 + }, + { + "epoch": 2.055566224953588, + "grad_norm": 9.307168960571289, + "learning_rate": 3.6652235181337605e-06, + "loss": 0.1823, + "num_input_tokens_seen": 101049792, + "step": 32110 + }, + { + "epoch": 2.055886306894565, + "grad_norm": 12.77942180633545, + "learning_rate": 3.664729228293664e-06, + "loss": 0.1792, + "num_input_tokens_seen": 101065600, + "step": 32115 + }, + { + "epoch": 2.056206388835542, + "grad_norm": 31.994068145751953, + "learning_rate": 3.6642348802930576e-06, + "loss": 0.1921, + "num_input_tokens_seen": 101081088, + "step": 32120 + }, + { + "epoch": 2.056526470776519, + "grad_norm": 21.702102661132812, + "learning_rate": 3.663740474156625e-06, + "loss": 0.209, + "num_input_tokens_seen": 101096704, + "step": 32125 + }, + { + "epoch": 2.0568465527174955, + "grad_norm": 31.458465576171875, + "learning_rate": 3.6632460099090555e-06, + "loss": 0.1708, + "num_input_tokens_seen": 101111936, + "step": 32130 + }, + { + "epoch": 2.0571666346584725, + "grad_norm": 35.98301315307617, + "learning_rate": 3.6627514875750386e-06, + "loss": 0.3031, + "num_input_tokens_seen": 101127744, + "step": 32135 + }, + { + "epoch": 2.0574867165994495, + "grad_norm": 4.394067287445068, + "learning_rate": 3.662256907179269e-06, + "loss": 0.1409, + "num_input_tokens_seen": 101143872, + "step": 32140 + }, + { + "epoch": 2.0578067985404265, + "grad_norm": 8.302879333496094, + "learning_rate": 3.6617622687464446e-06, + "loss": 0.2047, + "num_input_tokens_seen": 101160000, + "step": 32145 + }, + { + "epoch": 2.058126880481403, + "grad_norm": 20.850507736206055, + "learning_rate": 3.6612675723012626e-06, + "loss": 0.3062, + "num_input_tokens_seen": 101176256, + "step": 32150 + }, + { + "epoch": 2.05844696242238, + "grad_norm": 14.78418254852295, + "learning_rate": 3.660772817868427e-06, + "loss": 0.1899, + "num_input_tokens_seen": 101192192, + "step": 32155 + }, + { + "epoch": 2.058767044363357, + "grad_norm": 28.942903518676758, + "learning_rate": 3.660278005472643e-06, + "loss": 0.173, + "num_input_tokens_seen": 101208512, + "step": 32160 + }, + { + "epoch": 2.059087126304334, + "grad_norm": 11.777610778808594, + "learning_rate": 3.6597831351386203e-06, + "loss": 0.1725, + "num_input_tokens_seen": 101224384, + "step": 32165 + }, + { + "epoch": 2.059407208245311, + "grad_norm": 8.718653678894043, + "learning_rate": 3.6592882068910673e-06, + "loss": 0.1945, + "num_input_tokens_seen": 101239360, + "step": 32170 + }, + { + "epoch": 2.0597272901862875, + "grad_norm": 51.50399398803711, + "learning_rate": 3.6587932207547004e-06, + "loss": 0.1768, + "num_input_tokens_seen": 101254528, + "step": 32175 + }, + { + "epoch": 2.0600473721272645, + "grad_norm": 70.89078521728516, + "learning_rate": 3.658298176754237e-06, + "loss": 0.2859, + "num_input_tokens_seen": 101270144, + "step": 32180 + }, + { + "epoch": 2.0603674540682415, + "grad_norm": 78.76889038085938, + "learning_rate": 3.657803074914395e-06, + "loss": 0.2116, + "num_input_tokens_seen": 101284992, + "step": 32185 + }, + { + "epoch": 2.0606875360092185, + "grad_norm": 7.242041110992432, + "learning_rate": 3.6573079152598982e-06, + "loss": 0.2396, + "num_input_tokens_seen": 101302784, + "step": 32190 + }, + { + "epoch": 2.061007617950195, + "grad_norm": 19.602622985839844, + "learning_rate": 3.656812697815472e-06, + "loss": 0.2634, + "num_input_tokens_seen": 101319552, + "step": 32195 + }, + { + "epoch": 2.061327699891172, + "grad_norm": 13.381507873535156, + "learning_rate": 3.656317422605846e-06, + "loss": 0.1979, + "num_input_tokens_seen": 101334272, + "step": 32200 + }, + { + "epoch": 2.061647781832149, + "grad_norm": 16.052385330200195, + "learning_rate": 3.655822089655751e-06, + "loss": 0.1825, + "num_input_tokens_seen": 101350208, + "step": 32205 + }, + { + "epoch": 2.061967863773126, + "grad_norm": 2.0148000717163086, + "learning_rate": 3.6553266989899207e-06, + "loss": 0.1583, + "num_input_tokens_seen": 101364928, + "step": 32210 + }, + { + "epoch": 2.062287945714103, + "grad_norm": 5.830702304840088, + "learning_rate": 3.6548312506330934e-06, + "loss": 0.1841, + "num_input_tokens_seen": 101382208, + "step": 32215 + }, + { + "epoch": 2.0626080276550796, + "grad_norm": 48.85504913330078, + "learning_rate": 3.6543357446100085e-06, + "loss": 0.2441, + "num_input_tokens_seen": 101398400, + "step": 32220 + }, + { + "epoch": 2.0629281095960565, + "grad_norm": 14.2472505569458, + "learning_rate": 3.653840180945409e-06, + "loss": 0.2626, + "num_input_tokens_seen": 101414720, + "step": 32225 + }, + { + "epoch": 2.0632481915370335, + "grad_norm": 11.085589408874512, + "learning_rate": 3.6533445596640414e-06, + "loss": 0.1868, + "num_input_tokens_seen": 101429440, + "step": 32230 + }, + { + "epoch": 2.0635682734780105, + "grad_norm": 14.76227855682373, + "learning_rate": 3.6528488807906543e-06, + "loss": 0.2584, + "num_input_tokens_seen": 101444608, + "step": 32235 + }, + { + "epoch": 2.063888355418987, + "grad_norm": 28.67974090576172, + "learning_rate": 3.652353144349999e-06, + "loss": 0.2329, + "num_input_tokens_seen": 101460800, + "step": 32240 + }, + { + "epoch": 2.064208437359964, + "grad_norm": 54.29447937011719, + "learning_rate": 3.65185735036683e-06, + "loss": 0.2626, + "num_input_tokens_seen": 101477888, + "step": 32245 + }, + { + "epoch": 2.064528519300941, + "grad_norm": 12.094378471374512, + "learning_rate": 3.651361498865904e-06, + "loss": 0.1905, + "num_input_tokens_seen": 101493632, + "step": 32250 + }, + { + "epoch": 2.064848601241918, + "grad_norm": 12.054615020751953, + "learning_rate": 3.6508655898719824e-06, + "loss": 0.1524, + "num_input_tokens_seen": 101509760, + "step": 32255 + }, + { + "epoch": 2.065168683182895, + "grad_norm": 15.24704647064209, + "learning_rate": 3.650369623409828e-06, + "loss": 0.2483, + "num_input_tokens_seen": 101527104, + "step": 32260 + }, + { + "epoch": 2.0654887651238716, + "grad_norm": 4.543539524078369, + "learning_rate": 3.6498735995042066e-06, + "loss": 0.1585, + "num_input_tokens_seen": 101543040, + "step": 32265 + }, + { + "epoch": 2.0658088470648486, + "grad_norm": 19.653066635131836, + "learning_rate": 3.6493775181798866e-06, + "loss": 0.2534, + "num_input_tokens_seen": 101561024, + "step": 32270 + }, + { + "epoch": 2.0661289290058256, + "grad_norm": 10.793724060058594, + "learning_rate": 3.6488813794616412e-06, + "loss": 0.2004, + "num_input_tokens_seen": 101576448, + "step": 32275 + }, + { + "epoch": 2.0664490109468026, + "grad_norm": 71.61993408203125, + "learning_rate": 3.6483851833742435e-06, + "loss": 0.2001, + "num_input_tokens_seen": 101592000, + "step": 32280 + }, + { + "epoch": 2.066769092887779, + "grad_norm": 23.244884490966797, + "learning_rate": 3.6478889299424713e-06, + "loss": 0.2706, + "num_input_tokens_seen": 101608192, + "step": 32285 + }, + { + "epoch": 2.067089174828756, + "grad_norm": 13.472075462341309, + "learning_rate": 3.647392619191106e-06, + "loss": 0.2167, + "num_input_tokens_seen": 101624320, + "step": 32290 + }, + { + "epoch": 2.067409256769733, + "grad_norm": 22.586896896362305, + "learning_rate": 3.6468962511449293e-06, + "loss": 0.2007, + "num_input_tokens_seen": 101640128, + "step": 32295 + }, + { + "epoch": 2.06772933871071, + "grad_norm": 8.193096160888672, + "learning_rate": 3.646399825828727e-06, + "loss": 0.2372, + "num_input_tokens_seen": 101655808, + "step": 32300 + }, + { + "epoch": 2.0680494206516866, + "grad_norm": 14.021180152893066, + "learning_rate": 3.6459033432672898e-06, + "loss": 0.2353, + "num_input_tokens_seen": 101670080, + "step": 32305 + }, + { + "epoch": 2.0683695025926636, + "grad_norm": 53.65520477294922, + "learning_rate": 3.645406803485407e-06, + "loss": 0.2576, + "num_input_tokens_seen": 101685184, + "step": 32310 + }, + { + "epoch": 2.0686895845336406, + "grad_norm": 16.39846420288086, + "learning_rate": 3.6449102065078767e-06, + "loss": 0.2226, + "num_input_tokens_seen": 101701184, + "step": 32315 + }, + { + "epoch": 2.0690096664746176, + "grad_norm": 23.795228958129883, + "learning_rate": 3.6444135523594933e-06, + "loss": 0.3052, + "num_input_tokens_seen": 101718592, + "step": 32320 + }, + { + "epoch": 2.0693297484155946, + "grad_norm": 36.62592697143555, + "learning_rate": 3.6439168410650576e-06, + "loss": 0.2081, + "num_input_tokens_seen": 101733632, + "step": 32325 + }, + { + "epoch": 2.069649830356571, + "grad_norm": 10.301275253295898, + "learning_rate": 3.643420072649374e-06, + "loss": 0.2453, + "num_input_tokens_seen": 101750400, + "step": 32330 + }, + { + "epoch": 2.069969912297548, + "grad_norm": 34.32440948486328, + "learning_rate": 3.6429232471372478e-06, + "loss": 0.1596, + "num_input_tokens_seen": 101765888, + "step": 32335 + }, + { + "epoch": 2.070289994238525, + "grad_norm": 19.838294982910156, + "learning_rate": 3.6424263645534873e-06, + "loss": 0.1673, + "num_input_tokens_seen": 101781376, + "step": 32340 + }, + { + "epoch": 2.070610076179502, + "grad_norm": 8.788199424743652, + "learning_rate": 3.6419294249229053e-06, + "loss": 0.1531, + "num_input_tokens_seen": 101797760, + "step": 32345 + }, + { + "epoch": 2.0709301581204786, + "grad_norm": 5.4415788650512695, + "learning_rate": 3.641432428270316e-06, + "loss": 0.1952, + "num_input_tokens_seen": 101813760, + "step": 32350 + }, + { + "epoch": 2.0712502400614556, + "grad_norm": 30.643959045410156, + "learning_rate": 3.6409353746205367e-06, + "loss": 0.3066, + "num_input_tokens_seen": 101830144, + "step": 32355 + }, + { + "epoch": 2.0715703220024326, + "grad_norm": 9.071423530578613, + "learning_rate": 3.6404382639983883e-06, + "loss": 0.4124, + "num_input_tokens_seen": 101846848, + "step": 32360 + }, + { + "epoch": 2.0718904039434096, + "grad_norm": 7.45940637588501, + "learning_rate": 3.639941096428692e-06, + "loss": 0.2141, + "num_input_tokens_seen": 101861888, + "step": 32365 + }, + { + "epoch": 2.0722104858843866, + "grad_norm": 86.32014465332031, + "learning_rate": 3.6394438719362758e-06, + "loss": 0.2322, + "num_input_tokens_seen": 101877696, + "step": 32370 + }, + { + "epoch": 2.072530567825363, + "grad_norm": 8.984808921813965, + "learning_rate": 3.638946590545968e-06, + "loss": 0.1493, + "num_input_tokens_seen": 101892544, + "step": 32375 + }, + { + "epoch": 2.07285064976634, + "grad_norm": 9.638562202453613, + "learning_rate": 3.6384492522826e-06, + "loss": 0.2721, + "num_input_tokens_seen": 101908416, + "step": 32380 + }, + { + "epoch": 2.073170731707317, + "grad_norm": 5.949554443359375, + "learning_rate": 3.6379518571710053e-06, + "loss": 0.2413, + "num_input_tokens_seen": 101923712, + "step": 32385 + }, + { + "epoch": 2.073490813648294, + "grad_norm": 6.405786991119385, + "learning_rate": 3.637454405236023e-06, + "loss": 0.1763, + "num_input_tokens_seen": 101939968, + "step": 32390 + }, + { + "epoch": 2.0738108955892707, + "grad_norm": 9.54163932800293, + "learning_rate": 3.636956896502493e-06, + "loss": 0.1755, + "num_input_tokens_seen": 101954816, + "step": 32395 + }, + { + "epoch": 2.0741309775302477, + "grad_norm": 10.488677024841309, + "learning_rate": 3.636459330995257e-06, + "loss": 0.2567, + "num_input_tokens_seen": 101970048, + "step": 32400 + }, + { + "epoch": 2.0744510594712247, + "grad_norm": 83.64234161376953, + "learning_rate": 3.635961708739162e-06, + "loss": 0.2265, + "num_input_tokens_seen": 101985344, + "step": 32405 + }, + { + "epoch": 2.0747711414122016, + "grad_norm": 6.500986576080322, + "learning_rate": 3.635464029759056e-06, + "loss": 0.2665, + "num_input_tokens_seen": 102001088, + "step": 32410 + }, + { + "epoch": 2.075091223353178, + "grad_norm": 12.042566299438477, + "learning_rate": 3.634966294079791e-06, + "loss": 0.3026, + "num_input_tokens_seen": 102018048, + "step": 32415 + }, + { + "epoch": 2.075411305294155, + "grad_norm": 63.53236389160156, + "learning_rate": 3.6344685017262216e-06, + "loss": 0.2463, + "num_input_tokens_seen": 102033344, + "step": 32420 + }, + { + "epoch": 2.075731387235132, + "grad_norm": 10.246853828430176, + "learning_rate": 3.633970652723203e-06, + "loss": 0.249, + "num_input_tokens_seen": 102048448, + "step": 32425 + }, + { + "epoch": 2.076051469176109, + "grad_norm": 12.740586280822754, + "learning_rate": 3.6334727470955978e-06, + "loss": 0.2439, + "num_input_tokens_seen": 102065792, + "step": 32430 + }, + { + "epoch": 2.076371551117086, + "grad_norm": 42.63481140136719, + "learning_rate": 3.6329747848682674e-06, + "loss": 0.1378, + "num_input_tokens_seen": 102080704, + "step": 32435 + }, + { + "epoch": 2.0766916330580627, + "grad_norm": 14.172962188720703, + "learning_rate": 3.632476766066078e-06, + "loss": 0.3276, + "num_input_tokens_seen": 102096512, + "step": 32440 + }, + { + "epoch": 2.0770117149990397, + "grad_norm": 6.26027774810791, + "learning_rate": 3.6319786907138973e-06, + "loss": 0.2661, + "num_input_tokens_seen": 102112320, + "step": 32445 + }, + { + "epoch": 2.0773317969400167, + "grad_norm": 9.674921035766602, + "learning_rate": 3.631480558836597e-06, + "loss": 0.1498, + "num_input_tokens_seen": 102127936, + "step": 32450 + }, + { + "epoch": 2.0776518788809937, + "grad_norm": 19.76288604736328, + "learning_rate": 3.6309823704590506e-06, + "loss": 0.2003, + "num_input_tokens_seen": 102143552, + "step": 32455 + }, + { + "epoch": 2.07797196082197, + "grad_norm": 19.34661865234375, + "learning_rate": 3.6304841256061373e-06, + "loss": 0.2259, + "num_input_tokens_seen": 102159104, + "step": 32460 + }, + { + "epoch": 2.078292042762947, + "grad_norm": 6.792261123657227, + "learning_rate": 3.629985824302734e-06, + "loss": 0.2576, + "num_input_tokens_seen": 102174848, + "step": 32465 + }, + { + "epoch": 2.078612124703924, + "grad_norm": 6.931921005249023, + "learning_rate": 3.6294874665737257e-06, + "loss": 0.1689, + "num_input_tokens_seen": 102191360, + "step": 32470 + }, + { + "epoch": 2.078932206644901, + "grad_norm": 9.916192054748535, + "learning_rate": 3.6289890524439956e-06, + "loss": 0.216, + "num_input_tokens_seen": 102206016, + "step": 32475 + }, + { + "epoch": 2.079252288585878, + "grad_norm": 26.11004066467285, + "learning_rate": 3.628490581938433e-06, + "loss": 0.2358, + "num_input_tokens_seen": 102221312, + "step": 32480 + }, + { + "epoch": 2.0795723705268547, + "grad_norm": 5.167192459106445, + "learning_rate": 3.627992055081929e-06, + "loss": 0.1419, + "num_input_tokens_seen": 102236928, + "step": 32485 + }, + { + "epoch": 2.0798924524678317, + "grad_norm": 13.754141807556152, + "learning_rate": 3.6274934718993776e-06, + "loss": 0.2382, + "num_input_tokens_seen": 102252160, + "step": 32490 + }, + { + "epoch": 2.0802125344088087, + "grad_norm": 20.040063858032227, + "learning_rate": 3.626994832415675e-06, + "loss": 0.2546, + "num_input_tokens_seen": 102267328, + "step": 32495 + }, + { + "epoch": 2.0805326163497857, + "grad_norm": 71.4978256225586, + "learning_rate": 3.62649613665572e-06, + "loss": 0.2345, + "num_input_tokens_seen": 102283136, + "step": 32500 + }, + { + "epoch": 2.0808526982907622, + "grad_norm": 30.77115821838379, + "learning_rate": 3.6259973846444162e-06, + "loss": 0.2405, + "num_input_tokens_seen": 102298560, + "step": 32505 + }, + { + "epoch": 2.0811727802317392, + "grad_norm": 5.461601257324219, + "learning_rate": 3.6254985764066688e-06, + "loss": 0.171, + "num_input_tokens_seen": 102313728, + "step": 32510 + }, + { + "epoch": 2.0814928621727162, + "grad_norm": 12.805008888244629, + "learning_rate": 3.6249997119673835e-06, + "loss": 0.2459, + "num_input_tokens_seen": 102329088, + "step": 32515 + }, + { + "epoch": 2.081812944113693, + "grad_norm": 7.694183349609375, + "learning_rate": 3.6245007913514736e-06, + "loss": 0.2137, + "num_input_tokens_seen": 102344768, + "step": 32520 + }, + { + "epoch": 2.08213302605467, + "grad_norm": 39.7358283996582, + "learning_rate": 3.6240018145838513e-06, + "loss": 0.3477, + "num_input_tokens_seen": 102360256, + "step": 32525 + }, + { + "epoch": 2.0824531079956468, + "grad_norm": 6.778387069702148, + "learning_rate": 3.623502781689433e-06, + "loss": 0.1191, + "num_input_tokens_seen": 102376640, + "step": 32530 + }, + { + "epoch": 2.0827731899366237, + "grad_norm": 6.577770709991455, + "learning_rate": 3.623003692693138e-06, + "loss": 0.1723, + "num_input_tokens_seen": 102391744, + "step": 32535 + }, + { + "epoch": 2.0830932718776007, + "grad_norm": 15.734174728393555, + "learning_rate": 3.6225045476198876e-06, + "loss": 0.2269, + "num_input_tokens_seen": 102407936, + "step": 32540 + }, + { + "epoch": 2.0834133538185777, + "grad_norm": 8.33460521697998, + "learning_rate": 3.6220053464946074e-06, + "loss": 0.1461, + "num_input_tokens_seen": 102423744, + "step": 32545 + }, + { + "epoch": 2.0837334357595543, + "grad_norm": 21.1351375579834, + "learning_rate": 3.621506089342225e-06, + "loss": 0.2772, + "num_input_tokens_seen": 102440384, + "step": 32550 + }, + { + "epoch": 2.0840535177005313, + "grad_norm": 26.025842666625977, + "learning_rate": 3.62100677618767e-06, + "loss": 0.2889, + "num_input_tokens_seen": 102454784, + "step": 32555 + }, + { + "epoch": 2.0843735996415083, + "grad_norm": 6.087081432342529, + "learning_rate": 3.6205074070558747e-06, + "loss": 0.1941, + "num_input_tokens_seen": 102471232, + "step": 32560 + }, + { + "epoch": 2.0846936815824852, + "grad_norm": 6.044461727142334, + "learning_rate": 3.620007981971777e-06, + "loss": 0.1543, + "num_input_tokens_seen": 102485824, + "step": 32565 + }, + { + "epoch": 2.085013763523462, + "grad_norm": 9.539655685424805, + "learning_rate": 3.6195085009603136e-06, + "loss": 0.1995, + "num_input_tokens_seen": 102500928, + "step": 32570 + }, + { + "epoch": 2.085333845464439, + "grad_norm": 18.915752410888672, + "learning_rate": 3.6190089640464283e-06, + "loss": 0.2033, + "num_input_tokens_seen": 102516608, + "step": 32575 + }, + { + "epoch": 2.0856539274054158, + "grad_norm": 18.10259246826172, + "learning_rate": 3.6185093712550636e-06, + "loss": 0.2386, + "num_input_tokens_seen": 102533440, + "step": 32580 + }, + { + "epoch": 2.0859740093463928, + "grad_norm": 6.292322158813477, + "learning_rate": 3.6180097226111667e-06, + "loss": 0.1653, + "num_input_tokens_seen": 102550464, + "step": 32585 + }, + { + "epoch": 2.0862940912873698, + "grad_norm": 7.282343864440918, + "learning_rate": 3.6175100181396882e-06, + "loss": 0.1673, + "num_input_tokens_seen": 102566848, + "step": 32590 + }, + { + "epoch": 2.0866141732283463, + "grad_norm": 15.919133186340332, + "learning_rate": 3.6170102578655797e-06, + "loss": 0.1834, + "num_input_tokens_seen": 102583744, + "step": 32595 + }, + { + "epoch": 2.0869342551693233, + "grad_norm": 72.5996322631836, + "learning_rate": 3.6165104418137976e-06, + "loss": 0.167, + "num_input_tokens_seen": 102598848, + "step": 32600 + }, + { + "epoch": 2.0872543371103003, + "grad_norm": 16.26901626586914, + "learning_rate": 3.6160105700092994e-06, + "loss": 0.1778, + "num_input_tokens_seen": 102614464, + "step": 32605 + }, + { + "epoch": 2.0875744190512773, + "grad_norm": 22.82914161682129, + "learning_rate": 3.615510642477047e-06, + "loss": 0.316, + "num_input_tokens_seen": 102629632, + "step": 32610 + }, + { + "epoch": 2.087894500992254, + "grad_norm": 11.327823638916016, + "learning_rate": 3.615010659242003e-06, + "loss": 0.205, + "num_input_tokens_seen": 102646528, + "step": 32615 + }, + { + "epoch": 2.088214582933231, + "grad_norm": 32.90909194946289, + "learning_rate": 3.614510620329135e-06, + "loss": 0.2306, + "num_input_tokens_seen": 102662144, + "step": 32620 + }, + { + "epoch": 2.088534664874208, + "grad_norm": 10.08419132232666, + "learning_rate": 3.6140105257634117e-06, + "loss": 0.254, + "num_input_tokens_seen": 102677760, + "step": 32625 + }, + { + "epoch": 2.088854746815185, + "grad_norm": 2.763347864151001, + "learning_rate": 3.613510375569805e-06, + "loss": 0.2597, + "num_input_tokens_seen": 102695232, + "step": 32630 + }, + { + "epoch": 2.089174828756162, + "grad_norm": 22.587167739868164, + "learning_rate": 3.6130101697732906e-06, + "loss": 0.1964, + "num_input_tokens_seen": 102710464, + "step": 32635 + }, + { + "epoch": 2.0894949106971383, + "grad_norm": 16.76158905029297, + "learning_rate": 3.612509908398846e-06, + "loss": 0.262, + "num_input_tokens_seen": 102726528, + "step": 32640 + }, + { + "epoch": 2.0898149926381153, + "grad_norm": 7.5878095626831055, + "learning_rate": 3.6120095914714514e-06, + "loss": 0.228, + "num_input_tokens_seen": 102742208, + "step": 32645 + }, + { + "epoch": 2.0901350745790923, + "grad_norm": 21.305158615112305, + "learning_rate": 3.61150921901609e-06, + "loss": 0.2535, + "num_input_tokens_seen": 102758144, + "step": 32650 + }, + { + "epoch": 2.0904551565200693, + "grad_norm": 57.18846130371094, + "learning_rate": 3.6110087910577474e-06, + "loss": 0.337, + "num_input_tokens_seen": 102773440, + "step": 32655 + }, + { + "epoch": 2.090775238461046, + "grad_norm": 25.98863410949707, + "learning_rate": 3.610508307621413e-06, + "loss": 0.3579, + "num_input_tokens_seen": 102789824, + "step": 32660 + }, + { + "epoch": 2.091095320402023, + "grad_norm": 15.597640991210938, + "learning_rate": 3.6100077687320783e-06, + "loss": 0.3105, + "num_input_tokens_seen": 102808000, + "step": 32665 + }, + { + "epoch": 2.091415402343, + "grad_norm": 8.15157413482666, + "learning_rate": 3.6095071744147377e-06, + "loss": 0.2647, + "num_input_tokens_seen": 102824064, + "step": 32670 + }, + { + "epoch": 2.091735484283977, + "grad_norm": 7.445859909057617, + "learning_rate": 3.6090065246943875e-06, + "loss": 0.2256, + "num_input_tokens_seen": 102841408, + "step": 32675 + }, + { + "epoch": 2.0920555662249534, + "grad_norm": 18.242982864379883, + "learning_rate": 3.608505819596028e-06, + "loss": 0.2509, + "num_input_tokens_seen": 102856768, + "step": 32680 + }, + { + "epoch": 2.0923756481659304, + "grad_norm": 14.223156929016113, + "learning_rate": 3.608005059144662e-06, + "loss": 0.2774, + "num_input_tokens_seen": 102872640, + "step": 32685 + }, + { + "epoch": 2.0926957301069073, + "grad_norm": 3.6847805976867676, + "learning_rate": 3.6075042433652953e-06, + "loss": 0.1289, + "num_input_tokens_seen": 102888640, + "step": 32690 + }, + { + "epoch": 2.0930158120478843, + "grad_norm": 12.595032691955566, + "learning_rate": 3.607003372282935e-06, + "loss": 0.2728, + "num_input_tokens_seen": 102902400, + "step": 32695 + }, + { + "epoch": 2.0933358939888613, + "grad_norm": 132.19747924804688, + "learning_rate": 3.606502445922593e-06, + "loss": 0.2277, + "num_input_tokens_seen": 102916992, + "step": 32700 + }, + { + "epoch": 2.093655975929838, + "grad_norm": 18.19849395751953, + "learning_rate": 3.6060014643092815e-06, + "loss": 0.1895, + "num_input_tokens_seen": 102932352, + "step": 32705 + }, + { + "epoch": 2.093976057870815, + "grad_norm": 16.47821807861328, + "learning_rate": 3.6055004274680183e-06, + "loss": 0.1889, + "num_input_tokens_seen": 102947392, + "step": 32710 + }, + { + "epoch": 2.094296139811792, + "grad_norm": 11.314166069030762, + "learning_rate": 3.6049993354238223e-06, + "loss": 0.2022, + "num_input_tokens_seen": 102963520, + "step": 32715 + }, + { + "epoch": 2.094616221752769, + "grad_norm": 122.7343521118164, + "learning_rate": 3.604498188201715e-06, + "loss": 0.2681, + "num_input_tokens_seen": 102978432, + "step": 32720 + }, + { + "epoch": 2.0949363036937454, + "grad_norm": 14.342913627624512, + "learning_rate": 3.6039969858267208e-06, + "loss": 0.2638, + "num_input_tokens_seen": 102993856, + "step": 32725 + }, + { + "epoch": 2.0952563856347224, + "grad_norm": 7.351763725280762, + "learning_rate": 3.6034957283238686e-06, + "loss": 0.174, + "num_input_tokens_seen": 103008768, + "step": 32730 + }, + { + "epoch": 2.0955764675756994, + "grad_norm": 12.129271507263184, + "learning_rate": 3.6029944157181868e-06, + "loss": 0.2459, + "num_input_tokens_seen": 103025408, + "step": 32735 + }, + { + "epoch": 2.0958965495166764, + "grad_norm": 6.199464321136475, + "learning_rate": 3.6024930480347097e-06, + "loss": 0.1956, + "num_input_tokens_seen": 103040640, + "step": 32740 + }, + { + "epoch": 2.0962166314576534, + "grad_norm": 11.44277286529541, + "learning_rate": 3.6019916252984715e-06, + "loss": 0.1956, + "num_input_tokens_seen": 103056192, + "step": 32745 + }, + { + "epoch": 2.09653671339863, + "grad_norm": 10.984116554260254, + "learning_rate": 3.6014901475345126e-06, + "loss": 0.1755, + "num_input_tokens_seen": 103071552, + "step": 32750 + }, + { + "epoch": 2.096856795339607, + "grad_norm": 23.653026580810547, + "learning_rate": 3.600988614767873e-06, + "loss": 0.1854, + "num_input_tokens_seen": 103087936, + "step": 32755 + }, + { + "epoch": 2.097176877280584, + "grad_norm": 9.313762664794922, + "learning_rate": 3.6004870270235968e-06, + "loss": 0.2523, + "num_input_tokens_seen": 103104384, + "step": 32760 + }, + { + "epoch": 2.097496959221561, + "grad_norm": 4.696132183074951, + "learning_rate": 3.599985384326731e-06, + "loss": 0.1531, + "num_input_tokens_seen": 103119936, + "step": 32765 + }, + { + "epoch": 2.0978170411625374, + "grad_norm": 6.010063648223877, + "learning_rate": 3.599483686702324e-06, + "loss": 0.1785, + "num_input_tokens_seen": 103135744, + "step": 32770 + }, + { + "epoch": 2.0981371231035144, + "grad_norm": 9.114346504211426, + "learning_rate": 3.5989819341754295e-06, + "loss": 0.2357, + "num_input_tokens_seen": 103152704, + "step": 32775 + }, + { + "epoch": 2.0984572050444914, + "grad_norm": 15.401331901550293, + "learning_rate": 3.598480126771101e-06, + "loss": 0.2085, + "num_input_tokens_seen": 103167360, + "step": 32780 + }, + { + "epoch": 2.0987772869854684, + "grad_norm": 8.101390838623047, + "learning_rate": 3.597978264514397e-06, + "loss": 0.2125, + "num_input_tokens_seen": 103182336, + "step": 32785 + }, + { + "epoch": 2.0990973689264454, + "grad_norm": 63.481075286865234, + "learning_rate": 3.5974763474303776e-06, + "loss": 0.2493, + "num_input_tokens_seen": 103198336, + "step": 32790 + }, + { + "epoch": 2.099417450867422, + "grad_norm": 8.36731243133545, + "learning_rate": 3.5969743755441055e-06, + "loss": 0.1539, + "num_input_tokens_seen": 103214272, + "step": 32795 + }, + { + "epoch": 2.099737532808399, + "grad_norm": 18.94017219543457, + "learning_rate": 3.5964723488806473e-06, + "loss": 0.1957, + "num_input_tokens_seen": 103233792, + "step": 32800 + }, + { + "epoch": 2.100057614749376, + "grad_norm": 8.369444847106934, + "learning_rate": 3.5959702674650717e-06, + "loss": 0.2853, + "num_input_tokens_seen": 103250368, + "step": 32805 + }, + { + "epoch": 2.100377696690353, + "grad_norm": 22.65048599243164, + "learning_rate": 3.595468131322449e-06, + "loss": 0.2134, + "num_input_tokens_seen": 103266560, + "step": 32810 + }, + { + "epoch": 2.1006977786313294, + "grad_norm": 6.729460716247559, + "learning_rate": 3.594965940477855e-06, + "loss": 0.1418, + "num_input_tokens_seen": 103283456, + "step": 32815 + }, + { + "epoch": 2.1010178605723064, + "grad_norm": 8.053346633911133, + "learning_rate": 3.5944636949563644e-06, + "loss": 0.2374, + "num_input_tokens_seen": 103300864, + "step": 32820 + }, + { + "epoch": 2.1013379425132834, + "grad_norm": 4.845975875854492, + "learning_rate": 3.5939613947830576e-06, + "loss": 0.1968, + "num_input_tokens_seen": 103315648, + "step": 32825 + }, + { + "epoch": 2.1016580244542604, + "grad_norm": 8.181585311889648, + "learning_rate": 3.593459039983017e-06, + "loss": 0.2184, + "num_input_tokens_seen": 103331136, + "step": 32830 + }, + { + "epoch": 2.101978106395237, + "grad_norm": 11.880120277404785, + "learning_rate": 3.5929566305813286e-06, + "loss": 0.2968, + "num_input_tokens_seen": 103346304, + "step": 32835 + }, + { + "epoch": 2.102298188336214, + "grad_norm": 25.3138370513916, + "learning_rate": 3.592454166603079e-06, + "loss": 0.1904, + "num_input_tokens_seen": 103361600, + "step": 32840 + }, + { + "epoch": 2.102618270277191, + "grad_norm": 4.0950117111206055, + "learning_rate": 3.5919516480733573e-06, + "loss": 0.1331, + "num_input_tokens_seen": 103376768, + "step": 32845 + }, + { + "epoch": 2.102938352218168, + "grad_norm": 86.44049072265625, + "learning_rate": 3.5914490750172594e-06, + "loss": 0.2789, + "num_input_tokens_seen": 103392384, + "step": 32850 + }, + { + "epoch": 2.103258434159145, + "grad_norm": 7.242374420166016, + "learning_rate": 3.5909464474598786e-06, + "loss": 0.2005, + "num_input_tokens_seen": 103408320, + "step": 32855 + }, + { + "epoch": 2.1035785161001215, + "grad_norm": 39.826786041259766, + "learning_rate": 3.5904437654263157e-06, + "loss": 0.3303, + "num_input_tokens_seen": 103423360, + "step": 32860 + }, + { + "epoch": 2.1038985980410985, + "grad_norm": 7.972715854644775, + "learning_rate": 3.589941028941671e-06, + "loss": 0.2978, + "num_input_tokens_seen": 103439424, + "step": 32865 + }, + { + "epoch": 2.1042186799820755, + "grad_norm": 7.0493245124816895, + "learning_rate": 3.5894382380310483e-06, + "loss": 0.1799, + "num_input_tokens_seen": 103454848, + "step": 32870 + }, + { + "epoch": 2.1045387619230524, + "grad_norm": 77.84017181396484, + "learning_rate": 3.5889353927195546e-06, + "loss": 0.2245, + "num_input_tokens_seen": 103470656, + "step": 32875 + }, + { + "epoch": 2.104858843864029, + "grad_norm": 26.308120727539062, + "learning_rate": 3.5884324930323e-06, + "loss": 0.2617, + "num_input_tokens_seen": 103486464, + "step": 32880 + }, + { + "epoch": 2.105178925805006, + "grad_norm": 12.902850151062012, + "learning_rate": 3.5879295389943957e-06, + "loss": 0.2073, + "num_input_tokens_seen": 103501312, + "step": 32885 + }, + { + "epoch": 2.105499007745983, + "grad_norm": 13.935078620910645, + "learning_rate": 3.5874265306309574e-06, + "loss": 0.2908, + "num_input_tokens_seen": 103517376, + "step": 32890 + }, + { + "epoch": 2.10581908968696, + "grad_norm": 16.29108428955078, + "learning_rate": 3.5869234679671024e-06, + "loss": 0.1572, + "num_input_tokens_seen": 103532928, + "step": 32895 + }, + { + "epoch": 2.106139171627937, + "grad_norm": 33.36905288696289, + "learning_rate": 3.5864203510279503e-06, + "loss": 0.2208, + "num_input_tokens_seen": 103548096, + "step": 32900 + }, + { + "epoch": 2.1064592535689135, + "grad_norm": 44.79658889770508, + "learning_rate": 3.5859171798386257e-06, + "loss": 0.1649, + "num_input_tokens_seen": 103564736, + "step": 32905 + }, + { + "epoch": 2.1067793355098905, + "grad_norm": 15.220879554748535, + "learning_rate": 3.585413954424252e-06, + "loss": 0.1681, + "num_input_tokens_seen": 103580800, + "step": 32910 + }, + { + "epoch": 2.1070994174508675, + "grad_norm": 21.591171264648438, + "learning_rate": 3.5849106748099606e-06, + "loss": 0.2048, + "num_input_tokens_seen": 103596480, + "step": 32915 + }, + { + "epoch": 2.1074194993918445, + "grad_norm": 7.357547760009766, + "learning_rate": 3.58440734102088e-06, + "loss": 0.1806, + "num_input_tokens_seen": 103612416, + "step": 32920 + }, + { + "epoch": 2.107739581332821, + "grad_norm": 6.296259880065918, + "learning_rate": 3.583903953082146e-06, + "loss": 0.1449, + "num_input_tokens_seen": 103627648, + "step": 32925 + }, + { + "epoch": 2.108059663273798, + "grad_norm": 36.057701110839844, + "learning_rate": 3.5834005110188953e-06, + "loss": 0.2911, + "num_input_tokens_seen": 103644736, + "step": 32930 + }, + { + "epoch": 2.108379745214775, + "grad_norm": 13.79022216796875, + "learning_rate": 3.582897014856265e-06, + "loss": 0.3524, + "num_input_tokens_seen": 103661760, + "step": 32935 + }, + { + "epoch": 2.108699827155752, + "grad_norm": 24.803037643432617, + "learning_rate": 3.5823934646193986e-06, + "loss": 0.3149, + "num_input_tokens_seen": 103677184, + "step": 32940 + }, + { + "epoch": 2.1090199090967285, + "grad_norm": 33.31729507446289, + "learning_rate": 3.5818898603334413e-06, + "loss": 0.1633, + "num_input_tokens_seen": 103693632, + "step": 32945 + }, + { + "epoch": 2.1093399910377055, + "grad_norm": 7.985044956207275, + "learning_rate": 3.581386202023539e-06, + "loss": 0.12, + "num_input_tokens_seen": 103708800, + "step": 32950 + }, + { + "epoch": 2.1096600729786825, + "grad_norm": 16.190837860107422, + "learning_rate": 3.580882489714843e-06, + "loss": 0.1988, + "num_input_tokens_seen": 103726656, + "step": 32955 + }, + { + "epoch": 2.1099801549196595, + "grad_norm": 10.02880859375, + "learning_rate": 3.5803787234325057e-06, + "loss": 0.2024, + "num_input_tokens_seen": 103742848, + "step": 32960 + }, + { + "epoch": 2.1103002368606365, + "grad_norm": 25.63985824584961, + "learning_rate": 3.579874903201681e-06, + "loss": 0.2063, + "num_input_tokens_seen": 103758464, + "step": 32965 + }, + { + "epoch": 2.110620318801613, + "grad_norm": 35.48906707763672, + "learning_rate": 3.57937102904753e-06, + "loss": 0.2494, + "num_input_tokens_seen": 103776128, + "step": 32970 + }, + { + "epoch": 2.11094040074259, + "grad_norm": 14.39611530303955, + "learning_rate": 3.578867100995211e-06, + "loss": 0.26, + "num_input_tokens_seen": 103791168, + "step": 32975 + }, + { + "epoch": 2.111260482683567, + "grad_norm": 8.426005363464355, + "learning_rate": 3.578363119069889e-06, + "loss": 0.1586, + "num_input_tokens_seen": 103807808, + "step": 32980 + }, + { + "epoch": 2.111580564624544, + "grad_norm": 18.081890106201172, + "learning_rate": 3.5778590832967305e-06, + "loss": 0.2688, + "num_input_tokens_seen": 103823040, + "step": 32985 + }, + { + "epoch": 2.1119006465655206, + "grad_norm": 13.35736083984375, + "learning_rate": 3.577354993700903e-06, + "loss": 0.2284, + "num_input_tokens_seen": 103837632, + "step": 32990 + }, + { + "epoch": 2.1122207285064976, + "grad_norm": 6.57157039642334, + "learning_rate": 3.5768508503075795e-06, + "loss": 0.2555, + "num_input_tokens_seen": 103853440, + "step": 32995 + }, + { + "epoch": 2.1125408104474745, + "grad_norm": 37.56112289428711, + "learning_rate": 3.5763466531419328e-06, + "loss": 0.2679, + "num_input_tokens_seen": 103869120, + "step": 33000 + }, + { + "epoch": 2.1128608923884515, + "grad_norm": 32.09355926513672, + "learning_rate": 3.5758424022291415e-06, + "loss": 0.315, + "num_input_tokens_seen": 103884608, + "step": 33005 + }, + { + "epoch": 2.1131809743294285, + "grad_norm": 19.447195053100586, + "learning_rate": 3.575338097594384e-06, + "loss": 0.1961, + "num_input_tokens_seen": 103901056, + "step": 33010 + }, + { + "epoch": 2.113501056270405, + "grad_norm": 6.462966442108154, + "learning_rate": 3.5748337392628434e-06, + "loss": 0.2865, + "num_input_tokens_seen": 103916160, + "step": 33015 + }, + { + "epoch": 2.113821138211382, + "grad_norm": 12.079622268676758, + "learning_rate": 3.5743293272597045e-06, + "loss": 0.1648, + "num_input_tokens_seen": 103931328, + "step": 33020 + }, + { + "epoch": 2.114141220152359, + "grad_norm": 23.68670654296875, + "learning_rate": 3.5738248616101545e-06, + "loss": 0.1693, + "num_input_tokens_seen": 103947392, + "step": 33025 + }, + { + "epoch": 2.114461302093336, + "grad_norm": 6.999565124511719, + "learning_rate": 3.573320342339384e-06, + "loss": 0.1568, + "num_input_tokens_seen": 103962496, + "step": 33030 + }, + { + "epoch": 2.1147813840343126, + "grad_norm": 29.133167266845703, + "learning_rate": 3.5728157694725875e-06, + "loss": 0.1916, + "num_input_tokens_seen": 103978688, + "step": 33035 + }, + { + "epoch": 2.1151014659752896, + "grad_norm": 30.300710678100586, + "learning_rate": 3.5723111430349587e-06, + "loss": 0.194, + "num_input_tokens_seen": 103995392, + "step": 33040 + }, + { + "epoch": 2.1154215479162666, + "grad_norm": 6.674276828765869, + "learning_rate": 3.5718064630516975e-06, + "loss": 0.3004, + "num_input_tokens_seen": 104011648, + "step": 33045 + }, + { + "epoch": 2.1157416298572436, + "grad_norm": 33.69058609008789, + "learning_rate": 3.571301729548004e-06, + "loss": 0.2046, + "num_input_tokens_seen": 104027392, + "step": 33050 + }, + { + "epoch": 2.1160617117982206, + "grad_norm": 37.12346267700195, + "learning_rate": 3.570796942549082e-06, + "loss": 0.1911, + "num_input_tokens_seen": 104043008, + "step": 33055 + }, + { + "epoch": 2.116381793739197, + "grad_norm": 17.59907341003418, + "learning_rate": 3.5702921020801385e-06, + "loss": 0.2238, + "num_input_tokens_seen": 104059968, + "step": 33060 + }, + { + "epoch": 2.116701875680174, + "grad_norm": 12.765830993652344, + "learning_rate": 3.5697872081663826e-06, + "loss": 0.2035, + "num_input_tokens_seen": 104075456, + "step": 33065 + }, + { + "epoch": 2.117021957621151, + "grad_norm": 20.717641830444336, + "learning_rate": 3.569282260833026e-06, + "loss": 0.2817, + "num_input_tokens_seen": 104091264, + "step": 33070 + }, + { + "epoch": 2.117342039562128, + "grad_norm": 13.429285049438477, + "learning_rate": 3.568777260105283e-06, + "loss": 0.15, + "num_input_tokens_seen": 104107008, + "step": 33075 + }, + { + "epoch": 2.1176621215031046, + "grad_norm": 35.533851623535156, + "learning_rate": 3.5682722060083695e-06, + "loss": 0.2618, + "num_input_tokens_seen": 104122304, + "step": 33080 + }, + { + "epoch": 2.1179822034440816, + "grad_norm": 7.880161762237549, + "learning_rate": 3.5677670985675072e-06, + "loss": 0.1627, + "num_input_tokens_seen": 104137600, + "step": 33085 + }, + { + "epoch": 2.1183022853850586, + "grad_norm": 16.665544509887695, + "learning_rate": 3.5672619378079183e-06, + "loss": 0.3528, + "num_input_tokens_seen": 104153344, + "step": 33090 + }, + { + "epoch": 2.1186223673260356, + "grad_norm": 8.554130554199219, + "learning_rate": 3.566756723754827e-06, + "loss": 0.2339, + "num_input_tokens_seen": 104167680, + "step": 33095 + }, + { + "epoch": 2.118942449267012, + "grad_norm": 13.111029624938965, + "learning_rate": 3.5662514564334616e-06, + "loss": 0.23, + "num_input_tokens_seen": 104182464, + "step": 33100 + }, + { + "epoch": 2.119262531207989, + "grad_norm": 6.073443412780762, + "learning_rate": 3.5657461358690526e-06, + "loss": 0.2296, + "num_input_tokens_seen": 104197760, + "step": 33105 + }, + { + "epoch": 2.119582613148966, + "grad_norm": 6.713893890380859, + "learning_rate": 3.5652407620868323e-06, + "loss": 0.2549, + "num_input_tokens_seen": 104215168, + "step": 33110 + }, + { + "epoch": 2.119902695089943, + "grad_norm": 8.054269790649414, + "learning_rate": 3.5647353351120374e-06, + "loss": 0.187, + "num_input_tokens_seen": 104230720, + "step": 33115 + }, + { + "epoch": 2.12022277703092, + "grad_norm": 8.263710975646973, + "learning_rate": 3.5642298549699054e-06, + "loss": 0.1922, + "num_input_tokens_seen": 104246144, + "step": 33120 + }, + { + "epoch": 2.1205428589718966, + "grad_norm": 16.79450798034668, + "learning_rate": 3.563724321685678e-06, + "loss": 0.2326, + "num_input_tokens_seen": 104261568, + "step": 33125 + }, + { + "epoch": 2.1208629409128736, + "grad_norm": 10.389249801635742, + "learning_rate": 3.563218735284599e-06, + "loss": 0.1748, + "num_input_tokens_seen": 104278528, + "step": 33130 + }, + { + "epoch": 2.1211830228538506, + "grad_norm": 9.7471923828125, + "learning_rate": 3.5627130957919144e-06, + "loss": 0.1412, + "num_input_tokens_seen": 104293760, + "step": 33135 + }, + { + "epoch": 2.1215031047948276, + "grad_norm": 18.80559539794922, + "learning_rate": 3.562207403232873e-06, + "loss": 0.2032, + "num_input_tokens_seen": 104308928, + "step": 33140 + }, + { + "epoch": 2.121823186735804, + "grad_norm": 11.817268371582031, + "learning_rate": 3.561701657632726e-06, + "loss": 0.223, + "num_input_tokens_seen": 104324032, + "step": 33145 + }, + { + "epoch": 2.122143268676781, + "grad_norm": 30.012248992919922, + "learning_rate": 3.5611958590167295e-06, + "loss": 0.3532, + "num_input_tokens_seen": 104338816, + "step": 33150 + }, + { + "epoch": 2.122463350617758, + "grad_norm": 44.62567138671875, + "learning_rate": 3.560690007410139e-06, + "loss": 0.334, + "num_input_tokens_seen": 104353984, + "step": 33155 + }, + { + "epoch": 2.122783432558735, + "grad_norm": 16.600486755371094, + "learning_rate": 3.560184102838215e-06, + "loss": 0.2286, + "num_input_tokens_seen": 104369408, + "step": 33160 + }, + { + "epoch": 2.123103514499712, + "grad_norm": 5.290410995483398, + "learning_rate": 3.5596781453262193e-06, + "loss": 0.2311, + "num_input_tokens_seen": 104384832, + "step": 33165 + }, + { + "epoch": 2.1234235964406887, + "grad_norm": 10.199400901794434, + "learning_rate": 3.5591721348994157e-06, + "loss": 0.2808, + "num_input_tokens_seen": 104400000, + "step": 33170 + }, + { + "epoch": 2.1237436783816657, + "grad_norm": 28.746448516845703, + "learning_rate": 3.5586660715830727e-06, + "loss": 0.273, + "num_input_tokens_seen": 104416704, + "step": 33175 + }, + { + "epoch": 2.1240637603226427, + "grad_norm": 15.167634010314941, + "learning_rate": 3.5581599554024615e-06, + "loss": 0.1704, + "num_input_tokens_seen": 104432640, + "step": 33180 + }, + { + "epoch": 2.1243838422636196, + "grad_norm": 9.676361083984375, + "learning_rate": 3.557653786382853e-06, + "loss": 0.2523, + "num_input_tokens_seen": 104450880, + "step": 33185 + }, + { + "epoch": 2.124703924204596, + "grad_norm": 17.986879348754883, + "learning_rate": 3.5571475645495244e-06, + "loss": 0.2722, + "num_input_tokens_seen": 104466240, + "step": 33190 + }, + { + "epoch": 2.125024006145573, + "grad_norm": 16.29620361328125, + "learning_rate": 3.556641289927752e-06, + "loss": 0.2074, + "num_input_tokens_seen": 104481024, + "step": 33195 + }, + { + "epoch": 2.12534408808655, + "grad_norm": 12.426271438598633, + "learning_rate": 3.556134962542818e-06, + "loss": 0.1555, + "num_input_tokens_seen": 104498880, + "step": 33200 + }, + { + "epoch": 2.125664170027527, + "grad_norm": 9.695657730102539, + "learning_rate": 3.5556285824200043e-06, + "loss": 0.2333, + "num_input_tokens_seen": 104515008, + "step": 33205 + }, + { + "epoch": 2.1259842519685037, + "grad_norm": 15.969820022583008, + "learning_rate": 3.5551221495845987e-06, + "loss": 0.2938, + "num_input_tokens_seen": 104530368, + "step": 33210 + }, + { + "epoch": 2.1263043339094807, + "grad_norm": 7.095480442047119, + "learning_rate": 3.5546156640618888e-06, + "loss": 0.2368, + "num_input_tokens_seen": 104546944, + "step": 33215 + }, + { + "epoch": 2.1266244158504577, + "grad_norm": 10.409255027770996, + "learning_rate": 3.5541091258771652e-06, + "loss": 0.2845, + "num_input_tokens_seen": 104561344, + "step": 33220 + }, + { + "epoch": 2.1269444977914347, + "grad_norm": 11.553095817565918, + "learning_rate": 3.5536025350557235e-06, + "loss": 0.273, + "num_input_tokens_seen": 104576512, + "step": 33225 + }, + { + "epoch": 2.1272645797324117, + "grad_norm": 16.32191276550293, + "learning_rate": 3.5530958916228584e-06, + "loss": 0.2434, + "num_input_tokens_seen": 104591744, + "step": 33230 + }, + { + "epoch": 2.1275846616733882, + "grad_norm": 8.33896255493164, + "learning_rate": 3.5525891956038705e-06, + "loss": 0.1655, + "num_input_tokens_seen": 104608256, + "step": 33235 + }, + { + "epoch": 2.127904743614365, + "grad_norm": 15.485922813415527, + "learning_rate": 3.5520824470240606e-06, + "loss": 0.1644, + "num_input_tokens_seen": 104625536, + "step": 33240 + }, + { + "epoch": 2.128224825555342, + "grad_norm": 6.643684387207031, + "learning_rate": 3.5515756459087334e-06, + "loss": 0.147, + "num_input_tokens_seen": 104639680, + "step": 33245 + }, + { + "epoch": 2.128544907496319, + "grad_norm": 48.0531120300293, + "learning_rate": 3.5510687922831966e-06, + "loss": 0.2699, + "num_input_tokens_seen": 104656000, + "step": 33250 + }, + { + "epoch": 2.1288649894372957, + "grad_norm": 40.94508743286133, + "learning_rate": 3.5505618861727574e-06, + "loss": 0.1568, + "num_input_tokens_seen": 104671744, + "step": 33255 + }, + { + "epoch": 2.1291850713782727, + "grad_norm": 11.861018180847168, + "learning_rate": 3.5500549276027306e-06, + "loss": 0.1426, + "num_input_tokens_seen": 104686144, + "step": 33260 + }, + { + "epoch": 2.1295051533192497, + "grad_norm": 24.590953826904297, + "learning_rate": 3.5495479165984307e-06, + "loss": 0.1269, + "num_input_tokens_seen": 104702208, + "step": 33265 + }, + { + "epoch": 2.1298252352602267, + "grad_norm": 16.742143630981445, + "learning_rate": 3.549040853185175e-06, + "loss": 0.2102, + "num_input_tokens_seen": 104718784, + "step": 33270 + }, + { + "epoch": 2.1301453172012037, + "grad_norm": 14.293667793273926, + "learning_rate": 3.5485337373882827e-06, + "loss": 0.2664, + "num_input_tokens_seen": 104734848, + "step": 33275 + }, + { + "epoch": 2.1304653991421802, + "grad_norm": 7.039449214935303, + "learning_rate": 3.5480265692330775e-06, + "loss": 0.1545, + "num_input_tokens_seen": 104749696, + "step": 33280 + }, + { + "epoch": 2.1307854810831572, + "grad_norm": 10.840749740600586, + "learning_rate": 3.547519348744884e-06, + "loss": 0.1183, + "num_input_tokens_seen": 104765376, + "step": 33285 + }, + { + "epoch": 2.1311055630241342, + "grad_norm": 9.912793159484863, + "learning_rate": 3.5470120759490305e-06, + "loss": 0.1367, + "num_input_tokens_seen": 104781056, + "step": 33290 + }, + { + "epoch": 2.131425644965111, + "grad_norm": 5.814173221588135, + "learning_rate": 3.5465047508708485e-06, + "loss": 0.1776, + "num_input_tokens_seen": 104796864, + "step": 33295 + }, + { + "epoch": 2.1317457269060878, + "grad_norm": 14.186983108520508, + "learning_rate": 3.5459973735356697e-06, + "loss": 0.1945, + "num_input_tokens_seen": 104813888, + "step": 33300 + }, + { + "epoch": 2.1320658088470648, + "grad_norm": 8.831124305725098, + "learning_rate": 3.5454899439688306e-06, + "loss": 0.1501, + "num_input_tokens_seen": 104829504, + "step": 33305 + }, + { + "epoch": 2.1323858907880417, + "grad_norm": 15.192893028259277, + "learning_rate": 3.5449824621956685e-06, + "loss": 0.2541, + "num_input_tokens_seen": 104845312, + "step": 33310 + }, + { + "epoch": 2.1327059727290187, + "grad_norm": 14.026074409484863, + "learning_rate": 3.5444749282415266e-06, + "loss": 0.2544, + "num_input_tokens_seen": 104861888, + "step": 33315 + }, + { + "epoch": 2.1330260546699957, + "grad_norm": 10.448291778564453, + "learning_rate": 3.543967342131747e-06, + "loss": 0.1929, + "num_input_tokens_seen": 104877888, + "step": 33320 + }, + { + "epoch": 2.1333461366109723, + "grad_norm": 58.44920349121094, + "learning_rate": 3.543459703891675e-06, + "loss": 0.1913, + "num_input_tokens_seen": 104893888, + "step": 33325 + }, + { + "epoch": 2.1336662185519493, + "grad_norm": 20.419189453125, + "learning_rate": 3.542952013546662e-06, + "loss": 0.2014, + "num_input_tokens_seen": 104909952, + "step": 33330 + }, + { + "epoch": 2.1339863004929263, + "grad_norm": 57.91758728027344, + "learning_rate": 3.5424442711220582e-06, + "loss": 0.2067, + "num_input_tokens_seen": 104925120, + "step": 33335 + }, + { + "epoch": 2.1343063824339032, + "grad_norm": 10.983946800231934, + "learning_rate": 3.541936476643216e-06, + "loss": 0.2051, + "num_input_tokens_seen": 104941056, + "step": 33340 + }, + { + "epoch": 2.13462646437488, + "grad_norm": 7.879558086395264, + "learning_rate": 3.5414286301354943e-06, + "loss": 0.2205, + "num_input_tokens_seen": 104955392, + "step": 33345 + }, + { + "epoch": 2.134946546315857, + "grad_norm": 16.38644790649414, + "learning_rate": 3.540920731624251e-06, + "loss": 0.3438, + "num_input_tokens_seen": 104971072, + "step": 33350 + }, + { + "epoch": 2.1352666282568338, + "grad_norm": 10.988844871520996, + "learning_rate": 3.5404127811348486e-06, + "loss": 0.2247, + "num_input_tokens_seen": 104987072, + "step": 33355 + }, + { + "epoch": 2.1355867101978108, + "grad_norm": 12.758758544921875, + "learning_rate": 3.5399047786926514e-06, + "loss": 0.3341, + "num_input_tokens_seen": 105002176, + "step": 33360 + }, + { + "epoch": 2.1359067921387878, + "grad_norm": 17.369976043701172, + "learning_rate": 3.5393967243230265e-06, + "loss": 0.239, + "num_input_tokens_seen": 105018048, + "step": 33365 + }, + { + "epoch": 2.1362268740797643, + "grad_norm": 9.049800872802734, + "learning_rate": 3.538888618051342e-06, + "loss": 0.1765, + "num_input_tokens_seen": 105032960, + "step": 33370 + }, + { + "epoch": 2.1365469560207413, + "grad_norm": 12.507542610168457, + "learning_rate": 3.5383804599029725e-06, + "loss": 0.2707, + "num_input_tokens_seen": 105050176, + "step": 33375 + }, + { + "epoch": 2.1368670379617183, + "grad_norm": 13.60191822052002, + "learning_rate": 3.5378722499032904e-06, + "loss": 0.154, + "num_input_tokens_seen": 105065280, + "step": 33380 + }, + { + "epoch": 2.1371871199026953, + "grad_norm": 66.62649536132812, + "learning_rate": 3.537363988077675e-06, + "loss": 0.2655, + "num_input_tokens_seen": 105080448, + "step": 33385 + }, + { + "epoch": 2.137507201843672, + "grad_norm": 10.838614463806152, + "learning_rate": 3.5368556744515057e-06, + "loss": 0.2169, + "num_input_tokens_seen": 105096256, + "step": 33390 + }, + { + "epoch": 2.137827283784649, + "grad_norm": 43.9549674987793, + "learning_rate": 3.536347309050164e-06, + "loss": 0.2677, + "num_input_tokens_seen": 105112704, + "step": 33395 + }, + { + "epoch": 2.138147365725626, + "grad_norm": 38.627227783203125, + "learning_rate": 3.535838891899036e-06, + "loss": 0.1674, + "num_input_tokens_seen": 105129024, + "step": 33400 + }, + { + "epoch": 2.138467447666603, + "grad_norm": 10.017066955566406, + "learning_rate": 3.5353304230235096e-06, + "loss": 0.2576, + "num_input_tokens_seen": 105144832, + "step": 33405 + }, + { + "epoch": 2.1387875296075793, + "grad_norm": 8.991349220275879, + "learning_rate": 3.534821902448975e-06, + "loss": 0.1551, + "num_input_tokens_seen": 105160256, + "step": 33410 + }, + { + "epoch": 2.1391076115485563, + "grad_norm": 39.748252868652344, + "learning_rate": 3.534313330200825e-06, + "loss": 0.174, + "num_input_tokens_seen": 105175424, + "step": 33415 + }, + { + "epoch": 2.1394276934895333, + "grad_norm": 7.042289733886719, + "learning_rate": 3.533804706304454e-06, + "loss": 0.3091, + "num_input_tokens_seen": 105190464, + "step": 33420 + }, + { + "epoch": 2.1397477754305103, + "grad_norm": 17.10451889038086, + "learning_rate": 3.5332960307852604e-06, + "loss": 0.2082, + "num_input_tokens_seen": 105206912, + "step": 33425 + }, + { + "epoch": 2.1400678573714873, + "grad_norm": 23.77743148803711, + "learning_rate": 3.5327873036686466e-06, + "loss": 0.0997, + "num_input_tokens_seen": 105222336, + "step": 33430 + }, + { + "epoch": 2.140387939312464, + "grad_norm": 9.477691650390625, + "learning_rate": 3.532278524980013e-06, + "loss": 0.14, + "num_input_tokens_seen": 105236544, + "step": 33435 + }, + { + "epoch": 2.140708021253441, + "grad_norm": 12.190116882324219, + "learning_rate": 3.531769694744768e-06, + "loss": 0.2127, + "num_input_tokens_seen": 105252096, + "step": 33440 + }, + { + "epoch": 2.141028103194418, + "grad_norm": 8.005645751953125, + "learning_rate": 3.5312608129883186e-06, + "loss": 0.18, + "num_input_tokens_seen": 105268224, + "step": 33445 + }, + { + "epoch": 2.141348185135395, + "grad_norm": 13.806563377380371, + "learning_rate": 3.5307518797360756e-06, + "loss": 0.2038, + "num_input_tokens_seen": 105285824, + "step": 33450 + }, + { + "epoch": 2.1416682670763714, + "grad_norm": 17.29207420349121, + "learning_rate": 3.5302428950134525e-06, + "loss": 0.2503, + "num_input_tokens_seen": 105301504, + "step": 33455 + }, + { + "epoch": 2.1419883490173484, + "grad_norm": 13.173235893249512, + "learning_rate": 3.529733858845866e-06, + "loss": 0.2744, + "num_input_tokens_seen": 105317952, + "step": 33460 + }, + { + "epoch": 2.1423084309583253, + "grad_norm": 13.26514720916748, + "learning_rate": 3.5292247712587336e-06, + "loss": 0.1624, + "num_input_tokens_seen": 105333440, + "step": 33465 + }, + { + "epoch": 2.1426285128993023, + "grad_norm": 47.41533660888672, + "learning_rate": 3.528715632277478e-06, + "loss": 0.2341, + "num_input_tokens_seen": 105350720, + "step": 33470 + }, + { + "epoch": 2.142948594840279, + "grad_norm": 68.5445327758789, + "learning_rate": 3.5282064419275217e-06, + "loss": 0.2384, + "num_input_tokens_seen": 105366144, + "step": 33475 + }, + { + "epoch": 2.143268676781256, + "grad_norm": 53.69206237792969, + "learning_rate": 3.527697200234291e-06, + "loss": 0.2245, + "num_input_tokens_seen": 105382656, + "step": 33480 + }, + { + "epoch": 2.143588758722233, + "grad_norm": 29.74703598022461, + "learning_rate": 3.5271879072232158e-06, + "loss": 0.2075, + "num_input_tokens_seen": 105398016, + "step": 33485 + }, + { + "epoch": 2.14390884066321, + "grad_norm": 7.334719181060791, + "learning_rate": 3.526678562919727e-06, + "loss": 0.1965, + "num_input_tokens_seen": 105413696, + "step": 33490 + }, + { + "epoch": 2.144228922604187, + "grad_norm": 5.837265491485596, + "learning_rate": 3.526169167349258e-06, + "loss": 0.2447, + "num_input_tokens_seen": 105429760, + "step": 33495 + }, + { + "epoch": 2.1445490045451634, + "grad_norm": 5.784276008605957, + "learning_rate": 3.5256597205372463e-06, + "loss": 0.1897, + "num_input_tokens_seen": 105446272, + "step": 33500 + }, + { + "epoch": 2.1448690864861404, + "grad_norm": 5.788731098175049, + "learning_rate": 3.5251502225091305e-06, + "loss": 0.1563, + "num_input_tokens_seen": 105462400, + "step": 33505 + }, + { + "epoch": 2.1451891684271174, + "grad_norm": 35.76256561279297, + "learning_rate": 3.5246406732903525e-06, + "loss": 0.1698, + "num_input_tokens_seen": 105478912, + "step": 33510 + }, + { + "epoch": 2.1455092503680944, + "grad_norm": 6.075908184051514, + "learning_rate": 3.524131072906356e-06, + "loss": 0.1394, + "num_input_tokens_seen": 105495936, + "step": 33515 + }, + { + "epoch": 2.145829332309071, + "grad_norm": 20.28211212158203, + "learning_rate": 3.523621421382589e-06, + "loss": 0.1046, + "num_input_tokens_seen": 105511296, + "step": 33520 + }, + { + "epoch": 2.146149414250048, + "grad_norm": 7.876670837402344, + "learning_rate": 3.5231117187444998e-06, + "loss": 0.1298, + "num_input_tokens_seen": 105526592, + "step": 33525 + }, + { + "epoch": 2.146469496191025, + "grad_norm": 9.303982734680176, + "learning_rate": 3.52260196501754e-06, + "loss": 0.2161, + "num_input_tokens_seen": 105542464, + "step": 33530 + }, + { + "epoch": 2.146789578132002, + "grad_norm": 7.051878929138184, + "learning_rate": 3.522092160227165e-06, + "loss": 0.2425, + "num_input_tokens_seen": 105557056, + "step": 33535 + }, + { + "epoch": 2.147109660072979, + "grad_norm": 13.118914604187012, + "learning_rate": 3.521582304398832e-06, + "loss": 0.2174, + "num_input_tokens_seen": 105572224, + "step": 33540 + }, + { + "epoch": 2.1474297420139554, + "grad_norm": 18.493038177490234, + "learning_rate": 3.5210723975579992e-06, + "loss": 0.2682, + "num_input_tokens_seen": 105590016, + "step": 33545 + }, + { + "epoch": 2.1477498239549324, + "grad_norm": 36.383758544921875, + "learning_rate": 3.5205624397301297e-06, + "loss": 0.1951, + "num_input_tokens_seen": 105606656, + "step": 33550 + }, + { + "epoch": 2.1480699058959094, + "grad_norm": 19.277671813964844, + "learning_rate": 3.520052430940687e-06, + "loss": 0.2774, + "num_input_tokens_seen": 105622464, + "step": 33555 + }, + { + "epoch": 2.1483899878368864, + "grad_norm": 10.230371475219727, + "learning_rate": 3.51954237121514e-06, + "loss": 0.1945, + "num_input_tokens_seen": 105637504, + "step": 33560 + }, + { + "epoch": 2.148710069777863, + "grad_norm": 46.058563232421875, + "learning_rate": 3.519032260578958e-06, + "loss": 0.2432, + "num_input_tokens_seen": 105652160, + "step": 33565 + }, + { + "epoch": 2.14903015171884, + "grad_norm": 4.295886039733887, + "learning_rate": 3.5185220990576123e-06, + "loss": 0.1757, + "num_input_tokens_seen": 105668160, + "step": 33570 + }, + { + "epoch": 2.149350233659817, + "grad_norm": 8.385891914367676, + "learning_rate": 3.518011886676578e-06, + "loss": 0.209, + "num_input_tokens_seen": 105682944, + "step": 33575 + }, + { + "epoch": 2.149670315600794, + "grad_norm": 4.916619300842285, + "learning_rate": 3.5175016234613334e-06, + "loss": 0.2531, + "num_input_tokens_seen": 105698816, + "step": 33580 + }, + { + "epoch": 2.149990397541771, + "grad_norm": 8.141895294189453, + "learning_rate": 3.5169913094373575e-06, + "loss": 0.1416, + "num_input_tokens_seen": 105713728, + "step": 33585 + }, + { + "epoch": 2.1503104794827474, + "grad_norm": 8.786888122558594, + "learning_rate": 3.516480944630133e-06, + "loss": 0.2043, + "num_input_tokens_seen": 105729344, + "step": 33590 + }, + { + "epoch": 2.1506305614237244, + "grad_norm": 12.183272361755371, + "learning_rate": 3.515970529065145e-06, + "loss": 0.1679, + "num_input_tokens_seen": 105746560, + "step": 33595 + }, + { + "epoch": 2.1509506433647014, + "grad_norm": 10.228096008300781, + "learning_rate": 3.5154600627678792e-06, + "loss": 0.2972, + "num_input_tokens_seen": 105764480, + "step": 33600 + }, + { + "epoch": 2.1512707253056784, + "grad_norm": 14.246576309204102, + "learning_rate": 3.514949545763828e-06, + "loss": 0.1771, + "num_input_tokens_seen": 105780992, + "step": 33605 + }, + { + "epoch": 2.151590807246655, + "grad_norm": 16.437620162963867, + "learning_rate": 3.514438978078484e-06, + "loss": 0.3079, + "num_input_tokens_seen": 105796928, + "step": 33610 + }, + { + "epoch": 2.151910889187632, + "grad_norm": 23.551830291748047, + "learning_rate": 3.513928359737341e-06, + "loss": 0.2153, + "num_input_tokens_seen": 105811712, + "step": 33615 + }, + { + "epoch": 2.152230971128609, + "grad_norm": 8.423008918762207, + "learning_rate": 3.5134176907658966e-06, + "loss": 0.1891, + "num_input_tokens_seen": 105828800, + "step": 33620 + }, + { + "epoch": 2.152551053069586, + "grad_norm": 14.974390029907227, + "learning_rate": 3.512906971189652e-06, + "loss": 0.2186, + "num_input_tokens_seen": 105846272, + "step": 33625 + }, + { + "epoch": 2.152871135010563, + "grad_norm": 9.487344741821289, + "learning_rate": 3.512396201034109e-06, + "loss": 0.2392, + "num_input_tokens_seen": 105862848, + "step": 33630 + }, + { + "epoch": 2.1531912169515395, + "grad_norm": 15.758338928222656, + "learning_rate": 3.5118853803247733e-06, + "loss": 0.2205, + "num_input_tokens_seen": 105877440, + "step": 33635 + }, + { + "epoch": 2.1535112988925165, + "grad_norm": 34.1297721862793, + "learning_rate": 3.5113745090871526e-06, + "loss": 0.3131, + "num_input_tokens_seen": 105892928, + "step": 33640 + }, + { + "epoch": 2.1538313808334935, + "grad_norm": 27.92584228515625, + "learning_rate": 3.5108635873467565e-06, + "loss": 0.1858, + "num_input_tokens_seen": 105908160, + "step": 33645 + }, + { + "epoch": 2.1541514627744704, + "grad_norm": 19.262584686279297, + "learning_rate": 3.510352615129099e-06, + "loss": 0.2167, + "num_input_tokens_seen": 105923392, + "step": 33650 + }, + { + "epoch": 2.154471544715447, + "grad_norm": 11.552528381347656, + "learning_rate": 3.509841592459694e-06, + "loss": 0.1931, + "num_input_tokens_seen": 105939136, + "step": 33655 + }, + { + "epoch": 2.154791626656424, + "grad_norm": 7.9584574699401855, + "learning_rate": 3.5093305193640597e-06, + "loss": 0.1777, + "num_input_tokens_seen": 105955584, + "step": 33660 + }, + { + "epoch": 2.155111708597401, + "grad_norm": 23.05380630493164, + "learning_rate": 3.5088193958677174e-06, + "loss": 0.3062, + "num_input_tokens_seen": 105971200, + "step": 33665 + }, + { + "epoch": 2.155431790538378, + "grad_norm": 11.486910820007324, + "learning_rate": 3.5083082219961883e-06, + "loss": 0.1523, + "num_input_tokens_seen": 105986816, + "step": 33670 + }, + { + "epoch": 2.1557518724793545, + "grad_norm": 8.484726905822754, + "learning_rate": 3.5077969977749993e-06, + "loss": 0.1629, + "num_input_tokens_seen": 106003072, + "step": 33675 + }, + { + "epoch": 2.1560719544203315, + "grad_norm": 31.580522537231445, + "learning_rate": 3.507285723229678e-06, + "loss": 0.2281, + "num_input_tokens_seen": 106019136, + "step": 33680 + }, + { + "epoch": 2.1563920363613085, + "grad_norm": 5.946142196655273, + "learning_rate": 3.5067743983857538e-06, + "loss": 0.1908, + "num_input_tokens_seen": 106034176, + "step": 33685 + }, + { + "epoch": 2.1567121183022855, + "grad_norm": 7.089213848114014, + "learning_rate": 3.5062630232687606e-06, + "loss": 0.2205, + "num_input_tokens_seen": 106050176, + "step": 33690 + }, + { + "epoch": 2.1570322002432625, + "grad_norm": 8.018689155578613, + "learning_rate": 3.5057515979042335e-06, + "loss": 0.2417, + "num_input_tokens_seen": 106066432, + "step": 33695 + }, + { + "epoch": 2.157352282184239, + "grad_norm": 19.922914505004883, + "learning_rate": 3.50524012231771e-06, + "loss": 0.2782, + "num_input_tokens_seen": 106082240, + "step": 33700 + }, + { + "epoch": 2.157672364125216, + "grad_norm": 8.639389038085938, + "learning_rate": 3.504728596534731e-06, + "loss": 0.2555, + "num_input_tokens_seen": 106097088, + "step": 33705 + }, + { + "epoch": 2.157992446066193, + "grad_norm": 5.258303165435791, + "learning_rate": 3.5042170205808403e-06, + "loss": 0.1407, + "num_input_tokens_seen": 106112064, + "step": 33710 + }, + { + "epoch": 2.15831252800717, + "grad_norm": 7.418134689331055, + "learning_rate": 3.503705394481581e-06, + "loss": 0.2465, + "num_input_tokens_seen": 106128000, + "step": 33715 + }, + { + "epoch": 2.1586326099481465, + "grad_norm": 8.424468040466309, + "learning_rate": 3.5031937182625023e-06, + "loss": 0.1645, + "num_input_tokens_seen": 106142720, + "step": 33720 + }, + { + "epoch": 2.1589526918891235, + "grad_norm": 6.238149642944336, + "learning_rate": 3.5026819919491563e-06, + "loss": 0.2709, + "num_input_tokens_seen": 106158144, + "step": 33725 + }, + { + "epoch": 2.1592727738301005, + "grad_norm": 74.6076431274414, + "learning_rate": 3.502170215567092e-06, + "loss": 0.1932, + "num_input_tokens_seen": 106177984, + "step": 33730 + }, + { + "epoch": 2.1595928557710775, + "grad_norm": 17.02430534362793, + "learning_rate": 3.501658389141869e-06, + "loss": 0.2208, + "num_input_tokens_seen": 106193472, + "step": 33735 + }, + { + "epoch": 2.159912937712054, + "grad_norm": 9.83948802947998, + "learning_rate": 3.501146512699044e-06, + "loss": 0.1614, + "num_input_tokens_seen": 106208640, + "step": 33740 + }, + { + "epoch": 2.160233019653031, + "grad_norm": 4.417003631591797, + "learning_rate": 3.5006345862641755e-06, + "loss": 0.2378, + "num_input_tokens_seen": 106224448, + "step": 33745 + }, + { + "epoch": 2.160553101594008, + "grad_norm": 5.838078022003174, + "learning_rate": 3.5001226098628288e-06, + "loss": 0.1367, + "num_input_tokens_seen": 106240448, + "step": 33750 + }, + { + "epoch": 2.160873183534985, + "grad_norm": 16.313669204711914, + "learning_rate": 3.499610583520568e-06, + "loss": 0.2617, + "num_input_tokens_seen": 106256128, + "step": 33755 + }, + { + "epoch": 2.161193265475962, + "grad_norm": 7.196619987487793, + "learning_rate": 3.4990985072629617e-06, + "loss": 0.1985, + "num_input_tokens_seen": 106270848, + "step": 33760 + }, + { + "epoch": 2.1615133474169386, + "grad_norm": 58.06814193725586, + "learning_rate": 3.49858638111558e-06, + "loss": 0.216, + "num_input_tokens_seen": 106286528, + "step": 33765 + }, + { + "epoch": 2.1618334293579156, + "grad_norm": 38.963748931884766, + "learning_rate": 3.4980742051039957e-06, + "loss": 0.2746, + "num_input_tokens_seen": 106301568, + "step": 33770 + }, + { + "epoch": 2.1621535112988925, + "grad_norm": 87.5967025756836, + "learning_rate": 3.4975619792537846e-06, + "loss": 0.1873, + "num_input_tokens_seen": 106316416, + "step": 33775 + }, + { + "epoch": 2.1624735932398695, + "grad_norm": 11.181268692016602, + "learning_rate": 3.4970497035905244e-06, + "loss": 0.3015, + "num_input_tokens_seen": 106331968, + "step": 33780 + }, + { + "epoch": 2.162793675180846, + "grad_norm": 5.054171085357666, + "learning_rate": 3.496537378139795e-06, + "loss": 0.2405, + "num_input_tokens_seen": 106347840, + "step": 33785 + }, + { + "epoch": 2.163113757121823, + "grad_norm": 10.441642761230469, + "learning_rate": 3.4960250029271795e-06, + "loss": 0.296, + "num_input_tokens_seen": 106364096, + "step": 33790 + }, + { + "epoch": 2.1634338390628, + "grad_norm": 16.396364212036133, + "learning_rate": 3.4955125779782646e-06, + "loss": 0.2345, + "num_input_tokens_seen": 106379520, + "step": 33795 + }, + { + "epoch": 2.163753921003777, + "grad_norm": 8.84406852722168, + "learning_rate": 3.495000103318636e-06, + "loss": 0.1912, + "num_input_tokens_seen": 106394240, + "step": 33800 + }, + { + "epoch": 2.164074002944754, + "grad_norm": 15.671616554260254, + "learning_rate": 3.4944875789738852e-06, + "loss": 0.1327, + "num_input_tokens_seen": 106410304, + "step": 33805 + }, + { + "epoch": 2.1643940848857306, + "grad_norm": 7.501727104187012, + "learning_rate": 3.4939750049696053e-06, + "loss": 0.1797, + "num_input_tokens_seen": 106425088, + "step": 33810 + }, + { + "epoch": 2.1647141668267076, + "grad_norm": 7.673486709594727, + "learning_rate": 3.4934623813313905e-06, + "loss": 0.1659, + "num_input_tokens_seen": 106440512, + "step": 33815 + }, + { + "epoch": 2.1650342487676846, + "grad_norm": 30.700428009033203, + "learning_rate": 3.4929497080848396e-06, + "loss": 0.2589, + "num_input_tokens_seen": 106456704, + "step": 33820 + }, + { + "epoch": 2.1653543307086616, + "grad_norm": 8.742431640625, + "learning_rate": 3.4924369852555524e-06, + "loss": 0.2504, + "num_input_tokens_seen": 106471488, + "step": 33825 + }, + { + "epoch": 2.165674412649638, + "grad_norm": 15.281237602233887, + "learning_rate": 3.491924212869131e-06, + "loss": 0.1923, + "num_input_tokens_seen": 106487488, + "step": 33830 + }, + { + "epoch": 2.165994494590615, + "grad_norm": 15.858083724975586, + "learning_rate": 3.4914113909511813e-06, + "loss": 0.1552, + "num_input_tokens_seen": 106503232, + "step": 33835 + }, + { + "epoch": 2.166314576531592, + "grad_norm": 20.43320655822754, + "learning_rate": 3.4908985195273116e-06, + "loss": 0.2846, + "num_input_tokens_seen": 106518464, + "step": 33840 + }, + { + "epoch": 2.166634658472569, + "grad_norm": 25.317468643188477, + "learning_rate": 3.49038559862313e-06, + "loss": 0.1956, + "num_input_tokens_seen": 106534720, + "step": 33845 + }, + { + "epoch": 2.166954740413546, + "grad_norm": 18.923110961914062, + "learning_rate": 3.489872628264251e-06, + "loss": 0.2281, + "num_input_tokens_seen": 106552128, + "step": 33850 + }, + { + "epoch": 2.1672748223545226, + "grad_norm": 9.746256828308105, + "learning_rate": 3.489359608476289e-06, + "loss": 0.383, + "num_input_tokens_seen": 106568192, + "step": 33855 + }, + { + "epoch": 2.1675949042954996, + "grad_norm": 42.454627990722656, + "learning_rate": 3.4888465392848613e-06, + "loss": 0.1301, + "num_input_tokens_seen": 106583296, + "step": 33860 + }, + { + "epoch": 2.1679149862364766, + "grad_norm": 37.232391357421875, + "learning_rate": 3.4883334207155885e-06, + "loss": 0.2674, + "num_input_tokens_seen": 106598464, + "step": 33865 + }, + { + "epoch": 2.1682350681774536, + "grad_norm": 11.799038887023926, + "learning_rate": 3.4878202527940926e-06, + "loss": 0.1899, + "num_input_tokens_seen": 106614464, + "step": 33870 + }, + { + "epoch": 2.16855515011843, + "grad_norm": 42.381874084472656, + "learning_rate": 3.4873070355459986e-06, + "loss": 0.1777, + "num_input_tokens_seen": 106630016, + "step": 33875 + }, + { + "epoch": 2.168875232059407, + "grad_norm": 12.398883819580078, + "learning_rate": 3.4867937689969337e-06, + "loss": 0.1854, + "num_input_tokens_seen": 106645952, + "step": 33880 + }, + { + "epoch": 2.169195314000384, + "grad_norm": 16.308137893676758, + "learning_rate": 3.486280453172528e-06, + "loss": 0.2888, + "num_input_tokens_seen": 106662784, + "step": 33885 + }, + { + "epoch": 2.169515395941361, + "grad_norm": 24.028635025024414, + "learning_rate": 3.485767088098414e-06, + "loss": 0.2578, + "num_input_tokens_seen": 106679104, + "step": 33890 + }, + { + "epoch": 2.169835477882338, + "grad_norm": 5.804035186767578, + "learning_rate": 3.4852536738002266e-06, + "loss": 0.1939, + "num_input_tokens_seen": 106694592, + "step": 33895 + }, + { + "epoch": 2.1701555598233147, + "grad_norm": 8.640684127807617, + "learning_rate": 3.4847402103036027e-06, + "loss": 0.1252, + "num_input_tokens_seen": 106710272, + "step": 33900 + }, + { + "epoch": 2.1704756417642916, + "grad_norm": 8.035508155822754, + "learning_rate": 3.4842266976341814e-06, + "loss": 0.1469, + "num_input_tokens_seen": 106726272, + "step": 33905 + }, + { + "epoch": 2.1707957237052686, + "grad_norm": 7.366702079772949, + "learning_rate": 3.483713135817606e-06, + "loss": 0.2561, + "num_input_tokens_seen": 106740352, + "step": 33910 + }, + { + "epoch": 2.1711158056462456, + "grad_norm": 14.049993515014648, + "learning_rate": 3.4831995248795204e-06, + "loss": 0.2273, + "num_input_tokens_seen": 106756416, + "step": 33915 + }, + { + "epoch": 2.171435887587222, + "grad_norm": 7.8034563064575195, + "learning_rate": 3.482685864845572e-06, + "loss": 0.2186, + "num_input_tokens_seen": 106771072, + "step": 33920 + }, + { + "epoch": 2.171755969528199, + "grad_norm": 19.028770446777344, + "learning_rate": 3.4821721557414094e-06, + "loss": 0.2672, + "num_input_tokens_seen": 106786816, + "step": 33925 + }, + { + "epoch": 2.172076051469176, + "grad_norm": 16.438451766967773, + "learning_rate": 3.4816583975926865e-06, + "loss": 0.354, + "num_input_tokens_seen": 106802048, + "step": 33930 + }, + { + "epoch": 2.172396133410153, + "grad_norm": 12.869158744812012, + "learning_rate": 3.4811445904250557e-06, + "loss": 0.1862, + "num_input_tokens_seen": 106817344, + "step": 33935 + }, + { + "epoch": 2.1727162153511297, + "grad_norm": 10.607888221740723, + "learning_rate": 3.480630734264175e-06, + "loss": 0.1708, + "num_input_tokens_seen": 106833408, + "step": 33940 + }, + { + "epoch": 2.1730362972921067, + "grad_norm": 6.969241619110107, + "learning_rate": 3.480116829135703e-06, + "loss": 0.2142, + "num_input_tokens_seen": 106849344, + "step": 33945 + }, + { + "epoch": 2.1733563792330837, + "grad_norm": 6.068783760070801, + "learning_rate": 3.479602875065302e-06, + "loss": 0.1494, + "num_input_tokens_seen": 106865088, + "step": 33950 + }, + { + "epoch": 2.1736764611740607, + "grad_norm": 4.276485443115234, + "learning_rate": 3.4790888720786357e-06, + "loss": 0.1881, + "num_input_tokens_seen": 106880576, + "step": 33955 + }, + { + "epoch": 2.1739965431150376, + "grad_norm": 27.5069580078125, + "learning_rate": 3.4785748202013712e-06, + "loss": 0.2463, + "num_input_tokens_seen": 106896768, + "step": 33960 + }, + { + "epoch": 2.174316625056014, + "grad_norm": 26.227617263793945, + "learning_rate": 3.4780607194591775e-06, + "loss": 0.3227, + "num_input_tokens_seen": 106911360, + "step": 33965 + }, + { + "epoch": 2.174636706996991, + "grad_norm": 13.416975021362305, + "learning_rate": 3.477546569877726e-06, + "loss": 0.1808, + "num_input_tokens_seen": 106927424, + "step": 33970 + }, + { + "epoch": 2.174956788937968, + "grad_norm": 13.161640167236328, + "learning_rate": 3.4770323714826903e-06, + "loss": 0.2348, + "num_input_tokens_seen": 106942720, + "step": 33975 + }, + { + "epoch": 2.175276870878945, + "grad_norm": 57.05457305908203, + "learning_rate": 3.4765181242997474e-06, + "loss": 0.2581, + "num_input_tokens_seen": 106959680, + "step": 33980 + }, + { + "epoch": 2.1755969528199217, + "grad_norm": 13.239656448364258, + "learning_rate": 3.476003828354576e-06, + "loss": 0.1787, + "num_input_tokens_seen": 106974464, + "step": 33985 + }, + { + "epoch": 2.1759170347608987, + "grad_norm": 8.435933113098145, + "learning_rate": 3.4754894836728576e-06, + "loss": 0.1541, + "num_input_tokens_seen": 106989312, + "step": 33990 + }, + { + "epoch": 2.1762371167018757, + "grad_norm": 17.723249435424805, + "learning_rate": 3.474975090280275e-06, + "loss": 0.1986, + "num_input_tokens_seen": 107004480, + "step": 33995 + }, + { + "epoch": 2.1765571986428527, + "grad_norm": 8.79848575592041, + "learning_rate": 3.4744606482025145e-06, + "loss": 0.1989, + "num_input_tokens_seen": 107019456, + "step": 34000 + }, + { + "epoch": 2.1768772805838292, + "grad_norm": 20.189376831054688, + "learning_rate": 3.4739461574652654e-06, + "loss": 0.2142, + "num_input_tokens_seen": 107035200, + "step": 34005 + }, + { + "epoch": 2.1771973625248062, + "grad_norm": 18.765579223632812, + "learning_rate": 3.4734316180942186e-06, + "loss": 0.3719, + "num_input_tokens_seen": 107050624, + "step": 34010 + }, + { + "epoch": 2.177517444465783, + "grad_norm": 5.595909118652344, + "learning_rate": 3.4729170301150673e-06, + "loss": 0.1449, + "num_input_tokens_seen": 107066752, + "step": 34015 + }, + { + "epoch": 2.17783752640676, + "grad_norm": 4.4483795166015625, + "learning_rate": 3.472402393553507e-06, + "loss": 0.2014, + "num_input_tokens_seen": 107081088, + "step": 34020 + }, + { + "epoch": 2.178157608347737, + "grad_norm": 17.303050994873047, + "learning_rate": 3.4718877084352352e-06, + "loss": 0.2346, + "num_input_tokens_seen": 107096512, + "step": 34025 + }, + { + "epoch": 2.1784776902887137, + "grad_norm": 19.018423080444336, + "learning_rate": 3.471372974785955e-06, + "loss": 0.242, + "num_input_tokens_seen": 107112512, + "step": 34030 + }, + { + "epoch": 2.1787977722296907, + "grad_norm": 7.196739673614502, + "learning_rate": 3.470858192631368e-06, + "loss": 0.2467, + "num_input_tokens_seen": 107127872, + "step": 34035 + }, + { + "epoch": 2.1791178541706677, + "grad_norm": 6.691616058349609, + "learning_rate": 3.4703433619971794e-06, + "loss": 0.1246, + "num_input_tokens_seen": 107143104, + "step": 34040 + }, + { + "epoch": 2.1794379361116447, + "grad_norm": 9.199682235717773, + "learning_rate": 3.469828482909098e-06, + "loss": 0.2645, + "num_input_tokens_seen": 107158272, + "step": 34045 + }, + { + "epoch": 2.1797580180526213, + "grad_norm": 65.3196792602539, + "learning_rate": 3.4693135553928337e-06, + "loss": 0.2499, + "num_input_tokens_seen": 107173696, + "step": 34050 + }, + { + "epoch": 2.1800780999935983, + "grad_norm": 10.45327091217041, + "learning_rate": 3.4687985794740993e-06, + "loss": 0.2194, + "num_input_tokens_seen": 107189696, + "step": 34055 + }, + { + "epoch": 2.1803981819345752, + "grad_norm": 104.62250518798828, + "learning_rate": 3.4682835551786097e-06, + "loss": 0.2478, + "num_input_tokens_seen": 107205184, + "step": 34060 + }, + { + "epoch": 2.1807182638755522, + "grad_norm": 7.12977933883667, + "learning_rate": 3.4677684825320835e-06, + "loss": 0.2267, + "num_input_tokens_seen": 107221824, + "step": 34065 + }, + { + "epoch": 2.1810383458165292, + "grad_norm": 13.798439979553223, + "learning_rate": 3.46725336156024e-06, + "loss": 0.2264, + "num_input_tokens_seen": 107238976, + "step": 34070 + }, + { + "epoch": 2.1813584277575058, + "grad_norm": 19.01006507873535, + "learning_rate": 3.466738192288802e-06, + "loss": 0.2012, + "num_input_tokens_seen": 107256128, + "step": 34075 + }, + { + "epoch": 2.1816785096984828, + "grad_norm": 24.576839447021484, + "learning_rate": 3.466222974743494e-06, + "loss": 0.23, + "num_input_tokens_seen": 107271680, + "step": 34080 + }, + { + "epoch": 2.1819985916394598, + "grad_norm": 9.867623329162598, + "learning_rate": 3.465707708950044e-06, + "loss": 0.1558, + "num_input_tokens_seen": 107288960, + "step": 34085 + }, + { + "epoch": 2.1823186735804367, + "grad_norm": 6.597915172576904, + "learning_rate": 3.4651923949341804e-06, + "loss": 0.1799, + "num_input_tokens_seen": 107305280, + "step": 34090 + }, + { + "epoch": 2.1826387555214133, + "grad_norm": 37.78429412841797, + "learning_rate": 3.4646770327216367e-06, + "loss": 0.3084, + "num_input_tokens_seen": 107320704, + "step": 34095 + }, + { + "epoch": 2.1829588374623903, + "grad_norm": 31.155460357666016, + "learning_rate": 3.4641616223381467e-06, + "loss": 0.2897, + "num_input_tokens_seen": 107336256, + "step": 34100 + }, + { + "epoch": 2.1832789194033673, + "grad_norm": 15.219396591186523, + "learning_rate": 3.4636461638094476e-06, + "loss": 0.1803, + "num_input_tokens_seen": 107351872, + "step": 34105 + }, + { + "epoch": 2.1835990013443443, + "grad_norm": 10.867082595825195, + "learning_rate": 3.463130657161279e-06, + "loss": 0.1921, + "num_input_tokens_seen": 107367040, + "step": 34110 + }, + { + "epoch": 2.1839190832853212, + "grad_norm": 13.620123863220215, + "learning_rate": 3.462615102419381e-06, + "loss": 0.2382, + "num_input_tokens_seen": 107382656, + "step": 34115 + }, + { + "epoch": 2.184239165226298, + "grad_norm": 10.859635353088379, + "learning_rate": 3.4620994996094993e-06, + "loss": 0.2082, + "num_input_tokens_seen": 107398720, + "step": 34120 + }, + { + "epoch": 2.184559247167275, + "grad_norm": 84.70750427246094, + "learning_rate": 3.4615838487573803e-06, + "loss": 0.1904, + "num_input_tokens_seen": 107412864, + "step": 34125 + }, + { + "epoch": 2.1848793291082518, + "grad_norm": 9.397979736328125, + "learning_rate": 3.461068149888773e-06, + "loss": 0.1471, + "num_input_tokens_seen": 107427776, + "step": 34130 + }, + { + "epoch": 2.1851994110492288, + "grad_norm": 56.5551872253418, + "learning_rate": 3.460552403029427e-06, + "loss": 0.249, + "num_input_tokens_seen": 107443072, + "step": 34135 + }, + { + "epoch": 2.1855194929902053, + "grad_norm": 36.59563064575195, + "learning_rate": 3.4600366082050977e-06, + "loss": 0.2319, + "num_input_tokens_seen": 107457920, + "step": 34140 + }, + { + "epoch": 2.1858395749311823, + "grad_norm": 75.31183624267578, + "learning_rate": 3.459520765441541e-06, + "loss": 0.351, + "num_input_tokens_seen": 107473600, + "step": 34145 + }, + { + "epoch": 2.1861596568721593, + "grad_norm": 43.2436408996582, + "learning_rate": 3.4590048747645154e-06, + "loss": 0.3121, + "num_input_tokens_seen": 107488832, + "step": 34150 + }, + { + "epoch": 2.1864797388131363, + "grad_norm": 7.989569187164307, + "learning_rate": 3.458488936199782e-06, + "loss": 0.2794, + "num_input_tokens_seen": 107505920, + "step": 34155 + }, + { + "epoch": 2.1867998207541133, + "grad_norm": 36.04973602294922, + "learning_rate": 3.4579729497731037e-06, + "loss": 0.1562, + "num_input_tokens_seen": 107522624, + "step": 34160 + }, + { + "epoch": 2.18711990269509, + "grad_norm": 4.517207622528076, + "learning_rate": 3.4574569155102455e-06, + "loss": 0.1315, + "num_input_tokens_seen": 107539136, + "step": 34165 + }, + { + "epoch": 2.187439984636067, + "grad_norm": 15.036360740661621, + "learning_rate": 3.4569408334369768e-06, + "loss": 0.1624, + "num_input_tokens_seen": 107554944, + "step": 34170 + }, + { + "epoch": 2.187760066577044, + "grad_norm": 13.128661155700684, + "learning_rate": 3.4564247035790664e-06, + "loss": 0.2167, + "num_input_tokens_seen": 107571456, + "step": 34175 + }, + { + "epoch": 2.188080148518021, + "grad_norm": 82.16181945800781, + "learning_rate": 3.4559085259622893e-06, + "loss": 0.2237, + "num_input_tokens_seen": 107586880, + "step": 34180 + }, + { + "epoch": 2.1884002304589973, + "grad_norm": 10.030646324157715, + "learning_rate": 3.4553923006124192e-06, + "loss": 0.3577, + "num_input_tokens_seen": 107602560, + "step": 34185 + }, + { + "epoch": 2.1887203123999743, + "grad_norm": 38.94335174560547, + "learning_rate": 3.454876027555234e-06, + "loss": 0.2601, + "num_input_tokens_seen": 107618112, + "step": 34190 + }, + { + "epoch": 2.1890403943409513, + "grad_norm": 5.577716827392578, + "learning_rate": 3.454359706816513e-06, + "loss": 0.1799, + "num_input_tokens_seen": 107636288, + "step": 34195 + }, + { + "epoch": 2.1893604762819283, + "grad_norm": 6.303470611572266, + "learning_rate": 3.4538433384220403e-06, + "loss": 0.2103, + "num_input_tokens_seen": 107652032, + "step": 34200 + }, + { + "epoch": 2.189680558222905, + "grad_norm": 17.950607299804688, + "learning_rate": 3.453326922397599e-06, + "loss": 0.2011, + "num_input_tokens_seen": 107667328, + "step": 34205 + }, + { + "epoch": 2.190000640163882, + "grad_norm": 4.675487518310547, + "learning_rate": 3.452810458768977e-06, + "loss": 0.1632, + "num_input_tokens_seen": 107683904, + "step": 34210 + }, + { + "epoch": 2.190320722104859, + "grad_norm": 8.304838180541992, + "learning_rate": 3.452293947561964e-06, + "loss": 0.1922, + "num_input_tokens_seen": 107699520, + "step": 34215 + }, + { + "epoch": 2.190640804045836, + "grad_norm": 33.777374267578125, + "learning_rate": 3.4517773888023522e-06, + "loss": 0.2186, + "num_input_tokens_seen": 107715008, + "step": 34220 + }, + { + "epoch": 2.190960885986813, + "grad_norm": 7.546712398529053, + "learning_rate": 3.4512607825159343e-06, + "loss": 0.1719, + "num_input_tokens_seen": 107729984, + "step": 34225 + }, + { + "epoch": 2.1912809679277894, + "grad_norm": 60.18655776977539, + "learning_rate": 3.4507441287285083e-06, + "loss": 0.1818, + "num_input_tokens_seen": 107746176, + "step": 34230 + }, + { + "epoch": 2.1916010498687664, + "grad_norm": 13.407075881958008, + "learning_rate": 3.4502274274658724e-06, + "loss": 0.2136, + "num_input_tokens_seen": 107761664, + "step": 34235 + }, + { + "epoch": 2.1919211318097434, + "grad_norm": 7.244666576385498, + "learning_rate": 3.4497106787538286e-06, + "loss": 0.2131, + "num_input_tokens_seen": 107778240, + "step": 34240 + }, + { + "epoch": 2.1922412137507203, + "grad_norm": 6.239695072174072, + "learning_rate": 3.4491938826181803e-06, + "loss": 0.1925, + "num_input_tokens_seen": 107793216, + "step": 34245 + }, + { + "epoch": 2.192561295691697, + "grad_norm": 21.84775161743164, + "learning_rate": 3.448677039084734e-06, + "loss": 0.1419, + "num_input_tokens_seen": 107808768, + "step": 34250 + }, + { + "epoch": 2.192881377632674, + "grad_norm": 22.778528213500977, + "learning_rate": 3.448160148179297e-06, + "loss": 0.156, + "num_input_tokens_seen": 107823808, + "step": 34255 + }, + { + "epoch": 2.193201459573651, + "grad_norm": 33.421669006347656, + "learning_rate": 3.4476432099276813e-06, + "loss": 0.5304, + "num_input_tokens_seen": 107838912, + "step": 34260 + }, + { + "epoch": 2.193521541514628, + "grad_norm": 14.228677749633789, + "learning_rate": 3.4471262243556995e-06, + "loss": 0.323, + "num_input_tokens_seen": 107854784, + "step": 34265 + }, + { + "epoch": 2.1938416234556044, + "grad_norm": 9.316814422607422, + "learning_rate": 3.4466091914891685e-06, + "loss": 0.1791, + "num_input_tokens_seen": 107870208, + "step": 34270 + }, + { + "epoch": 2.1941617053965814, + "grad_norm": 6.750350475311279, + "learning_rate": 3.4460921113539046e-06, + "loss": 0.1536, + "num_input_tokens_seen": 107886144, + "step": 34275 + }, + { + "epoch": 2.1944817873375584, + "grad_norm": 5.810678005218506, + "learning_rate": 3.445574983975729e-06, + "loss": 0.2229, + "num_input_tokens_seen": 107902720, + "step": 34280 + }, + { + "epoch": 2.1948018692785354, + "grad_norm": 48.07472610473633, + "learning_rate": 3.445057809380463e-06, + "loss": 0.2088, + "num_input_tokens_seen": 107917056, + "step": 34285 + }, + { + "epoch": 2.1951219512195124, + "grad_norm": 8.84497356414795, + "learning_rate": 3.4445405875939337e-06, + "loss": 0.1265, + "num_input_tokens_seen": 107931968, + "step": 34290 + }, + { + "epoch": 2.195442033160489, + "grad_norm": 8.906197547912598, + "learning_rate": 3.4440233186419668e-06, + "loss": 0.1757, + "num_input_tokens_seen": 107948288, + "step": 34295 + }, + { + "epoch": 2.195762115101466, + "grad_norm": 46.39454650878906, + "learning_rate": 3.4435060025503932e-06, + "loss": 0.1888, + "num_input_tokens_seen": 107963584, + "step": 34300 + }, + { + "epoch": 2.196082197042443, + "grad_norm": 11.041963577270508, + "learning_rate": 3.442988639345044e-06, + "loss": 0.2399, + "num_input_tokens_seen": 107980096, + "step": 34305 + }, + { + "epoch": 2.19640227898342, + "grad_norm": 56.58922576904297, + "learning_rate": 3.4424712290517535e-06, + "loss": 0.2176, + "num_input_tokens_seen": 107994816, + "step": 34310 + }, + { + "epoch": 2.1967223609243964, + "grad_norm": 22.29492950439453, + "learning_rate": 3.4419537716963597e-06, + "loss": 0.2193, + "num_input_tokens_seen": 108010048, + "step": 34315 + }, + { + "epoch": 2.1970424428653734, + "grad_norm": 49.21507263183594, + "learning_rate": 3.4414362673047008e-06, + "loss": 0.2667, + "num_input_tokens_seen": 108025984, + "step": 34320 + }, + { + "epoch": 2.1973625248063504, + "grad_norm": 47.66905212402344, + "learning_rate": 3.440918715902618e-06, + "loss": 0.1844, + "num_input_tokens_seen": 108041344, + "step": 34325 + }, + { + "epoch": 2.1976826067473274, + "grad_norm": 13.947000503540039, + "learning_rate": 3.4404011175159564e-06, + "loss": 0.2452, + "num_input_tokens_seen": 108057984, + "step": 34330 + }, + { + "epoch": 2.1980026886883044, + "grad_norm": 4.051390171051025, + "learning_rate": 3.439883472170561e-06, + "loss": 0.2462, + "num_input_tokens_seen": 108073024, + "step": 34335 + }, + { + "epoch": 2.198322770629281, + "grad_norm": 14.462547302246094, + "learning_rate": 3.4393657798922807e-06, + "loss": 0.1926, + "num_input_tokens_seen": 108089536, + "step": 34340 + }, + { + "epoch": 2.198642852570258, + "grad_norm": 3.970907688140869, + "learning_rate": 3.4388480407069665e-06, + "loss": 0.1936, + "num_input_tokens_seen": 108105536, + "step": 34345 + }, + { + "epoch": 2.198962934511235, + "grad_norm": 20.315040588378906, + "learning_rate": 3.4383302546404713e-06, + "loss": 0.1931, + "num_input_tokens_seen": 108123200, + "step": 34350 + }, + { + "epoch": 2.199283016452212, + "grad_norm": 7.554413795471191, + "learning_rate": 3.4378124217186503e-06, + "loss": 0.2237, + "num_input_tokens_seen": 108139456, + "step": 34355 + }, + { + "epoch": 2.1996030983931885, + "grad_norm": 11.939340591430664, + "learning_rate": 3.4372945419673626e-06, + "loss": 0.2067, + "num_input_tokens_seen": 108155648, + "step": 34360 + }, + { + "epoch": 2.1999231803341655, + "grad_norm": 19.525165557861328, + "learning_rate": 3.436776615412467e-06, + "loss": 0.1792, + "num_input_tokens_seen": 108171712, + "step": 34365 + }, + { + "epoch": 2.2002432622751424, + "grad_norm": 12.2312650680542, + "learning_rate": 3.436258642079827e-06, + "loss": 0.2654, + "num_input_tokens_seen": 108188032, + "step": 34370 + }, + { + "epoch": 2.2005633442161194, + "grad_norm": 13.401116371154785, + "learning_rate": 3.435740621995307e-06, + "loss": 0.2347, + "num_input_tokens_seen": 108202432, + "step": 34375 + }, + { + "epoch": 2.2008834261570964, + "grad_norm": 17.8356876373291, + "learning_rate": 3.4352225551847747e-06, + "loss": 0.1676, + "num_input_tokens_seen": 108217536, + "step": 34380 + }, + { + "epoch": 2.201203508098073, + "grad_norm": 29.347475051879883, + "learning_rate": 3.434704441674099e-06, + "loss": 0.2502, + "num_input_tokens_seen": 108235072, + "step": 34385 + }, + { + "epoch": 2.20152359003905, + "grad_norm": 10.629000663757324, + "learning_rate": 3.434186281489153e-06, + "loss": 0.2983, + "num_input_tokens_seen": 108250752, + "step": 34390 + }, + { + "epoch": 2.201843671980027, + "grad_norm": 45.45307922363281, + "learning_rate": 3.43366807465581e-06, + "loss": 0.1947, + "num_input_tokens_seen": 108266304, + "step": 34395 + }, + { + "epoch": 2.202163753921004, + "grad_norm": 10.151894569396973, + "learning_rate": 3.433149821199946e-06, + "loss": 0.1995, + "num_input_tokens_seen": 108282304, + "step": 34400 + }, + { + "epoch": 2.2024838358619805, + "grad_norm": 15.644984245300293, + "learning_rate": 3.4326315211474408e-06, + "loss": 0.1841, + "num_input_tokens_seen": 108298176, + "step": 34405 + }, + { + "epoch": 2.2028039178029575, + "grad_norm": 26.89163589477539, + "learning_rate": 3.432113174524175e-06, + "loss": 0.1742, + "num_input_tokens_seen": 108313152, + "step": 34410 + }, + { + "epoch": 2.2031239997439345, + "grad_norm": 11.450398445129395, + "learning_rate": 3.4315947813560337e-06, + "loss": 0.1972, + "num_input_tokens_seen": 108328448, + "step": 34415 + }, + { + "epoch": 2.2034440816849115, + "grad_norm": 3.2233924865722656, + "learning_rate": 3.4310763416689003e-06, + "loss": 0.1578, + "num_input_tokens_seen": 108344128, + "step": 34420 + }, + { + "epoch": 2.2037641636258885, + "grad_norm": 6.2864227294921875, + "learning_rate": 3.4305578554886647e-06, + "loss": 0.2849, + "num_input_tokens_seen": 108360192, + "step": 34425 + }, + { + "epoch": 2.204084245566865, + "grad_norm": 10.45910358428955, + "learning_rate": 3.4300393228412167e-06, + "loss": 0.187, + "num_input_tokens_seen": 108374784, + "step": 34430 + }, + { + "epoch": 2.204404327507842, + "grad_norm": 36.022926330566406, + "learning_rate": 3.4295207437524485e-06, + "loss": 0.1217, + "num_input_tokens_seen": 108390144, + "step": 34435 + }, + { + "epoch": 2.204724409448819, + "grad_norm": 16.056190490722656, + "learning_rate": 3.429002118248257e-06, + "loss": 0.2001, + "num_input_tokens_seen": 108405056, + "step": 34440 + }, + { + "epoch": 2.205044491389796, + "grad_norm": 11.969344139099121, + "learning_rate": 3.428483446354539e-06, + "loss": 0.2279, + "num_input_tokens_seen": 108420160, + "step": 34445 + }, + { + "epoch": 2.2053645733307725, + "grad_norm": 5.751147747039795, + "learning_rate": 3.427964728097194e-06, + "loss": 0.1814, + "num_input_tokens_seen": 108434560, + "step": 34450 + }, + { + "epoch": 2.2056846552717495, + "grad_norm": 11.300662994384766, + "learning_rate": 3.4274459635021236e-06, + "loss": 0.2064, + "num_input_tokens_seen": 108449216, + "step": 34455 + }, + { + "epoch": 2.2060047372127265, + "grad_norm": 16.18874168395996, + "learning_rate": 3.426927152595232e-06, + "loss": 0.3005, + "num_input_tokens_seen": 108464768, + "step": 34460 + }, + { + "epoch": 2.2063248191537035, + "grad_norm": 9.498771667480469, + "learning_rate": 3.4264082954024275e-06, + "loss": 0.2335, + "num_input_tokens_seen": 108479296, + "step": 34465 + }, + { + "epoch": 2.20664490109468, + "grad_norm": 49.81904220581055, + "learning_rate": 3.4258893919496182e-06, + "loss": 0.3266, + "num_input_tokens_seen": 108493888, + "step": 34470 + }, + { + "epoch": 2.206964983035657, + "grad_norm": 8.408439636230469, + "learning_rate": 3.4253704422627153e-06, + "loss": 0.1955, + "num_input_tokens_seen": 108509248, + "step": 34475 + }, + { + "epoch": 2.207285064976634, + "grad_norm": 41.55635452270508, + "learning_rate": 3.424851446367633e-06, + "loss": 0.2876, + "num_input_tokens_seen": 108526464, + "step": 34480 + }, + { + "epoch": 2.207605146917611, + "grad_norm": 9.069906234741211, + "learning_rate": 3.424332404290286e-06, + "loss": 0.2008, + "num_input_tokens_seen": 108542208, + "step": 34485 + }, + { + "epoch": 2.207925228858588, + "grad_norm": 123.22395324707031, + "learning_rate": 3.4238133160565934e-06, + "loss": 0.232, + "num_input_tokens_seen": 108557056, + "step": 34490 + }, + { + "epoch": 2.2082453107995645, + "grad_norm": 19.383426666259766, + "learning_rate": 3.4232941816924764e-06, + "loss": 0.1912, + "num_input_tokens_seen": 108572608, + "step": 34495 + }, + { + "epoch": 2.2085653927405415, + "grad_norm": 8.655158042907715, + "learning_rate": 3.422775001223857e-06, + "loss": 0.2744, + "num_input_tokens_seen": 108588544, + "step": 34500 + }, + { + "epoch": 2.2088854746815185, + "grad_norm": 33.4112434387207, + "learning_rate": 3.4222557746766604e-06, + "loss": 0.1612, + "num_input_tokens_seen": 108604800, + "step": 34505 + }, + { + "epoch": 2.2092055566224955, + "grad_norm": 20.795772552490234, + "learning_rate": 3.421736502076815e-06, + "loss": 0.1753, + "num_input_tokens_seen": 108619776, + "step": 34510 + }, + { + "epoch": 2.209525638563472, + "grad_norm": 13.861227989196777, + "learning_rate": 3.421217183450249e-06, + "loss": 0.153, + "num_input_tokens_seen": 108636032, + "step": 34515 + }, + { + "epoch": 2.209845720504449, + "grad_norm": 11.299156188964844, + "learning_rate": 3.4206978188228955e-06, + "loss": 0.2578, + "num_input_tokens_seen": 108651520, + "step": 34520 + }, + { + "epoch": 2.210165802445426, + "grad_norm": 9.508881568908691, + "learning_rate": 3.420178408220689e-06, + "loss": 0.1968, + "num_input_tokens_seen": 108669440, + "step": 34525 + }, + { + "epoch": 2.210485884386403, + "grad_norm": 5.463967323303223, + "learning_rate": 3.4196589516695655e-06, + "loss": 0.2407, + "num_input_tokens_seen": 108685952, + "step": 34530 + }, + { + "epoch": 2.2108059663273796, + "grad_norm": 5.472833156585693, + "learning_rate": 3.419139449195465e-06, + "loss": 0.2009, + "num_input_tokens_seen": 108701440, + "step": 34535 + }, + { + "epoch": 2.2111260482683566, + "grad_norm": 30.49024772644043, + "learning_rate": 3.418619900824327e-06, + "loss": 0.1482, + "num_input_tokens_seen": 108718656, + "step": 34540 + }, + { + "epoch": 2.2114461302093336, + "grad_norm": 32.29063034057617, + "learning_rate": 3.4181003065820963e-06, + "loss": 0.3677, + "num_input_tokens_seen": 108735680, + "step": 34545 + }, + { + "epoch": 2.2117662121503106, + "grad_norm": 9.882074356079102, + "learning_rate": 3.4175806664947186e-06, + "loss": 0.1548, + "num_input_tokens_seen": 108751488, + "step": 34550 + }, + { + "epoch": 2.2120862940912875, + "grad_norm": 8.916545867919922, + "learning_rate": 3.417060980588142e-06, + "loss": 0.2066, + "num_input_tokens_seen": 108766272, + "step": 34555 + }, + { + "epoch": 2.212406376032264, + "grad_norm": 10.299091339111328, + "learning_rate": 3.4165412488883173e-06, + "loss": 0.2848, + "num_input_tokens_seen": 108781632, + "step": 34560 + }, + { + "epoch": 2.212726457973241, + "grad_norm": 19.957761764526367, + "learning_rate": 3.416021471421196e-06, + "loss": 0.1604, + "num_input_tokens_seen": 108797376, + "step": 34565 + }, + { + "epoch": 2.213046539914218, + "grad_norm": 6.982228755950928, + "learning_rate": 3.415501648212734e-06, + "loss": 0.1568, + "num_input_tokens_seen": 108814656, + "step": 34570 + }, + { + "epoch": 2.213366621855195, + "grad_norm": 22.171709060668945, + "learning_rate": 3.414981779288888e-06, + "loss": 0.2778, + "num_input_tokens_seen": 108829888, + "step": 34575 + }, + { + "epoch": 2.2136867037961716, + "grad_norm": 4.82012414932251, + "learning_rate": 3.4144618646756187e-06, + "loss": 0.2578, + "num_input_tokens_seen": 108845376, + "step": 34580 + }, + { + "epoch": 2.2140067857371486, + "grad_norm": 16.681961059570312, + "learning_rate": 3.413941904398887e-06, + "loss": 0.1357, + "num_input_tokens_seen": 108861056, + "step": 34585 + }, + { + "epoch": 2.2143268676781256, + "grad_norm": 9.253324508666992, + "learning_rate": 3.413421898484657e-06, + "loss": 0.1876, + "num_input_tokens_seen": 108876096, + "step": 34590 + }, + { + "epoch": 2.2146469496191026, + "grad_norm": 40.87359619140625, + "learning_rate": 3.412901846958895e-06, + "loss": 0.2488, + "num_input_tokens_seen": 108892288, + "step": 34595 + }, + { + "epoch": 2.2149670315600796, + "grad_norm": 4.21613073348999, + "learning_rate": 3.41238174984757e-06, + "loss": 0.3183, + "num_input_tokens_seen": 108907456, + "step": 34600 + }, + { + "epoch": 2.215287113501056, + "grad_norm": 10.68395709991455, + "learning_rate": 3.4118616071766526e-06, + "loss": 0.1888, + "num_input_tokens_seen": 108924224, + "step": 34605 + }, + { + "epoch": 2.215607195442033, + "grad_norm": 5.656182289123535, + "learning_rate": 3.411341418972116e-06, + "loss": 0.1472, + "num_input_tokens_seen": 108939776, + "step": 34610 + }, + { + "epoch": 2.21592727738301, + "grad_norm": 15.58651351928711, + "learning_rate": 3.410821185259937e-06, + "loss": 0.2511, + "num_input_tokens_seen": 108954560, + "step": 34615 + }, + { + "epoch": 2.216247359323987, + "grad_norm": 5.7025957107543945, + "learning_rate": 3.4103009060660917e-06, + "loss": 0.1248, + "num_input_tokens_seen": 108970304, + "step": 34620 + }, + { + "epoch": 2.2165674412649636, + "grad_norm": 11.10112190246582, + "learning_rate": 3.4097805814165608e-06, + "loss": 0.2162, + "num_input_tokens_seen": 108984832, + "step": 34625 + }, + { + "epoch": 2.2168875232059406, + "grad_norm": 12.712764739990234, + "learning_rate": 3.4092602113373265e-06, + "loss": 0.1608, + "num_input_tokens_seen": 109000576, + "step": 34630 + }, + { + "epoch": 2.2172076051469176, + "grad_norm": 10.976789474487305, + "learning_rate": 3.4087397958543735e-06, + "loss": 0.2242, + "num_input_tokens_seen": 109017152, + "step": 34635 + }, + { + "epoch": 2.2175276870878946, + "grad_norm": 7.512298583984375, + "learning_rate": 3.4082193349936885e-06, + "loss": 0.2537, + "num_input_tokens_seen": 109032448, + "step": 34640 + }, + { + "epoch": 2.2178477690288716, + "grad_norm": 13.179900169372559, + "learning_rate": 3.407698828781261e-06, + "loss": 0.2881, + "num_input_tokens_seen": 109047296, + "step": 34645 + }, + { + "epoch": 2.218167850969848, + "grad_norm": 36.54250717163086, + "learning_rate": 3.4071782772430816e-06, + "loss": 0.2061, + "num_input_tokens_seen": 109062528, + "step": 34650 + }, + { + "epoch": 2.218487932910825, + "grad_norm": 9.857177734375, + "learning_rate": 3.4066576804051445e-06, + "loss": 0.2075, + "num_input_tokens_seen": 109077376, + "step": 34655 + }, + { + "epoch": 2.218808014851802, + "grad_norm": 7.839757919311523, + "learning_rate": 3.406137038293445e-06, + "loss": 0.1857, + "num_input_tokens_seen": 109093824, + "step": 34660 + }, + { + "epoch": 2.219128096792779, + "grad_norm": 17.505081176757812, + "learning_rate": 3.4056163509339816e-06, + "loss": 0.2564, + "num_input_tokens_seen": 109111104, + "step": 34665 + }, + { + "epoch": 2.2194481787337557, + "grad_norm": 58.89444351196289, + "learning_rate": 3.4050956183527556e-06, + "loss": 0.1952, + "num_input_tokens_seen": 109126528, + "step": 34670 + }, + { + "epoch": 2.2197682606747327, + "grad_norm": 8.754674911499023, + "learning_rate": 3.4045748405757683e-06, + "loss": 0.1603, + "num_input_tokens_seen": 109142464, + "step": 34675 + }, + { + "epoch": 2.2200883426157096, + "grad_norm": 12.386850357055664, + "learning_rate": 3.404054017629026e-06, + "loss": 0.2068, + "num_input_tokens_seen": 109158144, + "step": 34680 + }, + { + "epoch": 2.2204084245566866, + "grad_norm": 4.092357158660889, + "learning_rate": 3.403533149538535e-06, + "loss": 0.146, + "num_input_tokens_seen": 109173952, + "step": 34685 + }, + { + "epoch": 2.2207285064976636, + "grad_norm": 4.688878059387207, + "learning_rate": 3.4030122363303037e-06, + "loss": 0.1901, + "num_input_tokens_seen": 109190144, + "step": 34690 + }, + { + "epoch": 2.22104858843864, + "grad_norm": 28.23328971862793, + "learning_rate": 3.4024912780303457e-06, + "loss": 0.3357, + "num_input_tokens_seen": 109206080, + "step": 34695 + }, + { + "epoch": 2.221368670379617, + "grad_norm": 12.980323791503906, + "learning_rate": 3.4019702746646745e-06, + "loss": 0.1982, + "num_input_tokens_seen": 109222528, + "step": 34700 + }, + { + "epoch": 2.221688752320594, + "grad_norm": 10.436493873596191, + "learning_rate": 3.401449226259306e-06, + "loss": 0.2625, + "num_input_tokens_seen": 109237376, + "step": 34705 + }, + { + "epoch": 2.222008834261571, + "grad_norm": 15.829132080078125, + "learning_rate": 3.4009281328402583e-06, + "loss": 0.2085, + "num_input_tokens_seen": 109253120, + "step": 34710 + }, + { + "epoch": 2.2223289162025477, + "grad_norm": 28.35854148864746, + "learning_rate": 3.4004069944335515e-06, + "loss": 0.2405, + "num_input_tokens_seen": 109269184, + "step": 34715 + }, + { + "epoch": 2.2226489981435247, + "grad_norm": 21.59927749633789, + "learning_rate": 3.39988581106521e-06, + "loss": 0.213, + "num_input_tokens_seen": 109284096, + "step": 34720 + }, + { + "epoch": 2.2229690800845017, + "grad_norm": 9.525490760803223, + "learning_rate": 3.399364582761259e-06, + "loss": 0.3387, + "num_input_tokens_seen": 109299200, + "step": 34725 + }, + { + "epoch": 2.2232891620254787, + "grad_norm": 12.221794128417969, + "learning_rate": 3.3988433095477252e-06, + "loss": 0.1595, + "num_input_tokens_seen": 109314176, + "step": 34730 + }, + { + "epoch": 2.223609243966455, + "grad_norm": 6.539985656738281, + "learning_rate": 3.398321991450639e-06, + "loss": 0.3225, + "num_input_tokens_seen": 109329344, + "step": 34735 + }, + { + "epoch": 2.223929325907432, + "grad_norm": 16.57061767578125, + "learning_rate": 3.3978006284960306e-06, + "loss": 0.2605, + "num_input_tokens_seen": 109345280, + "step": 34740 + }, + { + "epoch": 2.224249407848409, + "grad_norm": 7.04702615737915, + "learning_rate": 3.397279220709936e-06, + "loss": 0.1708, + "num_input_tokens_seen": 109361664, + "step": 34745 + }, + { + "epoch": 2.224569489789386, + "grad_norm": 6.813352108001709, + "learning_rate": 3.3967577681183906e-06, + "loss": 0.2029, + "num_input_tokens_seen": 109377216, + "step": 34750 + }, + { + "epoch": 2.224889571730363, + "grad_norm": 28.55457878112793, + "learning_rate": 3.396236270747433e-06, + "loss": 0.304, + "num_input_tokens_seen": 109393216, + "step": 34755 + }, + { + "epoch": 2.2252096536713397, + "grad_norm": 9.248698234558105, + "learning_rate": 3.395714728623105e-06, + "loss": 0.213, + "num_input_tokens_seen": 109409536, + "step": 34760 + }, + { + "epoch": 2.2255297356123167, + "grad_norm": 7.777442932128906, + "learning_rate": 3.395193141771449e-06, + "loss": 0.1936, + "num_input_tokens_seen": 109426624, + "step": 34765 + }, + { + "epoch": 2.2258498175532937, + "grad_norm": 46.6873779296875, + "learning_rate": 3.39467151021851e-06, + "loss": 0.2805, + "num_input_tokens_seen": 109443328, + "step": 34770 + }, + { + "epoch": 2.2261698994942707, + "grad_norm": 9.247982025146484, + "learning_rate": 3.3941498339903357e-06, + "loss": 0.1941, + "num_input_tokens_seen": 109459456, + "step": 34775 + }, + { + "epoch": 2.2264899814352472, + "grad_norm": 20.330659866333008, + "learning_rate": 3.3936281131129762e-06, + "loss": 0.1622, + "num_input_tokens_seen": 109475968, + "step": 34780 + }, + { + "epoch": 2.2268100633762242, + "grad_norm": 8.100350379943848, + "learning_rate": 3.3931063476124845e-06, + "loss": 0.1761, + "num_input_tokens_seen": 109492160, + "step": 34785 + }, + { + "epoch": 2.227130145317201, + "grad_norm": 37.44276428222656, + "learning_rate": 3.392584537514913e-06, + "loss": 0.2776, + "num_input_tokens_seen": 109506560, + "step": 34790 + }, + { + "epoch": 2.227450227258178, + "grad_norm": 6.636483669281006, + "learning_rate": 3.392062682846319e-06, + "loss": 0.1537, + "num_input_tokens_seen": 109521600, + "step": 34795 + }, + { + "epoch": 2.2277703091991548, + "grad_norm": 9.684094429016113, + "learning_rate": 3.391540783632762e-06, + "loss": 0.2741, + "num_input_tokens_seen": 109535616, + "step": 34800 + }, + { + "epoch": 2.2280903911401317, + "grad_norm": 15.394722938537598, + "learning_rate": 3.391018839900301e-06, + "loss": 0.2724, + "num_input_tokens_seen": 109551488, + "step": 34805 + }, + { + "epoch": 2.2284104730811087, + "grad_norm": 7.208224296569824, + "learning_rate": 3.3904968516750002e-06, + "loss": 0.2132, + "num_input_tokens_seen": 109566592, + "step": 34810 + }, + { + "epoch": 2.2287305550220857, + "grad_norm": 19.48341178894043, + "learning_rate": 3.3899748189829263e-06, + "loss": 0.283, + "num_input_tokens_seen": 109581952, + "step": 34815 + }, + { + "epoch": 2.2290506369630627, + "grad_norm": 8.217260360717773, + "learning_rate": 3.389452741850145e-06, + "loss": 0.1845, + "num_input_tokens_seen": 109597632, + "step": 34820 + }, + { + "epoch": 2.2293707189040393, + "grad_norm": 6.444521903991699, + "learning_rate": 3.3889306203027266e-06, + "loss": 0.1912, + "num_input_tokens_seen": 109613248, + "step": 34825 + }, + { + "epoch": 2.2296908008450163, + "grad_norm": 23.0635929107666, + "learning_rate": 3.388408454366743e-06, + "loss": 0.2176, + "num_input_tokens_seen": 109628992, + "step": 34830 + }, + { + "epoch": 2.2300108827859932, + "grad_norm": 37.12395095825195, + "learning_rate": 3.387886244068269e-06, + "loss": 0.1843, + "num_input_tokens_seen": 109644352, + "step": 34835 + }, + { + "epoch": 2.2303309647269702, + "grad_norm": 10.101747512817383, + "learning_rate": 3.387363989433381e-06, + "loss": 0.2576, + "num_input_tokens_seen": 109660736, + "step": 34840 + }, + { + "epoch": 2.230651046667947, + "grad_norm": 7.644516944885254, + "learning_rate": 3.3868416904881575e-06, + "loss": 0.2127, + "num_input_tokens_seen": 109675648, + "step": 34845 + }, + { + "epoch": 2.2309711286089238, + "grad_norm": 34.796321868896484, + "learning_rate": 3.3863193472586797e-06, + "loss": 0.3412, + "num_input_tokens_seen": 109691264, + "step": 34850 + }, + { + "epoch": 2.2312912105499008, + "grad_norm": 65.55392456054688, + "learning_rate": 3.38579695977103e-06, + "loss": 0.2122, + "num_input_tokens_seen": 109708544, + "step": 34855 + }, + { + "epoch": 2.2316112924908778, + "grad_norm": 11.202197074890137, + "learning_rate": 3.3852745280512933e-06, + "loss": 0.2122, + "num_input_tokens_seen": 109723968, + "step": 34860 + }, + { + "epoch": 2.2319313744318547, + "grad_norm": 25.743209838867188, + "learning_rate": 3.384752052125559e-06, + "loss": 0.3039, + "num_input_tokens_seen": 109739456, + "step": 34865 + }, + { + "epoch": 2.2322514563728313, + "grad_norm": 22.984359741210938, + "learning_rate": 3.384229532019915e-06, + "loss": 0.2535, + "num_input_tokens_seen": 109755200, + "step": 34870 + }, + { + "epoch": 2.2325715383138083, + "grad_norm": 6.575779914855957, + "learning_rate": 3.383706967760455e-06, + "loss": 0.176, + "num_input_tokens_seen": 109770944, + "step": 34875 + }, + { + "epoch": 2.2328916202547853, + "grad_norm": 11.220803260803223, + "learning_rate": 3.3831843593732715e-06, + "loss": 0.1979, + "num_input_tokens_seen": 109786624, + "step": 34880 + }, + { + "epoch": 2.2332117021957623, + "grad_norm": 19.23015594482422, + "learning_rate": 3.382661706884461e-06, + "loss": 0.2653, + "num_input_tokens_seen": 109801344, + "step": 34885 + }, + { + "epoch": 2.233531784136739, + "grad_norm": 11.185528755187988, + "learning_rate": 3.382139010320123e-06, + "loss": 0.3099, + "num_input_tokens_seen": 109818048, + "step": 34890 + }, + { + "epoch": 2.233851866077716, + "grad_norm": 39.168006896972656, + "learning_rate": 3.381616269706357e-06, + "loss": 0.1537, + "num_input_tokens_seen": 109835328, + "step": 34895 + }, + { + "epoch": 2.234171948018693, + "grad_norm": 6.921159267425537, + "learning_rate": 3.381093485069267e-06, + "loss": 0.1815, + "num_input_tokens_seen": 109852160, + "step": 34900 + }, + { + "epoch": 2.23449202995967, + "grad_norm": 8.711544036865234, + "learning_rate": 3.3805706564349583e-06, + "loss": 0.2513, + "num_input_tokens_seen": 109868352, + "step": 34905 + }, + { + "epoch": 2.2348121119006468, + "grad_norm": 12.093772888183594, + "learning_rate": 3.3800477838295375e-06, + "loss": 0.2768, + "num_input_tokens_seen": 109883200, + "step": 34910 + }, + { + "epoch": 2.2351321938416233, + "grad_norm": 31.35710334777832, + "learning_rate": 3.379524867279115e-06, + "loss": 0.1918, + "num_input_tokens_seen": 109898944, + "step": 34915 + }, + { + "epoch": 2.2354522757826003, + "grad_norm": 12.336737632751465, + "learning_rate": 3.3790019068098e-06, + "loss": 0.2025, + "num_input_tokens_seen": 109913920, + "step": 34920 + }, + { + "epoch": 2.2357723577235773, + "grad_norm": 9.022392272949219, + "learning_rate": 3.37847890244771e-06, + "loss": 0.2429, + "num_input_tokens_seen": 109931008, + "step": 34925 + }, + { + "epoch": 2.2360924396645543, + "grad_norm": 34.272361755371094, + "learning_rate": 3.3779558542189595e-06, + "loss": 0.1842, + "num_input_tokens_seen": 109946368, + "step": 34930 + }, + { + "epoch": 2.236412521605531, + "grad_norm": 8.95547866821289, + "learning_rate": 3.377432762149666e-06, + "loss": 0.2256, + "num_input_tokens_seen": 109960960, + "step": 34935 + }, + { + "epoch": 2.236732603546508, + "grad_norm": 6.360369682312012, + "learning_rate": 3.376909626265951e-06, + "loss": 0.1823, + "num_input_tokens_seen": 109978368, + "step": 34940 + }, + { + "epoch": 2.237052685487485, + "grad_norm": 61.45766830444336, + "learning_rate": 3.3763864465939367e-06, + "loss": 0.215, + "num_input_tokens_seen": 109994304, + "step": 34945 + }, + { + "epoch": 2.237372767428462, + "grad_norm": 5.31660270690918, + "learning_rate": 3.375863223159749e-06, + "loss": 0.234, + "num_input_tokens_seen": 110009280, + "step": 34950 + }, + { + "epoch": 2.237692849369439, + "grad_norm": 21.273530960083008, + "learning_rate": 3.3753399559895127e-06, + "loss": 0.2539, + "num_input_tokens_seen": 110026816, + "step": 34955 + }, + { + "epoch": 2.2380129313104153, + "grad_norm": 6.863424777984619, + "learning_rate": 3.3748166451093596e-06, + "loss": 0.2392, + "num_input_tokens_seen": 110042240, + "step": 34960 + }, + { + "epoch": 2.2383330132513923, + "grad_norm": 24.082725524902344, + "learning_rate": 3.3742932905454194e-06, + "loss": 0.259, + "num_input_tokens_seen": 110057600, + "step": 34965 + }, + { + "epoch": 2.2386530951923693, + "grad_norm": 7.113059043884277, + "learning_rate": 3.3737698923238273e-06, + "loss": 0.1604, + "num_input_tokens_seen": 110073856, + "step": 34970 + }, + { + "epoch": 2.2389731771333463, + "grad_norm": 8.595257759094238, + "learning_rate": 3.3732464504707164e-06, + "loss": 0.2358, + "num_input_tokens_seen": 110089536, + "step": 34975 + }, + { + "epoch": 2.239293259074323, + "grad_norm": 91.89366149902344, + "learning_rate": 3.3727229650122274e-06, + "loss": 0.3496, + "num_input_tokens_seen": 110105536, + "step": 34980 + }, + { + "epoch": 2.2396133410153, + "grad_norm": 22.969215393066406, + "learning_rate": 3.3721994359745e-06, + "loss": 0.2086, + "num_input_tokens_seen": 110121920, + "step": 34985 + }, + { + "epoch": 2.239933422956277, + "grad_norm": 20.66089630126953, + "learning_rate": 3.371675863383675e-06, + "loss": 0.2684, + "num_input_tokens_seen": 110136704, + "step": 34990 + }, + { + "epoch": 2.240253504897254, + "grad_norm": 9.550512313842773, + "learning_rate": 3.371152247265898e-06, + "loss": 0.204, + "num_input_tokens_seen": 110153472, + "step": 34995 + }, + { + "epoch": 2.2405735868382304, + "grad_norm": 10.237200736999512, + "learning_rate": 3.370628587647315e-06, + "loss": 0.184, + "num_input_tokens_seen": 110168768, + "step": 35000 + }, + { + "epoch": 2.2408936687792074, + "grad_norm": 12.384590148925781, + "learning_rate": 3.3701048845540757e-06, + "loss": 0.2724, + "num_input_tokens_seen": 110185216, + "step": 35005 + }, + { + "epoch": 2.2412137507201844, + "grad_norm": 4.065131187438965, + "learning_rate": 3.369581138012331e-06, + "loss": 0.219, + "num_input_tokens_seen": 110200576, + "step": 35010 + }, + { + "epoch": 2.2415338326611614, + "grad_norm": 21.871627807617188, + "learning_rate": 3.369057348048233e-06, + "loss": 0.2744, + "num_input_tokens_seen": 110216192, + "step": 35015 + }, + { + "epoch": 2.2418539146021383, + "grad_norm": 6.508429050445557, + "learning_rate": 3.3685335146879384e-06, + "loss": 0.1438, + "num_input_tokens_seen": 110231296, + "step": 35020 + }, + { + "epoch": 2.242173996543115, + "grad_norm": 34.257041931152344, + "learning_rate": 3.3680096379576037e-06, + "loss": 0.1885, + "num_input_tokens_seen": 110246144, + "step": 35025 + }, + { + "epoch": 2.242494078484092, + "grad_norm": 12.85622787475586, + "learning_rate": 3.367485717883389e-06, + "loss": 0.1477, + "num_input_tokens_seen": 110261440, + "step": 35030 + }, + { + "epoch": 2.242814160425069, + "grad_norm": 11.435379981994629, + "learning_rate": 3.3669617544914562e-06, + "loss": 0.2131, + "num_input_tokens_seen": 110276416, + "step": 35035 + }, + { + "epoch": 2.243134242366046, + "grad_norm": 9.163228988647461, + "learning_rate": 3.36643774780797e-06, + "loss": 0.1741, + "num_input_tokens_seen": 110290944, + "step": 35040 + }, + { + "epoch": 2.2434543243070224, + "grad_norm": 7.027058124542236, + "learning_rate": 3.3659136978590946e-06, + "loss": 0.263, + "num_input_tokens_seen": 110306048, + "step": 35045 + }, + { + "epoch": 2.2437744062479994, + "grad_norm": 4.837616443634033, + "learning_rate": 3.365389604671e-06, + "loss": 0.2051, + "num_input_tokens_seen": 110322240, + "step": 35050 + }, + { + "epoch": 2.2440944881889764, + "grad_norm": 13.105058670043945, + "learning_rate": 3.364865468269857e-06, + "loss": 0.2442, + "num_input_tokens_seen": 110338560, + "step": 35055 + }, + { + "epoch": 2.2444145701299534, + "grad_norm": 29.756059646606445, + "learning_rate": 3.364341288681836e-06, + "loss": 0.2658, + "num_input_tokens_seen": 110353728, + "step": 35060 + }, + { + "epoch": 2.24473465207093, + "grad_norm": 10.656841278076172, + "learning_rate": 3.3638170659331138e-06, + "loss": 0.3488, + "num_input_tokens_seen": 110369472, + "step": 35065 + }, + { + "epoch": 2.245054734011907, + "grad_norm": 59.04771041870117, + "learning_rate": 3.3632928000498667e-06, + "loss": 0.2753, + "num_input_tokens_seen": 110384832, + "step": 35070 + }, + { + "epoch": 2.245374815952884, + "grad_norm": 40.91374588012695, + "learning_rate": 3.3627684910582737e-06, + "loss": 0.2681, + "num_input_tokens_seen": 110399872, + "step": 35075 + }, + { + "epoch": 2.245694897893861, + "grad_norm": 9.7687406539917, + "learning_rate": 3.3622441389845166e-06, + "loss": 0.2491, + "num_input_tokens_seen": 110416064, + "step": 35080 + }, + { + "epoch": 2.246014979834838, + "grad_norm": 8.951963424682617, + "learning_rate": 3.3617197438547787e-06, + "loss": 0.1607, + "num_input_tokens_seen": 110432064, + "step": 35085 + }, + { + "epoch": 2.2463350617758144, + "grad_norm": 5.252530574798584, + "learning_rate": 3.361195305695245e-06, + "loss": 0.2389, + "num_input_tokens_seen": 110448256, + "step": 35090 + }, + { + "epoch": 2.2466551437167914, + "grad_norm": 17.174951553344727, + "learning_rate": 3.3606708245321035e-06, + "loss": 0.2655, + "num_input_tokens_seen": 110464576, + "step": 35095 + }, + { + "epoch": 2.2469752256577684, + "grad_norm": 32.26191711425781, + "learning_rate": 3.360146300391545e-06, + "loss": 0.2452, + "num_input_tokens_seen": 110479680, + "step": 35100 + }, + { + "epoch": 2.2472953075987454, + "grad_norm": 17.947969436645508, + "learning_rate": 3.3596217332997594e-06, + "loss": 0.3262, + "num_input_tokens_seen": 110495296, + "step": 35105 + }, + { + "epoch": 2.247615389539722, + "grad_norm": 13.328115463256836, + "learning_rate": 3.359097123282943e-06, + "loss": 0.2149, + "num_input_tokens_seen": 110511232, + "step": 35110 + }, + { + "epoch": 2.247935471480699, + "grad_norm": 8.660000801086426, + "learning_rate": 3.3585724703672894e-06, + "loss": 0.241, + "num_input_tokens_seen": 110527232, + "step": 35115 + }, + { + "epoch": 2.248255553421676, + "grad_norm": 10.059239387512207, + "learning_rate": 3.358047774579e-06, + "loss": 0.2237, + "num_input_tokens_seen": 110542464, + "step": 35120 + }, + { + "epoch": 2.248575635362653, + "grad_norm": 91.564208984375, + "learning_rate": 3.3575230359442747e-06, + "loss": 0.2935, + "num_input_tokens_seen": 110558336, + "step": 35125 + }, + { + "epoch": 2.24889571730363, + "grad_norm": 21.52518081665039, + "learning_rate": 3.3569982544893144e-06, + "loss": 0.3091, + "num_input_tokens_seen": 110573632, + "step": 35130 + }, + { + "epoch": 2.2492157992446065, + "grad_norm": 22.541847229003906, + "learning_rate": 3.356473430240326e-06, + "loss": 0.2674, + "num_input_tokens_seen": 110590080, + "step": 35135 + }, + { + "epoch": 2.2495358811855835, + "grad_norm": 24.216915130615234, + "learning_rate": 3.3559485632235155e-06, + "loss": 0.2304, + "num_input_tokens_seen": 110606464, + "step": 35140 + }, + { + "epoch": 2.2498559631265604, + "grad_norm": 3.2160212993621826, + "learning_rate": 3.355423653465093e-06, + "loss": 0.1817, + "num_input_tokens_seen": 110621376, + "step": 35145 + }, + { + "epoch": 2.2501760450675374, + "grad_norm": 6.397547245025635, + "learning_rate": 3.3548987009912676e-06, + "loss": 0.2663, + "num_input_tokens_seen": 110637184, + "step": 35150 + }, + { + "epoch": 2.2504321106203187, + "eval_loss": 0.5257287621498108, + "eval_runtime": 50.981, + "eval_samples_per_second": 272.376, + "eval_steps_per_second": 34.052, + "num_input_tokens_seen": 110649216, + "step": 35154 + }, + { + "epoch": 2.250496127008514, + "grad_norm": 12.565267562866211, + "learning_rate": 3.354373705828255e-06, + "loss": 0.2498, + "num_input_tokens_seen": 110652352, + "step": 35155 + }, + { + "epoch": 2.250816208949491, + "grad_norm": 10.282295227050781, + "learning_rate": 3.3538486680022695e-06, + "loss": 0.2259, + "num_input_tokens_seen": 110667328, + "step": 35160 + }, + { + "epoch": 2.251136290890468, + "grad_norm": 27.780973434448242, + "learning_rate": 3.3533235875395293e-06, + "loss": 0.2664, + "num_input_tokens_seen": 110682432, + "step": 35165 + }, + { + "epoch": 2.251456372831445, + "grad_norm": 25.000825881958008, + "learning_rate": 3.3527984644662538e-06, + "loss": 0.1614, + "num_input_tokens_seen": 110697856, + "step": 35170 + }, + { + "epoch": 2.251776454772422, + "grad_norm": 17.25286865234375, + "learning_rate": 3.352273298808665e-06, + "loss": 0.2603, + "num_input_tokens_seen": 110712832, + "step": 35175 + }, + { + "epoch": 2.2520965367133985, + "grad_norm": 11.50916862487793, + "learning_rate": 3.351748090592987e-06, + "loss": 0.1891, + "num_input_tokens_seen": 110727552, + "step": 35180 + }, + { + "epoch": 2.2524166186543755, + "grad_norm": 10.286513328552246, + "learning_rate": 3.351222839845446e-06, + "loss": 0.2633, + "num_input_tokens_seen": 110743680, + "step": 35185 + }, + { + "epoch": 2.2527367005953525, + "grad_norm": 8.223031044006348, + "learning_rate": 3.3506975465922697e-06, + "loss": 0.1996, + "num_input_tokens_seen": 110760512, + "step": 35190 + }, + { + "epoch": 2.2530567825363295, + "grad_norm": 11.03484058380127, + "learning_rate": 3.3501722108596896e-06, + "loss": 0.2406, + "num_input_tokens_seen": 110775616, + "step": 35195 + }, + { + "epoch": 2.253376864477306, + "grad_norm": 12.925236701965332, + "learning_rate": 3.3496468326739377e-06, + "loss": 0.1835, + "num_input_tokens_seen": 110790912, + "step": 35200 + }, + { + "epoch": 2.253696946418283, + "grad_norm": 21.27532386779785, + "learning_rate": 3.349121412061248e-06, + "loss": 0.2762, + "num_input_tokens_seen": 110806208, + "step": 35205 + }, + { + "epoch": 2.25401702835926, + "grad_norm": 15.825685501098633, + "learning_rate": 3.3485959490478587e-06, + "loss": 0.2097, + "num_input_tokens_seen": 110822080, + "step": 35210 + }, + { + "epoch": 2.254337110300237, + "grad_norm": 7.744662284851074, + "learning_rate": 3.3480704436600083e-06, + "loss": 0.3238, + "num_input_tokens_seen": 110838720, + "step": 35215 + }, + { + "epoch": 2.254657192241214, + "grad_norm": 20.287315368652344, + "learning_rate": 3.3475448959239364e-06, + "loss": 0.2346, + "num_input_tokens_seen": 110855488, + "step": 35220 + }, + { + "epoch": 2.2549772741821905, + "grad_norm": 14.115250587463379, + "learning_rate": 3.3470193058658874e-06, + "loss": 0.321, + "num_input_tokens_seen": 110870784, + "step": 35225 + }, + { + "epoch": 2.2552973561231675, + "grad_norm": 14.611054420471191, + "learning_rate": 3.346493673512106e-06, + "loss": 0.2382, + "num_input_tokens_seen": 110886016, + "step": 35230 + }, + { + "epoch": 2.2556174380641445, + "grad_norm": 8.945589065551758, + "learning_rate": 3.34596799888884e-06, + "loss": 0.1633, + "num_input_tokens_seen": 110901568, + "step": 35235 + }, + { + "epoch": 2.2559375200051215, + "grad_norm": 15.193854331970215, + "learning_rate": 3.345442282022339e-06, + "loss": 0.1878, + "num_input_tokens_seen": 110917056, + "step": 35240 + }, + { + "epoch": 2.256257601946098, + "grad_norm": 26.97940444946289, + "learning_rate": 3.344916522938853e-06, + "loss": 0.1487, + "num_input_tokens_seen": 110932992, + "step": 35245 + }, + { + "epoch": 2.256577683887075, + "grad_norm": 15.77139949798584, + "learning_rate": 3.3443907216646378e-06, + "loss": 0.2015, + "num_input_tokens_seen": 110947904, + "step": 35250 + }, + { + "epoch": 2.256897765828052, + "grad_norm": 7.768870830535889, + "learning_rate": 3.3438648782259487e-06, + "loss": 0.1884, + "num_input_tokens_seen": 110963392, + "step": 35255 + }, + { + "epoch": 2.257217847769029, + "grad_norm": 7.810608386993408, + "learning_rate": 3.343338992649042e-06, + "loss": 0.2113, + "num_input_tokens_seen": 110979200, + "step": 35260 + }, + { + "epoch": 2.257537929710006, + "grad_norm": 7.358640193939209, + "learning_rate": 3.342813064960179e-06, + "loss": 0.2611, + "num_input_tokens_seen": 110996544, + "step": 35265 + }, + { + "epoch": 2.2578580116509825, + "grad_norm": 12.906908988952637, + "learning_rate": 3.3422870951856222e-06, + "loss": 0.1967, + "num_input_tokens_seen": 111013376, + "step": 35270 + }, + { + "epoch": 2.2581780935919595, + "grad_norm": 8.17377758026123, + "learning_rate": 3.3417610833516346e-06, + "loss": 0.1391, + "num_input_tokens_seen": 111028608, + "step": 35275 + }, + { + "epoch": 2.2584981755329365, + "grad_norm": 10.837651252746582, + "learning_rate": 3.3412350294844836e-06, + "loss": 0.214, + "num_input_tokens_seen": 111044480, + "step": 35280 + }, + { + "epoch": 2.2588182574739135, + "grad_norm": 39.64620590209961, + "learning_rate": 3.340708933610437e-06, + "loss": 0.1921, + "num_input_tokens_seen": 111059840, + "step": 35285 + }, + { + "epoch": 2.25913833941489, + "grad_norm": 7.711105823516846, + "learning_rate": 3.340182795755765e-06, + "loss": 0.1358, + "num_input_tokens_seen": 111074368, + "step": 35290 + }, + { + "epoch": 2.259458421355867, + "grad_norm": 7.056981086730957, + "learning_rate": 3.3396566159467404e-06, + "loss": 0.3094, + "num_input_tokens_seen": 111090432, + "step": 35295 + }, + { + "epoch": 2.259778503296844, + "grad_norm": 9.262246131896973, + "learning_rate": 3.339130394209639e-06, + "loss": 0.1856, + "num_input_tokens_seen": 111105920, + "step": 35300 + }, + { + "epoch": 2.260098585237821, + "grad_norm": 22.854881286621094, + "learning_rate": 3.338604130570735e-06, + "loss": 0.3488, + "num_input_tokens_seen": 111121216, + "step": 35305 + }, + { + "epoch": 2.2604186671787976, + "grad_norm": 8.22153377532959, + "learning_rate": 3.3380778250563097e-06, + "loss": 0.1892, + "num_input_tokens_seen": 111136896, + "step": 35310 + }, + { + "epoch": 2.2607387491197746, + "grad_norm": 13.352703094482422, + "learning_rate": 3.337551477692643e-06, + "loss": 0.1966, + "num_input_tokens_seen": 111153152, + "step": 35315 + }, + { + "epoch": 2.2610588310607516, + "grad_norm": 31.112838745117188, + "learning_rate": 3.3370250885060183e-06, + "loss": 0.3031, + "num_input_tokens_seen": 111169728, + "step": 35320 + }, + { + "epoch": 2.2613789130017286, + "grad_norm": 28.307361602783203, + "learning_rate": 3.336498657522721e-06, + "loss": 0.1913, + "num_input_tokens_seen": 111186816, + "step": 35325 + }, + { + "epoch": 2.261698994942705, + "grad_norm": 6.208664417266846, + "learning_rate": 3.3359721847690375e-06, + "loss": 0.3084, + "num_input_tokens_seen": 111203136, + "step": 35330 + }, + { + "epoch": 2.262019076883682, + "grad_norm": 9.962281227111816, + "learning_rate": 3.335445670271258e-06, + "loss": 0.2074, + "num_input_tokens_seen": 111218880, + "step": 35335 + }, + { + "epoch": 2.262339158824659, + "grad_norm": 9.64030933380127, + "learning_rate": 3.334919114055673e-06, + "loss": 0.2325, + "num_input_tokens_seen": 111234624, + "step": 35340 + }, + { + "epoch": 2.262659240765636, + "grad_norm": 10.196565628051758, + "learning_rate": 3.3343925161485757e-06, + "loss": 0.2509, + "num_input_tokens_seen": 111251712, + "step": 35345 + }, + { + "epoch": 2.262979322706613, + "grad_norm": 8.561590194702148, + "learning_rate": 3.3338658765762635e-06, + "loss": 0.1662, + "num_input_tokens_seen": 111266176, + "step": 35350 + }, + { + "epoch": 2.2632994046475896, + "grad_norm": 10.9122953414917, + "learning_rate": 3.3333391953650326e-06, + "loss": 0.1905, + "num_input_tokens_seen": 111281472, + "step": 35355 + }, + { + "epoch": 2.2636194865885666, + "grad_norm": 10.9142484664917, + "learning_rate": 3.3328124725411825e-06, + "loss": 0.1882, + "num_input_tokens_seen": 111297024, + "step": 35360 + }, + { + "epoch": 2.2639395685295436, + "grad_norm": 12.595375061035156, + "learning_rate": 3.3322857081310158e-06, + "loss": 0.3215, + "num_input_tokens_seen": 111311936, + "step": 35365 + }, + { + "epoch": 2.2642596504705206, + "grad_norm": 15.546282768249512, + "learning_rate": 3.3317589021608353e-06, + "loss": 0.2659, + "num_input_tokens_seen": 111328576, + "step": 35370 + }, + { + "epoch": 2.264579732411497, + "grad_norm": 7.030916213989258, + "learning_rate": 3.3312320546569486e-06, + "loss": 0.1803, + "num_input_tokens_seen": 111344000, + "step": 35375 + }, + { + "epoch": 2.264899814352474, + "grad_norm": 21.66854476928711, + "learning_rate": 3.3307051656456624e-06, + "loss": 0.2751, + "num_input_tokens_seen": 111359936, + "step": 35380 + }, + { + "epoch": 2.265219896293451, + "grad_norm": 20.79526710510254, + "learning_rate": 3.3301782351532874e-06, + "loss": 0.1414, + "num_input_tokens_seen": 111375488, + "step": 35385 + }, + { + "epoch": 2.265539978234428, + "grad_norm": 18.37453842163086, + "learning_rate": 3.329651263206136e-06, + "loss": 0.2949, + "num_input_tokens_seen": 111391552, + "step": 35390 + }, + { + "epoch": 2.265860060175405, + "grad_norm": 8.519487380981445, + "learning_rate": 3.3291242498305215e-06, + "loss": 0.2348, + "num_input_tokens_seen": 111406976, + "step": 35395 + }, + { + "epoch": 2.2661801421163816, + "grad_norm": 31.83456802368164, + "learning_rate": 3.328597195052761e-06, + "loss": 0.2822, + "num_input_tokens_seen": 111423360, + "step": 35400 + }, + { + "epoch": 2.2665002240573586, + "grad_norm": 18.640356063842773, + "learning_rate": 3.328070098899172e-06, + "loss": 0.2325, + "num_input_tokens_seen": 111438272, + "step": 35405 + }, + { + "epoch": 2.2668203059983356, + "grad_norm": 15.340004920959473, + "learning_rate": 3.327542961396076e-06, + "loss": 0.2491, + "num_input_tokens_seen": 111453120, + "step": 35410 + }, + { + "epoch": 2.2671403879393126, + "grad_norm": 8.75334644317627, + "learning_rate": 3.327015782569795e-06, + "loss": 0.2223, + "num_input_tokens_seen": 111468608, + "step": 35415 + }, + { + "epoch": 2.267460469880289, + "grad_norm": 7.8361945152282715, + "learning_rate": 3.3264885624466537e-06, + "loss": 0.1699, + "num_input_tokens_seen": 111483200, + "step": 35420 + }, + { + "epoch": 2.267780551821266, + "grad_norm": 7.773234844207764, + "learning_rate": 3.325961301052978e-06, + "loss": 0.2718, + "num_input_tokens_seen": 111499328, + "step": 35425 + }, + { + "epoch": 2.268100633762243, + "grad_norm": 36.406837463378906, + "learning_rate": 3.325433998415098e-06, + "loss": 0.1949, + "num_input_tokens_seen": 111514880, + "step": 35430 + }, + { + "epoch": 2.26842071570322, + "grad_norm": 29.64110565185547, + "learning_rate": 3.3249066545593428e-06, + "loss": 0.2217, + "num_input_tokens_seen": 111529920, + "step": 35435 + }, + { + "epoch": 2.268740797644197, + "grad_norm": 51.0810546875, + "learning_rate": 3.324379269512047e-06, + "loss": 0.231, + "num_input_tokens_seen": 111548736, + "step": 35440 + }, + { + "epoch": 2.2690608795851737, + "grad_norm": 61.440040588378906, + "learning_rate": 3.323851843299544e-06, + "loss": 0.171, + "num_input_tokens_seen": 111564032, + "step": 35445 + }, + { + "epoch": 2.2693809615261507, + "grad_norm": 5.870046138763428, + "learning_rate": 3.3233243759481717e-06, + "loss": 0.1846, + "num_input_tokens_seen": 111581184, + "step": 35450 + }, + { + "epoch": 2.2697010434671276, + "grad_norm": 9.934873580932617, + "learning_rate": 3.322796867484268e-06, + "loss": 0.1711, + "num_input_tokens_seen": 111596416, + "step": 35455 + }, + { + "epoch": 2.2700211254081046, + "grad_norm": 28.721343994140625, + "learning_rate": 3.3222693179341743e-06, + "loss": 0.2638, + "num_input_tokens_seen": 111611136, + "step": 35460 + }, + { + "epoch": 2.270341207349081, + "grad_norm": 6.330406188964844, + "learning_rate": 3.3217417273242346e-06, + "loss": 0.2347, + "num_input_tokens_seen": 111625920, + "step": 35465 + }, + { + "epoch": 2.270661289290058, + "grad_norm": 12.853839874267578, + "learning_rate": 3.321214095680793e-06, + "loss": 0.2005, + "num_input_tokens_seen": 111641536, + "step": 35470 + }, + { + "epoch": 2.270981371231035, + "grad_norm": 6.851596832275391, + "learning_rate": 3.3206864230301974e-06, + "loss": 0.2397, + "num_input_tokens_seen": 111657216, + "step": 35475 + }, + { + "epoch": 2.271301453172012, + "grad_norm": 8.160727500915527, + "learning_rate": 3.320158709398796e-06, + "loss": 0.213, + "num_input_tokens_seen": 111672192, + "step": 35480 + }, + { + "epoch": 2.271621535112989, + "grad_norm": 19.33397102355957, + "learning_rate": 3.319630954812941e-06, + "loss": 0.2734, + "num_input_tokens_seen": 111687488, + "step": 35485 + }, + { + "epoch": 2.2719416170539657, + "grad_norm": 15.440061569213867, + "learning_rate": 3.319103159298985e-06, + "loss": 0.2439, + "num_input_tokens_seen": 111702976, + "step": 35490 + }, + { + "epoch": 2.2722616989949427, + "grad_norm": 4.654638767242432, + "learning_rate": 3.318575322883284e-06, + "loss": 0.2182, + "num_input_tokens_seen": 111718592, + "step": 35495 + }, + { + "epoch": 2.2725817809359197, + "grad_norm": 11.60648250579834, + "learning_rate": 3.3180474455921958e-06, + "loss": 0.181, + "num_input_tokens_seen": 111734592, + "step": 35500 + }, + { + "epoch": 2.2729018628768967, + "grad_norm": 16.630300521850586, + "learning_rate": 3.3175195274520786e-06, + "loss": 0.2401, + "num_input_tokens_seen": 111752064, + "step": 35505 + }, + { + "epoch": 2.273221944817873, + "grad_norm": 10.675640106201172, + "learning_rate": 3.316991568489295e-06, + "loss": 0.2504, + "num_input_tokens_seen": 111766976, + "step": 35510 + }, + { + "epoch": 2.27354202675885, + "grad_norm": 13.940610885620117, + "learning_rate": 3.3164635687302084e-06, + "loss": 0.1993, + "num_input_tokens_seen": 111782848, + "step": 35515 + }, + { + "epoch": 2.273862108699827, + "grad_norm": 23.679641723632812, + "learning_rate": 3.315935528201183e-06, + "loss": 0.2055, + "num_input_tokens_seen": 111799296, + "step": 35520 + }, + { + "epoch": 2.274182190640804, + "grad_norm": 37.21015167236328, + "learning_rate": 3.3154074469285876e-06, + "loss": 0.1978, + "num_input_tokens_seen": 111815424, + "step": 35525 + }, + { + "epoch": 2.274502272581781, + "grad_norm": 18.377437591552734, + "learning_rate": 3.314879324938792e-06, + "loss": 0.236, + "num_input_tokens_seen": 111831936, + "step": 35530 + }, + { + "epoch": 2.2748223545227577, + "grad_norm": 9.884086608886719, + "learning_rate": 3.314351162258167e-06, + "loss": 0.2359, + "num_input_tokens_seen": 111846848, + "step": 35535 + }, + { + "epoch": 2.2751424364637347, + "grad_norm": 7.361856460571289, + "learning_rate": 3.3138229589130867e-06, + "loss": 0.1994, + "num_input_tokens_seen": 111862784, + "step": 35540 + }, + { + "epoch": 2.2754625184047117, + "grad_norm": 12.656679153442383, + "learning_rate": 3.313294714929927e-06, + "loss": 0.2478, + "num_input_tokens_seen": 111879872, + "step": 35545 + }, + { + "epoch": 2.2757826003456887, + "grad_norm": 11.909708023071289, + "learning_rate": 3.3127664303350653e-06, + "loss": 0.2394, + "num_input_tokens_seen": 111895360, + "step": 35550 + }, + { + "epoch": 2.2761026822866652, + "grad_norm": 8.095574378967285, + "learning_rate": 3.312238105154882e-06, + "loss": 0.1685, + "num_input_tokens_seen": 111911296, + "step": 35555 + }, + { + "epoch": 2.2764227642276422, + "grad_norm": 10.272913932800293, + "learning_rate": 3.3117097394157583e-06, + "loss": 0.184, + "num_input_tokens_seen": 111929024, + "step": 35560 + }, + { + "epoch": 2.276742846168619, + "grad_norm": 12.677459716796875, + "learning_rate": 3.311181333144079e-06, + "loss": 0.2587, + "num_input_tokens_seen": 111944704, + "step": 35565 + }, + { + "epoch": 2.277062928109596, + "grad_norm": 39.041282653808594, + "learning_rate": 3.3106528863662286e-06, + "loss": 0.2375, + "num_input_tokens_seen": 111961600, + "step": 35570 + }, + { + "epoch": 2.2773830100505728, + "grad_norm": 11.360595703125, + "learning_rate": 3.310124399108595e-06, + "loss": 0.1761, + "num_input_tokens_seen": 111976960, + "step": 35575 + }, + { + "epoch": 2.2777030919915497, + "grad_norm": 3.7757184505462646, + "learning_rate": 3.3095958713975697e-06, + "loss": 0.1755, + "num_input_tokens_seen": 111992576, + "step": 35580 + }, + { + "epoch": 2.2780231739325267, + "grad_norm": 55.66069412231445, + "learning_rate": 3.3090673032595435e-06, + "loss": 0.1489, + "num_input_tokens_seen": 112008128, + "step": 35585 + }, + { + "epoch": 2.2783432558735037, + "grad_norm": 6.0593438148498535, + "learning_rate": 3.3085386947209103e-06, + "loss": 0.176, + "num_input_tokens_seen": 112023424, + "step": 35590 + }, + { + "epoch": 2.2786633378144803, + "grad_norm": 8.088003158569336, + "learning_rate": 3.3080100458080655e-06, + "loss": 0.1759, + "num_input_tokens_seen": 112040768, + "step": 35595 + }, + { + "epoch": 2.2789834197554573, + "grad_norm": 15.366931915283203, + "learning_rate": 3.3074813565474084e-06, + "loss": 0.1716, + "num_input_tokens_seen": 112056448, + "step": 35600 + }, + { + "epoch": 2.2793035016964343, + "grad_norm": 40.979591369628906, + "learning_rate": 3.3069526269653386e-06, + "loss": 0.1691, + "num_input_tokens_seen": 112071808, + "step": 35605 + }, + { + "epoch": 2.2796235836374112, + "grad_norm": 34.786415100097656, + "learning_rate": 3.3064238570882573e-06, + "loss": 0.2409, + "num_input_tokens_seen": 112087680, + "step": 35610 + }, + { + "epoch": 2.2799436655783882, + "grad_norm": 5.498454570770264, + "learning_rate": 3.3058950469425694e-06, + "loss": 0.1783, + "num_input_tokens_seen": 112104384, + "step": 35615 + }, + { + "epoch": 2.280263747519365, + "grad_norm": 13.900015830993652, + "learning_rate": 3.3053661965546807e-06, + "loss": 0.163, + "num_input_tokens_seen": 112120768, + "step": 35620 + }, + { + "epoch": 2.2805838294603418, + "grad_norm": 4.243710994720459, + "learning_rate": 3.3048373059509993e-06, + "loss": 0.2691, + "num_input_tokens_seen": 112136384, + "step": 35625 + }, + { + "epoch": 2.2809039114013188, + "grad_norm": 49.52979278564453, + "learning_rate": 3.304308375157935e-06, + "loss": 0.2316, + "num_input_tokens_seen": 112151552, + "step": 35630 + }, + { + "epoch": 2.2812239933422958, + "grad_norm": 11.711613655090332, + "learning_rate": 3.3037794042018995e-06, + "loss": 0.2818, + "num_input_tokens_seen": 112166848, + "step": 35635 + }, + { + "epoch": 2.2815440752832723, + "grad_norm": 8.722797393798828, + "learning_rate": 3.303250393109307e-06, + "loss": 0.1925, + "num_input_tokens_seen": 112182336, + "step": 35640 + }, + { + "epoch": 2.2818641572242493, + "grad_norm": 9.590538024902344, + "learning_rate": 3.302721341906575e-06, + "loss": 0.2369, + "num_input_tokens_seen": 112198272, + "step": 35645 + }, + { + "epoch": 2.2821842391652263, + "grad_norm": 27.80156898498535, + "learning_rate": 3.30219225062012e-06, + "loss": 0.2024, + "num_input_tokens_seen": 112213376, + "step": 35650 + }, + { + "epoch": 2.2825043211062033, + "grad_norm": 10.462433815002441, + "learning_rate": 3.301663119276362e-06, + "loss": 0.2214, + "num_input_tokens_seen": 112228032, + "step": 35655 + }, + { + "epoch": 2.2828244030471803, + "grad_norm": 10.967360496520996, + "learning_rate": 3.3011339479017242e-06, + "loss": 0.2142, + "num_input_tokens_seen": 112242752, + "step": 35660 + }, + { + "epoch": 2.283144484988157, + "grad_norm": 12.178037643432617, + "learning_rate": 3.300604736522629e-06, + "loss": 0.2561, + "num_input_tokens_seen": 112258880, + "step": 35665 + }, + { + "epoch": 2.283464566929134, + "grad_norm": 8.50973129272461, + "learning_rate": 3.300075485165504e-06, + "loss": 0.2027, + "num_input_tokens_seen": 112275136, + "step": 35670 + }, + { + "epoch": 2.283784648870111, + "grad_norm": 30.25974464416504, + "learning_rate": 3.2995461938567773e-06, + "loss": 0.3039, + "num_input_tokens_seen": 112290752, + "step": 35675 + }, + { + "epoch": 2.284104730811088, + "grad_norm": 9.226094245910645, + "learning_rate": 3.299016862622878e-06, + "loss": 0.1981, + "num_input_tokens_seen": 112305856, + "step": 35680 + }, + { + "epoch": 2.2844248127520643, + "grad_norm": 10.110601425170898, + "learning_rate": 3.298487491490238e-06, + "loss": 0.1854, + "num_input_tokens_seen": 112321984, + "step": 35685 + }, + { + "epoch": 2.2847448946930413, + "grad_norm": 12.652838706970215, + "learning_rate": 3.2979580804852923e-06, + "loss": 0.1716, + "num_input_tokens_seen": 112337280, + "step": 35690 + }, + { + "epoch": 2.2850649766340183, + "grad_norm": 6.680558204650879, + "learning_rate": 3.2974286296344763e-06, + "loss": 0.229, + "num_input_tokens_seen": 112354688, + "step": 35695 + }, + { + "epoch": 2.2853850585749953, + "grad_norm": 9.001913070678711, + "learning_rate": 3.2968991389642286e-06, + "loss": 0.1905, + "num_input_tokens_seen": 112369472, + "step": 35700 + }, + { + "epoch": 2.2857051405159723, + "grad_norm": 8.440253257751465, + "learning_rate": 3.2963696085009884e-06, + "loss": 0.217, + "num_input_tokens_seen": 112386048, + "step": 35705 + }, + { + "epoch": 2.286025222456949, + "grad_norm": 10.86352252960205, + "learning_rate": 3.2958400382711984e-06, + "loss": 0.2465, + "num_input_tokens_seen": 112401536, + "step": 35710 + }, + { + "epoch": 2.286345304397926, + "grad_norm": 9.798785209655762, + "learning_rate": 3.2953104283013016e-06, + "loss": 0.1618, + "num_input_tokens_seen": 112416896, + "step": 35715 + }, + { + "epoch": 2.286665386338903, + "grad_norm": 15.852917671203613, + "learning_rate": 3.294780778617745e-06, + "loss": 0.1625, + "num_input_tokens_seen": 112432704, + "step": 35720 + }, + { + "epoch": 2.28698546827988, + "grad_norm": 77.81875610351562, + "learning_rate": 3.294251089246976e-06, + "loss": 0.1489, + "num_input_tokens_seen": 112448320, + "step": 35725 + }, + { + "epoch": 2.2873055502208564, + "grad_norm": 36.56449890136719, + "learning_rate": 3.293721360215445e-06, + "loss": 0.248, + "num_input_tokens_seen": 112464448, + "step": 35730 + }, + { + "epoch": 2.2876256321618333, + "grad_norm": 18.764564514160156, + "learning_rate": 3.293191591549604e-06, + "loss": 0.1548, + "num_input_tokens_seen": 112480384, + "step": 35735 + }, + { + "epoch": 2.2879457141028103, + "grad_norm": 9.45766830444336, + "learning_rate": 3.2926617832759055e-06, + "loss": 0.2323, + "num_input_tokens_seen": 112494592, + "step": 35740 + }, + { + "epoch": 2.2882657960437873, + "grad_norm": 36.87850570678711, + "learning_rate": 3.2921319354208074e-06, + "loss": 0.2985, + "num_input_tokens_seen": 112509568, + "step": 35745 + }, + { + "epoch": 2.2885858779847643, + "grad_norm": 18.898422241210938, + "learning_rate": 3.291602048010766e-06, + "loss": 0.232, + "num_input_tokens_seen": 112525568, + "step": 35750 + }, + { + "epoch": 2.288905959925741, + "grad_norm": 15.58016586303711, + "learning_rate": 3.291072121072242e-06, + "loss": 0.3065, + "num_input_tokens_seen": 112541376, + "step": 35755 + }, + { + "epoch": 2.289226041866718, + "grad_norm": 8.644890785217285, + "learning_rate": 3.2905421546316963e-06, + "loss": 0.1949, + "num_input_tokens_seen": 112558848, + "step": 35760 + }, + { + "epoch": 2.289546123807695, + "grad_norm": 10.998539924621582, + "learning_rate": 3.290012148715593e-06, + "loss": 0.2198, + "num_input_tokens_seen": 112574656, + "step": 35765 + }, + { + "epoch": 2.289866205748672, + "grad_norm": 17.58231544494629, + "learning_rate": 3.289482103350398e-06, + "loss": 0.3517, + "num_input_tokens_seen": 112588928, + "step": 35770 + }, + { + "epoch": 2.2901862876896484, + "grad_norm": 8.914985656738281, + "learning_rate": 3.2889520185625802e-06, + "loss": 0.1504, + "num_input_tokens_seen": 112604864, + "step": 35775 + }, + { + "epoch": 2.2905063696306254, + "grad_norm": 47.677486419677734, + "learning_rate": 3.288421894378606e-06, + "loss": 0.2392, + "num_input_tokens_seen": 112619840, + "step": 35780 + }, + { + "epoch": 2.2908264515716024, + "grad_norm": 13.528252601623535, + "learning_rate": 3.2878917308249514e-06, + "loss": 0.2417, + "num_input_tokens_seen": 112634624, + "step": 35785 + }, + { + "epoch": 2.2911465335125794, + "grad_norm": 7.44513463973999, + "learning_rate": 3.2873615279280868e-06, + "loss": 0.3495, + "num_input_tokens_seen": 112650112, + "step": 35790 + }, + { + "epoch": 2.2914666154535563, + "grad_norm": 42.30290603637695, + "learning_rate": 3.2868312857144887e-06, + "loss": 0.2281, + "num_input_tokens_seen": 112664576, + "step": 35795 + }, + { + "epoch": 2.291786697394533, + "grad_norm": 10.938398361206055, + "learning_rate": 3.2863010042106347e-06, + "loss": 0.2212, + "num_input_tokens_seen": 112680064, + "step": 35800 + }, + { + "epoch": 2.29210677933551, + "grad_norm": 62.51510238647461, + "learning_rate": 3.2857706834430037e-06, + "loss": 0.2435, + "num_input_tokens_seen": 112696576, + "step": 35805 + }, + { + "epoch": 2.292426861276487, + "grad_norm": 19.0396728515625, + "learning_rate": 3.285240323438078e-06, + "loss": 0.1967, + "num_input_tokens_seen": 112711232, + "step": 35810 + }, + { + "epoch": 2.292746943217464, + "grad_norm": 12.439939498901367, + "learning_rate": 3.284709924222341e-06, + "loss": 0.2464, + "num_input_tokens_seen": 112727424, + "step": 35815 + }, + { + "epoch": 2.2930670251584404, + "grad_norm": 33.320743560791016, + "learning_rate": 3.2841794858222783e-06, + "loss": 0.2852, + "num_input_tokens_seen": 112744576, + "step": 35820 + }, + { + "epoch": 2.2933871070994174, + "grad_norm": 31.752248764038086, + "learning_rate": 3.2836490082643756e-06, + "loss": 0.1911, + "num_input_tokens_seen": 112760192, + "step": 35825 + }, + { + "epoch": 2.2937071890403944, + "grad_norm": 10.483866691589355, + "learning_rate": 3.2831184915751233e-06, + "loss": 0.2908, + "num_input_tokens_seen": 112775168, + "step": 35830 + }, + { + "epoch": 2.2940272709813714, + "grad_norm": 15.098259925842285, + "learning_rate": 3.2825879357810136e-06, + "loss": 0.1631, + "num_input_tokens_seen": 112790912, + "step": 35835 + }, + { + "epoch": 2.2943473529223484, + "grad_norm": 9.398052215576172, + "learning_rate": 3.2820573409085383e-06, + "loss": 0.1283, + "num_input_tokens_seen": 112806144, + "step": 35840 + }, + { + "epoch": 2.294667434863325, + "grad_norm": 17.752103805541992, + "learning_rate": 3.281526706984193e-06, + "loss": 0.2341, + "num_input_tokens_seen": 112822592, + "step": 35845 + }, + { + "epoch": 2.294987516804302, + "grad_norm": 12.587376594543457, + "learning_rate": 3.280996034034475e-06, + "loss": 0.1831, + "num_input_tokens_seen": 112838592, + "step": 35850 + }, + { + "epoch": 2.295307598745279, + "grad_norm": 7.8609466552734375, + "learning_rate": 3.2804653220858828e-06, + "loss": 0.1915, + "num_input_tokens_seen": 112854336, + "step": 35855 + }, + { + "epoch": 2.2956276806862554, + "grad_norm": 73.04683685302734, + "learning_rate": 3.2799345711649176e-06, + "loss": 0.2168, + "num_input_tokens_seen": 112869568, + "step": 35860 + }, + { + "epoch": 2.2959477626272324, + "grad_norm": 29.797895431518555, + "learning_rate": 3.2794037812980823e-06, + "loss": 0.2558, + "num_input_tokens_seen": 112885440, + "step": 35865 + }, + { + "epoch": 2.2962678445682094, + "grad_norm": 11.27059268951416, + "learning_rate": 3.278872952511882e-06, + "loss": 0.1851, + "num_input_tokens_seen": 112899968, + "step": 35870 + }, + { + "epoch": 2.2965879265091864, + "grad_norm": 8.809618949890137, + "learning_rate": 3.278342084832824e-06, + "loss": 0.2553, + "num_input_tokens_seen": 112915904, + "step": 35875 + }, + { + "epoch": 2.2969080084501634, + "grad_norm": 103.92847442626953, + "learning_rate": 3.2778111782874168e-06, + "loss": 0.2898, + "num_input_tokens_seen": 112932288, + "step": 35880 + }, + { + "epoch": 2.29722809039114, + "grad_norm": 12.129205703735352, + "learning_rate": 3.2772802329021704e-06, + "loss": 0.2115, + "num_input_tokens_seen": 112948672, + "step": 35885 + }, + { + "epoch": 2.297548172332117, + "grad_norm": 28.871829986572266, + "learning_rate": 3.2767492487035975e-06, + "loss": 0.2639, + "num_input_tokens_seen": 112964544, + "step": 35890 + }, + { + "epoch": 2.297868254273094, + "grad_norm": 22.45380401611328, + "learning_rate": 3.276218225718213e-06, + "loss": 0.1864, + "num_input_tokens_seen": 112980032, + "step": 35895 + }, + { + "epoch": 2.298188336214071, + "grad_norm": 4.953970909118652, + "learning_rate": 3.2756871639725335e-06, + "loss": 0.2037, + "num_input_tokens_seen": 112996224, + "step": 35900 + }, + { + "epoch": 2.2985084181550475, + "grad_norm": 25.913053512573242, + "learning_rate": 3.2751560634930777e-06, + "loss": 0.237, + "num_input_tokens_seen": 113011328, + "step": 35905 + }, + { + "epoch": 2.2988285000960245, + "grad_norm": 8.983987808227539, + "learning_rate": 3.274624924306366e-06, + "loss": 0.2893, + "num_input_tokens_seen": 113027392, + "step": 35910 + }, + { + "epoch": 2.2991485820370015, + "grad_norm": 76.28411102294922, + "learning_rate": 3.27409374643892e-06, + "loss": 0.2912, + "num_input_tokens_seen": 113044032, + "step": 35915 + }, + { + "epoch": 2.2994686639779784, + "grad_norm": 77.40506744384766, + "learning_rate": 3.2735625299172645e-06, + "loss": 0.2751, + "num_input_tokens_seen": 113059072, + "step": 35920 + }, + { + "epoch": 2.2997887459189554, + "grad_norm": 10.0732421875, + "learning_rate": 3.273031274767926e-06, + "loss": 0.2352, + "num_input_tokens_seen": 113074880, + "step": 35925 + }, + { + "epoch": 2.300108827859932, + "grad_norm": 8.726280212402344, + "learning_rate": 3.2724999810174318e-06, + "loss": 0.1902, + "num_input_tokens_seen": 113090624, + "step": 35930 + }, + { + "epoch": 2.300428909800909, + "grad_norm": 82.3401107788086, + "learning_rate": 3.2719686486923126e-06, + "loss": 0.2567, + "num_input_tokens_seen": 113106368, + "step": 35935 + }, + { + "epoch": 2.300748991741886, + "grad_norm": 18.672771453857422, + "learning_rate": 3.2714372778191e-06, + "loss": 0.196, + "num_input_tokens_seen": 113121472, + "step": 35940 + }, + { + "epoch": 2.301069073682863, + "grad_norm": 11.678031921386719, + "learning_rate": 3.270905868424328e-06, + "loss": 0.3752, + "num_input_tokens_seen": 113136448, + "step": 35945 + }, + { + "epoch": 2.3013891556238395, + "grad_norm": 10.009881019592285, + "learning_rate": 3.2703744205345316e-06, + "loss": 0.206, + "num_input_tokens_seen": 113152512, + "step": 35950 + }, + { + "epoch": 2.3017092375648165, + "grad_norm": 5.211224555969238, + "learning_rate": 3.2698429341762507e-06, + "loss": 0.1289, + "num_input_tokens_seen": 113167296, + "step": 35955 + }, + { + "epoch": 2.3020293195057935, + "grad_norm": 11.978759765625, + "learning_rate": 3.2693114093760233e-06, + "loss": 0.2085, + "num_input_tokens_seen": 113183552, + "step": 35960 + }, + { + "epoch": 2.3023494014467705, + "grad_norm": 38.87721633911133, + "learning_rate": 3.268779846160391e-06, + "loss": 0.2432, + "num_input_tokens_seen": 113199424, + "step": 35965 + }, + { + "epoch": 2.3026694833877475, + "grad_norm": 7.910388469696045, + "learning_rate": 3.2682482445558975e-06, + "loss": 0.1899, + "num_input_tokens_seen": 113214336, + "step": 35970 + }, + { + "epoch": 2.302989565328724, + "grad_norm": 14.44770336151123, + "learning_rate": 3.267716604589089e-06, + "loss": 0.2147, + "num_input_tokens_seen": 113229824, + "step": 35975 + }, + { + "epoch": 2.303309647269701, + "grad_norm": 6.606043815612793, + "learning_rate": 3.2671849262865114e-06, + "loss": 0.1596, + "num_input_tokens_seen": 113246016, + "step": 35980 + }, + { + "epoch": 2.303629729210678, + "grad_norm": 18.1777400970459, + "learning_rate": 3.266653209674715e-06, + "loss": 0.1786, + "num_input_tokens_seen": 113261376, + "step": 35985 + }, + { + "epoch": 2.303949811151655, + "grad_norm": 14.733747482299805, + "learning_rate": 3.2661214547802512e-06, + "loss": 0.2039, + "num_input_tokens_seen": 113276480, + "step": 35990 + }, + { + "epoch": 2.3042698930926315, + "grad_norm": 15.235027313232422, + "learning_rate": 3.2655896616296722e-06, + "loss": 0.1284, + "num_input_tokens_seen": 113292352, + "step": 35995 + }, + { + "epoch": 2.3045899750336085, + "grad_norm": 9.571091651916504, + "learning_rate": 3.265057830249533e-06, + "loss": 0.1803, + "num_input_tokens_seen": 113308160, + "step": 36000 + }, + { + "epoch": 2.3049100569745855, + "grad_norm": 55.70668029785156, + "learning_rate": 3.2645259606663904e-06, + "loss": 0.2459, + "num_input_tokens_seen": 113323584, + "step": 36005 + }, + { + "epoch": 2.3052301389155625, + "grad_norm": 7.681850910186768, + "learning_rate": 3.263994052906804e-06, + "loss": 0.1979, + "num_input_tokens_seen": 113338880, + "step": 36010 + }, + { + "epoch": 2.3055502208565395, + "grad_norm": 11.965143203735352, + "learning_rate": 3.2634621069973344e-06, + "loss": 0.3052, + "num_input_tokens_seen": 113354944, + "step": 36015 + }, + { + "epoch": 2.305870302797516, + "grad_norm": 29.761146545410156, + "learning_rate": 3.262930122964544e-06, + "loss": 0.1532, + "num_input_tokens_seen": 113370432, + "step": 36020 + }, + { + "epoch": 2.306190384738493, + "grad_norm": 11.936759948730469, + "learning_rate": 3.262398100834997e-06, + "loss": 0.1774, + "num_input_tokens_seen": 113385408, + "step": 36025 + }, + { + "epoch": 2.30651046667947, + "grad_norm": 9.991302490234375, + "learning_rate": 3.26186604063526e-06, + "loss": 0.2313, + "num_input_tokens_seen": 113401920, + "step": 36030 + }, + { + "epoch": 2.306830548620447, + "grad_norm": 4.760511875152588, + "learning_rate": 3.261333942391901e-06, + "loss": 0.2411, + "num_input_tokens_seen": 113416768, + "step": 36035 + }, + { + "epoch": 2.3071506305614236, + "grad_norm": 30.445419311523438, + "learning_rate": 3.2608018061314906e-06, + "loss": 0.1825, + "num_input_tokens_seen": 113432896, + "step": 36040 + }, + { + "epoch": 2.3074707125024005, + "grad_norm": 7.365900993347168, + "learning_rate": 3.2602696318806017e-06, + "loss": 0.1812, + "num_input_tokens_seen": 113449472, + "step": 36045 + }, + { + "epoch": 2.3077907944433775, + "grad_norm": 15.509835243225098, + "learning_rate": 3.259737419665807e-06, + "loss": 0.129, + "num_input_tokens_seen": 113465728, + "step": 36050 + }, + { + "epoch": 2.3081108763843545, + "grad_norm": 31.38401985168457, + "learning_rate": 3.2592051695136828e-06, + "loss": 0.2606, + "num_input_tokens_seen": 113481216, + "step": 36055 + }, + { + "epoch": 2.3084309583253315, + "grad_norm": 11.13673210144043, + "learning_rate": 3.2586728814508066e-06, + "loss": 0.1819, + "num_input_tokens_seen": 113496640, + "step": 36060 + }, + { + "epoch": 2.308751040266308, + "grad_norm": 11.814154624938965, + "learning_rate": 3.258140555503759e-06, + "loss": 0.2935, + "num_input_tokens_seen": 113511296, + "step": 36065 + }, + { + "epoch": 2.309071122207285, + "grad_norm": 8.347168922424316, + "learning_rate": 3.2576081916991208e-06, + "loss": 0.2085, + "num_input_tokens_seen": 113526400, + "step": 36070 + }, + { + "epoch": 2.309391204148262, + "grad_norm": 42.029842376708984, + "learning_rate": 3.257075790063476e-06, + "loss": 0.1803, + "num_input_tokens_seen": 113541824, + "step": 36075 + }, + { + "epoch": 2.309711286089239, + "grad_norm": 11.643416404724121, + "learning_rate": 3.2565433506234095e-06, + "loss": 0.3065, + "num_input_tokens_seen": 113557952, + "step": 36080 + }, + { + "epoch": 2.3100313680302156, + "grad_norm": 10.432663917541504, + "learning_rate": 3.256010873405509e-06, + "loss": 0.1975, + "num_input_tokens_seen": 113573312, + "step": 36085 + }, + { + "epoch": 2.3103514499711926, + "grad_norm": 8.74044418334961, + "learning_rate": 3.2554783584363635e-06, + "loss": 0.1953, + "num_input_tokens_seen": 113589120, + "step": 36090 + }, + { + "epoch": 2.3106715319121696, + "grad_norm": 73.13190460205078, + "learning_rate": 3.2549458057425633e-06, + "loss": 0.2128, + "num_input_tokens_seen": 113604224, + "step": 36095 + }, + { + "epoch": 2.3109916138531466, + "grad_norm": 6.662492275238037, + "learning_rate": 3.2544132153507024e-06, + "loss": 0.1691, + "num_input_tokens_seen": 113619520, + "step": 36100 + }, + { + "epoch": 2.3113116957941235, + "grad_norm": 18.719253540039062, + "learning_rate": 3.2538805872873753e-06, + "loss": 0.2388, + "num_input_tokens_seen": 113634560, + "step": 36105 + }, + { + "epoch": 2.3116317777351, + "grad_norm": 21.759801864624023, + "learning_rate": 3.2533479215791786e-06, + "loss": 0.2767, + "num_input_tokens_seen": 113650176, + "step": 36110 + }, + { + "epoch": 2.311951859676077, + "grad_norm": 8.962681770324707, + "learning_rate": 3.2528152182527105e-06, + "loss": 0.2197, + "num_input_tokens_seen": 113665856, + "step": 36115 + }, + { + "epoch": 2.312271941617054, + "grad_norm": 26.604583740234375, + "learning_rate": 3.2522824773345713e-06, + "loss": 0.2403, + "num_input_tokens_seen": 113680896, + "step": 36120 + }, + { + "epoch": 2.3125920235580306, + "grad_norm": 43.188690185546875, + "learning_rate": 3.251749698851363e-06, + "loss": 0.2539, + "num_input_tokens_seen": 113695680, + "step": 36125 + }, + { + "epoch": 2.3129121054990076, + "grad_norm": 11.435210227966309, + "learning_rate": 3.251216882829692e-06, + "loss": 0.234, + "num_input_tokens_seen": 113711872, + "step": 36130 + }, + { + "epoch": 2.3132321874399846, + "grad_norm": 7.544994354248047, + "learning_rate": 3.250684029296162e-06, + "loss": 0.2084, + "num_input_tokens_seen": 113728000, + "step": 36135 + }, + { + "epoch": 2.3135522693809616, + "grad_norm": 7.497391223907471, + "learning_rate": 3.250151138277382e-06, + "loss": 0.136, + "num_input_tokens_seen": 113743488, + "step": 36140 + }, + { + "epoch": 2.3138723513219386, + "grad_norm": 103.44902038574219, + "learning_rate": 3.2496182097999617e-06, + "loss": 0.2399, + "num_input_tokens_seen": 113758784, + "step": 36145 + }, + { + "epoch": 2.314192433262915, + "grad_norm": 26.927433013916016, + "learning_rate": 3.2490852438905123e-06, + "loss": 0.2673, + "num_input_tokens_seen": 113775040, + "step": 36150 + }, + { + "epoch": 2.314512515203892, + "grad_norm": 8.605679512023926, + "learning_rate": 3.2485522405756475e-06, + "loss": 0.2352, + "num_input_tokens_seen": 113791168, + "step": 36155 + }, + { + "epoch": 2.314832597144869, + "grad_norm": 13.05678939819336, + "learning_rate": 3.2480191998819836e-06, + "loss": 0.1866, + "num_input_tokens_seen": 113807104, + "step": 36160 + }, + { + "epoch": 2.315152679085846, + "grad_norm": 18.18501091003418, + "learning_rate": 3.247486121836137e-06, + "loss": 0.2246, + "num_input_tokens_seen": 113823232, + "step": 36165 + }, + { + "epoch": 2.3154727610268226, + "grad_norm": 7.198841094970703, + "learning_rate": 3.246953006464727e-06, + "loss": 0.2279, + "num_input_tokens_seen": 113837888, + "step": 36170 + }, + { + "epoch": 2.3157928429677996, + "grad_norm": 52.04134750366211, + "learning_rate": 3.2464198537943736e-06, + "loss": 0.2344, + "num_input_tokens_seen": 113853184, + "step": 36175 + }, + { + "epoch": 2.3161129249087766, + "grad_norm": 6.083765983581543, + "learning_rate": 3.245886663851701e-06, + "loss": 0.2208, + "num_input_tokens_seen": 113868864, + "step": 36180 + }, + { + "epoch": 2.3164330068497536, + "grad_norm": 13.450091361999512, + "learning_rate": 3.2453534366633343e-06, + "loss": 0.3125, + "num_input_tokens_seen": 113883904, + "step": 36185 + }, + { + "epoch": 2.3167530887907306, + "grad_norm": 11.20137882232666, + "learning_rate": 3.244820172255899e-06, + "loss": 0.2061, + "num_input_tokens_seen": 113899584, + "step": 36190 + }, + { + "epoch": 2.317073170731707, + "grad_norm": 10.52540111541748, + "learning_rate": 3.2442868706560237e-06, + "loss": 0.2645, + "num_input_tokens_seen": 113914432, + "step": 36195 + }, + { + "epoch": 2.317393252672684, + "grad_norm": 19.775129318237305, + "learning_rate": 3.2437535318903395e-06, + "loss": 0.2573, + "num_input_tokens_seen": 113929856, + "step": 36200 + }, + { + "epoch": 2.317713334613661, + "grad_norm": 12.76001262664795, + "learning_rate": 3.243220155985478e-06, + "loss": 0.1858, + "num_input_tokens_seen": 113945280, + "step": 36205 + }, + { + "epoch": 2.318033416554638, + "grad_norm": 8.411699295043945, + "learning_rate": 3.2426867429680725e-06, + "loss": 0.1597, + "num_input_tokens_seen": 113961408, + "step": 36210 + }, + { + "epoch": 2.3183534984956147, + "grad_norm": 7.8055949211120605, + "learning_rate": 3.2421532928647605e-06, + "loss": 0.1967, + "num_input_tokens_seen": 113976384, + "step": 36215 + }, + { + "epoch": 2.3186735804365917, + "grad_norm": 7.363320827484131, + "learning_rate": 3.241619805702178e-06, + "loss": 0.2269, + "num_input_tokens_seen": 113991680, + "step": 36220 + }, + { + "epoch": 2.3189936623775687, + "grad_norm": 17.80304718017578, + "learning_rate": 3.2410862815069656e-06, + "loss": 0.2183, + "num_input_tokens_seen": 114008512, + "step": 36225 + }, + { + "epoch": 2.3193137443185456, + "grad_norm": 19.76668930053711, + "learning_rate": 3.2405527203057645e-06, + "loss": 0.2233, + "num_input_tokens_seen": 114028032, + "step": 36230 + }, + { + "epoch": 2.3196338262595226, + "grad_norm": 25.161767959594727, + "learning_rate": 3.2400191221252173e-06, + "loss": 0.2322, + "num_input_tokens_seen": 114044032, + "step": 36235 + }, + { + "epoch": 2.319953908200499, + "grad_norm": 4.194925785064697, + "learning_rate": 3.23948548699197e-06, + "loss": 0.184, + "num_input_tokens_seen": 114058816, + "step": 36240 + }, + { + "epoch": 2.320273990141476, + "grad_norm": 5.703189373016357, + "learning_rate": 3.2389518149326702e-06, + "loss": 0.2354, + "num_input_tokens_seen": 114074368, + "step": 36245 + }, + { + "epoch": 2.320594072082453, + "grad_norm": 6.995147705078125, + "learning_rate": 3.2384181059739656e-06, + "loss": 0.1831, + "num_input_tokens_seen": 114089728, + "step": 36250 + }, + { + "epoch": 2.32091415402343, + "grad_norm": 8.577021598815918, + "learning_rate": 3.237884360142507e-06, + "loss": 0.1526, + "num_input_tokens_seen": 114105664, + "step": 36255 + }, + { + "epoch": 2.3212342359644067, + "grad_norm": 20.302223205566406, + "learning_rate": 3.2373505774649464e-06, + "loss": 0.1939, + "num_input_tokens_seen": 114121408, + "step": 36260 + }, + { + "epoch": 2.3215543179053837, + "grad_norm": 7.461986064910889, + "learning_rate": 3.2368167579679386e-06, + "loss": 0.1495, + "num_input_tokens_seen": 114136960, + "step": 36265 + }, + { + "epoch": 2.3218743998463607, + "grad_norm": 44.04828643798828, + "learning_rate": 3.2362829016781405e-06, + "loss": 0.231, + "num_input_tokens_seen": 114153216, + "step": 36270 + }, + { + "epoch": 2.3221944817873377, + "grad_norm": 4.809954643249512, + "learning_rate": 3.2357490086222093e-06, + "loss": 0.2567, + "num_input_tokens_seen": 114169280, + "step": 36275 + }, + { + "epoch": 2.3225145637283147, + "grad_norm": 15.693134307861328, + "learning_rate": 3.2352150788268056e-06, + "loss": 0.2084, + "num_input_tokens_seen": 114185728, + "step": 36280 + }, + { + "epoch": 2.322834645669291, + "grad_norm": 3.512272357940674, + "learning_rate": 3.2346811123185896e-06, + "loss": 0.1672, + "num_input_tokens_seen": 114201920, + "step": 36285 + }, + { + "epoch": 2.323154727610268, + "grad_norm": 47.89105987548828, + "learning_rate": 3.2341471091242256e-06, + "loss": 0.2174, + "num_input_tokens_seen": 114218688, + "step": 36290 + }, + { + "epoch": 2.323474809551245, + "grad_norm": 17.968666076660156, + "learning_rate": 3.2336130692703794e-06, + "loss": 0.2327, + "num_input_tokens_seen": 114235520, + "step": 36295 + }, + { + "epoch": 2.323794891492222, + "grad_norm": 15.443018913269043, + "learning_rate": 3.233078992783717e-06, + "loss": 0.2098, + "num_input_tokens_seen": 114250304, + "step": 36300 + }, + { + "epoch": 2.3241149734331987, + "grad_norm": 17.80232048034668, + "learning_rate": 3.2325448796909085e-06, + "loss": 0.2274, + "num_input_tokens_seen": 114265536, + "step": 36305 + }, + { + "epoch": 2.3244350553741757, + "grad_norm": 63.20021438598633, + "learning_rate": 3.232010730018625e-06, + "loss": 0.2361, + "num_input_tokens_seen": 114281984, + "step": 36310 + }, + { + "epoch": 2.3247551373151527, + "grad_norm": 17.743173599243164, + "learning_rate": 3.2314765437935386e-06, + "loss": 0.245, + "num_input_tokens_seen": 114299008, + "step": 36315 + }, + { + "epoch": 2.3250752192561297, + "grad_norm": 24.700029373168945, + "learning_rate": 3.230942321042323e-06, + "loss": 0.3888, + "num_input_tokens_seen": 114314560, + "step": 36320 + }, + { + "epoch": 2.3253953011971067, + "grad_norm": 8.019083976745605, + "learning_rate": 3.2304080617916556e-06, + "loss": 0.1697, + "num_input_tokens_seen": 114330752, + "step": 36325 + }, + { + "epoch": 2.3257153831380832, + "grad_norm": 6.512394905090332, + "learning_rate": 3.229873766068214e-06, + "loss": 0.1558, + "num_input_tokens_seen": 114346880, + "step": 36330 + }, + { + "epoch": 2.3260354650790602, + "grad_norm": 7.019997596740723, + "learning_rate": 3.229339433898678e-06, + "loss": 0.198, + "num_input_tokens_seen": 114363200, + "step": 36335 + }, + { + "epoch": 2.326355547020037, + "grad_norm": 9.9674072265625, + "learning_rate": 3.2288050653097307e-06, + "loss": 0.2137, + "num_input_tokens_seen": 114379392, + "step": 36340 + }, + { + "epoch": 2.326675628961014, + "grad_norm": 18.708051681518555, + "learning_rate": 3.2282706603280545e-06, + "loss": 0.1978, + "num_input_tokens_seen": 114394752, + "step": 36345 + }, + { + "epoch": 2.3269957109019908, + "grad_norm": 13.15926742553711, + "learning_rate": 3.2277362189803342e-06, + "loss": 0.2727, + "num_input_tokens_seen": 114410560, + "step": 36350 + }, + { + "epoch": 2.3273157928429677, + "grad_norm": 9.107211112976074, + "learning_rate": 3.2272017412932573e-06, + "loss": 0.303, + "num_input_tokens_seen": 114426432, + "step": 36355 + }, + { + "epoch": 2.3276358747839447, + "grad_norm": 19.46666145324707, + "learning_rate": 3.226667227293514e-06, + "loss": 0.191, + "num_input_tokens_seen": 114441792, + "step": 36360 + }, + { + "epoch": 2.3279559567249217, + "grad_norm": 17.249998092651367, + "learning_rate": 3.226132677007795e-06, + "loss": 0.2371, + "num_input_tokens_seen": 114458816, + "step": 36365 + }, + { + "epoch": 2.3282760386658987, + "grad_norm": 9.444707870483398, + "learning_rate": 3.2255980904627914e-06, + "loss": 0.2465, + "num_input_tokens_seen": 114474240, + "step": 36370 + }, + { + "epoch": 2.3285961206068753, + "grad_norm": 7.969840049743652, + "learning_rate": 3.2250634676851992e-06, + "loss": 0.1747, + "num_input_tokens_seen": 114491456, + "step": 36375 + }, + { + "epoch": 2.3289162025478523, + "grad_norm": 36.074337005615234, + "learning_rate": 3.224528808701714e-06, + "loss": 0.2339, + "num_input_tokens_seen": 114507200, + "step": 36380 + }, + { + "epoch": 2.3292362844888292, + "grad_norm": 43.0540885925293, + "learning_rate": 3.223994113539034e-06, + "loss": 0.2379, + "num_input_tokens_seen": 114523904, + "step": 36385 + }, + { + "epoch": 2.329556366429806, + "grad_norm": 6.997498512268066, + "learning_rate": 3.2234593822238598e-06, + "loss": 0.1407, + "num_input_tokens_seen": 114539840, + "step": 36390 + }, + { + "epoch": 2.329876448370783, + "grad_norm": 7.267942428588867, + "learning_rate": 3.222924614782892e-06, + "loss": 0.1382, + "num_input_tokens_seen": 114555072, + "step": 36395 + }, + { + "epoch": 2.3301965303117598, + "grad_norm": 7.478787899017334, + "learning_rate": 3.2223898112428344e-06, + "loss": 0.213, + "num_input_tokens_seen": 114570176, + "step": 36400 + }, + { + "epoch": 2.3305166122527368, + "grad_norm": 16.421283721923828, + "learning_rate": 3.2218549716303914e-06, + "loss": 0.2016, + "num_input_tokens_seen": 114585728, + "step": 36405 + }, + { + "epoch": 2.3308366941937138, + "grad_norm": 16.025402069091797, + "learning_rate": 3.2213200959722724e-06, + "loss": 0.2206, + "num_input_tokens_seen": 114601792, + "step": 36410 + }, + { + "epoch": 2.3311567761346903, + "grad_norm": 12.179213523864746, + "learning_rate": 3.2207851842951844e-06, + "loss": 0.1338, + "num_input_tokens_seen": 114617664, + "step": 36415 + }, + { + "epoch": 2.3314768580756673, + "grad_norm": 23.29302406311035, + "learning_rate": 3.2202502366258394e-06, + "loss": 0.2276, + "num_input_tokens_seen": 114632000, + "step": 36420 + }, + { + "epoch": 2.3317969400166443, + "grad_norm": 11.993744850158691, + "learning_rate": 3.2197152529909486e-06, + "loss": 0.2008, + "num_input_tokens_seen": 114647872, + "step": 36425 + }, + { + "epoch": 2.3321170219576213, + "grad_norm": 14.123621940612793, + "learning_rate": 3.2191802334172275e-06, + "loss": 0.2457, + "num_input_tokens_seen": 114663168, + "step": 36430 + }, + { + "epoch": 2.332437103898598, + "grad_norm": 10.397367477416992, + "learning_rate": 3.2186451779313914e-06, + "loss": 0.1498, + "num_input_tokens_seen": 114677824, + "step": 36435 + }, + { + "epoch": 2.332757185839575, + "grad_norm": 14.460177421569824, + "learning_rate": 3.218110086560158e-06, + "loss": 0.2776, + "num_input_tokens_seen": 114692672, + "step": 36440 + }, + { + "epoch": 2.333077267780552, + "grad_norm": 4.958197593688965, + "learning_rate": 3.217574959330248e-06, + "loss": 0.3512, + "num_input_tokens_seen": 114707968, + "step": 36445 + }, + { + "epoch": 2.333397349721529, + "grad_norm": 20.508495330810547, + "learning_rate": 3.2170397962683818e-06, + "loss": 0.1954, + "num_input_tokens_seen": 114723712, + "step": 36450 + }, + { + "epoch": 2.333717431662506, + "grad_norm": 8.750564575195312, + "learning_rate": 3.2165045974012832e-06, + "loss": 0.272, + "num_input_tokens_seen": 114738880, + "step": 36455 + }, + { + "epoch": 2.3340375136034823, + "grad_norm": 9.437599182128906, + "learning_rate": 3.2159693627556777e-06, + "loss": 0.2548, + "num_input_tokens_seen": 114753472, + "step": 36460 + }, + { + "epoch": 2.3343575955444593, + "grad_norm": 10.455501556396484, + "learning_rate": 3.2154340923582904e-06, + "loss": 0.1387, + "num_input_tokens_seen": 114768000, + "step": 36465 + }, + { + "epoch": 2.3346776774854363, + "grad_norm": 31.632152557373047, + "learning_rate": 3.214898786235852e-06, + "loss": 0.3848, + "num_input_tokens_seen": 114784000, + "step": 36470 + }, + { + "epoch": 2.3349977594264133, + "grad_norm": 13.991983413696289, + "learning_rate": 3.214363444415091e-06, + "loss": 0.2352, + "num_input_tokens_seen": 114798848, + "step": 36475 + }, + { + "epoch": 2.33531784136739, + "grad_norm": 19.18278694152832, + "learning_rate": 3.2138280669227416e-06, + "loss": 0.259, + "num_input_tokens_seen": 114813632, + "step": 36480 + }, + { + "epoch": 2.335637923308367, + "grad_norm": 13.182718276977539, + "learning_rate": 3.213292653785537e-06, + "loss": 0.2253, + "num_input_tokens_seen": 114830208, + "step": 36485 + }, + { + "epoch": 2.335958005249344, + "grad_norm": 20.119794845581055, + "learning_rate": 3.212757205030212e-06, + "loss": 0.2324, + "num_input_tokens_seen": 114846784, + "step": 36490 + }, + { + "epoch": 2.336278087190321, + "grad_norm": 10.40616512298584, + "learning_rate": 3.2122217206835043e-06, + "loss": 0.2594, + "num_input_tokens_seen": 114861888, + "step": 36495 + }, + { + "epoch": 2.336598169131298, + "grad_norm": 6.487320899963379, + "learning_rate": 3.2116862007721544e-06, + "loss": 0.1692, + "num_input_tokens_seen": 114877504, + "step": 36500 + }, + { + "epoch": 2.3369182510722744, + "grad_norm": 11.378605842590332, + "learning_rate": 3.2111506453229024e-06, + "loss": 0.2188, + "num_input_tokens_seen": 114891840, + "step": 36505 + }, + { + "epoch": 2.3372383330132513, + "grad_norm": 31.40604019165039, + "learning_rate": 3.210615054362492e-06, + "loss": 0.3845, + "num_input_tokens_seen": 114906944, + "step": 36510 + }, + { + "epoch": 2.3375584149542283, + "grad_norm": 71.17240905761719, + "learning_rate": 3.2100794279176674e-06, + "loss": 0.1854, + "num_input_tokens_seen": 114922176, + "step": 36515 + }, + { + "epoch": 2.3378784968952053, + "grad_norm": 16.25260353088379, + "learning_rate": 3.2095437660151734e-06, + "loss": 0.2246, + "num_input_tokens_seen": 114938432, + "step": 36520 + }, + { + "epoch": 2.338198578836182, + "grad_norm": 18.94392204284668, + "learning_rate": 3.2090080686817605e-06, + "loss": 0.318, + "num_input_tokens_seen": 114953152, + "step": 36525 + }, + { + "epoch": 2.338518660777159, + "grad_norm": 81.26191711425781, + "learning_rate": 3.208472335944177e-06, + "loss": 0.221, + "num_input_tokens_seen": 114969344, + "step": 36530 + }, + { + "epoch": 2.338838742718136, + "grad_norm": 5.093250274658203, + "learning_rate": 3.2079365678291767e-06, + "loss": 0.1831, + "num_input_tokens_seen": 114984960, + "step": 36535 + }, + { + "epoch": 2.339158824659113, + "grad_norm": 13.905264854431152, + "learning_rate": 3.207400764363511e-06, + "loss": 0.2345, + "num_input_tokens_seen": 115000576, + "step": 36540 + }, + { + "epoch": 2.33947890660009, + "grad_norm": 4.868024826049805, + "learning_rate": 3.206864925573936e-06, + "loss": 0.1756, + "num_input_tokens_seen": 115015552, + "step": 36545 + }, + { + "epoch": 2.3397989885410664, + "grad_norm": 32.03523635864258, + "learning_rate": 3.206329051487208e-06, + "loss": 0.1685, + "num_input_tokens_seen": 115030016, + "step": 36550 + }, + { + "epoch": 2.3401190704820434, + "grad_norm": 8.265064239501953, + "learning_rate": 3.205793142130087e-06, + "loss": 0.1446, + "num_input_tokens_seen": 115044416, + "step": 36555 + }, + { + "epoch": 2.3404391524230204, + "grad_norm": 68.26471710205078, + "learning_rate": 3.205257197529333e-06, + "loss": 0.3622, + "num_input_tokens_seen": 115059840, + "step": 36560 + }, + { + "epoch": 2.3407592343639974, + "grad_norm": 8.953837394714355, + "learning_rate": 3.2047212177117078e-06, + "loss": 0.1435, + "num_input_tokens_seen": 115075840, + "step": 36565 + }, + { + "epoch": 2.341079316304974, + "grad_norm": 49.203102111816406, + "learning_rate": 3.2041852027039755e-06, + "loss": 0.22, + "num_input_tokens_seen": 115091648, + "step": 36570 + }, + { + "epoch": 2.341399398245951, + "grad_norm": 13.961012840270996, + "learning_rate": 3.203649152532902e-06, + "loss": 0.2323, + "num_input_tokens_seen": 115108608, + "step": 36575 + }, + { + "epoch": 2.341719480186928, + "grad_norm": 119.48910522460938, + "learning_rate": 3.2031130672252552e-06, + "loss": 0.1537, + "num_input_tokens_seen": 115124800, + "step": 36580 + }, + { + "epoch": 2.342039562127905, + "grad_norm": 7.945565700531006, + "learning_rate": 3.2025769468078044e-06, + "loss": 0.2829, + "num_input_tokens_seen": 115140160, + "step": 36585 + }, + { + "epoch": 2.342359644068882, + "grad_norm": 29.813264846801758, + "learning_rate": 3.20204079130732e-06, + "loss": 0.227, + "num_input_tokens_seen": 115156096, + "step": 36590 + }, + { + "epoch": 2.3426797260098584, + "grad_norm": 52.7647590637207, + "learning_rate": 3.2015046007505757e-06, + "loss": 0.2746, + "num_input_tokens_seen": 115170816, + "step": 36595 + }, + { + "epoch": 2.3429998079508354, + "grad_norm": 9.422049522399902, + "learning_rate": 3.2009683751643457e-06, + "loss": 0.2314, + "num_input_tokens_seen": 115185536, + "step": 36600 + }, + { + "epoch": 2.3433198898918124, + "grad_norm": 32.124454498291016, + "learning_rate": 3.2004321145754065e-06, + "loss": 0.1881, + "num_input_tokens_seen": 115201024, + "step": 36605 + }, + { + "epoch": 2.3436399718327894, + "grad_norm": 82.24066162109375, + "learning_rate": 3.1998958190105352e-06, + "loss": 0.2162, + "num_input_tokens_seen": 115215744, + "step": 36610 + }, + { + "epoch": 2.343960053773766, + "grad_norm": 8.341460227966309, + "learning_rate": 3.1993594884965123e-06, + "loss": 0.254, + "num_input_tokens_seen": 115232704, + "step": 36615 + }, + { + "epoch": 2.344280135714743, + "grad_norm": 19.92010498046875, + "learning_rate": 3.1988231230601198e-06, + "loss": 0.2069, + "num_input_tokens_seen": 115248064, + "step": 36620 + }, + { + "epoch": 2.34460021765572, + "grad_norm": 5.4186835289001465, + "learning_rate": 3.198286722728141e-06, + "loss": 0.223, + "num_input_tokens_seen": 115263296, + "step": 36625 + }, + { + "epoch": 2.344920299596697, + "grad_norm": 20.381425857543945, + "learning_rate": 3.1977502875273605e-06, + "loss": 0.2543, + "num_input_tokens_seen": 115279872, + "step": 36630 + }, + { + "epoch": 2.345240381537674, + "grad_norm": 8.819711685180664, + "learning_rate": 3.1972138174845635e-06, + "loss": 0.1653, + "num_input_tokens_seen": 115294976, + "step": 36635 + }, + { + "epoch": 2.3455604634786504, + "grad_norm": 32.56293487548828, + "learning_rate": 3.1966773126265415e-06, + "loss": 0.1667, + "num_input_tokens_seen": 115310784, + "step": 36640 + }, + { + "epoch": 2.3458805454196274, + "grad_norm": 16.445056915283203, + "learning_rate": 3.196140772980083e-06, + "loss": 0.2562, + "num_input_tokens_seen": 115326592, + "step": 36645 + }, + { + "epoch": 2.3462006273606044, + "grad_norm": 12.677993774414062, + "learning_rate": 3.19560419857198e-06, + "loss": 0.2537, + "num_input_tokens_seen": 115343616, + "step": 36650 + }, + { + "epoch": 2.346520709301581, + "grad_norm": 6.626298427581787, + "learning_rate": 3.1950675894290275e-06, + "loss": 0.1709, + "num_input_tokens_seen": 115358656, + "step": 36655 + }, + { + "epoch": 2.346840791242558, + "grad_norm": 21.363201141357422, + "learning_rate": 3.1945309455780195e-06, + "loss": 0.3394, + "num_input_tokens_seen": 115375296, + "step": 36660 + }, + { + "epoch": 2.347160873183535, + "grad_norm": 52.184173583984375, + "learning_rate": 3.1939942670457536e-06, + "loss": 0.2417, + "num_input_tokens_seen": 115390976, + "step": 36665 + }, + { + "epoch": 2.347480955124512, + "grad_norm": 34.71135330200195, + "learning_rate": 3.193457553859029e-06, + "loss": 0.2483, + "num_input_tokens_seen": 115406592, + "step": 36670 + }, + { + "epoch": 2.347801037065489, + "grad_norm": 27.03336524963379, + "learning_rate": 3.1929208060446465e-06, + "loss": 0.2299, + "num_input_tokens_seen": 115421376, + "step": 36675 + }, + { + "epoch": 2.3481211190064655, + "grad_norm": 7.19573974609375, + "learning_rate": 3.1923840236294085e-06, + "loss": 0.2541, + "num_input_tokens_seen": 115436096, + "step": 36680 + }, + { + "epoch": 2.3484412009474425, + "grad_norm": 13.609254837036133, + "learning_rate": 3.1918472066401185e-06, + "loss": 0.2223, + "num_input_tokens_seen": 115452672, + "step": 36685 + }, + { + "epoch": 2.3487612828884195, + "grad_norm": 6.97996187210083, + "learning_rate": 3.191310355103583e-06, + "loss": 0.2102, + "num_input_tokens_seen": 115468544, + "step": 36690 + }, + { + "epoch": 2.3490813648293964, + "grad_norm": 8.51577377319336, + "learning_rate": 3.1907734690466086e-06, + "loss": 0.3369, + "num_input_tokens_seen": 115483904, + "step": 36695 + }, + { + "epoch": 2.349401446770373, + "grad_norm": 7.138817310333252, + "learning_rate": 3.190236548496005e-06, + "loss": 0.2031, + "num_input_tokens_seen": 115500032, + "step": 36700 + }, + { + "epoch": 2.34972152871135, + "grad_norm": 33.44905090332031, + "learning_rate": 3.189699593478584e-06, + "loss": 0.2442, + "num_input_tokens_seen": 115515712, + "step": 36705 + }, + { + "epoch": 2.350041610652327, + "grad_norm": 9.951212882995605, + "learning_rate": 3.1891626040211582e-06, + "loss": 0.2571, + "num_input_tokens_seen": 115531200, + "step": 36710 + }, + { + "epoch": 2.350361692593304, + "grad_norm": 8.407683372497559, + "learning_rate": 3.1886255801505418e-06, + "loss": 0.1583, + "num_input_tokens_seen": 115547072, + "step": 36715 + }, + { + "epoch": 2.350681774534281, + "grad_norm": 13.094743728637695, + "learning_rate": 3.1880885218935505e-06, + "loss": 0.2909, + "num_input_tokens_seen": 115562560, + "step": 36720 + }, + { + "epoch": 2.3510018564752575, + "grad_norm": 6.470582962036133, + "learning_rate": 3.1875514292770015e-06, + "loss": 0.1459, + "num_input_tokens_seen": 115577856, + "step": 36725 + }, + { + "epoch": 2.3513219384162345, + "grad_norm": 23.2507381439209, + "learning_rate": 3.1870143023277168e-06, + "loss": 0.2942, + "num_input_tokens_seen": 115593152, + "step": 36730 + }, + { + "epoch": 2.3516420203572115, + "grad_norm": 5.636234283447266, + "learning_rate": 3.186477141072516e-06, + "loss": 0.2979, + "num_input_tokens_seen": 115609024, + "step": 36735 + }, + { + "epoch": 2.3519621022981885, + "grad_norm": 23.995433807373047, + "learning_rate": 3.1859399455382228e-06, + "loss": 0.2339, + "num_input_tokens_seen": 115625024, + "step": 36740 + }, + { + "epoch": 2.352282184239165, + "grad_norm": 39.613704681396484, + "learning_rate": 3.1854027157516615e-06, + "loss": 0.2626, + "num_input_tokens_seen": 115640512, + "step": 36745 + }, + { + "epoch": 2.352602266180142, + "grad_norm": 11.8702392578125, + "learning_rate": 3.1848654517396583e-06, + "loss": 0.1688, + "num_input_tokens_seen": 115657920, + "step": 36750 + }, + { + "epoch": 2.352922348121119, + "grad_norm": 14.874940872192383, + "learning_rate": 3.184328153529042e-06, + "loss": 0.278, + "num_input_tokens_seen": 115672896, + "step": 36755 + }, + { + "epoch": 2.353242430062096, + "grad_norm": 6.407235622406006, + "learning_rate": 3.183790821146642e-06, + "loss": 0.2092, + "num_input_tokens_seen": 115690624, + "step": 36760 + }, + { + "epoch": 2.353562512003073, + "grad_norm": 6.348870277404785, + "learning_rate": 3.1832534546192908e-06, + "loss": 0.2352, + "num_input_tokens_seen": 115706624, + "step": 36765 + }, + { + "epoch": 2.3538825939440495, + "grad_norm": 10.974652290344238, + "learning_rate": 3.18271605397382e-06, + "loss": 0.1923, + "num_input_tokens_seen": 115721344, + "step": 36770 + }, + { + "epoch": 2.3542026758850265, + "grad_norm": 8.081686019897461, + "learning_rate": 3.182178619237066e-06, + "loss": 0.1588, + "num_input_tokens_seen": 115736576, + "step": 36775 + }, + { + "epoch": 2.3545227578260035, + "grad_norm": 13.836678504943848, + "learning_rate": 3.181641150435866e-06, + "loss": 0.224, + "num_input_tokens_seen": 115752896, + "step": 36780 + }, + { + "epoch": 2.3548428397669805, + "grad_norm": 8.053634643554688, + "learning_rate": 3.1811036475970563e-06, + "loss": 0.1854, + "num_input_tokens_seen": 115768320, + "step": 36785 + }, + { + "epoch": 2.355162921707957, + "grad_norm": 8.664653778076172, + "learning_rate": 3.180566110747479e-06, + "loss": 0.241, + "num_input_tokens_seen": 115784640, + "step": 36790 + }, + { + "epoch": 2.355483003648934, + "grad_norm": 38.18824005126953, + "learning_rate": 3.1800285399139747e-06, + "loss": 0.2999, + "num_input_tokens_seen": 115799424, + "step": 36795 + }, + { + "epoch": 2.355803085589911, + "grad_norm": 6.173532009124756, + "learning_rate": 3.1794909351233873e-06, + "loss": 0.1685, + "num_input_tokens_seen": 115816384, + "step": 36800 + }, + { + "epoch": 2.356123167530888, + "grad_norm": 14.610916137695312, + "learning_rate": 3.178953296402562e-06, + "loss": 0.2579, + "num_input_tokens_seen": 115831808, + "step": 36805 + }, + { + "epoch": 2.356443249471865, + "grad_norm": 47.54705047607422, + "learning_rate": 3.1784156237783453e-06, + "loss": 0.2363, + "num_input_tokens_seen": 115847552, + "step": 36810 + }, + { + "epoch": 2.3567633314128416, + "grad_norm": 8.406708717346191, + "learning_rate": 3.1778779172775866e-06, + "loss": 0.2009, + "num_input_tokens_seen": 115863808, + "step": 36815 + }, + { + "epoch": 2.3570834133538185, + "grad_norm": 11.946459770202637, + "learning_rate": 3.1773401769271357e-06, + "loss": 0.2429, + "num_input_tokens_seen": 115879936, + "step": 36820 + }, + { + "epoch": 2.3574034952947955, + "grad_norm": 40.51496505737305, + "learning_rate": 3.1768024027538435e-06, + "loss": 0.2146, + "num_input_tokens_seen": 115894912, + "step": 36825 + }, + { + "epoch": 2.3577235772357725, + "grad_norm": 5.890418529510498, + "learning_rate": 3.1762645947845656e-06, + "loss": 0.154, + "num_input_tokens_seen": 115910784, + "step": 36830 + }, + { + "epoch": 2.358043659176749, + "grad_norm": 15.28730297088623, + "learning_rate": 3.175726753046156e-06, + "loss": 0.1963, + "num_input_tokens_seen": 115927424, + "step": 36835 + }, + { + "epoch": 2.358363741117726, + "grad_norm": 12.560386657714844, + "learning_rate": 3.1751888775654715e-06, + "loss": 0.2708, + "num_input_tokens_seen": 115943936, + "step": 36840 + }, + { + "epoch": 2.358683823058703, + "grad_norm": 13.182215690612793, + "learning_rate": 3.1746509683693727e-06, + "loss": 0.2317, + "num_input_tokens_seen": 115961152, + "step": 36845 + }, + { + "epoch": 2.35900390499968, + "grad_norm": 21.77263832092285, + "learning_rate": 3.174113025484718e-06, + "loss": 0.2335, + "num_input_tokens_seen": 115977344, + "step": 36850 + }, + { + "epoch": 2.359323986940657, + "grad_norm": 34.98011016845703, + "learning_rate": 3.1735750489383703e-06, + "loss": 0.1963, + "num_input_tokens_seen": 115993472, + "step": 36855 + }, + { + "epoch": 2.3596440688816336, + "grad_norm": 12.228315353393555, + "learning_rate": 3.173037038757193e-06, + "loss": 0.1885, + "num_input_tokens_seen": 116008640, + "step": 36860 + }, + { + "epoch": 2.3599641508226106, + "grad_norm": 5.860924243927002, + "learning_rate": 3.172498994968052e-06, + "loss": 0.1878, + "num_input_tokens_seen": 116025280, + "step": 36865 + }, + { + "epoch": 2.3602842327635876, + "grad_norm": 15.625822067260742, + "learning_rate": 3.1719609175978133e-06, + "loss": 0.1902, + "num_input_tokens_seen": 116040064, + "step": 36870 + }, + { + "epoch": 2.3606043147045646, + "grad_norm": 7.853455066680908, + "learning_rate": 3.1714228066733477e-06, + "loss": 0.1829, + "num_input_tokens_seen": 116057088, + "step": 36875 + }, + { + "epoch": 2.360924396645541, + "grad_norm": 12.096200942993164, + "learning_rate": 3.170884662221523e-06, + "loss": 0.2213, + "num_input_tokens_seen": 116072960, + "step": 36880 + }, + { + "epoch": 2.361244478586518, + "grad_norm": 12.670032501220703, + "learning_rate": 3.1703464842692138e-06, + "loss": 0.272, + "num_input_tokens_seen": 116088704, + "step": 36885 + }, + { + "epoch": 2.361564560527495, + "grad_norm": 8.393098831176758, + "learning_rate": 3.169808272843293e-06, + "loss": 0.1445, + "num_input_tokens_seen": 116105664, + "step": 36890 + }, + { + "epoch": 2.361884642468472, + "grad_norm": 14.37580394744873, + "learning_rate": 3.169270027970635e-06, + "loss": 0.2431, + "num_input_tokens_seen": 116121024, + "step": 36895 + }, + { + "epoch": 2.362204724409449, + "grad_norm": 7.352872848510742, + "learning_rate": 3.168731749678119e-06, + "loss": 0.2349, + "num_input_tokens_seen": 116135808, + "step": 36900 + }, + { + "epoch": 2.3625248063504256, + "grad_norm": 4.873231410980225, + "learning_rate": 3.168193437992622e-06, + "loss": 0.2027, + "num_input_tokens_seen": 116150528, + "step": 36905 + }, + { + "epoch": 2.3628448882914026, + "grad_norm": 5.932233810424805, + "learning_rate": 3.1676550929410256e-06, + "loss": 0.192, + "num_input_tokens_seen": 116165312, + "step": 36910 + }, + { + "epoch": 2.3631649702323796, + "grad_norm": 13.201468467712402, + "learning_rate": 3.1671167145502114e-06, + "loss": 0.1879, + "num_input_tokens_seen": 116180416, + "step": 36915 + }, + { + "epoch": 2.363485052173356, + "grad_norm": 18.971824645996094, + "learning_rate": 3.1665783028470636e-06, + "loss": 0.2415, + "num_input_tokens_seen": 116196288, + "step": 36920 + }, + { + "epoch": 2.363805134114333, + "grad_norm": 28.532108306884766, + "learning_rate": 3.1660398578584665e-06, + "loss": 0.2012, + "num_input_tokens_seen": 116211072, + "step": 36925 + }, + { + "epoch": 2.36412521605531, + "grad_norm": 6.815257549285889, + "learning_rate": 3.1655013796113092e-06, + "loss": 0.2828, + "num_input_tokens_seen": 116225856, + "step": 36930 + }, + { + "epoch": 2.364445297996287, + "grad_norm": 14.541940689086914, + "learning_rate": 3.1649628681324795e-06, + "loss": 0.2462, + "num_input_tokens_seen": 116240448, + "step": 36935 + }, + { + "epoch": 2.364765379937264, + "grad_norm": 33.84447479248047, + "learning_rate": 3.164424323448867e-06, + "loss": 0.273, + "num_input_tokens_seen": 116256448, + "step": 36940 + }, + { + "epoch": 2.3650854618782406, + "grad_norm": 13.062736511230469, + "learning_rate": 3.163885745587365e-06, + "loss": 0.2831, + "num_input_tokens_seen": 116272640, + "step": 36945 + }, + { + "epoch": 2.3654055438192176, + "grad_norm": 8.148175239562988, + "learning_rate": 3.1633471345748672e-06, + "loss": 0.2958, + "num_input_tokens_seen": 116288896, + "step": 36950 + }, + { + "epoch": 2.3657256257601946, + "grad_norm": 11.005448341369629, + "learning_rate": 3.1628084904382685e-06, + "loss": 0.1791, + "num_input_tokens_seen": 116304000, + "step": 36955 + }, + { + "epoch": 2.3660457077011716, + "grad_norm": 13.437126159667969, + "learning_rate": 3.162269813204466e-06, + "loss": 0.2435, + "num_input_tokens_seen": 116320896, + "step": 36960 + }, + { + "epoch": 2.366365789642148, + "grad_norm": 4.172769069671631, + "learning_rate": 3.1617311029003593e-06, + "loss": 0.1587, + "num_input_tokens_seen": 116337088, + "step": 36965 + }, + { + "epoch": 2.366685871583125, + "grad_norm": 13.355697631835938, + "learning_rate": 3.161192359552848e-06, + "loss": 0.2713, + "num_input_tokens_seen": 116351488, + "step": 36970 + }, + { + "epoch": 2.367005953524102, + "grad_norm": 9.705187797546387, + "learning_rate": 3.160653583188834e-06, + "loss": 0.3221, + "num_input_tokens_seen": 116366976, + "step": 36975 + }, + { + "epoch": 2.367326035465079, + "grad_norm": 5.422187805175781, + "learning_rate": 3.1601147738352212e-06, + "loss": 0.1425, + "num_input_tokens_seen": 116384384, + "step": 36980 + }, + { + "epoch": 2.367646117406056, + "grad_norm": 13.550509452819824, + "learning_rate": 3.1595759315189155e-06, + "loss": 0.2174, + "num_input_tokens_seen": 116400384, + "step": 36985 + }, + { + "epoch": 2.3679661993470327, + "grad_norm": 11.080265998840332, + "learning_rate": 3.1590370562668233e-06, + "loss": 0.2824, + "num_input_tokens_seen": 116414976, + "step": 36990 + }, + { + "epoch": 2.3682862812880097, + "grad_norm": 14.822955131530762, + "learning_rate": 3.158498148105853e-06, + "loss": 0.2902, + "num_input_tokens_seen": 116430336, + "step": 36995 + }, + { + "epoch": 2.3686063632289867, + "grad_norm": 20.426958084106445, + "learning_rate": 3.157959207062915e-06, + "loss": 0.1279, + "num_input_tokens_seen": 116445888, + "step": 37000 + }, + { + "epoch": 2.3689264451699636, + "grad_norm": 13.83476734161377, + "learning_rate": 3.1574202331649216e-06, + "loss": 0.2083, + "num_input_tokens_seen": 116460736, + "step": 37005 + }, + { + "epoch": 2.36924652711094, + "grad_norm": 10.335325241088867, + "learning_rate": 3.156881226438786e-06, + "loss": 0.2319, + "num_input_tokens_seen": 116475904, + "step": 37010 + }, + { + "epoch": 2.369566609051917, + "grad_norm": 6.817458629608154, + "learning_rate": 3.1563421869114236e-06, + "loss": 0.1774, + "num_input_tokens_seen": 116492352, + "step": 37015 + }, + { + "epoch": 2.369886690992894, + "grad_norm": 6.697795867919922, + "learning_rate": 3.155803114609751e-06, + "loss": 0.1879, + "num_input_tokens_seen": 116507520, + "step": 37020 + }, + { + "epoch": 2.370206772933871, + "grad_norm": 5.8335394859313965, + "learning_rate": 3.155264009560688e-06, + "loss": 0.1764, + "num_input_tokens_seen": 116523712, + "step": 37025 + }, + { + "epoch": 2.370526854874848, + "grad_norm": 40.44031524658203, + "learning_rate": 3.1547248717911522e-06, + "loss": 0.229, + "num_input_tokens_seen": 116538624, + "step": 37030 + }, + { + "epoch": 2.3708469368158247, + "grad_norm": 32.297096252441406, + "learning_rate": 3.1541857013280676e-06, + "loss": 0.216, + "num_input_tokens_seen": 116554688, + "step": 37035 + }, + { + "epoch": 2.3711670187568017, + "grad_norm": 68.8265151977539, + "learning_rate": 3.153646498198355e-06, + "loss": 0.2766, + "num_input_tokens_seen": 116570944, + "step": 37040 + }, + { + "epoch": 2.3714871006977787, + "grad_norm": 62.649391174316406, + "learning_rate": 3.153107262428943e-06, + "loss": 0.2411, + "num_input_tokens_seen": 116586496, + "step": 37045 + }, + { + "epoch": 2.3718071826387557, + "grad_norm": 44.067378997802734, + "learning_rate": 3.1525679940467548e-06, + "loss": 0.284, + "num_input_tokens_seen": 116601792, + "step": 37050 + }, + { + "epoch": 2.372127264579732, + "grad_norm": 8.85615062713623, + "learning_rate": 3.15202869307872e-06, + "loss": 0.1597, + "num_input_tokens_seen": 116616768, + "step": 37055 + }, + { + "epoch": 2.372447346520709, + "grad_norm": 50.967403411865234, + "learning_rate": 3.1514893595517694e-06, + "loss": 0.3351, + "num_input_tokens_seen": 116632960, + "step": 37060 + }, + { + "epoch": 2.372767428461686, + "grad_norm": 7.708974838256836, + "learning_rate": 3.150949993492833e-06, + "loss": 0.1726, + "num_input_tokens_seen": 116648640, + "step": 37065 + }, + { + "epoch": 2.373087510402663, + "grad_norm": 13.351419448852539, + "learning_rate": 3.150410594928845e-06, + "loss": 0.3061, + "num_input_tokens_seen": 116665088, + "step": 37070 + }, + { + "epoch": 2.37340759234364, + "grad_norm": 4.7631096839904785, + "learning_rate": 3.1498711638867395e-06, + "loss": 0.1316, + "num_input_tokens_seen": 116681088, + "step": 37075 + }, + { + "epoch": 2.3737276742846167, + "grad_norm": 21.050188064575195, + "learning_rate": 3.149331700393454e-06, + "loss": 0.2597, + "num_input_tokens_seen": 116695936, + "step": 37080 + }, + { + "epoch": 2.3740477562255937, + "grad_norm": 65.45433807373047, + "learning_rate": 3.1487922044759244e-06, + "loss": 0.2362, + "num_input_tokens_seen": 116711488, + "step": 37085 + }, + { + "epoch": 2.3743678381665707, + "grad_norm": 2.645545482635498, + "learning_rate": 3.148252676161093e-06, + "loss": 0.1329, + "num_input_tokens_seen": 116727360, + "step": 37090 + }, + { + "epoch": 2.3746879201075477, + "grad_norm": 26.68486785888672, + "learning_rate": 3.1477131154758976e-06, + "loss": 0.2023, + "num_input_tokens_seen": 116743616, + "step": 37095 + }, + { + "epoch": 2.3750080020485242, + "grad_norm": 20.513412475585938, + "learning_rate": 3.1471735224472842e-06, + "loss": 0.3928, + "num_input_tokens_seen": 116758784, + "step": 37100 + }, + { + "epoch": 2.3753280839895012, + "grad_norm": 9.7322359085083, + "learning_rate": 3.1466338971021963e-06, + "loss": 0.2498, + "num_input_tokens_seen": 116774144, + "step": 37105 + }, + { + "epoch": 2.3756481659304782, + "grad_norm": 11.574187278747559, + "learning_rate": 3.1460942394675798e-06, + "loss": 0.2343, + "num_input_tokens_seen": 116790336, + "step": 37110 + }, + { + "epoch": 2.375968247871455, + "grad_norm": 10.10575008392334, + "learning_rate": 3.1455545495703816e-06, + "loss": 0.1273, + "num_input_tokens_seen": 116806976, + "step": 37115 + }, + { + "epoch": 2.376288329812432, + "grad_norm": 38.386959075927734, + "learning_rate": 3.1450148274375523e-06, + "loss": 0.312, + "num_input_tokens_seen": 116822912, + "step": 37120 + }, + { + "epoch": 2.3766084117534088, + "grad_norm": 12.201395988464355, + "learning_rate": 3.1444750730960415e-06, + "loss": 0.2116, + "num_input_tokens_seen": 116838272, + "step": 37125 + }, + { + "epoch": 2.3769284936943857, + "grad_norm": 29.928003311157227, + "learning_rate": 3.1439352865728035e-06, + "loss": 0.1827, + "num_input_tokens_seen": 116855040, + "step": 37130 + }, + { + "epoch": 2.3772485756353627, + "grad_norm": 123.92460632324219, + "learning_rate": 3.1433954678947916e-06, + "loss": 0.2978, + "num_input_tokens_seen": 116869888, + "step": 37135 + }, + { + "epoch": 2.3775686575763397, + "grad_norm": 8.818259239196777, + "learning_rate": 3.1428556170889607e-06, + "loss": 0.2941, + "num_input_tokens_seen": 116884736, + "step": 37140 + }, + { + "epoch": 2.3778887395173163, + "grad_norm": 8.911873817443848, + "learning_rate": 3.1423157341822693e-06, + "loss": 0.2497, + "num_input_tokens_seen": 116900032, + "step": 37145 + }, + { + "epoch": 2.3782088214582933, + "grad_norm": 20.452741622924805, + "learning_rate": 3.141775819201675e-06, + "loss": 0.1966, + "num_input_tokens_seen": 116915712, + "step": 37150 + }, + { + "epoch": 2.3785289033992703, + "grad_norm": 32.981590270996094, + "learning_rate": 3.14123587217414e-06, + "loss": 0.3116, + "num_input_tokens_seen": 116930560, + "step": 37155 + }, + { + "epoch": 2.3788489853402472, + "grad_norm": 8.769399642944336, + "learning_rate": 3.140695893126625e-06, + "loss": 0.181, + "num_input_tokens_seen": 116946496, + "step": 37160 + }, + { + "epoch": 2.3791690672812242, + "grad_norm": 14.852522850036621, + "learning_rate": 3.140155882086095e-06, + "loss": 0.1874, + "num_input_tokens_seen": 116962112, + "step": 37165 + }, + { + "epoch": 2.379489149222201, + "grad_norm": 18.5749454498291, + "learning_rate": 3.1396158390795143e-06, + "loss": 0.2267, + "num_input_tokens_seen": 116978880, + "step": 37170 + }, + { + "epoch": 2.3798092311631778, + "grad_norm": 17.520065307617188, + "learning_rate": 3.1390757641338497e-06, + "loss": 0.3445, + "num_input_tokens_seen": 116993920, + "step": 37175 + }, + { + "epoch": 2.3801293131041548, + "grad_norm": 10.386476516723633, + "learning_rate": 3.138535657276071e-06, + "loss": 0.3064, + "num_input_tokens_seen": 117009280, + "step": 37180 + }, + { + "epoch": 2.3804493950451313, + "grad_norm": 7.113763332366943, + "learning_rate": 3.1379955185331474e-06, + "loss": 0.1395, + "num_input_tokens_seen": 117024128, + "step": 37185 + }, + { + "epoch": 2.3807694769861083, + "grad_norm": 50.7012939453125, + "learning_rate": 3.137455347932051e-06, + "loss": 0.175, + "num_input_tokens_seen": 117039104, + "step": 37190 + }, + { + "epoch": 2.3810895589270853, + "grad_norm": 7.255154132843018, + "learning_rate": 3.1369151454997547e-06, + "loss": 0.2272, + "num_input_tokens_seen": 117054016, + "step": 37195 + }, + { + "epoch": 2.3814096408680623, + "grad_norm": 10.14128589630127, + "learning_rate": 3.1363749112632336e-06, + "loss": 0.3674, + "num_input_tokens_seen": 117068992, + "step": 37200 + }, + { + "epoch": 2.3817297228090393, + "grad_norm": 8.855972290039062, + "learning_rate": 3.1358346452494647e-06, + "loss": 0.1852, + "num_input_tokens_seen": 117085312, + "step": 37205 + }, + { + "epoch": 2.382049804750016, + "grad_norm": 9.549874305725098, + "learning_rate": 3.1352943474854246e-06, + "loss": 0.1488, + "num_input_tokens_seen": 117100288, + "step": 37210 + }, + { + "epoch": 2.382369886690993, + "grad_norm": 30.718341827392578, + "learning_rate": 3.1347540179980945e-06, + "loss": 0.262, + "num_input_tokens_seen": 117116352, + "step": 37215 + }, + { + "epoch": 2.38268996863197, + "grad_norm": 9.329771995544434, + "learning_rate": 3.1342136568144556e-06, + "loss": 0.1892, + "num_input_tokens_seen": 117132160, + "step": 37220 + }, + { + "epoch": 2.383010050572947, + "grad_norm": 6.734994888305664, + "learning_rate": 3.1336732639614894e-06, + "loss": 0.2432, + "num_input_tokens_seen": 117147456, + "step": 37225 + }, + { + "epoch": 2.3833301325139233, + "grad_norm": 12.503034591674805, + "learning_rate": 3.133132839466181e-06, + "loss": 0.319, + "num_input_tokens_seen": 117163008, + "step": 37230 + }, + { + "epoch": 2.3836502144549003, + "grad_norm": 6.49802303314209, + "learning_rate": 3.1325923833555165e-06, + "loss": 0.1343, + "num_input_tokens_seen": 117178752, + "step": 37235 + }, + { + "epoch": 2.3839702963958773, + "grad_norm": 9.378292083740234, + "learning_rate": 3.1320518956564833e-06, + "loss": 0.1841, + "num_input_tokens_seen": 117194304, + "step": 37240 + }, + { + "epoch": 2.3842903783368543, + "grad_norm": 57.493316650390625, + "learning_rate": 3.1315113763960707e-06, + "loss": 0.2999, + "num_input_tokens_seen": 117209408, + "step": 37245 + }, + { + "epoch": 2.3846104602778313, + "grad_norm": 9.495537757873535, + "learning_rate": 3.13097082560127e-06, + "loss": 0.1687, + "num_input_tokens_seen": 117225408, + "step": 37250 + }, + { + "epoch": 2.384930542218808, + "grad_norm": 13.116098403930664, + "learning_rate": 3.1304302432990728e-06, + "loss": 0.3064, + "num_input_tokens_seen": 117240128, + "step": 37255 + }, + { + "epoch": 2.385250624159785, + "grad_norm": 26.57094383239746, + "learning_rate": 3.1298896295164727e-06, + "loss": 0.2303, + "num_input_tokens_seen": 117255808, + "step": 37260 + }, + { + "epoch": 2.385570706100762, + "grad_norm": 12.772140502929688, + "learning_rate": 3.1293489842804654e-06, + "loss": 0.1998, + "num_input_tokens_seen": 117273600, + "step": 37265 + }, + { + "epoch": 2.385890788041739, + "grad_norm": 5.882211208343506, + "learning_rate": 3.128808307618048e-06, + "loss": 0.2555, + "num_input_tokens_seen": 117291136, + "step": 37270 + }, + { + "epoch": 2.3862108699827154, + "grad_norm": 3.229363441467285, + "learning_rate": 3.1282675995562195e-06, + "loss": 0.262, + "num_input_tokens_seen": 117306560, + "step": 37275 + }, + { + "epoch": 2.3865309519236924, + "grad_norm": 38.896095275878906, + "learning_rate": 3.1277268601219797e-06, + "loss": 0.3019, + "num_input_tokens_seen": 117322176, + "step": 37280 + }, + { + "epoch": 2.3868510338646693, + "grad_norm": 26.675928115844727, + "learning_rate": 3.12718608934233e-06, + "loss": 0.3054, + "num_input_tokens_seen": 117339072, + "step": 37285 + }, + { + "epoch": 2.3871711158056463, + "grad_norm": 5.303474426269531, + "learning_rate": 3.126645287244273e-06, + "loss": 0.1775, + "num_input_tokens_seen": 117367936, + "step": 37290 + }, + { + "epoch": 2.3874911977466233, + "grad_norm": 32.31753921508789, + "learning_rate": 3.126104453854815e-06, + "loss": 0.2964, + "num_input_tokens_seen": 117383680, + "step": 37295 + }, + { + "epoch": 2.3878112796876, + "grad_norm": 11.586474418640137, + "learning_rate": 3.1255635892009617e-06, + "loss": 0.2318, + "num_input_tokens_seen": 117399744, + "step": 37300 + }, + { + "epoch": 2.388131361628577, + "grad_norm": 7.306075096130371, + "learning_rate": 3.1250226933097216e-06, + "loss": 0.2093, + "num_input_tokens_seen": 117415872, + "step": 37305 + }, + { + "epoch": 2.388451443569554, + "grad_norm": 6.561552047729492, + "learning_rate": 3.1244817662081038e-06, + "loss": 0.2072, + "num_input_tokens_seen": 117430976, + "step": 37310 + }, + { + "epoch": 2.388771525510531, + "grad_norm": 5.694633483886719, + "learning_rate": 3.12394080792312e-06, + "loss": 0.2696, + "num_input_tokens_seen": 117447040, + "step": 37315 + }, + { + "epoch": 2.3890916074515074, + "grad_norm": 8.543218612670898, + "learning_rate": 3.1233998184817813e-06, + "loss": 0.3026, + "num_input_tokens_seen": 117462144, + "step": 37320 + }, + { + "epoch": 2.3894116893924844, + "grad_norm": 11.709739685058594, + "learning_rate": 3.1228587979111027e-06, + "loss": 0.2848, + "num_input_tokens_seen": 117477056, + "step": 37325 + }, + { + "epoch": 2.3897317713334614, + "grad_norm": 15.90149974822998, + "learning_rate": 3.1223177462381005e-06, + "loss": 0.26, + "num_input_tokens_seen": 117492224, + "step": 37330 + }, + { + "epoch": 2.3900518532744384, + "grad_norm": 34.87411117553711, + "learning_rate": 3.1217766634897918e-06, + "loss": 0.3487, + "num_input_tokens_seen": 117507456, + "step": 37335 + }, + { + "epoch": 2.3903719352154154, + "grad_norm": 39.38871383666992, + "learning_rate": 3.121235549693195e-06, + "loss": 0.2269, + "num_input_tokens_seen": 117521728, + "step": 37340 + }, + { + "epoch": 2.390692017156392, + "grad_norm": 9.18639850616455, + "learning_rate": 3.1206944048753307e-06, + "loss": 0.2479, + "num_input_tokens_seen": 117536640, + "step": 37345 + }, + { + "epoch": 2.391012099097369, + "grad_norm": 7.0150980949401855, + "learning_rate": 3.1201532290632207e-06, + "loss": 0.1789, + "num_input_tokens_seen": 117552512, + "step": 37350 + }, + { + "epoch": 2.391332181038346, + "grad_norm": 30.76559066772461, + "learning_rate": 3.119612022283889e-06, + "loss": 0.2537, + "num_input_tokens_seen": 117567424, + "step": 37355 + }, + { + "epoch": 2.391652262979323, + "grad_norm": 93.1422348022461, + "learning_rate": 3.1190707845643605e-06, + "loss": 0.28, + "num_input_tokens_seen": 117583424, + "step": 37360 + }, + { + "epoch": 2.3919723449202994, + "grad_norm": 10.120512962341309, + "learning_rate": 3.1185295159316608e-06, + "loss": 0.1772, + "num_input_tokens_seen": 117599424, + "step": 37365 + }, + { + "epoch": 2.3922924268612764, + "grad_norm": 7.272660255432129, + "learning_rate": 3.1179882164128202e-06, + "loss": 0.2561, + "num_input_tokens_seen": 117614720, + "step": 37370 + }, + { + "epoch": 2.3926125088022534, + "grad_norm": 8.74010944366455, + "learning_rate": 3.117446886034866e-06, + "loss": 0.177, + "num_input_tokens_seen": 117629504, + "step": 37375 + }, + { + "epoch": 2.3929325907432304, + "grad_norm": 36.281700134277344, + "learning_rate": 3.116905524824831e-06, + "loss": 0.1672, + "num_input_tokens_seen": 117645760, + "step": 37380 + }, + { + "epoch": 2.3932526726842074, + "grad_norm": 13.62809944152832, + "learning_rate": 3.1163641328097472e-06, + "loss": 0.2088, + "num_input_tokens_seen": 117660480, + "step": 37385 + }, + { + "epoch": 2.393572754625184, + "grad_norm": 24.710763931274414, + "learning_rate": 3.115822710016649e-06, + "loss": 0.265, + "num_input_tokens_seen": 117676224, + "step": 37390 + }, + { + "epoch": 2.393892836566161, + "grad_norm": 36.35129165649414, + "learning_rate": 3.115281256472573e-06, + "loss": 0.3185, + "num_input_tokens_seen": 117690880, + "step": 37395 + }, + { + "epoch": 2.394212918507138, + "grad_norm": 9.580770492553711, + "learning_rate": 3.1147397722045558e-06, + "loss": 0.2079, + "num_input_tokens_seen": 117706240, + "step": 37400 + }, + { + "epoch": 2.394533000448115, + "grad_norm": 27.28125, + "learning_rate": 3.1141982572396355e-06, + "loss": 0.2328, + "num_input_tokens_seen": 117721536, + "step": 37405 + }, + { + "epoch": 2.3948530823890914, + "grad_norm": 33.82225799560547, + "learning_rate": 3.1136567116048545e-06, + "loss": 0.273, + "num_input_tokens_seen": 117735936, + "step": 37410 + }, + { + "epoch": 2.3951731643300684, + "grad_norm": 14.060302734375, + "learning_rate": 3.1131151353272527e-06, + "loss": 0.2035, + "num_input_tokens_seen": 117752384, + "step": 37415 + }, + { + "epoch": 2.3954932462710454, + "grad_norm": 9.512151718139648, + "learning_rate": 3.1125735284338754e-06, + "loss": 0.157, + "num_input_tokens_seen": 117768128, + "step": 37420 + }, + { + "epoch": 2.3958133282120224, + "grad_norm": 60.755615234375, + "learning_rate": 3.112031890951767e-06, + "loss": 0.2292, + "num_input_tokens_seen": 117784128, + "step": 37425 + }, + { + "epoch": 2.3961334101529994, + "grad_norm": 26.649267196655273, + "learning_rate": 3.1114902229079737e-06, + "loss": 0.1662, + "num_input_tokens_seen": 117798848, + "step": 37430 + }, + { + "epoch": 2.396453492093976, + "grad_norm": 24.651779174804688, + "learning_rate": 3.110948524329544e-06, + "loss": 0.195, + "num_input_tokens_seen": 117814720, + "step": 37435 + }, + { + "epoch": 2.396773574034953, + "grad_norm": 5.858760833740234, + "learning_rate": 3.1104067952435267e-06, + "loss": 0.225, + "num_input_tokens_seen": 117830016, + "step": 37440 + }, + { + "epoch": 2.39709365597593, + "grad_norm": 19.189777374267578, + "learning_rate": 3.1098650356769745e-06, + "loss": 0.1745, + "num_input_tokens_seen": 117846272, + "step": 37445 + }, + { + "epoch": 2.3974137379169065, + "grad_norm": 24.7856502532959, + "learning_rate": 3.1093232456569384e-06, + "loss": 0.1957, + "num_input_tokens_seen": 117860736, + "step": 37450 + }, + { + "epoch": 2.3977338198578835, + "grad_norm": 14.942941665649414, + "learning_rate": 3.108781425210473e-06, + "loss": 0.2226, + "num_input_tokens_seen": 117875008, + "step": 37455 + }, + { + "epoch": 2.3980539017988605, + "grad_norm": 7.149017810821533, + "learning_rate": 3.1082395743646353e-06, + "loss": 0.2238, + "num_input_tokens_seen": 117892736, + "step": 37460 + }, + { + "epoch": 2.3983739837398375, + "grad_norm": 15.378026008605957, + "learning_rate": 3.1076976931464803e-06, + "loss": 0.2795, + "num_input_tokens_seen": 117908352, + "step": 37465 + }, + { + "epoch": 2.3986940656808144, + "grad_norm": 3.731379508972168, + "learning_rate": 3.107155781583068e-06, + "loss": 0.1588, + "num_input_tokens_seen": 117924672, + "step": 37470 + }, + { + "epoch": 2.399014147621791, + "grad_norm": 9.486040115356445, + "learning_rate": 3.106613839701459e-06, + "loss": 0.1354, + "num_input_tokens_seen": 117940160, + "step": 37475 + }, + { + "epoch": 2.399334229562768, + "grad_norm": 5.018671989440918, + "learning_rate": 3.106071867528715e-06, + "loss": 0.2088, + "num_input_tokens_seen": 117955136, + "step": 37480 + }, + { + "epoch": 2.399654311503745, + "grad_norm": 15.32253646850586, + "learning_rate": 3.105529865091898e-06, + "loss": 0.2431, + "num_input_tokens_seen": 117969856, + "step": 37485 + }, + { + "epoch": 2.399974393444722, + "grad_norm": 8.602699279785156, + "learning_rate": 3.1049878324180745e-06, + "loss": 0.1642, + "num_input_tokens_seen": 117984704, + "step": 37490 + }, + { + "epoch": 2.4002944753856985, + "grad_norm": 10.954680442810059, + "learning_rate": 3.104445769534309e-06, + "loss": 0.2134, + "num_input_tokens_seen": 118000128, + "step": 37495 + }, + { + "epoch": 2.4006145573266755, + "grad_norm": 8.652085304260254, + "learning_rate": 3.1039036764676704e-06, + "loss": 0.1757, + "num_input_tokens_seen": 118016640, + "step": 37500 + }, + { + "epoch": 2.4009346392676525, + "grad_norm": 31.2181453704834, + "learning_rate": 3.1033615532452283e-06, + "loss": 0.3412, + "num_input_tokens_seen": 118031872, + "step": 37505 + }, + { + "epoch": 2.4012547212086295, + "grad_norm": 32.80670928955078, + "learning_rate": 3.1028193998940525e-06, + "loss": 0.2326, + "num_input_tokens_seen": 118046848, + "step": 37510 + }, + { + "epoch": 2.4015748031496065, + "grad_norm": 13.024328231811523, + "learning_rate": 3.1022772164412168e-06, + "loss": 0.2026, + "num_input_tokens_seen": 118062912, + "step": 37515 + }, + { + "epoch": 2.401894885090583, + "grad_norm": 41.76816177368164, + "learning_rate": 3.101735002913793e-06, + "loss": 0.214, + "num_input_tokens_seen": 118080128, + "step": 37520 + }, + { + "epoch": 2.40221496703156, + "grad_norm": 10.219579696655273, + "learning_rate": 3.101192759338858e-06, + "loss": 0.2351, + "num_input_tokens_seen": 118096064, + "step": 37525 + }, + { + "epoch": 2.402535048972537, + "grad_norm": 8.359210968017578, + "learning_rate": 3.100650485743487e-06, + "loss": 0.1285, + "num_input_tokens_seen": 118113152, + "step": 37530 + }, + { + "epoch": 2.402855130913514, + "grad_norm": 15.589118003845215, + "learning_rate": 3.1001081821547605e-06, + "loss": 0.3104, + "num_input_tokens_seen": 118128000, + "step": 37535 + }, + { + "epoch": 2.4031752128544905, + "grad_norm": 29.00078773498535, + "learning_rate": 3.0995658485997575e-06, + "loss": 0.2372, + "num_input_tokens_seen": 118145728, + "step": 37540 + }, + { + "epoch": 2.4034952947954675, + "grad_norm": 22.053743362426758, + "learning_rate": 3.099023485105559e-06, + "loss": 0.1751, + "num_input_tokens_seen": 118161408, + "step": 37545 + }, + { + "epoch": 2.4038153767364445, + "grad_norm": 13.043147087097168, + "learning_rate": 3.0984810916992475e-06, + "loss": 0.239, + "num_input_tokens_seen": 118176896, + "step": 37550 + }, + { + "epoch": 2.4041354586774215, + "grad_norm": 7.818286895751953, + "learning_rate": 3.097938668407907e-06, + "loss": 0.2408, + "num_input_tokens_seen": 118191360, + "step": 37555 + }, + { + "epoch": 2.4044555406183985, + "grad_norm": 15.379731178283691, + "learning_rate": 3.0973962152586247e-06, + "loss": 0.2011, + "num_input_tokens_seen": 118207936, + "step": 37560 + }, + { + "epoch": 2.404775622559375, + "grad_norm": 5.505024433135986, + "learning_rate": 3.096853732278487e-06, + "loss": 0.2181, + "num_input_tokens_seen": 118223744, + "step": 37565 + }, + { + "epoch": 2.405095704500352, + "grad_norm": 42.15372085571289, + "learning_rate": 3.0963112194945834e-06, + "loss": 0.1763, + "num_input_tokens_seen": 118240064, + "step": 37570 + }, + { + "epoch": 2.405415786441329, + "grad_norm": 6.647226810455322, + "learning_rate": 3.095768676934003e-06, + "loss": 0.1374, + "num_input_tokens_seen": 118255424, + "step": 37575 + }, + { + "epoch": 2.405735868382306, + "grad_norm": 11.882967948913574, + "learning_rate": 3.0952261046238375e-06, + "loss": 0.1365, + "num_input_tokens_seen": 118270464, + "step": 37580 + }, + { + "epoch": 2.4060559503232826, + "grad_norm": 8.211660385131836, + "learning_rate": 3.094683502591181e-06, + "loss": 0.178, + "num_input_tokens_seen": 118285888, + "step": 37585 + }, + { + "epoch": 2.4063760322642596, + "grad_norm": 22.07065200805664, + "learning_rate": 3.0941408708631282e-06, + "loss": 0.2249, + "num_input_tokens_seen": 118303616, + "step": 37590 + }, + { + "epoch": 2.4066961142052365, + "grad_norm": 35.21490478515625, + "learning_rate": 3.0935982094667754e-06, + "loss": 0.1757, + "num_input_tokens_seen": 118319936, + "step": 37595 + }, + { + "epoch": 2.4070161961462135, + "grad_norm": 59.862735748291016, + "learning_rate": 3.09305551842922e-06, + "loss": 0.2439, + "num_input_tokens_seen": 118334848, + "step": 37600 + }, + { + "epoch": 2.4073362780871905, + "grad_norm": 7.077723026275635, + "learning_rate": 3.0925127977775608e-06, + "loss": 0.2724, + "num_input_tokens_seen": 118350592, + "step": 37605 + }, + { + "epoch": 2.407656360028167, + "grad_norm": 8.112854957580566, + "learning_rate": 3.0919700475388985e-06, + "loss": 0.2555, + "num_input_tokens_seen": 118367488, + "step": 37610 + }, + { + "epoch": 2.407976441969144, + "grad_norm": 6.839084625244141, + "learning_rate": 3.091427267740336e-06, + "loss": 0.1574, + "num_input_tokens_seen": 118382720, + "step": 37615 + }, + { + "epoch": 2.408296523910121, + "grad_norm": 8.214258193969727, + "learning_rate": 3.0908844584089764e-06, + "loss": 0.2141, + "num_input_tokens_seen": 118399040, + "step": 37620 + }, + { + "epoch": 2.408616605851098, + "grad_norm": 10.406015396118164, + "learning_rate": 3.090341619571925e-06, + "loss": 0.1975, + "num_input_tokens_seen": 118413824, + "step": 37625 + }, + { + "epoch": 2.4089366877920746, + "grad_norm": 11.788811683654785, + "learning_rate": 3.089798751256288e-06, + "loss": 0.2963, + "num_input_tokens_seen": 118430208, + "step": 37630 + }, + { + "epoch": 2.4092567697330516, + "grad_norm": 11.489863395690918, + "learning_rate": 3.089255853489173e-06, + "loss": 0.1646, + "num_input_tokens_seen": 118445248, + "step": 37635 + }, + { + "epoch": 2.4095768516740286, + "grad_norm": 25.461694717407227, + "learning_rate": 3.08871292629769e-06, + "loss": 0.1595, + "num_input_tokens_seen": 118464896, + "step": 37640 + }, + { + "epoch": 2.4098969336150056, + "grad_norm": 19.701677322387695, + "learning_rate": 3.0881699697089494e-06, + "loss": 0.1514, + "num_input_tokens_seen": 118480768, + "step": 37645 + }, + { + "epoch": 2.4102170155559826, + "grad_norm": 14.488852500915527, + "learning_rate": 3.087626983750065e-06, + "loss": 0.2708, + "num_input_tokens_seen": 118495360, + "step": 37650 + }, + { + "epoch": 2.410537097496959, + "grad_norm": 17.450563430786133, + "learning_rate": 3.0870839684481502e-06, + "loss": 0.2412, + "num_input_tokens_seen": 118510848, + "step": 37655 + }, + { + "epoch": 2.410857179437936, + "grad_norm": 7.715856552124023, + "learning_rate": 3.0865409238303196e-06, + "loss": 0.2465, + "num_input_tokens_seen": 118526336, + "step": 37660 + }, + { + "epoch": 2.411177261378913, + "grad_norm": 7.664495944976807, + "learning_rate": 3.085997849923691e-06, + "loss": 0.1733, + "num_input_tokens_seen": 118541312, + "step": 37665 + }, + { + "epoch": 2.41149734331989, + "grad_norm": 7.507491588592529, + "learning_rate": 3.085454746755381e-06, + "loss": 0.2339, + "num_input_tokens_seen": 118558912, + "step": 37670 + }, + { + "epoch": 2.4118174252608666, + "grad_norm": 34.525047302246094, + "learning_rate": 3.084911614352511e-06, + "loss": 0.3016, + "num_input_tokens_seen": 118573952, + "step": 37675 + }, + { + "epoch": 2.4121375072018436, + "grad_norm": 15.074491500854492, + "learning_rate": 3.084368452742202e-06, + "loss": 0.1975, + "num_input_tokens_seen": 118589376, + "step": 37680 + }, + { + "epoch": 2.4124575891428206, + "grad_norm": 4.892065048217773, + "learning_rate": 3.0838252619515764e-06, + "loss": 0.1254, + "num_input_tokens_seen": 118604608, + "step": 37685 + }, + { + "epoch": 2.4127776710837976, + "grad_norm": 7.424117088317871, + "learning_rate": 3.0832820420077576e-06, + "loss": 0.2196, + "num_input_tokens_seen": 118619200, + "step": 37690 + }, + { + "epoch": 2.4130977530247746, + "grad_norm": 26.45439338684082, + "learning_rate": 3.0827387929378716e-06, + "loss": 0.1882, + "num_input_tokens_seen": 118635264, + "step": 37695 + }, + { + "epoch": 2.413417834965751, + "grad_norm": 45.94657516479492, + "learning_rate": 3.082195514769046e-06, + "loss": 0.3483, + "num_input_tokens_seen": 118651008, + "step": 37700 + }, + { + "epoch": 2.413737916906728, + "grad_norm": 7.429213523864746, + "learning_rate": 3.0816522075284093e-06, + "loss": 0.2173, + "num_input_tokens_seen": 118667328, + "step": 37705 + }, + { + "epoch": 2.414057998847705, + "grad_norm": 15.757867813110352, + "learning_rate": 3.081108871243091e-06, + "loss": 0.2104, + "num_input_tokens_seen": 118682560, + "step": 37710 + }, + { + "epoch": 2.4143780807886817, + "grad_norm": 11.749175071716309, + "learning_rate": 3.080565505940223e-06, + "loss": 0.1763, + "num_input_tokens_seen": 118699136, + "step": 37715 + }, + { + "epoch": 2.4146981627296586, + "grad_norm": 23.19740867614746, + "learning_rate": 3.080022111646937e-06, + "loss": 0.2768, + "num_input_tokens_seen": 118715712, + "step": 37720 + }, + { + "epoch": 2.4150182446706356, + "grad_norm": 8.410465240478516, + "learning_rate": 3.0794786883903675e-06, + "loss": 0.2486, + "num_input_tokens_seen": 118731072, + "step": 37725 + }, + { + "epoch": 2.4153383266116126, + "grad_norm": 28.19003677368164, + "learning_rate": 3.0789352361976515e-06, + "loss": 0.3058, + "num_input_tokens_seen": 118747008, + "step": 37730 + }, + { + "epoch": 2.4156584085525896, + "grad_norm": 14.808332443237305, + "learning_rate": 3.078391755095925e-06, + "loss": 0.3504, + "num_input_tokens_seen": 118762368, + "step": 37735 + }, + { + "epoch": 2.415978490493566, + "grad_norm": 44.35429382324219, + "learning_rate": 3.0778482451123276e-06, + "loss": 0.1283, + "num_input_tokens_seen": 118778688, + "step": 37740 + }, + { + "epoch": 2.416298572434543, + "grad_norm": 54.9836540222168, + "learning_rate": 3.0773047062739987e-06, + "loss": 0.2192, + "num_input_tokens_seen": 118794496, + "step": 37745 + }, + { + "epoch": 2.41661865437552, + "grad_norm": 15.683159828186035, + "learning_rate": 3.076761138608079e-06, + "loss": 0.3876, + "num_input_tokens_seen": 118809088, + "step": 37750 + }, + { + "epoch": 2.416938736316497, + "grad_norm": 6.513302803039551, + "learning_rate": 3.076217542141713e-06, + "loss": 0.1543, + "num_input_tokens_seen": 118824704, + "step": 37755 + }, + { + "epoch": 2.4172588182574737, + "grad_norm": 34.34773254394531, + "learning_rate": 3.0756739169020434e-06, + "loss": 0.21, + "num_input_tokens_seen": 118840704, + "step": 37760 + }, + { + "epoch": 2.4175789001984507, + "grad_norm": 22.227006912231445, + "learning_rate": 3.0751302629162176e-06, + "loss": 0.3376, + "num_input_tokens_seen": 118857088, + "step": 37765 + }, + { + "epoch": 2.4178989821394277, + "grad_norm": 6.106966972351074, + "learning_rate": 3.0745865802113828e-06, + "loss": 0.206, + "num_input_tokens_seen": 118873344, + "step": 37770 + }, + { + "epoch": 2.4182190640804047, + "grad_norm": 25.87758445739746, + "learning_rate": 3.0740428688146862e-06, + "loss": 0.2648, + "num_input_tokens_seen": 118889600, + "step": 37775 + }, + { + "epoch": 2.4185391460213816, + "grad_norm": 10.106524467468262, + "learning_rate": 3.0734991287532796e-06, + "loss": 0.2679, + "num_input_tokens_seen": 118905408, + "step": 37780 + }, + { + "epoch": 2.418859227962358, + "grad_norm": 10.600911140441895, + "learning_rate": 3.072955360054313e-06, + "loss": 0.233, + "num_input_tokens_seen": 118921408, + "step": 37785 + }, + { + "epoch": 2.419179309903335, + "grad_norm": 4.512929439544678, + "learning_rate": 3.0724115627449403e-06, + "loss": 0.2814, + "num_input_tokens_seen": 118939136, + "step": 37790 + }, + { + "epoch": 2.419499391844312, + "grad_norm": 41.3058967590332, + "learning_rate": 3.0718677368523164e-06, + "loss": 0.2321, + "num_input_tokens_seen": 118955200, + "step": 37795 + }, + { + "epoch": 2.419819473785289, + "grad_norm": 4.4426164627075195, + "learning_rate": 3.0713238824035957e-06, + "loss": 0.196, + "num_input_tokens_seen": 118970624, + "step": 37800 + }, + { + "epoch": 2.4201395557262657, + "grad_norm": 9.538148880004883, + "learning_rate": 3.0707799994259367e-06, + "loss": 0.1438, + "num_input_tokens_seen": 118987328, + "step": 37805 + }, + { + "epoch": 2.4204596376672427, + "grad_norm": 14.102176666259766, + "learning_rate": 3.070236087946497e-06, + "loss": 0.1631, + "num_input_tokens_seen": 119002624, + "step": 37810 + }, + { + "epoch": 2.4207797196082197, + "grad_norm": 20.075054168701172, + "learning_rate": 3.0696921479924376e-06, + "loss": 0.3156, + "num_input_tokens_seen": 119018240, + "step": 37815 + }, + { + "epoch": 2.4210998015491967, + "grad_norm": 10.79030990600586, + "learning_rate": 3.06914817959092e-06, + "loss": 0.1863, + "num_input_tokens_seen": 119034560, + "step": 37820 + }, + { + "epoch": 2.4214198834901737, + "grad_norm": 12.672633171081543, + "learning_rate": 3.0686041827691066e-06, + "loss": 0.2396, + "num_input_tokens_seen": 119051712, + "step": 37825 + }, + { + "epoch": 2.42173996543115, + "grad_norm": 9.5419340133667, + "learning_rate": 3.0680601575541625e-06, + "loss": 0.1676, + "num_input_tokens_seen": 119067968, + "step": 37830 + }, + { + "epoch": 2.422060047372127, + "grad_norm": 12.600614547729492, + "learning_rate": 3.0675161039732526e-06, + "loss": 0.2777, + "num_input_tokens_seen": 119083200, + "step": 37835 + }, + { + "epoch": 2.422380129313104, + "grad_norm": 7.195057392120361, + "learning_rate": 3.0669720220535447e-06, + "loss": 0.1946, + "num_input_tokens_seen": 119097408, + "step": 37840 + }, + { + "epoch": 2.422700211254081, + "grad_norm": 14.248100280761719, + "learning_rate": 3.0664279118222074e-06, + "loss": 0.2237, + "num_input_tokens_seen": 119112448, + "step": 37845 + }, + { + "epoch": 2.4230202931950577, + "grad_norm": 10.251774787902832, + "learning_rate": 3.065883773306411e-06, + "loss": 0.1967, + "num_input_tokens_seen": 119127680, + "step": 37850 + }, + { + "epoch": 2.4233403751360347, + "grad_norm": 5.028314590454102, + "learning_rate": 3.065339606533326e-06, + "loss": 0.1469, + "num_input_tokens_seen": 119143232, + "step": 37855 + }, + { + "epoch": 2.4236604570770117, + "grad_norm": 33.257835388183594, + "learning_rate": 3.064795411530126e-06, + "loss": 0.2169, + "num_input_tokens_seen": 119158400, + "step": 37860 + }, + { + "epoch": 2.4239805390179887, + "grad_norm": 31.159828186035156, + "learning_rate": 3.0642511883239846e-06, + "loss": 0.164, + "num_input_tokens_seen": 119173696, + "step": 37865 + }, + { + "epoch": 2.4243006209589657, + "grad_norm": 82.10261535644531, + "learning_rate": 3.063706936942078e-06, + "loss": 0.3497, + "num_input_tokens_seen": 119189568, + "step": 37870 + }, + { + "epoch": 2.4246207028999422, + "grad_norm": 12.303750038146973, + "learning_rate": 3.063162657411583e-06, + "loss": 0.2508, + "num_input_tokens_seen": 119204992, + "step": 37875 + }, + { + "epoch": 2.4249407848409192, + "grad_norm": 8.986992835998535, + "learning_rate": 3.062618349759679e-06, + "loss": 0.2146, + "num_input_tokens_seen": 119220224, + "step": 37880 + }, + { + "epoch": 2.4252608667818962, + "grad_norm": 10.059836387634277, + "learning_rate": 3.062074014013545e-06, + "loss": 0.2724, + "num_input_tokens_seen": 119234432, + "step": 37885 + }, + { + "epoch": 2.425580948722873, + "grad_norm": 4.443511009216309, + "learning_rate": 3.0615296502003627e-06, + "loss": 0.1117, + "num_input_tokens_seen": 119249088, + "step": 37890 + }, + { + "epoch": 2.4259010306638498, + "grad_norm": 3.5195791721343994, + "learning_rate": 3.060985258347314e-06, + "loss": 0.1979, + "num_input_tokens_seen": 119264064, + "step": 37895 + }, + { + "epoch": 2.4262211126048268, + "grad_norm": 7.867177963256836, + "learning_rate": 3.0604408384815836e-06, + "loss": 0.2217, + "num_input_tokens_seen": 119278592, + "step": 37900 + }, + { + "epoch": 2.4265411945458037, + "grad_norm": 5.1937737464904785, + "learning_rate": 3.0598963906303576e-06, + "loss": 0.2541, + "num_input_tokens_seen": 119293440, + "step": 37905 + }, + { + "epoch": 2.4268612764867807, + "grad_norm": 12.988494873046875, + "learning_rate": 3.0593519148208217e-06, + "loss": 0.2013, + "num_input_tokens_seen": 119308544, + "step": 37910 + }, + { + "epoch": 2.4271813584277577, + "grad_norm": 8.550618171691895, + "learning_rate": 3.058807411080165e-06, + "loss": 0.2045, + "num_input_tokens_seen": 119324928, + "step": 37915 + }, + { + "epoch": 2.4275014403687343, + "grad_norm": 5.735256195068359, + "learning_rate": 3.0582628794355775e-06, + "loss": 0.3166, + "num_input_tokens_seen": 119339712, + "step": 37920 + }, + { + "epoch": 2.4278215223097113, + "grad_norm": 28.290945053100586, + "learning_rate": 3.057718319914248e-06, + "loss": 0.2321, + "num_input_tokens_seen": 119356096, + "step": 37925 + }, + { + "epoch": 2.4281416042506883, + "grad_norm": 38.84010696411133, + "learning_rate": 3.057173732543372e-06, + "loss": 0.1764, + "num_input_tokens_seen": 119373056, + "step": 37930 + }, + { + "epoch": 2.4284616861916652, + "grad_norm": 11.633269309997559, + "learning_rate": 3.056629117350141e-06, + "loss": 0.2173, + "num_input_tokens_seen": 119388672, + "step": 37935 + }, + { + "epoch": 2.428781768132642, + "grad_norm": 16.04018783569336, + "learning_rate": 3.056084474361752e-06, + "loss": 0.3087, + "num_input_tokens_seen": 119404032, + "step": 37940 + }, + { + "epoch": 2.429101850073619, + "grad_norm": 8.033092498779297, + "learning_rate": 3.0555398036054006e-06, + "loss": 0.1567, + "num_input_tokens_seen": 119418368, + "step": 37945 + }, + { + "epoch": 2.4294219320145958, + "grad_norm": 7.654631614685059, + "learning_rate": 3.054995105108286e-06, + "loss": 0.2722, + "num_input_tokens_seen": 119433088, + "step": 37950 + }, + { + "epoch": 2.4297420139555728, + "grad_norm": 7.935496807098389, + "learning_rate": 3.0544503788976054e-06, + "loss": 0.2747, + "num_input_tokens_seen": 119448320, + "step": 37955 + }, + { + "epoch": 2.4300620958965498, + "grad_norm": 23.870647430419922, + "learning_rate": 3.0539056250005617e-06, + "loss": 0.2972, + "num_input_tokens_seen": 119463872, + "step": 37960 + }, + { + "epoch": 2.4303821778375263, + "grad_norm": 6.8671159744262695, + "learning_rate": 3.0533608434443562e-06, + "loss": 0.1852, + "num_input_tokens_seen": 119480064, + "step": 37965 + }, + { + "epoch": 2.4307022597785033, + "grad_norm": 6.044654846191406, + "learning_rate": 3.0528160342561925e-06, + "loss": 0.1684, + "num_input_tokens_seen": 119495168, + "step": 37970 + }, + { + "epoch": 2.4310223417194803, + "grad_norm": 5.857413291931152, + "learning_rate": 3.0522711974632758e-06, + "loss": 0.2119, + "num_input_tokens_seen": 119510720, + "step": 37975 + }, + { + "epoch": 2.431342423660457, + "grad_norm": 20.923704147338867, + "learning_rate": 3.0517263330928116e-06, + "loss": 0.2815, + "num_input_tokens_seen": 119526336, + "step": 37980 + }, + { + "epoch": 2.431662505601434, + "grad_norm": 14.611063957214355, + "learning_rate": 3.0511814411720083e-06, + "loss": 0.2965, + "num_input_tokens_seen": 119541952, + "step": 37985 + }, + { + "epoch": 2.431982587542411, + "grad_norm": 6.521999835968018, + "learning_rate": 3.0506365217280742e-06, + "loss": 0.2018, + "num_input_tokens_seen": 119557248, + "step": 37990 + }, + { + "epoch": 2.432302669483388, + "grad_norm": 10.780217170715332, + "learning_rate": 3.0500915747882213e-06, + "loss": 0.2062, + "num_input_tokens_seen": 119571456, + "step": 37995 + }, + { + "epoch": 2.432622751424365, + "grad_norm": 8.675753593444824, + "learning_rate": 3.0495466003796603e-06, + "loss": 0.1656, + "num_input_tokens_seen": 119587456, + "step": 38000 + }, + { + "epoch": 2.4329428333653413, + "grad_norm": 11.423070907592773, + "learning_rate": 3.0490015985296053e-06, + "loss": 0.354, + "num_input_tokens_seen": 119603264, + "step": 38005 + }, + { + "epoch": 2.4332629153063183, + "grad_norm": 11.440950393676758, + "learning_rate": 3.048456569265269e-06, + "loss": 0.1856, + "num_input_tokens_seen": 119618048, + "step": 38010 + }, + { + "epoch": 2.4335829972472953, + "grad_norm": 30.071104049682617, + "learning_rate": 3.047911512613869e-06, + "loss": 0.3026, + "num_input_tokens_seen": 119634432, + "step": 38015 + }, + { + "epoch": 2.4339030791882723, + "grad_norm": 8.836359977722168, + "learning_rate": 3.0473664286026216e-06, + "loss": 0.2709, + "num_input_tokens_seen": 119649984, + "step": 38020 + }, + { + "epoch": 2.434223161129249, + "grad_norm": 16.531267166137695, + "learning_rate": 3.0468213172587465e-06, + "loss": 0.233, + "num_input_tokens_seen": 119666176, + "step": 38025 + }, + { + "epoch": 2.434543243070226, + "grad_norm": 41.75273895263672, + "learning_rate": 3.0462761786094624e-06, + "loss": 0.2561, + "num_input_tokens_seen": 119680384, + "step": 38030 + }, + { + "epoch": 2.434863325011203, + "grad_norm": 18.97899627685547, + "learning_rate": 3.0457310126819917e-06, + "loss": 0.2369, + "num_input_tokens_seen": 119696000, + "step": 38035 + }, + { + "epoch": 2.43518340695218, + "grad_norm": 41.444026947021484, + "learning_rate": 3.0451858195035564e-06, + "loss": 0.4032, + "num_input_tokens_seen": 119712192, + "step": 38040 + }, + { + "epoch": 2.435503488893157, + "grad_norm": 6.8568115234375, + "learning_rate": 3.044640599101382e-06, + "loss": 0.1391, + "num_input_tokens_seen": 119730368, + "step": 38045 + }, + { + "epoch": 2.4358235708341334, + "grad_norm": 24.65223503112793, + "learning_rate": 3.0440953515026916e-06, + "loss": 0.224, + "num_input_tokens_seen": 119747136, + "step": 38050 + }, + { + "epoch": 2.4361436527751104, + "grad_norm": 11.116029739379883, + "learning_rate": 3.043550076734715e-06, + "loss": 0.2364, + "num_input_tokens_seen": 119763392, + "step": 38055 + }, + { + "epoch": 2.4364637347160873, + "grad_norm": 14.740232467651367, + "learning_rate": 3.043004774824678e-06, + "loss": 0.2292, + "num_input_tokens_seen": 119778944, + "step": 38060 + }, + { + "epoch": 2.4367838166570643, + "grad_norm": 13.086958885192871, + "learning_rate": 3.0424594457998115e-06, + "loss": 0.159, + "num_input_tokens_seen": 119795520, + "step": 38065 + }, + { + "epoch": 2.437103898598041, + "grad_norm": 18.71027183532715, + "learning_rate": 3.0419140896873446e-06, + "loss": 0.1652, + "num_input_tokens_seen": 119810688, + "step": 38070 + }, + { + "epoch": 2.437423980539018, + "grad_norm": 7.93483304977417, + "learning_rate": 3.041368706514512e-06, + "loss": 0.3335, + "num_input_tokens_seen": 119825280, + "step": 38075 + }, + { + "epoch": 2.437744062479995, + "grad_norm": 65.04298400878906, + "learning_rate": 3.040823296308546e-06, + "loss": 0.3407, + "num_input_tokens_seen": 119840384, + "step": 38080 + }, + { + "epoch": 2.438064144420972, + "grad_norm": 9.297974586486816, + "learning_rate": 3.0402778590966814e-06, + "loss": 0.2037, + "num_input_tokens_seen": 119855168, + "step": 38085 + }, + { + "epoch": 2.438384226361949, + "grad_norm": 21.36587142944336, + "learning_rate": 3.039732394906155e-06, + "loss": 0.2166, + "num_input_tokens_seen": 119870464, + "step": 38090 + }, + { + "epoch": 2.4387043083029254, + "grad_norm": 6.5994553565979, + "learning_rate": 3.039186903764203e-06, + "loss": 0.1906, + "num_input_tokens_seen": 119885696, + "step": 38095 + }, + { + "epoch": 2.4390243902439024, + "grad_norm": 7.81169319152832, + "learning_rate": 3.0386413856980666e-06, + "loss": 0.2295, + "num_input_tokens_seen": 119901312, + "step": 38100 + }, + { + "epoch": 2.4393444721848794, + "grad_norm": 3.9842286109924316, + "learning_rate": 3.038095840734985e-06, + "loss": 0.249, + "num_input_tokens_seen": 119916992, + "step": 38105 + }, + { + "epoch": 2.4396645541258564, + "grad_norm": 20.37855339050293, + "learning_rate": 3.037550268902199e-06, + "loss": 0.2908, + "num_input_tokens_seen": 119932928, + "step": 38110 + }, + { + "epoch": 2.439984636066833, + "grad_norm": 38.855167388916016, + "learning_rate": 3.037004670226954e-06, + "loss": 0.2406, + "num_input_tokens_seen": 119948992, + "step": 38115 + }, + { + "epoch": 2.44030471800781, + "grad_norm": 47.90819549560547, + "learning_rate": 3.0364590447364923e-06, + "loss": 0.2393, + "num_input_tokens_seen": 119963712, + "step": 38120 + }, + { + "epoch": 2.440624799948787, + "grad_norm": 5.9892377853393555, + "learning_rate": 3.03591339245806e-06, + "loss": 0.1609, + "num_input_tokens_seen": 119978752, + "step": 38125 + }, + { + "epoch": 2.440944881889764, + "grad_norm": 10.77601432800293, + "learning_rate": 3.0353677134189042e-06, + "loss": 0.2331, + "num_input_tokens_seen": 119994432, + "step": 38130 + }, + { + "epoch": 2.441264963830741, + "grad_norm": 32.52187728881836, + "learning_rate": 3.0348220076462743e-06, + "loss": 0.2935, + "num_input_tokens_seen": 120010816, + "step": 38135 + }, + { + "epoch": 2.4415850457717174, + "grad_norm": 46.77041244506836, + "learning_rate": 3.034276275167419e-06, + "loss": 0.1801, + "num_input_tokens_seen": 120026368, + "step": 38140 + }, + { + "epoch": 2.4419051277126944, + "grad_norm": 11.829523086547852, + "learning_rate": 3.033730516009589e-06, + "loss": 0.1317, + "num_input_tokens_seen": 120041920, + "step": 38145 + }, + { + "epoch": 2.4422252096536714, + "grad_norm": 7.347039222717285, + "learning_rate": 3.0331847302000373e-06, + "loss": 0.2711, + "num_input_tokens_seen": 120057984, + "step": 38150 + }, + { + "epoch": 2.4425452915946484, + "grad_norm": 8.64231014251709, + "learning_rate": 3.0326389177660164e-06, + "loss": 0.2212, + "num_input_tokens_seen": 120075456, + "step": 38155 + }, + { + "epoch": 2.442865373535625, + "grad_norm": 7.127157211303711, + "learning_rate": 3.0320930787347835e-06, + "loss": 0.2365, + "num_input_tokens_seen": 120091328, + "step": 38160 + }, + { + "epoch": 2.443185455476602, + "grad_norm": 20.82406997680664, + "learning_rate": 3.0315472131335934e-06, + "loss": 0.2407, + "num_input_tokens_seen": 120107392, + "step": 38165 + }, + { + "epoch": 2.443505537417579, + "grad_norm": 10.188647270202637, + "learning_rate": 3.031001320989705e-06, + "loss": 0.2258, + "num_input_tokens_seen": 120124096, + "step": 38170 + }, + { + "epoch": 2.443825619358556, + "grad_norm": 5.782182693481445, + "learning_rate": 3.0304554023303757e-06, + "loss": 0.2009, + "num_input_tokens_seen": 120140224, + "step": 38175 + }, + { + "epoch": 2.444145701299533, + "grad_norm": 11.585745811462402, + "learning_rate": 3.0299094571828667e-06, + "loss": 0.2672, + "num_input_tokens_seen": 120157504, + "step": 38180 + }, + { + "epoch": 2.4444657832405094, + "grad_norm": 31.153804779052734, + "learning_rate": 3.0293634855744397e-06, + "loss": 0.2614, + "num_input_tokens_seen": 120173184, + "step": 38185 + }, + { + "epoch": 2.4447858651814864, + "grad_norm": 9.071516036987305, + "learning_rate": 3.028817487532358e-06, + "loss": 0.2464, + "num_input_tokens_seen": 120187776, + "step": 38190 + }, + { + "epoch": 2.4451059471224634, + "grad_norm": 30.61634635925293, + "learning_rate": 3.028271463083885e-06, + "loss": 0.3033, + "num_input_tokens_seen": 120202368, + "step": 38195 + }, + { + "epoch": 2.4454260290634404, + "grad_norm": 4.73928165435791, + "learning_rate": 3.0277254122562873e-06, + "loss": 0.223, + "num_input_tokens_seen": 120218624, + "step": 38200 + }, + { + "epoch": 2.445746111004417, + "grad_norm": 15.228744506835938, + "learning_rate": 3.0271793350768307e-06, + "loss": 0.2247, + "num_input_tokens_seen": 120234432, + "step": 38205 + }, + { + "epoch": 2.446066192945394, + "grad_norm": 33.776641845703125, + "learning_rate": 3.0266332315727843e-06, + "loss": 0.2486, + "num_input_tokens_seen": 120249984, + "step": 38210 + }, + { + "epoch": 2.446386274886371, + "grad_norm": 26.201494216918945, + "learning_rate": 3.0260871017714177e-06, + "loss": 0.1844, + "num_input_tokens_seen": 120264448, + "step": 38215 + }, + { + "epoch": 2.446706356827348, + "grad_norm": 63.80481719970703, + "learning_rate": 3.0255409457000014e-06, + "loss": 0.3329, + "num_input_tokens_seen": 120279872, + "step": 38220 + }, + { + "epoch": 2.447026438768325, + "grad_norm": 9.887198448181152, + "learning_rate": 3.0249947633858074e-06, + "loss": 0.1865, + "num_input_tokens_seen": 120294976, + "step": 38225 + }, + { + "epoch": 2.4473465207093015, + "grad_norm": 11.86949348449707, + "learning_rate": 3.0244485548561097e-06, + "loss": 0.1945, + "num_input_tokens_seen": 120311040, + "step": 38230 + }, + { + "epoch": 2.4476666026502785, + "grad_norm": 6.499368667602539, + "learning_rate": 3.0239023201381836e-06, + "loss": 0.3044, + "num_input_tokens_seen": 120327040, + "step": 38235 + }, + { + "epoch": 2.4479866845912555, + "grad_norm": 34.24015808105469, + "learning_rate": 3.0233560592593036e-06, + "loss": 0.1835, + "num_input_tokens_seen": 120342208, + "step": 38240 + }, + { + "epoch": 2.448306766532232, + "grad_norm": 9.83552360534668, + "learning_rate": 3.0228097722467483e-06, + "loss": 0.2948, + "num_input_tokens_seen": 120357568, + "step": 38245 + }, + { + "epoch": 2.448626848473209, + "grad_norm": 11.458948135375977, + "learning_rate": 3.022263459127796e-06, + "loss": 0.1664, + "num_input_tokens_seen": 120372032, + "step": 38250 + }, + { + "epoch": 2.448946930414186, + "grad_norm": 9.118972778320312, + "learning_rate": 3.0217171199297273e-06, + "loss": 0.1201, + "num_input_tokens_seen": 120387520, + "step": 38255 + }, + { + "epoch": 2.449267012355163, + "grad_norm": 37.859588623046875, + "learning_rate": 3.0211707546798235e-06, + "loss": 0.2732, + "num_input_tokens_seen": 120402176, + "step": 38260 + }, + { + "epoch": 2.44958709429614, + "grad_norm": 14.687504768371582, + "learning_rate": 3.0206243634053666e-06, + "loss": 0.1979, + "num_input_tokens_seen": 120418176, + "step": 38265 + }, + { + "epoch": 2.4499071762371165, + "grad_norm": 74.33649444580078, + "learning_rate": 3.0200779461336397e-06, + "loss": 0.2882, + "num_input_tokens_seen": 120435328, + "step": 38270 + }, + { + "epoch": 2.4502272581780935, + "grad_norm": 7.761834621429443, + "learning_rate": 3.019531502891931e-06, + "loss": 0.3605, + "num_input_tokens_seen": 120450752, + "step": 38275 + }, + { + "epoch": 2.4505473401190705, + "grad_norm": 7.182443618774414, + "learning_rate": 3.0189850337075245e-06, + "loss": 0.1623, + "num_input_tokens_seen": 120467072, + "step": 38280 + }, + { + "epoch": 2.4508674220600475, + "grad_norm": 9.085068702697754, + "learning_rate": 3.0184385386077076e-06, + "loss": 0.2415, + "num_input_tokens_seen": 120482496, + "step": 38285 + }, + { + "epoch": 2.451187504001024, + "grad_norm": 25.019882202148438, + "learning_rate": 3.017892017619772e-06, + "loss": 0.2557, + "num_input_tokens_seen": 120498368, + "step": 38290 + }, + { + "epoch": 2.451507585942001, + "grad_norm": 58.47596740722656, + "learning_rate": 3.017345470771006e-06, + "loss": 0.2319, + "num_input_tokens_seen": 120514432, + "step": 38295 + }, + { + "epoch": 2.451827667882978, + "grad_norm": 8.369961738586426, + "learning_rate": 3.016798898088702e-06, + "loss": 0.225, + "num_input_tokens_seen": 120528960, + "step": 38300 + }, + { + "epoch": 2.452147749823955, + "grad_norm": 9.961597442626953, + "learning_rate": 3.016252299600153e-06, + "loss": 0.2271, + "num_input_tokens_seen": 120544896, + "step": 38305 + }, + { + "epoch": 2.452467831764932, + "grad_norm": 19.625900268554688, + "learning_rate": 3.015705675332653e-06, + "loss": 0.3316, + "num_input_tokens_seen": 120559936, + "step": 38310 + }, + { + "epoch": 2.4527879137059085, + "grad_norm": 13.482919692993164, + "learning_rate": 3.015159025313498e-06, + "loss": 0.2252, + "num_input_tokens_seen": 120574528, + "step": 38315 + }, + { + "epoch": 2.4531079956468855, + "grad_norm": 6.598126411437988, + "learning_rate": 3.014612349569985e-06, + "loss": 0.2107, + "num_input_tokens_seen": 120589056, + "step": 38320 + }, + { + "epoch": 2.4534280775878625, + "grad_norm": 11.177162170410156, + "learning_rate": 3.0140656481294114e-06, + "loss": 0.157, + "num_input_tokens_seen": 120605760, + "step": 38325 + }, + { + "epoch": 2.4537481595288395, + "grad_norm": 6.08179235458374, + "learning_rate": 3.013518921019077e-06, + "loss": 0.2334, + "num_input_tokens_seen": 120621184, + "step": 38330 + }, + { + "epoch": 2.454068241469816, + "grad_norm": 39.19736862182617, + "learning_rate": 3.0129721682662826e-06, + "loss": 0.1959, + "num_input_tokens_seen": 120636992, + "step": 38335 + }, + { + "epoch": 2.454388323410793, + "grad_norm": 4.213704586029053, + "learning_rate": 3.0124253898983295e-06, + "loss": 0.1609, + "num_input_tokens_seen": 120652224, + "step": 38340 + }, + { + "epoch": 2.45470840535177, + "grad_norm": 19.693653106689453, + "learning_rate": 3.011878585942522e-06, + "loss": 0.2677, + "num_input_tokens_seen": 120667328, + "step": 38345 + }, + { + "epoch": 2.455028487292747, + "grad_norm": 12.160491943359375, + "learning_rate": 3.0113317564261646e-06, + "loss": 0.1615, + "num_input_tokens_seen": 120682112, + "step": 38350 + }, + { + "epoch": 2.455348569233724, + "grad_norm": 18.329708099365234, + "learning_rate": 3.0107849013765623e-06, + "loss": 0.2636, + "num_input_tokens_seen": 120697728, + "step": 38355 + }, + { + "epoch": 2.4556686511747006, + "grad_norm": 5.4388275146484375, + "learning_rate": 3.0102380208210225e-06, + "loss": 0.245, + "num_input_tokens_seen": 120712576, + "step": 38360 + }, + { + "epoch": 2.4559887331156776, + "grad_norm": 61.563262939453125, + "learning_rate": 3.009691114786854e-06, + "loss": 0.1978, + "num_input_tokens_seen": 120728384, + "step": 38365 + }, + { + "epoch": 2.4563088150566545, + "grad_norm": 5.466808795928955, + "learning_rate": 3.0091441833013663e-06, + "loss": 0.1854, + "num_input_tokens_seen": 120746880, + "step": 38370 + }, + { + "epoch": 2.4566288969976315, + "grad_norm": 5.838440418243408, + "learning_rate": 3.0085972263918694e-06, + "loss": 0.2427, + "num_input_tokens_seen": 120763072, + "step": 38375 + }, + { + "epoch": 2.456948978938608, + "grad_norm": 5.903468132019043, + "learning_rate": 3.008050244085677e-06, + "loss": 0.256, + "num_input_tokens_seen": 120778368, + "step": 38380 + }, + { + "epoch": 2.457269060879585, + "grad_norm": 30.825809478759766, + "learning_rate": 3.007503236410101e-06, + "loss": 0.2559, + "num_input_tokens_seen": 120794048, + "step": 38385 + }, + { + "epoch": 2.457589142820562, + "grad_norm": 46.4233512878418, + "learning_rate": 3.0069562033924573e-06, + "loss": 0.2848, + "num_input_tokens_seen": 120809088, + "step": 38390 + }, + { + "epoch": 2.457909224761539, + "grad_norm": 18.490550994873047, + "learning_rate": 3.006409145060061e-06, + "loss": 0.2871, + "num_input_tokens_seen": 120826048, + "step": 38395 + }, + { + "epoch": 2.458229306702516, + "grad_norm": 28.214557647705078, + "learning_rate": 3.00586206144023e-06, + "loss": 0.1771, + "num_input_tokens_seen": 120841344, + "step": 38400 + }, + { + "epoch": 2.4585493886434926, + "grad_norm": 8.19700813293457, + "learning_rate": 3.0053149525602834e-06, + "loss": 0.2119, + "num_input_tokens_seen": 120857856, + "step": 38405 + }, + { + "epoch": 2.4588694705844696, + "grad_norm": 21.806655883789062, + "learning_rate": 3.004767818447539e-06, + "loss": 0.2947, + "num_input_tokens_seen": 120873408, + "step": 38410 + }, + { + "epoch": 2.4591895525254466, + "grad_norm": 10.118896484375, + "learning_rate": 3.0042206591293195e-06, + "loss": 0.1922, + "num_input_tokens_seen": 120888512, + "step": 38415 + }, + { + "epoch": 2.4595096344664236, + "grad_norm": 11.417610168457031, + "learning_rate": 3.0036734746329465e-06, + "loss": 0.2306, + "num_input_tokens_seen": 120904064, + "step": 38420 + }, + { + "epoch": 2.4598297164074, + "grad_norm": 2.0597176551818848, + "learning_rate": 3.003126264985744e-06, + "loss": 0.1751, + "num_input_tokens_seen": 120919296, + "step": 38425 + }, + { + "epoch": 2.460149798348377, + "grad_norm": 24.51936912536621, + "learning_rate": 3.0025790302150365e-06, + "loss": 0.2463, + "num_input_tokens_seen": 120935296, + "step": 38430 + }, + { + "epoch": 2.460469880289354, + "grad_norm": 46.07404708862305, + "learning_rate": 3.00203177034815e-06, + "loss": 0.338, + "num_input_tokens_seen": 120952000, + "step": 38435 + }, + { + "epoch": 2.460789962230331, + "grad_norm": 11.970405578613281, + "learning_rate": 3.0014844854124114e-06, + "loss": 0.231, + "num_input_tokens_seen": 120967104, + "step": 38440 + }, + { + "epoch": 2.461110044171308, + "grad_norm": 55.70515823364258, + "learning_rate": 3.0009371754351503e-06, + "loss": 0.3679, + "num_input_tokens_seen": 120982848, + "step": 38445 + }, + { + "epoch": 2.4614301261122846, + "grad_norm": 7.090089321136475, + "learning_rate": 3.000389840443696e-06, + "loss": 0.1984, + "num_input_tokens_seen": 120999424, + "step": 38450 + }, + { + "epoch": 2.4617502080532616, + "grad_norm": 26.284786224365234, + "learning_rate": 2.9998424804653794e-06, + "loss": 0.239, + "num_input_tokens_seen": 121015360, + "step": 38455 + }, + { + "epoch": 2.4620702899942386, + "grad_norm": 5.748332977294922, + "learning_rate": 2.9992950955275325e-06, + "loss": 0.2238, + "num_input_tokens_seen": 121032320, + "step": 38460 + }, + { + "epoch": 2.4623903719352156, + "grad_norm": 8.453232765197754, + "learning_rate": 2.9987476856574897e-06, + "loss": 0.1826, + "num_input_tokens_seen": 121049856, + "step": 38465 + }, + { + "epoch": 2.462710453876192, + "grad_norm": 12.571626663208008, + "learning_rate": 2.9982002508825853e-06, + "loss": 0.1803, + "num_input_tokens_seen": 121065856, + "step": 38470 + }, + { + "epoch": 2.463030535817169, + "grad_norm": 24.189605712890625, + "learning_rate": 2.997652791230155e-06, + "loss": 0.1846, + "num_input_tokens_seen": 121081664, + "step": 38475 + }, + { + "epoch": 2.463350617758146, + "grad_norm": 14.243130683898926, + "learning_rate": 2.997105306727537e-06, + "loss": 0.3221, + "num_input_tokens_seen": 121097856, + "step": 38480 + }, + { + "epoch": 2.463670699699123, + "grad_norm": 18.728816986083984, + "learning_rate": 2.996557797402069e-06, + "loss": 0.1719, + "num_input_tokens_seen": 121113536, + "step": 38485 + }, + { + "epoch": 2.4639907816401, + "grad_norm": 11.049641609191895, + "learning_rate": 2.9960102632810915e-06, + "loss": 0.2739, + "num_input_tokens_seen": 121129152, + "step": 38490 + }, + { + "epoch": 2.4643108635810766, + "grad_norm": 7.169228553771973, + "learning_rate": 2.9954627043919448e-06, + "loss": 0.182, + "num_input_tokens_seen": 121144704, + "step": 38495 + }, + { + "epoch": 2.4646309455220536, + "grad_norm": 27.034910202026367, + "learning_rate": 2.994915120761971e-06, + "loss": 0.2309, + "num_input_tokens_seen": 121160704, + "step": 38500 + }, + { + "epoch": 2.4649510274630306, + "grad_norm": 36.002140045166016, + "learning_rate": 2.9943675124185147e-06, + "loss": 0.2265, + "num_input_tokens_seen": 121175808, + "step": 38505 + }, + { + "epoch": 2.465271109404007, + "grad_norm": 14.632423400878906, + "learning_rate": 2.9938198793889197e-06, + "loss": 0.2576, + "num_input_tokens_seen": 121191616, + "step": 38510 + }, + { + "epoch": 2.465591191344984, + "grad_norm": 6.937241554260254, + "learning_rate": 2.9932722217005315e-06, + "loss": 0.2165, + "num_input_tokens_seen": 121207168, + "step": 38515 + }, + { + "epoch": 2.465911273285961, + "grad_norm": 9.375100135803223, + "learning_rate": 2.9927245393806985e-06, + "loss": 0.1199, + "num_input_tokens_seen": 121222656, + "step": 38520 + }, + { + "epoch": 2.466231355226938, + "grad_norm": 14.680469512939453, + "learning_rate": 2.9921768324567686e-06, + "loss": 0.2052, + "num_input_tokens_seen": 121240832, + "step": 38525 + }, + { + "epoch": 2.466551437167915, + "grad_norm": 13.17792797088623, + "learning_rate": 2.991629100956091e-06, + "loss": 0.3025, + "num_input_tokens_seen": 121256320, + "step": 38530 + }, + { + "epoch": 2.4668715191088917, + "grad_norm": 9.459092140197754, + "learning_rate": 2.991081344906017e-06, + "loss": 0.1865, + "num_input_tokens_seen": 121271616, + "step": 38535 + }, + { + "epoch": 2.4671916010498687, + "grad_norm": 8.75217056274414, + "learning_rate": 2.990533564333899e-06, + "loss": 0.2554, + "num_input_tokens_seen": 121286336, + "step": 38540 + }, + { + "epoch": 2.4675116829908457, + "grad_norm": 2.486612319946289, + "learning_rate": 2.9899857592670893e-06, + "loss": 0.1651, + "num_input_tokens_seen": 121301952, + "step": 38545 + }, + { + "epoch": 2.4678317649318227, + "grad_norm": 32.26498794555664, + "learning_rate": 2.9894379297329435e-06, + "loss": 0.3387, + "num_input_tokens_seen": 121318080, + "step": 38550 + }, + { + "epoch": 2.468151846872799, + "grad_norm": 8.672422409057617, + "learning_rate": 2.988890075758816e-06, + "loss": 0.1882, + "num_input_tokens_seen": 121334080, + "step": 38555 + }, + { + "epoch": 2.468471928813776, + "grad_norm": 9.896806716918945, + "learning_rate": 2.988342197372066e-06, + "loss": 0.242, + "num_input_tokens_seen": 121349248, + "step": 38560 + }, + { + "epoch": 2.468792010754753, + "grad_norm": 37.86430740356445, + "learning_rate": 2.9877942946000494e-06, + "loss": 0.2495, + "num_input_tokens_seen": 121363840, + "step": 38565 + }, + { + "epoch": 2.46911209269573, + "grad_norm": 12.676549911499023, + "learning_rate": 2.9872463674701267e-06, + "loss": 0.2575, + "num_input_tokens_seen": 121379136, + "step": 38570 + }, + { + "epoch": 2.469432174636707, + "grad_norm": 10.217848777770996, + "learning_rate": 2.9866984160096586e-06, + "loss": 0.3895, + "num_input_tokens_seen": 121397376, + "step": 38575 + }, + { + "epoch": 2.4697522565776837, + "grad_norm": 12.812984466552734, + "learning_rate": 2.986150440246007e-06, + "loss": 0.1979, + "num_input_tokens_seen": 121413632, + "step": 38580 + }, + { + "epoch": 2.4700723385186607, + "grad_norm": 7.4920830726623535, + "learning_rate": 2.9856024402065344e-06, + "loss": 0.1206, + "num_input_tokens_seen": 121428544, + "step": 38585 + }, + { + "epoch": 2.4703924204596377, + "grad_norm": 4.229184627532959, + "learning_rate": 2.9850544159186046e-06, + "loss": 0.2129, + "num_input_tokens_seen": 121442752, + "step": 38590 + }, + { + "epoch": 2.4707125024006147, + "grad_norm": 9.815690994262695, + "learning_rate": 2.9845063674095844e-06, + "loss": 0.2264, + "num_input_tokens_seen": 121458368, + "step": 38595 + }, + { + "epoch": 2.4710325843415912, + "grad_norm": 5.956965446472168, + "learning_rate": 2.9839582947068404e-06, + "loss": 0.1578, + "num_input_tokens_seen": 121474432, + "step": 38600 + }, + { + "epoch": 2.4713526662825682, + "grad_norm": 17.11016082763672, + "learning_rate": 2.9834101978377394e-06, + "loss": 0.3306, + "num_input_tokens_seen": 121490240, + "step": 38605 + }, + { + "epoch": 2.471672748223545, + "grad_norm": 8.707038879394531, + "learning_rate": 2.9828620768296518e-06, + "loss": 0.1765, + "num_input_tokens_seen": 121506496, + "step": 38610 + }, + { + "epoch": 2.471992830164522, + "grad_norm": 6.862905025482178, + "learning_rate": 2.982313931709946e-06, + "loss": 0.1379, + "num_input_tokens_seen": 121523392, + "step": 38615 + }, + { + "epoch": 2.472312912105499, + "grad_norm": 13.159392356872559, + "learning_rate": 2.981765762505996e-06, + "loss": 0.2563, + "num_input_tokens_seen": 121538624, + "step": 38620 + }, + { + "epoch": 2.4726329940464757, + "grad_norm": 7.173088550567627, + "learning_rate": 2.981217569245173e-06, + "loss": 0.2458, + "num_input_tokens_seen": 121553728, + "step": 38625 + }, + { + "epoch": 2.4729530759874527, + "grad_norm": 19.971837997436523, + "learning_rate": 2.980669351954851e-06, + "loss": 0.1847, + "num_input_tokens_seen": 121569472, + "step": 38630 + }, + { + "epoch": 2.4732731579284297, + "grad_norm": 6.527810096740723, + "learning_rate": 2.980121110662404e-06, + "loss": 0.1923, + "num_input_tokens_seen": 121586048, + "step": 38635 + }, + { + "epoch": 2.4735932398694067, + "grad_norm": 12.171278953552246, + "learning_rate": 2.979572845395211e-06, + "loss": 0.3104, + "num_input_tokens_seen": 121600576, + "step": 38640 + }, + { + "epoch": 2.4739133218103833, + "grad_norm": 4.996275424957275, + "learning_rate": 2.9790245561806475e-06, + "loss": 0.1758, + "num_input_tokens_seen": 121616000, + "step": 38645 + }, + { + "epoch": 2.4742334037513602, + "grad_norm": 7.181641578674316, + "learning_rate": 2.9784762430460933e-06, + "loss": 0.3138, + "num_input_tokens_seen": 121631936, + "step": 38650 + }, + { + "epoch": 2.4745534856923372, + "grad_norm": 6.754929065704346, + "learning_rate": 2.977927906018928e-06, + "loss": 0.2557, + "num_input_tokens_seen": 121647808, + "step": 38655 + }, + { + "epoch": 2.4748735676333142, + "grad_norm": 7.137955188751221, + "learning_rate": 2.9773795451265318e-06, + "loss": 0.1177, + "num_input_tokens_seen": 121663104, + "step": 38660 + }, + { + "epoch": 2.475193649574291, + "grad_norm": 20.647146224975586, + "learning_rate": 2.9768311603962886e-06, + "loss": 0.2509, + "num_input_tokens_seen": 121679232, + "step": 38665 + }, + { + "epoch": 2.4755137315152678, + "grad_norm": 12.125418663024902, + "learning_rate": 2.97628275185558e-06, + "loss": 0.2307, + "num_input_tokens_seen": 121694208, + "step": 38670 + }, + { + "epoch": 2.4758338134562448, + "grad_norm": 38.82849884033203, + "learning_rate": 2.975734319531792e-06, + "loss": 0.3046, + "num_input_tokens_seen": 121710272, + "step": 38675 + }, + { + "epoch": 2.4761538953972217, + "grad_norm": 4.881045341491699, + "learning_rate": 2.9751858634523105e-06, + "loss": 0.1934, + "num_input_tokens_seen": 121726144, + "step": 38680 + }, + { + "epoch": 2.4764739773381987, + "grad_norm": 10.119040489196777, + "learning_rate": 2.974637383644522e-06, + "loss": 0.2431, + "num_input_tokens_seen": 121741888, + "step": 38685 + }, + { + "epoch": 2.4767940592791753, + "grad_norm": 4.431619167327881, + "learning_rate": 2.974088880135814e-06, + "loss": 0.221, + "num_input_tokens_seen": 121757312, + "step": 38690 + }, + { + "epoch": 2.4771141412201523, + "grad_norm": 7.884054183959961, + "learning_rate": 2.9735403529535777e-06, + "loss": 0.1129, + "num_input_tokens_seen": 121772224, + "step": 38695 + }, + { + "epoch": 2.4774342231611293, + "grad_norm": 21.15617561340332, + "learning_rate": 2.9729918021252023e-06, + "loss": 0.2844, + "num_input_tokens_seen": 121788672, + "step": 38700 + }, + { + "epoch": 2.4777543051021063, + "grad_norm": 9.524483680725098, + "learning_rate": 2.9724432276780803e-06, + "loss": 0.239, + "num_input_tokens_seen": 121804928, + "step": 38705 + }, + { + "epoch": 2.4780743870430832, + "grad_norm": 29.684642791748047, + "learning_rate": 2.9718946296396045e-06, + "loss": 0.3126, + "num_input_tokens_seen": 121819840, + "step": 38710 + }, + { + "epoch": 2.47839446898406, + "grad_norm": 41.154396057128906, + "learning_rate": 2.9713460080371685e-06, + "loss": 0.3743, + "num_input_tokens_seen": 121834624, + "step": 38715 + }, + { + "epoch": 2.478714550925037, + "grad_norm": 10.18474006652832, + "learning_rate": 2.9707973628981683e-06, + "loss": 0.2391, + "num_input_tokens_seen": 121850176, + "step": 38720 + }, + { + "epoch": 2.4790346328660138, + "grad_norm": 10.561362266540527, + "learning_rate": 2.9702486942500003e-06, + "loss": 0.2137, + "num_input_tokens_seen": 121865920, + "step": 38725 + }, + { + "epoch": 2.4793547148069908, + "grad_norm": 5.466406345367432, + "learning_rate": 2.9697000021200613e-06, + "loss": 0.218, + "num_input_tokens_seen": 121882112, + "step": 38730 + }, + { + "epoch": 2.4796747967479673, + "grad_norm": 90.27532196044922, + "learning_rate": 2.9691512865357513e-06, + "loss": 0.227, + "num_input_tokens_seen": 121897920, + "step": 38735 + }, + { + "epoch": 2.4799948786889443, + "grad_norm": 11.579312324523926, + "learning_rate": 2.9686025475244695e-06, + "loss": 0.2508, + "num_input_tokens_seen": 121913472, + "step": 38740 + }, + { + "epoch": 2.4803149606299213, + "grad_norm": 68.57108306884766, + "learning_rate": 2.9680537851136176e-06, + "loss": 0.2228, + "num_input_tokens_seen": 121929920, + "step": 38745 + }, + { + "epoch": 2.4806350425708983, + "grad_norm": 19.923097610473633, + "learning_rate": 2.967504999330597e-06, + "loss": 0.1358, + "num_input_tokens_seen": 121945472, + "step": 38750 + }, + { + "epoch": 2.4809551245118753, + "grad_norm": 14.584402084350586, + "learning_rate": 2.9669561902028125e-06, + "loss": 0.2594, + "num_input_tokens_seen": 121960512, + "step": 38755 + }, + { + "epoch": 2.481275206452852, + "grad_norm": 6.130236625671387, + "learning_rate": 2.966407357757668e-06, + "loss": 0.1662, + "num_input_tokens_seen": 121978304, + "step": 38760 + }, + { + "epoch": 2.481595288393829, + "grad_norm": 10.323691368103027, + "learning_rate": 2.9658585020225695e-06, + "loss": 0.302, + "num_input_tokens_seen": 121994880, + "step": 38765 + }, + { + "epoch": 2.481915370334806, + "grad_norm": 7.153809070587158, + "learning_rate": 2.965309623024924e-06, + "loss": 0.1458, + "num_input_tokens_seen": 122009600, + "step": 38770 + }, + { + "epoch": 2.482235452275783, + "grad_norm": 22.009042739868164, + "learning_rate": 2.96476072079214e-06, + "loss": 0.2354, + "num_input_tokens_seen": 122026304, + "step": 38775 + }, + { + "epoch": 2.4825555342167593, + "grad_norm": 5.0591607093811035, + "learning_rate": 2.964211795351626e-06, + "loss": 0.2299, + "num_input_tokens_seen": 122041664, + "step": 38780 + }, + { + "epoch": 2.4828756161577363, + "grad_norm": 17.633808135986328, + "learning_rate": 2.9636628467307927e-06, + "loss": 0.2043, + "num_input_tokens_seen": 122056896, + "step": 38785 + }, + { + "epoch": 2.4831956980987133, + "grad_norm": 12.632428169250488, + "learning_rate": 2.963113874957052e-06, + "loss": 0.2083, + "num_input_tokens_seen": 122071488, + "step": 38790 + }, + { + "epoch": 2.4835157800396903, + "grad_norm": 6.007513523101807, + "learning_rate": 2.962564880057817e-06, + "loss": 0.2005, + "num_input_tokens_seen": 122086400, + "step": 38795 + }, + { + "epoch": 2.483835861980667, + "grad_norm": 32.8830451965332, + "learning_rate": 2.9620158620605018e-06, + "loss": 0.1575, + "num_input_tokens_seen": 122102784, + "step": 38800 + }, + { + "epoch": 2.484155943921644, + "grad_norm": 35.66856384277344, + "learning_rate": 2.96146682099252e-06, + "loss": 0.2453, + "num_input_tokens_seen": 122118848, + "step": 38805 + }, + { + "epoch": 2.484476025862621, + "grad_norm": 7.539981365203857, + "learning_rate": 2.960917756881289e-06, + "loss": 0.1629, + "num_input_tokens_seen": 122134080, + "step": 38810 + }, + { + "epoch": 2.484796107803598, + "grad_norm": 71.62683868408203, + "learning_rate": 2.960368669754226e-06, + "loss": 0.4773, + "num_input_tokens_seen": 122148992, + "step": 38815 + }, + { + "epoch": 2.4851161897445744, + "grad_norm": 8.393058776855469, + "learning_rate": 2.9598195596387504e-06, + "loss": 0.1538, + "num_input_tokens_seen": 122164608, + "step": 38820 + }, + { + "epoch": 2.4854362716855514, + "grad_norm": 32.54914855957031, + "learning_rate": 2.9592704265622807e-06, + "loss": 0.232, + "num_input_tokens_seen": 122180608, + "step": 38825 + }, + { + "epoch": 2.4857563536265284, + "grad_norm": 15.908554077148438, + "learning_rate": 2.9587212705522384e-06, + "loss": 0.2174, + "num_input_tokens_seen": 122195712, + "step": 38830 + }, + { + "epoch": 2.4860764355675053, + "grad_norm": 3.7470293045043945, + "learning_rate": 2.9581720916360447e-06, + "loss": 0.3016, + "num_input_tokens_seen": 122211904, + "step": 38835 + }, + { + "epoch": 2.4863965175084823, + "grad_norm": 10.036834716796875, + "learning_rate": 2.9576228898411234e-06, + "loss": 0.2784, + "num_input_tokens_seen": 122226688, + "step": 38840 + }, + { + "epoch": 2.486716599449459, + "grad_norm": 7.82347297668457, + "learning_rate": 2.957073665194898e-06, + "loss": 0.1753, + "num_input_tokens_seen": 122241408, + "step": 38845 + }, + { + "epoch": 2.487036681390436, + "grad_norm": 32.01795959472656, + "learning_rate": 2.956524417724796e-06, + "loss": 0.194, + "num_input_tokens_seen": 122257216, + "step": 38850 + }, + { + "epoch": 2.487356763331413, + "grad_norm": 11.690752983093262, + "learning_rate": 2.955975147458242e-06, + "loss": 0.1745, + "num_input_tokens_seen": 122272512, + "step": 38855 + }, + { + "epoch": 2.48767684527239, + "grad_norm": 21.427616119384766, + "learning_rate": 2.9554258544226644e-06, + "loss": 0.3614, + "num_input_tokens_seen": 122287360, + "step": 38860 + }, + { + "epoch": 2.4879969272133664, + "grad_norm": 14.624544143676758, + "learning_rate": 2.954876538645491e-06, + "loss": 0.2425, + "num_input_tokens_seen": 122303488, + "step": 38865 + }, + { + "epoch": 2.4883170091543434, + "grad_norm": 6.32078218460083, + "learning_rate": 2.9543272001541535e-06, + "loss": 0.2122, + "num_input_tokens_seen": 122318976, + "step": 38870 + }, + { + "epoch": 2.4886370910953204, + "grad_norm": 17.23505401611328, + "learning_rate": 2.9537778389760812e-06, + "loss": 0.2268, + "num_input_tokens_seen": 122334272, + "step": 38875 + }, + { + "epoch": 2.4889571730362974, + "grad_norm": 8.260202407836914, + "learning_rate": 2.953228455138708e-06, + "loss": 0.1778, + "num_input_tokens_seen": 122350016, + "step": 38880 + }, + { + "epoch": 2.4892772549772744, + "grad_norm": 17.259309768676758, + "learning_rate": 2.952679048669467e-06, + "loss": 0.1932, + "num_input_tokens_seen": 122366336, + "step": 38885 + }, + { + "epoch": 2.489597336918251, + "grad_norm": 9.557049751281738, + "learning_rate": 2.952129619595791e-06, + "loss": 0.2486, + "num_input_tokens_seen": 122382016, + "step": 38890 + }, + { + "epoch": 2.489917418859228, + "grad_norm": 28.75272560119629, + "learning_rate": 2.9515801679451177e-06, + "loss": 0.2206, + "num_input_tokens_seen": 122397568, + "step": 38895 + }, + { + "epoch": 2.490237500800205, + "grad_norm": 12.056356430053711, + "learning_rate": 2.9510306937448825e-06, + "loss": 0.2299, + "num_input_tokens_seen": 122413568, + "step": 38900 + }, + { + "epoch": 2.490557582741182, + "grad_norm": 9.148770332336426, + "learning_rate": 2.9504811970225246e-06, + "loss": 0.1943, + "num_input_tokens_seen": 122429184, + "step": 38905 + }, + { + "epoch": 2.4908776646821584, + "grad_norm": 9.814897537231445, + "learning_rate": 2.9499316778054817e-06, + "loss": 0.2332, + "num_input_tokens_seen": 122444672, + "step": 38910 + }, + { + "epoch": 2.4911977466231354, + "grad_norm": 6.779013633728027, + "learning_rate": 2.9493821361211944e-06, + "loss": 0.2755, + "num_input_tokens_seen": 122459648, + "step": 38915 + }, + { + "epoch": 2.4915178285641124, + "grad_norm": 7.012596607208252, + "learning_rate": 2.948832571997104e-06, + "loss": 0.1965, + "num_input_tokens_seen": 122474688, + "step": 38920 + }, + { + "epoch": 2.4918379105050894, + "grad_norm": 6.483165264129639, + "learning_rate": 2.948282985460652e-06, + "loss": 0.2573, + "num_input_tokens_seen": 122490752, + "step": 38925 + }, + { + "epoch": 2.4921579924460664, + "grad_norm": 3.8059725761413574, + "learning_rate": 2.9477333765392835e-06, + "loss": 0.207, + "num_input_tokens_seen": 122505984, + "step": 38930 + }, + { + "epoch": 2.492478074387043, + "grad_norm": 12.535743713378906, + "learning_rate": 2.9471837452604424e-06, + "loss": 0.2417, + "num_input_tokens_seen": 122521024, + "step": 38935 + }, + { + "epoch": 2.49279815632802, + "grad_norm": 8.625566482543945, + "learning_rate": 2.9466340916515746e-06, + "loss": 0.2529, + "num_input_tokens_seen": 122536448, + "step": 38940 + }, + { + "epoch": 2.493118238268997, + "grad_norm": 9.557392120361328, + "learning_rate": 2.9460844157401263e-06, + "loss": 0.2426, + "num_input_tokens_seen": 122551872, + "step": 38945 + }, + { + "epoch": 2.493438320209974, + "grad_norm": 9.749059677124023, + "learning_rate": 2.945534717553546e-06, + "loss": 0.1973, + "num_input_tokens_seen": 122568192, + "step": 38950 + }, + { + "epoch": 2.4937584021509505, + "grad_norm": 7.827054500579834, + "learning_rate": 2.944984997119283e-06, + "loss": 0.2229, + "num_input_tokens_seen": 122585344, + "step": 38955 + }, + { + "epoch": 2.4940784840919275, + "grad_norm": 34.863277435302734, + "learning_rate": 2.944435254464786e-06, + "loss": 0.2131, + "num_input_tokens_seen": 122600640, + "step": 38960 + }, + { + "epoch": 2.4943985660329044, + "grad_norm": 14.673707962036133, + "learning_rate": 2.943885489617508e-06, + "loss": 0.1986, + "num_input_tokens_seen": 122616064, + "step": 38965 + }, + { + "epoch": 2.4947186479738814, + "grad_norm": 12.92646312713623, + "learning_rate": 2.943335702604901e-06, + "loss": 0.306, + "num_input_tokens_seen": 122631232, + "step": 38970 + }, + { + "epoch": 2.4950387299148584, + "grad_norm": 33.25495529174805, + "learning_rate": 2.9427858934544183e-06, + "loss": 0.3408, + "num_input_tokens_seen": 122645824, + "step": 38975 + }, + { + "epoch": 2.495358811855835, + "grad_norm": 13.375948905944824, + "learning_rate": 2.942236062193514e-06, + "loss": 0.2662, + "num_input_tokens_seen": 122661184, + "step": 38980 + }, + { + "epoch": 2.495678893796812, + "grad_norm": 8.167360305786133, + "learning_rate": 2.9416862088496444e-06, + "loss": 0.1876, + "num_input_tokens_seen": 122675840, + "step": 38985 + }, + { + "epoch": 2.495998975737789, + "grad_norm": 14.32509708404541, + "learning_rate": 2.941136333450266e-06, + "loss": 0.2183, + "num_input_tokens_seen": 122692096, + "step": 38990 + }, + { + "epoch": 2.496319057678766, + "grad_norm": 7.430271148681641, + "learning_rate": 2.940586436022837e-06, + "loss": 0.2081, + "num_input_tokens_seen": 122706560, + "step": 38995 + }, + { + "epoch": 2.4966391396197425, + "grad_norm": 8.824281692504883, + "learning_rate": 2.9400365165948163e-06, + "loss": 0.2781, + "num_input_tokens_seen": 122722048, + "step": 39000 + }, + { + "epoch": 2.4969592215607195, + "grad_norm": 13.258875846862793, + "learning_rate": 2.939486575193665e-06, + "loss": 0.2418, + "num_input_tokens_seen": 122737152, + "step": 39005 + }, + { + "epoch": 2.4972793035016965, + "grad_norm": 24.30643081665039, + "learning_rate": 2.9389366118468422e-06, + "loss": 0.42, + "num_input_tokens_seen": 122752192, + "step": 39010 + }, + { + "epoch": 2.4975993854426735, + "grad_norm": 15.57521915435791, + "learning_rate": 2.9383866265818118e-06, + "loss": 0.1836, + "num_input_tokens_seen": 122768320, + "step": 39015 + }, + { + "epoch": 2.4979194673836504, + "grad_norm": 141.46188354492188, + "learning_rate": 2.9378366194260364e-06, + "loss": 0.2326, + "num_input_tokens_seen": 122784640, + "step": 39020 + }, + { + "epoch": 2.498239549324627, + "grad_norm": 21.679424285888672, + "learning_rate": 2.937286590406981e-06, + "loss": 0.2853, + "num_input_tokens_seen": 122801408, + "step": 39025 + }, + { + "epoch": 2.498559631265604, + "grad_norm": 31.33565330505371, + "learning_rate": 2.936736539552111e-06, + "loss": 0.2938, + "num_input_tokens_seen": 122817600, + "step": 39030 + }, + { + "epoch": 2.498879713206581, + "grad_norm": 18.35047721862793, + "learning_rate": 2.936186466888894e-06, + "loss": 0.1987, + "num_input_tokens_seen": 122832896, + "step": 39035 + }, + { + "epoch": 2.499199795147558, + "grad_norm": 4.764860153198242, + "learning_rate": 2.9356363724447958e-06, + "loss": 0.1753, + "num_input_tokens_seen": 122846976, + "step": 39040 + }, + { + "epoch": 2.4995198770885345, + "grad_norm": 9.352620124816895, + "learning_rate": 2.935086256247286e-06, + "loss": 0.1784, + "num_input_tokens_seen": 122862912, + "step": 39045 + }, + { + "epoch": 2.4998399590295115, + "grad_norm": 3.8339014053344727, + "learning_rate": 2.934536118323835e-06, + "loss": 0.199, + "num_input_tokens_seen": 122878720, + "step": 39050 + }, + { + "epoch": 2.5001600409704885, + "grad_norm": 16.444629669189453, + "learning_rate": 2.9339859587019145e-06, + "loss": 0.2244, + "num_input_tokens_seen": 122895104, + "step": 39055 + }, + { + "epoch": 2.5004801229114655, + "grad_norm": 7.244654655456543, + "learning_rate": 2.9334357774089956e-06, + "loss": 0.1792, + "num_input_tokens_seen": 122910592, + "step": 39060 + }, + { + "epoch": 2.5004801229114655, + "eval_loss": 0.5070953965187073, + "eval_runtime": 51.0975, + "eval_samples_per_second": 271.755, + "eval_steps_per_second": 33.974, + "num_input_tokens_seen": 122910592, + "step": 39060 + }, + { + "epoch": 2.5008002048524425, + "grad_norm": 14.553043365478516, + "learning_rate": 2.9328855744725515e-06, + "loss": 0.2447, + "num_input_tokens_seen": 122925952, + "step": 39065 + }, + { + "epoch": 2.501120286793419, + "grad_norm": 11.576017379760742, + "learning_rate": 2.932335349920056e-06, + "loss": 0.2185, + "num_input_tokens_seen": 122940736, + "step": 39070 + }, + { + "epoch": 2.501440368734396, + "grad_norm": 12.042302131652832, + "learning_rate": 2.9317851037789856e-06, + "loss": 0.2521, + "num_input_tokens_seen": 122956288, + "step": 39075 + }, + { + "epoch": 2.501760450675373, + "grad_norm": 10.809813499450684, + "learning_rate": 2.9312348360768163e-06, + "loss": 0.2858, + "num_input_tokens_seen": 122973056, + "step": 39080 + }, + { + "epoch": 2.5020805326163496, + "grad_norm": 18.209569931030273, + "learning_rate": 2.9306845468410256e-06, + "loss": 0.2661, + "num_input_tokens_seen": 122988672, + "step": 39085 + }, + { + "epoch": 2.5024006145573265, + "grad_norm": 12.027667045593262, + "learning_rate": 2.930134236099092e-06, + "loss": 0.2316, + "num_input_tokens_seen": 123004480, + "step": 39090 + }, + { + "epoch": 2.5027206964983035, + "grad_norm": 5.103783130645752, + "learning_rate": 2.929583903878495e-06, + "loss": 0.2113, + "num_input_tokens_seen": 123020224, + "step": 39095 + }, + { + "epoch": 2.5030407784392805, + "grad_norm": 7.200120449066162, + "learning_rate": 2.929033550206715e-06, + "loss": 0.2862, + "num_input_tokens_seen": 123035200, + "step": 39100 + }, + { + "epoch": 2.5033608603802575, + "grad_norm": 28.262252807617188, + "learning_rate": 2.928483175111234e-06, + "loss": 0.2525, + "num_input_tokens_seen": 123051968, + "step": 39105 + }, + { + "epoch": 2.5036809423212345, + "grad_norm": 37.26548767089844, + "learning_rate": 2.927932778619536e-06, + "loss": 0.2098, + "num_input_tokens_seen": 123068096, + "step": 39110 + }, + { + "epoch": 2.504001024262211, + "grad_norm": 9.93254280090332, + "learning_rate": 2.927382360759104e-06, + "loss": 0.1985, + "num_input_tokens_seen": 123083584, + "step": 39115 + }, + { + "epoch": 2.504321106203188, + "grad_norm": 4.510644912719727, + "learning_rate": 2.9268319215574232e-06, + "loss": 0.222, + "num_input_tokens_seen": 123099968, + "step": 39120 + }, + { + "epoch": 2.504641188144165, + "grad_norm": 5.042325973510742, + "learning_rate": 2.926281461041979e-06, + "loss": 0.1681, + "num_input_tokens_seen": 123115776, + "step": 39125 + }, + { + "epoch": 2.5049612700851416, + "grad_norm": 7.393616199493408, + "learning_rate": 2.925730979240259e-06, + "loss": 0.2601, + "num_input_tokens_seen": 123131904, + "step": 39130 + }, + { + "epoch": 2.5052813520261186, + "grad_norm": 42.69497299194336, + "learning_rate": 2.925180476179752e-06, + "loss": 0.197, + "num_input_tokens_seen": 123147968, + "step": 39135 + }, + { + "epoch": 2.5056014339670956, + "grad_norm": 20.724655151367188, + "learning_rate": 2.924629951887946e-06, + "loss": 0.4018, + "num_input_tokens_seen": 123163840, + "step": 39140 + }, + { + "epoch": 2.5059215159080725, + "grad_norm": 10.690742492675781, + "learning_rate": 2.9240794063923327e-06, + "loss": 0.2582, + "num_input_tokens_seen": 123180352, + "step": 39145 + }, + { + "epoch": 2.5062415978490495, + "grad_norm": 5.596488952636719, + "learning_rate": 2.923528839720403e-06, + "loss": 0.1756, + "num_input_tokens_seen": 123196608, + "step": 39150 + }, + { + "epoch": 2.506561679790026, + "grad_norm": 8.040584564208984, + "learning_rate": 2.922978251899648e-06, + "loss": 0.2078, + "num_input_tokens_seen": 123212480, + "step": 39155 + }, + { + "epoch": 2.506881761731003, + "grad_norm": 8.755120277404785, + "learning_rate": 2.9224276429575626e-06, + "loss": 0.1229, + "num_input_tokens_seen": 123230016, + "step": 39160 + }, + { + "epoch": 2.50720184367198, + "grad_norm": 19.807876586914062, + "learning_rate": 2.9218770129216417e-06, + "loss": 0.1453, + "num_input_tokens_seen": 123245632, + "step": 39165 + }, + { + "epoch": 2.507521925612957, + "grad_norm": 15.484386444091797, + "learning_rate": 2.92132636181938e-06, + "loss": 0.3071, + "num_input_tokens_seen": 123262272, + "step": 39170 + }, + { + "epoch": 2.5078420075539336, + "grad_norm": 5.313027858734131, + "learning_rate": 2.9207756896782746e-06, + "loss": 0.2318, + "num_input_tokens_seen": 123277888, + "step": 39175 + }, + { + "epoch": 2.5081620894949106, + "grad_norm": 6.006651401519775, + "learning_rate": 2.920224996525823e-06, + "loss": 0.1895, + "num_input_tokens_seen": 123293824, + "step": 39180 + }, + { + "epoch": 2.5084821714358876, + "grad_norm": 50.92525100708008, + "learning_rate": 2.919674282389524e-06, + "loss": 0.2957, + "num_input_tokens_seen": 123309248, + "step": 39185 + }, + { + "epoch": 2.5088022533768646, + "grad_norm": 24.875207901000977, + "learning_rate": 2.9191235472968767e-06, + "loss": 0.2751, + "num_input_tokens_seen": 123325120, + "step": 39190 + }, + { + "epoch": 2.5091223353178416, + "grad_norm": 8.30079174041748, + "learning_rate": 2.9185727912753827e-06, + "loss": 0.2485, + "num_input_tokens_seen": 123339776, + "step": 39195 + }, + { + "epoch": 2.509442417258818, + "grad_norm": 14.76864242553711, + "learning_rate": 2.918022014352544e-06, + "loss": 0.2622, + "num_input_tokens_seen": 123354944, + "step": 39200 + }, + { + "epoch": 2.509762499199795, + "grad_norm": 9.041280746459961, + "learning_rate": 2.917471216555864e-06, + "loss": 0.2675, + "num_input_tokens_seen": 123371520, + "step": 39205 + }, + { + "epoch": 2.510082581140772, + "grad_norm": 23.082021713256836, + "learning_rate": 2.916920397912845e-06, + "loss": 0.2917, + "num_input_tokens_seen": 123387072, + "step": 39210 + }, + { + "epoch": 2.510402663081749, + "grad_norm": 10.659268379211426, + "learning_rate": 2.9163695584509934e-06, + "loss": 0.2082, + "num_input_tokens_seen": 123402624, + "step": 39215 + }, + { + "epoch": 2.5107227450227256, + "grad_norm": 24.884912490844727, + "learning_rate": 2.915818698197814e-06, + "loss": 0.3232, + "num_input_tokens_seen": 123418880, + "step": 39220 + }, + { + "epoch": 2.5110428269637026, + "grad_norm": 8.25831127166748, + "learning_rate": 2.9152678171808156e-06, + "loss": 0.1872, + "num_input_tokens_seen": 123435584, + "step": 39225 + }, + { + "epoch": 2.5113629089046796, + "grad_norm": 4.7805280685424805, + "learning_rate": 2.914716915427505e-06, + "loss": 0.1625, + "num_input_tokens_seen": 123450880, + "step": 39230 + }, + { + "epoch": 2.5116829908456566, + "grad_norm": 22.800153732299805, + "learning_rate": 2.9141659929653925e-06, + "loss": 0.3469, + "num_input_tokens_seen": 123465600, + "step": 39235 + }, + { + "epoch": 2.5120030727866336, + "grad_norm": 6.996478080749512, + "learning_rate": 2.9136150498219872e-06, + "loss": 0.2009, + "num_input_tokens_seen": 123481472, + "step": 39240 + }, + { + "epoch": 2.51232315472761, + "grad_norm": 44.93198776245117, + "learning_rate": 2.9130640860248003e-06, + "loss": 0.2325, + "num_input_tokens_seen": 123496512, + "step": 39245 + }, + { + "epoch": 2.512643236668587, + "grad_norm": 15.36402702331543, + "learning_rate": 2.912513101601345e-06, + "loss": 0.2488, + "num_input_tokens_seen": 123512640, + "step": 39250 + }, + { + "epoch": 2.512963318609564, + "grad_norm": 12.174909591674805, + "learning_rate": 2.9119620965791346e-06, + "loss": 0.1831, + "num_input_tokens_seen": 123528192, + "step": 39255 + }, + { + "epoch": 2.5132834005505407, + "grad_norm": 9.637043952941895, + "learning_rate": 2.911411070985683e-06, + "loss": 0.2414, + "num_input_tokens_seen": 123543680, + "step": 39260 + }, + { + "epoch": 2.5136034824915177, + "grad_norm": 10.493911743164062, + "learning_rate": 2.910860024848505e-06, + "loss": 0.1765, + "num_input_tokens_seen": 123560896, + "step": 39265 + }, + { + "epoch": 2.5139235644324947, + "grad_norm": 11.353713989257812, + "learning_rate": 2.9103089581951166e-06, + "loss": 0.1817, + "num_input_tokens_seen": 123575808, + "step": 39270 + }, + { + "epoch": 2.5142436463734716, + "grad_norm": 10.455677032470703, + "learning_rate": 2.909757871053037e-06, + "loss": 0.2243, + "num_input_tokens_seen": 123591552, + "step": 39275 + }, + { + "epoch": 2.5145637283144486, + "grad_norm": 5.658506870269775, + "learning_rate": 2.9092067634497835e-06, + "loss": 0.2163, + "num_input_tokens_seen": 123607488, + "step": 39280 + }, + { + "epoch": 2.5148838102554256, + "grad_norm": 11.82728099822998, + "learning_rate": 2.908655635412876e-06, + "loss": 0.1746, + "num_input_tokens_seen": 123622144, + "step": 39285 + }, + { + "epoch": 2.515203892196402, + "grad_norm": 8.972734451293945, + "learning_rate": 2.9081044869698342e-06, + "loss": 0.2954, + "num_input_tokens_seen": 123638016, + "step": 39290 + }, + { + "epoch": 2.515523974137379, + "grad_norm": 13.7730131149292, + "learning_rate": 2.90755331814818e-06, + "loss": 0.2129, + "num_input_tokens_seen": 123654976, + "step": 39295 + }, + { + "epoch": 2.515844056078356, + "grad_norm": 30.84093475341797, + "learning_rate": 2.9070021289754368e-06, + "loss": 0.3048, + "num_input_tokens_seen": 123670528, + "step": 39300 + }, + { + "epoch": 2.5161641380193327, + "grad_norm": 10.201210975646973, + "learning_rate": 2.9064509194791263e-06, + "loss": 0.3119, + "num_input_tokens_seen": 123686720, + "step": 39305 + }, + { + "epoch": 2.5164842199603097, + "grad_norm": 9.271411895751953, + "learning_rate": 2.9058996896867743e-06, + "loss": 0.3004, + "num_input_tokens_seen": 123702016, + "step": 39310 + }, + { + "epoch": 2.5168043019012867, + "grad_norm": 4.87648868560791, + "learning_rate": 2.905348439625906e-06, + "loss": 0.1985, + "num_input_tokens_seen": 123715968, + "step": 39315 + }, + { + "epoch": 2.5171243838422637, + "grad_norm": 6.801302433013916, + "learning_rate": 2.9047971693240485e-06, + "loss": 0.2372, + "num_input_tokens_seen": 123730880, + "step": 39320 + }, + { + "epoch": 2.5174444657832407, + "grad_norm": 17.49167251586914, + "learning_rate": 2.904245878808729e-06, + "loss": 0.2031, + "num_input_tokens_seen": 123745920, + "step": 39325 + }, + { + "epoch": 2.5177645477242176, + "grad_norm": 19.64748191833496, + "learning_rate": 2.9036945681074745e-06, + "loss": 0.2428, + "num_input_tokens_seen": 123761536, + "step": 39330 + }, + { + "epoch": 2.518084629665194, + "grad_norm": 49.27276611328125, + "learning_rate": 2.9031432372478167e-06, + "loss": 0.2125, + "num_input_tokens_seen": 123776832, + "step": 39335 + }, + { + "epoch": 2.518404711606171, + "grad_norm": 38.06633377075195, + "learning_rate": 2.9025918862572866e-06, + "loss": 0.3436, + "num_input_tokens_seen": 123794688, + "step": 39340 + }, + { + "epoch": 2.518724793547148, + "grad_norm": 8.028717994689941, + "learning_rate": 2.902040515163414e-06, + "loss": 0.2761, + "num_input_tokens_seen": 123809792, + "step": 39345 + }, + { + "epoch": 2.5190448754881247, + "grad_norm": 25.730382919311523, + "learning_rate": 2.901489123993732e-06, + "loss": 0.2474, + "num_input_tokens_seen": 123825664, + "step": 39350 + }, + { + "epoch": 2.5193649574291017, + "grad_norm": 24.894704818725586, + "learning_rate": 2.9009377127757747e-06, + "loss": 0.2624, + "num_input_tokens_seen": 123842304, + "step": 39355 + }, + { + "epoch": 2.5196850393700787, + "grad_norm": 11.68652629852295, + "learning_rate": 2.9003862815370764e-06, + "loss": 0.2393, + "num_input_tokens_seen": 123856960, + "step": 39360 + }, + { + "epoch": 2.5200051213110557, + "grad_norm": 26.080554962158203, + "learning_rate": 2.8998348303051725e-06, + "loss": 0.2521, + "num_input_tokens_seen": 123873344, + "step": 39365 + }, + { + "epoch": 2.5203252032520327, + "grad_norm": 16.243837356567383, + "learning_rate": 2.8992833591076e-06, + "loss": 0.1666, + "num_input_tokens_seen": 123889984, + "step": 39370 + }, + { + "epoch": 2.5206452851930097, + "grad_norm": 24.55183982849121, + "learning_rate": 2.8987318679718966e-06, + "loss": 0.2193, + "num_input_tokens_seen": 123906240, + "step": 39375 + }, + { + "epoch": 2.5209653671339862, + "grad_norm": 25.963817596435547, + "learning_rate": 2.8981803569256007e-06, + "loss": 0.3724, + "num_input_tokens_seen": 123921920, + "step": 39380 + }, + { + "epoch": 2.521285449074963, + "grad_norm": 7.075596809387207, + "learning_rate": 2.8976288259962508e-06, + "loss": 0.2095, + "num_input_tokens_seen": 123936832, + "step": 39385 + }, + { + "epoch": 2.52160553101594, + "grad_norm": 8.80421257019043, + "learning_rate": 2.89707727521139e-06, + "loss": 0.2137, + "num_input_tokens_seen": 123951616, + "step": 39390 + }, + { + "epoch": 2.5219256129569168, + "grad_norm": 11.193263053894043, + "learning_rate": 2.896525704598557e-06, + "loss": 0.1472, + "num_input_tokens_seen": 123968384, + "step": 39395 + }, + { + "epoch": 2.5222456948978937, + "grad_norm": 10.479330062866211, + "learning_rate": 2.8959741141852963e-06, + "loss": 0.1806, + "num_input_tokens_seen": 123984640, + "step": 39400 + }, + { + "epoch": 2.5225657768388707, + "grad_norm": 6.3143463134765625, + "learning_rate": 2.8954225039991514e-06, + "loss": 0.1203, + "num_input_tokens_seen": 124001280, + "step": 39405 + }, + { + "epoch": 2.5228858587798477, + "grad_norm": 15.095909118652344, + "learning_rate": 2.8948708740676657e-06, + "loss": 0.2126, + "num_input_tokens_seen": 124016640, + "step": 39410 + }, + { + "epoch": 2.5232059407208247, + "grad_norm": 11.234752655029297, + "learning_rate": 2.894319224418386e-06, + "loss": 0.2419, + "num_input_tokens_seen": 124032384, + "step": 39415 + }, + { + "epoch": 2.5235260226618013, + "grad_norm": 9.916899681091309, + "learning_rate": 2.893767555078857e-06, + "loss": 0.1775, + "num_input_tokens_seen": 124047232, + "step": 39420 + }, + { + "epoch": 2.5238461046027783, + "grad_norm": 10.19306468963623, + "learning_rate": 2.893215866076628e-06, + "loss": 0.2091, + "num_input_tokens_seen": 124063744, + "step": 39425 + }, + { + "epoch": 2.5241661865437552, + "grad_norm": 39.28792190551758, + "learning_rate": 2.8926641574392473e-06, + "loss": 0.1578, + "num_input_tokens_seen": 124078336, + "step": 39430 + }, + { + "epoch": 2.5244862684847322, + "grad_norm": 3.487787961959839, + "learning_rate": 2.8921124291942635e-06, + "loss": 0.126, + "num_input_tokens_seen": 124093056, + "step": 39435 + }, + { + "epoch": 2.524806350425709, + "grad_norm": 12.912683486938477, + "learning_rate": 2.8915606813692276e-06, + "loss": 0.2111, + "num_input_tokens_seen": 124107392, + "step": 39440 + }, + { + "epoch": 2.5251264323666858, + "grad_norm": 15.808707237243652, + "learning_rate": 2.89100891399169e-06, + "loss": 0.1535, + "num_input_tokens_seen": 124122496, + "step": 39445 + }, + { + "epoch": 2.5254465143076628, + "grad_norm": 3.247551679611206, + "learning_rate": 2.8904571270892046e-06, + "loss": 0.1801, + "num_input_tokens_seen": 124138176, + "step": 39450 + }, + { + "epoch": 2.5257665962486398, + "grad_norm": 5.069752216339111, + "learning_rate": 2.889905320689324e-06, + "loss": 0.2439, + "num_input_tokens_seen": 124152640, + "step": 39455 + }, + { + "epoch": 2.5260866781896167, + "grad_norm": 14.135446548461914, + "learning_rate": 2.889353494819603e-06, + "loss": 0.2297, + "num_input_tokens_seen": 124170432, + "step": 39460 + }, + { + "epoch": 2.5264067601305933, + "grad_norm": 10.261747360229492, + "learning_rate": 2.888801649507596e-06, + "loss": 0.2656, + "num_input_tokens_seen": 124186048, + "step": 39465 + }, + { + "epoch": 2.5267268420715703, + "grad_norm": 42.799015045166016, + "learning_rate": 2.8882497847808605e-06, + "loss": 0.3109, + "num_input_tokens_seen": 124202048, + "step": 39470 + }, + { + "epoch": 2.5270469240125473, + "grad_norm": 10.0659818649292, + "learning_rate": 2.887697900666952e-06, + "loss": 0.2268, + "num_input_tokens_seen": 124218688, + "step": 39475 + }, + { + "epoch": 2.5273670059535243, + "grad_norm": 29.457874298095703, + "learning_rate": 2.8871459971934307e-06, + "loss": 0.2231, + "num_input_tokens_seen": 124234048, + "step": 39480 + }, + { + "epoch": 2.527687087894501, + "grad_norm": 9.474661827087402, + "learning_rate": 2.8865940743878546e-06, + "loss": 0.1872, + "num_input_tokens_seen": 124249280, + "step": 39485 + }, + { + "epoch": 2.528007169835478, + "grad_norm": 8.119494438171387, + "learning_rate": 2.8860421322777844e-06, + "loss": 0.1659, + "num_input_tokens_seen": 124264832, + "step": 39490 + }, + { + "epoch": 2.528327251776455, + "grad_norm": 10.685164451599121, + "learning_rate": 2.885490170890781e-06, + "loss": 0.2652, + "num_input_tokens_seen": 124280448, + "step": 39495 + }, + { + "epoch": 2.5286473337174318, + "grad_norm": 11.57536792755127, + "learning_rate": 2.884938190254405e-06, + "loss": 0.2381, + "num_input_tokens_seen": 124296896, + "step": 39500 + }, + { + "epoch": 2.5289674156584088, + "grad_norm": 8.064180374145508, + "learning_rate": 2.884386190396222e-06, + "loss": 0.1968, + "num_input_tokens_seen": 124311296, + "step": 39505 + }, + { + "epoch": 2.5292874975993853, + "grad_norm": 3.5740957260131836, + "learning_rate": 2.8838341713437944e-06, + "loss": 0.2005, + "num_input_tokens_seen": 124326272, + "step": 39510 + }, + { + "epoch": 2.5296075795403623, + "grad_norm": 3.8082973957061768, + "learning_rate": 2.8832821331246878e-06, + "loss": 0.1836, + "num_input_tokens_seen": 124341248, + "step": 39515 + }, + { + "epoch": 2.5299276614813393, + "grad_norm": 9.47239875793457, + "learning_rate": 2.8827300757664686e-06, + "loss": 0.1753, + "num_input_tokens_seen": 124359040, + "step": 39520 + }, + { + "epoch": 2.530247743422316, + "grad_norm": 18.885211944580078, + "learning_rate": 2.8821779992967026e-06, + "loss": 0.3078, + "num_input_tokens_seen": 124374720, + "step": 39525 + }, + { + "epoch": 2.530567825363293, + "grad_norm": 7.401687145233154, + "learning_rate": 2.8816259037429573e-06, + "loss": 0.208, + "num_input_tokens_seen": 124391040, + "step": 39530 + }, + { + "epoch": 2.53088790730427, + "grad_norm": 14.420079231262207, + "learning_rate": 2.881073789132803e-06, + "loss": 0.2192, + "num_input_tokens_seen": 124407680, + "step": 39535 + }, + { + "epoch": 2.531207989245247, + "grad_norm": 6.335430145263672, + "learning_rate": 2.880521655493808e-06, + "loss": 0.2628, + "num_input_tokens_seen": 124423744, + "step": 39540 + }, + { + "epoch": 2.531528071186224, + "grad_norm": 50.00111389160156, + "learning_rate": 2.879969502853544e-06, + "loss": 0.2605, + "num_input_tokens_seen": 124438720, + "step": 39545 + }, + { + "epoch": 2.531848153127201, + "grad_norm": 7.590613842010498, + "learning_rate": 2.879417331239582e-06, + "loss": 0.177, + "num_input_tokens_seen": 124455104, + "step": 39550 + }, + { + "epoch": 2.5321682350681773, + "grad_norm": 12.190577507019043, + "learning_rate": 2.8788651406794956e-06, + "loss": 0.2341, + "num_input_tokens_seen": 124471488, + "step": 39555 + }, + { + "epoch": 2.5324883170091543, + "grad_norm": 20.98114013671875, + "learning_rate": 2.878312931200856e-06, + "loss": 0.1969, + "num_input_tokens_seen": 124486976, + "step": 39560 + }, + { + "epoch": 2.5328083989501313, + "grad_norm": 12.154642105102539, + "learning_rate": 2.8777607028312406e-06, + "loss": 0.2184, + "num_input_tokens_seen": 124503616, + "step": 39565 + }, + { + "epoch": 2.533128480891108, + "grad_norm": 8.63257884979248, + "learning_rate": 2.877208455598222e-06, + "loss": 0.1877, + "num_input_tokens_seen": 124520000, + "step": 39570 + }, + { + "epoch": 2.533448562832085, + "grad_norm": 27.853425979614258, + "learning_rate": 2.876656189529379e-06, + "loss": 0.5476, + "num_input_tokens_seen": 124535296, + "step": 39575 + }, + { + "epoch": 2.533768644773062, + "grad_norm": 7.977745532989502, + "learning_rate": 2.8761039046522874e-06, + "loss": 0.2581, + "num_input_tokens_seen": 124552064, + "step": 39580 + }, + { + "epoch": 2.534088726714039, + "grad_norm": 12.296770095825195, + "learning_rate": 2.8755516009945266e-06, + "loss": 0.1697, + "num_input_tokens_seen": 124567552, + "step": 39585 + }, + { + "epoch": 2.534408808655016, + "grad_norm": 9.808465003967285, + "learning_rate": 2.874999278583674e-06, + "loss": 0.235, + "num_input_tokens_seen": 124583360, + "step": 39590 + }, + { + "epoch": 2.534728890595993, + "grad_norm": 6.023181438446045, + "learning_rate": 2.874446937447311e-06, + "loss": 0.2169, + "num_input_tokens_seen": 124598080, + "step": 39595 + }, + { + "epoch": 2.5350489725369694, + "grad_norm": 44.20576095581055, + "learning_rate": 2.8738945776130188e-06, + "loss": 0.2462, + "num_input_tokens_seen": 124612992, + "step": 39600 + }, + { + "epoch": 2.5353690544779464, + "grad_norm": 25.67531967163086, + "learning_rate": 2.8733421991083787e-06, + "loss": 0.2994, + "num_input_tokens_seen": 124628544, + "step": 39605 + }, + { + "epoch": 2.5356891364189234, + "grad_norm": 6.0716071128845215, + "learning_rate": 2.872789801960974e-06, + "loss": 0.186, + "num_input_tokens_seen": 124645120, + "step": 39610 + }, + { + "epoch": 2.5360092183599, + "grad_norm": 2.201699733734131, + "learning_rate": 2.8722373861983875e-06, + "loss": 0.2054, + "num_input_tokens_seen": 124661056, + "step": 39615 + }, + { + "epoch": 2.536329300300877, + "grad_norm": 5.532742977142334, + "learning_rate": 2.8716849518482058e-06, + "loss": 0.1903, + "num_input_tokens_seen": 124677120, + "step": 39620 + }, + { + "epoch": 2.536649382241854, + "grad_norm": 23.669231414794922, + "learning_rate": 2.8711324989380135e-06, + "loss": 0.1526, + "num_input_tokens_seen": 124692224, + "step": 39625 + }, + { + "epoch": 2.536969464182831, + "grad_norm": 9.34775447845459, + "learning_rate": 2.870580027495397e-06, + "loss": 0.1917, + "num_input_tokens_seen": 124707904, + "step": 39630 + }, + { + "epoch": 2.537289546123808, + "grad_norm": 10.969000816345215, + "learning_rate": 2.8700275375479447e-06, + "loss": 0.2173, + "num_input_tokens_seen": 124722944, + "step": 39635 + }, + { + "epoch": 2.537609628064785, + "grad_norm": 11.328625679016113, + "learning_rate": 2.8694750291232443e-06, + "loss": 0.1564, + "num_input_tokens_seen": 124738368, + "step": 39640 + }, + { + "epoch": 2.5379297100057614, + "grad_norm": 49.839237213134766, + "learning_rate": 2.8689225022488852e-06, + "loss": 0.2893, + "num_input_tokens_seen": 124753792, + "step": 39645 + }, + { + "epoch": 2.5382497919467384, + "grad_norm": 10.015029907226562, + "learning_rate": 2.8683699569524587e-06, + "loss": 0.2568, + "num_input_tokens_seen": 124770304, + "step": 39650 + }, + { + "epoch": 2.5385698738877154, + "grad_norm": 8.927042961120605, + "learning_rate": 2.867817393261555e-06, + "loss": 0.271, + "num_input_tokens_seen": 124788480, + "step": 39655 + }, + { + "epoch": 2.538889955828692, + "grad_norm": 32.46571731567383, + "learning_rate": 2.8672648112037665e-06, + "loss": 0.1103, + "num_input_tokens_seen": 124803904, + "step": 39660 + }, + { + "epoch": 2.539210037769669, + "grad_norm": 11.662788391113281, + "learning_rate": 2.8667122108066863e-06, + "loss": 0.2727, + "num_input_tokens_seen": 124819392, + "step": 39665 + }, + { + "epoch": 2.539530119710646, + "grad_norm": 17.636051177978516, + "learning_rate": 2.8661595920979087e-06, + "loss": 0.2652, + "num_input_tokens_seen": 124836544, + "step": 39670 + }, + { + "epoch": 2.539850201651623, + "grad_norm": 8.222387313842773, + "learning_rate": 2.8656069551050275e-06, + "loss": 0.3355, + "num_input_tokens_seen": 124851648, + "step": 39675 + }, + { + "epoch": 2.5401702835926, + "grad_norm": 38.183624267578125, + "learning_rate": 2.8650542998556403e-06, + "loss": 0.2043, + "num_input_tokens_seen": 124867840, + "step": 39680 + }, + { + "epoch": 2.5404903655335764, + "grad_norm": 11.327149391174316, + "learning_rate": 2.8645016263773418e-06, + "loss": 0.2482, + "num_input_tokens_seen": 124884928, + "step": 39685 + }, + { + "epoch": 2.5408104474745534, + "grad_norm": 5.265298366546631, + "learning_rate": 2.8639489346977316e-06, + "loss": 0.3038, + "num_input_tokens_seen": 124900992, + "step": 39690 + }, + { + "epoch": 2.5411305294155304, + "grad_norm": 28.770448684692383, + "learning_rate": 2.8633962248444074e-06, + "loss": 0.2139, + "num_input_tokens_seen": 124917248, + "step": 39695 + }, + { + "epoch": 2.5414506113565074, + "grad_norm": 18.90245246887207, + "learning_rate": 2.862843496844968e-06, + "loss": 0.2057, + "num_input_tokens_seen": 124931648, + "step": 39700 + }, + { + "epoch": 2.541770693297484, + "grad_norm": 14.072361946105957, + "learning_rate": 2.862290750727015e-06, + "loss": 0.2156, + "num_input_tokens_seen": 124946048, + "step": 39705 + }, + { + "epoch": 2.542090775238461, + "grad_norm": 30.76444435119629, + "learning_rate": 2.8617379865181483e-06, + "loss": 0.3019, + "num_input_tokens_seen": 124961600, + "step": 39710 + }, + { + "epoch": 2.542410857179438, + "grad_norm": 7.925790309906006, + "learning_rate": 2.8611852042459715e-06, + "loss": 0.2215, + "num_input_tokens_seen": 124979648, + "step": 39715 + }, + { + "epoch": 2.542730939120415, + "grad_norm": 9.666680335998535, + "learning_rate": 2.8606324039380865e-06, + "loss": 0.3225, + "num_input_tokens_seen": 124995008, + "step": 39720 + }, + { + "epoch": 2.543051021061392, + "grad_norm": 15.364862442016602, + "learning_rate": 2.860079585622098e-06, + "loss": 0.3222, + "num_input_tokens_seen": 125010560, + "step": 39725 + }, + { + "epoch": 2.5433711030023685, + "grad_norm": 8.001169204711914, + "learning_rate": 2.8595267493256103e-06, + "loss": 0.3629, + "num_input_tokens_seen": 125026112, + "step": 39730 + }, + { + "epoch": 2.5436911849433455, + "grad_norm": 10.78283405303955, + "learning_rate": 2.858973895076229e-06, + "loss": 0.1813, + "num_input_tokens_seen": 125042304, + "step": 39735 + }, + { + "epoch": 2.5440112668843224, + "grad_norm": 17.991493225097656, + "learning_rate": 2.8584210229015622e-06, + "loss": 0.2903, + "num_input_tokens_seen": 125058368, + "step": 39740 + }, + { + "epoch": 2.5443313488252994, + "grad_norm": 27.992855072021484, + "learning_rate": 2.857868132829216e-06, + "loss": 0.3299, + "num_input_tokens_seen": 125074496, + "step": 39745 + }, + { + "epoch": 2.544651430766276, + "grad_norm": 12.17162799835205, + "learning_rate": 2.857315224886799e-06, + "loss": 0.1938, + "num_input_tokens_seen": 125089792, + "step": 39750 + }, + { + "epoch": 2.544971512707253, + "grad_norm": 25.912954330444336, + "learning_rate": 2.856762299101921e-06, + "loss": 0.2714, + "num_input_tokens_seen": 125104704, + "step": 39755 + }, + { + "epoch": 2.54529159464823, + "grad_norm": 12.069341659545898, + "learning_rate": 2.856209355502192e-06, + "loss": 0.2626, + "num_input_tokens_seen": 125120896, + "step": 39760 + }, + { + "epoch": 2.545611676589207, + "grad_norm": 4.707726955413818, + "learning_rate": 2.8556563941152236e-06, + "loss": 0.2318, + "num_input_tokens_seen": 125136192, + "step": 39765 + }, + { + "epoch": 2.545931758530184, + "grad_norm": 16.42812156677246, + "learning_rate": 2.8551034149686274e-06, + "loss": 0.2387, + "num_input_tokens_seen": 125150592, + "step": 39770 + }, + { + "epoch": 2.5462518404711605, + "grad_norm": 7.734299659729004, + "learning_rate": 2.8545504180900164e-06, + "loss": 0.191, + "num_input_tokens_seen": 125167360, + "step": 39775 + }, + { + "epoch": 2.5465719224121375, + "grad_norm": 6.9417290687561035, + "learning_rate": 2.8539974035070044e-06, + "loss": 0.2008, + "num_input_tokens_seen": 125184128, + "step": 39780 + }, + { + "epoch": 2.5468920043531145, + "grad_norm": 7.453134536743164, + "learning_rate": 2.853444371247206e-06, + "loss": 0.2601, + "num_input_tokens_seen": 125202560, + "step": 39785 + }, + { + "epoch": 2.547212086294091, + "grad_norm": 8.641552925109863, + "learning_rate": 2.852891321338236e-06, + "loss": 0.243, + "num_input_tokens_seen": 125218176, + "step": 39790 + }, + { + "epoch": 2.547532168235068, + "grad_norm": 12.593292236328125, + "learning_rate": 2.852338253807712e-06, + "loss": 0.2192, + "num_input_tokens_seen": 125234496, + "step": 39795 + }, + { + "epoch": 2.547852250176045, + "grad_norm": 28.148975372314453, + "learning_rate": 2.8517851686832506e-06, + "loss": 0.2298, + "num_input_tokens_seen": 125249664, + "step": 39800 + }, + { + "epoch": 2.548172332117022, + "grad_norm": 16.327999114990234, + "learning_rate": 2.8512320659924703e-06, + "loss": 0.327, + "num_input_tokens_seen": 125267008, + "step": 39805 + }, + { + "epoch": 2.548492414057999, + "grad_norm": 36.17322540283203, + "learning_rate": 2.8506789457629906e-06, + "loss": 0.235, + "num_input_tokens_seen": 125282496, + "step": 39810 + }, + { + "epoch": 2.548812495998976, + "grad_norm": 6.240364074707031, + "learning_rate": 2.8501258080224308e-06, + "loss": 0.1675, + "num_input_tokens_seen": 125297408, + "step": 39815 + }, + { + "epoch": 2.5491325779399525, + "grad_norm": 53.986175537109375, + "learning_rate": 2.8495726527984112e-06, + "loss": 0.1942, + "num_input_tokens_seen": 125313088, + "step": 39820 + }, + { + "epoch": 2.5494526598809295, + "grad_norm": 22.577632904052734, + "learning_rate": 2.849019480118555e-06, + "loss": 0.2023, + "num_input_tokens_seen": 125328448, + "step": 39825 + }, + { + "epoch": 2.5497727418219065, + "grad_norm": 1.357703685760498, + "learning_rate": 2.8484662900104835e-06, + "loss": 0.3413, + "num_input_tokens_seen": 125343744, + "step": 39830 + }, + { + "epoch": 2.550092823762883, + "grad_norm": 11.108006477355957, + "learning_rate": 2.8479130825018214e-06, + "loss": 0.28, + "num_input_tokens_seen": 125359936, + "step": 39835 + }, + { + "epoch": 2.55041290570386, + "grad_norm": 8.136918067932129, + "learning_rate": 2.8473598576201916e-06, + "loss": 0.2827, + "num_input_tokens_seen": 125376128, + "step": 39840 + }, + { + "epoch": 2.550732987644837, + "grad_norm": 29.272157669067383, + "learning_rate": 2.8468066153932193e-06, + "loss": 0.287, + "num_input_tokens_seen": 125392000, + "step": 39845 + }, + { + "epoch": 2.551053069585814, + "grad_norm": 7.266817569732666, + "learning_rate": 2.846253355848532e-06, + "loss": 0.1688, + "num_input_tokens_seen": 125408512, + "step": 39850 + }, + { + "epoch": 2.551373151526791, + "grad_norm": 19.27082061767578, + "learning_rate": 2.845700079013755e-06, + "loss": 0.2146, + "num_input_tokens_seen": 125425408, + "step": 39855 + }, + { + "epoch": 2.551693233467768, + "grad_norm": 14.628301620483398, + "learning_rate": 2.8451467849165167e-06, + "loss": 0.1933, + "num_input_tokens_seen": 125440704, + "step": 39860 + }, + { + "epoch": 2.5520133154087445, + "grad_norm": 8.51124382019043, + "learning_rate": 2.844593473584446e-06, + "loss": 0.1679, + "num_input_tokens_seen": 125456448, + "step": 39865 + }, + { + "epoch": 2.5523333973497215, + "grad_norm": 3.227097272872925, + "learning_rate": 2.8440401450451727e-06, + "loss": 0.2809, + "num_input_tokens_seen": 125471552, + "step": 39870 + }, + { + "epoch": 2.5526534792906985, + "grad_norm": 7.665600299835205, + "learning_rate": 2.8434867993263254e-06, + "loss": 0.2265, + "num_input_tokens_seen": 125486400, + "step": 39875 + }, + { + "epoch": 2.552973561231675, + "grad_norm": 11.115821838378906, + "learning_rate": 2.8429334364555374e-06, + "loss": 0.2882, + "num_input_tokens_seen": 125501760, + "step": 39880 + }, + { + "epoch": 2.553293643172652, + "grad_norm": 7.333259105682373, + "learning_rate": 2.8423800564604397e-06, + "loss": 0.1316, + "num_input_tokens_seen": 125518336, + "step": 39885 + }, + { + "epoch": 2.553613725113629, + "grad_norm": 10.523496627807617, + "learning_rate": 2.8418266593686655e-06, + "loss": 0.2625, + "num_input_tokens_seen": 125533312, + "step": 39890 + }, + { + "epoch": 2.553933807054606, + "grad_norm": 9.82259750366211, + "learning_rate": 2.8412732452078484e-06, + "loss": 0.2524, + "num_input_tokens_seen": 125549632, + "step": 39895 + }, + { + "epoch": 2.554253888995583, + "grad_norm": 8.04072093963623, + "learning_rate": 2.840719814005623e-06, + "loss": 0.171, + "num_input_tokens_seen": 125564224, + "step": 39900 + }, + { + "epoch": 2.55457397093656, + "grad_norm": 4.497681617736816, + "learning_rate": 2.8401663657896247e-06, + "loss": 0.2254, + "num_input_tokens_seen": 125580928, + "step": 39905 + }, + { + "epoch": 2.5548940528775366, + "grad_norm": 19.85114097595215, + "learning_rate": 2.83961290058749e-06, + "loss": 0.3978, + "num_input_tokens_seen": 125600128, + "step": 39910 + }, + { + "epoch": 2.5552141348185136, + "grad_norm": 25.54197120666504, + "learning_rate": 2.839059418426856e-06, + "loss": 0.3144, + "num_input_tokens_seen": 125615744, + "step": 39915 + }, + { + "epoch": 2.5555342167594906, + "grad_norm": 6.206348896026611, + "learning_rate": 2.8385059193353602e-06, + "loss": 0.2625, + "num_input_tokens_seen": 125631040, + "step": 39920 + }, + { + "epoch": 2.555854298700467, + "grad_norm": 5.896967887878418, + "learning_rate": 2.837952403340643e-06, + "loss": 0.2009, + "num_input_tokens_seen": 125648448, + "step": 39925 + }, + { + "epoch": 2.556174380641444, + "grad_norm": 9.887957572937012, + "learning_rate": 2.8373988704703426e-06, + "loss": 0.1901, + "num_input_tokens_seen": 125664128, + "step": 39930 + }, + { + "epoch": 2.556494462582421, + "grad_norm": 9.855116844177246, + "learning_rate": 2.8368453207520998e-06, + "loss": 0.2775, + "num_input_tokens_seen": 125679360, + "step": 39935 + }, + { + "epoch": 2.556814544523398, + "grad_norm": 26.925186157226562, + "learning_rate": 2.836291754213557e-06, + "loss": 0.2407, + "num_input_tokens_seen": 125694144, + "step": 39940 + }, + { + "epoch": 2.557134626464375, + "grad_norm": 11.236572265625, + "learning_rate": 2.835738170882355e-06, + "loss": 0.1919, + "num_input_tokens_seen": 125710080, + "step": 39945 + }, + { + "epoch": 2.5574547084053516, + "grad_norm": 11.028860092163086, + "learning_rate": 2.835184570786138e-06, + "loss": 0.1835, + "num_input_tokens_seen": 125724544, + "step": 39950 + }, + { + "epoch": 2.5577747903463286, + "grad_norm": 8.118217468261719, + "learning_rate": 2.8346309539525494e-06, + "loss": 0.1916, + "num_input_tokens_seen": 125740224, + "step": 39955 + }, + { + "epoch": 2.5580948722873056, + "grad_norm": 8.552081108093262, + "learning_rate": 2.834077320409234e-06, + "loss": 0.2222, + "num_input_tokens_seen": 125756672, + "step": 39960 + }, + { + "epoch": 2.5584149542282826, + "grad_norm": 9.836065292358398, + "learning_rate": 2.833523670183837e-06, + "loss": 0.1859, + "num_input_tokens_seen": 125772480, + "step": 39965 + }, + { + "epoch": 2.558735036169259, + "grad_norm": 9.651877403259277, + "learning_rate": 2.832970003304005e-06, + "loss": 0.239, + "num_input_tokens_seen": 125788096, + "step": 39970 + }, + { + "epoch": 2.559055118110236, + "grad_norm": 23.33323097229004, + "learning_rate": 2.832416319797386e-06, + "loss": 0.2278, + "num_input_tokens_seen": 125802944, + "step": 39975 + }, + { + "epoch": 2.559375200051213, + "grad_norm": 43.254940032958984, + "learning_rate": 2.831862619691627e-06, + "loss": 0.1894, + "num_input_tokens_seen": 125818432, + "step": 39980 + }, + { + "epoch": 2.55969528199219, + "grad_norm": 10.702681541442871, + "learning_rate": 2.831308903014378e-06, + "loss": 0.2023, + "num_input_tokens_seen": 125835264, + "step": 39985 + }, + { + "epoch": 2.560015363933167, + "grad_norm": 4.5340094566345215, + "learning_rate": 2.8307551697932874e-06, + "loss": 0.3285, + "num_input_tokens_seen": 125851712, + "step": 39990 + }, + { + "epoch": 2.5603354458741436, + "grad_norm": 11.828371047973633, + "learning_rate": 2.8302014200560068e-06, + "loss": 0.2648, + "num_input_tokens_seen": 125871104, + "step": 39995 + }, + { + "epoch": 2.5606555278151206, + "grad_norm": 8.25086498260498, + "learning_rate": 2.8296476538301875e-06, + "loss": 0.2592, + "num_input_tokens_seen": 125886976, + "step": 40000 + }, + { + "epoch": 2.5609756097560976, + "grad_norm": 12.985546112060547, + "learning_rate": 2.829093871143481e-06, + "loss": 0.1919, + "num_input_tokens_seen": 125901248, + "step": 40005 + }, + { + "epoch": 2.5612956916970746, + "grad_norm": 11.598610877990723, + "learning_rate": 2.8285400720235414e-06, + "loss": 0.2342, + "num_input_tokens_seen": 125915584, + "step": 40010 + }, + { + "epoch": 2.561615773638051, + "grad_norm": 13.676925659179688, + "learning_rate": 2.8279862564980215e-06, + "loss": 0.2611, + "num_input_tokens_seen": 125931328, + "step": 40015 + }, + { + "epoch": 2.561935855579028, + "grad_norm": 10.751296043395996, + "learning_rate": 2.827432424594576e-06, + "loss": 0.1823, + "num_input_tokens_seen": 125947392, + "step": 40020 + }, + { + "epoch": 2.562255937520005, + "grad_norm": 29.034975051879883, + "learning_rate": 2.826878576340861e-06, + "loss": 0.1831, + "num_input_tokens_seen": 125963264, + "step": 40025 + }, + { + "epoch": 2.562576019460982, + "grad_norm": 20.760129928588867, + "learning_rate": 2.826324711764533e-06, + "loss": 0.2589, + "num_input_tokens_seen": 125978944, + "step": 40030 + }, + { + "epoch": 2.562896101401959, + "grad_norm": 15.613178253173828, + "learning_rate": 2.825770830893248e-06, + "loss": 0.2792, + "num_input_tokens_seen": 125994880, + "step": 40035 + }, + { + "epoch": 2.5632161833429357, + "grad_norm": 7.782827854156494, + "learning_rate": 2.825216933754665e-06, + "loss": 0.1957, + "num_input_tokens_seen": 126010368, + "step": 40040 + }, + { + "epoch": 2.5635362652839127, + "grad_norm": 25.573923110961914, + "learning_rate": 2.8246630203764426e-06, + "loss": 0.1953, + "num_input_tokens_seen": 126029504, + "step": 40045 + }, + { + "epoch": 2.5638563472248896, + "grad_norm": 14.244132995605469, + "learning_rate": 2.8241090907862388e-06, + "loss": 0.1655, + "num_input_tokens_seen": 126045696, + "step": 40050 + }, + { + "epoch": 2.564176429165866, + "grad_norm": 15.315010070800781, + "learning_rate": 2.823555145011716e-06, + "loss": 0.24, + "num_input_tokens_seen": 126061888, + "step": 40055 + }, + { + "epoch": 2.564496511106843, + "grad_norm": 13.243570327758789, + "learning_rate": 2.8230011830805347e-06, + "loss": 0.194, + "num_input_tokens_seen": 126077184, + "step": 40060 + }, + { + "epoch": 2.56481659304782, + "grad_norm": 70.0346450805664, + "learning_rate": 2.822447205020357e-06, + "loss": 0.2946, + "num_input_tokens_seen": 126092352, + "step": 40065 + }, + { + "epoch": 2.565136674988797, + "grad_norm": 29.012046813964844, + "learning_rate": 2.821893210858846e-06, + "loss": 0.2528, + "num_input_tokens_seen": 126107456, + "step": 40070 + }, + { + "epoch": 2.565456756929774, + "grad_norm": 28.824926376342773, + "learning_rate": 2.8213392006236635e-06, + "loss": 0.2257, + "num_input_tokens_seen": 126122688, + "step": 40075 + }, + { + "epoch": 2.565776838870751, + "grad_norm": 18.66287612915039, + "learning_rate": 2.8207851743424753e-06, + "loss": 0.2635, + "num_input_tokens_seen": 126137472, + "step": 40080 + }, + { + "epoch": 2.5660969208117277, + "grad_norm": 12.519660949707031, + "learning_rate": 2.820231132042947e-06, + "loss": 0.2849, + "num_input_tokens_seen": 126153280, + "step": 40085 + }, + { + "epoch": 2.5664170027527047, + "grad_norm": 8.840042114257812, + "learning_rate": 2.8196770737527434e-06, + "loss": 0.1777, + "num_input_tokens_seen": 126169152, + "step": 40090 + }, + { + "epoch": 2.5667370846936817, + "grad_norm": 15.796510696411133, + "learning_rate": 2.8191229994995317e-06, + "loss": 0.2273, + "num_input_tokens_seen": 126185600, + "step": 40095 + }, + { + "epoch": 2.567057166634658, + "grad_norm": 57.166099548339844, + "learning_rate": 2.8185689093109803e-06, + "loss": 0.2582, + "num_input_tokens_seen": 126202368, + "step": 40100 + }, + { + "epoch": 2.567377248575635, + "grad_norm": 24.315460205078125, + "learning_rate": 2.8180148032147563e-06, + "loss": 0.2239, + "num_input_tokens_seen": 126217856, + "step": 40105 + }, + { + "epoch": 2.567697330516612, + "grad_norm": 8.641668319702148, + "learning_rate": 2.81746068123853e-06, + "loss": 0.2796, + "num_input_tokens_seen": 126232768, + "step": 40110 + }, + { + "epoch": 2.568017412457589, + "grad_norm": 69.26226806640625, + "learning_rate": 2.8169065434099714e-06, + "loss": 0.2338, + "num_input_tokens_seen": 126248320, + "step": 40115 + }, + { + "epoch": 2.568337494398566, + "grad_norm": 14.292060852050781, + "learning_rate": 2.81635238975675e-06, + "loss": 0.3427, + "num_input_tokens_seen": 126263872, + "step": 40120 + }, + { + "epoch": 2.568657576339543, + "grad_norm": 6.072351455688477, + "learning_rate": 2.815798220306538e-06, + "loss": 0.1515, + "num_input_tokens_seen": 126278720, + "step": 40125 + }, + { + "epoch": 2.5689776582805197, + "grad_norm": 7.930324077606201, + "learning_rate": 2.815244035087008e-06, + "loss": 0.2018, + "num_input_tokens_seen": 126293248, + "step": 40130 + }, + { + "epoch": 2.5692977402214967, + "grad_norm": 2.8365001678466797, + "learning_rate": 2.8146898341258328e-06, + "loss": 0.2389, + "num_input_tokens_seen": 126308992, + "step": 40135 + }, + { + "epoch": 2.5696178221624737, + "grad_norm": 12.528871536254883, + "learning_rate": 2.8141356174506865e-06, + "loss": 0.2551, + "num_input_tokens_seen": 126324480, + "step": 40140 + }, + { + "epoch": 2.5699379041034502, + "grad_norm": 10.238134384155273, + "learning_rate": 2.813581385089244e-06, + "loss": 0.2702, + "num_input_tokens_seen": 126339392, + "step": 40145 + }, + { + "epoch": 2.5702579860444272, + "grad_norm": 7.533078670501709, + "learning_rate": 2.8130271370691802e-06, + "loss": 0.1983, + "num_input_tokens_seen": 126354304, + "step": 40150 + }, + { + "epoch": 2.5705780679854042, + "grad_norm": 59.06941223144531, + "learning_rate": 2.8124728734181716e-06, + "loss": 0.2871, + "num_input_tokens_seen": 126370240, + "step": 40155 + }, + { + "epoch": 2.570898149926381, + "grad_norm": 14.037846565246582, + "learning_rate": 2.811918594163896e-06, + "loss": 0.3206, + "num_input_tokens_seen": 126386560, + "step": 40160 + }, + { + "epoch": 2.571218231867358, + "grad_norm": 22.8270263671875, + "learning_rate": 2.8113642993340296e-06, + "loss": 0.1632, + "num_input_tokens_seen": 126403136, + "step": 40165 + }, + { + "epoch": 2.571538313808335, + "grad_norm": 16.859539031982422, + "learning_rate": 2.8108099889562524e-06, + "loss": 0.2099, + "num_input_tokens_seen": 126417856, + "step": 40170 + }, + { + "epoch": 2.5718583957493117, + "grad_norm": 45.26021957397461, + "learning_rate": 2.8102556630582435e-06, + "loss": 0.2497, + "num_input_tokens_seen": 126433408, + "step": 40175 + }, + { + "epoch": 2.5721784776902887, + "grad_norm": 25.683917999267578, + "learning_rate": 2.8097013216676826e-06, + "loss": 0.2417, + "num_input_tokens_seen": 126448064, + "step": 40180 + }, + { + "epoch": 2.5724985596312657, + "grad_norm": 4.372764587402344, + "learning_rate": 2.809146964812251e-06, + "loss": 0.1375, + "num_input_tokens_seen": 126462784, + "step": 40185 + }, + { + "epoch": 2.5728186415722423, + "grad_norm": 9.49170970916748, + "learning_rate": 2.80859259251963e-06, + "loss": 0.2106, + "num_input_tokens_seen": 126478336, + "step": 40190 + }, + { + "epoch": 2.5731387235132193, + "grad_norm": 27.58793067932129, + "learning_rate": 2.8080382048175026e-06, + "loss": 0.1834, + "num_input_tokens_seen": 126493184, + "step": 40195 + }, + { + "epoch": 2.5734588054541963, + "grad_norm": 15.270527839660645, + "learning_rate": 2.8074838017335515e-06, + "loss": 0.2348, + "num_input_tokens_seen": 126509824, + "step": 40200 + }, + { + "epoch": 2.5737788873951732, + "grad_norm": 7.008871078491211, + "learning_rate": 2.8069293832954613e-06, + "loss": 0.2725, + "num_input_tokens_seen": 126526080, + "step": 40205 + }, + { + "epoch": 2.5740989693361502, + "grad_norm": 25.55521583557129, + "learning_rate": 2.806374949530916e-06, + "loss": 0.2202, + "num_input_tokens_seen": 126541568, + "step": 40210 + }, + { + "epoch": 2.574419051277127, + "grad_norm": 16.17547035217285, + "learning_rate": 2.805820500467602e-06, + "loss": 0.2271, + "num_input_tokens_seen": 126556864, + "step": 40215 + }, + { + "epoch": 2.5747391332181038, + "grad_norm": 13.923786163330078, + "learning_rate": 2.8052660361332045e-06, + "loss": 0.2008, + "num_input_tokens_seen": 126573376, + "step": 40220 + }, + { + "epoch": 2.5750592151590808, + "grad_norm": 7.1095404624938965, + "learning_rate": 2.804711556555412e-06, + "loss": 0.1942, + "num_input_tokens_seen": 126591168, + "step": 40225 + }, + { + "epoch": 2.5753792971000578, + "grad_norm": 11.678969383239746, + "learning_rate": 2.8041570617619114e-06, + "loss": 0.2544, + "num_input_tokens_seen": 126606784, + "step": 40230 + }, + { + "epoch": 2.5756993790410343, + "grad_norm": 6.903146266937256, + "learning_rate": 2.8036025517803915e-06, + "loss": 0.1715, + "num_input_tokens_seen": 126622976, + "step": 40235 + }, + { + "epoch": 2.5760194609820113, + "grad_norm": 27.91892433166504, + "learning_rate": 2.8030480266385413e-06, + "loss": 0.2383, + "num_input_tokens_seen": 126638720, + "step": 40240 + }, + { + "epoch": 2.5763395429229883, + "grad_norm": 11.181923866271973, + "learning_rate": 2.8024934863640507e-06, + "loss": 0.1301, + "num_input_tokens_seen": 126654144, + "step": 40245 + }, + { + "epoch": 2.5766596248639653, + "grad_norm": 6.7241387367248535, + "learning_rate": 2.8019389309846116e-06, + "loss": 0.2135, + "num_input_tokens_seen": 126669056, + "step": 40250 + }, + { + "epoch": 2.5769797068049423, + "grad_norm": 33.2452278137207, + "learning_rate": 2.801384360527916e-06, + "loss": 0.2559, + "num_input_tokens_seen": 126683904, + "step": 40255 + }, + { + "epoch": 2.577299788745919, + "grad_norm": 12.045188903808594, + "learning_rate": 2.800829775021654e-06, + "loss": 0.1696, + "num_input_tokens_seen": 126699648, + "step": 40260 + }, + { + "epoch": 2.577619870686896, + "grad_norm": 9.842594146728516, + "learning_rate": 2.8002751744935204e-06, + "loss": 0.2481, + "num_input_tokens_seen": 126715072, + "step": 40265 + }, + { + "epoch": 2.577939952627873, + "grad_norm": 12.880126953125, + "learning_rate": 2.799720558971208e-06, + "loss": 0.1637, + "num_input_tokens_seen": 126731072, + "step": 40270 + }, + { + "epoch": 2.57826003456885, + "grad_norm": 13.627829551696777, + "learning_rate": 2.799165928482413e-06, + "loss": 0.332, + "num_input_tokens_seen": 126747072, + "step": 40275 + }, + { + "epoch": 2.5785801165098263, + "grad_norm": 11.241034507751465, + "learning_rate": 2.798611283054829e-06, + "loss": 0.1858, + "num_input_tokens_seen": 126762624, + "step": 40280 + }, + { + "epoch": 2.5789001984508033, + "grad_norm": 12.729536056518555, + "learning_rate": 2.798056622716153e-06, + "loss": 0.219, + "num_input_tokens_seen": 126778112, + "step": 40285 + }, + { + "epoch": 2.5792202803917803, + "grad_norm": 3.952590227127075, + "learning_rate": 2.797501947494083e-06, + "loss": 0.264, + "num_input_tokens_seen": 126794112, + "step": 40290 + }, + { + "epoch": 2.5795403623327573, + "grad_norm": 11.765562057495117, + "learning_rate": 2.7969472574163142e-06, + "loss": 0.2105, + "num_input_tokens_seen": 126808832, + "step": 40295 + }, + { + "epoch": 2.5798604442737343, + "grad_norm": 36.822662353515625, + "learning_rate": 2.7963925525105467e-06, + "loss": 0.205, + "num_input_tokens_seen": 126824960, + "step": 40300 + }, + { + "epoch": 2.580180526214711, + "grad_norm": 54.56272506713867, + "learning_rate": 2.7958378328044784e-06, + "loss": 0.1652, + "num_input_tokens_seen": 126839552, + "step": 40305 + }, + { + "epoch": 2.580500608155688, + "grad_norm": 8.125021934509277, + "learning_rate": 2.79528309832581e-06, + "loss": 0.2387, + "num_input_tokens_seen": 126855488, + "step": 40310 + }, + { + "epoch": 2.580820690096665, + "grad_norm": 81.59183502197266, + "learning_rate": 2.794728349102242e-06, + "loss": 0.1717, + "num_input_tokens_seen": 126872320, + "step": 40315 + }, + { + "epoch": 2.5811407720376414, + "grad_norm": 12.54194164276123, + "learning_rate": 2.7941735851614755e-06, + "loss": 0.357, + "num_input_tokens_seen": 126887680, + "step": 40320 + }, + { + "epoch": 2.5814608539786184, + "grad_norm": 6.209218978881836, + "learning_rate": 2.793618806531212e-06, + "loss": 0.1238, + "num_input_tokens_seen": 126904064, + "step": 40325 + }, + { + "epoch": 2.5817809359195953, + "grad_norm": 9.226082801818848, + "learning_rate": 2.7930640132391545e-06, + "loss": 0.2239, + "num_input_tokens_seen": 126919232, + "step": 40330 + }, + { + "epoch": 2.5821010178605723, + "grad_norm": 45.109100341796875, + "learning_rate": 2.7925092053130064e-06, + "loss": 0.2197, + "num_input_tokens_seen": 126934784, + "step": 40335 + }, + { + "epoch": 2.5824210998015493, + "grad_norm": 12.52566146850586, + "learning_rate": 2.7919543827804735e-06, + "loss": 0.3262, + "num_input_tokens_seen": 126950464, + "step": 40340 + }, + { + "epoch": 2.5827411817425263, + "grad_norm": 52.20455551147461, + "learning_rate": 2.791399545669259e-06, + "loss": 0.2917, + "num_input_tokens_seen": 126979456, + "step": 40345 + }, + { + "epoch": 2.583061263683503, + "grad_norm": 4.165820121765137, + "learning_rate": 2.7908446940070687e-06, + "loss": 0.2101, + "num_input_tokens_seen": 126995136, + "step": 40350 + }, + { + "epoch": 2.58338134562448, + "grad_norm": 7.075889587402344, + "learning_rate": 2.7902898278216095e-06, + "loss": 0.2366, + "num_input_tokens_seen": 127010816, + "step": 40355 + }, + { + "epoch": 2.583701427565457, + "grad_norm": 21.930522918701172, + "learning_rate": 2.7897349471405884e-06, + "loss": 0.2774, + "num_input_tokens_seen": 127026432, + "step": 40360 + }, + { + "epoch": 2.5840215095064334, + "grad_norm": 3.483004570007324, + "learning_rate": 2.789180051991713e-06, + "loss": 0.178, + "num_input_tokens_seen": 127042496, + "step": 40365 + }, + { + "epoch": 2.5843415914474104, + "grad_norm": 11.685827255249023, + "learning_rate": 2.7886251424026924e-06, + "loss": 0.2812, + "num_input_tokens_seen": 127058112, + "step": 40370 + }, + { + "epoch": 2.5846616733883874, + "grad_norm": 4.165320873260498, + "learning_rate": 2.7880702184012355e-06, + "loss": 0.1885, + "num_input_tokens_seen": 127073856, + "step": 40375 + }, + { + "epoch": 2.5849817553293644, + "grad_norm": 6.42250394821167, + "learning_rate": 2.787515280015052e-06, + "loss": 0.207, + "num_input_tokens_seen": 127089856, + "step": 40380 + }, + { + "epoch": 2.5853018372703414, + "grad_norm": 9.705294609069824, + "learning_rate": 2.786960327271853e-06, + "loss": 0.176, + "num_input_tokens_seen": 127104448, + "step": 40385 + }, + { + "epoch": 2.5856219192113183, + "grad_norm": 28.74517250061035, + "learning_rate": 2.7864053601993506e-06, + "loss": 0.2844, + "num_input_tokens_seen": 127120000, + "step": 40390 + }, + { + "epoch": 2.585942001152295, + "grad_norm": 33.97887420654297, + "learning_rate": 2.7858503788252562e-06, + "loss": 0.3363, + "num_input_tokens_seen": 127137024, + "step": 40395 + }, + { + "epoch": 2.586262083093272, + "grad_norm": 46.9144401550293, + "learning_rate": 2.785295383177283e-06, + "loss": 0.2154, + "num_input_tokens_seen": 127152384, + "step": 40400 + }, + { + "epoch": 2.586582165034249, + "grad_norm": 4.721399307250977, + "learning_rate": 2.784740373283144e-06, + "loss": 0.1823, + "num_input_tokens_seen": 127168192, + "step": 40405 + }, + { + "epoch": 2.5869022469752254, + "grad_norm": 29.36037254333496, + "learning_rate": 2.784185349170554e-06, + "loss": 0.2541, + "num_input_tokens_seen": 127184192, + "step": 40410 + }, + { + "epoch": 2.5872223289162024, + "grad_norm": 12.444198608398438, + "learning_rate": 2.783630310867228e-06, + "loss": 0.2417, + "num_input_tokens_seen": 127199360, + "step": 40415 + }, + { + "epoch": 2.5875424108571794, + "grad_norm": 12.96671199798584, + "learning_rate": 2.783075258400881e-06, + "loss": 0.2424, + "num_input_tokens_seen": 127214016, + "step": 40420 + }, + { + "epoch": 2.5878624927981564, + "grad_norm": 23.035634994506836, + "learning_rate": 2.782520191799231e-06, + "loss": 0.2632, + "num_input_tokens_seen": 127232000, + "step": 40425 + }, + { + "epoch": 2.5881825747391334, + "grad_norm": 5.271570205688477, + "learning_rate": 2.7819651110899943e-06, + "loss": 0.2933, + "num_input_tokens_seen": 127247872, + "step": 40430 + }, + { + "epoch": 2.5885026566801104, + "grad_norm": 6.744181156158447, + "learning_rate": 2.781410016300888e-06, + "loss": 0.3145, + "num_input_tokens_seen": 127264128, + "step": 40435 + }, + { + "epoch": 2.588822738621087, + "grad_norm": 15.992965698242188, + "learning_rate": 2.7808549074596318e-06, + "loss": 0.1872, + "num_input_tokens_seen": 127279296, + "step": 40440 + }, + { + "epoch": 2.589142820562064, + "grad_norm": 6.340536117553711, + "learning_rate": 2.780299784593944e-06, + "loss": 0.1627, + "num_input_tokens_seen": 127295232, + "step": 40445 + }, + { + "epoch": 2.589462902503041, + "grad_norm": 6.780928134918213, + "learning_rate": 2.7797446477315448e-06, + "loss": 0.2176, + "num_input_tokens_seen": 127311488, + "step": 40450 + }, + { + "epoch": 2.5897829844440174, + "grad_norm": 4.489713668823242, + "learning_rate": 2.7791894969001555e-06, + "loss": 0.2127, + "num_input_tokens_seen": 127326912, + "step": 40455 + }, + { + "epoch": 2.5901030663849944, + "grad_norm": 19.369049072265625, + "learning_rate": 2.7786343321274967e-06, + "loss": 0.2832, + "num_input_tokens_seen": 127341888, + "step": 40460 + }, + { + "epoch": 2.5904231483259714, + "grad_norm": 7.781414985656738, + "learning_rate": 2.778079153441291e-06, + "loss": 0.1455, + "num_input_tokens_seen": 127357120, + "step": 40465 + }, + { + "epoch": 2.5907432302669484, + "grad_norm": 5.6934814453125, + "learning_rate": 2.777523960869261e-06, + "loss": 0.3235, + "num_input_tokens_seen": 127373696, + "step": 40470 + }, + { + "epoch": 2.5910633122079254, + "grad_norm": 10.77383804321289, + "learning_rate": 2.776968754439129e-06, + "loss": 0.2108, + "num_input_tokens_seen": 127389440, + "step": 40475 + }, + { + "epoch": 2.591383394148902, + "grad_norm": 8.086007118225098, + "learning_rate": 2.7764135341786214e-06, + "loss": 0.2055, + "num_input_tokens_seen": 127404800, + "step": 40480 + }, + { + "epoch": 2.591703476089879, + "grad_norm": 7.190048694610596, + "learning_rate": 2.775858300115461e-06, + "loss": 0.2051, + "num_input_tokens_seen": 127419904, + "step": 40485 + }, + { + "epoch": 2.592023558030856, + "grad_norm": 23.326839447021484, + "learning_rate": 2.7753030522773745e-06, + "loss": 0.3448, + "num_input_tokens_seen": 127435200, + "step": 40490 + }, + { + "epoch": 2.592343639971833, + "grad_norm": 13.765336990356445, + "learning_rate": 2.774747790692087e-06, + "loss": 0.2214, + "num_input_tokens_seen": 127450496, + "step": 40495 + }, + { + "epoch": 2.5926637219128095, + "grad_norm": 6.078306198120117, + "learning_rate": 2.7741925153873263e-06, + "loss": 0.1552, + "num_input_tokens_seen": 127467328, + "step": 40500 + }, + { + "epoch": 2.5929838038537865, + "grad_norm": 6.100549697875977, + "learning_rate": 2.773637226390819e-06, + "loss": 0.2552, + "num_input_tokens_seen": 127483072, + "step": 40505 + }, + { + "epoch": 2.5933038857947635, + "grad_norm": 11.810319900512695, + "learning_rate": 2.7730819237302935e-06, + "loss": 0.2069, + "num_input_tokens_seen": 127499840, + "step": 40510 + }, + { + "epoch": 2.5936239677357404, + "grad_norm": 59.29362869262695, + "learning_rate": 2.7725266074334807e-06, + "loss": 0.2342, + "num_input_tokens_seen": 127514816, + "step": 40515 + }, + { + "epoch": 2.5939440496767174, + "grad_norm": 32.72431182861328, + "learning_rate": 2.771971277528108e-06, + "loss": 0.2927, + "num_input_tokens_seen": 127531072, + "step": 40520 + }, + { + "epoch": 2.594264131617694, + "grad_norm": 10.901833534240723, + "learning_rate": 2.771415934041906e-06, + "loss": 0.2973, + "num_input_tokens_seen": 127549952, + "step": 40525 + }, + { + "epoch": 2.594584213558671, + "grad_norm": 7.8463006019592285, + "learning_rate": 2.7708605770026064e-06, + "loss": 0.1886, + "num_input_tokens_seen": 127567872, + "step": 40530 + }, + { + "epoch": 2.594904295499648, + "grad_norm": 9.6135892868042, + "learning_rate": 2.7703052064379398e-06, + "loss": 0.15, + "num_input_tokens_seen": 127582528, + "step": 40535 + }, + { + "epoch": 2.595224377440625, + "grad_norm": 15.985236167907715, + "learning_rate": 2.76974982237564e-06, + "loss": 0.1754, + "num_input_tokens_seen": 127598144, + "step": 40540 + }, + { + "epoch": 2.5955444593816015, + "grad_norm": 12.98289966583252, + "learning_rate": 2.7691944248434388e-06, + "loss": 0.2159, + "num_input_tokens_seen": 127615360, + "step": 40545 + }, + { + "epoch": 2.5958645413225785, + "grad_norm": 16.955551147460938, + "learning_rate": 2.7686390138690706e-06, + "loss": 0.2137, + "num_input_tokens_seen": 127629888, + "step": 40550 + }, + { + "epoch": 2.5961846232635555, + "grad_norm": 17.47580909729004, + "learning_rate": 2.7680835894802693e-06, + "loss": 0.2748, + "num_input_tokens_seen": 127644224, + "step": 40555 + }, + { + "epoch": 2.5965047052045325, + "grad_norm": 10.271271705627441, + "learning_rate": 2.767528151704769e-06, + "loss": 0.2182, + "num_input_tokens_seen": 127659904, + "step": 40560 + }, + { + "epoch": 2.5968247871455095, + "grad_norm": 13.51440143585205, + "learning_rate": 2.766972700570306e-06, + "loss": 0.2189, + "num_input_tokens_seen": 127675904, + "step": 40565 + }, + { + "epoch": 2.597144869086486, + "grad_norm": 10.583700180053711, + "learning_rate": 2.766417236104618e-06, + "loss": 0.2486, + "num_input_tokens_seen": 127691904, + "step": 40570 + }, + { + "epoch": 2.597464951027463, + "grad_norm": 154.47483825683594, + "learning_rate": 2.765861758335441e-06, + "loss": 0.3322, + "num_input_tokens_seen": 127707264, + "step": 40575 + }, + { + "epoch": 2.59778503296844, + "grad_norm": 8.717446327209473, + "learning_rate": 2.765306267290513e-06, + "loss": 0.2168, + "num_input_tokens_seen": 127724480, + "step": 40580 + }, + { + "epoch": 2.5981051149094165, + "grad_norm": 21.650257110595703, + "learning_rate": 2.764750762997571e-06, + "loss": 0.163, + "num_input_tokens_seen": 127740736, + "step": 40585 + }, + { + "epoch": 2.5984251968503935, + "grad_norm": 6.200979232788086, + "learning_rate": 2.764195245484355e-06, + "loss": 0.1926, + "num_input_tokens_seen": 127756480, + "step": 40590 + }, + { + "epoch": 2.5987452787913705, + "grad_norm": 9.081363677978516, + "learning_rate": 2.7636397147786053e-06, + "loss": 0.174, + "num_input_tokens_seen": 127772352, + "step": 40595 + }, + { + "epoch": 2.5990653607323475, + "grad_norm": 33.96547317504883, + "learning_rate": 2.7630841709080615e-06, + "loss": 0.2995, + "num_input_tokens_seen": 127787136, + "step": 40600 + }, + { + "epoch": 2.5993854426733245, + "grad_norm": 15.360040664672852, + "learning_rate": 2.7625286139004635e-06, + "loss": 0.249, + "num_input_tokens_seen": 127803840, + "step": 40605 + }, + { + "epoch": 2.5997055246143015, + "grad_norm": 10.759800910949707, + "learning_rate": 2.761973043783555e-06, + "loss": 0.1906, + "num_input_tokens_seen": 127819712, + "step": 40610 + }, + { + "epoch": 2.600025606555278, + "grad_norm": 35.02130126953125, + "learning_rate": 2.7614174605850776e-06, + "loss": 0.3053, + "num_input_tokens_seen": 127835328, + "step": 40615 + }, + { + "epoch": 2.600345688496255, + "grad_norm": 10.564120292663574, + "learning_rate": 2.7608618643327724e-06, + "loss": 0.3161, + "num_input_tokens_seen": 127851904, + "step": 40620 + }, + { + "epoch": 2.600665770437232, + "grad_norm": 6.4356560707092285, + "learning_rate": 2.7603062550543846e-06, + "loss": 0.1776, + "num_input_tokens_seen": 127866624, + "step": 40625 + }, + { + "epoch": 2.6009858523782086, + "grad_norm": 8.131680488586426, + "learning_rate": 2.759750632777659e-06, + "loss": 0.3068, + "num_input_tokens_seen": 127881664, + "step": 40630 + }, + { + "epoch": 2.6013059343191856, + "grad_norm": 7.348508358001709, + "learning_rate": 2.75919499753034e-06, + "loss": 0.1777, + "num_input_tokens_seen": 127896704, + "step": 40635 + }, + { + "epoch": 2.6016260162601625, + "grad_norm": 8.605745315551758, + "learning_rate": 2.758639349340173e-06, + "loss": 0.2716, + "num_input_tokens_seen": 127915264, + "step": 40640 + }, + { + "epoch": 2.6019460982011395, + "grad_norm": 58.789798736572266, + "learning_rate": 2.758083688234904e-06, + "loss": 0.2362, + "num_input_tokens_seen": 127931264, + "step": 40645 + }, + { + "epoch": 2.6022661801421165, + "grad_norm": 13.237247467041016, + "learning_rate": 2.757528014242279e-06, + "loss": 0.1817, + "num_input_tokens_seen": 127948032, + "step": 40650 + }, + { + "epoch": 2.6025862620830935, + "grad_norm": 6.363697052001953, + "learning_rate": 2.7569723273900474e-06, + "loss": 0.2391, + "num_input_tokens_seen": 127965312, + "step": 40655 + }, + { + "epoch": 2.60290634402407, + "grad_norm": 7.637840747833252, + "learning_rate": 2.756416627705956e-06, + "loss": 0.1652, + "num_input_tokens_seen": 127980544, + "step": 40660 + }, + { + "epoch": 2.603226425965047, + "grad_norm": 10.211756706237793, + "learning_rate": 2.755860915217754e-06, + "loss": 0.1585, + "num_input_tokens_seen": 127996224, + "step": 40665 + }, + { + "epoch": 2.603546507906024, + "grad_norm": 7.1551384925842285, + "learning_rate": 2.75530518995319e-06, + "loss": 0.163, + "num_input_tokens_seen": 128012608, + "step": 40670 + }, + { + "epoch": 2.6038665898470006, + "grad_norm": 24.42405128479004, + "learning_rate": 2.754749451940015e-06, + "loss": 0.3127, + "num_input_tokens_seen": 128027584, + "step": 40675 + }, + { + "epoch": 2.6041866717879776, + "grad_norm": 4.584772109985352, + "learning_rate": 2.7541937012059787e-06, + "loss": 0.2108, + "num_input_tokens_seen": 128043648, + "step": 40680 + }, + { + "epoch": 2.6045067537289546, + "grad_norm": 16.539257049560547, + "learning_rate": 2.753637937778834e-06, + "loss": 0.2278, + "num_input_tokens_seen": 128058880, + "step": 40685 + }, + { + "epoch": 2.6048268356699316, + "grad_norm": 8.23320198059082, + "learning_rate": 2.7530821616863317e-06, + "loss": 0.2063, + "num_input_tokens_seen": 128075264, + "step": 40690 + }, + { + "epoch": 2.6051469176109086, + "grad_norm": 70.27928161621094, + "learning_rate": 2.7525263729562247e-06, + "loss": 0.2888, + "num_input_tokens_seen": 128091904, + "step": 40695 + }, + { + "epoch": 2.6054669995518855, + "grad_norm": 10.534892082214355, + "learning_rate": 2.7519705716162653e-06, + "loss": 0.2072, + "num_input_tokens_seen": 128106560, + "step": 40700 + }, + { + "epoch": 2.605787081492862, + "grad_norm": 15.83415412902832, + "learning_rate": 2.751414757694208e-06, + "loss": 0.1759, + "num_input_tokens_seen": 128122496, + "step": 40705 + }, + { + "epoch": 2.606107163433839, + "grad_norm": 11.459822654724121, + "learning_rate": 2.750858931217808e-06, + "loss": 0.2457, + "num_input_tokens_seen": 128136960, + "step": 40710 + }, + { + "epoch": 2.606427245374816, + "grad_norm": 31.125951766967773, + "learning_rate": 2.7503030922148194e-06, + "loss": 0.1815, + "num_input_tokens_seen": 128153472, + "step": 40715 + }, + { + "epoch": 2.6067473273157926, + "grad_norm": 7.751552581787109, + "learning_rate": 2.7497472407129984e-06, + "loss": 0.2602, + "num_input_tokens_seen": 128169664, + "step": 40720 + }, + { + "epoch": 2.6070674092567696, + "grad_norm": 3.3892009258270264, + "learning_rate": 2.749191376740101e-06, + "loss": 0.1712, + "num_input_tokens_seen": 128186240, + "step": 40725 + }, + { + "epoch": 2.6073874911977466, + "grad_norm": 8.716469764709473, + "learning_rate": 2.748635500323883e-06, + "loss": 0.2503, + "num_input_tokens_seen": 128201216, + "step": 40730 + }, + { + "epoch": 2.6077075731387236, + "grad_norm": 9.73806381225586, + "learning_rate": 2.7480796114921044e-06, + "loss": 0.2093, + "num_input_tokens_seen": 128217152, + "step": 40735 + }, + { + "epoch": 2.6080276550797006, + "grad_norm": 10.579660415649414, + "learning_rate": 2.7475237102725216e-06, + "loss": 0.2199, + "num_input_tokens_seen": 128232576, + "step": 40740 + }, + { + "epoch": 2.6083477370206776, + "grad_norm": 55.89817810058594, + "learning_rate": 2.7469677966928943e-06, + "loss": 0.2027, + "num_input_tokens_seen": 128247360, + "step": 40745 + }, + { + "epoch": 2.608667818961654, + "grad_norm": 12.878406524658203, + "learning_rate": 2.746411870780982e-06, + "loss": 0.3396, + "num_input_tokens_seen": 128262976, + "step": 40750 + }, + { + "epoch": 2.608987900902631, + "grad_norm": 8.279939651489258, + "learning_rate": 2.745855932564544e-06, + "loss": 0.2575, + "num_input_tokens_seen": 128278528, + "step": 40755 + }, + { + "epoch": 2.609307982843608, + "grad_norm": 106.87232208251953, + "learning_rate": 2.7452999820713417e-06, + "loss": 0.3572, + "num_input_tokens_seen": 128293952, + "step": 40760 + }, + { + "epoch": 2.6096280647845846, + "grad_norm": 18.329801559448242, + "learning_rate": 2.7447440193291353e-06, + "loss": 0.2125, + "num_input_tokens_seen": 128310208, + "step": 40765 + }, + { + "epoch": 2.6099481467255616, + "grad_norm": 6.093523025512695, + "learning_rate": 2.744188044365688e-06, + "loss": 0.219, + "num_input_tokens_seen": 128325376, + "step": 40770 + }, + { + "epoch": 2.6102682286665386, + "grad_norm": 12.53476619720459, + "learning_rate": 2.7436320572087614e-06, + "loss": 0.1755, + "num_input_tokens_seen": 128341952, + "step": 40775 + }, + { + "epoch": 2.6105883106075156, + "grad_norm": 28.095491409301758, + "learning_rate": 2.743076057886119e-06, + "loss": 0.2195, + "num_input_tokens_seen": 128358528, + "step": 40780 + }, + { + "epoch": 2.6109083925484926, + "grad_norm": 10.997230529785156, + "learning_rate": 2.7425200464255253e-06, + "loss": 0.2159, + "num_input_tokens_seen": 128373568, + "step": 40785 + }, + { + "epoch": 2.611228474489469, + "grad_norm": 10.26932144165039, + "learning_rate": 2.741964022854742e-06, + "loss": 0.2201, + "num_input_tokens_seen": 128389056, + "step": 40790 + }, + { + "epoch": 2.611548556430446, + "grad_norm": 10.853693962097168, + "learning_rate": 2.7414079872015367e-06, + "loss": 0.2496, + "num_input_tokens_seen": 128404160, + "step": 40795 + }, + { + "epoch": 2.611868638371423, + "grad_norm": 11.536881446838379, + "learning_rate": 2.740851939493674e-06, + "loss": 0.2125, + "num_input_tokens_seen": 128419904, + "step": 40800 + }, + { + "epoch": 2.6121887203124, + "grad_norm": 40.72044372558594, + "learning_rate": 2.7402958797589197e-06, + "loss": 0.2583, + "num_input_tokens_seen": 128435392, + "step": 40805 + }, + { + "epoch": 2.6125088022533767, + "grad_norm": 5.257191181182861, + "learning_rate": 2.739739808025041e-06, + "loss": 0.2339, + "num_input_tokens_seen": 128450880, + "step": 40810 + }, + { + "epoch": 2.6128288841943537, + "grad_norm": 49.51536560058594, + "learning_rate": 2.7391837243198056e-06, + "loss": 0.3018, + "num_input_tokens_seen": 128468096, + "step": 40815 + }, + { + "epoch": 2.6131489661353307, + "grad_norm": 7.488208770751953, + "learning_rate": 2.7386276286709795e-06, + "loss": 0.2648, + "num_input_tokens_seen": 128483904, + "step": 40820 + }, + { + "epoch": 2.6134690480763076, + "grad_norm": 6.391339302062988, + "learning_rate": 2.7380715211063336e-06, + "loss": 0.304, + "num_input_tokens_seen": 128499648, + "step": 40825 + }, + { + "epoch": 2.6137891300172846, + "grad_norm": 9.526755332946777, + "learning_rate": 2.7375154016536357e-06, + "loss": 0.196, + "num_input_tokens_seen": 128515136, + "step": 40830 + }, + { + "epoch": 2.614109211958261, + "grad_norm": 8.514233589172363, + "learning_rate": 2.736959270340656e-06, + "loss": 0.24, + "num_input_tokens_seen": 128529728, + "step": 40835 + }, + { + "epoch": 2.614429293899238, + "grad_norm": 49.584815979003906, + "learning_rate": 2.736403127195165e-06, + "loss": 0.2053, + "num_input_tokens_seen": 128545472, + "step": 40840 + }, + { + "epoch": 2.614749375840215, + "grad_norm": 6.12732458114624, + "learning_rate": 2.735846972244932e-06, + "loss": 0.1457, + "num_input_tokens_seen": 128560384, + "step": 40845 + }, + { + "epoch": 2.6150694577811917, + "grad_norm": 7.186172008514404, + "learning_rate": 2.7352908055177306e-06, + "loss": 0.2336, + "num_input_tokens_seen": 128576448, + "step": 40850 + }, + { + "epoch": 2.6153895397221687, + "grad_norm": 11.568062782287598, + "learning_rate": 2.7347346270413316e-06, + "loss": 0.278, + "num_input_tokens_seen": 128591616, + "step": 40855 + }, + { + "epoch": 2.6157096216631457, + "grad_norm": 8.793776512145996, + "learning_rate": 2.7341784368435086e-06, + "loss": 0.1925, + "num_input_tokens_seen": 128606720, + "step": 40860 + }, + { + "epoch": 2.6160297036041227, + "grad_norm": 20.258647918701172, + "learning_rate": 2.7336222349520336e-06, + "loss": 0.2862, + "num_input_tokens_seen": 128622336, + "step": 40865 + }, + { + "epoch": 2.6163497855450997, + "grad_norm": 15.353017807006836, + "learning_rate": 2.733066021394682e-06, + "loss": 0.3135, + "num_input_tokens_seen": 128637568, + "step": 40870 + }, + { + "epoch": 2.6166698674860767, + "grad_norm": 4.902554035186768, + "learning_rate": 2.7325097961992264e-06, + "loss": 0.1468, + "num_input_tokens_seen": 128652672, + "step": 40875 + }, + { + "epoch": 2.616989949427053, + "grad_norm": 5.0793962478637695, + "learning_rate": 2.7319535593934426e-06, + "loss": 0.196, + "num_input_tokens_seen": 128667968, + "step": 40880 + }, + { + "epoch": 2.61731003136803, + "grad_norm": 23.257598876953125, + "learning_rate": 2.7313973110051067e-06, + "loss": 0.268, + "num_input_tokens_seen": 128684288, + "step": 40885 + }, + { + "epoch": 2.617630113309007, + "grad_norm": 10.901030540466309, + "learning_rate": 2.7308410510619945e-06, + "loss": 0.2837, + "num_input_tokens_seen": 128698816, + "step": 40890 + }, + { + "epoch": 2.6179501952499837, + "grad_norm": 4.125280857086182, + "learning_rate": 2.7302847795918824e-06, + "loss": 0.2696, + "num_input_tokens_seen": 128714816, + "step": 40895 + }, + { + "epoch": 2.6182702771909607, + "grad_norm": 56.804595947265625, + "learning_rate": 2.729728496622548e-06, + "loss": 0.2631, + "num_input_tokens_seen": 128729536, + "step": 40900 + }, + { + "epoch": 2.6185903591319377, + "grad_norm": 13.326336860656738, + "learning_rate": 2.7291722021817684e-06, + "loss": 0.2565, + "num_input_tokens_seen": 128744576, + "step": 40905 + }, + { + "epoch": 2.6189104410729147, + "grad_norm": 7.280146598815918, + "learning_rate": 2.728615896297323e-06, + "loss": 0.209, + "num_input_tokens_seen": 128760192, + "step": 40910 + }, + { + "epoch": 2.6192305230138917, + "grad_norm": 42.96177673339844, + "learning_rate": 2.728059578996991e-06, + "loss": 0.1708, + "num_input_tokens_seen": 128774656, + "step": 40915 + }, + { + "epoch": 2.6195506049548687, + "grad_norm": 72.85982513427734, + "learning_rate": 2.7275032503085515e-06, + "loss": 0.1691, + "num_input_tokens_seen": 128791360, + "step": 40920 + }, + { + "epoch": 2.6198706868958452, + "grad_norm": 16.489320755004883, + "learning_rate": 2.7269469102597844e-06, + "loss": 0.2619, + "num_input_tokens_seen": 128806912, + "step": 40925 + }, + { + "epoch": 2.6201907688368222, + "grad_norm": 8.56832218170166, + "learning_rate": 2.726390558878471e-06, + "loss": 0.1432, + "num_input_tokens_seen": 128824128, + "step": 40930 + }, + { + "epoch": 2.620510850777799, + "grad_norm": 7.569084167480469, + "learning_rate": 2.7258341961923923e-06, + "loss": 0.2237, + "num_input_tokens_seen": 128840384, + "step": 40935 + }, + { + "epoch": 2.6208309327187758, + "grad_norm": 0.770358681678772, + "learning_rate": 2.72527782222933e-06, + "loss": 0.1387, + "num_input_tokens_seen": 128857792, + "step": 40940 + }, + { + "epoch": 2.6211510146597528, + "grad_norm": 8.218961715698242, + "learning_rate": 2.724721437017067e-06, + "loss": 0.2324, + "num_input_tokens_seen": 128874176, + "step": 40945 + }, + { + "epoch": 2.6214710966007297, + "grad_norm": 36.262454986572266, + "learning_rate": 2.724165040583386e-06, + "loss": 0.203, + "num_input_tokens_seen": 128889920, + "step": 40950 + }, + { + "epoch": 2.6217911785417067, + "grad_norm": 8.93173599243164, + "learning_rate": 2.7236086329560703e-06, + "loss": 0.2569, + "num_input_tokens_seen": 128905088, + "step": 40955 + }, + { + "epoch": 2.6221112604826837, + "grad_norm": 13.128573417663574, + "learning_rate": 2.723052214162904e-06, + "loss": 0.2041, + "num_input_tokens_seen": 128920960, + "step": 40960 + }, + { + "epoch": 2.6224313424236607, + "grad_norm": 6.999279022216797, + "learning_rate": 2.7224957842316717e-06, + "loss": 0.1669, + "num_input_tokens_seen": 128935616, + "step": 40965 + }, + { + "epoch": 2.6227514243646373, + "grad_norm": 24.057937622070312, + "learning_rate": 2.721939343190159e-06, + "loss": 0.2564, + "num_input_tokens_seen": 128952448, + "step": 40970 + }, + { + "epoch": 2.6230715063056143, + "grad_norm": 13.843546867370605, + "learning_rate": 2.721382891066152e-06, + "loss": 0.1982, + "num_input_tokens_seen": 128967744, + "step": 40975 + }, + { + "epoch": 2.6233915882465912, + "grad_norm": 10.452146530151367, + "learning_rate": 2.720826427887437e-06, + "loss": 0.2127, + "num_input_tokens_seen": 128983168, + "step": 40980 + }, + { + "epoch": 2.623711670187568, + "grad_norm": 9.795985221862793, + "learning_rate": 2.7202699536818e-06, + "loss": 0.2716, + "num_input_tokens_seen": 128998272, + "step": 40985 + }, + { + "epoch": 2.624031752128545, + "grad_norm": 19.42143440246582, + "learning_rate": 2.7197134684770288e-06, + "loss": 0.2002, + "num_input_tokens_seen": 129013888, + "step": 40990 + }, + { + "epoch": 2.6243518340695218, + "grad_norm": 61.839073181152344, + "learning_rate": 2.719156972300911e-06, + "loss": 0.2078, + "num_input_tokens_seen": 129029376, + "step": 40995 + }, + { + "epoch": 2.6246719160104988, + "grad_norm": 11.433079719543457, + "learning_rate": 2.718600465181236e-06, + "loss": 0.2925, + "num_input_tokens_seen": 129045888, + "step": 41000 + }, + { + "epoch": 2.6249919979514758, + "grad_norm": 12.30956745147705, + "learning_rate": 2.718043947145792e-06, + "loss": 0.1804, + "num_input_tokens_seen": 129060928, + "step": 41005 + }, + { + "epoch": 2.6253120798924527, + "grad_norm": 63.89870071411133, + "learning_rate": 2.71748741822237e-06, + "loss": 0.3181, + "num_input_tokens_seen": 129075776, + "step": 41010 + }, + { + "epoch": 2.6256321618334293, + "grad_norm": 6.882837295532227, + "learning_rate": 2.716930878438758e-06, + "loss": 0.1856, + "num_input_tokens_seen": 129092928, + "step": 41015 + }, + { + "epoch": 2.6259522437744063, + "grad_norm": 4.56889533996582, + "learning_rate": 2.716374327822748e-06, + "loss": 0.1736, + "num_input_tokens_seen": 129107776, + "step": 41020 + }, + { + "epoch": 2.6262723257153833, + "grad_norm": 48.188053131103516, + "learning_rate": 2.7158177664021316e-06, + "loss": 0.224, + "num_input_tokens_seen": 129123392, + "step": 41025 + }, + { + "epoch": 2.62659240765636, + "grad_norm": 14.864694595336914, + "learning_rate": 2.7152611942047e-06, + "loss": 0.2394, + "num_input_tokens_seen": 129139072, + "step": 41030 + }, + { + "epoch": 2.626912489597337, + "grad_norm": 4.846706867218018, + "learning_rate": 2.714704611258246e-06, + "loss": 0.1845, + "num_input_tokens_seen": 129153856, + "step": 41035 + }, + { + "epoch": 2.627232571538314, + "grad_norm": 25.14792823791504, + "learning_rate": 2.7141480175905615e-06, + "loss": 0.2173, + "num_input_tokens_seen": 129169920, + "step": 41040 + }, + { + "epoch": 2.627552653479291, + "grad_norm": 13.26449203491211, + "learning_rate": 2.713591413229441e-06, + "loss": 0.1824, + "num_input_tokens_seen": 129185344, + "step": 41045 + }, + { + "epoch": 2.627872735420268, + "grad_norm": 14.534130096435547, + "learning_rate": 2.713034798202677e-06, + "loss": 0.1922, + "num_input_tokens_seen": 129201152, + "step": 41050 + }, + { + "epoch": 2.6281928173612443, + "grad_norm": 28.712379455566406, + "learning_rate": 2.7124781725380657e-06, + "loss": 0.2191, + "num_input_tokens_seen": 129216384, + "step": 41055 + }, + { + "epoch": 2.6285128993022213, + "grad_norm": 10.999410629272461, + "learning_rate": 2.711921536263401e-06, + "loss": 0.1356, + "num_input_tokens_seen": 129232128, + "step": 41060 + }, + { + "epoch": 2.6288329812431983, + "grad_norm": 13.232422828674316, + "learning_rate": 2.711364889406479e-06, + "loss": 0.2034, + "num_input_tokens_seen": 129248512, + "step": 41065 + }, + { + "epoch": 2.6291530631841753, + "grad_norm": 9.054866790771484, + "learning_rate": 2.7108082319950946e-06, + "loss": 0.1895, + "num_input_tokens_seen": 129264448, + "step": 41070 + }, + { + "epoch": 2.629473145125152, + "grad_norm": 14.835942268371582, + "learning_rate": 2.710251564057045e-06, + "loss": 0.2898, + "num_input_tokens_seen": 129279296, + "step": 41075 + }, + { + "epoch": 2.629793227066129, + "grad_norm": 11.424516677856445, + "learning_rate": 2.709694885620128e-06, + "loss": 0.1662, + "num_input_tokens_seen": 129295552, + "step": 41080 + }, + { + "epoch": 2.630113309007106, + "grad_norm": 44.51874542236328, + "learning_rate": 2.7091381967121405e-06, + "loss": 0.2313, + "num_input_tokens_seen": 129310720, + "step": 41085 + }, + { + "epoch": 2.630433390948083, + "grad_norm": 52.004207611083984, + "learning_rate": 2.708581497360881e-06, + "loss": 0.3258, + "num_input_tokens_seen": 129326400, + "step": 41090 + }, + { + "epoch": 2.63075347288906, + "grad_norm": 6.214739799499512, + "learning_rate": 2.708024787594148e-06, + "loss": 0.1606, + "num_input_tokens_seen": 129343232, + "step": 41095 + }, + { + "epoch": 2.6310735548300364, + "grad_norm": 10.116371154785156, + "learning_rate": 2.7074680674397408e-06, + "loss": 0.2334, + "num_input_tokens_seen": 129358080, + "step": 41100 + }, + { + "epoch": 2.6313936367710133, + "grad_norm": 61.83306121826172, + "learning_rate": 2.7069113369254584e-06, + "loss": 0.2308, + "num_input_tokens_seen": 129373120, + "step": 41105 + }, + { + "epoch": 2.6317137187119903, + "grad_norm": 8.271693229675293, + "learning_rate": 2.7063545960791015e-06, + "loss": 0.2637, + "num_input_tokens_seen": 129388800, + "step": 41110 + }, + { + "epoch": 2.6320338006529673, + "grad_norm": 17.583280563354492, + "learning_rate": 2.7057978449284716e-06, + "loss": 0.254, + "num_input_tokens_seen": 129405248, + "step": 41115 + }, + { + "epoch": 2.632353882593944, + "grad_norm": 9.994958877563477, + "learning_rate": 2.7052410835013694e-06, + "loss": 0.4129, + "num_input_tokens_seen": 129422784, + "step": 41120 + }, + { + "epoch": 2.632673964534921, + "grad_norm": 8.244126319885254, + "learning_rate": 2.7046843118255963e-06, + "loss": 0.2204, + "num_input_tokens_seen": 129438528, + "step": 41125 + }, + { + "epoch": 2.632994046475898, + "grad_norm": 37.524715423583984, + "learning_rate": 2.704127529928955e-06, + "loss": 0.2269, + "num_input_tokens_seen": 129453632, + "step": 41130 + }, + { + "epoch": 2.633314128416875, + "grad_norm": 6.37009334564209, + "learning_rate": 2.7035707378392473e-06, + "loss": 0.16, + "num_input_tokens_seen": 129469376, + "step": 41135 + }, + { + "epoch": 2.633634210357852, + "grad_norm": 8.75476360321045, + "learning_rate": 2.7030139355842783e-06, + "loss": 0.2356, + "num_input_tokens_seen": 129484928, + "step": 41140 + }, + { + "epoch": 2.6339542922988284, + "grad_norm": 9.234814643859863, + "learning_rate": 2.7024571231918495e-06, + "loss": 0.2529, + "num_input_tokens_seen": 129500544, + "step": 41145 + }, + { + "epoch": 2.6342743742398054, + "grad_norm": 9.56367301940918, + "learning_rate": 2.7019003006897675e-06, + "loss": 0.2412, + "num_input_tokens_seen": 129515904, + "step": 41150 + }, + { + "epoch": 2.6345944561807824, + "grad_norm": 8.775601387023926, + "learning_rate": 2.7013434681058363e-06, + "loss": 0.1943, + "num_input_tokens_seen": 129531328, + "step": 41155 + }, + { + "epoch": 2.634914538121759, + "grad_norm": 21.25328826904297, + "learning_rate": 2.700786625467861e-06, + "loss": 0.2205, + "num_input_tokens_seen": 129546752, + "step": 41160 + }, + { + "epoch": 2.635234620062736, + "grad_norm": 7.21071720123291, + "learning_rate": 2.700229772803647e-06, + "loss": 0.2483, + "num_input_tokens_seen": 129564096, + "step": 41165 + }, + { + "epoch": 2.635554702003713, + "grad_norm": 32.28436279296875, + "learning_rate": 2.6996729101410013e-06, + "loss": 0.2401, + "num_input_tokens_seen": 129580032, + "step": 41170 + }, + { + "epoch": 2.63587478394469, + "grad_norm": 7.8643670082092285, + "learning_rate": 2.6991160375077308e-06, + "loss": 0.2484, + "num_input_tokens_seen": 129594944, + "step": 41175 + }, + { + "epoch": 2.636194865885667, + "grad_norm": 26.603294372558594, + "learning_rate": 2.6985591549316424e-06, + "loss": 0.229, + "num_input_tokens_seen": 129610752, + "step": 41180 + }, + { + "epoch": 2.636514947826644, + "grad_norm": 20.833126068115234, + "learning_rate": 2.6980022624405446e-06, + "loss": 0.1819, + "num_input_tokens_seen": 129626112, + "step": 41185 + }, + { + "epoch": 2.6368350297676204, + "grad_norm": 4.844991683959961, + "learning_rate": 2.6974453600622442e-06, + "loss": 0.1814, + "num_input_tokens_seen": 129641408, + "step": 41190 + }, + { + "epoch": 2.6371551117085974, + "grad_norm": 136.17015075683594, + "learning_rate": 2.6968884478245515e-06, + "loss": 0.2203, + "num_input_tokens_seen": 129656448, + "step": 41195 + }, + { + "epoch": 2.6374751936495744, + "grad_norm": 25.756866455078125, + "learning_rate": 2.6963315257552754e-06, + "loss": 0.1885, + "num_input_tokens_seen": 129671488, + "step": 41200 + }, + { + "epoch": 2.637795275590551, + "grad_norm": 11.210677146911621, + "learning_rate": 2.6957745938822248e-06, + "loss": 0.3412, + "num_input_tokens_seen": 129688192, + "step": 41205 + }, + { + "epoch": 2.638115357531528, + "grad_norm": 11.489736557006836, + "learning_rate": 2.695217652233211e-06, + "loss": 0.2104, + "num_input_tokens_seen": 129705024, + "step": 41210 + }, + { + "epoch": 2.638435439472505, + "grad_norm": 13.48912525177002, + "learning_rate": 2.694660700836045e-06, + "loss": 0.2396, + "num_input_tokens_seen": 129719616, + "step": 41215 + }, + { + "epoch": 2.638755521413482, + "grad_norm": 4.801694869995117, + "learning_rate": 2.6941037397185375e-06, + "loss": 0.2503, + "num_input_tokens_seen": 129734848, + "step": 41220 + }, + { + "epoch": 2.639075603354459, + "grad_norm": 58.675514221191406, + "learning_rate": 2.6935467689085e-06, + "loss": 0.3266, + "num_input_tokens_seen": 129750784, + "step": 41225 + }, + { + "epoch": 2.639395685295436, + "grad_norm": 9.959197998046875, + "learning_rate": 2.6929897884337454e-06, + "loss": 0.2465, + "num_input_tokens_seen": 129766720, + "step": 41230 + }, + { + "epoch": 2.6397157672364124, + "grad_norm": 8.048118591308594, + "learning_rate": 2.6924327983220856e-06, + "loss": 0.2098, + "num_input_tokens_seen": 129784192, + "step": 41235 + }, + { + "epoch": 2.6400358491773894, + "grad_norm": 10.46900463104248, + "learning_rate": 2.6918757986013346e-06, + "loss": 0.2653, + "num_input_tokens_seen": 129799424, + "step": 41240 + }, + { + "epoch": 2.6403559311183664, + "grad_norm": 21.576087951660156, + "learning_rate": 2.6913187892993053e-06, + "loss": 0.2272, + "num_input_tokens_seen": 129814720, + "step": 41245 + }, + { + "epoch": 2.640676013059343, + "grad_norm": 7.272669792175293, + "learning_rate": 2.6907617704438115e-06, + "loss": 0.1359, + "num_input_tokens_seen": 129831040, + "step": 41250 + }, + { + "epoch": 2.64099609500032, + "grad_norm": 30.682748794555664, + "learning_rate": 2.69020474206267e-06, + "loss": 0.3712, + "num_input_tokens_seen": 129845440, + "step": 41255 + }, + { + "epoch": 2.641316176941297, + "grad_norm": 7.461791038513184, + "learning_rate": 2.6896477041836934e-06, + "loss": 0.2336, + "num_input_tokens_seen": 129861568, + "step": 41260 + }, + { + "epoch": 2.641636258882274, + "grad_norm": 28.393150329589844, + "learning_rate": 2.6890906568346987e-06, + "loss": 0.2963, + "num_input_tokens_seen": 129877056, + "step": 41265 + }, + { + "epoch": 2.641956340823251, + "grad_norm": 22.945831298828125, + "learning_rate": 2.6885336000435016e-06, + "loss": 0.2921, + "num_input_tokens_seen": 129891776, + "step": 41270 + }, + { + "epoch": 2.642276422764228, + "grad_norm": 9.879828453063965, + "learning_rate": 2.6879765338379183e-06, + "loss": 0.1619, + "num_input_tokens_seen": 129906816, + "step": 41275 + }, + { + "epoch": 2.6425965047052045, + "grad_norm": 12.679505348205566, + "learning_rate": 2.6874194582457657e-06, + "loss": 0.1129, + "num_input_tokens_seen": 129922368, + "step": 41280 + }, + { + "epoch": 2.6429165866461815, + "grad_norm": 8.366716384887695, + "learning_rate": 2.6868623732948617e-06, + "loss": 0.2401, + "num_input_tokens_seen": 129938624, + "step": 41285 + }, + { + "epoch": 2.6432366685871584, + "grad_norm": 11.185348510742188, + "learning_rate": 2.686305279013025e-06, + "loss": 0.1611, + "num_input_tokens_seen": 129953984, + "step": 41290 + }, + { + "epoch": 2.643556750528135, + "grad_norm": 10.340240478515625, + "learning_rate": 2.685748175428072e-06, + "loss": 0.2679, + "num_input_tokens_seen": 129969408, + "step": 41295 + }, + { + "epoch": 2.643876832469112, + "grad_norm": 4.414626121520996, + "learning_rate": 2.685191062567824e-06, + "loss": 0.1871, + "num_input_tokens_seen": 129984704, + "step": 41300 + }, + { + "epoch": 2.644196914410089, + "grad_norm": 8.661752700805664, + "learning_rate": 2.6846339404600973e-06, + "loss": 0.2274, + "num_input_tokens_seen": 130000704, + "step": 41305 + }, + { + "epoch": 2.644516996351066, + "grad_norm": 4.812618732452393, + "learning_rate": 2.6840768091327147e-06, + "loss": 0.2339, + "num_input_tokens_seen": 130015232, + "step": 41310 + }, + { + "epoch": 2.644837078292043, + "grad_norm": 11.8095703125, + "learning_rate": 2.6835196686134945e-06, + "loss": 0.2078, + "num_input_tokens_seen": 130030720, + "step": 41315 + }, + { + "epoch": 2.6451571602330195, + "grad_norm": 7.192386627197266, + "learning_rate": 2.682962518930258e-06, + "loss": 0.1687, + "num_input_tokens_seen": 130046272, + "step": 41320 + }, + { + "epoch": 2.6454772421739965, + "grad_norm": 67.81058502197266, + "learning_rate": 2.682405360110826e-06, + "loss": 0.2655, + "num_input_tokens_seen": 130061760, + "step": 41325 + }, + { + "epoch": 2.6457973241149735, + "grad_norm": 10.697776794433594, + "learning_rate": 2.6818481921830207e-06, + "loss": 0.1826, + "num_input_tokens_seen": 130077312, + "step": 41330 + }, + { + "epoch": 2.6461174060559505, + "grad_norm": 26.01933479309082, + "learning_rate": 2.681291015174664e-06, + "loss": 0.1732, + "num_input_tokens_seen": 130095808, + "step": 41335 + }, + { + "epoch": 2.646437487996927, + "grad_norm": 14.045877456665039, + "learning_rate": 2.680733829113578e-06, + "loss": 0.2907, + "num_input_tokens_seen": 130110784, + "step": 41340 + }, + { + "epoch": 2.646757569937904, + "grad_norm": 65.57521057128906, + "learning_rate": 2.6801766340275865e-06, + "loss": 0.2794, + "num_input_tokens_seen": 130126400, + "step": 41345 + }, + { + "epoch": 2.647077651878881, + "grad_norm": 12.66614818572998, + "learning_rate": 2.6796194299445123e-06, + "loss": 0.2194, + "num_input_tokens_seen": 130142208, + "step": 41350 + }, + { + "epoch": 2.647397733819858, + "grad_norm": 12.766860008239746, + "learning_rate": 2.6790622168921794e-06, + "loss": 0.2653, + "num_input_tokens_seen": 130157504, + "step": 41355 + }, + { + "epoch": 2.647717815760835, + "grad_norm": 18.106754302978516, + "learning_rate": 2.6785049948984122e-06, + "loss": 0.158, + "num_input_tokens_seen": 130173376, + "step": 41360 + }, + { + "epoch": 2.6480378977018115, + "grad_norm": 13.253392219543457, + "learning_rate": 2.6779477639910344e-06, + "loss": 0.1953, + "num_input_tokens_seen": 130188800, + "step": 41365 + }, + { + "epoch": 2.6483579796427885, + "grad_norm": 4.1504082679748535, + "learning_rate": 2.677390524197873e-06, + "loss": 0.1425, + "num_input_tokens_seen": 130206592, + "step": 41370 + }, + { + "epoch": 2.6486780615837655, + "grad_norm": 13.193449974060059, + "learning_rate": 2.676833275546753e-06, + "loss": 0.1956, + "num_input_tokens_seen": 130224576, + "step": 41375 + }, + { + "epoch": 2.6489981435247425, + "grad_norm": 13.692056655883789, + "learning_rate": 2.6762760180654995e-06, + "loss": 0.1839, + "num_input_tokens_seen": 130241920, + "step": 41380 + }, + { + "epoch": 2.649318225465719, + "grad_norm": 31.05748176574707, + "learning_rate": 2.6757187517819406e-06, + "loss": 0.2494, + "num_input_tokens_seen": 130258624, + "step": 41385 + }, + { + "epoch": 2.649638307406696, + "grad_norm": 9.801410675048828, + "learning_rate": 2.675161476723902e-06, + "loss": 0.211, + "num_input_tokens_seen": 130273536, + "step": 41390 + }, + { + "epoch": 2.649958389347673, + "grad_norm": 11.570938110351562, + "learning_rate": 2.6746041929192113e-06, + "loss": 0.158, + "num_input_tokens_seen": 130288384, + "step": 41395 + }, + { + "epoch": 2.65027847128865, + "grad_norm": 23.455665588378906, + "learning_rate": 2.6740469003956974e-06, + "loss": 0.3643, + "num_input_tokens_seen": 130304192, + "step": 41400 + }, + { + "epoch": 2.650598553229627, + "grad_norm": 4.620269775390625, + "learning_rate": 2.6734895991811884e-06, + "loss": 0.1569, + "num_input_tokens_seen": 130319680, + "step": 41405 + }, + { + "epoch": 2.6509186351706036, + "grad_norm": 10.317648887634277, + "learning_rate": 2.6729322893035114e-06, + "loss": 0.1823, + "num_input_tokens_seen": 130334592, + "step": 41410 + }, + { + "epoch": 2.6512387171115805, + "grad_norm": 4.269364833831787, + "learning_rate": 2.6723749707904974e-06, + "loss": 0.1812, + "num_input_tokens_seen": 130350272, + "step": 41415 + }, + { + "epoch": 2.6515587990525575, + "grad_norm": 7.956858158111572, + "learning_rate": 2.6718176436699744e-06, + "loss": 0.2267, + "num_input_tokens_seen": 130364992, + "step": 41420 + }, + { + "epoch": 2.651878880993534, + "grad_norm": 7.068239212036133, + "learning_rate": 2.6712603079697742e-06, + "loss": 0.301, + "num_input_tokens_seen": 130381376, + "step": 41425 + }, + { + "epoch": 2.652198962934511, + "grad_norm": 13.887728691101074, + "learning_rate": 2.6707029637177257e-06, + "loss": 0.2851, + "num_input_tokens_seen": 130398016, + "step": 41430 + }, + { + "epoch": 2.652519044875488, + "grad_norm": 6.670989036560059, + "learning_rate": 2.6701456109416602e-06, + "loss": 0.1436, + "num_input_tokens_seen": 130413248, + "step": 41435 + }, + { + "epoch": 2.652839126816465, + "grad_norm": 15.932421684265137, + "learning_rate": 2.669588249669409e-06, + "loss": 0.2448, + "num_input_tokens_seen": 130427776, + "step": 41440 + }, + { + "epoch": 2.653159208757442, + "grad_norm": 8.407307624816895, + "learning_rate": 2.6690308799288043e-06, + "loss": 0.2274, + "num_input_tokens_seen": 130444288, + "step": 41445 + }, + { + "epoch": 2.653479290698419, + "grad_norm": 15.642078399658203, + "learning_rate": 2.668473501747679e-06, + "loss": 0.2413, + "num_input_tokens_seen": 130459456, + "step": 41450 + }, + { + "epoch": 2.6537993726393956, + "grad_norm": 19.933691024780273, + "learning_rate": 2.6679161151538623e-06, + "loss": 0.2521, + "num_input_tokens_seen": 130474880, + "step": 41455 + }, + { + "epoch": 2.6541194545803726, + "grad_norm": 20.992321014404297, + "learning_rate": 2.6673587201751912e-06, + "loss": 0.2512, + "num_input_tokens_seen": 130491136, + "step": 41460 + }, + { + "epoch": 2.6544395365213496, + "grad_norm": 8.718404769897461, + "learning_rate": 2.666801316839497e-06, + "loss": 0.1614, + "num_input_tokens_seen": 130507072, + "step": 41465 + }, + { + "epoch": 2.654759618462326, + "grad_norm": 12.257546424865723, + "learning_rate": 2.6662439051746136e-06, + "loss": 0.237, + "num_input_tokens_seen": 130522624, + "step": 41470 + }, + { + "epoch": 2.655079700403303, + "grad_norm": 7.759065628051758, + "learning_rate": 2.6656864852083758e-06, + "loss": 0.1732, + "num_input_tokens_seen": 130537536, + "step": 41475 + }, + { + "epoch": 2.65539978234428, + "grad_norm": 17.32001304626465, + "learning_rate": 2.6651290569686174e-06, + "loss": 0.3565, + "num_input_tokens_seen": 130552960, + "step": 41480 + }, + { + "epoch": 2.655719864285257, + "grad_norm": 11.723633766174316, + "learning_rate": 2.6645716204831745e-06, + "loss": 0.152, + "num_input_tokens_seen": 130569856, + "step": 41485 + }, + { + "epoch": 2.656039946226234, + "grad_norm": 14.423673629760742, + "learning_rate": 2.664014175779882e-06, + "loss": 0.2822, + "num_input_tokens_seen": 130585024, + "step": 41490 + }, + { + "epoch": 2.656360028167211, + "grad_norm": 6.802129745483398, + "learning_rate": 2.663456722886576e-06, + "loss": 0.2037, + "num_input_tokens_seen": 130600384, + "step": 41495 + }, + { + "epoch": 2.6566801101081876, + "grad_norm": 14.656618118286133, + "learning_rate": 2.6628992618310923e-06, + "loss": 0.3533, + "num_input_tokens_seen": 130616768, + "step": 41500 + }, + { + "epoch": 2.6570001920491646, + "grad_norm": 6.199748992919922, + "learning_rate": 2.6623417926412686e-06, + "loss": 0.204, + "num_input_tokens_seen": 130633536, + "step": 41505 + }, + { + "epoch": 2.6573202739901416, + "grad_norm": 5.870962619781494, + "learning_rate": 2.6617843153449407e-06, + "loss": 0.2205, + "num_input_tokens_seen": 130649152, + "step": 41510 + }, + { + "epoch": 2.657640355931118, + "grad_norm": 6.6146321296691895, + "learning_rate": 2.6612268299699474e-06, + "loss": 0.199, + "num_input_tokens_seen": 130665792, + "step": 41515 + }, + { + "epoch": 2.657960437872095, + "grad_norm": 67.77375030517578, + "learning_rate": 2.6606693365441254e-06, + "loss": 0.2726, + "num_input_tokens_seen": 130681472, + "step": 41520 + }, + { + "epoch": 2.658280519813072, + "grad_norm": 7.500676155090332, + "learning_rate": 2.660111835095314e-06, + "loss": 0.232, + "num_input_tokens_seen": 130697408, + "step": 41525 + }, + { + "epoch": 2.658600601754049, + "grad_norm": 7.132558345794678, + "learning_rate": 2.6595543256513515e-06, + "loss": 0.2411, + "num_input_tokens_seen": 130712512, + "step": 41530 + }, + { + "epoch": 2.658920683695026, + "grad_norm": 12.475085258483887, + "learning_rate": 2.658996808240077e-06, + "loss": 0.2569, + "num_input_tokens_seen": 130728704, + "step": 41535 + }, + { + "epoch": 2.659240765636003, + "grad_norm": 18.555421829223633, + "learning_rate": 2.65843928288933e-06, + "loss": 0.2186, + "num_input_tokens_seen": 130744064, + "step": 41540 + }, + { + "epoch": 2.6595608475769796, + "grad_norm": 6.650147914886475, + "learning_rate": 2.657881749626951e-06, + "loss": 0.1895, + "num_input_tokens_seen": 130760384, + "step": 41545 + }, + { + "epoch": 2.6598809295179566, + "grad_norm": 16.861391067504883, + "learning_rate": 2.6573242084807794e-06, + "loss": 0.2509, + "num_input_tokens_seen": 130776256, + "step": 41550 + }, + { + "epoch": 2.6602010114589336, + "grad_norm": 11.006488800048828, + "learning_rate": 2.656766659478656e-06, + "loss": 0.2283, + "num_input_tokens_seen": 130793024, + "step": 41555 + }, + { + "epoch": 2.66052109339991, + "grad_norm": 10.280050277709961, + "learning_rate": 2.6562091026484226e-06, + "loss": 0.1837, + "num_input_tokens_seen": 130808384, + "step": 41560 + }, + { + "epoch": 2.660841175340887, + "grad_norm": 7.439911842346191, + "learning_rate": 2.6556515380179203e-06, + "loss": 0.248, + "num_input_tokens_seen": 130822848, + "step": 41565 + }, + { + "epoch": 2.661161257281864, + "grad_norm": 11.340697288513184, + "learning_rate": 2.6550939656149906e-06, + "loss": 0.1602, + "num_input_tokens_seen": 130838784, + "step": 41570 + }, + { + "epoch": 2.661481339222841, + "grad_norm": 5.4996256828308105, + "learning_rate": 2.6545363854674765e-06, + "loss": 0.2316, + "num_input_tokens_seen": 130854208, + "step": 41575 + }, + { + "epoch": 2.661801421163818, + "grad_norm": 29.597749710083008, + "learning_rate": 2.6539787976032204e-06, + "loss": 0.2155, + "num_input_tokens_seen": 130869248, + "step": 41580 + }, + { + "epoch": 2.6621215031047947, + "grad_norm": 35.54854965209961, + "learning_rate": 2.653421202050065e-06, + "loss": 0.2503, + "num_input_tokens_seen": 130884480, + "step": 41585 + }, + { + "epoch": 2.6624415850457717, + "grad_norm": 95.33007049560547, + "learning_rate": 2.6528635988358537e-06, + "loss": 0.248, + "num_input_tokens_seen": 130899200, + "step": 41590 + }, + { + "epoch": 2.6627616669867487, + "grad_norm": 5.49475622177124, + "learning_rate": 2.6523059879884305e-06, + "loss": 0.1598, + "num_input_tokens_seen": 130914752, + "step": 41595 + }, + { + "epoch": 2.6630817489277256, + "grad_norm": 11.530165672302246, + "learning_rate": 2.65174836953564e-06, + "loss": 0.2021, + "num_input_tokens_seen": 130930176, + "step": 41600 + }, + { + "epoch": 2.663401830868702, + "grad_norm": 8.34245491027832, + "learning_rate": 2.651190743505326e-06, + "loss": 0.1484, + "num_input_tokens_seen": 130945472, + "step": 41605 + }, + { + "epoch": 2.663721912809679, + "grad_norm": 6.582840919494629, + "learning_rate": 2.6506331099253333e-06, + "loss": 0.2114, + "num_input_tokens_seen": 130960128, + "step": 41610 + }, + { + "epoch": 2.664041994750656, + "grad_norm": 5.434636116027832, + "learning_rate": 2.6500754688235085e-06, + "loss": 0.2003, + "num_input_tokens_seen": 130975424, + "step": 41615 + }, + { + "epoch": 2.664362076691633, + "grad_norm": 5.589023113250732, + "learning_rate": 2.6495178202276965e-06, + "loss": 0.2115, + "num_input_tokens_seen": 130990016, + "step": 41620 + }, + { + "epoch": 2.66468215863261, + "grad_norm": 5.58685302734375, + "learning_rate": 2.6489601641657426e-06, + "loss": 0.255, + "num_input_tokens_seen": 131006272, + "step": 41625 + }, + { + "epoch": 2.6650022405735867, + "grad_norm": 41.946598052978516, + "learning_rate": 2.6484025006654952e-06, + "loss": 0.1549, + "num_input_tokens_seen": 131022720, + "step": 41630 + }, + { + "epoch": 2.6653223225145637, + "grad_norm": 12.765965461730957, + "learning_rate": 2.6478448297547993e-06, + "loss": 0.1778, + "num_input_tokens_seen": 131037312, + "step": 41635 + }, + { + "epoch": 2.6656424044555407, + "grad_norm": 10.362780570983887, + "learning_rate": 2.6472871514615026e-06, + "loss": 0.1708, + "num_input_tokens_seen": 131052928, + "step": 41640 + }, + { + "epoch": 2.6659624863965177, + "grad_norm": 17.022829055786133, + "learning_rate": 2.646729465813453e-06, + "loss": 0.2943, + "num_input_tokens_seen": 131068544, + "step": 41645 + }, + { + "epoch": 2.666282568337494, + "grad_norm": 8.11144733428955, + "learning_rate": 2.6461717728384972e-06, + "loss": 0.2628, + "num_input_tokens_seen": 131083968, + "step": 41650 + }, + { + "epoch": 2.666602650278471, + "grad_norm": 15.27695083618164, + "learning_rate": 2.6456140725644856e-06, + "loss": 0.3024, + "num_input_tokens_seen": 131099648, + "step": 41655 + }, + { + "epoch": 2.666922732219448, + "grad_norm": 13.360663414001465, + "learning_rate": 2.6450563650192657e-06, + "loss": 0.2815, + "num_input_tokens_seen": 131114496, + "step": 41660 + }, + { + "epoch": 2.667242814160425, + "grad_norm": 8.705547332763672, + "learning_rate": 2.6444986502306863e-06, + "loss": 0.2378, + "num_input_tokens_seen": 131129472, + "step": 41665 + }, + { + "epoch": 2.667562896101402, + "grad_norm": 64.63578796386719, + "learning_rate": 2.643940928226596e-06, + "loss": 0.2157, + "num_input_tokens_seen": 131144128, + "step": 41670 + }, + { + "epoch": 2.6678829780423787, + "grad_norm": 10.427050590515137, + "learning_rate": 2.643383199034847e-06, + "loss": 0.2313, + "num_input_tokens_seen": 131160192, + "step": 41675 + }, + { + "epoch": 2.6682030599833557, + "grad_norm": 11.615039825439453, + "learning_rate": 2.6428254626832872e-06, + "loss": 0.2001, + "num_input_tokens_seen": 131176128, + "step": 41680 + }, + { + "epoch": 2.6685231419243327, + "grad_norm": 40.24650192260742, + "learning_rate": 2.6422677191997668e-06, + "loss": 0.3001, + "num_input_tokens_seen": 131193536, + "step": 41685 + }, + { + "epoch": 2.6688432238653093, + "grad_norm": 48.91459274291992, + "learning_rate": 2.641709968612139e-06, + "loss": 0.1506, + "num_input_tokens_seen": 131209152, + "step": 41690 + }, + { + "epoch": 2.6691633058062862, + "grad_norm": 14.808205604553223, + "learning_rate": 2.641152210948253e-06, + "loss": 0.4037, + "num_input_tokens_seen": 131224704, + "step": 41695 + }, + { + "epoch": 2.6694833877472632, + "grad_norm": 95.857666015625, + "learning_rate": 2.640594446235961e-06, + "loss": 0.1781, + "num_input_tokens_seen": 131240896, + "step": 41700 + }, + { + "epoch": 2.6698034696882402, + "grad_norm": 11.303804397583008, + "learning_rate": 2.6400366745031147e-06, + "loss": 0.2807, + "num_input_tokens_seen": 131256768, + "step": 41705 + }, + { + "epoch": 2.670123551629217, + "grad_norm": 5.088261604309082, + "learning_rate": 2.6394788957775657e-06, + "loss": 0.236, + "num_input_tokens_seen": 131271552, + "step": 41710 + }, + { + "epoch": 2.670443633570194, + "grad_norm": 4.405519962310791, + "learning_rate": 2.6389211100871677e-06, + "loss": 0.2688, + "num_input_tokens_seen": 131286784, + "step": 41715 + }, + { + "epoch": 2.6707637155111708, + "grad_norm": 4.6575446128845215, + "learning_rate": 2.6383633174597735e-06, + "loss": 0.1437, + "num_input_tokens_seen": 131302272, + "step": 41720 + }, + { + "epoch": 2.6710837974521477, + "grad_norm": 12.183914184570312, + "learning_rate": 2.6378055179232354e-06, + "loss": 0.2011, + "num_input_tokens_seen": 131319360, + "step": 41725 + }, + { + "epoch": 2.6714038793931247, + "grad_norm": 33.486751556396484, + "learning_rate": 2.6372477115054067e-06, + "loss": 0.2875, + "num_input_tokens_seen": 131335232, + "step": 41730 + }, + { + "epoch": 2.6717239613341013, + "grad_norm": 12.690851211547852, + "learning_rate": 2.6366898982341433e-06, + "loss": 0.3015, + "num_input_tokens_seen": 131350272, + "step": 41735 + }, + { + "epoch": 2.6720440432750783, + "grad_norm": 9.193899154663086, + "learning_rate": 2.636132078137298e-06, + "loss": 0.205, + "num_input_tokens_seen": 131367296, + "step": 41740 + }, + { + "epoch": 2.6723641252160553, + "grad_norm": 23.772052764892578, + "learning_rate": 2.635574251242726e-06, + "loss": 0.1604, + "num_input_tokens_seen": 131382080, + "step": 41745 + }, + { + "epoch": 2.6726842071570323, + "grad_norm": 16.26959991455078, + "learning_rate": 2.6350164175782816e-06, + "loss": 0.1906, + "num_input_tokens_seen": 131397632, + "step": 41750 + }, + { + "epoch": 2.6730042890980092, + "grad_norm": 60.894142150878906, + "learning_rate": 2.6344585771718214e-06, + "loss": 0.26, + "num_input_tokens_seen": 131414720, + "step": 41755 + }, + { + "epoch": 2.6733243710389862, + "grad_norm": 4.999109745025635, + "learning_rate": 2.6339007300512e-06, + "loss": 0.1476, + "num_input_tokens_seen": 131431104, + "step": 41760 + }, + { + "epoch": 2.673644452979963, + "grad_norm": 45.03144073486328, + "learning_rate": 2.6333428762442724e-06, + "loss": 0.2659, + "num_input_tokens_seen": 131446208, + "step": 41765 + }, + { + "epoch": 2.6739645349209398, + "grad_norm": 62.39839172363281, + "learning_rate": 2.6327850157788976e-06, + "loss": 0.2564, + "num_input_tokens_seen": 131462080, + "step": 41770 + }, + { + "epoch": 2.6742846168619168, + "grad_norm": 7.775589466094971, + "learning_rate": 2.6322271486829303e-06, + "loss": 0.2235, + "num_input_tokens_seen": 131477696, + "step": 41775 + }, + { + "epoch": 2.6746046988028933, + "grad_norm": 32.979793548583984, + "learning_rate": 2.631669274984228e-06, + "loss": 0.2871, + "num_input_tokens_seen": 131492672, + "step": 41780 + }, + { + "epoch": 2.6749247807438703, + "grad_norm": 7.424008369445801, + "learning_rate": 2.631111394710648e-06, + "loss": 0.1942, + "num_input_tokens_seen": 131509184, + "step": 41785 + }, + { + "epoch": 2.6752448626848473, + "grad_norm": 5.440722942352295, + "learning_rate": 2.6305535078900473e-06, + "loss": 0.2044, + "num_input_tokens_seen": 131524672, + "step": 41790 + }, + { + "epoch": 2.6755649446258243, + "grad_norm": 61.188133239746094, + "learning_rate": 2.629995614550285e-06, + "loss": 0.2911, + "num_input_tokens_seen": 131540800, + "step": 41795 + }, + { + "epoch": 2.6758850265668013, + "grad_norm": 31.71238136291504, + "learning_rate": 2.6294377147192186e-06, + "loss": 0.1487, + "num_input_tokens_seen": 131555520, + "step": 41800 + }, + { + "epoch": 2.6762051085077783, + "grad_norm": 25.608341217041016, + "learning_rate": 2.628879808424707e-06, + "loss": 0.2589, + "num_input_tokens_seen": 131571520, + "step": 41805 + }, + { + "epoch": 2.676525190448755, + "grad_norm": 38.590824127197266, + "learning_rate": 2.6283218956946096e-06, + "loss": 0.2274, + "num_input_tokens_seen": 131586304, + "step": 41810 + }, + { + "epoch": 2.676845272389732, + "grad_norm": 9.29341983795166, + "learning_rate": 2.6277639765567842e-06, + "loss": 0.2239, + "num_input_tokens_seen": 131601472, + "step": 41815 + }, + { + "epoch": 2.677165354330709, + "grad_norm": 36.762451171875, + "learning_rate": 2.627206051039092e-06, + "loss": 0.2437, + "num_input_tokens_seen": 131618176, + "step": 41820 + }, + { + "epoch": 2.6774854362716853, + "grad_norm": 10.889565467834473, + "learning_rate": 2.6266481191693915e-06, + "loss": 0.1405, + "num_input_tokens_seen": 131634368, + "step": 41825 + }, + { + "epoch": 2.6778055182126623, + "grad_norm": 28.15713882446289, + "learning_rate": 2.626090180975544e-06, + "loss": 0.3287, + "num_input_tokens_seen": 131649472, + "step": 41830 + }, + { + "epoch": 2.6781256001536393, + "grad_norm": 11.138851165771484, + "learning_rate": 2.62553223648541e-06, + "loss": 0.1763, + "num_input_tokens_seen": 131663552, + "step": 41835 + }, + { + "epoch": 2.6784456820946163, + "grad_norm": 15.188346862792969, + "learning_rate": 2.62497428572685e-06, + "loss": 0.2578, + "num_input_tokens_seen": 131677760, + "step": 41840 + }, + { + "epoch": 2.6787657640355933, + "grad_norm": 12.703963279724121, + "learning_rate": 2.6244163287277247e-06, + "loss": 0.3469, + "num_input_tokens_seen": 131693504, + "step": 41845 + }, + { + "epoch": 2.67908584597657, + "grad_norm": 7.853699207305908, + "learning_rate": 2.623858365515896e-06, + "loss": 0.163, + "num_input_tokens_seen": 131708992, + "step": 41850 + }, + { + "epoch": 2.679405927917547, + "grad_norm": 54.13878631591797, + "learning_rate": 2.6233003961192255e-06, + "loss": 0.2484, + "num_input_tokens_seen": 131724416, + "step": 41855 + }, + { + "epoch": 2.679726009858524, + "grad_norm": 8.601041793823242, + "learning_rate": 2.6227424205655762e-06, + "loss": 0.1891, + "num_input_tokens_seen": 131739264, + "step": 41860 + }, + { + "epoch": 2.680046091799501, + "grad_norm": 11.330079078674316, + "learning_rate": 2.6221844388828092e-06, + "loss": 0.2357, + "num_input_tokens_seen": 131755520, + "step": 41865 + }, + { + "epoch": 2.6803661737404774, + "grad_norm": 10.433127403259277, + "learning_rate": 2.621626451098788e-06, + "loss": 0.1739, + "num_input_tokens_seen": 131771840, + "step": 41870 + }, + { + "epoch": 2.6806862556814544, + "grad_norm": 7.0228352546691895, + "learning_rate": 2.621068457241375e-06, + "loss": 0.3005, + "num_input_tokens_seen": 131788032, + "step": 41875 + }, + { + "epoch": 2.6810063376224313, + "grad_norm": 7.00137996673584, + "learning_rate": 2.6205104573384344e-06, + "loss": 0.2631, + "num_input_tokens_seen": 131804416, + "step": 41880 + }, + { + "epoch": 2.6813264195634083, + "grad_norm": 43.33604049682617, + "learning_rate": 2.6199524514178285e-06, + "loss": 0.1928, + "num_input_tokens_seen": 131820096, + "step": 41885 + }, + { + "epoch": 2.6816465015043853, + "grad_norm": 3.5668110847473145, + "learning_rate": 2.619394439507423e-06, + "loss": 0.1743, + "num_input_tokens_seen": 131836032, + "step": 41890 + }, + { + "epoch": 2.681966583445362, + "grad_norm": 15.436537742614746, + "learning_rate": 2.618836421635081e-06, + "loss": 0.2193, + "num_input_tokens_seen": 131850816, + "step": 41895 + }, + { + "epoch": 2.682286665386339, + "grad_norm": 44.689327239990234, + "learning_rate": 2.618278397828666e-06, + "loss": 0.2378, + "num_input_tokens_seen": 131866112, + "step": 41900 + }, + { + "epoch": 2.682606747327316, + "grad_norm": 9.695342063903809, + "learning_rate": 2.6177203681160445e-06, + "loss": 0.2459, + "num_input_tokens_seen": 131881344, + "step": 41905 + }, + { + "epoch": 2.682926829268293, + "grad_norm": 19.30942153930664, + "learning_rate": 2.617162332525081e-06, + "loss": 0.2885, + "num_input_tokens_seen": 131896064, + "step": 41910 + }, + { + "epoch": 2.6832469112092694, + "grad_norm": 21.983108520507812, + "learning_rate": 2.6166042910836408e-06, + "loss": 0.1519, + "num_input_tokens_seen": 131915648, + "step": 41915 + }, + { + "epoch": 2.6835669931502464, + "grad_norm": 44.612762451171875, + "learning_rate": 2.6160462438195905e-06, + "loss": 0.2115, + "num_input_tokens_seen": 131930688, + "step": 41920 + }, + { + "epoch": 2.6838870750912234, + "grad_norm": 10.119032859802246, + "learning_rate": 2.6154881907607943e-06, + "loss": 0.2692, + "num_input_tokens_seen": 131946112, + "step": 41925 + }, + { + "epoch": 2.6842071570322004, + "grad_norm": 41.16634750366211, + "learning_rate": 2.61493013193512e-06, + "loss": 0.2683, + "num_input_tokens_seen": 131962304, + "step": 41930 + }, + { + "epoch": 2.6845272389731774, + "grad_norm": 11.267885208129883, + "learning_rate": 2.6143720673704337e-06, + "loss": 0.2109, + "num_input_tokens_seen": 131977664, + "step": 41935 + }, + { + "epoch": 2.684847320914154, + "grad_norm": 9.147860527038574, + "learning_rate": 2.613813997094601e-06, + "loss": 0.2304, + "num_input_tokens_seen": 131993024, + "step": 41940 + }, + { + "epoch": 2.685167402855131, + "grad_norm": 23.50022315979004, + "learning_rate": 2.6132559211354907e-06, + "loss": 0.2856, + "num_input_tokens_seen": 132008192, + "step": 41945 + }, + { + "epoch": 2.685487484796108, + "grad_norm": 11.449419021606445, + "learning_rate": 2.61269783952097e-06, + "loss": 0.2237, + "num_input_tokens_seen": 132023232, + "step": 41950 + }, + { + "epoch": 2.6858075667370844, + "grad_norm": 12.674370765686035, + "learning_rate": 2.612139752278906e-06, + "loss": 0.3079, + "num_input_tokens_seen": 132039552, + "step": 41955 + }, + { + "epoch": 2.6861276486780614, + "grad_norm": 6.699582576751709, + "learning_rate": 2.6115816594371674e-06, + "loss": 0.1774, + "num_input_tokens_seen": 132055168, + "step": 41960 + }, + { + "epoch": 2.6864477306190384, + "grad_norm": 17.275554656982422, + "learning_rate": 2.6110235610236207e-06, + "loss": 0.1585, + "num_input_tokens_seen": 132070464, + "step": 41965 + }, + { + "epoch": 2.6867678125600154, + "grad_norm": 36.31129837036133, + "learning_rate": 2.6104654570661357e-06, + "loss": 0.275, + "num_input_tokens_seen": 132086848, + "step": 41970 + }, + { + "epoch": 2.6870878945009924, + "grad_norm": 7.511320114135742, + "learning_rate": 2.6099073475925824e-06, + "loss": 0.3131, + "num_input_tokens_seen": 132100864, + "step": 41975 + }, + { + "epoch": 2.6874079764419694, + "grad_norm": 18.46245002746582, + "learning_rate": 2.609349232630828e-06, + "loss": 0.2236, + "num_input_tokens_seen": 132116672, + "step": 41980 + }, + { + "epoch": 2.687728058382946, + "grad_norm": 9.428324699401855, + "learning_rate": 2.6087911122087427e-06, + "loss": 0.287, + "num_input_tokens_seen": 132133632, + "step": 41985 + }, + { + "epoch": 2.688048140323923, + "grad_norm": 4.4608941078186035, + "learning_rate": 2.6082329863541956e-06, + "loss": 0.2718, + "num_input_tokens_seen": 132149376, + "step": 41990 + }, + { + "epoch": 2.6883682222649, + "grad_norm": 6.628734588623047, + "learning_rate": 2.607674855095057e-06, + "loss": 0.1877, + "num_input_tokens_seen": 132164736, + "step": 41995 + }, + { + "epoch": 2.6886883042058765, + "grad_norm": 14.931358337402344, + "learning_rate": 2.607116718459197e-06, + "loss": 0.2317, + "num_input_tokens_seen": 132179840, + "step": 42000 + }, + { + "epoch": 2.6890083861468534, + "grad_norm": 36.42599868774414, + "learning_rate": 2.606558576474486e-06, + "loss": 0.3007, + "num_input_tokens_seen": 132194624, + "step": 42005 + }, + { + "epoch": 2.6893284680878304, + "grad_norm": 8.173050880432129, + "learning_rate": 2.6060004291687953e-06, + "loss": 0.1507, + "num_input_tokens_seen": 132209728, + "step": 42010 + }, + { + "epoch": 2.6896485500288074, + "grad_norm": 25.537248611450195, + "learning_rate": 2.605442276569995e-06, + "loss": 0.1296, + "num_input_tokens_seen": 132225536, + "step": 42015 + }, + { + "epoch": 2.6899686319697844, + "grad_norm": 9.657831192016602, + "learning_rate": 2.604884118705956e-06, + "loss": 0.186, + "num_input_tokens_seen": 132240832, + "step": 42020 + }, + { + "epoch": 2.6902887139107614, + "grad_norm": 8.640597343444824, + "learning_rate": 2.6043259556045507e-06, + "loss": 0.1699, + "num_input_tokens_seen": 132256512, + "step": 42025 + }, + { + "epoch": 2.690608795851738, + "grad_norm": 17.45255470275879, + "learning_rate": 2.6037677872936508e-06, + "loss": 0.2736, + "num_input_tokens_seen": 132272000, + "step": 42030 + }, + { + "epoch": 2.690928877792715, + "grad_norm": 9.907628059387207, + "learning_rate": 2.603209613801128e-06, + "loss": 0.2222, + "num_input_tokens_seen": 132288000, + "step": 42035 + }, + { + "epoch": 2.691248959733692, + "grad_norm": 8.133905410766602, + "learning_rate": 2.6026514351548556e-06, + "loss": 0.2271, + "num_input_tokens_seen": 132305152, + "step": 42040 + }, + { + "epoch": 2.6915690416746685, + "grad_norm": 17.744625091552734, + "learning_rate": 2.602093251382704e-06, + "loss": 0.2093, + "num_input_tokens_seen": 132320768, + "step": 42045 + }, + { + "epoch": 2.6918891236156455, + "grad_norm": 5.976108074188232, + "learning_rate": 2.601535062512548e-06, + "loss": 0.1654, + "num_input_tokens_seen": 132336256, + "step": 42050 + }, + { + "epoch": 2.6922092055566225, + "grad_norm": 10.052351951599121, + "learning_rate": 2.600976868572259e-06, + "loss": 0.1773, + "num_input_tokens_seen": 132351040, + "step": 42055 + }, + { + "epoch": 2.6925292874975995, + "grad_norm": 8.653334617614746, + "learning_rate": 2.600418669589712e-06, + "loss": 0.2258, + "num_input_tokens_seen": 132366336, + "step": 42060 + }, + { + "epoch": 2.6928493694385764, + "grad_norm": 63.922637939453125, + "learning_rate": 2.5998604655927794e-06, + "loss": 0.1983, + "num_input_tokens_seen": 132380864, + "step": 42065 + }, + { + "epoch": 2.6931694513795534, + "grad_norm": 9.04688549041748, + "learning_rate": 2.599302256609335e-06, + "loss": 0.2047, + "num_input_tokens_seen": 132397248, + "step": 42070 + }, + { + "epoch": 2.69348953332053, + "grad_norm": 9.838821411132812, + "learning_rate": 2.5987440426672537e-06, + "loss": 0.1606, + "num_input_tokens_seen": 132412928, + "step": 42075 + }, + { + "epoch": 2.693809615261507, + "grad_norm": 42.18534851074219, + "learning_rate": 2.5981858237944084e-06, + "loss": 0.293, + "num_input_tokens_seen": 132428416, + "step": 42080 + }, + { + "epoch": 2.694129697202484, + "grad_norm": 14.02318286895752, + "learning_rate": 2.597627600018675e-06, + "loss": 0.2504, + "num_input_tokens_seen": 132444352, + "step": 42085 + }, + { + "epoch": 2.6944497791434605, + "grad_norm": 10.004277229309082, + "learning_rate": 2.597069371367928e-06, + "loss": 0.2006, + "num_input_tokens_seen": 132459712, + "step": 42090 + }, + { + "epoch": 2.6947698610844375, + "grad_norm": 5.416501045227051, + "learning_rate": 2.596511137870042e-06, + "loss": 0.18, + "num_input_tokens_seen": 132476672, + "step": 42095 + }, + { + "epoch": 2.6950899430254145, + "grad_norm": 33.2869758605957, + "learning_rate": 2.595952899552893e-06, + "loss": 0.2087, + "num_input_tokens_seen": 132492224, + "step": 42100 + }, + { + "epoch": 2.6954100249663915, + "grad_norm": 80.02841186523438, + "learning_rate": 2.5953946564443554e-06, + "loss": 0.2058, + "num_input_tokens_seen": 132507456, + "step": 42105 + }, + { + "epoch": 2.6957301069073685, + "grad_norm": 56.987579345703125, + "learning_rate": 2.5948364085723055e-06, + "loss": 0.2725, + "num_input_tokens_seen": 132523392, + "step": 42110 + }, + { + "epoch": 2.696050188848345, + "grad_norm": 15.17396354675293, + "learning_rate": 2.5942781559646197e-06, + "loss": 0.2302, + "num_input_tokens_seen": 132538816, + "step": 42115 + }, + { + "epoch": 2.696370270789322, + "grad_norm": 5.4446492195129395, + "learning_rate": 2.5937198986491735e-06, + "loss": 0.1905, + "num_input_tokens_seen": 132554432, + "step": 42120 + }, + { + "epoch": 2.696690352730299, + "grad_norm": 12.28258228302002, + "learning_rate": 2.5931616366538444e-06, + "loss": 0.1991, + "num_input_tokens_seen": 132570368, + "step": 42125 + }, + { + "epoch": 2.697010434671276, + "grad_norm": 3.949202537536621, + "learning_rate": 2.592603370006508e-06, + "loss": 0.2057, + "num_input_tokens_seen": 132586048, + "step": 42130 + }, + { + "epoch": 2.6973305166122525, + "grad_norm": 13.240488052368164, + "learning_rate": 2.592045098735041e-06, + "loss": 0.292, + "num_input_tokens_seen": 132601088, + "step": 42135 + }, + { + "epoch": 2.6976505985532295, + "grad_norm": 7.6532440185546875, + "learning_rate": 2.5914868228673214e-06, + "loss": 0.1806, + "num_input_tokens_seen": 132616448, + "step": 42140 + }, + { + "epoch": 2.6979706804942065, + "grad_norm": 9.327024459838867, + "learning_rate": 2.590928542431227e-06, + "loss": 0.2344, + "num_input_tokens_seen": 132631296, + "step": 42145 + }, + { + "epoch": 2.6982907624351835, + "grad_norm": 14.489604949951172, + "learning_rate": 2.5903702574546346e-06, + "loss": 0.1493, + "num_input_tokens_seen": 132648256, + "step": 42150 + }, + { + "epoch": 2.6986108443761605, + "grad_norm": 12.850727081298828, + "learning_rate": 2.589811967965423e-06, + "loss": 0.2154, + "num_input_tokens_seen": 132664448, + "step": 42155 + }, + { + "epoch": 2.698930926317137, + "grad_norm": 293.7732238769531, + "learning_rate": 2.5892536739914685e-06, + "loss": 0.2631, + "num_input_tokens_seen": 132679872, + "step": 42160 + }, + { + "epoch": 2.699251008258114, + "grad_norm": 8.179464340209961, + "learning_rate": 2.588695375560651e-06, + "loss": 0.2164, + "num_input_tokens_seen": 132694464, + "step": 42165 + }, + { + "epoch": 2.699571090199091, + "grad_norm": 31.65673065185547, + "learning_rate": 2.5881370727008474e-06, + "loss": 0.2184, + "num_input_tokens_seen": 132710784, + "step": 42170 + }, + { + "epoch": 2.699891172140068, + "grad_norm": 18.42332649230957, + "learning_rate": 2.5875787654399387e-06, + "loss": 0.1727, + "num_input_tokens_seen": 132726976, + "step": 42175 + }, + { + "epoch": 2.7002112540810446, + "grad_norm": 8.759193420410156, + "learning_rate": 2.5870204538058023e-06, + "loss": 0.3153, + "num_input_tokens_seen": 132742144, + "step": 42180 + }, + { + "epoch": 2.7005313360220216, + "grad_norm": 17.76137351989746, + "learning_rate": 2.586462137826318e-06, + "loss": 0.1987, + "num_input_tokens_seen": 132757056, + "step": 42185 + }, + { + "epoch": 2.7008514179629985, + "grad_norm": 24.108078002929688, + "learning_rate": 2.5859038175293645e-06, + "loss": 0.2785, + "num_input_tokens_seen": 132772160, + "step": 42190 + }, + { + "epoch": 2.7011714999039755, + "grad_norm": 61.21319580078125, + "learning_rate": 2.5853454929428213e-06, + "loss": 0.2969, + "num_input_tokens_seen": 132786880, + "step": 42195 + }, + { + "epoch": 2.7014915818449525, + "grad_norm": 6.961666107177734, + "learning_rate": 2.584787164094569e-06, + "loss": 0.1321, + "num_input_tokens_seen": 132805760, + "step": 42200 + }, + { + "epoch": 2.701811663785929, + "grad_norm": 12.244194984436035, + "learning_rate": 2.5842288310124876e-06, + "loss": 0.2687, + "num_input_tokens_seen": 132822272, + "step": 42205 + }, + { + "epoch": 2.702131745726906, + "grad_norm": 8.42068862915039, + "learning_rate": 2.583670493724457e-06, + "loss": 0.1725, + "num_input_tokens_seen": 132837888, + "step": 42210 + }, + { + "epoch": 2.702451827667883, + "grad_norm": 6.3686394691467285, + "learning_rate": 2.583112152258358e-06, + "loss": 0.187, + "num_input_tokens_seen": 132852544, + "step": 42215 + }, + { + "epoch": 2.7027719096088596, + "grad_norm": 13.241046905517578, + "learning_rate": 2.5825538066420713e-06, + "loss": 0.3097, + "num_input_tokens_seen": 132868800, + "step": 42220 + }, + { + "epoch": 2.7030919915498366, + "grad_norm": 34.049198150634766, + "learning_rate": 2.5819954569034767e-06, + "loss": 0.2885, + "num_input_tokens_seen": 132884160, + "step": 42225 + }, + { + "epoch": 2.7034120734908136, + "grad_norm": 9.199872970581055, + "learning_rate": 2.5814371030704566e-06, + "loss": 0.2297, + "num_input_tokens_seen": 132900096, + "step": 42230 + }, + { + "epoch": 2.7037321554317906, + "grad_norm": 24.43341827392578, + "learning_rate": 2.5808787451708915e-06, + "loss": 0.1881, + "num_input_tokens_seen": 132915392, + "step": 42235 + }, + { + "epoch": 2.7040522373727676, + "grad_norm": 6.033308506011963, + "learning_rate": 2.580320383232664e-06, + "loss": 0.1998, + "num_input_tokens_seen": 132932096, + "step": 42240 + }, + { + "epoch": 2.7043723193137446, + "grad_norm": 8.837231636047363, + "learning_rate": 2.579762017283654e-06, + "loss": 0.1977, + "num_input_tokens_seen": 132948224, + "step": 42245 + }, + { + "epoch": 2.704692401254721, + "grad_norm": 12.751534461975098, + "learning_rate": 2.5792036473517444e-06, + "loss": 0.2544, + "num_input_tokens_seen": 132964096, + "step": 42250 + }, + { + "epoch": 2.705012483195698, + "grad_norm": 9.272912979125977, + "learning_rate": 2.578645273464817e-06, + "loss": 0.2317, + "num_input_tokens_seen": 132979392, + "step": 42255 + }, + { + "epoch": 2.705332565136675, + "grad_norm": 8.225361824035645, + "learning_rate": 2.578086895650755e-06, + "loss": 0.3085, + "num_input_tokens_seen": 132995840, + "step": 42260 + }, + { + "epoch": 2.7056526470776516, + "grad_norm": 7.736242294311523, + "learning_rate": 2.5775285139374396e-06, + "loss": 0.148, + "num_input_tokens_seen": 133011840, + "step": 42265 + }, + { + "epoch": 2.7059727290186286, + "grad_norm": 36.952423095703125, + "learning_rate": 2.576970128352755e-06, + "loss": 0.2201, + "num_input_tokens_seen": 133027200, + "step": 42270 + }, + { + "epoch": 2.7062928109596056, + "grad_norm": 7.144989967346191, + "learning_rate": 2.5764117389245822e-06, + "loss": 0.1732, + "num_input_tokens_seen": 133041280, + "step": 42275 + }, + { + "epoch": 2.7066128929005826, + "grad_norm": 27.4525089263916, + "learning_rate": 2.5758533456808055e-06, + "loss": 0.2119, + "num_input_tokens_seen": 133056128, + "step": 42280 + }, + { + "epoch": 2.7069329748415596, + "grad_norm": 54.48021697998047, + "learning_rate": 2.575294948649307e-06, + "loss": 0.2754, + "num_input_tokens_seen": 133072128, + "step": 42285 + }, + { + "epoch": 2.7072530567825366, + "grad_norm": 7.551522254943848, + "learning_rate": 2.574736547857972e-06, + "loss": 0.1819, + "num_input_tokens_seen": 133086784, + "step": 42290 + }, + { + "epoch": 2.707573138723513, + "grad_norm": 3.2993555068969727, + "learning_rate": 2.574178143334683e-06, + "loss": 0.1896, + "num_input_tokens_seen": 133103488, + "step": 42295 + }, + { + "epoch": 2.70789322066449, + "grad_norm": 4.881119251251221, + "learning_rate": 2.573619735107324e-06, + "loss": 0.2065, + "num_input_tokens_seen": 133118528, + "step": 42300 + }, + { + "epoch": 2.708213302605467, + "grad_norm": 10.532005310058594, + "learning_rate": 2.5730613232037787e-06, + "loss": 0.1918, + "num_input_tokens_seen": 133135360, + "step": 42305 + }, + { + "epoch": 2.7085333845464437, + "grad_norm": 9.131166458129883, + "learning_rate": 2.5725029076519308e-06, + "loss": 0.3044, + "num_input_tokens_seen": 133151168, + "step": 42310 + }, + { + "epoch": 2.7088534664874206, + "grad_norm": 9.265780448913574, + "learning_rate": 2.5719444884796653e-06, + "loss": 0.3606, + "num_input_tokens_seen": 133167232, + "step": 42315 + }, + { + "epoch": 2.7091735484283976, + "grad_norm": 10.825899124145508, + "learning_rate": 2.5713860657148677e-06, + "loss": 0.177, + "num_input_tokens_seen": 133183616, + "step": 42320 + }, + { + "epoch": 2.7094936303693746, + "grad_norm": 22.948320388793945, + "learning_rate": 2.5708276393854216e-06, + "loss": 0.2269, + "num_input_tokens_seen": 133198528, + "step": 42325 + }, + { + "epoch": 2.7098137123103516, + "grad_norm": 13.87636947631836, + "learning_rate": 2.5702692095192123e-06, + "loss": 0.2223, + "num_input_tokens_seen": 133213696, + "step": 42330 + }, + { + "epoch": 2.7101337942513286, + "grad_norm": 32.35072708129883, + "learning_rate": 2.569710776144124e-06, + "loss": 0.2247, + "num_input_tokens_seen": 133229248, + "step": 42335 + }, + { + "epoch": 2.710453876192305, + "grad_norm": 5.429462909698486, + "learning_rate": 2.5691523392880428e-06, + "loss": 0.3229, + "num_input_tokens_seen": 133245568, + "step": 42340 + }, + { + "epoch": 2.710773958133282, + "grad_norm": 22.138843536376953, + "learning_rate": 2.5685938989788545e-06, + "loss": 0.2048, + "num_input_tokens_seen": 133260992, + "step": 42345 + }, + { + "epoch": 2.711094040074259, + "grad_norm": 6.96938943862915, + "learning_rate": 2.568035455244444e-06, + "loss": 0.2256, + "num_input_tokens_seen": 133276544, + "step": 42350 + }, + { + "epoch": 2.7114141220152357, + "grad_norm": 11.691252708435059, + "learning_rate": 2.5674770081126977e-06, + "loss": 0.3146, + "num_input_tokens_seen": 133293184, + "step": 42355 + }, + { + "epoch": 2.7117342039562127, + "grad_norm": 6.976397514343262, + "learning_rate": 2.5669185576115004e-06, + "loss": 0.1427, + "num_input_tokens_seen": 133308352, + "step": 42360 + }, + { + "epoch": 2.7120542858971897, + "grad_norm": 31.848039627075195, + "learning_rate": 2.5663601037687386e-06, + "loss": 0.2569, + "num_input_tokens_seen": 133323968, + "step": 42365 + }, + { + "epoch": 2.7123743678381667, + "grad_norm": 10.183934211730957, + "learning_rate": 2.565801646612299e-06, + "loss": 0.2647, + "num_input_tokens_seen": 133339584, + "step": 42370 + }, + { + "epoch": 2.7126944497791436, + "grad_norm": 8.679727554321289, + "learning_rate": 2.5652431861700683e-06, + "loss": 0.334, + "num_input_tokens_seen": 133355584, + "step": 42375 + }, + { + "epoch": 2.71301453172012, + "grad_norm": 53.08897399902344, + "learning_rate": 2.5646847224699327e-06, + "loss": 0.3551, + "num_input_tokens_seen": 133370944, + "step": 42380 + }, + { + "epoch": 2.713334613661097, + "grad_norm": 91.38888549804688, + "learning_rate": 2.5641262555397787e-06, + "loss": 0.2646, + "num_input_tokens_seen": 133387456, + "step": 42385 + }, + { + "epoch": 2.713654695602074, + "grad_norm": 20.0068416595459, + "learning_rate": 2.5635677854074942e-06, + "loss": 0.1974, + "num_input_tokens_seen": 133403904, + "step": 42390 + }, + { + "epoch": 2.713974777543051, + "grad_norm": 6.8328633308410645, + "learning_rate": 2.563009312100965e-06, + "loss": 0.1941, + "num_input_tokens_seen": 133420736, + "step": 42395 + }, + { + "epoch": 2.7142948594840277, + "grad_norm": 4.2723774909973145, + "learning_rate": 2.5624508356480794e-06, + "loss": 0.2239, + "num_input_tokens_seen": 133436608, + "step": 42400 + }, + { + "epoch": 2.7146149414250047, + "grad_norm": 25.07231903076172, + "learning_rate": 2.561892356076724e-06, + "loss": 0.2457, + "num_input_tokens_seen": 133451840, + "step": 42405 + }, + { + "epoch": 2.7149350233659817, + "grad_norm": 7.470236301422119, + "learning_rate": 2.561333873414787e-06, + "loss": 0.1412, + "num_input_tokens_seen": 133467328, + "step": 42410 + }, + { + "epoch": 2.7152551053069587, + "grad_norm": 19.231586456298828, + "learning_rate": 2.560775387690156e-06, + "loss": 0.2157, + "num_input_tokens_seen": 133483264, + "step": 42415 + }, + { + "epoch": 2.7155751872479357, + "grad_norm": 20.286136627197266, + "learning_rate": 2.5602168989307187e-06, + "loss": 0.1953, + "num_input_tokens_seen": 133499584, + "step": 42420 + }, + { + "epoch": 2.715895269188912, + "grad_norm": 32.23820495605469, + "learning_rate": 2.559658407164362e-06, + "loss": 0.3926, + "num_input_tokens_seen": 133516480, + "step": 42425 + }, + { + "epoch": 2.716215351129889, + "grad_norm": 9.970162391662598, + "learning_rate": 2.5590999124189754e-06, + "loss": 0.2246, + "num_input_tokens_seen": 133532480, + "step": 42430 + }, + { + "epoch": 2.716535433070866, + "grad_norm": 14.196342468261719, + "learning_rate": 2.558541414722448e-06, + "loss": 0.2274, + "num_input_tokens_seen": 133547136, + "step": 42435 + }, + { + "epoch": 2.716855515011843, + "grad_norm": 9.545526504516602, + "learning_rate": 2.5579829141026667e-06, + "loss": 0.2198, + "num_input_tokens_seen": 133562880, + "step": 42440 + }, + { + "epoch": 2.7171755969528197, + "grad_norm": 6.556385517120361, + "learning_rate": 2.5574244105875212e-06, + "loss": 0.233, + "num_input_tokens_seen": 133578048, + "step": 42445 + }, + { + "epoch": 2.7174956788937967, + "grad_norm": 7.0955352783203125, + "learning_rate": 2.5568659042048994e-06, + "loss": 0.2013, + "num_input_tokens_seen": 133593920, + "step": 42450 + }, + { + "epoch": 2.7178157608347737, + "grad_norm": 5.681187629699707, + "learning_rate": 2.5563073949826907e-06, + "loss": 0.223, + "num_input_tokens_seen": 133613376, + "step": 42455 + }, + { + "epoch": 2.7181358427757507, + "grad_norm": 7.683139801025391, + "learning_rate": 2.5557488829487836e-06, + "loss": 0.1625, + "num_input_tokens_seen": 133628480, + "step": 42460 + }, + { + "epoch": 2.7184559247167277, + "grad_norm": 6.18312931060791, + "learning_rate": 2.555190368131068e-06, + "loss": 0.1886, + "num_input_tokens_seen": 133643904, + "step": 42465 + }, + { + "epoch": 2.7187760066577042, + "grad_norm": 8.316143989562988, + "learning_rate": 2.554631850557433e-06, + "loss": 0.269, + "num_input_tokens_seen": 133659584, + "step": 42470 + }, + { + "epoch": 2.7190960885986812, + "grad_norm": 103.9076156616211, + "learning_rate": 2.554073330255768e-06, + "loss": 0.241, + "num_input_tokens_seen": 133674880, + "step": 42475 + }, + { + "epoch": 2.7194161705396582, + "grad_norm": 46.634273529052734, + "learning_rate": 2.553514807253962e-06, + "loss": 0.1952, + "num_input_tokens_seen": 133690368, + "step": 42480 + }, + { + "epoch": 2.7197362524806348, + "grad_norm": 13.938005447387695, + "learning_rate": 2.552956281579906e-06, + "loss": 0.1502, + "num_input_tokens_seen": 133705664, + "step": 42485 + }, + { + "epoch": 2.7200563344216118, + "grad_norm": 10.386279106140137, + "learning_rate": 2.552397753261488e-06, + "loss": 0.27, + "num_input_tokens_seen": 133720768, + "step": 42490 + }, + { + "epoch": 2.7203764163625888, + "grad_norm": 9.404519081115723, + "learning_rate": 2.5518392223266e-06, + "loss": 0.1744, + "num_input_tokens_seen": 133735232, + "step": 42495 + }, + { + "epoch": 2.7206964983035657, + "grad_norm": 31.829132080078125, + "learning_rate": 2.551280688803131e-06, + "loss": 0.3103, + "num_input_tokens_seen": 133750080, + "step": 42500 + }, + { + "epoch": 2.7210165802445427, + "grad_norm": 11.551687240600586, + "learning_rate": 2.550722152718972e-06, + "loss": 0.2276, + "num_input_tokens_seen": 133766464, + "step": 42505 + }, + { + "epoch": 2.7213366621855197, + "grad_norm": 19.917387008666992, + "learning_rate": 2.550163614102012e-06, + "loss": 0.2621, + "num_input_tokens_seen": 133782848, + "step": 42510 + }, + { + "epoch": 2.7216567441264963, + "grad_norm": 7.112518787384033, + "learning_rate": 2.549605072980143e-06, + "loss": 0.1367, + "num_input_tokens_seen": 133797696, + "step": 42515 + }, + { + "epoch": 2.7219768260674733, + "grad_norm": 5.398011684417725, + "learning_rate": 2.549046529381255e-06, + "loss": 0.1574, + "num_input_tokens_seen": 133813120, + "step": 42520 + }, + { + "epoch": 2.7222969080084503, + "grad_norm": 7.406321048736572, + "learning_rate": 2.5484879833332392e-06, + "loss": 0.1621, + "num_input_tokens_seen": 133828608, + "step": 42525 + }, + { + "epoch": 2.722616989949427, + "grad_norm": 18.87177848815918, + "learning_rate": 2.5479294348639855e-06, + "loss": 0.2898, + "num_input_tokens_seen": 133844224, + "step": 42530 + }, + { + "epoch": 2.722937071890404, + "grad_norm": 11.899435997009277, + "learning_rate": 2.547370884001386e-06, + "loss": 0.274, + "num_input_tokens_seen": 133860416, + "step": 42535 + }, + { + "epoch": 2.723257153831381, + "grad_norm": 26.428062438964844, + "learning_rate": 2.5468123307733304e-06, + "loss": 0.2179, + "num_input_tokens_seen": 133877568, + "step": 42540 + }, + { + "epoch": 2.7235772357723578, + "grad_norm": 13.832944869995117, + "learning_rate": 2.546253775207711e-06, + "loss": 0.2668, + "num_input_tokens_seen": 133893568, + "step": 42545 + }, + { + "epoch": 2.7238973177133348, + "grad_norm": 11.648285865783691, + "learning_rate": 2.5456952173324197e-06, + "loss": 0.185, + "num_input_tokens_seen": 133909120, + "step": 42550 + }, + { + "epoch": 2.7242173996543118, + "grad_norm": 11.42922306060791, + "learning_rate": 2.5451366571753467e-06, + "loss": 0.3923, + "num_input_tokens_seen": 133924864, + "step": 42555 + }, + { + "epoch": 2.7245374815952883, + "grad_norm": 31.557878494262695, + "learning_rate": 2.5445780947643842e-06, + "loss": 0.2699, + "num_input_tokens_seen": 133940672, + "step": 42560 + }, + { + "epoch": 2.7248575635362653, + "grad_norm": 5.1295928955078125, + "learning_rate": 2.5440195301274247e-06, + "loss": 0.292, + "num_input_tokens_seen": 133956672, + "step": 42565 + }, + { + "epoch": 2.7251776454772423, + "grad_norm": 22.677345275878906, + "learning_rate": 2.543460963292358e-06, + "loss": 0.2636, + "num_input_tokens_seen": 133972416, + "step": 42570 + }, + { + "epoch": 2.725497727418219, + "grad_norm": 11.116344451904297, + "learning_rate": 2.542902394287078e-06, + "loss": 0.2519, + "num_input_tokens_seen": 133990144, + "step": 42575 + }, + { + "epoch": 2.725817809359196, + "grad_norm": 26.762422561645508, + "learning_rate": 2.5423438231394754e-06, + "loss": 0.2507, + "num_input_tokens_seen": 134005632, + "step": 42580 + }, + { + "epoch": 2.726137891300173, + "grad_norm": 12.385327339172363, + "learning_rate": 2.5417852498774433e-06, + "loss": 0.1917, + "num_input_tokens_seen": 134020928, + "step": 42585 + }, + { + "epoch": 2.72645797324115, + "grad_norm": 6.4904351234436035, + "learning_rate": 2.541226674528873e-06, + "loss": 0.2271, + "num_input_tokens_seen": 134036928, + "step": 42590 + }, + { + "epoch": 2.726778055182127, + "grad_norm": 26.585296630859375, + "learning_rate": 2.5406680971216567e-06, + "loss": 0.2658, + "num_input_tokens_seen": 134053056, + "step": 42595 + }, + { + "epoch": 2.727098137123104, + "grad_norm": 20.89179039001465, + "learning_rate": 2.5401095176836887e-06, + "loss": 0.2035, + "num_input_tokens_seen": 134068800, + "step": 42600 + }, + { + "epoch": 2.7274182190640803, + "grad_norm": 6.622619152069092, + "learning_rate": 2.5395509362428593e-06, + "loss": 0.224, + "num_input_tokens_seen": 134084736, + "step": 42605 + }, + { + "epoch": 2.7277383010050573, + "grad_norm": 9.459654808044434, + "learning_rate": 2.538992352827063e-06, + "loss": 0.2952, + "num_input_tokens_seen": 134101056, + "step": 42610 + }, + { + "epoch": 2.7280583829460343, + "grad_norm": 11.676200866699219, + "learning_rate": 2.5384337674641913e-06, + "loss": 0.2335, + "num_input_tokens_seen": 134115648, + "step": 42615 + }, + { + "epoch": 2.728378464887011, + "grad_norm": 8.338285446166992, + "learning_rate": 2.5378751801821375e-06, + "loss": 0.1952, + "num_input_tokens_seen": 134130880, + "step": 42620 + }, + { + "epoch": 2.728698546827988, + "grad_norm": 9.712977409362793, + "learning_rate": 2.5373165910087943e-06, + "loss": 0.2794, + "num_input_tokens_seen": 134147072, + "step": 42625 + }, + { + "epoch": 2.729018628768965, + "grad_norm": 6.30453634262085, + "learning_rate": 2.5367579999720552e-06, + "loss": 0.1391, + "num_input_tokens_seen": 134163520, + "step": 42630 + }, + { + "epoch": 2.729338710709942, + "grad_norm": 11.192170143127441, + "learning_rate": 2.5361994070998135e-06, + "loss": 0.1248, + "num_input_tokens_seen": 134179968, + "step": 42635 + }, + { + "epoch": 2.729658792650919, + "grad_norm": 15.723478317260742, + "learning_rate": 2.5356408124199616e-06, + "loss": 0.2446, + "num_input_tokens_seen": 134195072, + "step": 42640 + }, + { + "epoch": 2.7299788745918954, + "grad_norm": 10.675471305847168, + "learning_rate": 2.5350822159603927e-06, + "loss": 0.2583, + "num_input_tokens_seen": 134210816, + "step": 42645 + }, + { + "epoch": 2.7302989565328724, + "grad_norm": 32.060447692871094, + "learning_rate": 2.5345236177490012e-06, + "loss": 0.1995, + "num_input_tokens_seen": 134226432, + "step": 42650 + }, + { + "epoch": 2.7306190384738493, + "grad_norm": 6.202594757080078, + "learning_rate": 2.53396501781368e-06, + "loss": 0.2268, + "num_input_tokens_seen": 134241536, + "step": 42655 + }, + { + "epoch": 2.7309391204148263, + "grad_norm": 13.477593421936035, + "learning_rate": 2.5334064161823223e-06, + "loss": 0.2936, + "num_input_tokens_seen": 134256896, + "step": 42660 + }, + { + "epoch": 2.731259202355803, + "grad_norm": 23.247421264648438, + "learning_rate": 2.532847812882822e-06, + "loss": 0.1958, + "num_input_tokens_seen": 134272704, + "step": 42665 + }, + { + "epoch": 2.73157928429678, + "grad_norm": 6.66174840927124, + "learning_rate": 2.532289207943074e-06, + "loss": 0.2381, + "num_input_tokens_seen": 134287616, + "step": 42670 + }, + { + "epoch": 2.731899366237757, + "grad_norm": 6.1174492835998535, + "learning_rate": 2.5317306013909703e-06, + "loss": 0.2003, + "num_input_tokens_seen": 134303552, + "step": 42675 + }, + { + "epoch": 2.732219448178734, + "grad_norm": 6.191137790679932, + "learning_rate": 2.5311719932544067e-06, + "loss": 0.2278, + "num_input_tokens_seen": 134319936, + "step": 42680 + }, + { + "epoch": 2.732539530119711, + "grad_norm": 9.142228126525879, + "learning_rate": 2.5306133835612744e-06, + "loss": 0.2232, + "num_input_tokens_seen": 134335616, + "step": 42685 + }, + { + "epoch": 2.7328596120606874, + "grad_norm": 6.361174583435059, + "learning_rate": 2.530054772339471e-06, + "loss": 0.2385, + "num_input_tokens_seen": 134352320, + "step": 42690 + }, + { + "epoch": 2.7331796940016644, + "grad_norm": 45.695396423339844, + "learning_rate": 2.5294961596168876e-06, + "loss": 0.1843, + "num_input_tokens_seen": 134367104, + "step": 42695 + }, + { + "epoch": 2.7334997759426414, + "grad_norm": 7.538200855255127, + "learning_rate": 2.5289375454214203e-06, + "loss": 0.1649, + "num_input_tokens_seen": 134382976, + "step": 42700 + }, + { + "epoch": 2.7338198578836184, + "grad_norm": 7.098409652709961, + "learning_rate": 2.5283789297809626e-06, + "loss": 0.2481, + "num_input_tokens_seen": 134398528, + "step": 42705 + }, + { + "epoch": 2.734139939824595, + "grad_norm": 35.9707145690918, + "learning_rate": 2.5278203127234087e-06, + "loss": 0.2089, + "num_input_tokens_seen": 134414080, + "step": 42710 + }, + { + "epoch": 2.734460021765572, + "grad_norm": 17.002235412597656, + "learning_rate": 2.5272616942766533e-06, + "loss": 0.2383, + "num_input_tokens_seen": 134429824, + "step": 42715 + }, + { + "epoch": 2.734780103706549, + "grad_norm": 7.479164123535156, + "learning_rate": 2.5267030744685907e-06, + "loss": 0.2569, + "num_input_tokens_seen": 134445824, + "step": 42720 + }, + { + "epoch": 2.735100185647526, + "grad_norm": 7.433086395263672, + "learning_rate": 2.5261444533271157e-06, + "loss": 0.2395, + "num_input_tokens_seen": 134461376, + "step": 42725 + }, + { + "epoch": 2.735420267588503, + "grad_norm": 5.031731128692627, + "learning_rate": 2.5255858308801236e-06, + "loss": 0.215, + "num_input_tokens_seen": 134476992, + "step": 42730 + }, + { + "epoch": 2.7357403495294794, + "grad_norm": 11.729493141174316, + "learning_rate": 2.525027207155508e-06, + "loss": 0.2526, + "num_input_tokens_seen": 134493632, + "step": 42735 + }, + { + "epoch": 2.7360604314704564, + "grad_norm": 11.952950477600098, + "learning_rate": 2.5244685821811633e-06, + "loss": 0.2965, + "num_input_tokens_seen": 134508992, + "step": 42740 + }, + { + "epoch": 2.7363805134114334, + "grad_norm": 14.32541561126709, + "learning_rate": 2.523909955984986e-06, + "loss": 0.1246, + "num_input_tokens_seen": 134524608, + "step": 42745 + }, + { + "epoch": 2.73670059535241, + "grad_norm": 57.916107177734375, + "learning_rate": 2.5233513285948694e-06, + "loss": 0.2433, + "num_input_tokens_seen": 134538880, + "step": 42750 + }, + { + "epoch": 2.737020677293387, + "grad_norm": 9.841031074523926, + "learning_rate": 2.52279270003871e-06, + "loss": 0.2578, + "num_input_tokens_seen": 134554752, + "step": 42755 + }, + { + "epoch": 2.737340759234364, + "grad_norm": 11.152612686157227, + "learning_rate": 2.5222340703444014e-06, + "loss": 0.3722, + "num_input_tokens_seen": 134569728, + "step": 42760 + }, + { + "epoch": 2.737660841175341, + "grad_norm": 5.723878383636475, + "learning_rate": 2.521675439539839e-06, + "loss": 0.2052, + "num_input_tokens_seen": 134585088, + "step": 42765 + }, + { + "epoch": 2.737980923116318, + "grad_norm": 42.22344970703125, + "learning_rate": 2.521116807652918e-06, + "loss": 0.3977, + "num_input_tokens_seen": 134599296, + "step": 42770 + }, + { + "epoch": 2.738301005057295, + "grad_norm": 11.946226119995117, + "learning_rate": 2.5205581747115337e-06, + "loss": 0.1524, + "num_input_tokens_seen": 134614528, + "step": 42775 + }, + { + "epoch": 2.7386210869982714, + "grad_norm": 91.23129272460938, + "learning_rate": 2.519999540743581e-06, + "loss": 0.2973, + "num_input_tokens_seen": 134629312, + "step": 42780 + }, + { + "epoch": 2.7389411689392484, + "grad_norm": 44.321998596191406, + "learning_rate": 2.519440905776956e-06, + "loss": 0.1537, + "num_input_tokens_seen": 134644736, + "step": 42785 + }, + { + "epoch": 2.7392612508802254, + "grad_norm": 8.278681755065918, + "learning_rate": 2.5188822698395533e-06, + "loss": 0.2517, + "num_input_tokens_seen": 134660224, + "step": 42790 + }, + { + "epoch": 2.739581332821202, + "grad_norm": 24.75688934326172, + "learning_rate": 2.518323632959268e-06, + "loss": 0.3474, + "num_input_tokens_seen": 134675584, + "step": 42795 + }, + { + "epoch": 2.739901414762179, + "grad_norm": 14.177594184875488, + "learning_rate": 2.5177649951639958e-06, + "loss": 0.2745, + "num_input_tokens_seen": 134692416, + "step": 42800 + }, + { + "epoch": 2.740221496703156, + "grad_norm": 10.637560844421387, + "learning_rate": 2.517206356481633e-06, + "loss": 0.2483, + "num_input_tokens_seen": 134707392, + "step": 42805 + }, + { + "epoch": 2.740541578644133, + "grad_norm": 6.029300689697266, + "learning_rate": 2.5166477169400736e-06, + "loss": 0.2671, + "num_input_tokens_seen": 134722368, + "step": 42810 + }, + { + "epoch": 2.74086166058511, + "grad_norm": 29.170148849487305, + "learning_rate": 2.516089076567215e-06, + "loss": 0.2706, + "num_input_tokens_seen": 134737472, + "step": 42815 + }, + { + "epoch": 2.741181742526087, + "grad_norm": 3.960242509841919, + "learning_rate": 2.515530435390951e-06, + "loss": 0.2192, + "num_input_tokens_seen": 134752448, + "step": 42820 + }, + { + "epoch": 2.7415018244670635, + "grad_norm": 85.27262878417969, + "learning_rate": 2.514971793439178e-06, + "loss": 0.2388, + "num_input_tokens_seen": 134771520, + "step": 42825 + }, + { + "epoch": 2.7418219064080405, + "grad_norm": 14.72422981262207, + "learning_rate": 2.514413150739791e-06, + "loss": 0.2213, + "num_input_tokens_seen": 134787264, + "step": 42830 + }, + { + "epoch": 2.7421419883490175, + "grad_norm": 9.21638011932373, + "learning_rate": 2.513854507320687e-06, + "loss": 0.2271, + "num_input_tokens_seen": 134803584, + "step": 42835 + }, + { + "epoch": 2.742462070289994, + "grad_norm": 7.768130302429199, + "learning_rate": 2.513295863209761e-06, + "loss": 0.2145, + "num_input_tokens_seen": 134819904, + "step": 42840 + }, + { + "epoch": 2.742782152230971, + "grad_norm": 43.22362518310547, + "learning_rate": 2.5127372184349084e-06, + "loss": 0.1872, + "num_input_tokens_seen": 134836736, + "step": 42845 + }, + { + "epoch": 2.743102234171948, + "grad_norm": 10.697723388671875, + "learning_rate": 2.512178573024026e-06, + "loss": 0.1589, + "num_input_tokens_seen": 134851584, + "step": 42850 + }, + { + "epoch": 2.743422316112925, + "grad_norm": 12.738363265991211, + "learning_rate": 2.511619927005009e-06, + "loss": 0.148, + "num_input_tokens_seen": 134867136, + "step": 42855 + }, + { + "epoch": 2.743742398053902, + "grad_norm": 20.915746688842773, + "learning_rate": 2.511061280405753e-06, + "loss": 0.1865, + "num_input_tokens_seen": 134884288, + "step": 42860 + }, + { + "epoch": 2.744062479994879, + "grad_norm": 36.47816467285156, + "learning_rate": 2.5105026332541553e-06, + "loss": 0.3706, + "num_input_tokens_seen": 134901120, + "step": 42865 + }, + { + "epoch": 2.7443825619358555, + "grad_norm": 6.8677191734313965, + "learning_rate": 2.5099439855781107e-06, + "loss": 0.2, + "num_input_tokens_seen": 134916736, + "step": 42870 + }, + { + "epoch": 2.7447026438768325, + "grad_norm": 30.190481185913086, + "learning_rate": 2.509385337405514e-06, + "loss": 0.3615, + "num_input_tokens_seen": 134933312, + "step": 42875 + }, + { + "epoch": 2.7450227258178095, + "grad_norm": 7.8789286613464355, + "learning_rate": 2.5088266887642637e-06, + "loss": 0.2678, + "num_input_tokens_seen": 134948224, + "step": 42880 + }, + { + "epoch": 2.745342807758786, + "grad_norm": 6.305541038513184, + "learning_rate": 2.5082680396822533e-06, + "loss": 0.2857, + "num_input_tokens_seen": 134964672, + "step": 42885 + }, + { + "epoch": 2.745662889699763, + "grad_norm": 11.599844932556152, + "learning_rate": 2.507709390187381e-06, + "loss": 0.2464, + "num_input_tokens_seen": 134979840, + "step": 42890 + }, + { + "epoch": 2.74598297164074, + "grad_norm": 10.405330657958984, + "learning_rate": 2.507150740307541e-06, + "loss": 0.2792, + "num_input_tokens_seen": 134995648, + "step": 42895 + }, + { + "epoch": 2.746303053581717, + "grad_norm": 14.855920791625977, + "learning_rate": 2.5065920900706313e-06, + "loss": 0.2634, + "num_input_tokens_seen": 135011904, + "step": 42900 + }, + { + "epoch": 2.746623135522694, + "grad_norm": 8.207921028137207, + "learning_rate": 2.5060334395045467e-06, + "loss": 0.2425, + "num_input_tokens_seen": 135029440, + "step": 42905 + }, + { + "epoch": 2.7469432174636705, + "grad_norm": 16.428668975830078, + "learning_rate": 2.505474788637184e-06, + "loss": 0.3343, + "num_input_tokens_seen": 135044992, + "step": 42910 + }, + { + "epoch": 2.7472632994046475, + "grad_norm": 7.161342144012451, + "learning_rate": 2.5049161374964375e-06, + "loss": 0.1481, + "num_input_tokens_seen": 135061824, + "step": 42915 + }, + { + "epoch": 2.7475833813456245, + "grad_norm": 19.94185447692871, + "learning_rate": 2.5043574861102056e-06, + "loss": 0.2286, + "num_input_tokens_seen": 135076736, + "step": 42920 + }, + { + "epoch": 2.7479034632866015, + "grad_norm": 33.96413803100586, + "learning_rate": 2.5037988345063837e-06, + "loss": 0.4465, + "num_input_tokens_seen": 135094528, + "step": 42925 + }, + { + "epoch": 2.748223545227578, + "grad_norm": 24.580101013183594, + "learning_rate": 2.503240182712868e-06, + "loss": 0.2589, + "num_input_tokens_seen": 135111104, + "step": 42930 + }, + { + "epoch": 2.748543627168555, + "grad_norm": 7.769843578338623, + "learning_rate": 2.5026815307575536e-06, + "loss": 0.1621, + "num_input_tokens_seen": 135126336, + "step": 42935 + }, + { + "epoch": 2.748863709109532, + "grad_norm": 39.58757781982422, + "learning_rate": 2.5021228786683377e-06, + "loss": 0.2015, + "num_input_tokens_seen": 135140928, + "step": 42940 + }, + { + "epoch": 2.749183791050509, + "grad_norm": 34.82770538330078, + "learning_rate": 2.5015642264731165e-06, + "loss": 0.1642, + "num_input_tokens_seen": 135156736, + "step": 42945 + }, + { + "epoch": 2.749503872991486, + "grad_norm": 49.2876091003418, + "learning_rate": 2.5010055741997862e-06, + "loss": 0.2276, + "num_input_tokens_seen": 135171264, + "step": 42950 + }, + { + "epoch": 2.7498239549324626, + "grad_norm": 10.063271522521973, + "learning_rate": 2.500446921876242e-06, + "loss": 0.2371, + "num_input_tokens_seen": 135188096, + "step": 42955 + }, + { + "epoch": 2.7501440368734396, + "grad_norm": 45.68312454223633, + "learning_rate": 2.4998882695303825e-06, + "loss": 0.252, + "num_input_tokens_seen": 135203840, + "step": 42960 + }, + { + "epoch": 2.7504641188144165, + "grad_norm": 6.3154988288879395, + "learning_rate": 2.4993296171901006e-06, + "loss": 0.2395, + "num_input_tokens_seen": 135219840, + "step": 42965 + }, + { + "epoch": 2.750528135202612, + "eval_loss": 0.5056279301643372, + "eval_runtime": 50.9811, + "eval_samples_per_second": 272.376, + "eval_steps_per_second": 34.052, + "num_input_tokens_seen": 135222656, + "step": 42966 + }, + { + "epoch": 2.7507842007553935, + "grad_norm": 55.08515548706055, + "learning_rate": 2.4987709648832947e-06, + "loss": 0.208, + "num_input_tokens_seen": 135235328, + "step": 42970 + }, + { + "epoch": 2.75110428269637, + "grad_norm": 4.283700942993164, + "learning_rate": 2.49821231263786e-06, + "loss": 0.3785, + "num_input_tokens_seen": 135249984, + "step": 42975 + }, + { + "epoch": 2.751424364637347, + "grad_norm": 29.067626953125, + "learning_rate": 2.4976536604816944e-06, + "loss": 0.3224, + "num_input_tokens_seen": 135265024, + "step": 42980 + }, + { + "epoch": 2.751744446578324, + "grad_norm": 6.4455485343933105, + "learning_rate": 2.497095008442691e-06, + "loss": 0.1517, + "num_input_tokens_seen": 135282368, + "step": 42985 + }, + { + "epoch": 2.752064528519301, + "grad_norm": 12.422325134277344, + "learning_rate": 2.4965363565487485e-06, + "loss": 0.1871, + "num_input_tokens_seen": 135298560, + "step": 42990 + }, + { + "epoch": 2.752384610460278, + "grad_norm": 8.636114120483398, + "learning_rate": 2.4959777048277624e-06, + "loss": 0.2182, + "num_input_tokens_seen": 135314112, + "step": 42995 + }, + { + "epoch": 2.7527046924012546, + "grad_norm": 19.09644317626953, + "learning_rate": 2.495419053307629e-06, + "loss": 0.2873, + "num_input_tokens_seen": 135328832, + "step": 43000 + }, + { + "epoch": 2.7530247743422316, + "grad_norm": 59.00430679321289, + "learning_rate": 2.494860402016244e-06, + "loss": 0.2026, + "num_input_tokens_seen": 135343360, + "step": 43005 + }, + { + "epoch": 2.7533448562832086, + "grad_norm": 7.46682071685791, + "learning_rate": 2.4943017509815034e-06, + "loss": 0.2787, + "num_input_tokens_seen": 135358720, + "step": 43010 + }, + { + "epoch": 2.753664938224185, + "grad_norm": 22.837352752685547, + "learning_rate": 2.493743100231305e-06, + "loss": 0.261, + "num_input_tokens_seen": 135373248, + "step": 43015 + }, + { + "epoch": 2.753985020165162, + "grad_norm": 15.136038780212402, + "learning_rate": 2.493184449793542e-06, + "loss": 0.2126, + "num_input_tokens_seen": 135388800, + "step": 43020 + }, + { + "epoch": 2.754305102106139, + "grad_norm": 18.15383529663086, + "learning_rate": 2.492625799696113e-06, + "loss": 0.3461, + "num_input_tokens_seen": 135404736, + "step": 43025 + }, + { + "epoch": 2.754625184047116, + "grad_norm": 9.810629844665527, + "learning_rate": 2.4920671499669122e-06, + "loss": 0.155, + "num_input_tokens_seen": 135419840, + "step": 43030 + }, + { + "epoch": 2.754945265988093, + "grad_norm": 8.85180950164795, + "learning_rate": 2.491508500633838e-06, + "loss": 0.2702, + "num_input_tokens_seen": 135435840, + "step": 43035 + }, + { + "epoch": 2.75526534792907, + "grad_norm": 12.034476280212402, + "learning_rate": 2.490949851724784e-06, + "loss": 0.2132, + "num_input_tokens_seen": 135451840, + "step": 43040 + }, + { + "epoch": 2.7555854298700466, + "grad_norm": 4.944514274597168, + "learning_rate": 2.490391203267647e-06, + "loss": 0.2534, + "num_input_tokens_seen": 135467392, + "step": 43045 + }, + { + "epoch": 2.7559055118110236, + "grad_norm": 6.3771843910217285, + "learning_rate": 2.4898325552903247e-06, + "loss": 0.2481, + "num_input_tokens_seen": 135482048, + "step": 43050 + }, + { + "epoch": 2.7562255937520006, + "grad_norm": 8.340085983276367, + "learning_rate": 2.48927390782071e-06, + "loss": 0.4394, + "num_input_tokens_seen": 135497792, + "step": 43055 + }, + { + "epoch": 2.756545675692977, + "grad_norm": 8.47768783569336, + "learning_rate": 2.4887152608867015e-06, + "loss": 0.2531, + "num_input_tokens_seen": 135514176, + "step": 43060 + }, + { + "epoch": 2.756865757633954, + "grad_norm": 11.779770851135254, + "learning_rate": 2.4881566145161934e-06, + "loss": 0.2848, + "num_input_tokens_seen": 135529792, + "step": 43065 + }, + { + "epoch": 2.757185839574931, + "grad_norm": 14.53695011138916, + "learning_rate": 2.4875979687370834e-06, + "loss": 0.3337, + "num_input_tokens_seen": 135545664, + "step": 43070 + }, + { + "epoch": 2.757505921515908, + "grad_norm": 8.647736549377441, + "learning_rate": 2.4870393235772653e-06, + "loss": 0.2029, + "num_input_tokens_seen": 135561472, + "step": 43075 + }, + { + "epoch": 2.757826003456885, + "grad_norm": 7.6244587898254395, + "learning_rate": 2.4864806790646364e-06, + "loss": 0.2714, + "num_input_tokens_seen": 135577024, + "step": 43080 + }, + { + "epoch": 2.758146085397862, + "grad_norm": 5.994846820831299, + "learning_rate": 2.485922035227091e-06, + "loss": 0.2499, + "num_input_tokens_seen": 135592320, + "step": 43085 + }, + { + "epoch": 2.7584661673388386, + "grad_norm": 16.36492156982422, + "learning_rate": 2.485363392092527e-06, + "loss": 0.171, + "num_input_tokens_seen": 135608000, + "step": 43090 + }, + { + "epoch": 2.7587862492798156, + "grad_norm": 5.821822643280029, + "learning_rate": 2.484804749688838e-06, + "loss": 0.2985, + "num_input_tokens_seen": 135623232, + "step": 43095 + }, + { + "epoch": 2.7591063312207926, + "grad_norm": 3.7201120853424072, + "learning_rate": 2.484246108043921e-06, + "loss": 0.1919, + "num_input_tokens_seen": 135639744, + "step": 43100 + }, + { + "epoch": 2.759426413161769, + "grad_norm": 16.102458953857422, + "learning_rate": 2.483687467185672e-06, + "loss": 0.2325, + "num_input_tokens_seen": 135654976, + "step": 43105 + }, + { + "epoch": 2.759746495102746, + "grad_norm": 5.573049068450928, + "learning_rate": 2.483128827141985e-06, + "loss": 0.2432, + "num_input_tokens_seen": 135672192, + "step": 43110 + }, + { + "epoch": 2.760066577043723, + "grad_norm": 9.1056489944458, + "learning_rate": 2.482570187940757e-06, + "loss": 0.2071, + "num_input_tokens_seen": 135687808, + "step": 43115 + }, + { + "epoch": 2.7603866589847, + "grad_norm": 12.517367362976074, + "learning_rate": 2.4820115496098823e-06, + "loss": 0.2047, + "num_input_tokens_seen": 135702336, + "step": 43120 + }, + { + "epoch": 2.760706740925677, + "grad_norm": 6.493283748626709, + "learning_rate": 2.4814529121772583e-06, + "loss": 0.1747, + "num_input_tokens_seen": 135718016, + "step": 43125 + }, + { + "epoch": 2.761026822866654, + "grad_norm": 9.050101280212402, + "learning_rate": 2.4808942756707784e-06, + "loss": 0.311, + "num_input_tokens_seen": 135732672, + "step": 43130 + }, + { + "epoch": 2.7613469048076307, + "grad_norm": 8.430560111999512, + "learning_rate": 2.4803356401183397e-06, + "loss": 0.1994, + "num_input_tokens_seen": 135747648, + "step": 43135 + }, + { + "epoch": 2.7616669867486077, + "grad_norm": 20.633018493652344, + "learning_rate": 2.479777005547836e-06, + "loss": 0.27, + "num_input_tokens_seen": 135762944, + "step": 43140 + }, + { + "epoch": 2.7619870686895847, + "grad_norm": 56.98191452026367, + "learning_rate": 2.4792183719871633e-06, + "loss": 0.2364, + "num_input_tokens_seen": 135779008, + "step": 43145 + }, + { + "epoch": 2.762307150630561, + "grad_norm": 5.92833948135376, + "learning_rate": 2.4786597394642162e-06, + "loss": 0.1714, + "num_input_tokens_seen": 135794560, + "step": 43150 + }, + { + "epoch": 2.762627232571538, + "grad_norm": 6.511507034301758, + "learning_rate": 2.478101108006892e-06, + "loss": 0.1911, + "num_input_tokens_seen": 135809280, + "step": 43155 + }, + { + "epoch": 2.762947314512515, + "grad_norm": 10.575133323669434, + "learning_rate": 2.4775424776430833e-06, + "loss": 0.2429, + "num_input_tokens_seen": 135824192, + "step": 43160 + }, + { + "epoch": 2.763267396453492, + "grad_norm": 4.162867069244385, + "learning_rate": 2.4769838484006864e-06, + "loss": 0.2434, + "num_input_tokens_seen": 135840576, + "step": 43165 + }, + { + "epoch": 2.763587478394469, + "grad_norm": 7.383270263671875, + "learning_rate": 2.4764252203075968e-06, + "loss": 0.2945, + "num_input_tokens_seen": 135856640, + "step": 43170 + }, + { + "epoch": 2.7639075603354457, + "grad_norm": 8.334977149963379, + "learning_rate": 2.475866593391708e-06, + "loss": 0.1681, + "num_input_tokens_seen": 135872576, + "step": 43175 + }, + { + "epoch": 2.7642276422764227, + "grad_norm": 41.25577926635742, + "learning_rate": 2.475307967680917e-06, + "loss": 0.3037, + "num_input_tokens_seen": 135887680, + "step": 43180 + }, + { + "epoch": 2.7645477242173997, + "grad_norm": 8.695828437805176, + "learning_rate": 2.474749343203117e-06, + "loss": 0.3184, + "num_input_tokens_seen": 135903488, + "step": 43185 + }, + { + "epoch": 2.7648678061583767, + "grad_norm": 7.3974690437316895, + "learning_rate": 2.4741907199862037e-06, + "loss": 0.1819, + "num_input_tokens_seen": 135920192, + "step": 43190 + }, + { + "epoch": 2.7651878880993532, + "grad_norm": 16.207805633544922, + "learning_rate": 2.4736320980580706e-06, + "loss": 0.2257, + "num_input_tokens_seen": 135935552, + "step": 43195 + }, + { + "epoch": 2.76550797004033, + "grad_norm": 12.920351028442383, + "learning_rate": 2.4730734774466137e-06, + "loss": 0.1637, + "num_input_tokens_seen": 135951936, + "step": 43200 + }, + { + "epoch": 2.765828051981307, + "grad_norm": 6.2161383628845215, + "learning_rate": 2.4725148581797264e-06, + "loss": 0.1806, + "num_input_tokens_seen": 135966144, + "step": 43205 + }, + { + "epoch": 2.766148133922284, + "grad_norm": 15.190690994262695, + "learning_rate": 2.4719562402853047e-06, + "loss": 0.2165, + "num_input_tokens_seen": 135981568, + "step": 43210 + }, + { + "epoch": 2.766468215863261, + "grad_norm": 18.901018142700195, + "learning_rate": 2.471397623791242e-06, + "loss": 0.264, + "num_input_tokens_seen": 135997248, + "step": 43215 + }, + { + "epoch": 2.7667882978042377, + "grad_norm": 19.94736099243164, + "learning_rate": 2.4708390087254322e-06, + "loss": 0.1761, + "num_input_tokens_seen": 136012416, + "step": 43220 + }, + { + "epoch": 2.7671083797452147, + "grad_norm": 33.21749496459961, + "learning_rate": 2.470280395115771e-06, + "loss": 0.2544, + "num_input_tokens_seen": 136027264, + "step": 43225 + }, + { + "epoch": 2.7674284616861917, + "grad_norm": 11.913784980773926, + "learning_rate": 2.4697217829901526e-06, + "loss": 0.2152, + "num_input_tokens_seen": 136041600, + "step": 43230 + }, + { + "epoch": 2.7677485436271687, + "grad_norm": 6.907220363616943, + "learning_rate": 2.4691631723764704e-06, + "loss": 0.1551, + "num_input_tokens_seen": 136057600, + "step": 43235 + }, + { + "epoch": 2.7680686255681453, + "grad_norm": 7.276086807250977, + "learning_rate": 2.4686045633026183e-06, + "loss": 0.2603, + "num_input_tokens_seen": 136073856, + "step": 43240 + }, + { + "epoch": 2.7683887075091222, + "grad_norm": 9.032536506652832, + "learning_rate": 2.4680459557964916e-06, + "loss": 0.1746, + "num_input_tokens_seen": 136089600, + "step": 43245 + }, + { + "epoch": 2.7687087894500992, + "grad_norm": 30.192995071411133, + "learning_rate": 2.4674873498859827e-06, + "loss": 0.2231, + "num_input_tokens_seen": 136108224, + "step": 43250 + }, + { + "epoch": 2.7690288713910762, + "grad_norm": 8.76580810546875, + "learning_rate": 2.4669287455989866e-06, + "loss": 0.2962, + "num_input_tokens_seen": 136123648, + "step": 43255 + }, + { + "epoch": 2.769348953332053, + "grad_norm": 10.654685020446777, + "learning_rate": 2.466370142963396e-06, + "loss": 0.1811, + "num_input_tokens_seen": 136140224, + "step": 43260 + }, + { + "epoch": 2.7696690352730298, + "grad_norm": 3.7841415405273438, + "learning_rate": 2.4658115420071065e-06, + "loss": 0.1884, + "num_input_tokens_seen": 136157056, + "step": 43265 + }, + { + "epoch": 2.7699891172140068, + "grad_norm": 28.976619720458984, + "learning_rate": 2.4652529427580095e-06, + "loss": 0.2015, + "num_input_tokens_seen": 136173056, + "step": 43270 + }, + { + "epoch": 2.7703091991549837, + "grad_norm": 19.748641967773438, + "learning_rate": 2.4646943452439993e-06, + "loss": 0.2896, + "num_input_tokens_seen": 136187648, + "step": 43275 + }, + { + "epoch": 2.7706292810959603, + "grad_norm": 7.110306739807129, + "learning_rate": 2.464135749492971e-06, + "loss": 0.2189, + "num_input_tokens_seen": 136202880, + "step": 43280 + }, + { + "epoch": 2.7709493630369373, + "grad_norm": 38.67921447753906, + "learning_rate": 2.463577155532815e-06, + "loss": 0.1735, + "num_input_tokens_seen": 136218048, + "step": 43285 + }, + { + "epoch": 2.7712694449779143, + "grad_norm": 10.0114107131958, + "learning_rate": 2.463018563391427e-06, + "loss": 0.2068, + "num_input_tokens_seen": 136232960, + "step": 43290 + }, + { + "epoch": 2.7715895269188913, + "grad_norm": 5.919747352600098, + "learning_rate": 2.462459973096699e-06, + "loss": 0.151, + "num_input_tokens_seen": 136248896, + "step": 43295 + }, + { + "epoch": 2.7719096088598683, + "grad_norm": 27.53730583190918, + "learning_rate": 2.461901384676525e-06, + "loss": 0.2065, + "num_input_tokens_seen": 136264128, + "step": 43300 + }, + { + "epoch": 2.7722296908008452, + "grad_norm": 5.460186004638672, + "learning_rate": 2.4613427981587967e-06, + "loss": 0.228, + "num_input_tokens_seen": 136279232, + "step": 43305 + }, + { + "epoch": 2.772549772741822, + "grad_norm": 13.62498950958252, + "learning_rate": 2.460784213571408e-06, + "loss": 0.1915, + "num_input_tokens_seen": 136295872, + "step": 43310 + }, + { + "epoch": 2.772869854682799, + "grad_norm": 52.488746643066406, + "learning_rate": 2.460225630942251e-06, + "loss": 0.2784, + "num_input_tokens_seen": 136311360, + "step": 43315 + }, + { + "epoch": 2.7731899366237758, + "grad_norm": 24.908252716064453, + "learning_rate": 2.45966705029922e-06, + "loss": 0.2536, + "num_input_tokens_seen": 136326848, + "step": 43320 + }, + { + "epoch": 2.7735100185647523, + "grad_norm": 15.315704345703125, + "learning_rate": 2.459108471670205e-06, + "loss": 0.2467, + "num_input_tokens_seen": 136342400, + "step": 43325 + }, + { + "epoch": 2.7738301005057293, + "grad_norm": 106.16853332519531, + "learning_rate": 2.4585498950831012e-06, + "loss": 0.2912, + "num_input_tokens_seen": 136358016, + "step": 43330 + }, + { + "epoch": 2.7741501824467063, + "grad_norm": 10.843302726745605, + "learning_rate": 2.457991320565799e-06, + "loss": 0.2382, + "num_input_tokens_seen": 136373824, + "step": 43335 + }, + { + "epoch": 2.7744702643876833, + "grad_norm": 5.370889186859131, + "learning_rate": 2.457432748146191e-06, + "loss": 0.2367, + "num_input_tokens_seen": 136390976, + "step": 43340 + }, + { + "epoch": 2.7747903463286603, + "grad_norm": 22.696163177490234, + "learning_rate": 2.45687417785217e-06, + "loss": 0.3068, + "num_input_tokens_seen": 136406336, + "step": 43345 + }, + { + "epoch": 2.7751104282696373, + "grad_norm": 7.519336223602295, + "learning_rate": 2.4563156097116282e-06, + "loss": 0.3203, + "num_input_tokens_seen": 136421888, + "step": 43350 + }, + { + "epoch": 2.775430510210614, + "grad_norm": 20.458580017089844, + "learning_rate": 2.455757043752458e-06, + "loss": 0.1842, + "num_input_tokens_seen": 136437120, + "step": 43355 + }, + { + "epoch": 2.775750592151591, + "grad_norm": 12.64765739440918, + "learning_rate": 2.4551984800025494e-06, + "loss": 0.2768, + "num_input_tokens_seen": 136452608, + "step": 43360 + }, + { + "epoch": 2.776070674092568, + "grad_norm": 6.153046607971191, + "learning_rate": 2.4546399184897963e-06, + "loss": 0.1828, + "num_input_tokens_seen": 136467584, + "step": 43365 + }, + { + "epoch": 2.7763907560335443, + "grad_norm": 10.003385543823242, + "learning_rate": 2.4540813592420885e-06, + "loss": 0.2901, + "num_input_tokens_seen": 136483584, + "step": 43370 + }, + { + "epoch": 2.7767108379745213, + "grad_norm": 12.158830642700195, + "learning_rate": 2.4535228022873193e-06, + "loss": 0.2584, + "num_input_tokens_seen": 136498816, + "step": 43375 + }, + { + "epoch": 2.7770309199154983, + "grad_norm": 6.8261494636535645, + "learning_rate": 2.4529642476533784e-06, + "loss": 0.3089, + "num_input_tokens_seen": 136514368, + "step": 43380 + }, + { + "epoch": 2.7773510018564753, + "grad_norm": 15.989245414733887, + "learning_rate": 2.4524056953681595e-06, + "loss": 0.2567, + "num_input_tokens_seen": 136530240, + "step": 43385 + }, + { + "epoch": 2.7776710837974523, + "grad_norm": 6.579270362854004, + "learning_rate": 2.4518471454595503e-06, + "loss": 0.2025, + "num_input_tokens_seen": 136545216, + "step": 43390 + }, + { + "epoch": 2.7779911657384293, + "grad_norm": 29.4997501373291, + "learning_rate": 2.4512885979554445e-06, + "loss": 0.2924, + "num_input_tokens_seen": 136560576, + "step": 43395 + }, + { + "epoch": 2.778311247679406, + "grad_norm": 54.40536117553711, + "learning_rate": 2.450730052883733e-06, + "loss": 0.3014, + "num_input_tokens_seen": 136574912, + "step": 43400 + }, + { + "epoch": 2.778631329620383, + "grad_norm": 55.624656677246094, + "learning_rate": 2.450171510272305e-06, + "loss": 0.1743, + "num_input_tokens_seen": 136590336, + "step": 43405 + }, + { + "epoch": 2.77895141156136, + "grad_norm": 10.512645721435547, + "learning_rate": 2.4496129701490537e-06, + "loss": 0.2105, + "num_input_tokens_seen": 136607616, + "step": 43410 + }, + { + "epoch": 2.7792714935023364, + "grad_norm": 12.723374366760254, + "learning_rate": 2.4490544325418667e-06, + "loss": 0.1322, + "num_input_tokens_seen": 136624768, + "step": 43415 + }, + { + "epoch": 2.7795915754433134, + "grad_norm": 82.21390533447266, + "learning_rate": 2.4484958974786378e-06, + "loss": 0.1722, + "num_input_tokens_seen": 136641280, + "step": 43420 + }, + { + "epoch": 2.7799116573842904, + "grad_norm": 5.201021671295166, + "learning_rate": 2.447937364987254e-06, + "loss": 0.2395, + "num_input_tokens_seen": 136656000, + "step": 43425 + }, + { + "epoch": 2.7802317393252673, + "grad_norm": 10.09290885925293, + "learning_rate": 2.447378835095607e-06, + "loss": 0.2012, + "num_input_tokens_seen": 136671680, + "step": 43430 + }, + { + "epoch": 2.7805518212662443, + "grad_norm": 32.50284194946289, + "learning_rate": 2.4468203078315874e-06, + "loss": 0.2567, + "num_input_tokens_seen": 136686400, + "step": 43435 + }, + { + "epoch": 2.780871903207221, + "grad_norm": 15.273271560668945, + "learning_rate": 2.446261783223085e-06, + "loss": 0.2767, + "num_input_tokens_seen": 136702272, + "step": 43440 + }, + { + "epoch": 2.781191985148198, + "grad_norm": 21.253524780273438, + "learning_rate": 2.4457032612979883e-06, + "loss": 0.3365, + "num_input_tokens_seen": 136718144, + "step": 43445 + }, + { + "epoch": 2.781512067089175, + "grad_norm": 6.100516319274902, + "learning_rate": 2.4451447420841883e-06, + "loss": 0.201, + "num_input_tokens_seen": 136734016, + "step": 43450 + }, + { + "epoch": 2.781832149030152, + "grad_norm": 34.0341911315918, + "learning_rate": 2.444586225609574e-06, + "loss": 0.2646, + "num_input_tokens_seen": 136750656, + "step": 43455 + }, + { + "epoch": 2.7821522309711284, + "grad_norm": 6.2613205909729, + "learning_rate": 2.4440277119020347e-06, + "loss": 0.1631, + "num_input_tokens_seen": 136765504, + "step": 43460 + }, + { + "epoch": 2.7824723129121054, + "grad_norm": 12.69620132446289, + "learning_rate": 2.443469200989461e-06, + "loss": 0.2374, + "num_input_tokens_seen": 136780416, + "step": 43465 + }, + { + "epoch": 2.7827923948530824, + "grad_norm": 12.802886962890625, + "learning_rate": 2.44291069289974e-06, + "loss": 0.3309, + "num_input_tokens_seen": 136796544, + "step": 43470 + }, + { + "epoch": 2.7831124767940594, + "grad_norm": 18.84488296508789, + "learning_rate": 2.4423521876607628e-06, + "loss": 0.2476, + "num_input_tokens_seen": 136812672, + "step": 43475 + }, + { + "epoch": 2.7834325587350364, + "grad_norm": 8.57394790649414, + "learning_rate": 2.441793685300416e-06, + "loss": 0.2369, + "num_input_tokens_seen": 136828224, + "step": 43480 + }, + { + "epoch": 2.783752640676013, + "grad_norm": 15.974312782287598, + "learning_rate": 2.441235185846589e-06, + "loss": 0.2058, + "num_input_tokens_seen": 136842624, + "step": 43485 + }, + { + "epoch": 2.78407272261699, + "grad_norm": 7.594621658325195, + "learning_rate": 2.4406766893271714e-06, + "loss": 0.1918, + "num_input_tokens_seen": 136858368, + "step": 43490 + }, + { + "epoch": 2.784392804557967, + "grad_norm": 9.662344932556152, + "learning_rate": 2.4401181957700514e-06, + "loss": 0.2121, + "num_input_tokens_seen": 136872704, + "step": 43495 + }, + { + "epoch": 2.784712886498944, + "grad_norm": 8.300578117370605, + "learning_rate": 2.4395597052031157e-06, + "loss": 0.2706, + "num_input_tokens_seen": 136889088, + "step": 43500 + }, + { + "epoch": 2.7850329684399204, + "grad_norm": 9.663064956665039, + "learning_rate": 2.439001217654255e-06, + "loss": 0.2903, + "num_input_tokens_seen": 136904960, + "step": 43505 + }, + { + "epoch": 2.7853530503808974, + "grad_norm": 16.140949249267578, + "learning_rate": 2.438442733151354e-06, + "loss": 0.1893, + "num_input_tokens_seen": 136919360, + "step": 43510 + }, + { + "epoch": 2.7856731323218744, + "grad_norm": 5.701998710632324, + "learning_rate": 2.437884251722303e-06, + "loss": 0.1027, + "num_input_tokens_seen": 136934208, + "step": 43515 + }, + { + "epoch": 2.7859932142628514, + "grad_norm": 11.318862915039062, + "learning_rate": 2.437325773394989e-06, + "loss": 0.1721, + "num_input_tokens_seen": 136950208, + "step": 43520 + }, + { + "epoch": 2.7863132962038284, + "grad_norm": 9.968844413757324, + "learning_rate": 2.436767298197299e-06, + "loss": 0.2443, + "num_input_tokens_seen": 136966208, + "step": 43525 + }, + { + "epoch": 2.786633378144805, + "grad_norm": 31.52315330505371, + "learning_rate": 2.4362088261571218e-06, + "loss": 0.2573, + "num_input_tokens_seen": 136982528, + "step": 43530 + }, + { + "epoch": 2.786953460085782, + "grad_norm": 22.690141677856445, + "learning_rate": 2.4356503573023425e-06, + "loss": 0.2591, + "num_input_tokens_seen": 136996928, + "step": 43535 + }, + { + "epoch": 2.787273542026759, + "grad_norm": 14.809111595153809, + "learning_rate": 2.4350918916608497e-06, + "loss": 0.2069, + "num_input_tokens_seen": 137011840, + "step": 43540 + }, + { + "epoch": 2.7875936239677355, + "grad_norm": 11.300162315368652, + "learning_rate": 2.4345334292605293e-06, + "loss": 0.2047, + "num_input_tokens_seen": 137027072, + "step": 43545 + }, + { + "epoch": 2.7879137059087125, + "grad_norm": 7.202683448791504, + "learning_rate": 2.433974970129269e-06, + "loss": 0.2738, + "num_input_tokens_seen": 137041856, + "step": 43550 + }, + { + "epoch": 2.7882337878496894, + "grad_norm": 7.5862836837768555, + "learning_rate": 2.4334165142949545e-06, + "loss": 0.197, + "num_input_tokens_seen": 137058176, + "step": 43555 + }, + { + "epoch": 2.7885538697906664, + "grad_norm": 18.28313636779785, + "learning_rate": 2.432858061785473e-06, + "loss": 0.2104, + "num_input_tokens_seen": 137073920, + "step": 43560 + }, + { + "epoch": 2.7888739517316434, + "grad_norm": 13.664697647094727, + "learning_rate": 2.432299612628709e-06, + "loss": 0.3731, + "num_input_tokens_seen": 137091008, + "step": 43565 + }, + { + "epoch": 2.7891940336726204, + "grad_norm": 10.328131675720215, + "learning_rate": 2.4317411668525505e-06, + "loss": 0.2411, + "num_input_tokens_seen": 137106112, + "step": 43570 + }, + { + "epoch": 2.789514115613597, + "grad_norm": 12.77632999420166, + "learning_rate": 2.4311827244848824e-06, + "loss": 0.3352, + "num_input_tokens_seen": 137120960, + "step": 43575 + }, + { + "epoch": 2.789834197554574, + "grad_norm": 16.485258102416992, + "learning_rate": 2.43062428555359e-06, + "loss": 0.1853, + "num_input_tokens_seen": 137136896, + "step": 43580 + }, + { + "epoch": 2.790154279495551, + "grad_norm": 65.78507995605469, + "learning_rate": 2.430065850086561e-06, + "loss": 0.1409, + "num_input_tokens_seen": 137152448, + "step": 43585 + }, + { + "epoch": 2.7904743614365275, + "grad_norm": 9.565727233886719, + "learning_rate": 2.4295074181116777e-06, + "loss": 0.1979, + "num_input_tokens_seen": 137169472, + "step": 43590 + }, + { + "epoch": 2.7907944433775045, + "grad_norm": 60.306846618652344, + "learning_rate": 2.4289489896568273e-06, + "loss": 0.2905, + "num_input_tokens_seen": 137185344, + "step": 43595 + }, + { + "epoch": 2.7911145253184815, + "grad_norm": 8.322763442993164, + "learning_rate": 2.4283905647498943e-06, + "loss": 0.2394, + "num_input_tokens_seen": 137202816, + "step": 43600 + }, + { + "epoch": 2.7914346072594585, + "grad_norm": 10.315837860107422, + "learning_rate": 2.427832143418764e-06, + "loss": 0.172, + "num_input_tokens_seen": 137218432, + "step": 43605 + }, + { + "epoch": 2.7917546892004355, + "grad_norm": 29.077838897705078, + "learning_rate": 2.4272737256913194e-06, + "loss": 0.3095, + "num_input_tokens_seen": 137235776, + "step": 43610 + }, + { + "epoch": 2.7920747711414124, + "grad_norm": 10.792579650878906, + "learning_rate": 2.4267153115954474e-06, + "loss": 0.2425, + "num_input_tokens_seen": 137251328, + "step": 43615 + }, + { + "epoch": 2.792394853082389, + "grad_norm": 8.335935592651367, + "learning_rate": 2.4261569011590296e-06, + "loss": 0.1534, + "num_input_tokens_seen": 137266176, + "step": 43620 + }, + { + "epoch": 2.792714935023366, + "grad_norm": 15.659269332885742, + "learning_rate": 2.425598494409952e-06, + "loss": 0.2048, + "num_input_tokens_seen": 137281600, + "step": 43625 + }, + { + "epoch": 2.793035016964343, + "grad_norm": 7.37756872177124, + "learning_rate": 2.4250400913760982e-06, + "loss": 0.2732, + "num_input_tokens_seen": 137296448, + "step": 43630 + }, + { + "epoch": 2.7933550989053195, + "grad_norm": 6.532591342926025, + "learning_rate": 2.4244816920853516e-06, + "loss": 0.2189, + "num_input_tokens_seen": 137313152, + "step": 43635 + }, + { + "epoch": 2.7936751808462965, + "grad_norm": 9.100616455078125, + "learning_rate": 2.423923296565597e-06, + "loss": 0.2186, + "num_input_tokens_seen": 137328128, + "step": 43640 + }, + { + "epoch": 2.7939952627872735, + "grad_norm": 7.908510208129883, + "learning_rate": 2.423364904844715e-06, + "loss": 0.1657, + "num_input_tokens_seen": 137342592, + "step": 43645 + }, + { + "epoch": 2.7943153447282505, + "grad_norm": 7.024486541748047, + "learning_rate": 2.4228065169505923e-06, + "loss": 0.1955, + "num_input_tokens_seen": 137357760, + "step": 43650 + }, + { + "epoch": 2.7946354266692275, + "grad_norm": 5.8852057456970215, + "learning_rate": 2.422248132911109e-06, + "loss": 0.1772, + "num_input_tokens_seen": 137373504, + "step": 43655 + }, + { + "epoch": 2.7949555086102045, + "grad_norm": 9.469365119934082, + "learning_rate": 2.421689752754149e-06, + "loss": 0.2534, + "num_input_tokens_seen": 137388992, + "step": 43660 + }, + { + "epoch": 2.795275590551181, + "grad_norm": 18.839075088500977, + "learning_rate": 2.4211313765075945e-06, + "loss": 0.1881, + "num_input_tokens_seen": 137404416, + "step": 43665 + }, + { + "epoch": 2.795595672492158, + "grad_norm": 7.3297224044799805, + "learning_rate": 2.4205730041993294e-06, + "loss": 0.1914, + "num_input_tokens_seen": 137418304, + "step": 43670 + }, + { + "epoch": 2.795915754433135, + "grad_norm": 22.670480728149414, + "learning_rate": 2.4200146358572333e-06, + "loss": 0.209, + "num_input_tokens_seen": 137435008, + "step": 43675 + }, + { + "epoch": 2.7962358363741115, + "grad_norm": 13.342639923095703, + "learning_rate": 2.4194562715091906e-06, + "loss": 0.2195, + "num_input_tokens_seen": 137449856, + "step": 43680 + }, + { + "epoch": 2.7965559183150885, + "grad_norm": 108.00162506103516, + "learning_rate": 2.418897911183082e-06, + "loss": 0.2168, + "num_input_tokens_seen": 137465088, + "step": 43685 + }, + { + "epoch": 2.7968760002560655, + "grad_norm": 13.493239402770996, + "learning_rate": 2.4183395549067885e-06, + "loss": 0.1584, + "num_input_tokens_seen": 137480512, + "step": 43690 + }, + { + "epoch": 2.7971960821970425, + "grad_norm": 68.74655151367188, + "learning_rate": 2.417781202708194e-06, + "loss": 0.2449, + "num_input_tokens_seen": 137494976, + "step": 43695 + }, + { + "epoch": 2.7975161641380195, + "grad_norm": 6.0787577629089355, + "learning_rate": 2.4172228546151763e-06, + "loss": 0.2626, + "num_input_tokens_seen": 137511552, + "step": 43700 + }, + { + "epoch": 2.797836246078996, + "grad_norm": 10.110869407653809, + "learning_rate": 2.4166645106556195e-06, + "loss": 0.1722, + "num_input_tokens_seen": 137526784, + "step": 43705 + }, + { + "epoch": 2.798156328019973, + "grad_norm": 26.582738876342773, + "learning_rate": 2.4161061708574017e-06, + "loss": 0.2264, + "num_input_tokens_seen": 137541952, + "step": 43710 + }, + { + "epoch": 2.79847640996095, + "grad_norm": 72.15458679199219, + "learning_rate": 2.415547835248405e-06, + "loss": 0.1979, + "num_input_tokens_seen": 137558144, + "step": 43715 + }, + { + "epoch": 2.798796491901927, + "grad_norm": 32.733882904052734, + "learning_rate": 2.4149895038565094e-06, + "loss": 0.3175, + "num_input_tokens_seen": 137574016, + "step": 43720 + }, + { + "epoch": 2.7991165738429036, + "grad_norm": 12.892979621887207, + "learning_rate": 2.4144311767095965e-06, + "loss": 0.2088, + "num_input_tokens_seen": 137589120, + "step": 43725 + }, + { + "epoch": 2.7994366557838806, + "grad_norm": 12.622599601745605, + "learning_rate": 2.4138728538355433e-06, + "loss": 0.3295, + "num_input_tokens_seen": 137605120, + "step": 43730 + }, + { + "epoch": 2.7997567377248576, + "grad_norm": 14.10838508605957, + "learning_rate": 2.4133145352622327e-06, + "loss": 0.2233, + "num_input_tokens_seen": 137622528, + "step": 43735 + }, + { + "epoch": 2.8000768196658345, + "grad_norm": 17.866621017456055, + "learning_rate": 2.412756221017541e-06, + "loss": 0.1735, + "num_input_tokens_seen": 137638016, + "step": 43740 + }, + { + "epoch": 2.8003969016068115, + "grad_norm": 45.64139938354492, + "learning_rate": 2.4121979111293497e-06, + "loss": 0.1693, + "num_input_tokens_seen": 137653376, + "step": 43745 + }, + { + "epoch": 2.800716983547788, + "grad_norm": 11.940332412719727, + "learning_rate": 2.411639605625538e-06, + "loss": 0.2275, + "num_input_tokens_seen": 137669632, + "step": 43750 + }, + { + "epoch": 2.801037065488765, + "grad_norm": 22.889312744140625, + "learning_rate": 2.411081304533983e-06, + "loss": 0.2669, + "num_input_tokens_seen": 137685568, + "step": 43755 + }, + { + "epoch": 2.801357147429742, + "grad_norm": 16.574565887451172, + "learning_rate": 2.410523007882566e-06, + "loss": 0.2473, + "num_input_tokens_seen": 137700864, + "step": 43760 + }, + { + "epoch": 2.801677229370719, + "grad_norm": 7.500177383422852, + "learning_rate": 2.409964715699163e-06, + "loss": 0.215, + "num_input_tokens_seen": 137717056, + "step": 43765 + }, + { + "epoch": 2.8019973113116956, + "grad_norm": 15.925015449523926, + "learning_rate": 2.4094064280116533e-06, + "loss": 0.267, + "num_input_tokens_seen": 137732096, + "step": 43770 + }, + { + "epoch": 2.8023173932526726, + "grad_norm": 5.701509952545166, + "learning_rate": 2.4088481448479146e-06, + "loss": 0.3291, + "num_input_tokens_seen": 137748352, + "step": 43775 + }, + { + "epoch": 2.8026374751936496, + "grad_norm": 8.839020729064941, + "learning_rate": 2.408289866235825e-06, + "loss": 0.2483, + "num_input_tokens_seen": 137764096, + "step": 43780 + }, + { + "epoch": 2.8029575571346266, + "grad_norm": 36.26005172729492, + "learning_rate": 2.4077315922032617e-06, + "loss": 0.2571, + "num_input_tokens_seen": 137779712, + "step": 43785 + }, + { + "epoch": 2.8032776390756036, + "grad_norm": 5.188800811767578, + "learning_rate": 2.4071733227781027e-06, + "loss": 0.1611, + "num_input_tokens_seen": 137794496, + "step": 43790 + }, + { + "epoch": 2.80359772101658, + "grad_norm": 15.786709785461426, + "learning_rate": 2.4066150579882234e-06, + "loss": 0.3214, + "num_input_tokens_seen": 137809792, + "step": 43795 + }, + { + "epoch": 2.803917802957557, + "grad_norm": 9.610613822937012, + "learning_rate": 2.406056797861502e-06, + "loss": 0.2097, + "num_input_tokens_seen": 137826048, + "step": 43800 + }, + { + "epoch": 2.804237884898534, + "grad_norm": 8.592144012451172, + "learning_rate": 2.4054985424258146e-06, + "loss": 0.2012, + "num_input_tokens_seen": 137841536, + "step": 43805 + }, + { + "epoch": 2.8045579668395106, + "grad_norm": 6.930125713348389, + "learning_rate": 2.4049402917090375e-06, + "loss": 0.1748, + "num_input_tokens_seen": 137856640, + "step": 43810 + }, + { + "epoch": 2.8048780487804876, + "grad_norm": 69.87688446044922, + "learning_rate": 2.4043820457390483e-06, + "loss": 0.2603, + "num_input_tokens_seen": 137872576, + "step": 43815 + }, + { + "epoch": 2.8051981307214646, + "grad_norm": 7.143552303314209, + "learning_rate": 2.4038238045437203e-06, + "loss": 0.1837, + "num_input_tokens_seen": 137890432, + "step": 43820 + }, + { + "epoch": 2.8055182126624416, + "grad_norm": 6.029365539550781, + "learning_rate": 2.4032655681509315e-06, + "loss": 0.25, + "num_input_tokens_seen": 137906112, + "step": 43825 + }, + { + "epoch": 2.8058382946034186, + "grad_norm": 7.317527770996094, + "learning_rate": 2.402707336588556e-06, + "loss": 0.2715, + "num_input_tokens_seen": 137921472, + "step": 43830 + }, + { + "epoch": 2.8061583765443956, + "grad_norm": 19.210660934448242, + "learning_rate": 2.4021491098844697e-06, + "loss": 0.3797, + "num_input_tokens_seen": 137938368, + "step": 43835 + }, + { + "epoch": 2.806478458485372, + "grad_norm": 19.58868980407715, + "learning_rate": 2.4015908880665463e-06, + "loss": 0.2277, + "num_input_tokens_seen": 137955008, + "step": 43840 + }, + { + "epoch": 2.806798540426349, + "grad_norm": 6.510552883148193, + "learning_rate": 2.4010326711626625e-06, + "loss": 0.2341, + "num_input_tokens_seen": 137970432, + "step": 43845 + }, + { + "epoch": 2.807118622367326, + "grad_norm": 14.557902336120605, + "learning_rate": 2.400474459200691e-06, + "loss": 0.2173, + "num_input_tokens_seen": 137986112, + "step": 43850 + }, + { + "epoch": 2.8074387043083027, + "grad_norm": 7.874668121337891, + "learning_rate": 2.3999162522085062e-06, + "loss": 0.2485, + "num_input_tokens_seen": 138001024, + "step": 43855 + }, + { + "epoch": 2.8077587862492797, + "grad_norm": 6.552776336669922, + "learning_rate": 2.3993580502139825e-06, + "loss": 0.2806, + "num_input_tokens_seen": 138016384, + "step": 43860 + }, + { + "epoch": 2.8080788681902566, + "grad_norm": 5.368563175201416, + "learning_rate": 2.3987998532449937e-06, + "loss": 0.2032, + "num_input_tokens_seen": 138034560, + "step": 43865 + }, + { + "epoch": 2.8083989501312336, + "grad_norm": 8.209771156311035, + "learning_rate": 2.3982416613294137e-06, + "loss": 0.2277, + "num_input_tokens_seen": 138049536, + "step": 43870 + }, + { + "epoch": 2.8087190320722106, + "grad_norm": 8.712635040283203, + "learning_rate": 2.3976834744951144e-06, + "loss": 0.1843, + "num_input_tokens_seen": 138065088, + "step": 43875 + }, + { + "epoch": 2.8090391140131876, + "grad_norm": 5.330470561981201, + "learning_rate": 2.3971252927699705e-06, + "loss": 0.1947, + "num_input_tokens_seen": 138081152, + "step": 43880 + }, + { + "epoch": 2.809359195954164, + "grad_norm": 43.46947479248047, + "learning_rate": 2.3965671161818522e-06, + "loss": 0.2614, + "num_input_tokens_seen": 138096896, + "step": 43885 + }, + { + "epoch": 2.809679277895141, + "grad_norm": 112.72895050048828, + "learning_rate": 2.396008944758634e-06, + "loss": 0.2865, + "num_input_tokens_seen": 138112000, + "step": 43890 + }, + { + "epoch": 2.809999359836118, + "grad_norm": 25.710294723510742, + "learning_rate": 2.395450778528187e-06, + "loss": 0.1542, + "num_input_tokens_seen": 138127808, + "step": 43895 + }, + { + "epoch": 2.8103194417770947, + "grad_norm": 19.035903930664062, + "learning_rate": 2.394892617518385e-06, + "loss": 0.2239, + "num_input_tokens_seen": 138142784, + "step": 43900 + }, + { + "epoch": 2.8106395237180717, + "grad_norm": 33.09626770019531, + "learning_rate": 2.3943344617570967e-06, + "loss": 0.2381, + "num_input_tokens_seen": 138157696, + "step": 43905 + }, + { + "epoch": 2.8109596056590487, + "grad_norm": 5.8104681968688965, + "learning_rate": 2.3937763112721953e-06, + "loss": 0.2195, + "num_input_tokens_seen": 138173120, + "step": 43910 + }, + { + "epoch": 2.8112796876000257, + "grad_norm": 13.130827903747559, + "learning_rate": 2.3932181660915514e-06, + "loss": 0.2862, + "num_input_tokens_seen": 138189568, + "step": 43915 + }, + { + "epoch": 2.8115997695410027, + "grad_norm": 6.769569396972656, + "learning_rate": 2.3926600262430362e-06, + "loss": 0.2011, + "num_input_tokens_seen": 138204736, + "step": 43920 + }, + { + "epoch": 2.8119198514819796, + "grad_norm": 39.75661087036133, + "learning_rate": 2.392101891754521e-06, + "loss": 0.1736, + "num_input_tokens_seen": 138220544, + "step": 43925 + }, + { + "epoch": 2.812239933422956, + "grad_norm": 7.279970645904541, + "learning_rate": 2.3915437626538743e-06, + "loss": 0.1633, + "num_input_tokens_seen": 138234816, + "step": 43930 + }, + { + "epoch": 2.812560015363933, + "grad_norm": 19.458168029785156, + "learning_rate": 2.3909856389689683e-06, + "loss": 0.2308, + "num_input_tokens_seen": 138250048, + "step": 43935 + }, + { + "epoch": 2.81288009730491, + "grad_norm": 9.564401626586914, + "learning_rate": 2.3904275207276707e-06, + "loss": 0.2256, + "num_input_tokens_seen": 138265088, + "step": 43940 + }, + { + "epoch": 2.8132001792458867, + "grad_norm": 32.30343246459961, + "learning_rate": 2.3898694079578522e-06, + "loss": 0.212, + "num_input_tokens_seen": 138279744, + "step": 43945 + }, + { + "epoch": 2.8135202611868637, + "grad_norm": 20.5518741607666, + "learning_rate": 2.3893113006873813e-06, + "loss": 0.2526, + "num_input_tokens_seen": 138296128, + "step": 43950 + }, + { + "epoch": 2.8138403431278407, + "grad_norm": 14.278891563415527, + "learning_rate": 2.388753198944129e-06, + "loss": 0.2185, + "num_input_tokens_seen": 138312064, + "step": 43955 + }, + { + "epoch": 2.8141604250688177, + "grad_norm": 7.802984714508057, + "learning_rate": 2.388195102755961e-06, + "loss": 0.2491, + "num_input_tokens_seen": 138327104, + "step": 43960 + }, + { + "epoch": 2.8144805070097947, + "grad_norm": 10.042474746704102, + "learning_rate": 2.3876370121507484e-06, + "loss": 0.2342, + "num_input_tokens_seen": 138342976, + "step": 43965 + }, + { + "epoch": 2.8148005889507712, + "grad_norm": 8.38970947265625, + "learning_rate": 2.3870789271563567e-06, + "loss": 0.242, + "num_input_tokens_seen": 138359872, + "step": 43970 + }, + { + "epoch": 2.8151206708917482, + "grad_norm": 8.235441207885742, + "learning_rate": 2.3865208478006565e-06, + "loss": 0.3118, + "num_input_tokens_seen": 138375808, + "step": 43975 + }, + { + "epoch": 2.815440752832725, + "grad_norm": 6.347928047180176, + "learning_rate": 2.3859627741115127e-06, + "loss": 0.206, + "num_input_tokens_seen": 138390976, + "step": 43980 + }, + { + "epoch": 2.815760834773702, + "grad_norm": 49.6830940246582, + "learning_rate": 2.3854047061167944e-06, + "loss": 0.1974, + "num_input_tokens_seen": 138406464, + "step": 43985 + }, + { + "epoch": 2.8160809167146788, + "grad_norm": 10.631996154785156, + "learning_rate": 2.3848466438443694e-06, + "loss": 0.2446, + "num_input_tokens_seen": 138422272, + "step": 43990 + }, + { + "epoch": 2.8164009986556557, + "grad_norm": 14.758634567260742, + "learning_rate": 2.3842885873221016e-06, + "loss": 0.1551, + "num_input_tokens_seen": 138437888, + "step": 43995 + }, + { + "epoch": 2.8167210805966327, + "grad_norm": 15.158795356750488, + "learning_rate": 2.3837305365778602e-06, + "loss": 0.1492, + "num_input_tokens_seen": 138453952, + "step": 44000 + }, + { + "epoch": 2.8170411625376097, + "grad_norm": 44.58005905151367, + "learning_rate": 2.383172491639509e-06, + "loss": 0.2079, + "num_input_tokens_seen": 138469120, + "step": 44005 + }, + { + "epoch": 2.8173612444785867, + "grad_norm": 9.15219783782959, + "learning_rate": 2.3826144525349163e-06, + "loss": 0.2003, + "num_input_tokens_seen": 138484608, + "step": 44010 + }, + { + "epoch": 2.8176813264195633, + "grad_norm": 51.1493034362793, + "learning_rate": 2.3820564192919456e-06, + "loss": 0.2145, + "num_input_tokens_seen": 138500160, + "step": 44015 + }, + { + "epoch": 2.8180014083605402, + "grad_norm": 92.06684875488281, + "learning_rate": 2.381498391938464e-06, + "loss": 0.233, + "num_input_tokens_seen": 138515584, + "step": 44020 + }, + { + "epoch": 2.8183214903015172, + "grad_norm": 2.721709966659546, + "learning_rate": 2.3809403705023342e-06, + "loss": 0.1965, + "num_input_tokens_seen": 138531456, + "step": 44025 + }, + { + "epoch": 2.8186415722424942, + "grad_norm": 44.906768798828125, + "learning_rate": 2.380382355011423e-06, + "loss": 0.2553, + "num_input_tokens_seen": 138546816, + "step": 44030 + }, + { + "epoch": 2.8189616541834708, + "grad_norm": 17.125293731689453, + "learning_rate": 2.3798243454935934e-06, + "loss": 0.2613, + "num_input_tokens_seen": 138562752, + "step": 44035 + }, + { + "epoch": 2.8192817361244478, + "grad_norm": 16.143604278564453, + "learning_rate": 2.3792663419767095e-06, + "loss": 0.1717, + "num_input_tokens_seen": 138577856, + "step": 44040 + }, + { + "epoch": 2.8196018180654248, + "grad_norm": 7.006464958190918, + "learning_rate": 2.3787083444886368e-06, + "loss": 0.1816, + "num_input_tokens_seen": 138593728, + "step": 44045 + }, + { + "epoch": 2.8199219000064017, + "grad_norm": 5.925995826721191, + "learning_rate": 2.3781503530572367e-06, + "loss": 0.2282, + "num_input_tokens_seen": 138609024, + "step": 44050 + }, + { + "epoch": 2.8202419819473787, + "grad_norm": 9.002537727355957, + "learning_rate": 2.3775923677103738e-06, + "loss": 0.1734, + "num_input_tokens_seen": 138625408, + "step": 44055 + }, + { + "epoch": 2.8205620638883553, + "grad_norm": 30.827360153198242, + "learning_rate": 2.37703438847591e-06, + "loss": 0.2477, + "num_input_tokens_seen": 138642688, + "step": 44060 + }, + { + "epoch": 2.8208821458293323, + "grad_norm": 10.548030853271484, + "learning_rate": 2.376476415381709e-06, + "loss": 0.4014, + "num_input_tokens_seen": 138657856, + "step": 44065 + }, + { + "epoch": 2.8212022277703093, + "grad_norm": 7.417482376098633, + "learning_rate": 2.3759184484556317e-06, + "loss": 0.176, + "num_input_tokens_seen": 138672640, + "step": 44070 + }, + { + "epoch": 2.821522309711286, + "grad_norm": 34.53367614746094, + "learning_rate": 2.3753604877255422e-06, + "loss": 0.2552, + "num_input_tokens_seen": 138690560, + "step": 44075 + }, + { + "epoch": 2.821842391652263, + "grad_norm": 30.153522491455078, + "learning_rate": 2.374802533219299e-06, + "loss": 0.2514, + "num_input_tokens_seen": 138707008, + "step": 44080 + }, + { + "epoch": 2.82216247359324, + "grad_norm": 9.338449478149414, + "learning_rate": 2.374244584964766e-06, + "loss": 0.1726, + "num_input_tokens_seen": 138722368, + "step": 44085 + }, + { + "epoch": 2.822482555534217, + "grad_norm": 23.20780372619629, + "learning_rate": 2.3736866429898027e-06, + "loss": 0.1757, + "num_input_tokens_seen": 138738112, + "step": 44090 + }, + { + "epoch": 2.8228026374751938, + "grad_norm": 17.660554885864258, + "learning_rate": 2.3731287073222705e-06, + "loss": 0.2233, + "num_input_tokens_seen": 138753280, + "step": 44095 + }, + { + "epoch": 2.8231227194161708, + "grad_norm": 10.35053539276123, + "learning_rate": 2.3725707779900313e-06, + "loss": 0.3008, + "num_input_tokens_seen": 138773120, + "step": 44100 + }, + { + "epoch": 2.8234428013571473, + "grad_norm": 12.108437538146973, + "learning_rate": 2.372012855020942e-06, + "loss": 0.2513, + "num_input_tokens_seen": 138788800, + "step": 44105 + }, + { + "epoch": 2.8237628832981243, + "grad_norm": 4.037095069885254, + "learning_rate": 2.371454938442865e-06, + "loss": 0.1958, + "num_input_tokens_seen": 138804672, + "step": 44110 + }, + { + "epoch": 2.8240829652391013, + "grad_norm": 36.27638244628906, + "learning_rate": 2.3708970282836585e-06, + "loss": 0.1894, + "num_input_tokens_seen": 138819520, + "step": 44115 + }, + { + "epoch": 2.824403047180078, + "grad_norm": 44.243019104003906, + "learning_rate": 2.3703391245711823e-06, + "loss": 0.2009, + "num_input_tokens_seen": 138834752, + "step": 44120 + }, + { + "epoch": 2.824723129121055, + "grad_norm": 23.476594924926758, + "learning_rate": 2.369781227333294e-06, + "loss": 0.2332, + "num_input_tokens_seen": 138850048, + "step": 44125 + }, + { + "epoch": 2.825043211062032, + "grad_norm": 11.50737476348877, + "learning_rate": 2.369223336597854e-06, + "loss": 0.1679, + "num_input_tokens_seen": 138865152, + "step": 44130 + }, + { + "epoch": 2.825363293003009, + "grad_norm": 11.563224792480469, + "learning_rate": 2.3686654523927187e-06, + "loss": 0.2943, + "num_input_tokens_seen": 138882880, + "step": 44135 + }, + { + "epoch": 2.825683374943986, + "grad_norm": 14.067082405090332, + "learning_rate": 2.368107574745747e-06, + "loss": 0.1469, + "num_input_tokens_seen": 138898112, + "step": 44140 + }, + { + "epoch": 2.826003456884963, + "grad_norm": 5.345070838928223, + "learning_rate": 2.367549703684795e-06, + "loss": 0.1727, + "num_input_tokens_seen": 138914752, + "step": 44145 + }, + { + "epoch": 2.8263235388259393, + "grad_norm": 8.938332557678223, + "learning_rate": 2.3669918392377224e-06, + "loss": 0.135, + "num_input_tokens_seen": 138930688, + "step": 44150 + }, + { + "epoch": 2.8266436207669163, + "grad_norm": 6.442167282104492, + "learning_rate": 2.3664339814323835e-06, + "loss": 0.1783, + "num_input_tokens_seen": 138945920, + "step": 44155 + }, + { + "epoch": 2.8269637027078933, + "grad_norm": 6.017593860626221, + "learning_rate": 2.3658761302966357e-06, + "loss": 0.1945, + "num_input_tokens_seen": 138961408, + "step": 44160 + }, + { + "epoch": 2.82728378464887, + "grad_norm": 5.150472164154053, + "learning_rate": 2.3653182858583364e-06, + "loss": 0.1959, + "num_input_tokens_seen": 138979008, + "step": 44165 + }, + { + "epoch": 2.827603866589847, + "grad_norm": 16.453020095825195, + "learning_rate": 2.3647604481453393e-06, + "loss": 0.2449, + "num_input_tokens_seen": 138994112, + "step": 44170 + }, + { + "epoch": 2.827923948530824, + "grad_norm": 53.96631622314453, + "learning_rate": 2.3642026171855013e-06, + "loss": 0.3041, + "num_input_tokens_seen": 139010176, + "step": 44175 + }, + { + "epoch": 2.828244030471801, + "grad_norm": 56.95595169067383, + "learning_rate": 2.363644793006677e-06, + "loss": 0.3095, + "num_input_tokens_seen": 139026048, + "step": 44180 + }, + { + "epoch": 2.828564112412778, + "grad_norm": 9.543591499328613, + "learning_rate": 2.363086975636723e-06, + "loss": 0.2471, + "num_input_tokens_seen": 139041856, + "step": 44185 + }, + { + "epoch": 2.828884194353755, + "grad_norm": 9.978188514709473, + "learning_rate": 2.3625291651034905e-06, + "loss": 0.2208, + "num_input_tokens_seen": 139057408, + "step": 44190 + }, + { + "epoch": 2.8292042762947314, + "grad_norm": 10.341927528381348, + "learning_rate": 2.361971361434837e-06, + "loss": 0.1601, + "num_input_tokens_seen": 139073344, + "step": 44195 + }, + { + "epoch": 2.8295243582357084, + "grad_norm": 6.869327068328857, + "learning_rate": 2.361413564658613e-06, + "loss": 0.1999, + "num_input_tokens_seen": 139092096, + "step": 44200 + }, + { + "epoch": 2.8298444401766853, + "grad_norm": 22.180347442626953, + "learning_rate": 2.3608557748026747e-06, + "loss": 0.2826, + "num_input_tokens_seen": 139108544, + "step": 44205 + }, + { + "epoch": 2.830164522117662, + "grad_norm": 9.8265380859375, + "learning_rate": 2.360297991894874e-06, + "loss": 0.2114, + "num_input_tokens_seen": 139124352, + "step": 44210 + }, + { + "epoch": 2.830484604058639, + "grad_norm": 10.51626968383789, + "learning_rate": 2.3597402159630632e-06, + "loss": 0.1929, + "num_input_tokens_seen": 139139072, + "step": 44215 + }, + { + "epoch": 2.830804685999616, + "grad_norm": 6.024188995361328, + "learning_rate": 2.359182447035097e-06, + "loss": 0.2083, + "num_input_tokens_seen": 139155392, + "step": 44220 + }, + { + "epoch": 2.831124767940593, + "grad_norm": 7.798969268798828, + "learning_rate": 2.3586246851388243e-06, + "loss": 0.1707, + "num_input_tokens_seen": 139171968, + "step": 44225 + }, + { + "epoch": 2.83144484988157, + "grad_norm": 26.233428955078125, + "learning_rate": 2.3580669303021e-06, + "loss": 0.1931, + "num_input_tokens_seen": 139187648, + "step": 44230 + }, + { + "epoch": 2.831764931822547, + "grad_norm": 9.949795722961426, + "learning_rate": 2.3575091825527724e-06, + "loss": 0.3123, + "num_input_tokens_seen": 139203584, + "step": 44235 + }, + { + "epoch": 2.8320850137635234, + "grad_norm": 27.021757125854492, + "learning_rate": 2.3569514419186953e-06, + "loss": 0.2331, + "num_input_tokens_seen": 139218688, + "step": 44240 + }, + { + "epoch": 2.8324050957045004, + "grad_norm": 23.329463958740234, + "learning_rate": 2.3563937084277174e-06, + "loss": 0.2012, + "num_input_tokens_seen": 139233920, + "step": 44245 + }, + { + "epoch": 2.8327251776454774, + "grad_norm": 36.63928985595703, + "learning_rate": 2.3558359821076903e-06, + "loss": 0.3717, + "num_input_tokens_seen": 139251392, + "step": 44250 + }, + { + "epoch": 2.833045259586454, + "grad_norm": 69.75578308105469, + "learning_rate": 2.3552782629864625e-06, + "loss": 0.2139, + "num_input_tokens_seen": 139266304, + "step": 44255 + }, + { + "epoch": 2.833365341527431, + "grad_norm": 5.381767272949219, + "learning_rate": 2.3547205510918846e-06, + "loss": 0.1798, + "num_input_tokens_seen": 139282304, + "step": 44260 + }, + { + "epoch": 2.833685423468408, + "grad_norm": 82.9127426147461, + "learning_rate": 2.3541628464518056e-06, + "loss": 0.251, + "num_input_tokens_seen": 139298816, + "step": 44265 + }, + { + "epoch": 2.834005505409385, + "grad_norm": 7.046069622039795, + "learning_rate": 2.353605149094074e-06, + "loss": 0.1523, + "num_input_tokens_seen": 139313920, + "step": 44270 + }, + { + "epoch": 2.834325587350362, + "grad_norm": 4.657879829406738, + "learning_rate": 2.3530474590465403e-06, + "loss": 0.2123, + "num_input_tokens_seen": 139329472, + "step": 44275 + }, + { + "epoch": 2.8346456692913384, + "grad_norm": 23.006711959838867, + "learning_rate": 2.3524897763370493e-06, + "loss": 0.2351, + "num_input_tokens_seen": 139344960, + "step": 44280 + }, + { + "epoch": 2.8349657512323154, + "grad_norm": 5.377460956573486, + "learning_rate": 2.3519321009934513e-06, + "loss": 0.3499, + "num_input_tokens_seen": 139359296, + "step": 44285 + }, + { + "epoch": 2.8352858331732924, + "grad_norm": 10.440454483032227, + "learning_rate": 2.351374433043593e-06, + "loss": 0.1809, + "num_input_tokens_seen": 139374528, + "step": 44290 + }, + { + "epoch": 2.8356059151142694, + "grad_norm": 3.9017040729522705, + "learning_rate": 2.3508167725153216e-06, + "loss": 0.2806, + "num_input_tokens_seen": 139391488, + "step": 44295 + }, + { + "epoch": 2.835925997055246, + "grad_norm": 9.748298645019531, + "learning_rate": 2.350259119436483e-06, + "loss": 0.2491, + "num_input_tokens_seen": 139406272, + "step": 44300 + }, + { + "epoch": 2.836246078996223, + "grad_norm": 9.001078605651855, + "learning_rate": 2.349701473834925e-06, + "loss": 0.1931, + "num_input_tokens_seen": 139421312, + "step": 44305 + }, + { + "epoch": 2.8365661609372, + "grad_norm": 8.525505065917969, + "learning_rate": 2.349143835738491e-06, + "loss": 0.1616, + "num_input_tokens_seen": 139435840, + "step": 44310 + }, + { + "epoch": 2.836886242878177, + "grad_norm": 21.024986267089844, + "learning_rate": 2.348586205175029e-06, + "loss": 0.2365, + "num_input_tokens_seen": 139452032, + "step": 44315 + }, + { + "epoch": 2.837206324819154, + "grad_norm": 6.699838161468506, + "learning_rate": 2.3480285821723826e-06, + "loss": 0.2702, + "num_input_tokens_seen": 139466816, + "step": 44320 + }, + { + "epoch": 2.8375264067601305, + "grad_norm": 8.241732597351074, + "learning_rate": 2.347470966758398e-06, + "loss": 0.2039, + "num_input_tokens_seen": 139482240, + "step": 44325 + }, + { + "epoch": 2.8378464887011075, + "grad_norm": 77.54084014892578, + "learning_rate": 2.346913358960918e-06, + "loss": 0.2812, + "num_input_tokens_seen": 139497408, + "step": 44330 + }, + { + "epoch": 2.8381665706420844, + "grad_norm": 8.123604774475098, + "learning_rate": 2.3463557588077877e-06, + "loss": 0.2855, + "num_input_tokens_seen": 139512960, + "step": 44335 + }, + { + "epoch": 2.838486652583061, + "grad_norm": 43.339691162109375, + "learning_rate": 2.3457981663268513e-06, + "loss": 0.218, + "num_input_tokens_seen": 139529152, + "step": 44340 + }, + { + "epoch": 2.838806734524038, + "grad_norm": 8.772618293762207, + "learning_rate": 2.34524058154595e-06, + "loss": 0.2036, + "num_input_tokens_seen": 139545792, + "step": 44345 + }, + { + "epoch": 2.839126816465015, + "grad_norm": 55.38491439819336, + "learning_rate": 2.344683004492929e-06, + "loss": 0.2265, + "num_input_tokens_seen": 139561728, + "step": 44350 + }, + { + "epoch": 2.839446898405992, + "grad_norm": 12.438664436340332, + "learning_rate": 2.344125435195629e-06, + "loss": 0.2324, + "num_input_tokens_seen": 139576256, + "step": 44355 + }, + { + "epoch": 2.839766980346969, + "grad_norm": 12.705944061279297, + "learning_rate": 2.343567873681894e-06, + "loss": 0.1874, + "num_input_tokens_seen": 139590464, + "step": 44360 + }, + { + "epoch": 2.840087062287946, + "grad_norm": 5.416101932525635, + "learning_rate": 2.343010319979563e-06, + "loss": 0.2494, + "num_input_tokens_seen": 139605248, + "step": 44365 + }, + { + "epoch": 2.8404071442289225, + "grad_norm": 4.297988414764404, + "learning_rate": 2.3424527741164798e-06, + "loss": 0.2666, + "num_input_tokens_seen": 139621504, + "step": 44370 + }, + { + "epoch": 2.8407272261698995, + "grad_norm": 22.796756744384766, + "learning_rate": 2.3418952361204837e-06, + "loss": 0.3019, + "num_input_tokens_seen": 139636416, + "step": 44375 + }, + { + "epoch": 2.8410473081108765, + "grad_norm": 16.34748077392578, + "learning_rate": 2.341337706019417e-06, + "loss": 0.2298, + "num_input_tokens_seen": 139652480, + "step": 44380 + }, + { + "epoch": 2.841367390051853, + "grad_norm": 14.986146926879883, + "learning_rate": 2.340780183841118e-06, + "loss": 0.1877, + "num_input_tokens_seen": 139667584, + "step": 44385 + }, + { + "epoch": 2.84168747199283, + "grad_norm": 8.799727439880371, + "learning_rate": 2.340222669613427e-06, + "loss": 0.1671, + "num_input_tokens_seen": 139682816, + "step": 44390 + }, + { + "epoch": 2.842007553933807, + "grad_norm": 25.471759796142578, + "learning_rate": 2.3396651633641854e-06, + "loss": 0.2568, + "num_input_tokens_seen": 139697792, + "step": 44395 + }, + { + "epoch": 2.842327635874784, + "grad_norm": 26.468238830566406, + "learning_rate": 2.3391076651212286e-06, + "loss": 0.2184, + "num_input_tokens_seen": 139714496, + "step": 44400 + }, + { + "epoch": 2.842647717815761, + "grad_norm": 61.189029693603516, + "learning_rate": 2.338550174912398e-06, + "loss": 0.1701, + "num_input_tokens_seen": 139730432, + "step": 44405 + }, + { + "epoch": 2.842967799756738, + "grad_norm": 33.06325149536133, + "learning_rate": 2.33799269276553e-06, + "loss": 0.2167, + "num_input_tokens_seen": 139745600, + "step": 44410 + }, + { + "epoch": 2.8432878816977145, + "grad_norm": 24.9425106048584, + "learning_rate": 2.3374352187084638e-06, + "loss": 0.2717, + "num_input_tokens_seen": 139761152, + "step": 44415 + }, + { + "epoch": 2.8436079636386915, + "grad_norm": 56.823612213134766, + "learning_rate": 2.336877752769035e-06, + "loss": 0.215, + "num_input_tokens_seen": 139776512, + "step": 44420 + }, + { + "epoch": 2.8439280455796685, + "grad_norm": 9.637126922607422, + "learning_rate": 2.336320294975082e-06, + "loss": 0.2383, + "num_input_tokens_seen": 139791936, + "step": 44425 + }, + { + "epoch": 2.844248127520645, + "grad_norm": 12.644391059875488, + "learning_rate": 2.3357628453544413e-06, + "loss": 0.1699, + "num_input_tokens_seen": 139807360, + "step": 44430 + }, + { + "epoch": 2.844568209461622, + "grad_norm": 47.193275451660156, + "learning_rate": 2.3352054039349485e-06, + "loss": 0.3171, + "num_input_tokens_seen": 139822400, + "step": 44435 + }, + { + "epoch": 2.844888291402599, + "grad_norm": 11.413281440734863, + "learning_rate": 2.334647970744439e-06, + "loss": 0.1592, + "num_input_tokens_seen": 139838784, + "step": 44440 + }, + { + "epoch": 2.845208373343576, + "grad_norm": 11.299769401550293, + "learning_rate": 2.3340905458107483e-06, + "loss": 0.183, + "num_input_tokens_seen": 139856000, + "step": 44445 + }, + { + "epoch": 2.845528455284553, + "grad_norm": 4.91032600402832, + "learning_rate": 2.333533129161713e-06, + "loss": 0.1716, + "num_input_tokens_seen": 139871296, + "step": 44450 + }, + { + "epoch": 2.84584853722553, + "grad_norm": 52.30210494995117, + "learning_rate": 2.332975720825164e-06, + "loss": 0.5357, + "num_input_tokens_seen": 139886528, + "step": 44455 + }, + { + "epoch": 2.8461686191665065, + "grad_norm": 29.272695541381836, + "learning_rate": 2.3324183208289393e-06, + "loss": 0.2261, + "num_input_tokens_seen": 139903168, + "step": 44460 + }, + { + "epoch": 2.8464887011074835, + "grad_norm": 11.828428268432617, + "learning_rate": 2.3318609292008697e-06, + "loss": 0.244, + "num_input_tokens_seen": 139918848, + "step": 44465 + }, + { + "epoch": 2.8468087830484605, + "grad_norm": 12.200491905212402, + "learning_rate": 2.3313035459687906e-06, + "loss": 0.3307, + "num_input_tokens_seen": 139934976, + "step": 44470 + }, + { + "epoch": 2.847128864989437, + "grad_norm": 36.7005500793457, + "learning_rate": 2.330746171160533e-06, + "loss": 0.1675, + "num_input_tokens_seen": 139951616, + "step": 44475 + }, + { + "epoch": 2.847448946930414, + "grad_norm": 6.048367500305176, + "learning_rate": 2.3301888048039306e-06, + "loss": 0.1356, + "num_input_tokens_seen": 139968256, + "step": 44480 + }, + { + "epoch": 2.847769028871391, + "grad_norm": 40.510337829589844, + "learning_rate": 2.3296314469268138e-06, + "loss": 0.1824, + "num_input_tokens_seen": 139983616, + "step": 44485 + }, + { + "epoch": 2.848089110812368, + "grad_norm": 13.355423927307129, + "learning_rate": 2.3290740975570155e-06, + "loss": 0.1731, + "num_input_tokens_seen": 139999232, + "step": 44490 + }, + { + "epoch": 2.848409192753345, + "grad_norm": 8.61824893951416, + "learning_rate": 2.3285167567223662e-06, + "loss": 0.2596, + "num_input_tokens_seen": 140014400, + "step": 44495 + }, + { + "epoch": 2.848729274694322, + "grad_norm": 10.268290519714355, + "learning_rate": 2.327959424450698e-06, + "loss": 0.2312, + "num_input_tokens_seen": 140030528, + "step": 44500 + }, + { + "epoch": 2.8490493566352986, + "grad_norm": 3.5701138973236084, + "learning_rate": 2.3274021007698385e-06, + "loss": 0.2017, + "num_input_tokens_seen": 140046208, + "step": 44505 + }, + { + "epoch": 2.8493694385762756, + "grad_norm": 8.333362579345703, + "learning_rate": 2.3268447857076194e-06, + "loss": 0.2373, + "num_input_tokens_seen": 140063296, + "step": 44510 + }, + { + "epoch": 2.8496895205172526, + "grad_norm": 9.460142135620117, + "learning_rate": 2.3262874792918703e-06, + "loss": 0.3689, + "num_input_tokens_seen": 140077888, + "step": 44515 + }, + { + "epoch": 2.850009602458229, + "grad_norm": 17.978103637695312, + "learning_rate": 2.325730181550419e-06, + "loss": 0.1692, + "num_input_tokens_seen": 140094208, + "step": 44520 + }, + { + "epoch": 2.850329684399206, + "grad_norm": 7.56003999710083, + "learning_rate": 2.325172892511096e-06, + "loss": 0.2089, + "num_input_tokens_seen": 140112000, + "step": 44525 + }, + { + "epoch": 2.850649766340183, + "grad_norm": 11.039495468139648, + "learning_rate": 2.3246156122017264e-06, + "loss": 0.258, + "num_input_tokens_seen": 140127552, + "step": 44530 + }, + { + "epoch": 2.85096984828116, + "grad_norm": 66.56489562988281, + "learning_rate": 2.3240583406501414e-06, + "loss": 0.2803, + "num_input_tokens_seen": 140143552, + "step": 44535 + }, + { + "epoch": 2.851289930222137, + "grad_norm": 34.86906814575195, + "learning_rate": 2.323501077884165e-06, + "loss": 0.321, + "num_input_tokens_seen": 140158720, + "step": 44540 + }, + { + "epoch": 2.8516100121631136, + "grad_norm": 15.729593276977539, + "learning_rate": 2.3229438239316258e-06, + "loss": 0.1971, + "num_input_tokens_seen": 140174784, + "step": 44545 + }, + { + "epoch": 2.8519300941040906, + "grad_norm": 9.156904220581055, + "learning_rate": 2.3223865788203495e-06, + "loss": 0.1966, + "num_input_tokens_seen": 140190720, + "step": 44550 + }, + { + "epoch": 2.8522501760450676, + "grad_norm": 10.291642189025879, + "learning_rate": 2.3218293425781634e-06, + "loss": 0.2565, + "num_input_tokens_seen": 140206400, + "step": 44555 + }, + { + "epoch": 2.8525702579860446, + "grad_norm": 5.778217792510986, + "learning_rate": 2.3212721152328907e-06, + "loss": 0.2011, + "num_input_tokens_seen": 140221120, + "step": 44560 + }, + { + "epoch": 2.852890339927021, + "grad_norm": 3.6404831409454346, + "learning_rate": 2.320714896812358e-06, + "loss": 0.2197, + "num_input_tokens_seen": 140236800, + "step": 44565 + }, + { + "epoch": 2.853210421867998, + "grad_norm": 4.181295394897461, + "learning_rate": 2.32015768734439e-06, + "loss": 0.2038, + "num_input_tokens_seen": 140251968, + "step": 44570 + }, + { + "epoch": 2.853530503808975, + "grad_norm": 36.81958770751953, + "learning_rate": 2.319600486856809e-06, + "loss": 0.3535, + "num_input_tokens_seen": 140267136, + "step": 44575 + }, + { + "epoch": 2.853850585749952, + "grad_norm": 18.984893798828125, + "learning_rate": 2.319043295377441e-06, + "loss": 0.2445, + "num_input_tokens_seen": 140281792, + "step": 44580 + }, + { + "epoch": 2.854170667690929, + "grad_norm": 5.378918647766113, + "learning_rate": 2.318486112934108e-06, + "loss": 0.1777, + "num_input_tokens_seen": 140296832, + "step": 44585 + }, + { + "epoch": 2.8544907496319056, + "grad_norm": 5.506240367889404, + "learning_rate": 2.317928939554634e-06, + "loss": 0.1406, + "num_input_tokens_seen": 140312064, + "step": 44590 + }, + { + "epoch": 2.8548108315728826, + "grad_norm": 19.796207427978516, + "learning_rate": 2.317371775266839e-06, + "loss": 0.2001, + "num_input_tokens_seen": 140327360, + "step": 44595 + }, + { + "epoch": 2.8551309135138596, + "grad_norm": 10.084561347961426, + "learning_rate": 2.316814620098547e-06, + "loss": 0.1539, + "num_input_tokens_seen": 140343296, + "step": 44600 + }, + { + "epoch": 2.8554509954548366, + "grad_norm": 9.420028686523438, + "learning_rate": 2.316257474077578e-06, + "loss": 0.2062, + "num_input_tokens_seen": 140359232, + "step": 44605 + }, + { + "epoch": 2.855771077395813, + "grad_norm": 42.58849334716797, + "learning_rate": 2.315700337231755e-06, + "loss": 0.2805, + "num_input_tokens_seen": 140375552, + "step": 44610 + }, + { + "epoch": 2.85609115933679, + "grad_norm": 19.56777000427246, + "learning_rate": 2.315143209588896e-06, + "loss": 0.1845, + "num_input_tokens_seen": 140392576, + "step": 44615 + }, + { + "epoch": 2.856411241277767, + "grad_norm": 6.7820563316345215, + "learning_rate": 2.314586091176822e-06, + "loss": 0.1562, + "num_input_tokens_seen": 140408256, + "step": 44620 + }, + { + "epoch": 2.856731323218744, + "grad_norm": 13.45396900177002, + "learning_rate": 2.314028982023354e-06, + "loss": 0.233, + "num_input_tokens_seen": 140424128, + "step": 44625 + }, + { + "epoch": 2.857051405159721, + "grad_norm": 25.247695922851562, + "learning_rate": 2.313471882156309e-06, + "loss": 0.2935, + "num_input_tokens_seen": 140439168, + "step": 44630 + }, + { + "epoch": 2.8573714871006977, + "grad_norm": 6.428766250610352, + "learning_rate": 2.312914791603507e-06, + "loss": 0.1529, + "num_input_tokens_seen": 140455360, + "step": 44635 + }, + { + "epoch": 2.8576915690416747, + "grad_norm": 53.758644104003906, + "learning_rate": 2.3123577103927656e-06, + "loss": 0.2748, + "num_input_tokens_seen": 140471872, + "step": 44640 + }, + { + "epoch": 2.8580116509826516, + "grad_norm": 16.01015853881836, + "learning_rate": 2.311800638551904e-06, + "loss": 0.2308, + "num_input_tokens_seen": 140486592, + "step": 44645 + }, + { + "epoch": 2.858331732923628, + "grad_norm": 7.850808620452881, + "learning_rate": 2.3112435761087367e-06, + "loss": 0.219, + "num_input_tokens_seen": 140500864, + "step": 44650 + }, + { + "epoch": 2.858651814864605, + "grad_norm": 43.26953125, + "learning_rate": 2.3106865230910824e-06, + "loss": 0.2197, + "num_input_tokens_seen": 140517056, + "step": 44655 + }, + { + "epoch": 2.858971896805582, + "grad_norm": 72.77174377441406, + "learning_rate": 2.3101294795267567e-06, + "loss": 0.1609, + "num_input_tokens_seen": 140531264, + "step": 44660 + }, + { + "epoch": 2.859291978746559, + "grad_norm": 4.3033928871154785, + "learning_rate": 2.309572445443577e-06, + "loss": 0.1922, + "num_input_tokens_seen": 140547136, + "step": 44665 + }, + { + "epoch": 2.859612060687536, + "grad_norm": 7.494821071624756, + "learning_rate": 2.309015420869356e-06, + "loss": 0.2598, + "num_input_tokens_seen": 140562304, + "step": 44670 + }, + { + "epoch": 2.859932142628513, + "grad_norm": 35.98329162597656, + "learning_rate": 2.3084584058319115e-06, + "loss": 0.265, + "num_input_tokens_seen": 140578176, + "step": 44675 + }, + { + "epoch": 2.8602522245694897, + "grad_norm": 5.269334316253662, + "learning_rate": 2.3079014003590555e-06, + "loss": 0.1118, + "num_input_tokens_seen": 140593600, + "step": 44680 + }, + { + "epoch": 2.8605723065104667, + "grad_norm": 5.986654281616211, + "learning_rate": 2.3073444044786025e-06, + "loss": 0.2334, + "num_input_tokens_seen": 140609472, + "step": 44685 + }, + { + "epoch": 2.8608923884514437, + "grad_norm": 6.0711822509765625, + "learning_rate": 2.3067874182183676e-06, + "loss": 0.1526, + "num_input_tokens_seen": 140624960, + "step": 44690 + }, + { + "epoch": 2.86121247039242, + "grad_norm": 57.510719299316406, + "learning_rate": 2.3062304416061616e-06, + "loss": 0.263, + "num_input_tokens_seen": 140640512, + "step": 44695 + }, + { + "epoch": 2.861532552333397, + "grad_norm": 46.14715576171875, + "learning_rate": 2.3056734746697997e-06, + "loss": 0.2402, + "num_input_tokens_seen": 140657408, + "step": 44700 + }, + { + "epoch": 2.861852634274374, + "grad_norm": 32.226810455322266, + "learning_rate": 2.3051165174370904e-06, + "loss": 0.2162, + "num_input_tokens_seen": 140674624, + "step": 44705 + }, + { + "epoch": 2.862172716215351, + "grad_norm": 6.161228179931641, + "learning_rate": 2.3045595699358487e-06, + "loss": 0.2667, + "num_input_tokens_seen": 140690368, + "step": 44710 + }, + { + "epoch": 2.862492798156328, + "grad_norm": 4.970460891723633, + "learning_rate": 2.3040026321938826e-06, + "loss": 0.2578, + "num_input_tokens_seen": 140706304, + "step": 44715 + }, + { + "epoch": 2.862812880097305, + "grad_norm": 10.9617919921875, + "learning_rate": 2.303445704239005e-06, + "loss": 0.2047, + "num_input_tokens_seen": 140721920, + "step": 44720 + }, + { + "epoch": 2.8631329620382817, + "grad_norm": 65.54487609863281, + "learning_rate": 2.3028887860990247e-06, + "loss": 0.2405, + "num_input_tokens_seen": 140738240, + "step": 44725 + }, + { + "epoch": 2.8634530439792587, + "grad_norm": 12.706920623779297, + "learning_rate": 2.3023318778017526e-06, + "loss": 0.2576, + "num_input_tokens_seen": 140755200, + "step": 44730 + }, + { + "epoch": 2.8637731259202357, + "grad_norm": 10.660140991210938, + "learning_rate": 2.3017749793749956e-06, + "loss": 0.2057, + "num_input_tokens_seen": 140770624, + "step": 44735 + }, + { + "epoch": 2.8640932078612122, + "grad_norm": 13.265670776367188, + "learning_rate": 2.301218090846564e-06, + "loss": 0.2664, + "num_input_tokens_seen": 140787392, + "step": 44740 + }, + { + "epoch": 2.8644132898021892, + "grad_norm": 6.471550941467285, + "learning_rate": 2.300661212244266e-06, + "loss": 0.2434, + "num_input_tokens_seen": 140803584, + "step": 44745 + }, + { + "epoch": 2.8647333717431662, + "grad_norm": 9.716593742370605, + "learning_rate": 2.300104343595908e-06, + "loss": 0.1918, + "num_input_tokens_seen": 140819776, + "step": 44750 + }, + { + "epoch": 2.865053453684143, + "grad_norm": 40.48215866088867, + "learning_rate": 2.299547484929299e-06, + "loss": 0.2781, + "num_input_tokens_seen": 140836224, + "step": 44755 + }, + { + "epoch": 2.86537353562512, + "grad_norm": 9.471050262451172, + "learning_rate": 2.2989906362722434e-06, + "loss": 0.2081, + "num_input_tokens_seen": 140854976, + "step": 44760 + }, + { + "epoch": 2.865693617566097, + "grad_norm": 11.274715423583984, + "learning_rate": 2.29843379765255e-06, + "loss": 0.2745, + "num_input_tokens_seen": 140870464, + "step": 44765 + }, + { + "epoch": 2.8660136995070737, + "grad_norm": 2.0270142555236816, + "learning_rate": 2.297876969098021e-06, + "loss": 0.2269, + "num_input_tokens_seen": 140886016, + "step": 44770 + }, + { + "epoch": 2.8663337814480507, + "grad_norm": 11.236063003540039, + "learning_rate": 2.297320150636464e-06, + "loss": 0.2441, + "num_input_tokens_seen": 140900416, + "step": 44775 + }, + { + "epoch": 2.8666538633890277, + "grad_norm": 11.29294204711914, + "learning_rate": 2.2967633422956827e-06, + "loss": 0.3098, + "num_input_tokens_seen": 140916928, + "step": 44780 + }, + { + "epoch": 2.8669739453300043, + "grad_norm": 40.038570404052734, + "learning_rate": 2.2962065441034826e-06, + "loss": 0.2314, + "num_input_tokens_seen": 140932352, + "step": 44785 + }, + { + "epoch": 2.8672940272709813, + "grad_norm": 12.105152130126953, + "learning_rate": 2.2956497560876647e-06, + "loss": 0.2473, + "num_input_tokens_seen": 140947200, + "step": 44790 + }, + { + "epoch": 2.8676141092119583, + "grad_norm": 5.9752068519592285, + "learning_rate": 2.295092978276034e-06, + "loss": 0.1707, + "num_input_tokens_seen": 140963584, + "step": 44795 + }, + { + "epoch": 2.8679341911529352, + "grad_norm": 7.048514366149902, + "learning_rate": 2.2945362106963933e-06, + "loss": 0.193, + "num_input_tokens_seen": 140978368, + "step": 44800 + }, + { + "epoch": 2.8682542730939122, + "grad_norm": 8.136107444763184, + "learning_rate": 2.293979453376543e-06, + "loss": 0.1898, + "num_input_tokens_seen": 140994048, + "step": 44805 + }, + { + "epoch": 2.868574355034889, + "grad_norm": 21.039608001708984, + "learning_rate": 2.2934227063442866e-06, + "loss": 0.2355, + "num_input_tokens_seen": 141008704, + "step": 44810 + }, + { + "epoch": 2.8688944369758658, + "grad_norm": 18.158462524414062, + "learning_rate": 2.2928659696274234e-06, + "loss": 0.2554, + "num_input_tokens_seen": 141024384, + "step": 44815 + }, + { + "epoch": 2.8692145189168428, + "grad_norm": 13.090629577636719, + "learning_rate": 2.2923092432537556e-06, + "loss": 0.183, + "num_input_tokens_seen": 141041344, + "step": 44820 + }, + { + "epoch": 2.8695346008578198, + "grad_norm": 41.732330322265625, + "learning_rate": 2.291752527251082e-06, + "loss": 0.1648, + "num_input_tokens_seen": 141057664, + "step": 44825 + }, + { + "epoch": 2.8698546827987963, + "grad_norm": 7.430674076080322, + "learning_rate": 2.2911958216472024e-06, + "loss": 0.2397, + "num_input_tokens_seen": 141072640, + "step": 44830 + }, + { + "epoch": 2.8701747647397733, + "grad_norm": 4.112245559692383, + "learning_rate": 2.2906391264699152e-06, + "loss": 0.1741, + "num_input_tokens_seen": 141088000, + "step": 44835 + }, + { + "epoch": 2.8704948466807503, + "grad_norm": 7.630370616912842, + "learning_rate": 2.290082441747021e-06, + "loss": 0.1857, + "num_input_tokens_seen": 141103936, + "step": 44840 + }, + { + "epoch": 2.8708149286217273, + "grad_norm": 4.825627326965332, + "learning_rate": 2.289525767506315e-06, + "loss": 0.1053, + "num_input_tokens_seen": 141132352, + "step": 44845 + }, + { + "epoch": 2.8711350105627043, + "grad_norm": 69.4739990234375, + "learning_rate": 2.288969103775597e-06, + "loss": 0.2696, + "num_input_tokens_seen": 141147520, + "step": 44850 + }, + { + "epoch": 2.871455092503681, + "grad_norm": 6.08812952041626, + "learning_rate": 2.2884124505826617e-06, + "loss": 0.1419, + "num_input_tokens_seen": 141163072, + "step": 44855 + }, + { + "epoch": 2.871775174444658, + "grad_norm": 18.477794647216797, + "learning_rate": 2.2878558079553062e-06, + "loss": 0.2222, + "num_input_tokens_seen": 141178304, + "step": 44860 + }, + { + "epoch": 2.872095256385635, + "grad_norm": 5.584245204925537, + "learning_rate": 2.2872991759213277e-06, + "loss": 0.3013, + "num_input_tokens_seen": 141196096, + "step": 44865 + }, + { + "epoch": 2.8724153383266118, + "grad_norm": 13.426213264465332, + "learning_rate": 2.2867425545085192e-06, + "loss": 0.2045, + "num_input_tokens_seen": 141211776, + "step": 44870 + }, + { + "epoch": 2.8727354202675883, + "grad_norm": 50.2409553527832, + "learning_rate": 2.286185943744678e-06, + "loss": 0.2384, + "num_input_tokens_seen": 141227328, + "step": 44875 + }, + { + "epoch": 2.8730555022085653, + "grad_norm": 9.141767501831055, + "learning_rate": 2.285629343657596e-06, + "loss": 0.2071, + "num_input_tokens_seen": 141242688, + "step": 44880 + }, + { + "epoch": 2.8733755841495423, + "grad_norm": 7.685677528381348, + "learning_rate": 2.285072754275069e-06, + "loss": 0.2278, + "num_input_tokens_seen": 141258048, + "step": 44885 + }, + { + "epoch": 2.8736956660905193, + "grad_norm": 14.267231941223145, + "learning_rate": 2.284516175624888e-06, + "loss": 0.1792, + "num_input_tokens_seen": 141272512, + "step": 44890 + }, + { + "epoch": 2.8740157480314963, + "grad_norm": 11.490274429321289, + "learning_rate": 2.2839596077348483e-06, + "loss": 0.327, + "num_input_tokens_seen": 141288640, + "step": 44895 + }, + { + "epoch": 2.874335829972473, + "grad_norm": 8.699707984924316, + "learning_rate": 2.2834030506327394e-06, + "loss": 0.2094, + "num_input_tokens_seen": 141304640, + "step": 44900 + }, + { + "epoch": 2.87465591191345, + "grad_norm": 17.587505340576172, + "learning_rate": 2.2828465043463544e-06, + "loss": 0.2244, + "num_input_tokens_seen": 141322560, + "step": 44905 + }, + { + "epoch": 2.874975993854427, + "grad_norm": 5.932873249053955, + "learning_rate": 2.2822899689034832e-06, + "loss": 0.2849, + "num_input_tokens_seen": 141337472, + "step": 44910 + }, + { + "epoch": 2.8752960757954034, + "grad_norm": 11.52650260925293, + "learning_rate": 2.2817334443319174e-06, + "loss": 0.2835, + "num_input_tokens_seen": 141352896, + "step": 44915 + }, + { + "epoch": 2.8756161577363804, + "grad_norm": 19.188047409057617, + "learning_rate": 2.2811769306594463e-06, + "loss": 0.2221, + "num_input_tokens_seen": 141368320, + "step": 44920 + }, + { + "epoch": 2.8759362396773573, + "grad_norm": 10.010477066040039, + "learning_rate": 2.280620427913859e-06, + "loss": 0.1893, + "num_input_tokens_seen": 141384000, + "step": 44925 + }, + { + "epoch": 2.8762563216183343, + "grad_norm": 14.529715538024902, + "learning_rate": 2.2800639361229464e-06, + "loss": 0.1827, + "num_input_tokens_seen": 141399040, + "step": 44930 + }, + { + "epoch": 2.8765764035593113, + "grad_norm": 52.88648223876953, + "learning_rate": 2.2795074553144936e-06, + "loss": 0.1448, + "num_input_tokens_seen": 141415680, + "step": 44935 + }, + { + "epoch": 2.8768964855002883, + "grad_norm": 11.354787826538086, + "learning_rate": 2.2789509855162912e-06, + "loss": 0.2351, + "num_input_tokens_seen": 141432064, + "step": 44940 + }, + { + "epoch": 2.877216567441265, + "grad_norm": 29.582080841064453, + "learning_rate": 2.2783945267561246e-06, + "loss": 0.3497, + "num_input_tokens_seen": 141447936, + "step": 44945 + }, + { + "epoch": 2.877536649382242, + "grad_norm": 5.558516502380371, + "learning_rate": 2.277838079061781e-06, + "loss": 0.1935, + "num_input_tokens_seen": 141463808, + "step": 44950 + }, + { + "epoch": 2.877856731323219, + "grad_norm": 21.149003982543945, + "learning_rate": 2.2772816424610463e-06, + "loss": 0.2442, + "num_input_tokens_seen": 141480256, + "step": 44955 + }, + { + "epoch": 2.8781768132641954, + "grad_norm": 82.64320373535156, + "learning_rate": 2.276725216981707e-06, + "loss": 0.3129, + "num_input_tokens_seen": 141495424, + "step": 44960 + }, + { + "epoch": 2.8784968952051724, + "grad_norm": 27.126083374023438, + "learning_rate": 2.2761688026515467e-06, + "loss": 0.3059, + "num_input_tokens_seen": 141510272, + "step": 44965 + }, + { + "epoch": 2.8788169771461494, + "grad_norm": 21.157564163208008, + "learning_rate": 2.27561239949835e-06, + "loss": 0.1473, + "num_input_tokens_seen": 141525312, + "step": 44970 + }, + { + "epoch": 2.8791370590871264, + "grad_norm": 6.670485973358154, + "learning_rate": 2.2750560075499025e-06, + "loss": 0.2226, + "num_input_tokens_seen": 141541312, + "step": 44975 + }, + { + "epoch": 2.8794571410281034, + "grad_norm": 9.539111137390137, + "learning_rate": 2.274499626833985e-06, + "loss": 0.1887, + "num_input_tokens_seen": 141556160, + "step": 44980 + }, + { + "epoch": 2.8797772229690803, + "grad_norm": 4.440025329589844, + "learning_rate": 2.2739432573783832e-06, + "loss": 0.191, + "num_input_tokens_seen": 141571648, + "step": 44985 + }, + { + "epoch": 2.880097304910057, + "grad_norm": 39.68186569213867, + "learning_rate": 2.273386899210876e-06, + "loss": 0.232, + "num_input_tokens_seen": 141585856, + "step": 44990 + }, + { + "epoch": 2.880417386851034, + "grad_norm": 5.528408527374268, + "learning_rate": 2.2728305523592482e-06, + "loss": 0.1903, + "num_input_tokens_seen": 141601152, + "step": 44995 + }, + { + "epoch": 2.880737468792011, + "grad_norm": 7.246140480041504, + "learning_rate": 2.272274216851278e-06, + "loss": 0.1379, + "num_input_tokens_seen": 141616896, + "step": 45000 + }, + { + "epoch": 2.8810575507329874, + "grad_norm": 49.93489074707031, + "learning_rate": 2.2717178927147483e-06, + "loss": 0.3167, + "num_input_tokens_seen": 141632320, + "step": 45005 + }, + { + "epoch": 2.8813776326739644, + "grad_norm": 18.112194061279297, + "learning_rate": 2.271161579977437e-06, + "loss": 0.1945, + "num_input_tokens_seen": 141648384, + "step": 45010 + }, + { + "epoch": 2.8816977146149414, + "grad_norm": 54.933677673339844, + "learning_rate": 2.2706052786671255e-06, + "loss": 0.2173, + "num_input_tokens_seen": 141664448, + "step": 45015 + }, + { + "epoch": 2.8820177965559184, + "grad_norm": 6.245638370513916, + "learning_rate": 2.2700489888115907e-06, + "loss": 0.2076, + "num_input_tokens_seen": 141680064, + "step": 45020 + }, + { + "epoch": 2.8823378784968954, + "grad_norm": 42.2402458190918, + "learning_rate": 2.2694927104386115e-06, + "loss": 0.293, + "num_input_tokens_seen": 141696448, + "step": 45025 + }, + { + "epoch": 2.8826579604378724, + "grad_norm": 31.22047996520996, + "learning_rate": 2.2689364435759665e-06, + "loss": 0.2419, + "num_input_tokens_seen": 141711744, + "step": 45030 + }, + { + "epoch": 2.882978042378849, + "grad_norm": 6.447256088256836, + "learning_rate": 2.268380188251431e-06, + "loss": 0.1816, + "num_input_tokens_seen": 141727616, + "step": 45035 + }, + { + "epoch": 2.883298124319826, + "grad_norm": 29.148645401000977, + "learning_rate": 2.267823944492783e-06, + "loss": 0.3126, + "num_input_tokens_seen": 141744000, + "step": 45040 + }, + { + "epoch": 2.883618206260803, + "grad_norm": 9.542536735534668, + "learning_rate": 2.267267712327797e-06, + "loss": 0.204, + "num_input_tokens_seen": 141759168, + "step": 45045 + }, + { + "epoch": 2.8839382882017794, + "grad_norm": 8.633610725402832, + "learning_rate": 2.266711491784251e-06, + "loss": 0.248, + "num_input_tokens_seen": 141774656, + "step": 45050 + }, + { + "epoch": 2.8842583701427564, + "grad_norm": 106.38309478759766, + "learning_rate": 2.266155282889916e-06, + "loss": 0.2116, + "num_input_tokens_seen": 141791296, + "step": 45055 + }, + { + "epoch": 2.8845784520837334, + "grad_norm": 25.957374572753906, + "learning_rate": 2.2655990856725695e-06, + "loss": 0.2342, + "num_input_tokens_seen": 141805504, + "step": 45060 + }, + { + "epoch": 2.8848985340247104, + "grad_norm": 58.18632125854492, + "learning_rate": 2.2650429001599826e-06, + "loss": 0.2994, + "num_input_tokens_seen": 141821632, + "step": 45065 + }, + { + "epoch": 2.8852186159656874, + "grad_norm": 11.601447105407715, + "learning_rate": 2.2644867263799307e-06, + "loss": 0.1775, + "num_input_tokens_seen": 141836736, + "step": 45070 + }, + { + "epoch": 2.885538697906664, + "grad_norm": 16.286542892456055, + "learning_rate": 2.2639305643601838e-06, + "loss": 0.1947, + "num_input_tokens_seen": 141852032, + "step": 45075 + }, + { + "epoch": 2.885858779847641, + "grad_norm": 9.44049072265625, + "learning_rate": 2.263374414128516e-06, + "loss": 0.2518, + "num_input_tokens_seen": 141869056, + "step": 45080 + }, + { + "epoch": 2.886178861788618, + "grad_norm": 7.442718029022217, + "learning_rate": 2.262818275712696e-06, + "loss": 0.1722, + "num_input_tokens_seen": 141883648, + "step": 45085 + }, + { + "epoch": 2.886498943729595, + "grad_norm": 17.017539978027344, + "learning_rate": 2.2622621491404962e-06, + "loss": 0.3109, + "num_input_tokens_seen": 141900352, + "step": 45090 + }, + { + "epoch": 2.8868190256705715, + "grad_norm": 13.714919090270996, + "learning_rate": 2.261706034439687e-06, + "loss": 0.2064, + "num_input_tokens_seen": 141916992, + "step": 45095 + }, + { + "epoch": 2.8871391076115485, + "grad_norm": 9.21906852722168, + "learning_rate": 2.2611499316380367e-06, + "loss": 0.1981, + "num_input_tokens_seen": 141933376, + "step": 45100 + }, + { + "epoch": 2.8874591895525255, + "grad_norm": 12.546385765075684, + "learning_rate": 2.2605938407633157e-06, + "loss": 0.1708, + "num_input_tokens_seen": 141948480, + "step": 45105 + }, + { + "epoch": 2.8877792714935024, + "grad_norm": 17.138837814331055, + "learning_rate": 2.26003776184329e-06, + "loss": 0.2531, + "num_input_tokens_seen": 141964224, + "step": 45110 + }, + { + "epoch": 2.8880993534344794, + "grad_norm": 10.164844512939453, + "learning_rate": 2.259481694905729e-06, + "loss": 0.2189, + "num_input_tokens_seen": 141980096, + "step": 45115 + }, + { + "epoch": 2.888419435375456, + "grad_norm": 8.32834243774414, + "learning_rate": 2.2589256399783993e-06, + "loss": 0.2182, + "num_input_tokens_seen": 141996800, + "step": 45120 + }, + { + "epoch": 2.888739517316433, + "grad_norm": 220.1660919189453, + "learning_rate": 2.258369597089068e-06, + "loss": 0.2882, + "num_input_tokens_seen": 142011584, + "step": 45125 + }, + { + "epoch": 2.88905959925741, + "grad_norm": 50.19822692871094, + "learning_rate": 2.2578135662655e-06, + "loss": 0.2331, + "num_input_tokens_seen": 142028672, + "step": 45130 + }, + { + "epoch": 2.889379681198387, + "grad_norm": 4.341165065765381, + "learning_rate": 2.2572575475354614e-06, + "loss": 0.1424, + "num_input_tokens_seen": 142043648, + "step": 45135 + }, + { + "epoch": 2.8896997631393635, + "grad_norm": 6.330866813659668, + "learning_rate": 2.256701540926716e-06, + "loss": 0.2004, + "num_input_tokens_seen": 142058240, + "step": 45140 + }, + { + "epoch": 2.8900198450803405, + "grad_norm": 7.357062339782715, + "learning_rate": 2.2561455464670274e-06, + "loss": 0.2502, + "num_input_tokens_seen": 142073216, + "step": 45145 + }, + { + "epoch": 2.8903399270213175, + "grad_norm": 19.72622299194336, + "learning_rate": 2.2555895641841614e-06, + "loss": 0.3251, + "num_input_tokens_seen": 142087488, + "step": 45150 + }, + { + "epoch": 2.8906600089622945, + "grad_norm": 52.53864288330078, + "learning_rate": 2.255033594105879e-06, + "loss": 0.1674, + "num_input_tokens_seen": 142103232, + "step": 45155 + }, + { + "epoch": 2.8909800909032715, + "grad_norm": 11.794320106506348, + "learning_rate": 2.2544776362599433e-06, + "loss": 0.3264, + "num_input_tokens_seen": 142118144, + "step": 45160 + }, + { + "epoch": 2.891300172844248, + "grad_norm": 20.23564910888672, + "learning_rate": 2.253921690674115e-06, + "loss": 0.1983, + "num_input_tokens_seen": 142133824, + "step": 45165 + }, + { + "epoch": 2.891620254785225, + "grad_norm": 22.726383209228516, + "learning_rate": 2.2533657573761564e-06, + "loss": 0.3949, + "num_input_tokens_seen": 142149568, + "step": 45170 + }, + { + "epoch": 2.891940336726202, + "grad_norm": 36.372127532958984, + "learning_rate": 2.2528098363938262e-06, + "loss": 0.3101, + "num_input_tokens_seen": 142164928, + "step": 45175 + }, + { + "epoch": 2.8922604186671785, + "grad_norm": 6.7679948806762695, + "learning_rate": 2.252253927754886e-06, + "loss": 0.1955, + "num_input_tokens_seen": 142181120, + "step": 45180 + }, + { + "epoch": 2.8925805006081555, + "grad_norm": 5.403812885284424, + "learning_rate": 2.2516980314870936e-06, + "loss": 0.2241, + "num_input_tokens_seen": 142196544, + "step": 45185 + }, + { + "epoch": 2.8929005825491325, + "grad_norm": 16.787145614624023, + "learning_rate": 2.2511421476182096e-06, + "loss": 0.2419, + "num_input_tokens_seen": 142211648, + "step": 45190 + }, + { + "epoch": 2.8932206644901095, + "grad_norm": 6.951927185058594, + "learning_rate": 2.2505862761759888e-06, + "loss": 0.2536, + "num_input_tokens_seen": 142227712, + "step": 45195 + }, + { + "epoch": 2.8935407464310865, + "grad_norm": 11.475650787353516, + "learning_rate": 2.250030417188191e-06, + "loss": 0.2748, + "num_input_tokens_seen": 142243776, + "step": 45200 + }, + { + "epoch": 2.8938608283720635, + "grad_norm": 121.93859100341797, + "learning_rate": 2.2494745706825714e-06, + "loss": 0.2847, + "num_input_tokens_seen": 142261056, + "step": 45205 + }, + { + "epoch": 2.89418091031304, + "grad_norm": 36.60737228393555, + "learning_rate": 2.248918736686887e-06, + "loss": 0.2542, + "num_input_tokens_seen": 142276608, + "step": 45210 + }, + { + "epoch": 2.894500992254017, + "grad_norm": 11.980611801147461, + "learning_rate": 2.248362915228894e-06, + "loss": 0.2531, + "num_input_tokens_seen": 142292224, + "step": 45215 + }, + { + "epoch": 2.894821074194994, + "grad_norm": 17.701995849609375, + "learning_rate": 2.2478071063363454e-06, + "loss": 0.1431, + "num_input_tokens_seen": 142308032, + "step": 45220 + }, + { + "epoch": 2.8951411561359706, + "grad_norm": 8.196606636047363, + "learning_rate": 2.2472513100369974e-06, + "loss": 0.1981, + "num_input_tokens_seen": 142324096, + "step": 45225 + }, + { + "epoch": 2.8954612380769476, + "grad_norm": 6.146968364715576, + "learning_rate": 2.246695526358601e-06, + "loss": 0.2682, + "num_input_tokens_seen": 142338624, + "step": 45230 + }, + { + "epoch": 2.8957813200179245, + "grad_norm": 6.550442218780518, + "learning_rate": 2.2461397553289112e-06, + "loss": 0.1736, + "num_input_tokens_seen": 142354048, + "step": 45235 + }, + { + "epoch": 2.8961014019589015, + "grad_norm": 7.375586986541748, + "learning_rate": 2.2455839969756796e-06, + "loss": 0.1856, + "num_input_tokens_seen": 142369280, + "step": 45240 + }, + { + "epoch": 2.8964214838998785, + "grad_norm": 23.622051239013672, + "learning_rate": 2.2450282513266585e-06, + "loss": 0.1625, + "num_input_tokens_seen": 142385152, + "step": 45245 + }, + { + "epoch": 2.8967415658408555, + "grad_norm": 57.591426849365234, + "learning_rate": 2.244472518409598e-06, + "loss": 0.3317, + "num_input_tokens_seen": 142400576, + "step": 45250 + }, + { + "epoch": 2.897061647781832, + "grad_norm": 13.072665214538574, + "learning_rate": 2.243916798252249e-06, + "loss": 0.2302, + "num_input_tokens_seen": 142418368, + "step": 45255 + }, + { + "epoch": 2.897381729722809, + "grad_norm": 40.104217529296875, + "learning_rate": 2.2433610908823607e-06, + "loss": 0.2291, + "num_input_tokens_seen": 142433280, + "step": 45260 + }, + { + "epoch": 2.897701811663786, + "grad_norm": 6.557322978973389, + "learning_rate": 2.2428053963276836e-06, + "loss": 0.1481, + "num_input_tokens_seen": 142451392, + "step": 45265 + }, + { + "epoch": 2.8980218936047626, + "grad_norm": 38.59238052368164, + "learning_rate": 2.242249714615965e-06, + "loss": 0.1916, + "num_input_tokens_seen": 142466880, + "step": 45270 + }, + { + "epoch": 2.8983419755457396, + "grad_norm": 12.124866485595703, + "learning_rate": 2.2416940457749525e-06, + "loss": 0.2127, + "num_input_tokens_seen": 142483904, + "step": 45275 + }, + { + "epoch": 2.8986620574867166, + "grad_norm": 7.834893226623535, + "learning_rate": 2.241138389832395e-06, + "loss": 0.226, + "num_input_tokens_seen": 142499584, + "step": 45280 + }, + { + "epoch": 2.8989821394276936, + "grad_norm": 13.350981712341309, + "learning_rate": 2.240582746816037e-06, + "loss": 0.2064, + "num_input_tokens_seen": 142516032, + "step": 45285 + }, + { + "epoch": 2.8993022213686706, + "grad_norm": 18.390846252441406, + "learning_rate": 2.2400271167536262e-06, + "loss": 0.2315, + "num_input_tokens_seen": 142533312, + "step": 45290 + }, + { + "epoch": 2.8996223033096475, + "grad_norm": 26.780715942382812, + "learning_rate": 2.2394714996729062e-06, + "loss": 0.231, + "num_input_tokens_seen": 142549312, + "step": 45295 + }, + { + "epoch": 2.899942385250624, + "grad_norm": 8.541666984558105, + "learning_rate": 2.2389158956016234e-06, + "loss": 0.2421, + "num_input_tokens_seen": 142565184, + "step": 45300 + }, + { + "epoch": 2.900262467191601, + "grad_norm": 11.284149169921875, + "learning_rate": 2.2383603045675197e-06, + "loss": 0.2441, + "num_input_tokens_seen": 142580224, + "step": 45305 + }, + { + "epoch": 2.900582549132578, + "grad_norm": 5.620405673980713, + "learning_rate": 2.237804726598341e-06, + "loss": 0.151, + "num_input_tokens_seen": 142596288, + "step": 45310 + }, + { + "epoch": 2.9009026310735546, + "grad_norm": 33.367431640625, + "learning_rate": 2.2372491617218274e-06, + "loss": 0.2503, + "num_input_tokens_seen": 142612032, + "step": 45315 + }, + { + "epoch": 2.9012227130145316, + "grad_norm": 39.65778732299805, + "learning_rate": 2.236693609965722e-06, + "loss": 0.3537, + "num_input_tokens_seen": 142627328, + "step": 45320 + }, + { + "epoch": 2.9015427949555086, + "grad_norm": 21.66847801208496, + "learning_rate": 2.236138071357766e-06, + "loss": 0.1754, + "num_input_tokens_seen": 142643264, + "step": 45325 + }, + { + "epoch": 2.9018628768964856, + "grad_norm": 5.594630718231201, + "learning_rate": 2.2355825459257006e-06, + "loss": 0.1899, + "num_input_tokens_seen": 142658688, + "step": 45330 + }, + { + "epoch": 2.9021829588374626, + "grad_norm": 39.5420036315918, + "learning_rate": 2.235027033697267e-06, + "loss": 0.2643, + "num_input_tokens_seen": 142675200, + "step": 45335 + }, + { + "epoch": 2.902503040778439, + "grad_norm": 23.919103622436523, + "learning_rate": 2.2344715347002013e-06, + "loss": 0.3192, + "num_input_tokens_seen": 142693120, + "step": 45340 + }, + { + "epoch": 2.902823122719416, + "grad_norm": 7.065695285797119, + "learning_rate": 2.2339160489622448e-06, + "loss": 0.1802, + "num_input_tokens_seen": 142708352, + "step": 45345 + }, + { + "epoch": 2.903143204660393, + "grad_norm": 8.264090538024902, + "learning_rate": 2.233360576511134e-06, + "loss": 0.1539, + "num_input_tokens_seen": 142725824, + "step": 45350 + }, + { + "epoch": 2.90346328660137, + "grad_norm": 26.684650421142578, + "learning_rate": 2.232805117374609e-06, + "loss": 0.2356, + "num_input_tokens_seen": 142741120, + "step": 45355 + }, + { + "epoch": 2.9037833685423466, + "grad_norm": 39.203338623046875, + "learning_rate": 2.232249671580403e-06, + "loss": 0.2417, + "num_input_tokens_seen": 142756928, + "step": 45360 + }, + { + "epoch": 2.9041034504833236, + "grad_norm": 9.217988967895508, + "learning_rate": 2.231694239156255e-06, + "loss": 0.2518, + "num_input_tokens_seen": 142773632, + "step": 45365 + }, + { + "epoch": 2.9044235324243006, + "grad_norm": 9.42285442352295, + "learning_rate": 2.231138820129898e-06, + "loss": 0.2215, + "num_input_tokens_seen": 142789056, + "step": 45370 + }, + { + "epoch": 2.9047436143652776, + "grad_norm": 8.879241943359375, + "learning_rate": 2.2305834145290683e-06, + "loss": 0.2013, + "num_input_tokens_seen": 142804672, + "step": 45375 + }, + { + "epoch": 2.9050636963062546, + "grad_norm": 7.194128513336182, + "learning_rate": 2.2300280223814994e-06, + "loss": 0.2605, + "num_input_tokens_seen": 142822016, + "step": 45380 + }, + { + "epoch": 2.905383778247231, + "grad_norm": 5.8055830001831055, + "learning_rate": 2.2294726437149246e-06, + "loss": 0.1667, + "num_input_tokens_seen": 142838464, + "step": 45385 + }, + { + "epoch": 2.905703860188208, + "grad_norm": 16.3393497467041, + "learning_rate": 2.2289172785570776e-06, + "loss": 0.2685, + "num_input_tokens_seen": 142854848, + "step": 45390 + }, + { + "epoch": 2.906023942129185, + "grad_norm": 10.793367385864258, + "learning_rate": 2.2283619269356892e-06, + "loss": 0.208, + "num_input_tokens_seen": 142870912, + "step": 45395 + }, + { + "epoch": 2.906344024070162, + "grad_norm": 26.338775634765625, + "learning_rate": 2.227806588878492e-06, + "loss": 0.2718, + "num_input_tokens_seen": 142885760, + "step": 45400 + }, + { + "epoch": 2.9066641060111387, + "grad_norm": 10.748498916625977, + "learning_rate": 2.227251264413215e-06, + "loss": 0.1724, + "num_input_tokens_seen": 142901056, + "step": 45405 + }, + { + "epoch": 2.9069841879521157, + "grad_norm": 5.009538173675537, + "learning_rate": 2.2266959535675894e-06, + "loss": 0.1677, + "num_input_tokens_seen": 142917376, + "step": 45410 + }, + { + "epoch": 2.9073042698930927, + "grad_norm": 11.342508316040039, + "learning_rate": 2.226140656369344e-06, + "loss": 0.3009, + "num_input_tokens_seen": 142933312, + "step": 45415 + }, + { + "epoch": 2.9076243518340696, + "grad_norm": 28.486637115478516, + "learning_rate": 2.2255853728462086e-06, + "loss": 0.1602, + "num_input_tokens_seen": 142949440, + "step": 45420 + }, + { + "epoch": 2.9079444337750466, + "grad_norm": 10.352119445800781, + "learning_rate": 2.22503010302591e-06, + "loss": 0.2475, + "num_input_tokens_seen": 142964416, + "step": 45425 + }, + { + "epoch": 2.908264515716023, + "grad_norm": 7.76255989074707, + "learning_rate": 2.2244748469361756e-06, + "loss": 0.1824, + "num_input_tokens_seen": 142980480, + "step": 45430 + }, + { + "epoch": 2.908584597657, + "grad_norm": 17.537586212158203, + "learning_rate": 2.2239196046047315e-06, + "loss": 0.1742, + "num_input_tokens_seen": 142996352, + "step": 45435 + }, + { + "epoch": 2.908904679597977, + "grad_norm": 49.521453857421875, + "learning_rate": 2.2233643760593047e-06, + "loss": 0.2532, + "num_input_tokens_seen": 143011840, + "step": 45440 + }, + { + "epoch": 2.9092247615389537, + "grad_norm": 37.80205154418945, + "learning_rate": 2.222809161327621e-06, + "loss": 0.2405, + "num_input_tokens_seen": 143027712, + "step": 45445 + }, + { + "epoch": 2.9095448434799307, + "grad_norm": 8.303319931030273, + "learning_rate": 2.222253960437403e-06, + "loss": 0.3159, + "num_input_tokens_seen": 143043584, + "step": 45450 + }, + { + "epoch": 2.9098649254209077, + "grad_norm": 12.883772850036621, + "learning_rate": 2.221698773416376e-06, + "loss": 0.3149, + "num_input_tokens_seen": 143060288, + "step": 45455 + }, + { + "epoch": 2.9101850073618847, + "grad_norm": 19.133216857910156, + "learning_rate": 2.221143600292262e-06, + "loss": 0.2105, + "num_input_tokens_seen": 143074816, + "step": 45460 + }, + { + "epoch": 2.9105050893028617, + "grad_norm": 11.528620719909668, + "learning_rate": 2.2205884410927847e-06, + "loss": 0.1477, + "num_input_tokens_seen": 143090816, + "step": 45465 + }, + { + "epoch": 2.9108251712438387, + "grad_norm": 10.363228797912598, + "learning_rate": 2.2200332958456647e-06, + "loss": 0.1625, + "num_input_tokens_seen": 143106752, + "step": 45470 + }, + { + "epoch": 2.911145253184815, + "grad_norm": 28.111927032470703, + "learning_rate": 2.2194781645786244e-06, + "loss": 0.2599, + "num_input_tokens_seen": 143124672, + "step": 45475 + }, + { + "epoch": 2.911465335125792, + "grad_norm": 6.28642463684082, + "learning_rate": 2.2189230473193826e-06, + "loss": 0.1547, + "num_input_tokens_seen": 143141440, + "step": 45480 + }, + { + "epoch": 2.911785417066769, + "grad_norm": 47.88820266723633, + "learning_rate": 2.21836794409566e-06, + "loss": 0.2389, + "num_input_tokens_seen": 143157248, + "step": 45485 + }, + { + "epoch": 2.9121054990077457, + "grad_norm": 5.153526782989502, + "learning_rate": 2.217812854935175e-06, + "loss": 0.2098, + "num_input_tokens_seen": 143173440, + "step": 45490 + }, + { + "epoch": 2.9124255809487227, + "grad_norm": 15.917511940002441, + "learning_rate": 2.2172577798656467e-06, + "loss": 0.2594, + "num_input_tokens_seen": 143188992, + "step": 45495 + }, + { + "epoch": 2.9127456628896997, + "grad_norm": 97.57157897949219, + "learning_rate": 2.216702718914792e-06, + "loss": 0.1787, + "num_input_tokens_seen": 143205440, + "step": 45500 + }, + { + "epoch": 2.9130657448306767, + "grad_norm": 18.91417121887207, + "learning_rate": 2.2161476721103277e-06, + "loss": 0.1835, + "num_input_tokens_seen": 143219328, + "step": 45505 + }, + { + "epoch": 2.9133858267716537, + "grad_norm": 9.65983772277832, + "learning_rate": 2.2155926394799705e-06, + "loss": 0.2448, + "num_input_tokens_seen": 143234816, + "step": 45510 + }, + { + "epoch": 2.9137059087126307, + "grad_norm": 8.580087661743164, + "learning_rate": 2.215037621051435e-06, + "loss": 0.3062, + "num_input_tokens_seen": 143250624, + "step": 45515 + }, + { + "epoch": 2.9140259906536072, + "grad_norm": 8.284689903259277, + "learning_rate": 2.2144826168524368e-06, + "loss": 0.2098, + "num_input_tokens_seen": 143266112, + "step": 45520 + }, + { + "epoch": 2.9143460725945842, + "grad_norm": 9.712929725646973, + "learning_rate": 2.213927626910689e-06, + "loss": 0.2695, + "num_input_tokens_seen": 143283328, + "step": 45525 + }, + { + "epoch": 2.914666154535561, + "grad_norm": 7.7699995040893555, + "learning_rate": 2.213372651253906e-06, + "loss": 0.194, + "num_input_tokens_seen": 143298176, + "step": 45530 + }, + { + "epoch": 2.9149862364765378, + "grad_norm": 5.169548034667969, + "learning_rate": 2.2128176899097993e-06, + "loss": 0.1273, + "num_input_tokens_seen": 143314368, + "step": 45535 + }, + { + "epoch": 2.9153063184175148, + "grad_norm": 6.650978088378906, + "learning_rate": 2.2122627429060823e-06, + "loss": 0.1717, + "num_input_tokens_seen": 143328960, + "step": 45540 + }, + { + "epoch": 2.9156264003584917, + "grad_norm": 80.07391357421875, + "learning_rate": 2.211707810270464e-06, + "loss": 0.219, + "num_input_tokens_seen": 143344256, + "step": 45545 + }, + { + "epoch": 2.9159464822994687, + "grad_norm": 20.451032638549805, + "learning_rate": 2.211152892030656e-06, + "loss": 0.306, + "num_input_tokens_seen": 143360896, + "step": 45550 + }, + { + "epoch": 2.9162665642404457, + "grad_norm": 9.924090385437012, + "learning_rate": 2.2105979882143683e-06, + "loss": 0.178, + "num_input_tokens_seen": 143376704, + "step": 45555 + }, + { + "epoch": 2.9165866461814227, + "grad_norm": 6.57304048538208, + "learning_rate": 2.2100430988493092e-06, + "loss": 0.2196, + "num_input_tokens_seen": 143391296, + "step": 45560 + }, + { + "epoch": 2.9169067281223993, + "grad_norm": 8.458481788635254, + "learning_rate": 2.2094882239631886e-06, + "loss": 0.21, + "num_input_tokens_seen": 143407488, + "step": 45565 + }, + { + "epoch": 2.9172268100633763, + "grad_norm": 14.761883735656738, + "learning_rate": 2.2089333635837117e-06, + "loss": 0.2053, + "num_input_tokens_seen": 143423232, + "step": 45570 + }, + { + "epoch": 2.9175468920043532, + "grad_norm": 86.89244079589844, + "learning_rate": 2.2083785177385875e-06, + "loss": 0.3258, + "num_input_tokens_seen": 143440704, + "step": 45575 + }, + { + "epoch": 2.91786697394533, + "grad_norm": 11.348769187927246, + "learning_rate": 2.20782368645552e-06, + "loss": 0.2386, + "num_input_tokens_seen": 143455680, + "step": 45580 + }, + { + "epoch": 2.918187055886307, + "grad_norm": 10.04068374633789, + "learning_rate": 2.207268869762217e-06, + "loss": 0.2883, + "num_input_tokens_seen": 143470528, + "step": 45585 + }, + { + "epoch": 2.9185071378272838, + "grad_norm": 18.111549377441406, + "learning_rate": 2.2067140676863807e-06, + "loss": 0.2332, + "num_input_tokens_seen": 143486272, + "step": 45590 + }, + { + "epoch": 2.9188272197682608, + "grad_norm": 10.406375885009766, + "learning_rate": 2.2061592802557174e-06, + "loss": 0.2276, + "num_input_tokens_seen": 143501888, + "step": 45595 + }, + { + "epoch": 2.9191473017092378, + "grad_norm": 32.071006774902344, + "learning_rate": 2.2056045074979284e-06, + "loss": 0.2908, + "num_input_tokens_seen": 143517056, + "step": 45600 + }, + { + "epoch": 2.9194673836502143, + "grad_norm": 28.192197799682617, + "learning_rate": 2.2050497494407168e-06, + "loss": 0.2319, + "num_input_tokens_seen": 143531904, + "step": 45605 + }, + { + "epoch": 2.9197874655911913, + "grad_norm": 10.121891975402832, + "learning_rate": 2.2044950061117835e-06, + "loss": 0.1832, + "num_input_tokens_seen": 143547840, + "step": 45610 + }, + { + "epoch": 2.9201075475321683, + "grad_norm": 7.552251815795898, + "learning_rate": 2.203940277538831e-06, + "loss": 0.1995, + "num_input_tokens_seen": 143563264, + "step": 45615 + }, + { + "epoch": 2.9204276294731453, + "grad_norm": 13.738832473754883, + "learning_rate": 2.20338556374956e-06, + "loss": 0.1516, + "num_input_tokens_seen": 143578304, + "step": 45620 + }, + { + "epoch": 2.920747711414122, + "grad_norm": 17.545549392700195, + "learning_rate": 2.202830864771668e-06, + "loss": 0.2949, + "num_input_tokens_seen": 143593984, + "step": 45625 + }, + { + "epoch": 2.921067793355099, + "grad_norm": 37.64973831176758, + "learning_rate": 2.2022761806328552e-06, + "loss": 0.1497, + "num_input_tokens_seen": 143610112, + "step": 45630 + }, + { + "epoch": 2.921387875296076, + "grad_norm": 8.62736701965332, + "learning_rate": 2.2017215113608185e-06, + "loss": 0.3329, + "num_input_tokens_seen": 143627200, + "step": 45635 + }, + { + "epoch": 2.921707957237053, + "grad_norm": 41.461299896240234, + "learning_rate": 2.201166856983256e-06, + "loss": 0.2323, + "num_input_tokens_seen": 143642816, + "step": 45640 + }, + { + "epoch": 2.92202803917803, + "grad_norm": 5.9118452072143555, + "learning_rate": 2.200612217527864e-06, + "loss": 0.1793, + "num_input_tokens_seen": 143658240, + "step": 45645 + }, + { + "epoch": 2.9223481211190063, + "grad_norm": 34.519474029541016, + "learning_rate": 2.2000575930223394e-06, + "loss": 0.1993, + "num_input_tokens_seen": 143675520, + "step": 45650 + }, + { + "epoch": 2.9226682030599833, + "grad_norm": 8.377067565917969, + "learning_rate": 2.199502983494375e-06, + "loss": 0.2178, + "num_input_tokens_seen": 143690112, + "step": 45655 + }, + { + "epoch": 2.9229882850009603, + "grad_norm": 6.632393836975098, + "learning_rate": 2.198948388971667e-06, + "loss": 0.1915, + "num_input_tokens_seen": 143704832, + "step": 45660 + }, + { + "epoch": 2.9233083669419373, + "grad_norm": 18.200403213500977, + "learning_rate": 2.198393809481908e-06, + "loss": 0.2821, + "num_input_tokens_seen": 143719808, + "step": 45665 + }, + { + "epoch": 2.923628448882914, + "grad_norm": 11.346427917480469, + "learning_rate": 2.197839245052791e-06, + "loss": 0.2764, + "num_input_tokens_seen": 143735616, + "step": 45670 + }, + { + "epoch": 2.923948530823891, + "grad_norm": 17.248266220092773, + "learning_rate": 2.197284695712009e-06, + "loss": 0.2563, + "num_input_tokens_seen": 143752448, + "step": 45675 + }, + { + "epoch": 2.924268612764868, + "grad_norm": 18.888946533203125, + "learning_rate": 2.1967301614872516e-06, + "loss": 0.2517, + "num_input_tokens_seen": 143768384, + "step": 45680 + }, + { + "epoch": 2.924588694705845, + "grad_norm": 9.224985122680664, + "learning_rate": 2.1961756424062114e-06, + "loss": 0.2312, + "num_input_tokens_seen": 143783296, + "step": 45685 + }, + { + "epoch": 2.924908776646822, + "grad_norm": 6.050090789794922, + "learning_rate": 2.195621138496576e-06, + "loss": 0.2144, + "num_input_tokens_seen": 143799360, + "step": 45690 + }, + { + "epoch": 2.9252288585877984, + "grad_norm": 6.738865852355957, + "learning_rate": 2.195066649786036e-06, + "loss": 0.2718, + "num_input_tokens_seen": 143814144, + "step": 45695 + }, + { + "epoch": 2.9255489405287753, + "grad_norm": 20.103321075439453, + "learning_rate": 2.1945121763022786e-06, + "loss": 0.285, + "num_input_tokens_seen": 143828992, + "step": 45700 + }, + { + "epoch": 2.9258690224697523, + "grad_norm": 6.67264461517334, + "learning_rate": 2.193957718072993e-06, + "loss": 0.1564, + "num_input_tokens_seen": 143844672, + "step": 45705 + }, + { + "epoch": 2.926189104410729, + "grad_norm": 8.902226448059082, + "learning_rate": 2.1934032751258638e-06, + "loss": 0.2277, + "num_input_tokens_seen": 143860416, + "step": 45710 + }, + { + "epoch": 2.926509186351706, + "grad_norm": 11.260635375976562, + "learning_rate": 2.1928488474885786e-06, + "loss": 0.2406, + "num_input_tokens_seen": 143875520, + "step": 45715 + }, + { + "epoch": 2.926829268292683, + "grad_norm": 21.879703521728516, + "learning_rate": 2.1922944351888214e-06, + "loss": 0.2056, + "num_input_tokens_seen": 143891264, + "step": 45720 + }, + { + "epoch": 2.92714935023366, + "grad_norm": 59.423377990722656, + "learning_rate": 2.1917400382542783e-06, + "loss": 0.2964, + "num_input_tokens_seen": 143906240, + "step": 45725 + }, + { + "epoch": 2.927469432174637, + "grad_norm": 57.251136779785156, + "learning_rate": 2.191185656712631e-06, + "loss": 0.2918, + "num_input_tokens_seen": 143922240, + "step": 45730 + }, + { + "epoch": 2.927789514115614, + "grad_norm": 4.567270755767822, + "learning_rate": 2.190631290591564e-06, + "loss": 0.2193, + "num_input_tokens_seen": 143938112, + "step": 45735 + }, + { + "epoch": 2.9281095960565904, + "grad_norm": 9.729174613952637, + "learning_rate": 2.190076939918759e-06, + "loss": 0.2415, + "num_input_tokens_seen": 143954176, + "step": 45740 + }, + { + "epoch": 2.9284296779975674, + "grad_norm": 12.095681190490723, + "learning_rate": 2.189522604721897e-06, + "loss": 0.3458, + "num_input_tokens_seen": 143969856, + "step": 45745 + }, + { + "epoch": 2.9287497599385444, + "grad_norm": 10.796175956726074, + "learning_rate": 2.188968285028659e-06, + "loss": 0.2386, + "num_input_tokens_seen": 143984768, + "step": 45750 + }, + { + "epoch": 2.929069841879521, + "grad_norm": 40.76509094238281, + "learning_rate": 2.1884139808667247e-06, + "loss": 0.3346, + "num_input_tokens_seen": 144000128, + "step": 45755 + }, + { + "epoch": 2.929389923820498, + "grad_norm": 6.870314121246338, + "learning_rate": 2.187859692263774e-06, + "loss": 0.1727, + "num_input_tokens_seen": 144015360, + "step": 45760 + }, + { + "epoch": 2.929710005761475, + "grad_norm": 53.985782623291016, + "learning_rate": 2.187305419247483e-06, + "loss": 0.2571, + "num_input_tokens_seen": 144030656, + "step": 45765 + }, + { + "epoch": 2.930030087702452, + "grad_norm": 21.544673919677734, + "learning_rate": 2.186751161845532e-06, + "loss": 0.2338, + "num_input_tokens_seen": 144046272, + "step": 45770 + }, + { + "epoch": 2.930350169643429, + "grad_norm": 12.37812328338623, + "learning_rate": 2.1861969200855954e-06, + "loss": 0.2016, + "num_input_tokens_seen": 144061632, + "step": 45775 + }, + { + "epoch": 2.930670251584406, + "grad_norm": 8.331997871398926, + "learning_rate": 2.1856426939953506e-06, + "loss": 0.1527, + "num_input_tokens_seen": 144077248, + "step": 45780 + }, + { + "epoch": 2.9309903335253824, + "grad_norm": 8.58332633972168, + "learning_rate": 2.185088483602471e-06, + "loss": 0.2234, + "num_input_tokens_seen": 144094080, + "step": 45785 + }, + { + "epoch": 2.9313104154663594, + "grad_norm": 7.8348283767700195, + "learning_rate": 2.184534288934633e-06, + "loss": 0.1364, + "num_input_tokens_seen": 144109440, + "step": 45790 + }, + { + "epoch": 2.9316304974073364, + "grad_norm": 6.473622798919678, + "learning_rate": 2.18398011001951e-06, + "loss": 0.1791, + "num_input_tokens_seen": 144124416, + "step": 45795 + }, + { + "epoch": 2.931950579348313, + "grad_norm": 9.71863079071045, + "learning_rate": 2.1834259468847734e-06, + "loss": 0.2824, + "num_input_tokens_seen": 144139136, + "step": 45800 + }, + { + "epoch": 2.93227066128929, + "grad_norm": 93.59211730957031, + "learning_rate": 2.1828717995580966e-06, + "loss": 0.2023, + "num_input_tokens_seen": 144155712, + "step": 45805 + }, + { + "epoch": 2.932590743230267, + "grad_norm": 24.208396911621094, + "learning_rate": 2.1823176680671498e-06, + "loss": 0.2327, + "num_input_tokens_seen": 144171264, + "step": 45810 + }, + { + "epoch": 2.932910825171244, + "grad_norm": 13.594462394714355, + "learning_rate": 2.1817635524396045e-06, + "loss": 0.1862, + "num_input_tokens_seen": 144188160, + "step": 45815 + }, + { + "epoch": 2.933230907112221, + "grad_norm": 8.722628593444824, + "learning_rate": 2.181209452703129e-06, + "loss": 0.16, + "num_input_tokens_seen": 144202688, + "step": 45820 + }, + { + "epoch": 2.933550989053198, + "grad_norm": 9.858564376831055, + "learning_rate": 2.1806553688853942e-06, + "loss": 0.2288, + "num_input_tokens_seen": 144218048, + "step": 45825 + }, + { + "epoch": 2.9338710709941744, + "grad_norm": 31.21377944946289, + "learning_rate": 2.180101301014066e-06, + "loss": 0.2404, + "num_input_tokens_seen": 144234304, + "step": 45830 + }, + { + "epoch": 2.9341911529351514, + "grad_norm": 23.060705184936523, + "learning_rate": 2.1795472491168124e-06, + "loss": 0.2155, + "num_input_tokens_seen": 144249856, + "step": 45835 + }, + { + "epoch": 2.9345112348761284, + "grad_norm": 7.594893932342529, + "learning_rate": 2.1789932132212997e-06, + "loss": 0.265, + "num_input_tokens_seen": 144265920, + "step": 45840 + }, + { + "epoch": 2.934831316817105, + "grad_norm": 13.784098625183105, + "learning_rate": 2.178439193355194e-06, + "loss": 0.1973, + "num_input_tokens_seen": 144282048, + "step": 45845 + }, + { + "epoch": 2.935151398758082, + "grad_norm": 9.138261795043945, + "learning_rate": 2.1778851895461612e-06, + "loss": 0.2021, + "num_input_tokens_seen": 144296960, + "step": 45850 + }, + { + "epoch": 2.935471480699059, + "grad_norm": 62.26091766357422, + "learning_rate": 2.177331201821863e-06, + "loss": 0.1903, + "num_input_tokens_seen": 144312896, + "step": 45855 + }, + { + "epoch": 2.935791562640036, + "grad_norm": 5.954648494720459, + "learning_rate": 2.1767772302099645e-06, + "loss": 0.1487, + "num_input_tokens_seen": 144328448, + "step": 45860 + }, + { + "epoch": 2.936111644581013, + "grad_norm": 28.09783172607422, + "learning_rate": 2.176223274738127e-06, + "loss": 0.1638, + "num_input_tokens_seen": 144345344, + "step": 45865 + }, + { + "epoch": 2.9364317265219895, + "grad_norm": 5.303364276885986, + "learning_rate": 2.175669335434013e-06, + "loss": 0.2904, + "num_input_tokens_seen": 144360320, + "step": 45870 + }, + { + "epoch": 2.9367518084629665, + "grad_norm": 35.88361358642578, + "learning_rate": 2.175115412325283e-06, + "loss": 0.1791, + "num_input_tokens_seen": 144376640, + "step": 45875 + }, + { + "epoch": 2.9370718904039435, + "grad_norm": 58.59677505493164, + "learning_rate": 2.1745615054395973e-06, + "loss": 0.2334, + "num_input_tokens_seen": 144392064, + "step": 45880 + }, + { + "epoch": 2.9373919723449204, + "grad_norm": 14.390789031982422, + "learning_rate": 2.174007614804614e-06, + "loss": 0.1733, + "num_input_tokens_seen": 144409216, + "step": 45885 + }, + { + "epoch": 2.937712054285897, + "grad_norm": 9.81860637664795, + "learning_rate": 2.173453740447993e-06, + "loss": 0.2628, + "num_input_tokens_seen": 144424832, + "step": 45890 + }, + { + "epoch": 2.938032136226874, + "grad_norm": 17.234180450439453, + "learning_rate": 2.1728998823973908e-06, + "loss": 0.1869, + "num_input_tokens_seen": 144441408, + "step": 45895 + }, + { + "epoch": 2.938352218167851, + "grad_norm": 12.503710746765137, + "learning_rate": 2.1723460406804657e-06, + "loss": 0.2113, + "num_input_tokens_seen": 144456000, + "step": 45900 + }, + { + "epoch": 2.938672300108828, + "grad_norm": 47.19841003417969, + "learning_rate": 2.171792215324872e-06, + "loss": 0.2059, + "num_input_tokens_seen": 144472576, + "step": 45905 + }, + { + "epoch": 2.938992382049805, + "grad_norm": 5.502781867980957, + "learning_rate": 2.1712384063582647e-06, + "loss": 0.1148, + "num_input_tokens_seen": 144488704, + "step": 45910 + }, + { + "epoch": 2.9393124639907815, + "grad_norm": 26.107688903808594, + "learning_rate": 2.170684613808301e-06, + "loss": 0.2741, + "num_input_tokens_seen": 144504000, + "step": 45915 + }, + { + "epoch": 2.9396325459317585, + "grad_norm": 10.274260520935059, + "learning_rate": 2.1701308377026305e-06, + "loss": 0.1434, + "num_input_tokens_seen": 144518848, + "step": 45920 + }, + { + "epoch": 2.9399526278727355, + "grad_norm": 15.262290000915527, + "learning_rate": 2.1695770780689083e-06, + "loss": 0.3417, + "num_input_tokens_seen": 144535424, + "step": 45925 + }, + { + "epoch": 2.9402727098137125, + "grad_norm": 6.463888168334961, + "learning_rate": 2.169023334934785e-06, + "loss": 0.1506, + "num_input_tokens_seen": 144550656, + "step": 45930 + }, + { + "epoch": 2.940592791754689, + "grad_norm": 6.6844987869262695, + "learning_rate": 2.1684696083279143e-06, + "loss": 0.1679, + "num_input_tokens_seen": 144565312, + "step": 45935 + }, + { + "epoch": 2.940912873695666, + "grad_norm": 10.466426849365234, + "learning_rate": 2.1679158982759425e-06, + "loss": 0.2042, + "num_input_tokens_seen": 144581504, + "step": 45940 + }, + { + "epoch": 2.941232955636643, + "grad_norm": 25.88392448425293, + "learning_rate": 2.1673622048065222e-06, + "loss": 0.2542, + "num_input_tokens_seen": 144597568, + "step": 45945 + }, + { + "epoch": 2.94155303757762, + "grad_norm": 7.448742866516113, + "learning_rate": 2.1668085279473e-06, + "loss": 0.2439, + "num_input_tokens_seen": 144612544, + "step": 45950 + }, + { + "epoch": 2.941873119518597, + "grad_norm": 5.019881248474121, + "learning_rate": 2.1662548677259252e-06, + "loss": 0.2301, + "num_input_tokens_seen": 144627776, + "step": 45955 + }, + { + "epoch": 2.9421932014595735, + "grad_norm": 9.577919006347656, + "learning_rate": 2.1657012241700428e-06, + "loss": 0.1665, + "num_input_tokens_seen": 144643712, + "step": 45960 + }, + { + "epoch": 2.9425132834005505, + "grad_norm": 28.5513916015625, + "learning_rate": 2.1651475973073e-06, + "loss": 0.3135, + "num_input_tokens_seen": 144658688, + "step": 45965 + }, + { + "epoch": 2.9428333653415275, + "grad_norm": 4.992776393890381, + "learning_rate": 2.1645939871653433e-06, + "loss": 0.2947, + "num_input_tokens_seen": 144674752, + "step": 45970 + }, + { + "epoch": 2.943153447282504, + "grad_norm": 11.679194450378418, + "learning_rate": 2.1640403937718146e-06, + "loss": 0.1797, + "num_input_tokens_seen": 144690688, + "step": 45975 + }, + { + "epoch": 2.943473529223481, + "grad_norm": 52.273128509521484, + "learning_rate": 2.1634868171543595e-06, + "loss": 0.3034, + "num_input_tokens_seen": 144706816, + "step": 45980 + }, + { + "epoch": 2.943793611164458, + "grad_norm": 10.259122848510742, + "learning_rate": 2.1629332573406194e-06, + "loss": 0.1714, + "num_input_tokens_seen": 144722752, + "step": 45985 + }, + { + "epoch": 2.944113693105435, + "grad_norm": 10.193107604980469, + "learning_rate": 2.1623797143582375e-06, + "loss": 0.2672, + "num_input_tokens_seen": 144739200, + "step": 45990 + }, + { + "epoch": 2.944433775046412, + "grad_norm": 28.37932777404785, + "learning_rate": 2.161826188234853e-06, + "loss": 0.2714, + "num_input_tokens_seen": 144754432, + "step": 45995 + }, + { + "epoch": 2.944753856987389, + "grad_norm": 78.15143585205078, + "learning_rate": 2.161272678998108e-06, + "loss": 0.2298, + "num_input_tokens_seen": 144769600, + "step": 46000 + }, + { + "epoch": 2.9450739389283656, + "grad_norm": 5.972991466522217, + "learning_rate": 2.1607191866756403e-06, + "loss": 0.2261, + "num_input_tokens_seen": 144785152, + "step": 46005 + }, + { + "epoch": 2.9453940208693425, + "grad_norm": 9.403034210205078, + "learning_rate": 2.1601657112950897e-06, + "loss": 0.2334, + "num_input_tokens_seen": 144801088, + "step": 46010 + }, + { + "epoch": 2.9457141028103195, + "grad_norm": 58.78618240356445, + "learning_rate": 2.1596122528840928e-06, + "loss": 0.2539, + "num_input_tokens_seen": 144818560, + "step": 46015 + }, + { + "epoch": 2.946034184751296, + "grad_norm": 12.294151306152344, + "learning_rate": 2.159058811470287e-06, + "loss": 0.278, + "num_input_tokens_seen": 144832896, + "step": 46020 + }, + { + "epoch": 2.946354266692273, + "grad_norm": 9.720659255981445, + "learning_rate": 2.158505387081309e-06, + "loss": 0.1882, + "num_input_tokens_seen": 144848576, + "step": 46025 + }, + { + "epoch": 2.94667434863325, + "grad_norm": 48.66188049316406, + "learning_rate": 2.1579519797447926e-06, + "loss": 0.3166, + "num_input_tokens_seen": 144864000, + "step": 46030 + }, + { + "epoch": 2.946994430574227, + "grad_norm": 3.894882917404175, + "learning_rate": 2.157398589488373e-06, + "loss": 0.1769, + "num_input_tokens_seen": 144879232, + "step": 46035 + }, + { + "epoch": 2.947314512515204, + "grad_norm": 10.146574974060059, + "learning_rate": 2.1568452163396826e-06, + "loss": 0.2262, + "num_input_tokens_seen": 144895104, + "step": 46040 + }, + { + "epoch": 2.947634594456181, + "grad_norm": 13.600933074951172, + "learning_rate": 2.156291860326356e-06, + "loss": 0.2183, + "num_input_tokens_seen": 144909632, + "step": 46045 + }, + { + "epoch": 2.9479546763971576, + "grad_norm": 87.6832504272461, + "learning_rate": 2.155738521476022e-06, + "loss": 0.2074, + "num_input_tokens_seen": 144924992, + "step": 46050 + }, + { + "epoch": 2.9482747583381346, + "grad_norm": 7.039710521697998, + "learning_rate": 2.155185199816315e-06, + "loss": 0.1439, + "num_input_tokens_seen": 144940032, + "step": 46055 + }, + { + "epoch": 2.9485948402791116, + "grad_norm": 6.238719940185547, + "learning_rate": 2.1546318953748615e-06, + "loss": 0.2487, + "num_input_tokens_seen": 144955392, + "step": 46060 + }, + { + "epoch": 2.948914922220088, + "grad_norm": 5.865658760070801, + "learning_rate": 2.154078608179293e-06, + "loss": 0.1977, + "num_input_tokens_seen": 144970880, + "step": 46065 + }, + { + "epoch": 2.949235004161065, + "grad_norm": 3.524693012237549, + "learning_rate": 2.153525338257236e-06, + "loss": 0.2199, + "num_input_tokens_seen": 144988224, + "step": 46070 + }, + { + "epoch": 2.949555086102042, + "grad_norm": 7.403204917907715, + "learning_rate": 2.15297208563632e-06, + "loss": 0.2367, + "num_input_tokens_seen": 145003648, + "step": 46075 + }, + { + "epoch": 2.949875168043019, + "grad_norm": 9.853251457214355, + "learning_rate": 2.1524188503441697e-06, + "loss": 0.2339, + "num_input_tokens_seen": 145021952, + "step": 46080 + }, + { + "epoch": 2.950195249983996, + "grad_norm": 7.419816017150879, + "learning_rate": 2.1518656324084113e-06, + "loss": 0.2185, + "num_input_tokens_seen": 145036992, + "step": 46085 + }, + { + "epoch": 2.950515331924973, + "grad_norm": 9.467193603515625, + "learning_rate": 2.1513124318566704e-06, + "loss": 0.2829, + "num_input_tokens_seen": 145052224, + "step": 46090 + }, + { + "epoch": 2.9508354138659496, + "grad_norm": 59.97950744628906, + "learning_rate": 2.15075924871657e-06, + "loss": 0.2023, + "num_input_tokens_seen": 145067072, + "step": 46095 + }, + { + "epoch": 2.9511554958069266, + "grad_norm": 58.64202117919922, + "learning_rate": 2.1502060830157344e-06, + "loss": 0.2653, + "num_input_tokens_seen": 145082880, + "step": 46100 + }, + { + "epoch": 2.9514755777479036, + "grad_norm": 89.77820587158203, + "learning_rate": 2.149652934781784e-06, + "loss": 0.279, + "num_input_tokens_seen": 145098368, + "step": 46105 + }, + { + "epoch": 2.95179565968888, + "grad_norm": 7.812224864959717, + "learning_rate": 2.1490998040423424e-06, + "loss": 0.1865, + "num_input_tokens_seen": 145114368, + "step": 46110 + }, + { + "epoch": 2.952115741629857, + "grad_norm": 20.33704948425293, + "learning_rate": 2.1485466908250275e-06, + "loss": 0.246, + "num_input_tokens_seen": 145129664, + "step": 46115 + }, + { + "epoch": 2.952435823570834, + "grad_norm": 5.852193832397461, + "learning_rate": 2.1479935951574606e-06, + "loss": 0.1757, + "num_input_tokens_seen": 145145536, + "step": 46120 + }, + { + "epoch": 2.952755905511811, + "grad_norm": 11.39107608795166, + "learning_rate": 2.14744051706726e-06, + "loss": 0.2171, + "num_input_tokens_seen": 145161664, + "step": 46125 + }, + { + "epoch": 2.953075987452788, + "grad_norm": 33.623870849609375, + "learning_rate": 2.1468874565820446e-06, + "loss": 0.2985, + "num_input_tokens_seen": 145177024, + "step": 46130 + }, + { + "epoch": 2.9533960693937646, + "grad_norm": 18.366588592529297, + "learning_rate": 2.1463344137294287e-06, + "loss": 0.3048, + "num_input_tokens_seen": 145192384, + "step": 46135 + }, + { + "epoch": 2.9537161513347416, + "grad_norm": 5.33693265914917, + "learning_rate": 2.1457813885370304e-06, + "loss": 0.2254, + "num_input_tokens_seen": 145208320, + "step": 46140 + }, + { + "epoch": 2.9540362332757186, + "grad_norm": 8.989057540893555, + "learning_rate": 2.1452283810324657e-06, + "loss": 0.2513, + "num_input_tokens_seen": 145223808, + "step": 46145 + }, + { + "epoch": 2.9543563152166956, + "grad_norm": 56.30320739746094, + "learning_rate": 2.1446753912433464e-06, + "loss": 0.2454, + "num_input_tokens_seen": 145239104, + "step": 46150 + }, + { + "epoch": 2.954676397157672, + "grad_norm": 19.727458953857422, + "learning_rate": 2.1441224191972876e-06, + "loss": 0.3205, + "num_input_tokens_seen": 145254080, + "step": 46155 + }, + { + "epoch": 2.954996479098649, + "grad_norm": 5.874113082885742, + "learning_rate": 2.1435694649219012e-06, + "loss": 0.2263, + "num_input_tokens_seen": 145270464, + "step": 46160 + }, + { + "epoch": 2.955316561039626, + "grad_norm": 8.750679969787598, + "learning_rate": 2.1430165284448006e-06, + "loss": 0.2373, + "num_input_tokens_seen": 145285888, + "step": 46165 + }, + { + "epoch": 2.955636642980603, + "grad_norm": 19.889312744140625, + "learning_rate": 2.1424636097935935e-06, + "loss": 0.2568, + "num_input_tokens_seen": 145301056, + "step": 46170 + }, + { + "epoch": 2.95595672492158, + "grad_norm": 12.36249828338623, + "learning_rate": 2.141910708995892e-06, + "loss": 0.1594, + "num_input_tokens_seen": 145317504, + "step": 46175 + }, + { + "epoch": 2.9562768068625567, + "grad_norm": 11.131906509399414, + "learning_rate": 2.141357826079304e-06, + "loss": 0.2354, + "num_input_tokens_seen": 145333696, + "step": 46180 + }, + { + "epoch": 2.9565968888035337, + "grad_norm": 12.047443389892578, + "learning_rate": 2.140804961071439e-06, + "loss": 0.1758, + "num_input_tokens_seen": 145349312, + "step": 46185 + }, + { + "epoch": 2.9569169707445107, + "grad_norm": 10.108980178833008, + "learning_rate": 2.1402521139999024e-06, + "loss": 0.158, + "num_input_tokens_seen": 145364416, + "step": 46190 + }, + { + "epoch": 2.9572370526854876, + "grad_norm": 5.435890197753906, + "learning_rate": 2.1396992848923013e-06, + "loss": 0.2748, + "num_input_tokens_seen": 145381376, + "step": 46195 + }, + { + "epoch": 2.957557134626464, + "grad_norm": 8.095308303833008, + "learning_rate": 2.1391464737762425e-06, + "loss": 0.2563, + "num_input_tokens_seen": 145397568, + "step": 46200 + }, + { + "epoch": 2.957877216567441, + "grad_norm": 10.295620918273926, + "learning_rate": 2.1385936806793277e-06, + "loss": 0.3571, + "num_input_tokens_seen": 145412800, + "step": 46205 + }, + { + "epoch": 2.958197298508418, + "grad_norm": 15.500110626220703, + "learning_rate": 2.1380409056291627e-06, + "loss": 0.169, + "num_input_tokens_seen": 145429952, + "step": 46210 + }, + { + "epoch": 2.958517380449395, + "grad_norm": 4.85261344909668, + "learning_rate": 2.1374881486533488e-06, + "loss": 0.2084, + "num_input_tokens_seen": 145447296, + "step": 46215 + }, + { + "epoch": 2.958837462390372, + "grad_norm": 56.601173400878906, + "learning_rate": 2.13693540977949e-06, + "loss": 0.2288, + "num_input_tokens_seen": 145465216, + "step": 46220 + }, + { + "epoch": 2.9591575443313487, + "grad_norm": 15.057127952575684, + "learning_rate": 2.1363826890351842e-06, + "loss": 0.2945, + "num_input_tokens_seen": 145480640, + "step": 46225 + }, + { + "epoch": 2.9594776262723257, + "grad_norm": 79.8947525024414, + "learning_rate": 2.1358299864480344e-06, + "loss": 0.3031, + "num_input_tokens_seen": 145496000, + "step": 46230 + }, + { + "epoch": 2.9597977082133027, + "grad_norm": 13.992719650268555, + "learning_rate": 2.135277302045636e-06, + "loss": 0.2104, + "num_input_tokens_seen": 145512512, + "step": 46235 + }, + { + "epoch": 2.9601177901542792, + "grad_norm": 32.58135986328125, + "learning_rate": 2.134724635855591e-06, + "loss": 0.168, + "num_input_tokens_seen": 145527872, + "step": 46240 + }, + { + "epoch": 2.960437872095256, + "grad_norm": 28.22154998779297, + "learning_rate": 2.1341719879054947e-06, + "loss": 0.2986, + "num_input_tokens_seen": 145543808, + "step": 46245 + }, + { + "epoch": 2.960757954036233, + "grad_norm": 5.506070137023926, + "learning_rate": 2.133619358222944e-06, + "loss": 0.187, + "num_input_tokens_seen": 145559168, + "step": 46250 + }, + { + "epoch": 2.96107803597721, + "grad_norm": 4.533425331115723, + "learning_rate": 2.133066746835534e-06, + "loss": 0.2369, + "num_input_tokens_seen": 145573376, + "step": 46255 + }, + { + "epoch": 2.961398117918187, + "grad_norm": 29.873878479003906, + "learning_rate": 2.1325141537708583e-06, + "loss": 0.2194, + "num_input_tokens_seen": 145590016, + "step": 46260 + }, + { + "epoch": 2.961718199859164, + "grad_norm": 10.57371711730957, + "learning_rate": 2.131961579056513e-06, + "loss": 0.1865, + "num_input_tokens_seen": 145608832, + "step": 46265 + }, + { + "epoch": 2.9620382818001407, + "grad_norm": 4.5096755027771, + "learning_rate": 2.1314090227200885e-06, + "loss": 0.2651, + "num_input_tokens_seen": 145624640, + "step": 46270 + }, + { + "epoch": 2.9623583637411177, + "grad_norm": 20.192983627319336, + "learning_rate": 2.1308564847891784e-06, + "loss": 0.2519, + "num_input_tokens_seen": 145640320, + "step": 46275 + }, + { + "epoch": 2.9626784456820947, + "grad_norm": 10.878148078918457, + "learning_rate": 2.1303039652913718e-06, + "loss": 0.2431, + "num_input_tokens_seen": 145655296, + "step": 46280 + }, + { + "epoch": 2.9629985276230713, + "grad_norm": 12.120038986206055, + "learning_rate": 2.1297514642542607e-06, + "loss": 0.1792, + "num_input_tokens_seen": 145670528, + "step": 46285 + }, + { + "epoch": 2.9633186095640482, + "grad_norm": 8.36530590057373, + "learning_rate": 2.1291989817054314e-06, + "loss": 0.1373, + "num_input_tokens_seen": 145686720, + "step": 46290 + }, + { + "epoch": 2.9636386915050252, + "grad_norm": 11.161117553710938, + "learning_rate": 2.1286465176724746e-06, + "loss": 0.1459, + "num_input_tokens_seen": 145702144, + "step": 46295 + }, + { + "epoch": 2.9639587734460022, + "grad_norm": 7.407889366149902, + "learning_rate": 2.1280940721829757e-06, + "loss": 0.2048, + "num_input_tokens_seen": 145716992, + "step": 46300 + }, + { + "epoch": 2.964278855386979, + "grad_norm": 9.59768009185791, + "learning_rate": 2.1275416452645226e-06, + "loss": 0.2093, + "num_input_tokens_seen": 145731968, + "step": 46305 + }, + { + "epoch": 2.964598937327956, + "grad_norm": 17.449684143066406, + "learning_rate": 2.1269892369446984e-06, + "loss": 0.3044, + "num_input_tokens_seen": 145747392, + "step": 46310 + }, + { + "epoch": 2.9649190192689328, + "grad_norm": 6.358615875244141, + "learning_rate": 2.126436847251089e-06, + "loss": 0.2091, + "num_input_tokens_seen": 145763136, + "step": 46315 + }, + { + "epoch": 2.9652391012099097, + "grad_norm": 21.305023193359375, + "learning_rate": 2.1258844762112787e-06, + "loss": 0.2518, + "num_input_tokens_seen": 145779840, + "step": 46320 + }, + { + "epoch": 2.9655591831508867, + "grad_norm": 35.629058837890625, + "learning_rate": 2.1253321238528474e-06, + "loss": 0.3357, + "num_input_tokens_seen": 145795584, + "step": 46325 + }, + { + "epoch": 2.9658792650918633, + "grad_norm": 10.92965030670166, + "learning_rate": 2.12477979020338e-06, + "loss": 0.2661, + "num_input_tokens_seen": 145810688, + "step": 46330 + }, + { + "epoch": 2.9661993470328403, + "grad_norm": 9.73359489440918, + "learning_rate": 2.124227475290454e-06, + "loss": 0.1763, + "num_input_tokens_seen": 145826048, + "step": 46335 + }, + { + "epoch": 2.9665194289738173, + "grad_norm": 10.653956413269043, + "learning_rate": 2.123675179141652e-06, + "loss": 0.3211, + "num_input_tokens_seen": 145841408, + "step": 46340 + }, + { + "epoch": 2.9668395109147943, + "grad_norm": 28.96004295349121, + "learning_rate": 2.12312290178455e-06, + "loss": 0.2432, + "num_input_tokens_seen": 145858432, + "step": 46345 + }, + { + "epoch": 2.9671595928557712, + "grad_norm": 6.734583854675293, + "learning_rate": 2.1225706432467277e-06, + "loss": 0.2069, + "num_input_tokens_seen": 145873216, + "step": 46350 + }, + { + "epoch": 2.9674796747967482, + "grad_norm": 11.726709365844727, + "learning_rate": 2.1220184035557613e-06, + "loss": 0.1425, + "num_input_tokens_seen": 145888320, + "step": 46355 + }, + { + "epoch": 2.967799756737725, + "grad_norm": 11.858061790466309, + "learning_rate": 2.1214661827392275e-06, + "loss": 0.2318, + "num_input_tokens_seen": 145903424, + "step": 46360 + }, + { + "epoch": 2.9681198386787018, + "grad_norm": 26.827604293823242, + "learning_rate": 2.1209139808247003e-06, + "loss": 0.4214, + "num_input_tokens_seen": 145932608, + "step": 46365 + }, + { + "epoch": 2.9684399206196788, + "grad_norm": 16.59739875793457, + "learning_rate": 2.1203617978397543e-06, + "loss": 0.2261, + "num_input_tokens_seen": 145947456, + "step": 46370 + }, + { + "epoch": 2.9687600025606553, + "grad_norm": 7.65756368637085, + "learning_rate": 2.1198096338119634e-06, + "loss": 0.2228, + "num_input_tokens_seen": 145962560, + "step": 46375 + }, + { + "epoch": 2.9690800845016323, + "grad_norm": 9.667323112487793, + "learning_rate": 2.119257488768898e-06, + "loss": 0.2131, + "num_input_tokens_seen": 145978944, + "step": 46380 + }, + { + "epoch": 2.9694001664426093, + "grad_norm": 6.5888991355896, + "learning_rate": 2.118705362738131e-06, + "loss": 0.1967, + "num_input_tokens_seen": 145994304, + "step": 46385 + }, + { + "epoch": 2.9697202483835863, + "grad_norm": 28.137468338012695, + "learning_rate": 2.1181532557472316e-06, + "loss": 0.2386, + "num_input_tokens_seen": 146009856, + "step": 46390 + }, + { + "epoch": 2.9700403303245633, + "grad_norm": 11.045731544494629, + "learning_rate": 2.117601167823771e-06, + "loss": 0.2441, + "num_input_tokens_seen": 146025792, + "step": 46395 + }, + { + "epoch": 2.97036041226554, + "grad_norm": 15.21705436706543, + "learning_rate": 2.1170490989953148e-06, + "loss": 0.2369, + "num_input_tokens_seen": 146040832, + "step": 46400 + }, + { + "epoch": 2.970680494206517, + "grad_norm": 11.333219528198242, + "learning_rate": 2.1164970492894326e-06, + "loss": 0.2341, + "num_input_tokens_seen": 146055616, + "step": 46405 + }, + { + "epoch": 2.971000576147494, + "grad_norm": 17.861988067626953, + "learning_rate": 2.1159450187336893e-06, + "loss": 0.2157, + "num_input_tokens_seen": 146071168, + "step": 46410 + }, + { + "epoch": 2.971320658088471, + "grad_norm": 23.190298080444336, + "learning_rate": 2.115393007355653e-06, + "loss": 0.2758, + "num_input_tokens_seen": 146086976, + "step": 46415 + }, + { + "epoch": 2.9716407400294473, + "grad_norm": 5.285590171813965, + "learning_rate": 2.114841015182885e-06, + "loss": 0.2013, + "num_input_tokens_seen": 146101824, + "step": 46420 + }, + { + "epoch": 2.9719608219704243, + "grad_norm": 30.136463165283203, + "learning_rate": 2.1142890422429516e-06, + "loss": 0.2896, + "num_input_tokens_seen": 146117056, + "step": 46425 + }, + { + "epoch": 2.9722809039114013, + "grad_norm": 9.531946182250977, + "learning_rate": 2.1137370885634133e-06, + "loss": 0.1726, + "num_input_tokens_seen": 146133184, + "step": 46430 + }, + { + "epoch": 2.9726009858523783, + "grad_norm": 9.248900413513184, + "learning_rate": 2.113185154171833e-06, + "loss": 0.2081, + "num_input_tokens_seen": 146149440, + "step": 46435 + }, + { + "epoch": 2.9729210677933553, + "grad_norm": 36.2501106262207, + "learning_rate": 2.1126332390957714e-06, + "loss": 0.2804, + "num_input_tokens_seen": 146163776, + "step": 46440 + }, + { + "epoch": 2.973241149734332, + "grad_norm": 15.635826110839844, + "learning_rate": 2.112081343362788e-06, + "loss": 0.2193, + "num_input_tokens_seen": 146180224, + "step": 46445 + }, + { + "epoch": 2.973561231675309, + "grad_norm": 5.822676181793213, + "learning_rate": 2.111529467000443e-06, + "loss": 0.1553, + "num_input_tokens_seen": 146195072, + "step": 46450 + }, + { + "epoch": 2.973881313616286, + "grad_norm": 12.167620658874512, + "learning_rate": 2.1109776100362912e-06, + "loss": 0.2079, + "num_input_tokens_seen": 146211200, + "step": 46455 + }, + { + "epoch": 2.974201395557263, + "grad_norm": 5.761699676513672, + "learning_rate": 2.1104257724978924e-06, + "loss": 0.173, + "num_input_tokens_seen": 146227008, + "step": 46460 + }, + { + "epoch": 2.9745214774982394, + "grad_norm": 102.28231811523438, + "learning_rate": 2.1098739544128003e-06, + "loss": 0.3075, + "num_input_tokens_seen": 146243968, + "step": 46465 + }, + { + "epoch": 2.9748415594392164, + "grad_norm": 19.577653884887695, + "learning_rate": 2.1093221558085716e-06, + "loss": 0.2205, + "num_input_tokens_seen": 146259776, + "step": 46470 + }, + { + "epoch": 2.9751616413801933, + "grad_norm": 6.970047950744629, + "learning_rate": 2.108770376712759e-06, + "loss": 0.1693, + "num_input_tokens_seen": 146276736, + "step": 46475 + }, + { + "epoch": 2.9754817233211703, + "grad_norm": 10.38145923614502, + "learning_rate": 2.1082186171529166e-06, + "loss": 0.2099, + "num_input_tokens_seen": 146291904, + "step": 46480 + }, + { + "epoch": 2.9758018052621473, + "grad_norm": 48.471553802490234, + "learning_rate": 2.107666877156595e-06, + "loss": 0.2414, + "num_input_tokens_seen": 146306944, + "step": 46485 + }, + { + "epoch": 2.976121887203124, + "grad_norm": 32.0954475402832, + "learning_rate": 2.107115156751346e-06, + "loss": 0.3572, + "num_input_tokens_seen": 146322176, + "step": 46490 + }, + { + "epoch": 2.976441969144101, + "grad_norm": 9.558411598205566, + "learning_rate": 2.1065634559647197e-06, + "loss": 0.1976, + "num_input_tokens_seen": 146337728, + "step": 46495 + }, + { + "epoch": 2.976762051085078, + "grad_norm": 8.727431297302246, + "learning_rate": 2.1060117748242647e-06, + "loss": 0.2953, + "num_input_tokens_seen": 146353408, + "step": 46500 + }, + { + "epoch": 2.9770821330260544, + "grad_norm": 19.596691131591797, + "learning_rate": 2.105460113357531e-06, + "loss": 0.1739, + "num_input_tokens_seen": 146370048, + "step": 46505 + }, + { + "epoch": 2.9774022149670314, + "grad_norm": 10.949677467346191, + "learning_rate": 2.1049084715920628e-06, + "loss": 0.3382, + "num_input_tokens_seen": 146384384, + "step": 46510 + }, + { + "epoch": 2.9777222969080084, + "grad_norm": 9.250324249267578, + "learning_rate": 2.1043568495554086e-06, + "loss": 0.2107, + "num_input_tokens_seen": 146400320, + "step": 46515 + }, + { + "epoch": 2.9780423788489854, + "grad_norm": 21.094003677368164, + "learning_rate": 2.1038052472751114e-06, + "loss": 0.3154, + "num_input_tokens_seen": 146415168, + "step": 46520 + }, + { + "epoch": 2.9783624607899624, + "grad_norm": 74.58306884765625, + "learning_rate": 2.103253664778717e-06, + "loss": 0.1901, + "num_input_tokens_seen": 146430080, + "step": 46525 + }, + { + "epoch": 2.9786825427309394, + "grad_norm": 11.353202819824219, + "learning_rate": 2.1027021020937674e-06, + "loss": 0.2543, + "num_input_tokens_seen": 146446336, + "step": 46530 + }, + { + "epoch": 2.979002624671916, + "grad_norm": 11.413433074951172, + "learning_rate": 2.1021505592478066e-06, + "loss": 0.1713, + "num_input_tokens_seen": 146461952, + "step": 46535 + }, + { + "epoch": 2.979322706612893, + "grad_norm": 4.562779903411865, + "learning_rate": 2.1015990362683733e-06, + "loss": 0.2301, + "num_input_tokens_seen": 146476544, + "step": 46540 + }, + { + "epoch": 2.97964278855387, + "grad_norm": 27.411483764648438, + "learning_rate": 2.101047533183009e-06, + "loss": 0.2889, + "num_input_tokens_seen": 146491968, + "step": 46545 + }, + { + "epoch": 2.9799628704948464, + "grad_norm": 46.97186279296875, + "learning_rate": 2.1004960500192534e-06, + "loss": 0.3376, + "num_input_tokens_seen": 146507392, + "step": 46550 + }, + { + "epoch": 2.9802829524358234, + "grad_norm": 20.71112060546875, + "learning_rate": 2.0999445868046438e-06, + "loss": 0.1538, + "num_input_tokens_seen": 146521792, + "step": 46555 + }, + { + "epoch": 2.9806030343768004, + "grad_norm": 4.431971073150635, + "learning_rate": 2.0993931435667184e-06, + "loss": 0.1267, + "num_input_tokens_seen": 146538048, + "step": 46560 + }, + { + "epoch": 2.9809231163177774, + "grad_norm": 11.05723762512207, + "learning_rate": 2.098841720333012e-06, + "loss": 0.253, + "num_input_tokens_seen": 146554688, + "step": 46565 + }, + { + "epoch": 2.9812431982587544, + "grad_norm": 5.520634651184082, + "learning_rate": 2.0982903171310612e-06, + "loss": 0.1851, + "num_input_tokens_seen": 146569856, + "step": 46570 + }, + { + "epoch": 2.9815632801997314, + "grad_norm": 8.32331657409668, + "learning_rate": 2.0977389339883984e-06, + "loss": 0.1949, + "num_input_tokens_seen": 146584192, + "step": 46575 + }, + { + "epoch": 2.981883362140708, + "grad_norm": 6.858303070068359, + "learning_rate": 2.097187570932559e-06, + "loss": 0.2788, + "num_input_tokens_seen": 146600512, + "step": 46580 + }, + { + "epoch": 2.982203444081685, + "grad_norm": 31.72218894958496, + "learning_rate": 2.0966362279910736e-06, + "loss": 0.2399, + "num_input_tokens_seen": 146615744, + "step": 46585 + }, + { + "epoch": 2.982523526022662, + "grad_norm": 43.61980438232422, + "learning_rate": 2.0960849051914743e-06, + "loss": 0.2708, + "num_input_tokens_seen": 146630592, + "step": 46590 + }, + { + "epoch": 2.9828436079636385, + "grad_norm": 69.28331756591797, + "learning_rate": 2.0955336025612904e-06, + "loss": 0.268, + "num_input_tokens_seen": 146645440, + "step": 46595 + }, + { + "epoch": 2.9831636899046154, + "grad_norm": 10.702780723571777, + "learning_rate": 2.094982320128053e-06, + "loss": 0.2394, + "num_input_tokens_seen": 146660992, + "step": 46600 + }, + { + "epoch": 2.9834837718455924, + "grad_norm": 5.755463600158691, + "learning_rate": 2.094431057919287e-06, + "loss": 0.2049, + "num_input_tokens_seen": 146676992, + "step": 46605 + }, + { + "epoch": 2.9838038537865694, + "grad_norm": 8.946259498596191, + "learning_rate": 2.093879815962522e-06, + "loss": 0.2071, + "num_input_tokens_seen": 146692288, + "step": 46610 + }, + { + "epoch": 2.9841239357275464, + "grad_norm": 5.563060760498047, + "learning_rate": 2.0933285942852838e-06, + "loss": 0.2099, + "num_input_tokens_seen": 146708096, + "step": 46615 + }, + { + "epoch": 2.9844440176685234, + "grad_norm": 5.20370626449585, + "learning_rate": 2.092777392915097e-06, + "loss": 0.1691, + "num_input_tokens_seen": 146723328, + "step": 46620 + }, + { + "epoch": 2.9847640996095, + "grad_norm": 8.471957206726074, + "learning_rate": 2.0922262118794874e-06, + "loss": 0.1584, + "num_input_tokens_seen": 146739072, + "step": 46625 + }, + { + "epoch": 2.985084181550477, + "grad_norm": 12.706944465637207, + "learning_rate": 2.091675051205975e-06, + "loss": 0.2923, + "num_input_tokens_seen": 146754240, + "step": 46630 + }, + { + "epoch": 2.985404263491454, + "grad_norm": 45.14031982421875, + "learning_rate": 2.0911239109220846e-06, + "loss": 0.1786, + "num_input_tokens_seen": 146768960, + "step": 46635 + }, + { + "epoch": 2.9857243454324305, + "grad_norm": 7.876107215881348, + "learning_rate": 2.090572791055336e-06, + "loss": 0.2447, + "num_input_tokens_seen": 146784960, + "step": 46640 + }, + { + "epoch": 2.9860444273734075, + "grad_norm": 55.72060775756836, + "learning_rate": 2.09002169163325e-06, + "loss": 0.2335, + "num_input_tokens_seen": 146800896, + "step": 46645 + }, + { + "epoch": 2.9863645093143845, + "grad_norm": 9.710463523864746, + "learning_rate": 2.089470612683345e-06, + "loss": 0.2207, + "num_input_tokens_seen": 146815232, + "step": 46650 + }, + { + "epoch": 2.9866845912553615, + "grad_norm": 19.30753517150879, + "learning_rate": 2.0889195542331395e-06, + "loss": 0.3087, + "num_input_tokens_seen": 146830336, + "step": 46655 + }, + { + "epoch": 2.9870046731963384, + "grad_norm": 7.821834564208984, + "learning_rate": 2.0883685163101495e-06, + "loss": 0.1565, + "num_input_tokens_seen": 146846528, + "step": 46660 + }, + { + "epoch": 2.987324755137315, + "grad_norm": 5.720136642456055, + "learning_rate": 2.0878174989418916e-06, + "loss": 0.1609, + "num_input_tokens_seen": 146861824, + "step": 46665 + }, + { + "epoch": 2.987644837078292, + "grad_norm": 7.08976411819458, + "learning_rate": 2.0872665021558817e-06, + "loss": 0.2016, + "num_input_tokens_seen": 146877952, + "step": 46670 + }, + { + "epoch": 2.987964919019269, + "grad_norm": 10.226284980773926, + "learning_rate": 2.086715525979632e-06, + "loss": 0.2071, + "num_input_tokens_seen": 146894272, + "step": 46675 + }, + { + "epoch": 2.988285000960246, + "grad_norm": 5.527083873748779, + "learning_rate": 2.086164570440657e-06, + "loss": 0.1368, + "num_input_tokens_seen": 146909248, + "step": 46680 + }, + { + "epoch": 2.9886050829012225, + "grad_norm": 6.578205585479736, + "learning_rate": 2.0856136355664673e-06, + "loss": 0.3046, + "num_input_tokens_seen": 146923840, + "step": 46685 + }, + { + "epoch": 2.9889251648421995, + "grad_norm": 41.65579605102539, + "learning_rate": 2.085062721384574e-06, + "loss": 0.2109, + "num_input_tokens_seen": 146938816, + "step": 46690 + }, + { + "epoch": 2.9892452467831765, + "grad_norm": 7.360971450805664, + "learning_rate": 2.084511827922488e-06, + "loss": 0.2048, + "num_input_tokens_seen": 146955648, + "step": 46695 + }, + { + "epoch": 2.9895653287241535, + "grad_norm": 11.637482643127441, + "learning_rate": 2.0839609552077164e-06, + "loss": 0.2616, + "num_input_tokens_seen": 146971840, + "step": 46700 + }, + { + "epoch": 2.9898854106651305, + "grad_norm": 21.7947940826416, + "learning_rate": 2.083410103267767e-06, + "loss": 0.1943, + "num_input_tokens_seen": 146987840, + "step": 46705 + }, + { + "epoch": 2.990205492606107, + "grad_norm": 3.8053765296936035, + "learning_rate": 2.082859272130149e-06, + "loss": 0.2437, + "num_input_tokens_seen": 147003840, + "step": 46710 + }, + { + "epoch": 2.990525574547084, + "grad_norm": 11.44146728515625, + "learning_rate": 2.0823084618223644e-06, + "loss": 0.2044, + "num_input_tokens_seen": 147019200, + "step": 46715 + }, + { + "epoch": 2.990845656488061, + "grad_norm": 5.978348255157471, + "learning_rate": 2.08175767237192e-06, + "loss": 0.1568, + "num_input_tokens_seen": 147035456, + "step": 46720 + }, + { + "epoch": 2.991165738429038, + "grad_norm": 9.892834663391113, + "learning_rate": 2.081206903806319e-06, + "loss": 0.1793, + "num_input_tokens_seen": 147053120, + "step": 46725 + }, + { + "epoch": 2.9914858203700145, + "grad_norm": 9.19635009765625, + "learning_rate": 2.080656156153063e-06, + "loss": 0.2353, + "num_input_tokens_seen": 147068992, + "step": 46730 + }, + { + "epoch": 2.9918059023109915, + "grad_norm": 7.012229919433594, + "learning_rate": 2.0801054294396557e-06, + "loss": 0.1396, + "num_input_tokens_seen": 147085632, + "step": 46735 + }, + { + "epoch": 2.9921259842519685, + "grad_norm": 83.84510803222656, + "learning_rate": 2.079554723693595e-06, + "loss": 0.267, + "num_input_tokens_seen": 147102144, + "step": 46740 + }, + { + "epoch": 2.9924460661929455, + "grad_norm": 9.814900398254395, + "learning_rate": 2.0790040389423826e-06, + "loss": 0.2866, + "num_input_tokens_seen": 147120384, + "step": 46745 + }, + { + "epoch": 2.9927661481339225, + "grad_norm": 4.791541576385498, + "learning_rate": 2.0784533752135146e-06, + "loss": 0.1751, + "num_input_tokens_seen": 147135232, + "step": 46750 + }, + { + "epoch": 2.993086230074899, + "grad_norm": 29.73863410949707, + "learning_rate": 2.07790273253449e-06, + "loss": 0.2485, + "num_input_tokens_seen": 147151168, + "step": 46755 + }, + { + "epoch": 2.993406312015876, + "grad_norm": 3.313887119293213, + "learning_rate": 2.077352110932803e-06, + "loss": 0.2188, + "num_input_tokens_seen": 147166208, + "step": 46760 + }, + { + "epoch": 2.993726393956853, + "grad_norm": 8.696001052856445, + "learning_rate": 2.0768015104359517e-06, + "loss": 0.2365, + "num_input_tokens_seen": 147182528, + "step": 46765 + }, + { + "epoch": 2.9940464758978296, + "grad_norm": 9.220388412475586, + "learning_rate": 2.0762509310714272e-06, + "loss": 0.2243, + "num_input_tokens_seen": 147197888, + "step": 46770 + }, + { + "epoch": 2.9943665578388066, + "grad_norm": 10.365457534790039, + "learning_rate": 2.075700372866725e-06, + "loss": 0.3133, + "num_input_tokens_seen": 147214592, + "step": 46775 + }, + { + "epoch": 2.9946866397797836, + "grad_norm": 7.363966941833496, + "learning_rate": 2.0751498358493355e-06, + "loss": 0.2857, + "num_input_tokens_seen": 147231680, + "step": 46780 + }, + { + "epoch": 2.9950067217207605, + "grad_norm": 56.38151931762695, + "learning_rate": 2.07459932004675e-06, + "loss": 0.1859, + "num_input_tokens_seen": 147247104, + "step": 46785 + }, + { + "epoch": 2.9953268036617375, + "grad_norm": 6.19166898727417, + "learning_rate": 2.0740488254864594e-06, + "loss": 0.1572, + "num_input_tokens_seen": 147262656, + "step": 46790 + }, + { + "epoch": 2.9956468856027145, + "grad_norm": 6.813566207885742, + "learning_rate": 2.073498352195951e-06, + "loss": 0.2312, + "num_input_tokens_seen": 147276992, + "step": 46795 + }, + { + "epoch": 2.995966967543691, + "grad_norm": 6.6856231689453125, + "learning_rate": 2.0729479002027147e-06, + "loss": 0.2586, + "num_input_tokens_seen": 147293120, + "step": 46800 + }, + { + "epoch": 2.996287049484668, + "grad_norm": 10.244423866271973, + "learning_rate": 2.0723974695342346e-06, + "loss": 0.1988, + "num_input_tokens_seen": 147308160, + "step": 46805 + }, + { + "epoch": 2.996607131425645, + "grad_norm": 7.240883827209473, + "learning_rate": 2.071847060217998e-06, + "loss": 0.1825, + "num_input_tokens_seen": 147323200, + "step": 46810 + }, + { + "epoch": 2.9969272133666216, + "grad_norm": 98.97113037109375, + "learning_rate": 2.0712966722814883e-06, + "loss": 0.1861, + "num_input_tokens_seen": 147338688, + "step": 46815 + }, + { + "epoch": 2.9972472953075986, + "grad_norm": 6.880701541900635, + "learning_rate": 2.070746305752191e-06, + "loss": 0.2376, + "num_input_tokens_seen": 147353600, + "step": 46820 + }, + { + "epoch": 2.9975673772485756, + "grad_norm": 30.277664184570312, + "learning_rate": 2.0701959606575863e-06, + "loss": 0.2324, + "num_input_tokens_seen": 147369472, + "step": 46825 + }, + { + "epoch": 2.9978874591895526, + "grad_norm": 7.395526885986328, + "learning_rate": 2.0696456370251576e-06, + "loss": 0.1667, + "num_input_tokens_seen": 147384896, + "step": 46830 + }, + { + "epoch": 2.9982075411305296, + "grad_norm": 9.020480155944824, + "learning_rate": 2.069095334882383e-06, + "loss": 0.1573, + "num_input_tokens_seen": 147400832, + "step": 46835 + }, + { + "epoch": 2.9985276230715066, + "grad_norm": 10.659223556518555, + "learning_rate": 2.0685450542567432e-06, + "loss": 0.1533, + "num_input_tokens_seen": 147417280, + "step": 46840 + }, + { + "epoch": 2.998847705012483, + "grad_norm": 10.776005744934082, + "learning_rate": 2.0679947951757164e-06, + "loss": 0.2064, + "num_input_tokens_seen": 147433408, + "step": 46845 + }, + { + "epoch": 2.99916778695346, + "grad_norm": 8.2322416305542, + "learning_rate": 2.0674445576667785e-06, + "loss": 0.216, + "num_input_tokens_seen": 147448960, + "step": 46850 + }, + { + "epoch": 2.999487868894437, + "grad_norm": 7.997879505157471, + "learning_rate": 2.0668943417574073e-06, + "loss": 0.2719, + "num_input_tokens_seen": 147463744, + "step": 46855 + }, + { + "epoch": 2.9998079508354136, + "grad_norm": 33.3482666015625, + "learning_rate": 2.066344147475076e-06, + "loss": 0.2388, + "num_input_tokens_seen": 147480512, + "step": 46860 + }, + { + "epoch": 3.0001280327763906, + "grad_norm": 9.049976348876953, + "learning_rate": 2.0657939748472593e-06, + "loss": 0.1646, + "num_input_tokens_seen": 147496384, + "step": 46865 + }, + { + "epoch": 3.0004481147173676, + "grad_norm": 8.153816223144531, + "learning_rate": 2.06524382390143e-06, + "loss": 0.1496, + "num_input_tokens_seen": 147511104, + "step": 46870 + }, + { + "epoch": 3.0005761474937582, + "eval_loss": 0.5023031234741211, + "eval_runtime": 50.9868, + "eval_samples_per_second": 272.345, + "eval_steps_per_second": 34.048, + "num_input_tokens_seen": 147516736, + "step": 46872 + }, + { + "epoch": 3.0007681966583446, + "grad_norm": 5.878737926483154, + "learning_rate": 2.06469369466506e-06, + "loss": 0.1556, + "num_input_tokens_seen": 147525440, + "step": 46875 + }, + { + "epoch": 3.0010882785993216, + "grad_norm": 7.643129348754883, + "learning_rate": 2.0641435871656183e-06, + "loss": 0.1, + "num_input_tokens_seen": 147540800, + "step": 46880 + }, + { + "epoch": 3.001408360540298, + "grad_norm": 5.830235004425049, + "learning_rate": 2.063593501430577e-06, + "loss": 0.1753, + "num_input_tokens_seen": 147556992, + "step": 46885 + }, + { + "epoch": 3.001728442481275, + "grad_norm": 4.983327388763428, + "learning_rate": 2.0630434374874017e-06, + "loss": 0.1897, + "num_input_tokens_seen": 147574080, + "step": 46890 + }, + { + "epoch": 3.002048524422252, + "grad_norm": 8.063118934631348, + "learning_rate": 2.062493395363562e-06, + "loss": 0.1506, + "num_input_tokens_seen": 147588864, + "step": 46895 + }, + { + "epoch": 3.002368606363229, + "grad_norm": 3.5612926483154297, + "learning_rate": 2.0619433750865227e-06, + "loss": 0.1401, + "num_input_tokens_seen": 147603776, + "step": 46900 + }, + { + "epoch": 3.0026886883042057, + "grad_norm": 6.8659467697143555, + "learning_rate": 2.061393376683749e-06, + "loss": 0.1737, + "num_input_tokens_seen": 147620288, + "step": 46905 + }, + { + "epoch": 3.0030087702451826, + "grad_norm": 6.266153335571289, + "learning_rate": 2.060843400182707e-06, + "loss": 0.1351, + "num_input_tokens_seen": 147634816, + "step": 46910 + }, + { + "epoch": 3.0033288521861596, + "grad_norm": 48.206878662109375, + "learning_rate": 2.0602934456108567e-06, + "loss": 0.1831, + "num_input_tokens_seen": 147652032, + "step": 46915 + }, + { + "epoch": 3.0036489341271366, + "grad_norm": 7.691180229187012, + "learning_rate": 2.059743512995662e-06, + "loss": 0.1352, + "num_input_tokens_seen": 147667136, + "step": 46920 + }, + { + "epoch": 3.0039690160681136, + "grad_norm": 2.208003282546997, + "learning_rate": 2.059193602364583e-06, + "loss": 0.1296, + "num_input_tokens_seen": 147683520, + "step": 46925 + }, + { + "epoch": 3.00428909800909, + "grad_norm": 9.4638032913208, + "learning_rate": 2.05864371374508e-06, + "loss": 0.1696, + "num_input_tokens_seen": 147698496, + "step": 46930 + }, + { + "epoch": 3.004609179950067, + "grad_norm": 7.170914173126221, + "learning_rate": 2.05809384716461e-06, + "loss": 0.1052, + "num_input_tokens_seen": 147714496, + "step": 46935 + }, + { + "epoch": 3.004929261891044, + "grad_norm": 7.448221683502197, + "learning_rate": 2.057544002650632e-06, + "loss": 0.2007, + "num_input_tokens_seen": 147730048, + "step": 46940 + }, + { + "epoch": 3.005249343832021, + "grad_norm": 6.558478355407715, + "learning_rate": 2.0569941802306018e-06, + "loss": 0.1726, + "num_input_tokens_seen": 147746624, + "step": 46945 + }, + { + "epoch": 3.0055694257729977, + "grad_norm": 8.497262001037598, + "learning_rate": 2.0564443799319747e-06, + "loss": 0.1319, + "num_input_tokens_seen": 147761792, + "step": 46950 + }, + { + "epoch": 3.0058895077139747, + "grad_norm": 5.642021179199219, + "learning_rate": 2.0558946017822047e-06, + "loss": 0.1215, + "num_input_tokens_seen": 147777984, + "step": 46955 + }, + { + "epoch": 3.0062095896549517, + "grad_norm": 5.066093921661377, + "learning_rate": 2.0553448458087445e-06, + "loss": 0.1655, + "num_input_tokens_seen": 147793600, + "step": 46960 + }, + { + "epoch": 3.0065296715959287, + "grad_norm": 5.544450283050537, + "learning_rate": 2.0547951120390476e-06, + "loss": 0.1754, + "num_input_tokens_seen": 147809728, + "step": 46965 + }, + { + "epoch": 3.0068497535369056, + "grad_norm": 26.67751693725586, + "learning_rate": 2.054245400500563e-06, + "loss": 0.0864, + "num_input_tokens_seen": 147825600, + "step": 46970 + }, + { + "epoch": 3.007169835477882, + "grad_norm": 59.55647659301758, + "learning_rate": 2.0536957112207424e-06, + "loss": 0.1677, + "num_input_tokens_seen": 147841152, + "step": 46975 + }, + { + "epoch": 3.007489917418859, + "grad_norm": 7.349465847015381, + "learning_rate": 2.0531460442270324e-06, + "loss": 0.1463, + "num_input_tokens_seen": 147856512, + "step": 46980 + }, + { + "epoch": 3.007809999359836, + "grad_norm": 4.4511613845825195, + "learning_rate": 2.0525963995468815e-06, + "loss": 0.0892, + "num_input_tokens_seen": 147873088, + "step": 46985 + }, + { + "epoch": 3.008130081300813, + "grad_norm": 22.8629093170166, + "learning_rate": 2.052046777207736e-06, + "loss": 0.2207, + "num_input_tokens_seen": 147888448, + "step": 46990 + }, + { + "epoch": 3.0084501632417897, + "grad_norm": 7.875115394592285, + "learning_rate": 2.0514971772370413e-06, + "loss": 0.1632, + "num_input_tokens_seen": 147902464, + "step": 46995 + }, + { + "epoch": 3.0087702451827667, + "grad_norm": 7.973884582519531, + "learning_rate": 2.050947599662241e-06, + "loss": 0.1545, + "num_input_tokens_seen": 147918272, + "step": 47000 + }, + { + "epoch": 3.0090903271237437, + "grad_norm": 6.374778747558594, + "learning_rate": 2.0503980445107786e-06, + "loss": 0.1162, + "num_input_tokens_seen": 147933696, + "step": 47005 + }, + { + "epoch": 3.0094104090647207, + "grad_norm": 10.419112205505371, + "learning_rate": 2.049848511810096e-06, + "loss": 0.1201, + "num_input_tokens_seen": 147949376, + "step": 47010 + }, + { + "epoch": 3.0097304910056977, + "grad_norm": 5.112634181976318, + "learning_rate": 2.049299001587634e-06, + "loss": 0.193, + "num_input_tokens_seen": 147964928, + "step": 47015 + }, + { + "epoch": 3.010050572946674, + "grad_norm": 5.6585893630981445, + "learning_rate": 2.0487495138708328e-06, + "loss": 0.1333, + "num_input_tokens_seen": 147980672, + "step": 47020 + }, + { + "epoch": 3.010370654887651, + "grad_norm": 12.131438255310059, + "learning_rate": 2.0482000486871295e-06, + "loss": 0.2778, + "num_input_tokens_seen": 147996992, + "step": 47025 + }, + { + "epoch": 3.010690736828628, + "grad_norm": 6.101233005523682, + "learning_rate": 2.0476506060639633e-06, + "loss": 0.1639, + "num_input_tokens_seen": 148012736, + "step": 47030 + }, + { + "epoch": 3.011010818769605, + "grad_norm": 4.144140243530273, + "learning_rate": 2.047101186028769e-06, + "loss": 0.1334, + "num_input_tokens_seen": 148028544, + "step": 47035 + }, + { + "epoch": 3.0113309007105817, + "grad_norm": 6.675116539001465, + "learning_rate": 2.046551788608983e-06, + "loss": 0.0719, + "num_input_tokens_seen": 148043392, + "step": 47040 + }, + { + "epoch": 3.0116509826515587, + "grad_norm": 3.278372287750244, + "learning_rate": 2.0460024138320377e-06, + "loss": 0.1151, + "num_input_tokens_seen": 148058368, + "step": 47045 + }, + { + "epoch": 3.0119710645925357, + "grad_norm": 12.057754516601562, + "learning_rate": 2.0454530617253686e-06, + "loss": 0.1467, + "num_input_tokens_seen": 148072256, + "step": 47050 + }, + { + "epoch": 3.0122911465335127, + "grad_norm": 3.0360124111175537, + "learning_rate": 2.0449037323164046e-06, + "loss": 0.1342, + "num_input_tokens_seen": 148087616, + "step": 47055 + }, + { + "epoch": 3.0126112284744893, + "grad_norm": 6.825588703155518, + "learning_rate": 2.0443544256325786e-06, + "loss": 0.1396, + "num_input_tokens_seen": 148103680, + "step": 47060 + }, + { + "epoch": 3.0129313104154662, + "grad_norm": 9.76211929321289, + "learning_rate": 2.0438051417013184e-06, + "loss": 0.1688, + "num_input_tokens_seen": 148118848, + "step": 47065 + }, + { + "epoch": 3.0132513923564432, + "grad_norm": 9.253535270690918, + "learning_rate": 2.043255880550054e-06, + "loss": 0.2015, + "num_input_tokens_seen": 148135296, + "step": 47070 + }, + { + "epoch": 3.0135714742974202, + "grad_norm": 5.793148994445801, + "learning_rate": 2.0427066422062107e-06, + "loss": 0.1305, + "num_input_tokens_seen": 148151040, + "step": 47075 + }, + { + "epoch": 3.013891556238397, + "grad_norm": 8.427189826965332, + "learning_rate": 2.042157426697216e-06, + "loss": 0.2057, + "num_input_tokens_seen": 148166784, + "step": 47080 + }, + { + "epoch": 3.0142116381793738, + "grad_norm": 7.398636341094971, + "learning_rate": 2.041608234050496e-06, + "loss": 0.1627, + "num_input_tokens_seen": 148182912, + "step": 47085 + }, + { + "epoch": 3.0145317201203508, + "grad_norm": 8.25279712677002, + "learning_rate": 2.0410590642934715e-06, + "loss": 0.1708, + "num_input_tokens_seen": 148198336, + "step": 47090 + }, + { + "epoch": 3.0148518020613277, + "grad_norm": 9.5991849899292, + "learning_rate": 2.040509917453568e-06, + "loss": 0.1436, + "num_input_tokens_seen": 148214464, + "step": 47095 + }, + { + "epoch": 3.0151718840023047, + "grad_norm": 6.884991645812988, + "learning_rate": 2.039960793558205e-06, + "loss": 0.118, + "num_input_tokens_seen": 148229760, + "step": 47100 + }, + { + "epoch": 3.0154919659432813, + "grad_norm": 19.95142364501953, + "learning_rate": 2.039411692634804e-06, + "loss": 0.141, + "num_input_tokens_seen": 148246208, + "step": 47105 + }, + { + "epoch": 3.0158120478842583, + "grad_norm": 7.709207057952881, + "learning_rate": 2.0388626147107837e-06, + "loss": 0.1982, + "num_input_tokens_seen": 148261312, + "step": 47110 + }, + { + "epoch": 3.0161321298252353, + "grad_norm": 8.627106666564941, + "learning_rate": 2.0383135598135635e-06, + "loss": 0.1154, + "num_input_tokens_seen": 148277824, + "step": 47115 + }, + { + "epoch": 3.0164522117662123, + "grad_norm": 11.03551959991455, + "learning_rate": 2.0377645279705583e-06, + "loss": 0.1589, + "num_input_tokens_seen": 148292480, + "step": 47120 + }, + { + "epoch": 3.0167722937071892, + "grad_norm": 8.620903968811035, + "learning_rate": 2.037215519209185e-06, + "loss": 0.1566, + "num_input_tokens_seen": 148308544, + "step": 47125 + }, + { + "epoch": 3.017092375648166, + "grad_norm": 8.248734474182129, + "learning_rate": 2.0366665335568576e-06, + "loss": 0.182, + "num_input_tokens_seen": 148323840, + "step": 47130 + }, + { + "epoch": 3.017412457589143, + "grad_norm": 3.8033738136291504, + "learning_rate": 2.0361175710409902e-06, + "loss": 0.1763, + "num_input_tokens_seen": 148339264, + "step": 47135 + }, + { + "epoch": 3.0177325395301198, + "grad_norm": 6.122775554656982, + "learning_rate": 2.0355686316889964e-06, + "loss": 0.2021, + "num_input_tokens_seen": 148354944, + "step": 47140 + }, + { + "epoch": 3.0180526214710968, + "grad_norm": 6.883595943450928, + "learning_rate": 2.0350197155282848e-06, + "loss": 0.1247, + "num_input_tokens_seen": 148370880, + "step": 47145 + }, + { + "epoch": 3.0183727034120733, + "grad_norm": 3.990628719329834, + "learning_rate": 2.034470822586267e-06, + "loss": 0.1669, + "num_input_tokens_seen": 148386368, + "step": 47150 + }, + { + "epoch": 3.0186927853530503, + "grad_norm": 72.85891723632812, + "learning_rate": 2.0339219528903508e-06, + "loss": 0.205, + "num_input_tokens_seen": 148401600, + "step": 47155 + }, + { + "epoch": 3.0190128672940273, + "grad_norm": 3.7758395671844482, + "learning_rate": 2.033373106467946e-06, + "loss": 0.216, + "num_input_tokens_seen": 148416576, + "step": 47160 + }, + { + "epoch": 3.0193329492350043, + "grad_norm": 5.2030029296875, + "learning_rate": 2.0328242833464563e-06, + "loss": 0.164, + "num_input_tokens_seen": 148431872, + "step": 47165 + }, + { + "epoch": 3.019653031175981, + "grad_norm": 10.366077423095703, + "learning_rate": 2.0322754835532897e-06, + "loss": 0.1878, + "num_input_tokens_seen": 148446784, + "step": 47170 + }, + { + "epoch": 3.019973113116958, + "grad_norm": 8.4495849609375, + "learning_rate": 2.0317267071158482e-06, + "loss": 0.132, + "num_input_tokens_seen": 148462592, + "step": 47175 + }, + { + "epoch": 3.020293195057935, + "grad_norm": 9.921636581420898, + "learning_rate": 2.031177954061536e-06, + "loss": 0.1745, + "num_input_tokens_seen": 148477632, + "step": 47180 + }, + { + "epoch": 3.020613276998912, + "grad_norm": 10.115055084228516, + "learning_rate": 2.0306292244177543e-06, + "loss": 0.1788, + "num_input_tokens_seen": 148493312, + "step": 47185 + }, + { + "epoch": 3.020933358939889, + "grad_norm": 6.002085208892822, + "learning_rate": 2.0300805182119045e-06, + "loss": 0.1414, + "num_input_tokens_seen": 148510144, + "step": 47190 + }, + { + "epoch": 3.0212534408808653, + "grad_norm": 5.797027111053467, + "learning_rate": 2.0295318354713867e-06, + "loss": 0.1231, + "num_input_tokens_seen": 148524480, + "step": 47195 + }, + { + "epoch": 3.0215735228218423, + "grad_norm": 11.03966999053955, + "learning_rate": 2.0289831762235976e-06, + "loss": 0.1774, + "num_input_tokens_seen": 148540416, + "step": 47200 + }, + { + "epoch": 3.0218936047628193, + "grad_norm": 8.119213104248047, + "learning_rate": 2.0284345404959364e-06, + "loss": 0.1439, + "num_input_tokens_seen": 148556288, + "step": 47205 + }, + { + "epoch": 3.0222136867037963, + "grad_norm": 9.015398979187012, + "learning_rate": 2.0278859283157966e-06, + "loss": 0.1966, + "num_input_tokens_seen": 148571712, + "step": 47210 + }, + { + "epoch": 3.022533768644773, + "grad_norm": 6.044960975646973, + "learning_rate": 2.027337339710575e-06, + "loss": 0.1737, + "num_input_tokens_seen": 148587264, + "step": 47215 + }, + { + "epoch": 3.02285385058575, + "grad_norm": 6.639914035797119, + "learning_rate": 2.0267887747076642e-06, + "loss": 0.1391, + "num_input_tokens_seen": 148602816, + "step": 47220 + }, + { + "epoch": 3.023173932526727, + "grad_norm": 3.0005838871002197, + "learning_rate": 2.0262402333344585e-06, + "loss": 0.1438, + "num_input_tokens_seen": 148618368, + "step": 47225 + }, + { + "epoch": 3.023494014467704, + "grad_norm": 4.163475513458252, + "learning_rate": 2.025691715618346e-06, + "loss": 0.1759, + "num_input_tokens_seen": 148632960, + "step": 47230 + }, + { + "epoch": 3.023814096408681, + "grad_norm": 3.8361096382141113, + "learning_rate": 2.0251432215867197e-06, + "loss": 0.148, + "num_input_tokens_seen": 148648000, + "step": 47235 + }, + { + "epoch": 3.0241341783496574, + "grad_norm": 6.066221237182617, + "learning_rate": 2.0245947512669668e-06, + "loss": 0.1652, + "num_input_tokens_seen": 148664832, + "step": 47240 + }, + { + "epoch": 3.0244542602906344, + "grad_norm": 5.574875354766846, + "learning_rate": 2.024046304686477e-06, + "loss": 0.1096, + "num_input_tokens_seen": 148680384, + "step": 47245 + }, + { + "epoch": 3.0247743422316113, + "grad_norm": 8.913908004760742, + "learning_rate": 2.023497881872634e-06, + "loss": 0.1457, + "num_input_tokens_seen": 148695424, + "step": 47250 + }, + { + "epoch": 3.0250944241725883, + "grad_norm": 4.021956920623779, + "learning_rate": 2.0229494828528252e-06, + "loss": 0.1612, + "num_input_tokens_seen": 148710016, + "step": 47255 + }, + { + "epoch": 3.025414506113565, + "grad_norm": 5.3656229972839355, + "learning_rate": 2.0224011076544352e-06, + "loss": 0.1602, + "num_input_tokens_seen": 148725632, + "step": 47260 + }, + { + "epoch": 3.025734588054542, + "grad_norm": 9.500147819519043, + "learning_rate": 2.0218527563048453e-06, + "loss": 0.1816, + "num_input_tokens_seen": 148741632, + "step": 47265 + }, + { + "epoch": 3.026054669995519, + "grad_norm": 4.207950115203857, + "learning_rate": 2.0213044288314386e-06, + "loss": 0.1029, + "num_input_tokens_seen": 148757248, + "step": 47270 + }, + { + "epoch": 3.026374751936496, + "grad_norm": 11.106679916381836, + "learning_rate": 2.0207561252615953e-06, + "loss": 0.1873, + "num_input_tokens_seen": 148772864, + "step": 47275 + }, + { + "epoch": 3.026694833877473, + "grad_norm": 6.288750648498535, + "learning_rate": 2.020207845622695e-06, + "loss": 0.0767, + "num_input_tokens_seen": 148789440, + "step": 47280 + }, + { + "epoch": 3.0270149158184494, + "grad_norm": 5.796869277954102, + "learning_rate": 2.019659589942115e-06, + "loss": 0.1261, + "num_input_tokens_seen": 148805120, + "step": 47285 + }, + { + "epoch": 3.0273349977594264, + "grad_norm": 4.302918910980225, + "learning_rate": 2.019111358247234e-06, + "loss": 0.1392, + "num_input_tokens_seen": 148820544, + "step": 47290 + }, + { + "epoch": 3.0276550797004034, + "grad_norm": 10.964800834655762, + "learning_rate": 2.0185631505654262e-06, + "loss": 0.1823, + "num_input_tokens_seen": 148835264, + "step": 47295 + }, + { + "epoch": 3.0279751616413804, + "grad_norm": 8.841522216796875, + "learning_rate": 2.0180149669240675e-06, + "loss": 0.138, + "num_input_tokens_seen": 148850368, + "step": 47300 + }, + { + "epoch": 3.028295243582357, + "grad_norm": 5.996749401092529, + "learning_rate": 2.0174668073505303e-06, + "loss": 0.1199, + "num_input_tokens_seen": 148866304, + "step": 47305 + }, + { + "epoch": 3.028615325523334, + "grad_norm": 4.018023490905762, + "learning_rate": 2.016918671872187e-06, + "loss": 0.1164, + "num_input_tokens_seen": 148881408, + "step": 47310 + }, + { + "epoch": 3.028935407464311, + "grad_norm": 6.141058921813965, + "learning_rate": 2.01637056051641e-06, + "loss": 0.1429, + "num_input_tokens_seen": 148896192, + "step": 47315 + }, + { + "epoch": 3.029255489405288, + "grad_norm": 4.70374870300293, + "learning_rate": 2.015822473310567e-06, + "loss": 0.1199, + "num_input_tokens_seen": 148911552, + "step": 47320 + }, + { + "epoch": 3.0295755713462644, + "grad_norm": 2.839017629623413, + "learning_rate": 2.0152744102820286e-06, + "loss": 0.0901, + "num_input_tokens_seen": 148927040, + "step": 47325 + }, + { + "epoch": 3.0298956532872414, + "grad_norm": 4.658024311065674, + "learning_rate": 2.0147263714581606e-06, + "loss": 0.1823, + "num_input_tokens_seen": 148943808, + "step": 47330 + }, + { + "epoch": 3.0302157352282184, + "grad_norm": 13.222366333007812, + "learning_rate": 2.0141783568663304e-06, + "loss": 0.2101, + "num_input_tokens_seen": 148958464, + "step": 47335 + }, + { + "epoch": 3.0305358171691954, + "grad_norm": 7.360287666320801, + "learning_rate": 2.013630366533902e-06, + "loss": 0.2033, + "num_input_tokens_seen": 148974528, + "step": 47340 + }, + { + "epoch": 3.0308558991101724, + "grad_norm": 9.281476020812988, + "learning_rate": 2.013082400488241e-06, + "loss": 0.1524, + "num_input_tokens_seen": 148990016, + "step": 47345 + }, + { + "epoch": 3.031175981051149, + "grad_norm": 8.60020637512207, + "learning_rate": 2.012534458756707e-06, + "loss": 0.1503, + "num_input_tokens_seen": 149006016, + "step": 47350 + }, + { + "epoch": 3.031496062992126, + "grad_norm": 5.654721260070801, + "learning_rate": 2.0119865413666638e-06, + "loss": 0.1339, + "num_input_tokens_seen": 149021824, + "step": 47355 + }, + { + "epoch": 3.031816144933103, + "grad_norm": 7.716291904449463, + "learning_rate": 2.0114386483454708e-06, + "loss": 0.1924, + "num_input_tokens_seen": 149037376, + "step": 47360 + }, + { + "epoch": 3.03213622687408, + "grad_norm": 16.364582061767578, + "learning_rate": 2.010890779720486e-06, + "loss": 0.1623, + "num_input_tokens_seen": 149053888, + "step": 47365 + }, + { + "epoch": 3.0324563088150565, + "grad_norm": 6.828835964202881, + "learning_rate": 2.0103429355190695e-06, + "loss": 0.1361, + "num_input_tokens_seen": 149069248, + "step": 47370 + }, + { + "epoch": 3.0327763907560334, + "grad_norm": 4.6212029457092285, + "learning_rate": 2.009795115768575e-06, + "loss": 0.1533, + "num_input_tokens_seen": 149084480, + "step": 47375 + }, + { + "epoch": 3.0330964726970104, + "grad_norm": 8.59306812286377, + "learning_rate": 2.00924732049636e-06, + "loss": 0.2616, + "num_input_tokens_seen": 149099712, + "step": 47380 + }, + { + "epoch": 3.0334165546379874, + "grad_norm": 16.104183197021484, + "learning_rate": 2.0086995497297764e-06, + "loss": 0.1906, + "num_input_tokens_seen": 149114880, + "step": 47385 + }, + { + "epoch": 3.0337366365789644, + "grad_norm": 7.637288570404053, + "learning_rate": 2.0081518034961795e-06, + "loss": 0.1628, + "num_input_tokens_seen": 149131072, + "step": 47390 + }, + { + "epoch": 3.034056718519941, + "grad_norm": 4.410763740539551, + "learning_rate": 2.0076040818229187e-06, + "loss": 0.1103, + "num_input_tokens_seen": 149147200, + "step": 47395 + }, + { + "epoch": 3.034376800460918, + "grad_norm": 17.07895278930664, + "learning_rate": 2.007056384737346e-06, + "loss": 0.155, + "num_input_tokens_seen": 149162944, + "step": 47400 + }, + { + "epoch": 3.034696882401895, + "grad_norm": 7.061725616455078, + "learning_rate": 2.006508712266809e-06, + "loss": 0.1416, + "num_input_tokens_seen": 149178432, + "step": 47405 + }, + { + "epoch": 3.035016964342872, + "grad_norm": 6.788725852966309, + "learning_rate": 2.005961064438657e-06, + "loss": 0.1509, + "num_input_tokens_seen": 149194240, + "step": 47410 + }, + { + "epoch": 3.0353370462838485, + "grad_norm": 3.981688976287842, + "learning_rate": 2.005413441280235e-06, + "loss": 0.1214, + "num_input_tokens_seen": 149210560, + "step": 47415 + }, + { + "epoch": 3.0356571282248255, + "grad_norm": 9.21435260772705, + "learning_rate": 2.004865842818891e-06, + "loss": 0.2117, + "num_input_tokens_seen": 149225728, + "step": 47420 + }, + { + "epoch": 3.0359772101658025, + "grad_norm": 9.407035827636719, + "learning_rate": 2.004318269081967e-06, + "loss": 0.1594, + "num_input_tokens_seen": 149241664, + "step": 47425 + }, + { + "epoch": 3.0362972921067795, + "grad_norm": 6.511951923370361, + "learning_rate": 2.0037707200968064e-06, + "loss": 0.1372, + "num_input_tokens_seen": 149257536, + "step": 47430 + }, + { + "epoch": 3.036617374047756, + "grad_norm": 10.065853118896484, + "learning_rate": 2.0032231958907527e-06, + "loss": 0.1418, + "num_input_tokens_seen": 149272704, + "step": 47435 + }, + { + "epoch": 3.036937455988733, + "grad_norm": 10.708297729492188, + "learning_rate": 2.0026756964911434e-06, + "loss": 0.1928, + "num_input_tokens_seen": 149288384, + "step": 47440 + }, + { + "epoch": 3.03725753792971, + "grad_norm": 16.34319496154785, + "learning_rate": 2.00212822192532e-06, + "loss": 0.1998, + "num_input_tokens_seen": 149303168, + "step": 47445 + }, + { + "epoch": 3.037577619870687, + "grad_norm": 8.093623161315918, + "learning_rate": 2.0015807722206196e-06, + "loss": 0.1389, + "num_input_tokens_seen": 149318336, + "step": 47450 + }, + { + "epoch": 3.037897701811664, + "grad_norm": 4.919217586517334, + "learning_rate": 2.00103334740438e-06, + "loss": 0.0876, + "num_input_tokens_seen": 149333504, + "step": 47455 + }, + { + "epoch": 3.0382177837526405, + "grad_norm": 8.08926773071289, + "learning_rate": 2.000485947503935e-06, + "loss": 0.163, + "num_input_tokens_seen": 149350656, + "step": 47460 + }, + { + "epoch": 3.0385378656936175, + "grad_norm": 5.636338710784912, + "learning_rate": 1.9999385725466207e-06, + "loss": 0.1282, + "num_input_tokens_seen": 149365760, + "step": 47465 + }, + { + "epoch": 3.0388579476345945, + "grad_norm": 5.978944778442383, + "learning_rate": 1.9993912225597685e-06, + "loss": 0.1662, + "num_input_tokens_seen": 149380736, + "step": 47470 + }, + { + "epoch": 3.0391780295755715, + "grad_norm": 11.938923835754395, + "learning_rate": 1.9988438975707127e-06, + "loss": 0.1963, + "num_input_tokens_seen": 149396608, + "step": 47475 + }, + { + "epoch": 3.039498111516548, + "grad_norm": 73.31082153320312, + "learning_rate": 1.9982965976067808e-06, + "loss": 0.2719, + "num_input_tokens_seen": 149411712, + "step": 47480 + }, + { + "epoch": 3.039818193457525, + "grad_norm": 4.028939723968506, + "learning_rate": 1.997749322695303e-06, + "loss": 0.1177, + "num_input_tokens_seen": 149428992, + "step": 47485 + }, + { + "epoch": 3.040138275398502, + "grad_norm": 5.2650065422058105, + "learning_rate": 1.99720207286361e-06, + "loss": 0.1604, + "num_input_tokens_seen": 149445568, + "step": 47490 + }, + { + "epoch": 3.040458357339479, + "grad_norm": 2.049490451812744, + "learning_rate": 1.9966548481390248e-06, + "loss": 0.1245, + "num_input_tokens_seen": 149461568, + "step": 47495 + }, + { + "epoch": 3.040778439280456, + "grad_norm": 7.814560413360596, + "learning_rate": 1.9961076485488753e-06, + "loss": 0.1583, + "num_input_tokens_seen": 149477440, + "step": 47500 + }, + { + "epoch": 3.0410985212214325, + "grad_norm": 26.62441635131836, + "learning_rate": 1.995560474120484e-06, + "loss": 0.1525, + "num_input_tokens_seen": 149494464, + "step": 47505 + }, + { + "epoch": 3.0414186031624095, + "grad_norm": 8.823613166809082, + "learning_rate": 1.995013324881177e-06, + "loss": 0.2602, + "num_input_tokens_seen": 149511616, + "step": 47510 + }, + { + "epoch": 3.0417386851033865, + "grad_norm": 8.186843872070312, + "learning_rate": 1.9944662008582734e-06, + "loss": 0.1628, + "num_input_tokens_seen": 149528128, + "step": 47515 + }, + { + "epoch": 3.0420587670443635, + "grad_norm": 5.636887073516846, + "learning_rate": 1.9939191020790944e-06, + "loss": 0.1647, + "num_input_tokens_seen": 149543232, + "step": 47520 + }, + { + "epoch": 3.04237884898534, + "grad_norm": 2.0257415771484375, + "learning_rate": 1.9933720285709587e-06, + "loss": 0.0676, + "num_input_tokens_seen": 149559040, + "step": 47525 + }, + { + "epoch": 3.042698930926317, + "grad_norm": 3.6817240715026855, + "learning_rate": 1.992824980361186e-06, + "loss": 0.2869, + "num_input_tokens_seen": 149575360, + "step": 47530 + }, + { + "epoch": 3.043019012867294, + "grad_norm": 3.5703084468841553, + "learning_rate": 1.9922779574770913e-06, + "loss": 0.1776, + "num_input_tokens_seen": 149591104, + "step": 47535 + }, + { + "epoch": 3.043339094808271, + "grad_norm": 6.723302841186523, + "learning_rate": 1.9917309599459906e-06, + "loss": 0.1012, + "num_input_tokens_seen": 149619904, + "step": 47540 + }, + { + "epoch": 3.043659176749248, + "grad_norm": 5.188940525054932, + "learning_rate": 1.9911839877951997e-06, + "loss": 0.2155, + "num_input_tokens_seen": 149635328, + "step": 47545 + }, + { + "epoch": 3.0439792586902246, + "grad_norm": 7.6852946281433105, + "learning_rate": 1.9906370410520286e-06, + "loss": 0.1333, + "num_input_tokens_seen": 149651328, + "step": 47550 + }, + { + "epoch": 3.0442993406312016, + "grad_norm": 5.8347368240356445, + "learning_rate": 1.9900901197437916e-06, + "loss": 0.1287, + "num_input_tokens_seen": 149666560, + "step": 47555 + }, + { + "epoch": 3.0446194225721785, + "grad_norm": 8.30665111541748, + "learning_rate": 1.989543223897797e-06, + "loss": 0.1752, + "num_input_tokens_seen": 149681792, + "step": 47560 + }, + { + "epoch": 3.0449395045131555, + "grad_norm": 2.493302822113037, + "learning_rate": 1.988996353541356e-06, + "loss": 0.1434, + "num_input_tokens_seen": 149698432, + "step": 47565 + }, + { + "epoch": 3.045259586454132, + "grad_norm": 4.3040642738342285, + "learning_rate": 1.9884495087017743e-06, + "loss": 0.1517, + "num_input_tokens_seen": 149715264, + "step": 47570 + }, + { + "epoch": 3.045579668395109, + "grad_norm": 9.025083541870117, + "learning_rate": 1.9879026894063606e-06, + "loss": 0.1653, + "num_input_tokens_seen": 149731264, + "step": 47575 + }, + { + "epoch": 3.045899750336086, + "grad_norm": 9.754730224609375, + "learning_rate": 1.9873558956824185e-06, + "loss": 0.1442, + "num_input_tokens_seen": 149747584, + "step": 47580 + }, + { + "epoch": 3.046219832277063, + "grad_norm": 6.654227256774902, + "learning_rate": 1.9868091275572528e-06, + "loss": 0.1226, + "num_input_tokens_seen": 149763712, + "step": 47585 + }, + { + "epoch": 3.0465399142180396, + "grad_norm": 14.42161750793457, + "learning_rate": 1.986262385058166e-06, + "loss": 0.1999, + "num_input_tokens_seen": 149779712, + "step": 47590 + }, + { + "epoch": 3.0468599961590166, + "grad_norm": 3.397545099258423, + "learning_rate": 1.985715668212461e-06, + "loss": 0.1887, + "num_input_tokens_seen": 149796160, + "step": 47595 + }, + { + "epoch": 3.0471800780999936, + "grad_norm": 8.60867691040039, + "learning_rate": 1.9851689770474348e-06, + "loss": 0.1351, + "num_input_tokens_seen": 149811136, + "step": 47600 + }, + { + "epoch": 3.0475001600409706, + "grad_norm": 7.574692249298096, + "learning_rate": 1.984622311590389e-06, + "loss": 0.1378, + "num_input_tokens_seen": 149827072, + "step": 47605 + }, + { + "epoch": 3.0478202419819476, + "grad_norm": 8.342270851135254, + "learning_rate": 1.98407567186862e-06, + "loss": 0.1605, + "num_input_tokens_seen": 149844288, + "step": 47610 + }, + { + "epoch": 3.048140323922924, + "grad_norm": 33.34131622314453, + "learning_rate": 1.983529057909425e-06, + "loss": 0.2217, + "num_input_tokens_seen": 149859008, + "step": 47615 + }, + { + "epoch": 3.048460405863901, + "grad_norm": 6.51214075088501, + "learning_rate": 1.9829824697400994e-06, + "loss": 0.1383, + "num_input_tokens_seen": 149874880, + "step": 47620 + }, + { + "epoch": 3.048780487804878, + "grad_norm": 39.51240921020508, + "learning_rate": 1.982435907387935e-06, + "loss": 0.2013, + "num_input_tokens_seen": 149890496, + "step": 47625 + }, + { + "epoch": 3.049100569745855, + "grad_norm": 107.63813018798828, + "learning_rate": 1.981889370880227e-06, + "loss": 0.2065, + "num_input_tokens_seen": 149905344, + "step": 47630 + }, + { + "epoch": 3.0494206516868316, + "grad_norm": 3.6963465213775635, + "learning_rate": 1.9813428602442636e-06, + "loss": 0.1493, + "num_input_tokens_seen": 149920768, + "step": 47635 + }, + { + "epoch": 3.0497407336278086, + "grad_norm": 3.5485332012176514, + "learning_rate": 1.980796375507337e-06, + "loss": 0.1375, + "num_input_tokens_seen": 149936064, + "step": 47640 + }, + { + "epoch": 3.0500608155687856, + "grad_norm": 6.617356300354004, + "learning_rate": 1.980249916696734e-06, + "loss": 0.1064, + "num_input_tokens_seen": 149950592, + "step": 47645 + }, + { + "epoch": 3.0503808975097626, + "grad_norm": 8.744461059570312, + "learning_rate": 1.9797034838397446e-06, + "loss": 0.1358, + "num_input_tokens_seen": 149965056, + "step": 47650 + }, + { + "epoch": 3.0507009794507396, + "grad_norm": 13.046825408935547, + "learning_rate": 1.9791570769636514e-06, + "loss": 0.1829, + "num_input_tokens_seen": 149981184, + "step": 47655 + }, + { + "epoch": 3.051021061391716, + "grad_norm": 6.335928440093994, + "learning_rate": 1.9786106960957412e-06, + "loss": 0.1375, + "num_input_tokens_seen": 149996544, + "step": 47660 + }, + { + "epoch": 3.051341143332693, + "grad_norm": 5.553415298461914, + "learning_rate": 1.978064341263298e-06, + "loss": 0.1414, + "num_input_tokens_seen": 150013312, + "step": 47665 + }, + { + "epoch": 3.05166122527367, + "grad_norm": 6.711698532104492, + "learning_rate": 1.977518012493602e-06, + "loss": 0.1625, + "num_input_tokens_seen": 150030016, + "step": 47670 + }, + { + "epoch": 3.051981307214647, + "grad_norm": 9.717565536499023, + "learning_rate": 1.976971709813935e-06, + "loss": 0.1525, + "num_input_tokens_seen": 150045056, + "step": 47675 + }, + { + "epoch": 3.0523013891556237, + "grad_norm": 6.893523693084717, + "learning_rate": 1.9764254332515765e-06, + "loss": 0.114, + "num_input_tokens_seen": 150060480, + "step": 47680 + }, + { + "epoch": 3.0526214710966006, + "grad_norm": 23.1082763671875, + "learning_rate": 1.9758791828338053e-06, + "loss": 0.2572, + "num_input_tokens_seen": 150077120, + "step": 47685 + }, + { + "epoch": 3.0529415530375776, + "grad_norm": 8.982189178466797, + "learning_rate": 1.975332958587897e-06, + "loss": 0.1731, + "num_input_tokens_seen": 150093952, + "step": 47690 + }, + { + "epoch": 3.0532616349785546, + "grad_norm": 7.5522260665893555, + "learning_rate": 1.9747867605411282e-06, + "loss": 0.1819, + "num_input_tokens_seen": 150109504, + "step": 47695 + }, + { + "epoch": 3.053581716919531, + "grad_norm": 6.027405261993408, + "learning_rate": 1.974240588720772e-06, + "loss": 0.1143, + "num_input_tokens_seen": 150127552, + "step": 47700 + }, + { + "epoch": 3.053901798860508, + "grad_norm": 5.610767841339111, + "learning_rate": 1.9736944431541038e-06, + "loss": 0.1824, + "num_input_tokens_seen": 150142720, + "step": 47705 + }, + { + "epoch": 3.054221880801485, + "grad_norm": 6.191157817840576, + "learning_rate": 1.973148323868392e-06, + "loss": 0.1491, + "num_input_tokens_seen": 150157504, + "step": 47710 + }, + { + "epoch": 3.054541962742462, + "grad_norm": 31.756052017211914, + "learning_rate": 1.9726022308909095e-06, + "loss": 0.1599, + "num_input_tokens_seen": 150172992, + "step": 47715 + }, + { + "epoch": 3.054862044683439, + "grad_norm": 17.463008880615234, + "learning_rate": 1.9720561642489247e-06, + "loss": 0.1498, + "num_input_tokens_seen": 150188992, + "step": 47720 + }, + { + "epoch": 3.0551821266244157, + "grad_norm": 5.790544509887695, + "learning_rate": 1.971510123969704e-06, + "loss": 0.1453, + "num_input_tokens_seen": 150205952, + "step": 47725 + }, + { + "epoch": 3.0555022085653927, + "grad_norm": 6.048426628112793, + "learning_rate": 1.9709641100805163e-06, + "loss": 0.1228, + "num_input_tokens_seen": 150221248, + "step": 47730 + }, + { + "epoch": 3.0558222905063697, + "grad_norm": 4.738977432250977, + "learning_rate": 1.970418122608624e-06, + "loss": 0.1354, + "num_input_tokens_seen": 150236672, + "step": 47735 + }, + { + "epoch": 3.0561423724473467, + "grad_norm": 11.129562377929688, + "learning_rate": 1.969872161581294e-06, + "loss": 0.155, + "num_input_tokens_seen": 150252672, + "step": 47740 + }, + { + "epoch": 3.056462454388323, + "grad_norm": 18.555599212646484, + "learning_rate": 1.9693262270257853e-06, + "loss": 0.1819, + "num_input_tokens_seen": 150269056, + "step": 47745 + }, + { + "epoch": 3.0567825363293, + "grad_norm": 11.489178657531738, + "learning_rate": 1.968780318969361e-06, + "loss": 0.2936, + "num_input_tokens_seen": 150284416, + "step": 47750 + }, + { + "epoch": 3.057102618270277, + "grad_norm": 1.4345465898513794, + "learning_rate": 1.9682344374392805e-06, + "loss": 0.1472, + "num_input_tokens_seen": 150299776, + "step": 47755 + }, + { + "epoch": 3.057422700211254, + "grad_norm": 4.37770414352417, + "learning_rate": 1.967688582462803e-06, + "loss": 0.0795, + "num_input_tokens_seen": 150315264, + "step": 47760 + }, + { + "epoch": 3.057742782152231, + "grad_norm": 34.924644470214844, + "learning_rate": 1.9671427540671838e-06, + "loss": 0.1907, + "num_input_tokens_seen": 150331200, + "step": 47765 + }, + { + "epoch": 3.0580628640932077, + "grad_norm": 9.4660062789917, + "learning_rate": 1.9665969522796813e-06, + "loss": 0.1474, + "num_input_tokens_seen": 150347072, + "step": 47770 + }, + { + "epoch": 3.0583829460341847, + "grad_norm": 2.4678943157196045, + "learning_rate": 1.9660511771275474e-06, + "loss": 0.1164, + "num_input_tokens_seen": 150363392, + "step": 47775 + }, + { + "epoch": 3.0587030279751617, + "grad_norm": 7.9056878089904785, + "learning_rate": 1.9655054286380367e-06, + "loss": 0.2084, + "num_input_tokens_seen": 150379008, + "step": 47780 + }, + { + "epoch": 3.0590231099161387, + "grad_norm": 8.891844749450684, + "learning_rate": 1.9649597068384012e-06, + "loss": 0.2199, + "num_input_tokens_seen": 150394624, + "step": 47785 + }, + { + "epoch": 3.0593431918571152, + "grad_norm": 11.53364372253418, + "learning_rate": 1.9644140117558904e-06, + "loss": 0.1773, + "num_input_tokens_seen": 150410432, + "step": 47790 + }, + { + "epoch": 3.059663273798092, + "grad_norm": 7.725661277770996, + "learning_rate": 1.9638683434177554e-06, + "loss": 0.1238, + "num_input_tokens_seen": 150425216, + "step": 47795 + }, + { + "epoch": 3.059983355739069, + "grad_norm": 6.768837928771973, + "learning_rate": 1.9633227018512414e-06, + "loss": 0.1551, + "num_input_tokens_seen": 150440192, + "step": 47800 + }, + { + "epoch": 3.060303437680046, + "grad_norm": 12.157726287841797, + "learning_rate": 1.9627770870835976e-06, + "loss": 0.1245, + "num_input_tokens_seen": 150456128, + "step": 47805 + }, + { + "epoch": 3.060623519621023, + "grad_norm": 82.02564239501953, + "learning_rate": 1.962231499142066e-06, + "loss": 0.0953, + "num_input_tokens_seen": 150473088, + "step": 47810 + }, + { + "epoch": 3.0609436015619997, + "grad_norm": 8.664937019348145, + "learning_rate": 1.9616859380538936e-06, + "loss": 0.1509, + "num_input_tokens_seen": 150487744, + "step": 47815 + }, + { + "epoch": 3.0612636835029767, + "grad_norm": 5.6081624031066895, + "learning_rate": 1.9611404038463205e-06, + "loss": 0.1472, + "num_input_tokens_seen": 150504000, + "step": 47820 + }, + { + "epoch": 3.0615837654439537, + "grad_norm": 83.44660186767578, + "learning_rate": 1.96059489654659e-06, + "loss": 0.118, + "num_input_tokens_seen": 150518912, + "step": 47825 + }, + { + "epoch": 3.0619038473849307, + "grad_norm": 11.3397216796875, + "learning_rate": 1.9600494161819393e-06, + "loss": 0.1458, + "num_input_tokens_seen": 150535232, + "step": 47830 + }, + { + "epoch": 3.0622239293259073, + "grad_norm": 6.711071491241455, + "learning_rate": 1.9595039627796085e-06, + "loss": 0.1911, + "num_input_tokens_seen": 150550912, + "step": 47835 + }, + { + "epoch": 3.0625440112668842, + "grad_norm": 8.654720306396484, + "learning_rate": 1.9589585363668353e-06, + "loss": 0.1329, + "num_input_tokens_seen": 150566528, + "step": 47840 + }, + { + "epoch": 3.0628640932078612, + "grad_norm": 9.603074073791504, + "learning_rate": 1.9584131369708535e-06, + "loss": 0.2175, + "num_input_tokens_seen": 150581056, + "step": 47845 + }, + { + "epoch": 3.0631841751488382, + "grad_norm": 11.752891540527344, + "learning_rate": 1.9578677646189e-06, + "loss": 0.1842, + "num_input_tokens_seen": 150596416, + "step": 47850 + }, + { + "epoch": 3.063504257089815, + "grad_norm": 9.54983139038086, + "learning_rate": 1.957322419338206e-06, + "loss": 0.1962, + "num_input_tokens_seen": 150612096, + "step": 47855 + }, + { + "epoch": 3.0638243390307918, + "grad_norm": 4.248686790466309, + "learning_rate": 1.956777101156004e-06, + "loss": 0.138, + "num_input_tokens_seen": 150628224, + "step": 47860 + }, + { + "epoch": 3.0641444209717688, + "grad_norm": 6.327320575714111, + "learning_rate": 1.956231810099523e-06, + "loss": 0.1429, + "num_input_tokens_seen": 150644160, + "step": 47865 + }, + { + "epoch": 3.0644645029127457, + "grad_norm": 6.081218242645264, + "learning_rate": 1.9556865461959936e-06, + "loss": 0.1739, + "num_input_tokens_seen": 150660736, + "step": 47870 + }, + { + "epoch": 3.0647845848537227, + "grad_norm": 11.527145385742188, + "learning_rate": 1.9551413094726422e-06, + "loss": 0.1547, + "num_input_tokens_seen": 150676800, + "step": 47875 + }, + { + "epoch": 3.0651046667946993, + "grad_norm": 11.199006080627441, + "learning_rate": 1.9545960999566973e-06, + "loss": 0.2209, + "num_input_tokens_seen": 150692800, + "step": 47880 + }, + { + "epoch": 3.0654247487356763, + "grad_norm": 7.295675277709961, + "learning_rate": 1.9540509176753807e-06, + "loss": 0.131, + "num_input_tokens_seen": 150707776, + "step": 47885 + }, + { + "epoch": 3.0657448306766533, + "grad_norm": 5.376594066619873, + "learning_rate": 1.9535057626559177e-06, + "loss": 0.1623, + "num_input_tokens_seen": 150723008, + "step": 47890 + }, + { + "epoch": 3.0660649126176303, + "grad_norm": 11.226005554199219, + "learning_rate": 1.9529606349255316e-06, + "loss": 0.1904, + "num_input_tokens_seen": 150738944, + "step": 47895 + }, + { + "epoch": 3.066384994558607, + "grad_norm": 13.9881010055542, + "learning_rate": 1.9524155345114406e-06, + "loss": 0.1105, + "num_input_tokens_seen": 150755200, + "step": 47900 + }, + { + "epoch": 3.066705076499584, + "grad_norm": 9.56058406829834, + "learning_rate": 1.951870461440866e-06, + "loss": 0.2196, + "num_input_tokens_seen": 150771136, + "step": 47905 + }, + { + "epoch": 3.067025158440561, + "grad_norm": 7.31786584854126, + "learning_rate": 1.9513254157410253e-06, + "loss": 0.1726, + "num_input_tokens_seen": 150785984, + "step": 47910 + }, + { + "epoch": 3.0673452403815378, + "grad_norm": 8.17006778717041, + "learning_rate": 1.950780397439136e-06, + "loss": 0.1345, + "num_input_tokens_seen": 150803712, + "step": 47915 + }, + { + "epoch": 3.0676653223225148, + "grad_norm": 6.377230167388916, + "learning_rate": 1.9502354065624124e-06, + "loss": 0.1882, + "num_input_tokens_seen": 150819712, + "step": 47920 + }, + { + "epoch": 3.0679854042634913, + "grad_norm": 6.984063148498535, + "learning_rate": 1.949690443138069e-06, + "loss": 0.1113, + "num_input_tokens_seen": 150835776, + "step": 47925 + }, + { + "epoch": 3.0683054862044683, + "grad_norm": 8.833870887756348, + "learning_rate": 1.949145507193318e-06, + "loss": 0.1513, + "num_input_tokens_seen": 150850816, + "step": 47930 + }, + { + "epoch": 3.0686255681454453, + "grad_norm": 1.159833550453186, + "learning_rate": 1.9486005987553722e-06, + "loss": 0.0757, + "num_input_tokens_seen": 150867264, + "step": 47935 + }, + { + "epoch": 3.0689456500864223, + "grad_norm": 10.471057891845703, + "learning_rate": 1.948055717851439e-06, + "loss": 0.103, + "num_input_tokens_seen": 150881728, + "step": 47940 + }, + { + "epoch": 3.069265732027399, + "grad_norm": 10.929597854614258, + "learning_rate": 1.9475108645087297e-06, + "loss": 0.1954, + "num_input_tokens_seen": 150898880, + "step": 47945 + }, + { + "epoch": 3.069585813968376, + "grad_norm": 8.1904878616333, + "learning_rate": 1.9469660387544493e-06, + "loss": 0.1764, + "num_input_tokens_seen": 150914624, + "step": 47950 + }, + { + "epoch": 3.069905895909353, + "grad_norm": 6.930128574371338, + "learning_rate": 1.9464212406158036e-06, + "loss": 0.2132, + "num_input_tokens_seen": 150930432, + "step": 47955 + }, + { + "epoch": 3.07022597785033, + "grad_norm": 62.1556282043457, + "learning_rate": 1.9458764701199986e-06, + "loss": 0.2277, + "num_input_tokens_seen": 150945664, + "step": 47960 + }, + { + "epoch": 3.0705460597913063, + "grad_norm": 8.339032173156738, + "learning_rate": 1.9453317272942358e-06, + "loss": 0.1633, + "num_input_tokens_seen": 150961344, + "step": 47965 + }, + { + "epoch": 3.0708661417322833, + "grad_norm": 48.42457962036133, + "learning_rate": 1.944787012165718e-06, + "loss": 0.2098, + "num_input_tokens_seen": 150976512, + "step": 47970 + }, + { + "epoch": 3.0711862236732603, + "grad_norm": 10.201193809509277, + "learning_rate": 1.944242324761644e-06, + "loss": 0.1358, + "num_input_tokens_seen": 150995136, + "step": 47975 + }, + { + "epoch": 3.0715063056142373, + "grad_norm": 5.953369140625, + "learning_rate": 1.9436976651092143e-06, + "loss": 0.0953, + "num_input_tokens_seen": 151012416, + "step": 47980 + }, + { + "epoch": 3.0718263875552143, + "grad_norm": 9.214427947998047, + "learning_rate": 1.9431530332356247e-06, + "loss": 0.158, + "num_input_tokens_seen": 151028032, + "step": 47985 + }, + { + "epoch": 3.072146469496191, + "grad_norm": 42.11057662963867, + "learning_rate": 1.942608429168073e-06, + "loss": 0.1351, + "num_input_tokens_seen": 151043200, + "step": 47990 + }, + { + "epoch": 3.072466551437168, + "grad_norm": 14.780330657958984, + "learning_rate": 1.9420638529337528e-06, + "loss": 0.1743, + "num_input_tokens_seen": 151059712, + "step": 47995 + }, + { + "epoch": 3.072786633378145, + "grad_norm": 4.730185508728027, + "learning_rate": 1.9415193045598575e-06, + "loss": 0.2285, + "num_input_tokens_seen": 151076608, + "step": 48000 + }, + { + "epoch": 3.073106715319122, + "grad_norm": 4.657031536102295, + "learning_rate": 1.940974784073579e-06, + "loss": 0.1377, + "num_input_tokens_seen": 151092288, + "step": 48005 + }, + { + "epoch": 3.0734267972600984, + "grad_norm": 8.764199256896973, + "learning_rate": 1.940430291502107e-06, + "loss": 0.1857, + "num_input_tokens_seen": 151108992, + "step": 48010 + }, + { + "epoch": 3.0737468792010754, + "grad_norm": 7.919713973999023, + "learning_rate": 1.9398858268726327e-06, + "loss": 0.1747, + "num_input_tokens_seen": 151124224, + "step": 48015 + }, + { + "epoch": 3.0740669611420524, + "grad_norm": 7.022489547729492, + "learning_rate": 1.9393413902123425e-06, + "loss": 0.1461, + "num_input_tokens_seen": 151140928, + "step": 48020 + }, + { + "epoch": 3.0743870430830293, + "grad_norm": 35.990692138671875, + "learning_rate": 1.9387969815484233e-06, + "loss": 0.1835, + "num_input_tokens_seen": 151156416, + "step": 48025 + }, + { + "epoch": 3.0747071250240063, + "grad_norm": 8.744819641113281, + "learning_rate": 1.938252600908059e-06, + "loss": 0.1423, + "num_input_tokens_seen": 151172800, + "step": 48030 + }, + { + "epoch": 3.075027206964983, + "grad_norm": 4.221840858459473, + "learning_rate": 1.9377082483184344e-06, + "loss": 0.1535, + "num_input_tokens_seen": 151188352, + "step": 48035 + }, + { + "epoch": 3.07534728890596, + "grad_norm": 11.671757698059082, + "learning_rate": 1.9371639238067302e-06, + "loss": 0.2626, + "num_input_tokens_seen": 151204288, + "step": 48040 + }, + { + "epoch": 3.075667370846937, + "grad_norm": 11.567667961120605, + "learning_rate": 1.9366196274001286e-06, + "loss": 0.2089, + "num_input_tokens_seen": 151219712, + "step": 48045 + }, + { + "epoch": 3.075987452787914, + "grad_norm": 7.2014479637146, + "learning_rate": 1.9360753591258073e-06, + "loss": 0.1127, + "num_input_tokens_seen": 151235136, + "step": 48050 + }, + { + "epoch": 3.0763075347288904, + "grad_norm": 0.714469313621521, + "learning_rate": 1.9355311190109465e-06, + "loss": 0.131, + "num_input_tokens_seen": 151250624, + "step": 48055 + }, + { + "epoch": 3.0766276166698674, + "grad_norm": 11.670907020568848, + "learning_rate": 1.9349869070827205e-06, + "loss": 0.1795, + "num_input_tokens_seen": 151265792, + "step": 48060 + }, + { + "epoch": 3.0769476986108444, + "grad_norm": 31.264114379882812, + "learning_rate": 1.9344427233683053e-06, + "loss": 0.1688, + "num_input_tokens_seen": 151281664, + "step": 48065 + }, + { + "epoch": 3.0772677805518214, + "grad_norm": 10.560315132141113, + "learning_rate": 1.933898567894875e-06, + "loss": 0.1275, + "num_input_tokens_seen": 151297216, + "step": 48070 + }, + { + "epoch": 3.0775878624927984, + "grad_norm": 11.050223350524902, + "learning_rate": 1.933354440689601e-06, + "loss": 0.2356, + "num_input_tokens_seen": 151313152, + "step": 48075 + }, + { + "epoch": 3.077907944433775, + "grad_norm": 10.935500144958496, + "learning_rate": 1.932810341779656e-06, + "loss": 0.155, + "num_input_tokens_seen": 151328704, + "step": 48080 + }, + { + "epoch": 3.078228026374752, + "grad_norm": 8.42541217803955, + "learning_rate": 1.9322662711922064e-06, + "loss": 0.1709, + "num_input_tokens_seen": 151344128, + "step": 48085 + }, + { + "epoch": 3.078548108315729, + "grad_norm": 7.901092052459717, + "learning_rate": 1.9317222289544234e-06, + "loss": 0.2147, + "num_input_tokens_seen": 151361024, + "step": 48090 + }, + { + "epoch": 3.078868190256706, + "grad_norm": 5.785308361053467, + "learning_rate": 1.9311782150934715e-06, + "loss": 0.1662, + "num_input_tokens_seen": 151377600, + "step": 48095 + }, + { + "epoch": 3.0791882721976824, + "grad_norm": 22.6225643157959, + "learning_rate": 1.930634229636517e-06, + "loss": 0.1914, + "num_input_tokens_seen": 151393152, + "step": 48100 + }, + { + "epoch": 3.0795083541386594, + "grad_norm": 8.636334419250488, + "learning_rate": 1.930090272610723e-06, + "loss": 0.1662, + "num_input_tokens_seen": 151408576, + "step": 48105 + }, + { + "epoch": 3.0798284360796364, + "grad_norm": 28.346538543701172, + "learning_rate": 1.9295463440432528e-06, + "loss": 0.1979, + "num_input_tokens_seen": 151423616, + "step": 48110 + }, + { + "epoch": 3.0801485180206134, + "grad_norm": 5.480241298675537, + "learning_rate": 1.929002443961266e-06, + "loss": 0.1284, + "num_input_tokens_seen": 151439616, + "step": 48115 + }, + { + "epoch": 3.0804685999615904, + "grad_norm": 6.547815322875977, + "learning_rate": 1.9284585723919237e-06, + "loss": 0.144, + "num_input_tokens_seen": 151455424, + "step": 48120 + }, + { + "epoch": 3.080788681902567, + "grad_norm": 5.102705478668213, + "learning_rate": 1.9279147293623825e-06, + "loss": 0.1426, + "num_input_tokens_seen": 151470528, + "step": 48125 + }, + { + "epoch": 3.081108763843544, + "grad_norm": 4.488611221313477, + "learning_rate": 1.9273709148998003e-06, + "loss": 0.1396, + "num_input_tokens_seen": 151488768, + "step": 48130 + }, + { + "epoch": 3.081428845784521, + "grad_norm": 7.947751522064209, + "learning_rate": 1.9268271290313317e-06, + "loss": 0.1163, + "num_input_tokens_seen": 151504000, + "step": 48135 + }, + { + "epoch": 3.081748927725498, + "grad_norm": 7.661884784698486, + "learning_rate": 1.9262833717841306e-06, + "loss": 0.177, + "num_input_tokens_seen": 151519424, + "step": 48140 + }, + { + "epoch": 3.0820690096664745, + "grad_norm": 4.826125621795654, + "learning_rate": 1.9257396431853503e-06, + "loss": 0.1353, + "num_input_tokens_seen": 151535232, + "step": 48145 + }, + { + "epoch": 3.0823890916074514, + "grad_norm": 6.387275695800781, + "learning_rate": 1.92519594326214e-06, + "loss": 0.1628, + "num_input_tokens_seen": 151550272, + "step": 48150 + }, + { + "epoch": 3.0827091735484284, + "grad_norm": 3.935572862625122, + "learning_rate": 1.924652272041651e-06, + "loss": 0.1283, + "num_input_tokens_seen": 151565824, + "step": 48155 + }, + { + "epoch": 3.0830292554894054, + "grad_norm": 10.074673652648926, + "learning_rate": 1.92410862955103e-06, + "loss": 0.1678, + "num_input_tokens_seen": 151583040, + "step": 48160 + }, + { + "epoch": 3.083349337430382, + "grad_norm": 13.681431770324707, + "learning_rate": 1.9235650158174253e-06, + "loss": 0.2132, + "num_input_tokens_seen": 151599168, + "step": 48165 + }, + { + "epoch": 3.083669419371359, + "grad_norm": 7.8524675369262695, + "learning_rate": 1.9230214308679806e-06, + "loss": 0.1977, + "num_input_tokens_seen": 151614464, + "step": 48170 + }, + { + "epoch": 3.083989501312336, + "grad_norm": 12.403188705444336, + "learning_rate": 1.9224778747298407e-06, + "loss": 0.1759, + "num_input_tokens_seen": 151629440, + "step": 48175 + }, + { + "epoch": 3.084309583253313, + "grad_norm": 12.163168907165527, + "learning_rate": 1.9219343474301466e-06, + "loss": 0.2023, + "num_input_tokens_seen": 151645312, + "step": 48180 + }, + { + "epoch": 3.08462966519429, + "grad_norm": 36.17488098144531, + "learning_rate": 1.92139084899604e-06, + "loss": 0.1556, + "num_input_tokens_seen": 151660736, + "step": 48185 + }, + { + "epoch": 3.0849497471352665, + "grad_norm": 8.01367473602295, + "learning_rate": 1.920847379454662e-06, + "loss": 0.1916, + "num_input_tokens_seen": 151676672, + "step": 48190 + }, + { + "epoch": 3.0852698290762435, + "grad_norm": 8.638959884643555, + "learning_rate": 1.9203039388331473e-06, + "loss": 0.1536, + "num_input_tokens_seen": 151692032, + "step": 48195 + }, + { + "epoch": 3.0855899110172205, + "grad_norm": 13.682109832763672, + "learning_rate": 1.9197605271586364e-06, + "loss": 0.2809, + "num_input_tokens_seen": 151707264, + "step": 48200 + }, + { + "epoch": 3.0859099929581975, + "grad_norm": 8.42918586730957, + "learning_rate": 1.9192171444582606e-06, + "loss": 0.1642, + "num_input_tokens_seen": 151723328, + "step": 48205 + }, + { + "epoch": 3.086230074899174, + "grad_norm": 6.015219211578369, + "learning_rate": 1.918673790759157e-06, + "loss": 0.195, + "num_input_tokens_seen": 151739008, + "step": 48210 + }, + { + "epoch": 3.086550156840151, + "grad_norm": 7.643377780914307, + "learning_rate": 1.918130466088455e-06, + "loss": 0.2135, + "num_input_tokens_seen": 151754752, + "step": 48215 + }, + { + "epoch": 3.086870238781128, + "grad_norm": 8.174402236938477, + "learning_rate": 1.917587170473288e-06, + "loss": 0.162, + "num_input_tokens_seen": 151770688, + "step": 48220 + }, + { + "epoch": 3.087190320722105, + "grad_norm": 9.818923950195312, + "learning_rate": 1.9170439039407827e-06, + "loss": 0.2155, + "num_input_tokens_seen": 151786112, + "step": 48225 + }, + { + "epoch": 3.0875104026630815, + "grad_norm": 17.166475296020508, + "learning_rate": 1.9165006665180694e-06, + "loss": 0.2069, + "num_input_tokens_seen": 151800896, + "step": 48230 + }, + { + "epoch": 3.0878304846040585, + "grad_norm": 31.25103759765625, + "learning_rate": 1.9159574582322725e-06, + "loss": 0.1197, + "num_input_tokens_seen": 151816192, + "step": 48235 + }, + { + "epoch": 3.0881505665450355, + "grad_norm": 10.441934585571289, + "learning_rate": 1.915414279110518e-06, + "loss": 0.1611, + "num_input_tokens_seen": 151831808, + "step": 48240 + }, + { + "epoch": 3.0884706484860125, + "grad_norm": 11.857178688049316, + "learning_rate": 1.91487112917993e-06, + "loss": 0.1622, + "num_input_tokens_seen": 151847168, + "step": 48245 + }, + { + "epoch": 3.0887907304269895, + "grad_norm": 10.164546966552734, + "learning_rate": 1.9143280084676295e-06, + "loss": 0.2085, + "num_input_tokens_seen": 151864256, + "step": 48250 + }, + { + "epoch": 3.089110812367966, + "grad_norm": 10.014410018920898, + "learning_rate": 1.9137849170007382e-06, + "loss": 0.1397, + "num_input_tokens_seen": 151879168, + "step": 48255 + }, + { + "epoch": 3.089430894308943, + "grad_norm": 4.317190647125244, + "learning_rate": 1.9132418548063743e-06, + "loss": 0.1202, + "num_input_tokens_seen": 151894592, + "step": 48260 + }, + { + "epoch": 3.08975097624992, + "grad_norm": 11.237287521362305, + "learning_rate": 1.9126988219116567e-06, + "loss": 0.1594, + "num_input_tokens_seen": 151908864, + "step": 48265 + }, + { + "epoch": 3.090071058190897, + "grad_norm": 33.516902923583984, + "learning_rate": 1.9121558183436994e-06, + "loss": 0.2039, + "num_input_tokens_seen": 151924736, + "step": 48270 + }, + { + "epoch": 3.0903911401318735, + "grad_norm": 6.106914520263672, + "learning_rate": 1.9116128441296194e-06, + "loss": 0.1521, + "num_input_tokens_seen": 151940864, + "step": 48275 + }, + { + "epoch": 3.0907112220728505, + "grad_norm": 5.993496894836426, + "learning_rate": 1.9110698992965284e-06, + "loss": 0.1123, + "num_input_tokens_seen": 151958144, + "step": 48280 + }, + { + "epoch": 3.0910313040138275, + "grad_norm": 25.004961013793945, + "learning_rate": 1.9105269838715405e-06, + "loss": 0.1449, + "num_input_tokens_seen": 151974144, + "step": 48285 + }, + { + "epoch": 3.0913513859548045, + "grad_norm": 7.763745307922363, + "learning_rate": 1.909984097881763e-06, + "loss": 0.1393, + "num_input_tokens_seen": 151991488, + "step": 48290 + }, + { + "epoch": 3.0916714678957815, + "grad_norm": 7.87980842590332, + "learning_rate": 1.9094412413543074e-06, + "loss": 0.2076, + "num_input_tokens_seen": 152006912, + "step": 48295 + }, + { + "epoch": 3.091991549836758, + "grad_norm": 32.89320755004883, + "learning_rate": 1.908898414316279e-06, + "loss": 0.2696, + "num_input_tokens_seen": 152021184, + "step": 48300 + }, + { + "epoch": 3.092311631777735, + "grad_norm": 6.434081077575684, + "learning_rate": 1.908355616794785e-06, + "loss": 0.1362, + "num_input_tokens_seen": 152036288, + "step": 48305 + }, + { + "epoch": 3.092631713718712, + "grad_norm": 4.236703395843506, + "learning_rate": 1.9078128488169313e-06, + "loss": 0.1609, + "num_input_tokens_seen": 152051200, + "step": 48310 + }, + { + "epoch": 3.092951795659689, + "grad_norm": 6.953202724456787, + "learning_rate": 1.907270110409818e-06, + "loss": 0.1461, + "num_input_tokens_seen": 152066240, + "step": 48315 + }, + { + "epoch": 3.0932718776006656, + "grad_norm": 9.947153091430664, + "learning_rate": 1.9067274016005488e-06, + "loss": 0.2316, + "num_input_tokens_seen": 152082304, + "step": 48320 + }, + { + "epoch": 3.0935919595416426, + "grad_norm": 5.249383449554443, + "learning_rate": 1.906184722416222e-06, + "loss": 0.0797, + "num_input_tokens_seen": 152098240, + "step": 48325 + }, + { + "epoch": 3.0939120414826196, + "grad_norm": 4.817366600036621, + "learning_rate": 1.9056420728839375e-06, + "loss": 0.1827, + "num_input_tokens_seen": 152113536, + "step": 48330 + }, + { + "epoch": 3.0942321234235965, + "grad_norm": 5.458787441253662, + "learning_rate": 1.9050994530307918e-06, + "loss": 0.1234, + "num_input_tokens_seen": 152129152, + "step": 48335 + }, + { + "epoch": 3.0945522053645735, + "grad_norm": 12.592992782592773, + "learning_rate": 1.9045568628838814e-06, + "loss": 0.2724, + "num_input_tokens_seen": 152144704, + "step": 48340 + }, + { + "epoch": 3.09487228730555, + "grad_norm": 6.660630226135254, + "learning_rate": 1.9040143024702988e-06, + "loss": 0.1893, + "num_input_tokens_seen": 152160640, + "step": 48345 + }, + { + "epoch": 3.095192369246527, + "grad_norm": 14.843986511230469, + "learning_rate": 1.903471771817138e-06, + "loss": 0.1086, + "num_input_tokens_seen": 152176320, + "step": 48350 + }, + { + "epoch": 3.095512451187504, + "grad_norm": 8.40536880493164, + "learning_rate": 1.9029292709514894e-06, + "loss": 0.1531, + "num_input_tokens_seen": 152191808, + "step": 48355 + }, + { + "epoch": 3.095832533128481, + "grad_norm": 11.606794357299805, + "learning_rate": 1.9023867999004433e-06, + "loss": 0.1193, + "num_input_tokens_seen": 152209280, + "step": 48360 + }, + { + "epoch": 3.0961526150694576, + "grad_norm": 6.3357744216918945, + "learning_rate": 1.9018443586910875e-06, + "loss": 0.1564, + "num_input_tokens_seen": 152223936, + "step": 48365 + }, + { + "epoch": 3.0964726970104346, + "grad_norm": 11.044204711914062, + "learning_rate": 1.9013019473505085e-06, + "loss": 0.1128, + "num_input_tokens_seen": 152238400, + "step": 48370 + }, + { + "epoch": 3.0967927789514116, + "grad_norm": 8.675580024719238, + "learning_rate": 1.9007595659057925e-06, + "loss": 0.1442, + "num_input_tokens_seen": 152254336, + "step": 48375 + }, + { + "epoch": 3.0971128608923886, + "grad_norm": 11.271017074584961, + "learning_rate": 1.9002172143840217e-06, + "loss": 0.1276, + "num_input_tokens_seen": 152269760, + "step": 48380 + }, + { + "epoch": 3.0974329428333656, + "grad_norm": 12.716301918029785, + "learning_rate": 1.8996748928122794e-06, + "loss": 0.1542, + "num_input_tokens_seen": 152284736, + "step": 48385 + }, + { + "epoch": 3.097753024774342, + "grad_norm": 3.5826849937438965, + "learning_rate": 1.8991326012176455e-06, + "loss": 0.1119, + "num_input_tokens_seen": 152300608, + "step": 48390 + }, + { + "epoch": 3.098073106715319, + "grad_norm": 4.930143356323242, + "learning_rate": 1.898590339627201e-06, + "loss": 0.1317, + "num_input_tokens_seen": 152315264, + "step": 48395 + }, + { + "epoch": 3.098393188656296, + "grad_norm": 8.400947570800781, + "learning_rate": 1.8980481080680207e-06, + "loss": 0.1217, + "num_input_tokens_seen": 152332096, + "step": 48400 + }, + { + "epoch": 3.098713270597273, + "grad_norm": 4.793694019317627, + "learning_rate": 1.897505906567184e-06, + "loss": 0.1199, + "num_input_tokens_seen": 152347712, + "step": 48405 + }, + { + "epoch": 3.0990333525382496, + "grad_norm": 5.642166614532471, + "learning_rate": 1.8969637351517625e-06, + "loss": 0.2118, + "num_input_tokens_seen": 152362816, + "step": 48410 + }, + { + "epoch": 3.0993534344792266, + "grad_norm": 5.7690043449401855, + "learning_rate": 1.8964215938488312e-06, + "loss": 0.1492, + "num_input_tokens_seen": 152378880, + "step": 48415 + }, + { + "epoch": 3.0996735164202036, + "grad_norm": 5.777259349822998, + "learning_rate": 1.8958794826854623e-06, + "loss": 0.1298, + "num_input_tokens_seen": 152394368, + "step": 48420 + }, + { + "epoch": 3.0999935983611806, + "grad_norm": 18.892667770385742, + "learning_rate": 1.8953374016887247e-06, + "loss": 0.1463, + "num_input_tokens_seen": 152409792, + "step": 48425 + }, + { + "epoch": 3.100313680302157, + "grad_norm": 8.798538208007812, + "learning_rate": 1.8947953508856887e-06, + "loss": 0.1744, + "num_input_tokens_seen": 152424640, + "step": 48430 + }, + { + "epoch": 3.100633762243134, + "grad_norm": 7.737065315246582, + "learning_rate": 1.8942533303034192e-06, + "loss": 0.1828, + "num_input_tokens_seen": 152441856, + "step": 48435 + }, + { + "epoch": 3.100953844184111, + "grad_norm": 5.984939098358154, + "learning_rate": 1.8937113399689838e-06, + "loss": 0.2279, + "num_input_tokens_seen": 152457216, + "step": 48440 + }, + { + "epoch": 3.101273926125088, + "grad_norm": 7.30410099029541, + "learning_rate": 1.8931693799094456e-06, + "loss": 0.1664, + "num_input_tokens_seen": 152472704, + "step": 48445 + }, + { + "epoch": 3.101594008066065, + "grad_norm": 7.507612705230713, + "learning_rate": 1.8926274501518687e-06, + "loss": 0.1746, + "num_input_tokens_seen": 152488896, + "step": 48450 + }, + { + "epoch": 3.1019140900070417, + "grad_norm": 5.142396926879883, + "learning_rate": 1.892085550723312e-06, + "loss": 0.1245, + "num_input_tokens_seen": 152504384, + "step": 48455 + }, + { + "epoch": 3.1022341719480186, + "grad_norm": 10.93131160736084, + "learning_rate": 1.8915436816508379e-06, + "loss": 0.1303, + "num_input_tokens_seen": 152519104, + "step": 48460 + }, + { + "epoch": 3.1025542538889956, + "grad_norm": 5.83906364440918, + "learning_rate": 1.8910018429615017e-06, + "loss": 0.1384, + "num_input_tokens_seen": 152534208, + "step": 48465 + }, + { + "epoch": 3.1028743358299726, + "grad_norm": 5.538097381591797, + "learning_rate": 1.8904600346823617e-06, + "loss": 0.1584, + "num_input_tokens_seen": 152549184, + "step": 48470 + }, + { + "epoch": 3.103194417770949, + "grad_norm": 33.82347106933594, + "learning_rate": 1.8899182568404723e-06, + "loss": 0.2436, + "num_input_tokens_seen": 152564416, + "step": 48475 + }, + { + "epoch": 3.103514499711926, + "grad_norm": 6.24334192276001, + "learning_rate": 1.8893765094628875e-06, + "loss": 0.1038, + "num_input_tokens_seen": 152580032, + "step": 48480 + }, + { + "epoch": 3.103834581652903, + "grad_norm": 7.770177364349365, + "learning_rate": 1.8888347925766598e-06, + "loss": 0.1456, + "num_input_tokens_seen": 152596160, + "step": 48485 + }, + { + "epoch": 3.10415466359388, + "grad_norm": 5.941619873046875, + "learning_rate": 1.8882931062088383e-06, + "loss": 0.1355, + "num_input_tokens_seen": 152612864, + "step": 48490 + }, + { + "epoch": 3.1044747455348567, + "grad_norm": 7.622331619262695, + "learning_rate": 1.8877514503864738e-06, + "loss": 0.1907, + "num_input_tokens_seen": 152628672, + "step": 48495 + }, + { + "epoch": 3.1047948274758337, + "grad_norm": 5.366537570953369, + "learning_rate": 1.8872098251366117e-06, + "loss": 0.0881, + "num_input_tokens_seen": 152643968, + "step": 48500 + }, + { + "epoch": 3.1051149094168107, + "grad_norm": 6.856066703796387, + "learning_rate": 1.8866682304862999e-06, + "loss": 0.1305, + "num_input_tokens_seen": 152660288, + "step": 48505 + }, + { + "epoch": 3.1054349913577877, + "grad_norm": 4.560678958892822, + "learning_rate": 1.886126666462581e-06, + "loss": 0.2122, + "num_input_tokens_seen": 152675712, + "step": 48510 + }, + { + "epoch": 3.1057550732987647, + "grad_norm": 7.239043235778809, + "learning_rate": 1.8855851330925002e-06, + "loss": 0.1203, + "num_input_tokens_seen": 152691840, + "step": 48515 + }, + { + "epoch": 3.106075155239741, + "grad_norm": 7.296117305755615, + "learning_rate": 1.885043630403096e-06, + "loss": 0.1581, + "num_input_tokens_seen": 152708224, + "step": 48520 + }, + { + "epoch": 3.106395237180718, + "grad_norm": 6.531590938568115, + "learning_rate": 1.8845021584214104e-06, + "loss": 0.1143, + "num_input_tokens_seen": 152725376, + "step": 48525 + }, + { + "epoch": 3.106715319121695, + "grad_norm": 7.347155570983887, + "learning_rate": 1.8839607171744805e-06, + "loss": 0.1379, + "num_input_tokens_seen": 152742656, + "step": 48530 + }, + { + "epoch": 3.107035401062672, + "grad_norm": 4.6199140548706055, + "learning_rate": 1.8834193066893436e-06, + "loss": 0.1376, + "num_input_tokens_seen": 152757120, + "step": 48535 + }, + { + "epoch": 3.1073554830036487, + "grad_norm": 9.321779251098633, + "learning_rate": 1.8828779269930362e-06, + "loss": 0.1454, + "num_input_tokens_seen": 152771840, + "step": 48540 + }, + { + "epoch": 3.1076755649446257, + "grad_norm": 66.0434341430664, + "learning_rate": 1.882336578112589e-06, + "loss": 0.2025, + "num_input_tokens_seen": 152787712, + "step": 48545 + }, + { + "epoch": 3.1079956468856027, + "grad_norm": 4.636098384857178, + "learning_rate": 1.8817952600750373e-06, + "loss": 0.1196, + "num_input_tokens_seen": 152803136, + "step": 48550 + }, + { + "epoch": 3.1083157288265797, + "grad_norm": 7.115172863006592, + "learning_rate": 1.8812539729074092e-06, + "loss": 0.1579, + "num_input_tokens_seen": 152818432, + "step": 48555 + }, + { + "epoch": 3.1086358107675567, + "grad_norm": 6.795969009399414, + "learning_rate": 1.880712716636735e-06, + "loss": 0.1381, + "num_input_tokens_seen": 152833920, + "step": 48560 + }, + { + "epoch": 3.1089558927085332, + "grad_norm": 26.08523178100586, + "learning_rate": 1.8801714912900415e-06, + "loss": 0.1764, + "num_input_tokens_seen": 152850112, + "step": 48565 + }, + { + "epoch": 3.10927597464951, + "grad_norm": 10.613505363464355, + "learning_rate": 1.8796302968943564e-06, + "loss": 0.1551, + "num_input_tokens_seen": 152865536, + "step": 48570 + }, + { + "epoch": 3.109596056590487, + "grad_norm": 7.747730255126953, + "learning_rate": 1.8790891334767017e-06, + "loss": 0.2279, + "num_input_tokens_seen": 152881664, + "step": 48575 + }, + { + "epoch": 3.109916138531464, + "grad_norm": 5.097565174102783, + "learning_rate": 1.8785480010641018e-06, + "loss": 0.1577, + "num_input_tokens_seen": 152898112, + "step": 48580 + }, + { + "epoch": 3.1102362204724407, + "grad_norm": 5.54273796081543, + "learning_rate": 1.8780068996835776e-06, + "loss": 0.1234, + "num_input_tokens_seen": 152913280, + "step": 48585 + }, + { + "epoch": 3.1105563024134177, + "grad_norm": 16.613269805908203, + "learning_rate": 1.8774658293621486e-06, + "loss": 0.1973, + "num_input_tokens_seen": 152928960, + "step": 48590 + }, + { + "epoch": 3.1108763843543947, + "grad_norm": 7.360964298248291, + "learning_rate": 1.8769247901268348e-06, + "loss": 0.1454, + "num_input_tokens_seen": 152946880, + "step": 48595 + }, + { + "epoch": 3.1111964662953717, + "grad_norm": 13.458824157714844, + "learning_rate": 1.8763837820046502e-06, + "loss": 0.1297, + "num_input_tokens_seen": 152962880, + "step": 48600 + }, + { + "epoch": 3.1115165482363487, + "grad_norm": 6.955830097198486, + "learning_rate": 1.8758428050226124e-06, + "loss": 0.1453, + "num_input_tokens_seen": 152978432, + "step": 48605 + }, + { + "epoch": 3.1118366301773253, + "grad_norm": 54.26490020751953, + "learning_rate": 1.8753018592077326e-06, + "loss": 0.1802, + "num_input_tokens_seen": 152993984, + "step": 48610 + }, + { + "epoch": 3.1121567121183022, + "grad_norm": 10.108452796936035, + "learning_rate": 1.8747609445870246e-06, + "loss": 0.1786, + "num_input_tokens_seen": 153008512, + "step": 48615 + }, + { + "epoch": 3.1124767940592792, + "grad_norm": 12.018275260925293, + "learning_rate": 1.8742200611874978e-06, + "loss": 0.1538, + "num_input_tokens_seen": 153025344, + "step": 48620 + }, + { + "epoch": 3.1127968760002562, + "grad_norm": 8.232158660888672, + "learning_rate": 1.8736792090361628e-06, + "loss": 0.1805, + "num_input_tokens_seen": 153040320, + "step": 48625 + }, + { + "epoch": 3.1131169579412328, + "grad_norm": 11.422394752502441, + "learning_rate": 1.8731383881600247e-06, + "loss": 0.1545, + "num_input_tokens_seen": 153055360, + "step": 48630 + }, + { + "epoch": 3.1134370398822098, + "grad_norm": 9.553625106811523, + "learning_rate": 1.8725975985860912e-06, + "loss": 0.1554, + "num_input_tokens_seen": 153070656, + "step": 48635 + }, + { + "epoch": 3.1137571218231868, + "grad_norm": 100.90194702148438, + "learning_rate": 1.8720568403413642e-06, + "loss": 0.1662, + "num_input_tokens_seen": 153086336, + "step": 48640 + }, + { + "epoch": 3.1140772037641637, + "grad_norm": 4.418367385864258, + "learning_rate": 1.8715161134528487e-06, + "loss": 0.2895, + "num_input_tokens_seen": 153101056, + "step": 48645 + }, + { + "epoch": 3.1143972857051407, + "grad_norm": 26.062192916870117, + "learning_rate": 1.8709754179475442e-06, + "loss": 0.1772, + "num_input_tokens_seen": 153117568, + "step": 48650 + }, + { + "epoch": 3.1147173676461173, + "grad_norm": 4.335071086883545, + "learning_rate": 1.8704347538524509e-06, + "loss": 0.1515, + "num_input_tokens_seen": 153132608, + "step": 48655 + }, + { + "epoch": 3.1150374495870943, + "grad_norm": 16.756450653076172, + "learning_rate": 1.8698941211945675e-06, + "loss": 0.1736, + "num_input_tokens_seen": 153148480, + "step": 48660 + }, + { + "epoch": 3.1153575315280713, + "grad_norm": 12.633633613586426, + "learning_rate": 1.869353520000889e-06, + "loss": 0.1968, + "num_input_tokens_seen": 153163328, + "step": 48665 + }, + { + "epoch": 3.1156776134690483, + "grad_norm": 7.718558311462402, + "learning_rate": 1.868812950298411e-06, + "loss": 0.2051, + "num_input_tokens_seen": 153179072, + "step": 48670 + }, + { + "epoch": 3.115997695410025, + "grad_norm": 2.336106061935425, + "learning_rate": 1.868272412114126e-06, + "loss": 0.1486, + "num_input_tokens_seen": 153194944, + "step": 48675 + }, + { + "epoch": 3.116317777351002, + "grad_norm": 11.76494026184082, + "learning_rate": 1.8677319054750272e-06, + "loss": 0.129, + "num_input_tokens_seen": 153209856, + "step": 48680 + }, + { + "epoch": 3.116637859291979, + "grad_norm": 8.672822952270508, + "learning_rate": 1.867191430408103e-06, + "loss": 0.1309, + "num_input_tokens_seen": 153225472, + "step": 48685 + }, + { + "epoch": 3.1169579412329558, + "grad_norm": 6.01549768447876, + "learning_rate": 1.8666509869403433e-06, + "loss": 0.1015, + "num_input_tokens_seen": 153243392, + "step": 48690 + }, + { + "epoch": 3.1172780231739323, + "grad_norm": 23.3664608001709, + "learning_rate": 1.8661105750987333e-06, + "loss": 0.1797, + "num_input_tokens_seen": 153258752, + "step": 48695 + }, + { + "epoch": 3.1175981051149093, + "grad_norm": 8.605633735656738, + "learning_rate": 1.86557019491026e-06, + "loss": 0.3228, + "num_input_tokens_seen": 153273856, + "step": 48700 + }, + { + "epoch": 3.1179181870558863, + "grad_norm": 21.72252082824707, + "learning_rate": 1.8650298464019057e-06, + "loss": 0.1741, + "num_input_tokens_seen": 153290496, + "step": 48705 + }, + { + "epoch": 3.1182382689968633, + "grad_norm": 6.114857196807861, + "learning_rate": 1.8644895296006539e-06, + "loss": 0.1642, + "num_input_tokens_seen": 153306752, + "step": 48710 + }, + { + "epoch": 3.1185583509378403, + "grad_norm": 5.662710189819336, + "learning_rate": 1.8639492445334857e-06, + "loss": 0.1198, + "num_input_tokens_seen": 153321600, + "step": 48715 + }, + { + "epoch": 3.118878432878817, + "grad_norm": 7.379400730133057, + "learning_rate": 1.8634089912273779e-06, + "loss": 0.1153, + "num_input_tokens_seen": 153336640, + "step": 48720 + }, + { + "epoch": 3.119198514819794, + "grad_norm": 6.429266452789307, + "learning_rate": 1.8628687697093101e-06, + "loss": 0.1617, + "num_input_tokens_seen": 153352832, + "step": 48725 + }, + { + "epoch": 3.119518596760771, + "grad_norm": 4.40596342086792, + "learning_rate": 1.8623285800062565e-06, + "loss": 0.2183, + "num_input_tokens_seen": 153368768, + "step": 48730 + }, + { + "epoch": 3.119838678701748, + "grad_norm": 6.894643783569336, + "learning_rate": 1.8617884221451927e-06, + "loss": 0.1451, + "num_input_tokens_seen": 153385920, + "step": 48735 + }, + { + "epoch": 3.1201587606427243, + "grad_norm": 7.190025329589844, + "learning_rate": 1.8612482961530897e-06, + "loss": 0.1351, + "num_input_tokens_seen": 153401216, + "step": 48740 + }, + { + "epoch": 3.1204788425837013, + "grad_norm": 8.157914161682129, + "learning_rate": 1.8607082020569214e-06, + "loss": 0.1363, + "num_input_tokens_seen": 153417024, + "step": 48745 + }, + { + "epoch": 3.1207989245246783, + "grad_norm": 8.885576248168945, + "learning_rate": 1.8601681398836541e-06, + "loss": 0.2222, + "num_input_tokens_seen": 153433664, + "step": 48750 + }, + { + "epoch": 3.1211190064656553, + "grad_norm": 8.374499320983887, + "learning_rate": 1.859628109660258e-06, + "loss": 0.1448, + "num_input_tokens_seen": 153450112, + "step": 48755 + }, + { + "epoch": 3.1214390884066323, + "grad_norm": 5.268971920013428, + "learning_rate": 1.8590881114136978e-06, + "loss": 0.1144, + "num_input_tokens_seen": 153464512, + "step": 48760 + }, + { + "epoch": 3.121759170347609, + "grad_norm": 6.188100337982178, + "learning_rate": 1.8585481451709386e-06, + "loss": 0.1898, + "num_input_tokens_seen": 153479808, + "step": 48765 + }, + { + "epoch": 3.122079252288586, + "grad_norm": 7.194318771362305, + "learning_rate": 1.8580082109589454e-06, + "loss": 0.2137, + "num_input_tokens_seen": 153495680, + "step": 48770 + }, + { + "epoch": 3.122399334229563, + "grad_norm": 4.618453502655029, + "learning_rate": 1.8574683088046769e-06, + "loss": 0.1361, + "num_input_tokens_seen": 153512640, + "step": 48775 + }, + { + "epoch": 3.12271941617054, + "grad_norm": 11.325249671936035, + "learning_rate": 1.856928438735095e-06, + "loss": 0.1215, + "num_input_tokens_seen": 153527680, + "step": 48780 + }, + { + "epoch": 3.1230394981115164, + "grad_norm": 11.148679733276367, + "learning_rate": 1.8563886007771564e-06, + "loss": 0.2302, + "num_input_tokens_seen": 153542336, + "step": 48785 + }, + { + "epoch": 3.1233595800524934, + "grad_norm": 4.196473598480225, + "learning_rate": 1.8558487949578192e-06, + "loss": 0.1459, + "num_input_tokens_seen": 153557568, + "step": 48790 + }, + { + "epoch": 3.1236796619934704, + "grad_norm": 6.369085311889648, + "learning_rate": 1.8553090213040372e-06, + "loss": 0.1635, + "num_input_tokens_seen": 153574272, + "step": 48795 + }, + { + "epoch": 3.1239997439344473, + "grad_norm": 24.635231018066406, + "learning_rate": 1.8547692798427659e-06, + "loss": 0.2197, + "num_input_tokens_seen": 153589184, + "step": 48800 + }, + { + "epoch": 3.124319825875424, + "grad_norm": 9.200067520141602, + "learning_rate": 1.854229570600955e-06, + "loss": 0.2185, + "num_input_tokens_seen": 153603840, + "step": 48805 + }, + { + "epoch": 3.124639907816401, + "grad_norm": 9.212757110595703, + "learning_rate": 1.853689893605556e-06, + "loss": 0.1806, + "num_input_tokens_seen": 153620288, + "step": 48810 + }, + { + "epoch": 3.124959989757378, + "grad_norm": 48.99480438232422, + "learning_rate": 1.8531502488835162e-06, + "loss": 0.1389, + "num_input_tokens_seen": 153635264, + "step": 48815 + }, + { + "epoch": 3.125280071698355, + "grad_norm": 27.528221130371094, + "learning_rate": 1.852610636461785e-06, + "loss": 0.2212, + "num_input_tokens_seen": 153651264, + "step": 48820 + }, + { + "epoch": 3.125600153639332, + "grad_norm": 17.526859283447266, + "learning_rate": 1.8520710563673055e-06, + "loss": 0.1795, + "num_input_tokens_seen": 153667072, + "step": 48825 + }, + { + "epoch": 3.1259202355803084, + "grad_norm": 24.93773078918457, + "learning_rate": 1.8515315086270228e-06, + "loss": 0.1396, + "num_input_tokens_seen": 153682880, + "step": 48830 + }, + { + "epoch": 3.1262403175212854, + "grad_norm": 5.436725616455078, + "learning_rate": 1.8509919932678797e-06, + "loss": 0.1204, + "num_input_tokens_seen": 153698880, + "step": 48835 + }, + { + "epoch": 3.1265603994622624, + "grad_norm": 3.5452053546905518, + "learning_rate": 1.8504525103168147e-06, + "loss": 0.154, + "num_input_tokens_seen": 153715136, + "step": 48840 + }, + { + "epoch": 3.1268804814032394, + "grad_norm": 12.666833877563477, + "learning_rate": 1.8499130598007685e-06, + "loss": 0.1258, + "num_input_tokens_seen": 153729728, + "step": 48845 + }, + { + "epoch": 3.127200563344216, + "grad_norm": 6.468756675720215, + "learning_rate": 1.8493736417466774e-06, + "loss": 0.1056, + "num_input_tokens_seen": 153747264, + "step": 48850 + }, + { + "epoch": 3.127520645285193, + "grad_norm": 2.538612127304077, + "learning_rate": 1.8488342561814787e-06, + "loss": 0.1394, + "num_input_tokens_seen": 153761728, + "step": 48855 + }, + { + "epoch": 3.12784072722617, + "grad_norm": 5.357621192932129, + "learning_rate": 1.8482949031321045e-06, + "loss": 0.1739, + "num_input_tokens_seen": 153777856, + "step": 48860 + }, + { + "epoch": 3.128160809167147, + "grad_norm": 11.982983589172363, + "learning_rate": 1.8477555826254893e-06, + "loss": 0.1185, + "num_input_tokens_seen": 153793344, + "step": 48865 + }, + { + "epoch": 3.128480891108124, + "grad_norm": 7.580006122589111, + "learning_rate": 1.8472162946885617e-06, + "loss": 0.1611, + "num_input_tokens_seen": 153808704, + "step": 48870 + }, + { + "epoch": 3.1288009730491004, + "grad_norm": 37.30305099487305, + "learning_rate": 1.8466770393482526e-06, + "loss": 0.1739, + "num_input_tokens_seen": 153824576, + "step": 48875 + }, + { + "epoch": 3.1291210549900774, + "grad_norm": 9.712555885314941, + "learning_rate": 1.8461378166314886e-06, + "loss": 0.171, + "num_input_tokens_seen": 153839616, + "step": 48880 + }, + { + "epoch": 3.1294411369310544, + "grad_norm": 6.508283615112305, + "learning_rate": 1.8455986265651966e-06, + "loss": 0.1554, + "num_input_tokens_seen": 153855552, + "step": 48885 + }, + { + "epoch": 3.1297612188720314, + "grad_norm": 6.3439788818359375, + "learning_rate": 1.845059469176301e-06, + "loss": 0.1696, + "num_input_tokens_seen": 153871616, + "step": 48890 + }, + { + "epoch": 3.130081300813008, + "grad_norm": 2.4730913639068604, + "learning_rate": 1.8445203444917231e-06, + "loss": 0.1619, + "num_input_tokens_seen": 153886720, + "step": 48895 + }, + { + "epoch": 3.130401382753985, + "grad_norm": 8.650420188903809, + "learning_rate": 1.8439812525383858e-06, + "loss": 0.1497, + "num_input_tokens_seen": 153901568, + "step": 48900 + }, + { + "epoch": 3.130721464694962, + "grad_norm": 4.778258323669434, + "learning_rate": 1.8434421933432068e-06, + "loss": 0.1483, + "num_input_tokens_seen": 153918720, + "step": 48905 + }, + { + "epoch": 3.131041546635939, + "grad_norm": 10.425971031188965, + "learning_rate": 1.842903166933106e-06, + "loss": 0.1223, + "num_input_tokens_seen": 153935808, + "step": 48910 + }, + { + "epoch": 3.131361628576916, + "grad_norm": 12.461701393127441, + "learning_rate": 1.8423641733349974e-06, + "loss": 0.1352, + "num_input_tokens_seen": 153951616, + "step": 48915 + }, + { + "epoch": 3.1316817105178925, + "grad_norm": 9.618942260742188, + "learning_rate": 1.8418252125757974e-06, + "loss": 0.1278, + "num_input_tokens_seen": 153966976, + "step": 48920 + }, + { + "epoch": 3.1320017924588694, + "grad_norm": 4.498152256011963, + "learning_rate": 1.841286284682417e-06, + "loss": 0.1283, + "num_input_tokens_seen": 153982720, + "step": 48925 + }, + { + "epoch": 3.1323218743998464, + "grad_norm": 27.080629348754883, + "learning_rate": 1.8407473896817688e-06, + "loss": 0.2001, + "num_input_tokens_seen": 153998080, + "step": 48930 + }, + { + "epoch": 3.1326419563408234, + "grad_norm": 25.199369430541992, + "learning_rate": 1.8402085276007614e-06, + "loss": 0.1943, + "num_input_tokens_seen": 154013632, + "step": 48935 + }, + { + "epoch": 3.1329620382818, + "grad_norm": 7.7398481369018555, + "learning_rate": 1.8396696984663036e-06, + "loss": 0.1954, + "num_input_tokens_seen": 154029248, + "step": 48940 + }, + { + "epoch": 3.133282120222777, + "grad_norm": 13.524351119995117, + "learning_rate": 1.8391309023053028e-06, + "loss": 0.128, + "num_input_tokens_seen": 154044096, + "step": 48945 + }, + { + "epoch": 3.133602202163754, + "grad_norm": 10.587188720703125, + "learning_rate": 1.838592139144661e-06, + "loss": 0.1484, + "num_input_tokens_seen": 154060032, + "step": 48950 + }, + { + "epoch": 3.133922284104731, + "grad_norm": 6.403322219848633, + "learning_rate": 1.8380534090112834e-06, + "loss": 0.2031, + "num_input_tokens_seen": 154075776, + "step": 48955 + }, + { + "epoch": 3.1342423660457075, + "grad_norm": 12.169405937194824, + "learning_rate": 1.8375147119320705e-06, + "loss": 0.2037, + "num_input_tokens_seen": 154090176, + "step": 48960 + }, + { + "epoch": 3.1345624479866845, + "grad_norm": 6.018194675445557, + "learning_rate": 1.8369760479339222e-06, + "loss": 0.152, + "num_input_tokens_seen": 154107072, + "step": 48965 + }, + { + "epoch": 3.1348825299276615, + "grad_norm": 29.409473419189453, + "learning_rate": 1.8364374170437357e-06, + "loss": 0.1923, + "num_input_tokens_seen": 154121792, + "step": 48970 + }, + { + "epoch": 3.1352026118686385, + "grad_norm": 8.491571426391602, + "learning_rate": 1.8358988192884098e-06, + "loss": 0.152, + "num_input_tokens_seen": 154138240, + "step": 48975 + }, + { + "epoch": 3.1355226938096155, + "grad_norm": 41.84144592285156, + "learning_rate": 1.835360254694836e-06, + "loss": 0.1644, + "num_input_tokens_seen": 154153408, + "step": 48980 + }, + { + "epoch": 3.135842775750592, + "grad_norm": 12.78325366973877, + "learning_rate": 1.83482172328991e-06, + "loss": 0.1467, + "num_input_tokens_seen": 154169216, + "step": 48985 + }, + { + "epoch": 3.136162857691569, + "grad_norm": 7.415651798248291, + "learning_rate": 1.8342832251005216e-06, + "loss": 0.1552, + "num_input_tokens_seen": 154184832, + "step": 48990 + }, + { + "epoch": 3.136482939632546, + "grad_norm": 12.22590446472168, + "learning_rate": 1.8337447601535627e-06, + "loss": 0.2161, + "num_input_tokens_seen": 154201728, + "step": 48995 + }, + { + "epoch": 3.136803021573523, + "grad_norm": 8.875815391540527, + "learning_rate": 1.8332063284759188e-06, + "loss": 0.1876, + "num_input_tokens_seen": 154217024, + "step": 49000 + }, + { + "epoch": 3.1371231035144995, + "grad_norm": 6.411656856536865, + "learning_rate": 1.8326679300944778e-06, + "loss": 0.1428, + "num_input_tokens_seen": 154232640, + "step": 49005 + }, + { + "epoch": 3.1374431854554765, + "grad_norm": 14.458956718444824, + "learning_rate": 1.8321295650361253e-06, + "loss": 0.2121, + "num_input_tokens_seen": 154248064, + "step": 49010 + }, + { + "epoch": 3.1377632673964535, + "grad_norm": 13.64512825012207, + "learning_rate": 1.8315912333277425e-06, + "loss": 0.2179, + "num_input_tokens_seen": 154264192, + "step": 49015 + }, + { + "epoch": 3.1380833493374305, + "grad_norm": 5.543225288391113, + "learning_rate": 1.8310529349962125e-06, + "loss": 0.1238, + "num_input_tokens_seen": 154280576, + "step": 49020 + }, + { + "epoch": 3.138403431278407, + "grad_norm": 7.242893695831299, + "learning_rate": 1.830514670068414e-06, + "loss": 0.1334, + "num_input_tokens_seen": 154295872, + "step": 49025 + }, + { + "epoch": 3.138723513219384, + "grad_norm": 7.611598968505859, + "learning_rate": 1.8299764385712264e-06, + "loss": 0.1362, + "num_input_tokens_seen": 154310976, + "step": 49030 + }, + { + "epoch": 3.139043595160361, + "grad_norm": 14.84183406829834, + "learning_rate": 1.8294382405315248e-06, + "loss": 0.2198, + "num_input_tokens_seen": 154327680, + "step": 49035 + }, + { + "epoch": 3.139363677101338, + "grad_norm": 20.68044090270996, + "learning_rate": 1.8289000759761849e-06, + "loss": 0.1391, + "num_input_tokens_seen": 154343296, + "step": 49040 + }, + { + "epoch": 3.139683759042315, + "grad_norm": 5.743291854858398, + "learning_rate": 1.8283619449320788e-06, + "loss": 0.1522, + "num_input_tokens_seen": 154358976, + "step": 49045 + }, + { + "epoch": 3.1400038409832915, + "grad_norm": 8.663667678833008, + "learning_rate": 1.8278238474260801e-06, + "loss": 0.1084, + "num_input_tokens_seen": 154374784, + "step": 49050 + }, + { + "epoch": 3.1403239229242685, + "grad_norm": 14.369244575500488, + "learning_rate": 1.8272857834850562e-06, + "loss": 0.2112, + "num_input_tokens_seen": 154390080, + "step": 49055 + }, + { + "epoch": 3.1406440048652455, + "grad_norm": 3.9959585666656494, + "learning_rate": 1.8267477531358763e-06, + "loss": 0.1409, + "num_input_tokens_seen": 154406848, + "step": 49060 + }, + { + "epoch": 3.1409640868062225, + "grad_norm": 7.033081531524658, + "learning_rate": 1.8262097564054076e-06, + "loss": 0.1635, + "num_input_tokens_seen": 154423168, + "step": 49065 + }, + { + "epoch": 3.141284168747199, + "grad_norm": 6.724508285522461, + "learning_rate": 1.825671793320513e-06, + "loss": 0.154, + "num_input_tokens_seen": 154438720, + "step": 49070 + }, + { + "epoch": 3.141604250688176, + "grad_norm": 30.201505661010742, + "learning_rate": 1.8251338639080575e-06, + "loss": 0.1403, + "num_input_tokens_seen": 154454144, + "step": 49075 + }, + { + "epoch": 3.141924332629153, + "grad_norm": 6.599018096923828, + "learning_rate": 1.8245959681949004e-06, + "loss": 0.1403, + "num_input_tokens_seen": 154469632, + "step": 49080 + }, + { + "epoch": 3.14224441457013, + "grad_norm": 8.85939884185791, + "learning_rate": 1.8240581062079043e-06, + "loss": 0.2037, + "num_input_tokens_seen": 154485120, + "step": 49085 + }, + { + "epoch": 3.142564496511107, + "grad_norm": 8.430471420288086, + "learning_rate": 1.8235202779739242e-06, + "loss": 0.1463, + "num_input_tokens_seen": 154500864, + "step": 49090 + }, + { + "epoch": 3.1428845784520836, + "grad_norm": 9.178176879882812, + "learning_rate": 1.8229824835198187e-06, + "loss": 0.1618, + "num_input_tokens_seen": 154516608, + "step": 49095 + }, + { + "epoch": 3.1432046603930606, + "grad_norm": 5.400923728942871, + "learning_rate": 1.8224447228724407e-06, + "loss": 0.2045, + "num_input_tokens_seen": 154531904, + "step": 49100 + }, + { + "epoch": 3.1435247423340376, + "grad_norm": 5.460789203643799, + "learning_rate": 1.8219069960586444e-06, + "loss": 0.1027, + "num_input_tokens_seen": 154546944, + "step": 49105 + }, + { + "epoch": 3.1438448242750145, + "grad_norm": 7.884562969207764, + "learning_rate": 1.8213693031052798e-06, + "loss": 0.1544, + "num_input_tokens_seen": 154563072, + "step": 49110 + }, + { + "epoch": 3.144164906215991, + "grad_norm": 6.682459354400635, + "learning_rate": 1.8208316440391977e-06, + "loss": 0.1837, + "num_input_tokens_seen": 154579712, + "step": 49115 + }, + { + "epoch": 3.144484988156968, + "grad_norm": 9.298750877380371, + "learning_rate": 1.8202940188872468e-06, + "loss": 0.1791, + "num_input_tokens_seen": 154595008, + "step": 49120 + }, + { + "epoch": 3.144805070097945, + "grad_norm": 29.461992263793945, + "learning_rate": 1.8197564276762708e-06, + "loss": 0.2902, + "num_input_tokens_seen": 154611776, + "step": 49125 + }, + { + "epoch": 3.145125152038922, + "grad_norm": 9.63489055633545, + "learning_rate": 1.8192188704331163e-06, + "loss": 0.1741, + "num_input_tokens_seen": 154626432, + "step": 49130 + }, + { + "epoch": 3.145445233979899, + "grad_norm": 8.375779151916504, + "learning_rate": 1.8186813471846246e-06, + "loss": 0.1921, + "num_input_tokens_seen": 154643456, + "step": 49135 + }, + { + "epoch": 3.1457653159208756, + "grad_norm": 9.419687271118164, + "learning_rate": 1.818143857957639e-06, + "loss": 0.1822, + "num_input_tokens_seen": 154659136, + "step": 49140 + }, + { + "epoch": 3.1460853978618526, + "grad_norm": 8.171981811523438, + "learning_rate": 1.8176064027789958e-06, + "loss": 0.2113, + "num_input_tokens_seen": 154673408, + "step": 49145 + }, + { + "epoch": 3.1464054798028296, + "grad_norm": 7.410990238189697, + "learning_rate": 1.817068981675536e-06, + "loss": 0.1947, + "num_input_tokens_seen": 154688448, + "step": 49150 + }, + { + "epoch": 3.1467255617438066, + "grad_norm": 8.578924179077148, + "learning_rate": 1.8165315946740925e-06, + "loss": 0.1851, + "num_input_tokens_seen": 154704448, + "step": 49155 + }, + { + "epoch": 3.147045643684783, + "grad_norm": 7.075291156768799, + "learning_rate": 1.815994241801502e-06, + "loss": 0.1101, + "num_input_tokens_seen": 154719936, + "step": 49160 + }, + { + "epoch": 3.14736572562576, + "grad_norm": 7.906063079833984, + "learning_rate": 1.8154569230845957e-06, + "loss": 0.1941, + "num_input_tokens_seen": 154735808, + "step": 49165 + }, + { + "epoch": 3.147685807566737, + "grad_norm": 10.607504844665527, + "learning_rate": 1.8149196385502058e-06, + "loss": 0.1623, + "num_input_tokens_seen": 154750720, + "step": 49170 + }, + { + "epoch": 3.148005889507714, + "grad_norm": 30.479982376098633, + "learning_rate": 1.8143823882251598e-06, + "loss": 0.2436, + "num_input_tokens_seen": 154766144, + "step": 49175 + }, + { + "epoch": 3.148325971448691, + "grad_norm": 190.42410278320312, + "learning_rate": 1.813845172136286e-06, + "loss": 0.2159, + "num_input_tokens_seen": 154781952, + "step": 49180 + }, + { + "epoch": 3.1486460533896676, + "grad_norm": 6.2694292068481445, + "learning_rate": 1.8133079903104105e-06, + "loss": 0.1272, + "num_input_tokens_seen": 154797504, + "step": 49185 + }, + { + "epoch": 3.1489661353306446, + "grad_norm": 14.754781723022461, + "learning_rate": 1.8127708427743572e-06, + "loss": 0.1553, + "num_input_tokens_seen": 154812672, + "step": 49190 + }, + { + "epoch": 3.1492862172716216, + "grad_norm": 4.880363941192627, + "learning_rate": 1.8122337295549491e-06, + "loss": 0.1029, + "num_input_tokens_seen": 154827968, + "step": 49195 + }, + { + "epoch": 3.1496062992125986, + "grad_norm": 3.162040948867798, + "learning_rate": 1.8116966506790052e-06, + "loss": 0.0969, + "num_input_tokens_seen": 154846528, + "step": 49200 + }, + { + "epoch": 3.149926381153575, + "grad_norm": 5.3851704597473145, + "learning_rate": 1.811159606173346e-06, + "loss": 0.085, + "num_input_tokens_seen": 154862336, + "step": 49205 + }, + { + "epoch": 3.150246463094552, + "grad_norm": 8.769571304321289, + "learning_rate": 1.810622596064787e-06, + "loss": 0.1819, + "num_input_tokens_seen": 154876992, + "step": 49210 + }, + { + "epoch": 3.150566545035529, + "grad_norm": 5.238892078399658, + "learning_rate": 1.8100856203801458e-06, + "loss": 0.1521, + "num_input_tokens_seen": 154892864, + "step": 49215 + }, + { + "epoch": 3.150886626976506, + "grad_norm": 0.334196001291275, + "learning_rate": 1.8095486791462342e-06, + "loss": 0.2068, + "num_input_tokens_seen": 154908096, + "step": 49220 + }, + { + "epoch": 3.1512067089174827, + "grad_norm": 4.674709320068359, + "learning_rate": 1.8090117723898663e-06, + "loss": 0.0886, + "num_input_tokens_seen": 154923584, + "step": 49225 + }, + { + "epoch": 3.1515267908584597, + "grad_norm": 10.096443176269531, + "learning_rate": 1.8084749001378499e-06, + "loss": 0.2037, + "num_input_tokens_seen": 154938432, + "step": 49230 + }, + { + "epoch": 3.1518468727994366, + "grad_norm": 23.456361770629883, + "learning_rate": 1.8079380624169955e-06, + "loss": 0.1023, + "num_input_tokens_seen": 154954368, + "step": 49235 + }, + { + "epoch": 3.1521669547404136, + "grad_norm": 29.340728759765625, + "learning_rate": 1.8074012592541102e-06, + "loss": 0.3264, + "num_input_tokens_seen": 154970240, + "step": 49240 + }, + { + "epoch": 3.1524870366813906, + "grad_norm": 8.055460929870605, + "learning_rate": 1.8068644906759973e-06, + "loss": 0.1556, + "num_input_tokens_seen": 154984896, + "step": 49245 + }, + { + "epoch": 3.152807118622367, + "grad_norm": 3.3055620193481445, + "learning_rate": 1.8063277567094617e-06, + "loss": 0.1864, + "num_input_tokens_seen": 155000768, + "step": 49250 + }, + { + "epoch": 3.153127200563344, + "grad_norm": 8.000261306762695, + "learning_rate": 1.8057910573813042e-06, + "loss": 0.2295, + "num_input_tokens_seen": 155019072, + "step": 49255 + }, + { + "epoch": 3.153447282504321, + "grad_norm": 26.18157386779785, + "learning_rate": 1.805254392718327e-06, + "loss": 0.2016, + "num_input_tokens_seen": 155035072, + "step": 49260 + }, + { + "epoch": 3.153767364445298, + "grad_norm": 9.897100448608398, + "learning_rate": 1.804717762747325e-06, + "loss": 0.1776, + "num_input_tokens_seen": 155049920, + "step": 49265 + }, + { + "epoch": 3.1540874463862747, + "grad_norm": 7.768906593322754, + "learning_rate": 1.804181167495097e-06, + "loss": 0.0771, + "num_input_tokens_seen": 155066432, + "step": 49270 + }, + { + "epoch": 3.1544075283272517, + "grad_norm": 6.856673717498779, + "learning_rate": 1.8036446069884362e-06, + "loss": 0.1546, + "num_input_tokens_seen": 155082496, + "step": 49275 + }, + { + "epoch": 3.1547276102682287, + "grad_norm": 7.984756946563721, + "learning_rate": 1.803108081254138e-06, + "loss": 0.1381, + "num_input_tokens_seen": 155099904, + "step": 49280 + }, + { + "epoch": 3.1550476922092057, + "grad_norm": 5.913626670837402, + "learning_rate": 1.802571590318991e-06, + "loss": 0.1278, + "num_input_tokens_seen": 155114880, + "step": 49285 + }, + { + "epoch": 3.155367774150182, + "grad_norm": 9.612730026245117, + "learning_rate": 1.8020351342097864e-06, + "loss": 0.2006, + "num_input_tokens_seen": 155130496, + "step": 49290 + }, + { + "epoch": 3.155687856091159, + "grad_norm": 8.686410903930664, + "learning_rate": 1.8014987129533123e-06, + "loss": 0.1725, + "num_input_tokens_seen": 155146240, + "step": 49295 + }, + { + "epoch": 3.156007938032136, + "grad_norm": 7.781499862670898, + "learning_rate": 1.8009623265763531e-06, + "loss": 0.1611, + "num_input_tokens_seen": 155161984, + "step": 49300 + }, + { + "epoch": 3.156328019973113, + "grad_norm": 34.38680648803711, + "learning_rate": 1.800425975105695e-06, + "loss": 0.1304, + "num_input_tokens_seen": 155177408, + "step": 49305 + }, + { + "epoch": 3.15664810191409, + "grad_norm": 8.150683403015137, + "learning_rate": 1.7998896585681189e-06, + "loss": 0.1918, + "num_input_tokens_seen": 155194496, + "step": 49310 + }, + { + "epoch": 3.1569681838550667, + "grad_norm": 8.369011878967285, + "learning_rate": 1.7993533769904076e-06, + "loss": 0.1523, + "num_input_tokens_seen": 155210560, + "step": 49315 + }, + { + "epoch": 3.1572882657960437, + "grad_norm": 6.515023231506348, + "learning_rate": 1.7988171303993383e-06, + "loss": 0.1641, + "num_input_tokens_seen": 155226368, + "step": 49320 + }, + { + "epoch": 3.1576083477370207, + "grad_norm": 7.85992431640625, + "learning_rate": 1.79828091882169e-06, + "loss": 0.2784, + "num_input_tokens_seen": 155241472, + "step": 49325 + }, + { + "epoch": 3.1579284296779977, + "grad_norm": 13.111955642700195, + "learning_rate": 1.7977447422842364e-06, + "loss": 0.1695, + "num_input_tokens_seen": 155256640, + "step": 49330 + }, + { + "epoch": 3.1582485116189742, + "grad_norm": 7.245466232299805, + "learning_rate": 1.797208600813753e-06, + "loss": 0.1801, + "num_input_tokens_seen": 155273024, + "step": 49335 + }, + { + "epoch": 3.1585685935599512, + "grad_norm": 6.545849323272705, + "learning_rate": 1.7966724944370108e-06, + "loss": 0.2074, + "num_input_tokens_seen": 155289088, + "step": 49340 + }, + { + "epoch": 3.1588886755009282, + "grad_norm": 8.681533813476562, + "learning_rate": 1.7961364231807822e-06, + "loss": 0.1932, + "num_input_tokens_seen": 155305728, + "step": 49345 + }, + { + "epoch": 3.159208757441905, + "grad_norm": 47.5682258605957, + "learning_rate": 1.795600387071833e-06, + "loss": 0.2204, + "num_input_tokens_seen": 155321792, + "step": 49350 + }, + { + "epoch": 3.159528839382882, + "grad_norm": 12.186660766601562, + "learning_rate": 1.795064386136931e-06, + "loss": 0.2136, + "num_input_tokens_seen": 155339264, + "step": 49355 + }, + { + "epoch": 3.1598489213238588, + "grad_norm": 4.176931858062744, + "learning_rate": 1.7945284204028428e-06, + "loss": 0.1362, + "num_input_tokens_seen": 155354304, + "step": 49360 + }, + { + "epoch": 3.1601690032648357, + "grad_norm": 5.32323694229126, + "learning_rate": 1.79399248989633e-06, + "loss": 0.275, + "num_input_tokens_seen": 155369152, + "step": 49365 + }, + { + "epoch": 3.1604890852058127, + "grad_norm": 24.13262367248535, + "learning_rate": 1.7934565946441556e-06, + "loss": 0.2137, + "num_input_tokens_seen": 155384896, + "step": 49370 + }, + { + "epoch": 3.1608091671467897, + "grad_norm": 26.612361907958984, + "learning_rate": 1.7929207346730776e-06, + "loss": 0.1578, + "num_input_tokens_seen": 155400512, + "step": 49375 + }, + { + "epoch": 3.1611292490877663, + "grad_norm": 7.855679988861084, + "learning_rate": 1.7923849100098562e-06, + "loss": 0.1719, + "num_input_tokens_seen": 155416512, + "step": 49380 + }, + { + "epoch": 3.1614493310287433, + "grad_norm": 6.541477203369141, + "learning_rate": 1.7918491206812455e-06, + "loss": 0.169, + "num_input_tokens_seen": 155431296, + "step": 49385 + }, + { + "epoch": 3.1617694129697202, + "grad_norm": 2.6965103149414062, + "learning_rate": 1.7913133667140014e-06, + "loss": 0.1524, + "num_input_tokens_seen": 155447296, + "step": 49390 + }, + { + "epoch": 3.1620894949106972, + "grad_norm": 9.244712829589844, + "learning_rate": 1.7907776481348763e-06, + "loss": 0.1339, + "num_input_tokens_seen": 155463104, + "step": 49395 + }, + { + "epoch": 3.1624095768516742, + "grad_norm": 21.024782180786133, + "learning_rate": 1.790241964970622e-06, + "loss": 0.1519, + "num_input_tokens_seen": 155478784, + "step": 49400 + }, + { + "epoch": 3.1627296587926508, + "grad_norm": 6.783580780029297, + "learning_rate": 1.7897063172479862e-06, + "loss": 0.1732, + "num_input_tokens_seen": 155495616, + "step": 49405 + }, + { + "epoch": 3.1630497407336278, + "grad_norm": 9.887248992919922, + "learning_rate": 1.7891707049937168e-06, + "loss": 0.1998, + "num_input_tokens_seen": 155512640, + "step": 49410 + }, + { + "epoch": 3.1633698226746048, + "grad_norm": 2.084796667098999, + "learning_rate": 1.7886351282345604e-06, + "loss": 0.1176, + "num_input_tokens_seen": 155528576, + "step": 49415 + }, + { + "epoch": 3.1636899046155817, + "grad_norm": 7.190647602081299, + "learning_rate": 1.7880995869972598e-06, + "loss": 0.1531, + "num_input_tokens_seen": 155544704, + "step": 49420 + }, + { + "epoch": 3.1640099865565583, + "grad_norm": 4.87130069732666, + "learning_rate": 1.7875640813085587e-06, + "loss": 0.1638, + "num_input_tokens_seen": 155560704, + "step": 49425 + }, + { + "epoch": 3.1643300684975353, + "grad_norm": 6.263766765594482, + "learning_rate": 1.7870286111951957e-06, + "loss": 0.1806, + "num_input_tokens_seen": 155575680, + "step": 49430 + }, + { + "epoch": 3.1646501504385123, + "grad_norm": 5.785648822784424, + "learning_rate": 1.786493176683911e-06, + "loss": 0.1671, + "num_input_tokens_seen": 155592448, + "step": 49435 + }, + { + "epoch": 3.1649702323794893, + "grad_norm": 5.511886119842529, + "learning_rate": 1.7859577778014393e-06, + "loss": 0.1551, + "num_input_tokens_seen": 155608512, + "step": 49440 + }, + { + "epoch": 3.1652903143204663, + "grad_norm": 9.026702880859375, + "learning_rate": 1.7854224145745175e-06, + "loss": 0.1647, + "num_input_tokens_seen": 155625088, + "step": 49445 + }, + { + "epoch": 3.165610396261443, + "grad_norm": 6.889237880706787, + "learning_rate": 1.7848870870298777e-06, + "loss": 0.1323, + "num_input_tokens_seen": 155641344, + "step": 49450 + }, + { + "epoch": 3.16593047820242, + "grad_norm": 4.60921049118042, + "learning_rate": 1.784351795194253e-06, + "loss": 0.1538, + "num_input_tokens_seen": 155657088, + "step": 49455 + }, + { + "epoch": 3.166250560143397, + "grad_norm": 7.745090484619141, + "learning_rate": 1.7838165390943706e-06, + "loss": 0.2068, + "num_input_tokens_seen": 155673280, + "step": 49460 + }, + { + "epoch": 3.1665706420843738, + "grad_norm": 7.88144588470459, + "learning_rate": 1.783281318756961e-06, + "loss": 0.1617, + "num_input_tokens_seen": 155688832, + "step": 49465 + }, + { + "epoch": 3.1668907240253503, + "grad_norm": 4.351413726806641, + "learning_rate": 1.7827461342087483e-06, + "loss": 0.1426, + "num_input_tokens_seen": 155703552, + "step": 49470 + }, + { + "epoch": 3.1672108059663273, + "grad_norm": 12.446913719177246, + "learning_rate": 1.7822109854764575e-06, + "loss": 0.18, + "num_input_tokens_seen": 155720384, + "step": 49475 + }, + { + "epoch": 3.1675308879073043, + "grad_norm": 0.7245914340019226, + "learning_rate": 1.7816758725868117e-06, + "loss": 0.1643, + "num_input_tokens_seen": 155736960, + "step": 49480 + }, + { + "epoch": 3.1678509698482813, + "grad_norm": 6.128818035125732, + "learning_rate": 1.7811407955665306e-06, + "loss": 0.1909, + "num_input_tokens_seen": 155752320, + "step": 49485 + }, + { + "epoch": 3.1681710517892583, + "grad_norm": 13.595970153808594, + "learning_rate": 1.780605754442335e-06, + "loss": 0.1776, + "num_input_tokens_seen": 155767616, + "step": 49490 + }, + { + "epoch": 3.168491133730235, + "grad_norm": 6.888071060180664, + "learning_rate": 1.7800707492409398e-06, + "loss": 0.1464, + "num_input_tokens_seen": 155785088, + "step": 49495 + }, + { + "epoch": 3.168811215671212, + "grad_norm": 10.553888320922852, + "learning_rate": 1.7795357799890617e-06, + "loss": 0.1997, + "num_input_tokens_seen": 155801600, + "step": 49500 + }, + { + "epoch": 3.169131297612189, + "grad_norm": 3.3094441890716553, + "learning_rate": 1.7790008467134136e-06, + "loss": 0.1413, + "num_input_tokens_seen": 155818496, + "step": 49505 + }, + { + "epoch": 3.169451379553166, + "grad_norm": 8.221957206726074, + "learning_rate": 1.7784659494407085e-06, + "loss": 0.1477, + "num_input_tokens_seen": 155833664, + "step": 49510 + }, + { + "epoch": 3.1697714614941424, + "grad_norm": 14.293084144592285, + "learning_rate": 1.777931088197655e-06, + "loss": 0.168, + "num_input_tokens_seen": 155850496, + "step": 49515 + }, + { + "epoch": 3.1700915434351193, + "grad_norm": 6.590211868286133, + "learning_rate": 1.7773962630109625e-06, + "loss": 0.1681, + "num_input_tokens_seen": 155865664, + "step": 49520 + }, + { + "epoch": 3.1704116253760963, + "grad_norm": 44.57112121582031, + "learning_rate": 1.776861473907336e-06, + "loss": 0.2676, + "num_input_tokens_seen": 155881088, + "step": 49525 + }, + { + "epoch": 3.1707317073170733, + "grad_norm": 10.191996574401855, + "learning_rate": 1.7763267209134805e-06, + "loss": 0.1935, + "num_input_tokens_seen": 155896640, + "step": 49530 + }, + { + "epoch": 3.17105178925805, + "grad_norm": 7.469155311584473, + "learning_rate": 1.7757920040560996e-06, + "loss": 0.1933, + "num_input_tokens_seen": 155911616, + "step": 49535 + }, + { + "epoch": 3.171371871199027, + "grad_norm": 5.924636363983154, + "learning_rate": 1.7752573233618936e-06, + "loss": 0.1791, + "num_input_tokens_seen": 155926784, + "step": 49540 + }, + { + "epoch": 3.171691953140004, + "grad_norm": 6.778741836547852, + "learning_rate": 1.7747226788575622e-06, + "loss": 0.1269, + "num_input_tokens_seen": 155943552, + "step": 49545 + }, + { + "epoch": 3.172012035080981, + "grad_norm": 22.718143463134766, + "learning_rate": 1.774188070569802e-06, + "loss": 0.1519, + "num_input_tokens_seen": 155959040, + "step": 49550 + }, + { + "epoch": 3.1723321170219574, + "grad_norm": 8.553397178649902, + "learning_rate": 1.773653498525309e-06, + "loss": 0.1188, + "num_input_tokens_seen": 155974592, + "step": 49555 + }, + { + "epoch": 3.1726521989629344, + "grad_norm": 8.442671775817871, + "learning_rate": 1.7731189627507772e-06, + "loss": 0.1247, + "num_input_tokens_seen": 155990592, + "step": 49560 + }, + { + "epoch": 3.1729722809039114, + "grad_norm": 8.999322891235352, + "learning_rate": 1.772584463272898e-06, + "loss": 0.105, + "num_input_tokens_seen": 156006144, + "step": 49565 + }, + { + "epoch": 3.1732923628448884, + "grad_norm": 5.572163105010986, + "learning_rate": 1.7720500001183617e-06, + "loss": 0.0919, + "num_input_tokens_seen": 156022016, + "step": 49570 + }, + { + "epoch": 3.1736124447858653, + "grad_norm": 8.721187591552734, + "learning_rate": 1.771515573313857e-06, + "loss": 0.1088, + "num_input_tokens_seen": 156037824, + "step": 49575 + }, + { + "epoch": 3.173932526726842, + "grad_norm": 9.763681411743164, + "learning_rate": 1.7709811828860693e-06, + "loss": 0.2396, + "num_input_tokens_seen": 156053120, + "step": 49580 + }, + { + "epoch": 3.174252608667819, + "grad_norm": 3.1107325553894043, + "learning_rate": 1.770446828861684e-06, + "loss": 0.2079, + "num_input_tokens_seen": 156068160, + "step": 49585 + }, + { + "epoch": 3.174572690608796, + "grad_norm": 6.061523914337158, + "learning_rate": 1.7699125112673843e-06, + "loss": 0.0824, + "num_input_tokens_seen": 156084480, + "step": 49590 + }, + { + "epoch": 3.174892772549773, + "grad_norm": 8.337629318237305, + "learning_rate": 1.7693782301298508e-06, + "loss": 0.1434, + "num_input_tokens_seen": 156100736, + "step": 49595 + }, + { + "epoch": 3.1752128544907494, + "grad_norm": 67.24038696289062, + "learning_rate": 1.7688439854757635e-06, + "loss": 0.2225, + "num_input_tokens_seen": 156117056, + "step": 49600 + }, + { + "epoch": 3.1755329364317264, + "grad_norm": 8.733721733093262, + "learning_rate": 1.7683097773317981e-06, + "loss": 0.1643, + "num_input_tokens_seen": 156132672, + "step": 49605 + }, + { + "epoch": 3.1758530183727034, + "grad_norm": 8.166696548461914, + "learning_rate": 1.7677756057246326e-06, + "loss": 0.1787, + "num_input_tokens_seen": 156148480, + "step": 49610 + }, + { + "epoch": 3.1761731003136804, + "grad_norm": 6.0801239013671875, + "learning_rate": 1.7672414706809377e-06, + "loss": 0.1366, + "num_input_tokens_seen": 156164160, + "step": 49615 + }, + { + "epoch": 3.1764931822546574, + "grad_norm": 8.329777717590332, + "learning_rate": 1.7667073722273877e-06, + "loss": 0.1075, + "num_input_tokens_seen": 156179008, + "step": 49620 + }, + { + "epoch": 3.176813264195634, + "grad_norm": 13.72121524810791, + "learning_rate": 1.7661733103906517e-06, + "loss": 0.2051, + "num_input_tokens_seen": 156195072, + "step": 49625 + }, + { + "epoch": 3.177133346136611, + "grad_norm": 12.648659706115723, + "learning_rate": 1.765639285197399e-06, + "loss": 0.1316, + "num_input_tokens_seen": 156210560, + "step": 49630 + }, + { + "epoch": 3.177453428077588, + "grad_norm": 6.624975204467773, + "learning_rate": 1.7651052966742943e-06, + "loss": 0.1347, + "num_input_tokens_seen": 156226240, + "step": 49635 + }, + { + "epoch": 3.177773510018565, + "grad_norm": 4.121392726898193, + "learning_rate": 1.7645713448480037e-06, + "loss": 0.122, + "num_input_tokens_seen": 156242880, + "step": 49640 + }, + { + "epoch": 3.1780935919595414, + "grad_norm": 30.621509552001953, + "learning_rate": 1.764037429745189e-06, + "loss": 0.1503, + "num_input_tokens_seen": 156258432, + "step": 49645 + }, + { + "epoch": 3.1784136739005184, + "grad_norm": 9.462681770324707, + "learning_rate": 1.7635035513925115e-06, + "loss": 0.1754, + "num_input_tokens_seen": 156273664, + "step": 49650 + }, + { + "epoch": 3.1787337558414954, + "grad_norm": 10.234134674072266, + "learning_rate": 1.762969709816631e-06, + "loss": 0.231, + "num_input_tokens_seen": 156289792, + "step": 49655 + }, + { + "epoch": 3.1790538377824724, + "grad_norm": 9.532283782958984, + "learning_rate": 1.7624359050442037e-06, + "loss": 0.2028, + "num_input_tokens_seen": 156305408, + "step": 49660 + }, + { + "epoch": 3.1793739197234494, + "grad_norm": 3.360030174255371, + "learning_rate": 1.761902137101886e-06, + "loss": 0.1313, + "num_input_tokens_seen": 156320896, + "step": 49665 + }, + { + "epoch": 3.179694001664426, + "grad_norm": 7.84675407409668, + "learning_rate": 1.7613684060163303e-06, + "loss": 0.1381, + "num_input_tokens_seen": 156336576, + "step": 49670 + }, + { + "epoch": 3.180014083605403, + "grad_norm": 11.257711410522461, + "learning_rate": 1.7608347118141894e-06, + "loss": 0.1875, + "num_input_tokens_seen": 156351872, + "step": 49675 + }, + { + "epoch": 3.18033416554638, + "grad_norm": 9.128863334655762, + "learning_rate": 1.7603010545221123e-06, + "loss": 0.1498, + "num_input_tokens_seen": 156366976, + "step": 49680 + }, + { + "epoch": 3.180654247487357, + "grad_norm": 9.968802452087402, + "learning_rate": 1.7597674341667487e-06, + "loss": 0.2003, + "num_input_tokens_seen": 156383104, + "step": 49685 + }, + { + "epoch": 3.1809743294283335, + "grad_norm": 6.751899719238281, + "learning_rate": 1.7592338507747425e-06, + "loss": 0.1034, + "num_input_tokens_seen": 156398720, + "step": 49690 + }, + { + "epoch": 3.1812944113693105, + "grad_norm": 28.9630184173584, + "learning_rate": 1.7587003043727403e-06, + "loss": 0.2642, + "num_input_tokens_seen": 156414080, + "step": 49695 + }, + { + "epoch": 3.1816144933102875, + "grad_norm": 8.408283233642578, + "learning_rate": 1.7581667949873827e-06, + "loss": 0.1386, + "num_input_tokens_seen": 156429056, + "step": 49700 + }, + { + "epoch": 3.1819345752512644, + "grad_norm": 10.019903182983398, + "learning_rate": 1.7576333226453113e-06, + "loss": 0.1463, + "num_input_tokens_seen": 156445184, + "step": 49705 + }, + { + "epoch": 3.1822546571922414, + "grad_norm": 70.52263641357422, + "learning_rate": 1.7570998873731654e-06, + "loss": 0.1941, + "num_input_tokens_seen": 156460864, + "step": 49710 + }, + { + "epoch": 3.182574739133218, + "grad_norm": 6.113272666931152, + "learning_rate": 1.756566489197581e-06, + "loss": 0.2133, + "num_input_tokens_seen": 156478464, + "step": 49715 + }, + { + "epoch": 3.182894821074195, + "grad_norm": 7.7529401779174805, + "learning_rate": 1.7560331281451947e-06, + "loss": 0.1716, + "num_input_tokens_seen": 156495296, + "step": 49720 + }, + { + "epoch": 3.183214903015172, + "grad_norm": 20.642383575439453, + "learning_rate": 1.755499804242638e-06, + "loss": 0.1428, + "num_input_tokens_seen": 156510464, + "step": 49725 + }, + { + "epoch": 3.183534984956149, + "grad_norm": 4.499841690063477, + "learning_rate": 1.7549665175165435e-06, + "loss": 0.1476, + "num_input_tokens_seen": 156525632, + "step": 49730 + }, + { + "epoch": 3.1838550668971255, + "grad_norm": 4.543319225311279, + "learning_rate": 1.7544332679935397e-06, + "loss": 0.1398, + "num_input_tokens_seen": 156541568, + "step": 49735 + }, + { + "epoch": 3.1841751488381025, + "grad_norm": 7.971896171569824, + "learning_rate": 1.753900055700256e-06, + "loss": 0.1856, + "num_input_tokens_seen": 156556288, + "step": 49740 + }, + { + "epoch": 3.1844952307790795, + "grad_norm": 13.134099006652832, + "learning_rate": 1.7533668806633163e-06, + "loss": 0.1636, + "num_input_tokens_seen": 156571328, + "step": 49745 + }, + { + "epoch": 3.1848153127200565, + "grad_norm": 8.975635528564453, + "learning_rate": 1.7528337429093468e-06, + "loss": 0.2179, + "num_input_tokens_seen": 156586432, + "step": 49750 + }, + { + "epoch": 3.1851353946610335, + "grad_norm": 8.046643257141113, + "learning_rate": 1.7523006424649668e-06, + "loss": 0.1316, + "num_input_tokens_seen": 156602560, + "step": 49755 + }, + { + "epoch": 3.18545547660201, + "grad_norm": 6.3856520652771, + "learning_rate": 1.7517675793567984e-06, + "loss": 0.148, + "num_input_tokens_seen": 156617984, + "step": 49760 + }, + { + "epoch": 3.185775558542987, + "grad_norm": 35.161624908447266, + "learning_rate": 1.7512345536114605e-06, + "loss": 0.1879, + "num_input_tokens_seen": 156634304, + "step": 49765 + }, + { + "epoch": 3.186095640483964, + "grad_norm": 2.9713351726531982, + "learning_rate": 1.750701565255568e-06, + "loss": 0.1933, + "num_input_tokens_seen": 156650304, + "step": 49770 + }, + { + "epoch": 3.186415722424941, + "grad_norm": 5.447307109832764, + "learning_rate": 1.750168614315737e-06, + "loss": 0.1263, + "num_input_tokens_seen": 156666112, + "step": 49775 + }, + { + "epoch": 3.1867358043659175, + "grad_norm": 5.368768692016602, + "learning_rate": 1.7496357008185793e-06, + "loss": 0.1138, + "num_input_tokens_seen": 156681728, + "step": 49780 + }, + { + "epoch": 3.1870558863068945, + "grad_norm": 7.701848030090332, + "learning_rate": 1.7491028247907063e-06, + "loss": 0.1, + "num_input_tokens_seen": 156696320, + "step": 49785 + }, + { + "epoch": 3.1873759682478715, + "grad_norm": 6.808759689331055, + "learning_rate": 1.7485699862587267e-06, + "loss": 0.1686, + "num_input_tokens_seen": 156713088, + "step": 49790 + }, + { + "epoch": 3.1876960501888485, + "grad_norm": 10.6778564453125, + "learning_rate": 1.748037185249249e-06, + "loss": 0.1719, + "num_input_tokens_seen": 156728192, + "step": 49795 + }, + { + "epoch": 3.188016132129825, + "grad_norm": 4.567739963531494, + "learning_rate": 1.747504421788876e-06, + "loss": 0.1797, + "num_input_tokens_seen": 156743360, + "step": 49800 + }, + { + "epoch": 3.188336214070802, + "grad_norm": 9.379837989807129, + "learning_rate": 1.7469716959042137e-06, + "loss": 0.2801, + "num_input_tokens_seen": 156760960, + "step": 49805 + }, + { + "epoch": 3.188656296011779, + "grad_norm": 9.220263481140137, + "learning_rate": 1.7464390076218618e-06, + "loss": 0.1715, + "num_input_tokens_seen": 156775872, + "step": 49810 + }, + { + "epoch": 3.188976377952756, + "grad_norm": 7.552517414093018, + "learning_rate": 1.7459063569684209e-06, + "loss": 0.1884, + "num_input_tokens_seen": 156791040, + "step": 49815 + }, + { + "epoch": 3.1892964598937326, + "grad_norm": 12.821847915649414, + "learning_rate": 1.7453737439704882e-06, + "loss": 0.1933, + "num_input_tokens_seen": 156807360, + "step": 49820 + }, + { + "epoch": 3.1896165418347096, + "grad_norm": 49.20598602294922, + "learning_rate": 1.7448411686546602e-06, + "loss": 0.163, + "num_input_tokens_seen": 156822656, + "step": 49825 + }, + { + "epoch": 3.1899366237756865, + "grad_norm": 6.029746055603027, + "learning_rate": 1.7443086310475315e-06, + "loss": 0.2076, + "num_input_tokens_seen": 156837824, + "step": 49830 + }, + { + "epoch": 3.1902567057166635, + "grad_norm": 8.99903678894043, + "learning_rate": 1.7437761311756927e-06, + "loss": 0.1561, + "num_input_tokens_seen": 156853952, + "step": 49835 + }, + { + "epoch": 3.1905767876576405, + "grad_norm": 5.7051849365234375, + "learning_rate": 1.7432436690657357e-06, + "loss": 0.2274, + "num_input_tokens_seen": 156870144, + "step": 49840 + }, + { + "epoch": 3.190896869598617, + "grad_norm": 3.2334420680999756, + "learning_rate": 1.7427112447442474e-06, + "loss": 0.1089, + "num_input_tokens_seen": 156885952, + "step": 49845 + }, + { + "epoch": 3.191216951539594, + "grad_norm": 6.056878566741943, + "learning_rate": 1.7421788582378153e-06, + "loss": 0.1579, + "num_input_tokens_seen": 156901824, + "step": 49850 + }, + { + "epoch": 3.191537033480571, + "grad_norm": 18.882850646972656, + "learning_rate": 1.741646509573023e-06, + "loss": 0.2451, + "num_input_tokens_seen": 156917440, + "step": 49855 + }, + { + "epoch": 3.191857115421548, + "grad_norm": 8.942211151123047, + "learning_rate": 1.7411141987764552e-06, + "loss": 0.152, + "num_input_tokens_seen": 156933376, + "step": 49860 + }, + { + "epoch": 3.1921771973625246, + "grad_norm": 19.978769302368164, + "learning_rate": 1.7405819258746897e-06, + "loss": 0.2357, + "num_input_tokens_seen": 156948736, + "step": 49865 + }, + { + "epoch": 3.1924972793035016, + "grad_norm": 4.777136325836182, + "learning_rate": 1.7400496908943085e-06, + "loss": 0.141, + "num_input_tokens_seen": 156965120, + "step": 49870 + }, + { + "epoch": 3.1928173612444786, + "grad_norm": 4.513073921203613, + "learning_rate": 1.7395174938618864e-06, + "loss": 0.1475, + "num_input_tokens_seen": 156980608, + "step": 49875 + }, + { + "epoch": 3.1931374431854556, + "grad_norm": 9.411108016967773, + "learning_rate": 1.7389853348039996e-06, + "loss": 0.1343, + "num_input_tokens_seen": 156997056, + "step": 49880 + }, + { + "epoch": 3.1934575251264326, + "grad_norm": 7.342861175537109, + "learning_rate": 1.7384532137472221e-06, + "loss": 0.1241, + "num_input_tokens_seen": 157012800, + "step": 49885 + }, + { + "epoch": 3.193777607067409, + "grad_norm": 11.34343433380127, + "learning_rate": 1.7379211307181235e-06, + "loss": 0.1486, + "num_input_tokens_seen": 157030144, + "step": 49890 + }, + { + "epoch": 3.194097689008386, + "grad_norm": 7.0641889572143555, + "learning_rate": 1.7373890857432752e-06, + "loss": 0.2988, + "num_input_tokens_seen": 157045312, + "step": 49895 + }, + { + "epoch": 3.194417770949363, + "grad_norm": 3.5215368270874023, + "learning_rate": 1.7368570788492422e-06, + "loss": 0.1294, + "num_input_tokens_seen": 157062400, + "step": 49900 + }, + { + "epoch": 3.19473785289034, + "grad_norm": 6.640144348144531, + "learning_rate": 1.7363251100625922e-06, + "loss": 0.1612, + "num_input_tokens_seen": 157079488, + "step": 49905 + }, + { + "epoch": 3.1950579348313166, + "grad_norm": 6.543126106262207, + "learning_rate": 1.735793179409888e-06, + "loss": 0.1299, + "num_input_tokens_seen": 157096320, + "step": 49910 + }, + { + "epoch": 3.1953780167722936, + "grad_norm": 5.539280414581299, + "learning_rate": 1.7352612869176932e-06, + "loss": 0.1561, + "num_input_tokens_seen": 157111680, + "step": 49915 + }, + { + "epoch": 3.1956980987132706, + "grad_norm": 11.659318923950195, + "learning_rate": 1.7347294326125646e-06, + "loss": 0.1775, + "num_input_tokens_seen": 157127552, + "step": 49920 + }, + { + "epoch": 3.1960181806542476, + "grad_norm": 4.547883987426758, + "learning_rate": 1.7341976165210638e-06, + "loss": 0.1089, + "num_input_tokens_seen": 157143488, + "step": 49925 + }, + { + "epoch": 3.1963382625952246, + "grad_norm": 6.928500652313232, + "learning_rate": 1.7336658386697436e-06, + "loss": 0.197, + "num_input_tokens_seen": 157158720, + "step": 49930 + }, + { + "epoch": 3.196658344536201, + "grad_norm": 8.076120376586914, + "learning_rate": 1.7331340990851597e-06, + "loss": 0.1638, + "num_input_tokens_seen": 157176192, + "step": 49935 + }, + { + "epoch": 3.196978426477178, + "grad_norm": 4.991436004638672, + "learning_rate": 1.732602397793865e-06, + "loss": 0.147, + "num_input_tokens_seen": 157192192, + "step": 49940 + }, + { + "epoch": 3.197298508418155, + "grad_norm": 4.259150981903076, + "learning_rate": 1.732070734822409e-06, + "loss": 0.1966, + "num_input_tokens_seen": 157208384, + "step": 49945 + }, + { + "epoch": 3.197618590359132, + "grad_norm": 8.0092191696167, + "learning_rate": 1.7315391101973416e-06, + "loss": 0.1784, + "num_input_tokens_seen": 157224832, + "step": 49950 + }, + { + "epoch": 3.1979386723001086, + "grad_norm": 3.9045958518981934, + "learning_rate": 1.7310075239452067e-06, + "loss": 0.1672, + "num_input_tokens_seen": 157240064, + "step": 49955 + }, + { + "epoch": 3.1982587542410856, + "grad_norm": 10.533625602722168, + "learning_rate": 1.7304759760925516e-06, + "loss": 0.2047, + "num_input_tokens_seen": 157256000, + "step": 49960 + }, + { + "epoch": 3.1985788361820626, + "grad_norm": 3.603011131286621, + "learning_rate": 1.729944466665917e-06, + "loss": 0.1714, + "num_input_tokens_seen": 157272704, + "step": 49965 + }, + { + "epoch": 3.1988989181230396, + "grad_norm": 8.333820343017578, + "learning_rate": 1.7294129956918459e-06, + "loss": 0.1405, + "num_input_tokens_seen": 157289664, + "step": 49970 + }, + { + "epoch": 3.1992190000640166, + "grad_norm": 4.9975738525390625, + "learning_rate": 1.7288815631968752e-06, + "loss": 0.1312, + "num_input_tokens_seen": 157306688, + "step": 49975 + }, + { + "epoch": 3.199539082004993, + "grad_norm": 5.083139419555664, + "learning_rate": 1.7283501692075432e-06, + "loss": 0.1973, + "num_input_tokens_seen": 157322688, + "step": 49980 + }, + { + "epoch": 3.19985916394597, + "grad_norm": 5.393588066101074, + "learning_rate": 1.7278188137503837e-06, + "loss": 0.1386, + "num_input_tokens_seen": 157337664, + "step": 49985 + }, + { + "epoch": 3.200179245886947, + "grad_norm": 5.899371147155762, + "learning_rate": 1.727287496851931e-06, + "loss": 0.1684, + "num_input_tokens_seen": 157353472, + "step": 49990 + }, + { + "epoch": 3.200499327827924, + "grad_norm": 3.058800458908081, + "learning_rate": 1.7267562185387148e-06, + "loss": 0.1185, + "num_input_tokens_seen": 157368768, + "step": 49995 + }, + { + "epoch": 3.2008194097689007, + "grad_norm": 6.078229904174805, + "learning_rate": 1.7262249788372659e-06, + "loss": 0.2063, + "num_input_tokens_seen": 157385728, + "step": 50000 + }, + { + "epoch": 3.2011394917098777, + "grad_norm": 11.759400367736816, + "learning_rate": 1.7256937777741117e-06, + "loss": 0.2027, + "num_input_tokens_seen": 157401152, + "step": 50005 + }, + { + "epoch": 3.2014595736508547, + "grad_norm": 4.519024848937988, + "learning_rate": 1.7251626153757765e-06, + "loss": 0.1551, + "num_input_tokens_seen": 157416576, + "step": 50010 + }, + { + "epoch": 3.2017796555918316, + "grad_norm": 4.660715579986572, + "learning_rate": 1.7246314916687846e-06, + "loss": 0.2308, + "num_input_tokens_seen": 157432320, + "step": 50015 + }, + { + "epoch": 3.2020997375328086, + "grad_norm": 18.3087158203125, + "learning_rate": 1.7241004066796566e-06, + "loss": 0.1514, + "num_input_tokens_seen": 157449152, + "step": 50020 + }, + { + "epoch": 3.202419819473785, + "grad_norm": 7.913225173950195, + "learning_rate": 1.723569360434914e-06, + "loss": 0.1343, + "num_input_tokens_seen": 157465344, + "step": 50025 + }, + { + "epoch": 3.202739901414762, + "grad_norm": 8.738430976867676, + "learning_rate": 1.723038352961072e-06, + "loss": 0.1736, + "num_input_tokens_seen": 157480256, + "step": 50030 + }, + { + "epoch": 3.203059983355739, + "grad_norm": 8.861289024353027, + "learning_rate": 1.7225073842846487e-06, + "loss": 0.244, + "num_input_tokens_seen": 157496320, + "step": 50035 + }, + { + "epoch": 3.203380065296716, + "grad_norm": 7.349343299865723, + "learning_rate": 1.721976454432156e-06, + "loss": 0.1672, + "num_input_tokens_seen": 157512128, + "step": 50040 + }, + { + "epoch": 3.2037001472376927, + "grad_norm": 5.972769737243652, + "learning_rate": 1.721445563430107e-06, + "loss": 0.1483, + "num_input_tokens_seen": 157526656, + "step": 50045 + }, + { + "epoch": 3.2040202291786697, + "grad_norm": 6.851008415222168, + "learning_rate": 1.7209147113050106e-06, + "loss": 0.0944, + "num_input_tokens_seen": 157542144, + "step": 50050 + }, + { + "epoch": 3.2043403111196467, + "grad_norm": 7.09483003616333, + "learning_rate": 1.7203838980833753e-06, + "loss": 0.1734, + "num_input_tokens_seen": 157557888, + "step": 50055 + }, + { + "epoch": 3.2046603930606237, + "grad_norm": 5.341103553771973, + "learning_rate": 1.7198531237917087e-06, + "loss": 0.1445, + "num_input_tokens_seen": 157573760, + "step": 50060 + }, + { + "epoch": 3.2049804750016, + "grad_norm": 6.375762939453125, + "learning_rate": 1.7193223884565121e-06, + "loss": 0.132, + "num_input_tokens_seen": 157588928, + "step": 50065 + }, + { + "epoch": 3.205300556942577, + "grad_norm": 8.061624526977539, + "learning_rate": 1.7187916921042902e-06, + "loss": 0.1412, + "num_input_tokens_seen": 157603776, + "step": 50070 + }, + { + "epoch": 3.205620638883554, + "grad_norm": 8.680792808532715, + "learning_rate": 1.7182610347615409e-06, + "loss": 0.1607, + "num_input_tokens_seen": 157620160, + "step": 50075 + }, + { + "epoch": 3.205940720824531, + "grad_norm": 10.540274620056152, + "learning_rate": 1.7177304164547643e-06, + "loss": 0.1085, + "num_input_tokens_seen": 157635136, + "step": 50080 + }, + { + "epoch": 3.206260802765508, + "grad_norm": 44.925106048583984, + "learning_rate": 1.7171998372104553e-06, + "loss": 0.1608, + "num_input_tokens_seen": 157649728, + "step": 50085 + }, + { + "epoch": 3.2065808847064847, + "grad_norm": 4.215550422668457, + "learning_rate": 1.71666929705511e-06, + "loss": 0.0994, + "num_input_tokens_seen": 157665152, + "step": 50090 + }, + { + "epoch": 3.2069009666474617, + "grad_norm": 7.763234615325928, + "learning_rate": 1.7161387960152187e-06, + "loss": 0.1194, + "num_input_tokens_seen": 157680064, + "step": 50095 + }, + { + "epoch": 3.2072210485884387, + "grad_norm": 3.240880250930786, + "learning_rate": 1.7156083341172735e-06, + "loss": 0.1971, + "num_input_tokens_seen": 157695552, + "step": 50100 + }, + { + "epoch": 3.2075411305294157, + "grad_norm": 6.809750556945801, + "learning_rate": 1.7150779113877619e-06, + "loss": 0.1669, + "num_input_tokens_seen": 157710784, + "step": 50105 + }, + { + "epoch": 3.2078612124703922, + "grad_norm": 15.253809928894043, + "learning_rate": 1.714547527853171e-06, + "loss": 0.1842, + "num_input_tokens_seen": 157725312, + "step": 50110 + }, + { + "epoch": 3.2081812944113692, + "grad_norm": 2.730792999267578, + "learning_rate": 1.7140171835399862e-06, + "loss": 0.1329, + "num_input_tokens_seen": 157741184, + "step": 50115 + }, + { + "epoch": 3.2085013763523462, + "grad_norm": 7.756038188934326, + "learning_rate": 1.7134868784746883e-06, + "loss": 0.1845, + "num_input_tokens_seen": 157758144, + "step": 50120 + }, + { + "epoch": 3.208821458293323, + "grad_norm": 9.851516723632812, + "learning_rate": 1.7129566126837598e-06, + "loss": 0.15, + "num_input_tokens_seen": 157773120, + "step": 50125 + }, + { + "epoch": 3.2091415402342998, + "grad_norm": 5.20206880569458, + "learning_rate": 1.7124263861936774e-06, + "loss": 0.0986, + "num_input_tokens_seen": 157788736, + "step": 50130 + }, + { + "epoch": 3.2094616221752768, + "grad_norm": 7.333652496337891, + "learning_rate": 1.7118961990309196e-06, + "loss": 0.1814, + "num_input_tokens_seen": 157804480, + "step": 50135 + }, + { + "epoch": 3.2097817041162537, + "grad_norm": 5.6765875816345215, + "learning_rate": 1.71136605122196e-06, + "loss": 0.1131, + "num_input_tokens_seen": 157820992, + "step": 50140 + }, + { + "epoch": 3.2101017860572307, + "grad_norm": 4.175249099731445, + "learning_rate": 1.710835942793273e-06, + "loss": 0.0996, + "num_input_tokens_seen": 157835520, + "step": 50145 + }, + { + "epoch": 3.2104218679982077, + "grad_norm": 6.1282782554626465, + "learning_rate": 1.7103058737713275e-06, + "loss": 0.1827, + "num_input_tokens_seen": 157850560, + "step": 50150 + }, + { + "epoch": 3.2107419499391843, + "grad_norm": 4.848918914794922, + "learning_rate": 1.7097758441825934e-06, + "loss": 0.1209, + "num_input_tokens_seen": 157865728, + "step": 50155 + }, + { + "epoch": 3.2110620318801613, + "grad_norm": 4.931347846984863, + "learning_rate": 1.7092458540535378e-06, + "loss": 0.0946, + "num_input_tokens_seen": 157881920, + "step": 50160 + }, + { + "epoch": 3.2113821138211383, + "grad_norm": 7.107218265533447, + "learning_rate": 1.7087159034106255e-06, + "loss": 0.1415, + "num_input_tokens_seen": 157897792, + "step": 50165 + }, + { + "epoch": 3.2117021957621152, + "grad_norm": 8.261052131652832, + "learning_rate": 1.7081859922803184e-06, + "loss": 0.1709, + "num_input_tokens_seen": 157913024, + "step": 50170 + }, + { + "epoch": 3.212022277703092, + "grad_norm": 5.401148319244385, + "learning_rate": 1.7076561206890787e-06, + "loss": 0.146, + "num_input_tokens_seen": 157929728, + "step": 50175 + }, + { + "epoch": 3.212342359644069, + "grad_norm": 0.07494408637285233, + "learning_rate": 1.7071262886633663e-06, + "loss": 0.1187, + "num_input_tokens_seen": 157945280, + "step": 50180 + }, + { + "epoch": 3.2126624415850458, + "grad_norm": 18.863054275512695, + "learning_rate": 1.706596496229636e-06, + "loss": 0.1315, + "num_input_tokens_seen": 157961920, + "step": 50185 + }, + { + "epoch": 3.2129825235260228, + "grad_norm": 10.016732215881348, + "learning_rate": 1.7060667434143446e-06, + "loss": 0.1425, + "num_input_tokens_seen": 157977792, + "step": 50190 + }, + { + "epoch": 3.2133026054669998, + "grad_norm": 5.496235370635986, + "learning_rate": 1.7055370302439439e-06, + "loss": 0.117, + "num_input_tokens_seen": 157993664, + "step": 50195 + }, + { + "epoch": 3.2136226874079763, + "grad_norm": 6.502987861633301, + "learning_rate": 1.7050073567448867e-06, + "loss": 0.1704, + "num_input_tokens_seen": 158008704, + "step": 50200 + }, + { + "epoch": 3.2139427693489533, + "grad_norm": 4.134666919708252, + "learning_rate": 1.7044777229436201e-06, + "loss": 0.1336, + "num_input_tokens_seen": 158024640, + "step": 50205 + }, + { + "epoch": 3.2142628512899303, + "grad_norm": 24.276233673095703, + "learning_rate": 1.7039481288665935e-06, + "loss": 0.18, + "num_input_tokens_seen": 158039680, + "step": 50210 + }, + { + "epoch": 3.2145829332309073, + "grad_norm": 4.024935245513916, + "learning_rate": 1.7034185745402498e-06, + "loss": 0.1753, + "num_input_tokens_seen": 158055296, + "step": 50215 + }, + { + "epoch": 3.214903015171884, + "grad_norm": 8.680486679077148, + "learning_rate": 1.7028890599910337e-06, + "loss": 0.1483, + "num_input_tokens_seen": 158070528, + "step": 50220 + }, + { + "epoch": 3.215223097112861, + "grad_norm": 7.929074764251709, + "learning_rate": 1.7023595852453855e-06, + "loss": 0.1415, + "num_input_tokens_seen": 158086016, + "step": 50225 + }, + { + "epoch": 3.215543179053838, + "grad_norm": 7.486655235290527, + "learning_rate": 1.7018301503297447e-06, + "loss": 0.176, + "num_input_tokens_seen": 158101568, + "step": 50230 + }, + { + "epoch": 3.215863260994815, + "grad_norm": 8.025415420532227, + "learning_rate": 1.7013007552705495e-06, + "loss": 0.1828, + "num_input_tokens_seen": 158116352, + "step": 50235 + }, + { + "epoch": 3.216183342935792, + "grad_norm": 7.013148307800293, + "learning_rate": 1.7007714000942338e-06, + "loss": 0.1761, + "num_input_tokens_seen": 158131392, + "step": 50240 + }, + { + "epoch": 3.2165034248767683, + "grad_norm": 12.154388427734375, + "learning_rate": 1.7002420848272317e-06, + "loss": 0.1337, + "num_input_tokens_seen": 158147008, + "step": 50245 + }, + { + "epoch": 3.2168235068177453, + "grad_norm": 7.014977931976318, + "learning_rate": 1.6997128094959736e-06, + "loss": 0.1049, + "num_input_tokens_seen": 158161536, + "step": 50250 + }, + { + "epoch": 3.2171435887587223, + "grad_norm": 7.86269998550415, + "learning_rate": 1.6991835741268903e-06, + "loss": 0.2973, + "num_input_tokens_seen": 158177600, + "step": 50255 + }, + { + "epoch": 3.2174636706996993, + "grad_norm": 12.109865188598633, + "learning_rate": 1.6986543787464065e-06, + "loss": 0.2662, + "num_input_tokens_seen": 158192384, + "step": 50260 + }, + { + "epoch": 3.217783752640676, + "grad_norm": 32.997982025146484, + "learning_rate": 1.6981252233809505e-06, + "loss": 0.2164, + "num_input_tokens_seen": 158207424, + "step": 50265 + }, + { + "epoch": 3.218103834581653, + "grad_norm": 9.469545364379883, + "learning_rate": 1.697596108056943e-06, + "loss": 0.3059, + "num_input_tokens_seen": 158223680, + "step": 50270 + }, + { + "epoch": 3.21842391652263, + "grad_norm": 16.414350509643555, + "learning_rate": 1.6970670328008066e-06, + "loss": 0.1693, + "num_input_tokens_seen": 158240448, + "step": 50275 + }, + { + "epoch": 3.218743998463607, + "grad_norm": 5.759683609008789, + "learning_rate": 1.69653799763896e-06, + "loss": 0.1316, + "num_input_tokens_seen": 158255552, + "step": 50280 + }, + { + "epoch": 3.219064080404584, + "grad_norm": 5.7089996337890625, + "learning_rate": 1.6960090025978204e-06, + "loss": 0.1596, + "num_input_tokens_seen": 158272704, + "step": 50285 + }, + { + "epoch": 3.2193841623455604, + "grad_norm": 4.506603717803955, + "learning_rate": 1.6954800477038046e-06, + "loss": 0.1489, + "num_input_tokens_seen": 158288064, + "step": 50290 + }, + { + "epoch": 3.2197042442865373, + "grad_norm": 3.901923656463623, + "learning_rate": 1.6949511329833236e-06, + "loss": 0.1086, + "num_input_tokens_seen": 158304704, + "step": 50295 + }, + { + "epoch": 3.2200243262275143, + "grad_norm": 79.28550720214844, + "learning_rate": 1.6944222584627907e-06, + "loss": 0.1302, + "num_input_tokens_seen": 158320384, + "step": 50300 + }, + { + "epoch": 3.2203444081684913, + "grad_norm": 7.471673011779785, + "learning_rate": 1.6938934241686128e-06, + "loss": 0.15, + "num_input_tokens_seen": 158335168, + "step": 50305 + }, + { + "epoch": 3.220664490109468, + "grad_norm": 6.76357364654541, + "learning_rate": 1.6933646301271993e-06, + "loss": 0.1386, + "num_input_tokens_seen": 158350784, + "step": 50310 + }, + { + "epoch": 3.220984572050445, + "grad_norm": 30.80588722229004, + "learning_rate": 1.692835876364954e-06, + "loss": 0.1085, + "num_input_tokens_seen": 158365504, + "step": 50315 + }, + { + "epoch": 3.221304653991422, + "grad_norm": 5.804274082183838, + "learning_rate": 1.6923071629082815e-06, + "loss": 0.1634, + "num_input_tokens_seen": 158380416, + "step": 50320 + }, + { + "epoch": 3.221624735932399, + "grad_norm": 4.919623851776123, + "learning_rate": 1.6917784897835815e-06, + "loss": 0.1808, + "num_input_tokens_seen": 158397184, + "step": 50325 + }, + { + "epoch": 3.2219448178733754, + "grad_norm": 29.315393447875977, + "learning_rate": 1.6912498570172542e-06, + "loss": 0.2651, + "num_input_tokens_seen": 158411904, + "step": 50330 + }, + { + "epoch": 3.2222648998143524, + "grad_norm": 7.251054286956787, + "learning_rate": 1.6907212646356957e-06, + "loss": 0.2043, + "num_input_tokens_seen": 158427264, + "step": 50335 + }, + { + "epoch": 3.2225849817553294, + "grad_norm": 7.655329704284668, + "learning_rate": 1.6901927126653028e-06, + "loss": 0.1144, + "num_input_tokens_seen": 158442240, + "step": 50340 + }, + { + "epoch": 3.2229050636963064, + "grad_norm": 6.608954906463623, + "learning_rate": 1.689664201132467e-06, + "loss": 0.2137, + "num_input_tokens_seen": 158457920, + "step": 50345 + }, + { + "epoch": 3.2232251456372834, + "grad_norm": 5.528016090393066, + "learning_rate": 1.6891357300635803e-06, + "loss": 0.0984, + "num_input_tokens_seen": 158473664, + "step": 50350 + }, + { + "epoch": 3.22354522757826, + "grad_norm": 13.120588302612305, + "learning_rate": 1.6886072994850322e-06, + "loss": 0.2136, + "num_input_tokens_seen": 158491200, + "step": 50355 + }, + { + "epoch": 3.223865309519237, + "grad_norm": 7.37809944152832, + "learning_rate": 1.688078909423208e-06, + "loss": 0.1379, + "num_input_tokens_seen": 158506880, + "step": 50360 + }, + { + "epoch": 3.224185391460214, + "grad_norm": 8.804204940795898, + "learning_rate": 1.687550559904495e-06, + "loss": 0.1701, + "num_input_tokens_seen": 158522112, + "step": 50365 + }, + { + "epoch": 3.224505473401191, + "grad_norm": 4.085415363311768, + "learning_rate": 1.6870222509552742e-06, + "loss": 0.2155, + "num_input_tokens_seen": 158538176, + "step": 50370 + }, + { + "epoch": 3.2248255553421674, + "grad_norm": 9.345420837402344, + "learning_rate": 1.6864939826019288e-06, + "loss": 0.1279, + "num_input_tokens_seen": 158553728, + "step": 50375 + }, + { + "epoch": 3.2251456372831444, + "grad_norm": 7.49587869644165, + "learning_rate": 1.6859657548708353e-06, + "loss": 0.1478, + "num_input_tokens_seen": 158568768, + "step": 50380 + }, + { + "epoch": 3.2254657192241214, + "grad_norm": 4.653459072113037, + "learning_rate": 1.6854375677883727e-06, + "loss": 0.1325, + "num_input_tokens_seen": 158585024, + "step": 50385 + }, + { + "epoch": 3.2257858011650984, + "grad_norm": 7.100080490112305, + "learning_rate": 1.6849094213809142e-06, + "loss": 0.087, + "num_input_tokens_seen": 158601024, + "step": 50390 + }, + { + "epoch": 3.226105883106075, + "grad_norm": 19.201475143432617, + "learning_rate": 1.6843813156748345e-06, + "loss": 0.2116, + "num_input_tokens_seen": 158616512, + "step": 50395 + }, + { + "epoch": 3.226425965047052, + "grad_norm": 8.550983428955078, + "learning_rate": 1.6838532506965028e-06, + "loss": 0.2442, + "num_input_tokens_seen": 158632512, + "step": 50400 + }, + { + "epoch": 3.226746046988029, + "grad_norm": 9.168266296386719, + "learning_rate": 1.6833252264722885e-06, + "loss": 0.1336, + "num_input_tokens_seen": 158648192, + "step": 50405 + }, + { + "epoch": 3.227066128929006, + "grad_norm": 6.830845355987549, + "learning_rate": 1.6827972430285595e-06, + "loss": 0.1862, + "num_input_tokens_seen": 158663744, + "step": 50410 + }, + { + "epoch": 3.227386210869983, + "grad_norm": 6.72151517868042, + "learning_rate": 1.6822693003916784e-06, + "loss": 0.1386, + "num_input_tokens_seen": 158679808, + "step": 50415 + }, + { + "epoch": 3.2277062928109594, + "grad_norm": 12.175323486328125, + "learning_rate": 1.6817413985880098e-06, + "loss": 0.1487, + "num_input_tokens_seen": 158695552, + "step": 50420 + }, + { + "epoch": 3.2280263747519364, + "grad_norm": 3.4181559085845947, + "learning_rate": 1.6812135376439125e-06, + "loss": 0.1717, + "num_input_tokens_seen": 158711040, + "step": 50425 + }, + { + "epoch": 3.2283464566929134, + "grad_norm": 9.166351318359375, + "learning_rate": 1.680685717585748e-06, + "loss": 0.2013, + "num_input_tokens_seen": 158725888, + "step": 50430 + }, + { + "epoch": 3.2286665386338904, + "grad_norm": 11.179890632629395, + "learning_rate": 1.6801579384398698e-06, + "loss": 0.1652, + "num_input_tokens_seen": 158741760, + "step": 50435 + }, + { + "epoch": 3.228986620574867, + "grad_norm": 9.733363151550293, + "learning_rate": 1.6796302002326347e-06, + "loss": 0.1905, + "num_input_tokens_seen": 158756352, + "step": 50440 + }, + { + "epoch": 3.229306702515844, + "grad_norm": 4.1514410972595215, + "learning_rate": 1.6791025029903932e-06, + "loss": 0.1151, + "num_input_tokens_seen": 158773120, + "step": 50445 + }, + { + "epoch": 3.229626784456821, + "grad_norm": 7.271023273468018, + "learning_rate": 1.6785748467394974e-06, + "loss": 0.1392, + "num_input_tokens_seen": 158789312, + "step": 50450 + }, + { + "epoch": 3.229946866397798, + "grad_norm": 5.503317832946777, + "learning_rate": 1.6780472315062947e-06, + "loss": 0.1836, + "num_input_tokens_seen": 158804544, + "step": 50455 + }, + { + "epoch": 3.230266948338775, + "grad_norm": 14.047463417053223, + "learning_rate": 1.6775196573171315e-06, + "loss": 0.2771, + "num_input_tokens_seen": 158819904, + "step": 50460 + }, + { + "epoch": 3.2305870302797515, + "grad_norm": 4.355475902557373, + "learning_rate": 1.676992124198354e-06, + "loss": 0.1154, + "num_input_tokens_seen": 158834112, + "step": 50465 + }, + { + "epoch": 3.2309071122207285, + "grad_norm": 70.97622680664062, + "learning_rate": 1.6764646321763014e-06, + "loss": 0.1504, + "num_input_tokens_seen": 158850112, + "step": 50470 + }, + { + "epoch": 3.2312271941617055, + "grad_norm": 27.300739288330078, + "learning_rate": 1.6759371812773163e-06, + "loss": 0.1946, + "num_input_tokens_seen": 158866176, + "step": 50475 + }, + { + "epoch": 3.2315472761026824, + "grad_norm": 24.682632446289062, + "learning_rate": 1.675409771527735e-06, + "loss": 0.1821, + "num_input_tokens_seen": 158880768, + "step": 50480 + }, + { + "epoch": 3.231867358043659, + "grad_norm": 3.9564075469970703, + "learning_rate": 1.6748824029538963e-06, + "loss": 0.242, + "num_input_tokens_seen": 158895872, + "step": 50485 + }, + { + "epoch": 3.232187439984636, + "grad_norm": 4.199217319488525, + "learning_rate": 1.6743550755821308e-06, + "loss": 0.1342, + "num_input_tokens_seen": 158911872, + "step": 50490 + }, + { + "epoch": 3.232507521925613, + "grad_norm": 7.682406425476074, + "learning_rate": 1.673827789438774e-06, + "loss": 0.1507, + "num_input_tokens_seen": 158928768, + "step": 50495 + }, + { + "epoch": 3.23282760386659, + "grad_norm": 8.630600929260254, + "learning_rate": 1.6733005445501521e-06, + "loss": 0.1835, + "num_input_tokens_seen": 158947392, + "step": 50500 + }, + { + "epoch": 3.233147685807567, + "grad_norm": 7.353184223175049, + "learning_rate": 1.6727733409425962e-06, + "loss": 0.1034, + "num_input_tokens_seen": 158963520, + "step": 50505 + }, + { + "epoch": 3.2334677677485435, + "grad_norm": 3.4102721214294434, + "learning_rate": 1.6722461786424299e-06, + "loss": 0.172, + "num_input_tokens_seen": 158979392, + "step": 50510 + }, + { + "epoch": 3.2337878496895205, + "grad_norm": 3.2200675010681152, + "learning_rate": 1.6717190576759787e-06, + "loss": 0.2307, + "num_input_tokens_seen": 158993728, + "step": 50515 + }, + { + "epoch": 3.2341079316304975, + "grad_norm": 7.407596111297607, + "learning_rate": 1.6711919780695623e-06, + "loss": 0.1392, + "num_input_tokens_seen": 159008768, + "step": 50520 + }, + { + "epoch": 3.2344280135714745, + "grad_norm": 80.88648986816406, + "learning_rate": 1.6706649398495018e-06, + "loss": 0.1858, + "num_input_tokens_seen": 159025216, + "step": 50525 + }, + { + "epoch": 3.234748095512451, + "grad_norm": 11.51675033569336, + "learning_rate": 1.6701379430421153e-06, + "loss": 0.1169, + "num_input_tokens_seen": 159040064, + "step": 50530 + }, + { + "epoch": 3.235068177453428, + "grad_norm": 16.507680892944336, + "learning_rate": 1.6696109876737163e-06, + "loss": 0.1845, + "num_input_tokens_seen": 159055616, + "step": 50535 + }, + { + "epoch": 3.235388259394405, + "grad_norm": 3.938551187515259, + "learning_rate": 1.6690840737706195e-06, + "loss": 0.176, + "num_input_tokens_seen": 159071296, + "step": 50540 + }, + { + "epoch": 3.235708341335382, + "grad_norm": 42.259552001953125, + "learning_rate": 1.6685572013591355e-06, + "loss": 0.2208, + "num_input_tokens_seen": 159086016, + "step": 50545 + }, + { + "epoch": 3.236028423276359, + "grad_norm": 7.4962053298950195, + "learning_rate": 1.6680303704655753e-06, + "loss": 0.144, + "num_input_tokens_seen": 159100864, + "step": 50550 + }, + { + "epoch": 3.2363485052173355, + "grad_norm": 7.248525619506836, + "learning_rate": 1.6675035811162437e-06, + "loss": 0.1831, + "num_input_tokens_seen": 159117056, + "step": 50555 + }, + { + "epoch": 3.2366685871583125, + "grad_norm": 7.513858318328857, + "learning_rate": 1.6669768333374476e-06, + "loss": 0.1634, + "num_input_tokens_seen": 159133440, + "step": 50560 + }, + { + "epoch": 3.2369886690992895, + "grad_norm": 49.73922348022461, + "learning_rate": 1.6664501271554889e-06, + "loss": 0.1818, + "num_input_tokens_seen": 159149568, + "step": 50565 + }, + { + "epoch": 3.2373087510402665, + "grad_norm": 4.977810859680176, + "learning_rate": 1.6659234625966697e-06, + "loss": 0.1208, + "num_input_tokens_seen": 159164608, + "step": 50570 + }, + { + "epoch": 3.237628832981243, + "grad_norm": 5.141548156738281, + "learning_rate": 1.6653968396872877e-06, + "loss": 0.0906, + "num_input_tokens_seen": 159179136, + "step": 50575 + }, + { + "epoch": 3.23794891492222, + "grad_norm": 7.466357707977295, + "learning_rate": 1.6648702584536398e-06, + "loss": 0.2349, + "num_input_tokens_seen": 159195456, + "step": 50580 + }, + { + "epoch": 3.238268996863197, + "grad_norm": 6.614376068115234, + "learning_rate": 1.6643437189220223e-06, + "loss": 0.1733, + "num_input_tokens_seen": 159211840, + "step": 50585 + }, + { + "epoch": 3.238589078804174, + "grad_norm": 6.789371967315674, + "learning_rate": 1.6638172211187258e-06, + "loss": 0.1659, + "num_input_tokens_seen": 159229888, + "step": 50590 + }, + { + "epoch": 3.2389091607451506, + "grad_norm": 7.176531791687012, + "learning_rate": 1.6632907650700419e-06, + "loss": 0.1958, + "num_input_tokens_seen": 159245376, + "step": 50595 + }, + { + "epoch": 3.2392292426861276, + "grad_norm": 6.915567398071289, + "learning_rate": 1.662764350802259e-06, + "loss": 0.1657, + "num_input_tokens_seen": 159260608, + "step": 50600 + }, + { + "epoch": 3.2395493246271045, + "grad_norm": 7.518538475036621, + "learning_rate": 1.6622379783416641e-06, + "loss": 0.1726, + "num_input_tokens_seen": 159276096, + "step": 50605 + }, + { + "epoch": 3.2398694065680815, + "grad_norm": 6.770845413208008, + "learning_rate": 1.6617116477145397e-06, + "loss": 0.15, + "num_input_tokens_seen": 159292992, + "step": 50610 + }, + { + "epoch": 3.2401894885090585, + "grad_norm": 100.22765350341797, + "learning_rate": 1.66118535894717e-06, + "loss": 0.1745, + "num_input_tokens_seen": 159307776, + "step": 50615 + }, + { + "epoch": 3.240509570450035, + "grad_norm": 7.256779670715332, + "learning_rate": 1.6606591120658333e-06, + "loss": 0.1346, + "num_input_tokens_seen": 159324160, + "step": 50620 + }, + { + "epoch": 3.240829652391012, + "grad_norm": 6.366015911102295, + "learning_rate": 1.66013290709681e-06, + "loss": 0.1276, + "num_input_tokens_seen": 159339392, + "step": 50625 + }, + { + "epoch": 3.241149734331989, + "grad_norm": 8.726564407348633, + "learning_rate": 1.6596067440663731e-06, + "loss": 0.1764, + "num_input_tokens_seen": 159356096, + "step": 50630 + }, + { + "epoch": 3.241469816272966, + "grad_norm": 6.336706638336182, + "learning_rate": 1.6590806230007986e-06, + "loss": 0.1711, + "num_input_tokens_seen": 159371008, + "step": 50635 + }, + { + "epoch": 3.2417898982139426, + "grad_norm": 21.165353775024414, + "learning_rate": 1.6585545439263585e-06, + "loss": 0.1747, + "num_input_tokens_seen": 159385792, + "step": 50640 + }, + { + "epoch": 3.2421099801549196, + "grad_norm": 8.124398231506348, + "learning_rate": 1.6580285068693202e-06, + "loss": 0.1891, + "num_input_tokens_seen": 159401600, + "step": 50645 + }, + { + "epoch": 3.2424300620958966, + "grad_norm": 7.710707187652588, + "learning_rate": 1.6575025118559532e-06, + "loss": 0.181, + "num_input_tokens_seen": 159417344, + "step": 50650 + }, + { + "epoch": 3.2427501440368736, + "grad_norm": 7.323596000671387, + "learning_rate": 1.656976558912522e-06, + "loss": 0.1942, + "num_input_tokens_seen": 159433792, + "step": 50655 + }, + { + "epoch": 3.24307022597785, + "grad_norm": 18.449819564819336, + "learning_rate": 1.6564506480652908e-06, + "loss": 0.1781, + "num_input_tokens_seen": 159449728, + "step": 50660 + }, + { + "epoch": 3.243390307918827, + "grad_norm": 5.332301139831543, + "learning_rate": 1.6559247793405198e-06, + "loss": 0.1456, + "num_input_tokens_seen": 159464704, + "step": 50665 + }, + { + "epoch": 3.243710389859804, + "grad_norm": 4.815529823303223, + "learning_rate": 1.6553989527644698e-06, + "loss": 0.113, + "num_input_tokens_seen": 159479936, + "step": 50670 + }, + { + "epoch": 3.244030471800781, + "grad_norm": 9.625861167907715, + "learning_rate": 1.6548731683633957e-06, + "loss": 0.1559, + "num_input_tokens_seen": 159495360, + "step": 50675 + }, + { + "epoch": 3.244350553741758, + "grad_norm": 7.5791144371032715, + "learning_rate": 1.6543474261635535e-06, + "loss": 0.1396, + "num_input_tokens_seen": 159509952, + "step": 50680 + }, + { + "epoch": 3.2446706356827346, + "grad_norm": 9.949881553649902, + "learning_rate": 1.6538217261911959e-06, + "loss": 0.1429, + "num_input_tokens_seen": 159526528, + "step": 50685 + }, + { + "epoch": 3.2449907176237116, + "grad_norm": 4.025523662567139, + "learning_rate": 1.6532960684725745e-06, + "loss": 0.1186, + "num_input_tokens_seen": 159541760, + "step": 50690 + }, + { + "epoch": 3.2453107995646886, + "grad_norm": 4.571617126464844, + "learning_rate": 1.6527704530339361e-06, + "loss": 0.1645, + "num_input_tokens_seen": 159558016, + "step": 50695 + }, + { + "epoch": 3.2456308815056656, + "grad_norm": 8.251895904541016, + "learning_rate": 1.6522448799015284e-06, + "loss": 0.1832, + "num_input_tokens_seen": 159576640, + "step": 50700 + }, + { + "epoch": 3.245950963446642, + "grad_norm": 3.4999189376831055, + "learning_rate": 1.6517193491015963e-06, + "loss": 0.1161, + "num_input_tokens_seen": 159592256, + "step": 50705 + }, + { + "epoch": 3.246271045387619, + "grad_norm": 2.7857489585876465, + "learning_rate": 1.6511938606603805e-06, + "loss": 0.0625, + "num_input_tokens_seen": 159607616, + "step": 50710 + }, + { + "epoch": 3.246591127328596, + "grad_norm": 4.985165119171143, + "learning_rate": 1.6506684146041233e-06, + "loss": 0.1533, + "num_input_tokens_seen": 159623744, + "step": 50715 + }, + { + "epoch": 3.246911209269573, + "grad_norm": 12.047113418579102, + "learning_rate": 1.6501430109590603e-06, + "loss": 0.1751, + "num_input_tokens_seen": 159639104, + "step": 50720 + }, + { + "epoch": 3.24723129121055, + "grad_norm": 6.111703395843506, + "learning_rate": 1.6496176497514294e-06, + "loss": 0.1446, + "num_input_tokens_seen": 159655168, + "step": 50725 + }, + { + "epoch": 3.2475513731515266, + "grad_norm": 7.109614372253418, + "learning_rate": 1.649092331007463e-06, + "loss": 0.1741, + "num_input_tokens_seen": 159671872, + "step": 50730 + }, + { + "epoch": 3.2478714550925036, + "grad_norm": 5.625573635101318, + "learning_rate": 1.6485670547533936e-06, + "loss": 0.1633, + "num_input_tokens_seen": 159686272, + "step": 50735 + }, + { + "epoch": 3.2481915370334806, + "grad_norm": 11.44894790649414, + "learning_rate": 1.6480418210154502e-06, + "loss": 0.2598, + "num_input_tokens_seen": 159705344, + "step": 50740 + }, + { + "epoch": 3.2485116189744576, + "grad_norm": 13.013175010681152, + "learning_rate": 1.6475166298198614e-06, + "loss": 0.189, + "num_input_tokens_seen": 159720576, + "step": 50745 + }, + { + "epoch": 3.248831700915434, + "grad_norm": 5.714058876037598, + "learning_rate": 1.6469914811928505e-06, + "loss": 0.0661, + "num_input_tokens_seen": 159736896, + "step": 50750 + }, + { + "epoch": 3.249151782856411, + "grad_norm": 4.709922790527344, + "learning_rate": 1.6464663751606425e-06, + "loss": 0.2317, + "num_input_tokens_seen": 159752320, + "step": 50755 + }, + { + "epoch": 3.249471864797388, + "grad_norm": 5.8126606941223145, + "learning_rate": 1.6459413117494582e-06, + "loss": 0.1442, + "num_input_tokens_seen": 159767872, + "step": 50760 + }, + { + "epoch": 3.249791946738365, + "grad_norm": 8.493279457092285, + "learning_rate": 1.6454162909855156e-06, + "loss": 0.1711, + "num_input_tokens_seen": 159782784, + "step": 50765 + }, + { + "epoch": 3.250112028679342, + "grad_norm": 8.73250675201416, + "learning_rate": 1.6448913128950321e-06, + "loss": 0.1899, + "num_input_tokens_seen": 159798016, + "step": 50770 + }, + { + "epoch": 3.2504321106203187, + "grad_norm": 4.852880954742432, + "learning_rate": 1.6443663775042224e-06, + "loss": 0.1005, + "num_input_tokens_seen": 159814976, + "step": 50775 + }, + { + "epoch": 3.250624159784905, + "eval_loss": 0.5568700432777405, + "eval_runtime": 51.2329, + "eval_samples_per_second": 271.037, + "eval_steps_per_second": 33.884, + "num_input_tokens_seen": 159826368, + "step": 50778 + }, + { + "epoch": 3.2507521925612957, + "grad_norm": 8.376089096069336, + "learning_rate": 1.6438414848392997e-06, + "loss": 0.2279, + "num_input_tokens_seen": 159832960, + "step": 50780 + }, + { + "epoch": 3.2510722745022727, + "grad_norm": 10.989237785339355, + "learning_rate": 1.6433166349264728e-06, + "loss": 0.1268, + "num_input_tokens_seen": 159848704, + "step": 50785 + }, + { + "epoch": 3.2513923564432496, + "grad_norm": 15.345236778259277, + "learning_rate": 1.642791827791951e-06, + "loss": 0.1757, + "num_input_tokens_seen": 159864832, + "step": 50790 + }, + { + "epoch": 3.251712438384226, + "grad_norm": 7.41381311416626, + "learning_rate": 1.6422670634619404e-06, + "loss": 0.1713, + "num_input_tokens_seen": 159880256, + "step": 50795 + }, + { + "epoch": 3.252032520325203, + "grad_norm": 9.944352149963379, + "learning_rate": 1.6417423419626452e-06, + "loss": 0.263, + "num_input_tokens_seen": 159896640, + "step": 50800 + }, + { + "epoch": 3.25235260226618, + "grad_norm": 18.889863967895508, + "learning_rate": 1.6412176633202664e-06, + "loss": 0.1778, + "num_input_tokens_seen": 159911616, + "step": 50805 + }, + { + "epoch": 3.252672684207157, + "grad_norm": 8.544744491577148, + "learning_rate": 1.640693027561004e-06, + "loss": 0.1722, + "num_input_tokens_seen": 159927680, + "step": 50810 + }, + { + "epoch": 3.252992766148134, + "grad_norm": 12.795489311218262, + "learning_rate": 1.6401684347110569e-06, + "loss": 0.1964, + "num_input_tokens_seen": 159943424, + "step": 50815 + }, + { + "epoch": 3.2533128480891107, + "grad_norm": 6.264442443847656, + "learning_rate": 1.6396438847966185e-06, + "loss": 0.2241, + "num_input_tokens_seen": 159961152, + "step": 50820 + }, + { + "epoch": 3.2536329300300877, + "grad_norm": 8.334115982055664, + "learning_rate": 1.6391193778438835e-06, + "loss": 0.1458, + "num_input_tokens_seen": 159976768, + "step": 50825 + }, + { + "epoch": 3.2539530119710647, + "grad_norm": 6.628175258636475, + "learning_rate": 1.6385949138790425e-06, + "loss": 0.1221, + "num_input_tokens_seen": 159992896, + "step": 50830 + }, + { + "epoch": 3.2542730939120417, + "grad_norm": 10.917234420776367, + "learning_rate": 1.6380704929282852e-06, + "loss": 0.198, + "num_input_tokens_seen": 160009792, + "step": 50835 + }, + { + "epoch": 3.254593175853018, + "grad_norm": 6.84970235824585, + "learning_rate": 1.6375461150177969e-06, + "loss": 0.165, + "num_input_tokens_seen": 160025856, + "step": 50840 + }, + { + "epoch": 3.254913257793995, + "grad_norm": 4.490631103515625, + "learning_rate": 1.6370217801737637e-06, + "loss": 0.2053, + "num_input_tokens_seen": 160040768, + "step": 50845 + }, + { + "epoch": 3.255233339734972, + "grad_norm": 5.272819519042969, + "learning_rate": 1.6364974884223672e-06, + "loss": 0.176, + "num_input_tokens_seen": 160056000, + "step": 50850 + }, + { + "epoch": 3.255553421675949, + "grad_norm": 8.668850898742676, + "learning_rate": 1.6359732397897895e-06, + "loss": 0.1947, + "num_input_tokens_seen": 160073600, + "step": 50855 + }, + { + "epoch": 3.255873503616926, + "grad_norm": 4.215150356292725, + "learning_rate": 1.6354490343022068e-06, + "loss": 0.1147, + "num_input_tokens_seen": 160088320, + "step": 50860 + }, + { + "epoch": 3.2561935855579027, + "grad_norm": 6.0204644203186035, + "learning_rate": 1.6349248719857966e-06, + "loss": 0.1613, + "num_input_tokens_seen": 160103296, + "step": 50865 + }, + { + "epoch": 3.2565136674988797, + "grad_norm": 9.217720985412598, + "learning_rate": 1.6344007528667317e-06, + "loss": 0.2662, + "num_input_tokens_seen": 160118336, + "step": 50870 + }, + { + "epoch": 3.2568337494398567, + "grad_norm": 6.383248805999756, + "learning_rate": 1.6338766769711839e-06, + "loss": 0.1258, + "num_input_tokens_seen": 160134080, + "step": 50875 + }, + { + "epoch": 3.2571538313808333, + "grad_norm": 5.129843235015869, + "learning_rate": 1.6333526443253245e-06, + "loss": 0.1309, + "num_input_tokens_seen": 160148992, + "step": 50880 + }, + { + "epoch": 3.2574739133218102, + "grad_norm": 4.34611177444458, + "learning_rate": 1.6328286549553192e-06, + "loss": 0.1755, + "num_input_tokens_seen": 160164544, + "step": 50885 + }, + { + "epoch": 3.2577939952627872, + "grad_norm": 48.572776794433594, + "learning_rate": 1.6323047088873345e-06, + "loss": 0.143, + "num_input_tokens_seen": 160179328, + "step": 50890 + }, + { + "epoch": 3.2581140772037642, + "grad_norm": 3.620229959487915, + "learning_rate": 1.6317808061475324e-06, + "loss": 0.1076, + "num_input_tokens_seen": 160194176, + "step": 50895 + }, + { + "epoch": 3.258434159144741, + "grad_norm": 10.993093490600586, + "learning_rate": 1.6312569467620754e-06, + "loss": 0.3658, + "num_input_tokens_seen": 160209472, + "step": 50900 + }, + { + "epoch": 3.2587542410857178, + "grad_norm": 2.816941738128662, + "learning_rate": 1.63073313075712e-06, + "loss": 0.1431, + "num_input_tokens_seen": 160225920, + "step": 50905 + }, + { + "epoch": 3.2590743230266948, + "grad_norm": 12.143885612487793, + "learning_rate": 1.6302093581588252e-06, + "loss": 0.1891, + "num_input_tokens_seen": 160241088, + "step": 50910 + }, + { + "epoch": 3.2593944049676717, + "grad_norm": 4.888676643371582, + "learning_rate": 1.629685628993344e-06, + "loss": 0.1269, + "num_input_tokens_seen": 160257984, + "step": 50915 + }, + { + "epoch": 3.2597144869086487, + "grad_norm": 8.27382755279541, + "learning_rate": 1.62916194328683e-06, + "loss": 0.1527, + "num_input_tokens_seen": 160273792, + "step": 50920 + }, + { + "epoch": 3.2600345688496253, + "grad_norm": 9.403180122375488, + "learning_rate": 1.6286383010654313e-06, + "loss": 0.1758, + "num_input_tokens_seen": 160290176, + "step": 50925 + }, + { + "epoch": 3.2603546507906023, + "grad_norm": 12.002925872802734, + "learning_rate": 1.6281147023552974e-06, + "loss": 0.2347, + "num_input_tokens_seen": 160308096, + "step": 50930 + }, + { + "epoch": 3.2606747327315793, + "grad_norm": 6.308529376983643, + "learning_rate": 1.627591147182574e-06, + "loss": 0.2139, + "num_input_tokens_seen": 160325504, + "step": 50935 + }, + { + "epoch": 3.2609948146725563, + "grad_norm": 5.981540679931641, + "learning_rate": 1.6270676355734043e-06, + "loss": 0.1226, + "num_input_tokens_seen": 160342208, + "step": 50940 + }, + { + "epoch": 3.2613148966135332, + "grad_norm": 19.349592208862305, + "learning_rate": 1.6265441675539306e-06, + "loss": 0.1524, + "num_input_tokens_seen": 160357184, + "step": 50945 + }, + { + "epoch": 3.26163497855451, + "grad_norm": 4.571577072143555, + "learning_rate": 1.626020743150291e-06, + "loss": 0.1378, + "num_input_tokens_seen": 160372352, + "step": 50950 + }, + { + "epoch": 3.261955060495487, + "grad_norm": 9.599946975708008, + "learning_rate": 1.625497362388624e-06, + "loss": 0.1331, + "num_input_tokens_seen": 160392448, + "step": 50955 + }, + { + "epoch": 3.2622751424364638, + "grad_norm": 5.2609124183654785, + "learning_rate": 1.6249740252950618e-06, + "loss": 0.143, + "num_input_tokens_seen": 160407616, + "step": 50960 + }, + { + "epoch": 3.2625952243774408, + "grad_norm": 5.8866376876831055, + "learning_rate": 1.6244507318957403e-06, + "loss": 0.1814, + "num_input_tokens_seen": 160423552, + "step": 50965 + }, + { + "epoch": 3.2629153063184173, + "grad_norm": 53.50702667236328, + "learning_rate": 1.6239274822167877e-06, + "loss": 0.1984, + "num_input_tokens_seen": 160440064, + "step": 50970 + }, + { + "epoch": 3.2632353882593943, + "grad_norm": 6.185179233551025, + "learning_rate": 1.623404276284335e-06, + "loss": 0.1385, + "num_input_tokens_seen": 160455360, + "step": 50975 + }, + { + "epoch": 3.2635554702003713, + "grad_norm": 4.0700531005859375, + "learning_rate": 1.6228811141245052e-06, + "loss": 0.1309, + "num_input_tokens_seen": 160471232, + "step": 50980 + }, + { + "epoch": 3.2638755521413483, + "grad_norm": 10.547647476196289, + "learning_rate": 1.622357995763424e-06, + "loss": 0.1685, + "num_input_tokens_seen": 160486976, + "step": 50985 + }, + { + "epoch": 3.2641956340823253, + "grad_norm": 8.046807289123535, + "learning_rate": 1.6218349212272139e-06, + "loss": 0.2235, + "num_input_tokens_seen": 160504256, + "step": 50990 + }, + { + "epoch": 3.264515716023302, + "grad_norm": 4.775209903717041, + "learning_rate": 1.6213118905419924e-06, + "loss": 0.1593, + "num_input_tokens_seen": 160521664, + "step": 50995 + }, + { + "epoch": 3.264835797964279, + "grad_norm": 4.244060039520264, + "learning_rate": 1.6207889037338792e-06, + "loss": 0.1769, + "num_input_tokens_seen": 160537024, + "step": 51000 + }, + { + "epoch": 3.265155879905256, + "grad_norm": 11.671082496643066, + "learning_rate": 1.6202659608289875e-06, + "loss": 0.1815, + "num_input_tokens_seen": 160552192, + "step": 51005 + }, + { + "epoch": 3.265475961846233, + "grad_norm": 4.334646701812744, + "learning_rate": 1.6197430618534327e-06, + "loss": 0.1378, + "num_input_tokens_seen": 160567616, + "step": 51010 + }, + { + "epoch": 3.2657960437872093, + "grad_norm": 7.317715167999268, + "learning_rate": 1.619220206833323e-06, + "loss": 0.1762, + "num_input_tokens_seen": 160584128, + "step": 51015 + }, + { + "epoch": 3.2661161257281863, + "grad_norm": 4.698061466217041, + "learning_rate": 1.6186973957947694e-06, + "loss": 0.1345, + "num_input_tokens_seen": 160600256, + "step": 51020 + }, + { + "epoch": 3.2664362076691633, + "grad_norm": 6.118804454803467, + "learning_rate": 1.618174628763876e-06, + "loss": 0.1763, + "num_input_tokens_seen": 160616000, + "step": 51025 + }, + { + "epoch": 3.2667562896101403, + "grad_norm": 9.342235565185547, + "learning_rate": 1.61765190576675e-06, + "loss": 0.1702, + "num_input_tokens_seen": 160633024, + "step": 51030 + }, + { + "epoch": 3.2670763715511173, + "grad_norm": 12.206131935119629, + "learning_rate": 1.6171292268294908e-06, + "loss": 0.2605, + "num_input_tokens_seen": 160648128, + "step": 51035 + }, + { + "epoch": 3.267396453492094, + "grad_norm": 10.355862617492676, + "learning_rate": 1.6166065919782004e-06, + "loss": 0.1063, + "num_input_tokens_seen": 160663360, + "step": 51040 + }, + { + "epoch": 3.267716535433071, + "grad_norm": 6.798655986785889, + "learning_rate": 1.616084001238974e-06, + "loss": 0.1636, + "num_input_tokens_seen": 160678848, + "step": 51045 + }, + { + "epoch": 3.268036617374048, + "grad_norm": 9.49485969543457, + "learning_rate": 1.6155614546379083e-06, + "loss": 0.2143, + "num_input_tokens_seen": 160694400, + "step": 51050 + }, + { + "epoch": 3.268356699315025, + "grad_norm": 6.040946006774902, + "learning_rate": 1.6150389522010975e-06, + "loss": 0.1625, + "num_input_tokens_seen": 160710912, + "step": 51055 + }, + { + "epoch": 3.2686767812560014, + "grad_norm": 7.118452072143555, + "learning_rate": 1.6145164939546315e-06, + "loss": 0.1617, + "num_input_tokens_seen": 160726016, + "step": 51060 + }, + { + "epoch": 3.2689968631969784, + "grad_norm": 13.180788040161133, + "learning_rate": 1.6139940799246001e-06, + "loss": 0.176, + "num_input_tokens_seen": 160741824, + "step": 51065 + }, + { + "epoch": 3.2693169451379553, + "grad_norm": 4.113795280456543, + "learning_rate": 1.6134717101370886e-06, + "loss": 0.1164, + "num_input_tokens_seen": 160757120, + "step": 51070 + }, + { + "epoch": 3.2696370270789323, + "grad_norm": 6.428089141845703, + "learning_rate": 1.6129493846181827e-06, + "loss": 0.1337, + "num_input_tokens_seen": 160772800, + "step": 51075 + }, + { + "epoch": 3.2699571090199093, + "grad_norm": 5.828808784484863, + "learning_rate": 1.612427103393963e-06, + "loss": 0.1944, + "num_input_tokens_seen": 160787584, + "step": 51080 + }, + { + "epoch": 3.270277190960886, + "grad_norm": 4.364309787750244, + "learning_rate": 1.6119048664905122e-06, + "loss": 0.1293, + "num_input_tokens_seen": 160804224, + "step": 51085 + }, + { + "epoch": 3.270597272901863, + "grad_norm": 5.266845703125, + "learning_rate": 1.611382673933905e-06, + "loss": 0.1281, + "num_input_tokens_seen": 160820160, + "step": 51090 + }, + { + "epoch": 3.27091735484284, + "grad_norm": 7.699068546295166, + "learning_rate": 1.6108605257502196e-06, + "loss": 0.1661, + "num_input_tokens_seen": 160835008, + "step": 51095 + }, + { + "epoch": 3.271237436783817, + "grad_norm": 15.779589653015137, + "learning_rate": 1.6103384219655271e-06, + "loss": 0.0908, + "num_input_tokens_seen": 160850944, + "step": 51100 + }, + { + "epoch": 3.2715575187247934, + "grad_norm": 7.480963706970215, + "learning_rate": 1.6098163626059e-06, + "loss": 0.11, + "num_input_tokens_seen": 160867328, + "step": 51105 + }, + { + "epoch": 3.2718776006657704, + "grad_norm": 22.19379234313965, + "learning_rate": 1.609294347697407e-06, + "loss": 0.1708, + "num_input_tokens_seen": 160881984, + "step": 51110 + }, + { + "epoch": 3.2721976826067474, + "grad_norm": 7.17547607421875, + "learning_rate": 1.608772377266115e-06, + "loss": 0.2074, + "num_input_tokens_seen": 160896768, + "step": 51115 + }, + { + "epoch": 3.2725177645477244, + "grad_norm": 12.681262969970703, + "learning_rate": 1.6082504513380886e-06, + "loss": 0.1457, + "num_input_tokens_seen": 160912960, + "step": 51120 + }, + { + "epoch": 3.2728378464887014, + "grad_norm": 13.415139198303223, + "learning_rate": 1.607728569939389e-06, + "loss": 0.1962, + "num_input_tokens_seen": 160930944, + "step": 51125 + }, + { + "epoch": 3.273157928429678, + "grad_norm": 10.885397911071777, + "learning_rate": 1.6072067330960777e-06, + "loss": 0.154, + "num_input_tokens_seen": 160945920, + "step": 51130 + }, + { + "epoch": 3.273478010370655, + "grad_norm": 10.294135093688965, + "learning_rate": 1.6066849408342112e-06, + "loss": 0.1215, + "num_input_tokens_seen": 160960576, + "step": 51135 + }, + { + "epoch": 3.273798092311632, + "grad_norm": 10.020764350891113, + "learning_rate": 1.6061631931798454e-06, + "loss": 0.1596, + "num_input_tokens_seen": 160976576, + "step": 51140 + }, + { + "epoch": 3.2741181742526084, + "grad_norm": 12.36303424835205, + "learning_rate": 1.605641490159034e-06, + "loss": 0.2135, + "num_input_tokens_seen": 160992768, + "step": 51145 + }, + { + "epoch": 3.2744382561935854, + "grad_norm": 8.709256172180176, + "learning_rate": 1.605119831797829e-06, + "loss": 0.1276, + "num_input_tokens_seen": 161008128, + "step": 51150 + }, + { + "epoch": 3.2747583381345624, + "grad_norm": 11.521294593811035, + "learning_rate": 1.6045982181222772e-06, + "loss": 0.1963, + "num_input_tokens_seen": 161024512, + "step": 51155 + }, + { + "epoch": 3.2750784200755394, + "grad_norm": 5.067239761352539, + "learning_rate": 1.6040766491584264e-06, + "loss": 0.1377, + "num_input_tokens_seen": 161039232, + "step": 51160 + }, + { + "epoch": 3.2753985020165164, + "grad_norm": 5.801013469696045, + "learning_rate": 1.6035551249323216e-06, + "loss": 0.1584, + "num_input_tokens_seen": 161054528, + "step": 51165 + }, + { + "epoch": 3.275718583957493, + "grad_norm": 5.472098350524902, + "learning_rate": 1.603033645470004e-06, + "loss": 0.1557, + "num_input_tokens_seen": 161069312, + "step": 51170 + }, + { + "epoch": 3.27603866589847, + "grad_norm": 9.958457946777344, + "learning_rate": 1.602512210797515e-06, + "loss": 0.1792, + "num_input_tokens_seen": 161084352, + "step": 51175 + }, + { + "epoch": 3.276358747839447, + "grad_norm": 25.952972412109375, + "learning_rate": 1.6019908209408902e-06, + "loss": 0.1318, + "num_input_tokens_seen": 161100672, + "step": 51180 + }, + { + "epoch": 3.276678829780424, + "grad_norm": 9.743878364562988, + "learning_rate": 1.6014694759261674e-06, + "loss": 0.1644, + "num_input_tokens_seen": 161116224, + "step": 51185 + }, + { + "epoch": 3.2769989117214005, + "grad_norm": 8.815506935119629, + "learning_rate": 1.6009481757793776e-06, + "loss": 0.1413, + "num_input_tokens_seen": 161132608, + "step": 51190 + }, + { + "epoch": 3.2773189936623774, + "grad_norm": 6.400786876678467, + "learning_rate": 1.6004269205265532e-06, + "loss": 0.1782, + "num_input_tokens_seen": 161149312, + "step": 51195 + }, + { + "epoch": 3.2776390756033544, + "grad_norm": 51.01892852783203, + "learning_rate": 1.5999057101937227e-06, + "loss": 0.1237, + "num_input_tokens_seen": 161165504, + "step": 51200 + }, + { + "epoch": 3.2779591575443314, + "grad_norm": 7.468023777008057, + "learning_rate": 1.5993845448069134e-06, + "loss": 0.1619, + "num_input_tokens_seen": 161180800, + "step": 51205 + }, + { + "epoch": 3.2782792394853084, + "grad_norm": 4.308183193206787, + "learning_rate": 1.5988634243921474e-06, + "loss": 0.1661, + "num_input_tokens_seen": 161196864, + "step": 51210 + }, + { + "epoch": 3.278599321426285, + "grad_norm": 6.91493034362793, + "learning_rate": 1.598342348975449e-06, + "loss": 0.1789, + "num_input_tokens_seen": 161211776, + "step": 51215 + }, + { + "epoch": 3.278919403367262, + "grad_norm": 10.787957191467285, + "learning_rate": 1.5978213185828368e-06, + "loss": 0.1678, + "num_input_tokens_seen": 161227008, + "step": 51220 + }, + { + "epoch": 3.279239485308239, + "grad_norm": 7.913827896118164, + "learning_rate": 1.5973003332403288e-06, + "loss": 0.1313, + "num_input_tokens_seen": 161243200, + "step": 51225 + }, + { + "epoch": 3.279559567249216, + "grad_norm": 6.867233753204346, + "learning_rate": 1.5967793929739406e-06, + "loss": 0.159, + "num_input_tokens_seen": 161257856, + "step": 51230 + }, + { + "epoch": 3.2798796491901925, + "grad_norm": 6.979186534881592, + "learning_rate": 1.596258497809684e-06, + "loss": 0.1918, + "num_input_tokens_seen": 161273536, + "step": 51235 + }, + { + "epoch": 3.2801997311311695, + "grad_norm": 3.613539934158325, + "learning_rate": 1.5957376477735714e-06, + "loss": 0.1254, + "num_input_tokens_seen": 161288192, + "step": 51240 + }, + { + "epoch": 3.2805198130721465, + "grad_norm": 25.235689163208008, + "learning_rate": 1.5952168428916098e-06, + "loss": 0.2384, + "num_input_tokens_seen": 161304832, + "step": 51245 + }, + { + "epoch": 3.2808398950131235, + "grad_norm": 6.078299045562744, + "learning_rate": 1.5946960831898068e-06, + "loss": 0.1288, + "num_input_tokens_seen": 161321472, + "step": 51250 + }, + { + "epoch": 3.2811599769541004, + "grad_norm": 8.404891014099121, + "learning_rate": 1.5941753686941652e-06, + "loss": 0.1756, + "num_input_tokens_seen": 161337152, + "step": 51255 + }, + { + "epoch": 3.281480058895077, + "grad_norm": 18.833829879760742, + "learning_rate": 1.5936546994306887e-06, + "loss": 0.167, + "num_input_tokens_seen": 161353472, + "step": 51260 + }, + { + "epoch": 3.281800140836054, + "grad_norm": 20.31379508972168, + "learning_rate": 1.5931340754253743e-06, + "loss": 0.2041, + "num_input_tokens_seen": 161368576, + "step": 51265 + }, + { + "epoch": 3.282120222777031, + "grad_norm": 3.512507200241089, + "learning_rate": 1.5926134967042218e-06, + "loss": 0.2391, + "num_input_tokens_seen": 161385024, + "step": 51270 + }, + { + "epoch": 3.282440304718008, + "grad_norm": 4.214332580566406, + "learning_rate": 1.5920929632932233e-06, + "loss": 0.2544, + "num_input_tokens_seen": 161401664, + "step": 51275 + }, + { + "epoch": 3.2827603866589845, + "grad_norm": 8.945878028869629, + "learning_rate": 1.5915724752183733e-06, + "loss": 0.2, + "num_input_tokens_seen": 161416000, + "step": 51280 + }, + { + "epoch": 3.2830804685999615, + "grad_norm": 44.066280364990234, + "learning_rate": 1.5910520325056627e-06, + "loss": 0.1225, + "num_input_tokens_seen": 161431424, + "step": 51285 + }, + { + "epoch": 3.2834005505409385, + "grad_norm": 4.688138484954834, + "learning_rate": 1.5905316351810784e-06, + "loss": 0.142, + "num_input_tokens_seen": 161446272, + "step": 51290 + }, + { + "epoch": 3.2837206324819155, + "grad_norm": 12.221532821655273, + "learning_rate": 1.5900112832706084e-06, + "loss": 0.1649, + "num_input_tokens_seen": 161461696, + "step": 51295 + }, + { + "epoch": 3.2840407144228925, + "grad_norm": 6.962289810180664, + "learning_rate": 1.5894909768002334e-06, + "loss": 0.1297, + "num_input_tokens_seen": 161476736, + "step": 51300 + }, + { + "epoch": 3.284360796363869, + "grad_norm": 74.34107971191406, + "learning_rate": 1.5889707157959373e-06, + "loss": 0.1668, + "num_input_tokens_seen": 161490944, + "step": 51305 + }, + { + "epoch": 3.284680878304846, + "grad_norm": 6.371334075927734, + "learning_rate": 1.5884505002836975e-06, + "loss": 0.0876, + "num_input_tokens_seen": 161505792, + "step": 51310 + }, + { + "epoch": 3.285000960245823, + "grad_norm": 16.569595336914062, + "learning_rate": 1.5879303302894928e-06, + "loss": 0.202, + "num_input_tokens_seen": 161522624, + "step": 51315 + }, + { + "epoch": 3.2853210421868, + "grad_norm": 12.565905570983887, + "learning_rate": 1.5874102058392954e-06, + "loss": 0.2409, + "num_input_tokens_seen": 161537152, + "step": 51320 + }, + { + "epoch": 3.2856411241277765, + "grad_norm": 9.080452919006348, + "learning_rate": 1.5868901269590797e-06, + "loss": 0.1001, + "num_input_tokens_seen": 161552832, + "step": 51325 + }, + { + "epoch": 3.2859612060687535, + "grad_norm": 5.898855686187744, + "learning_rate": 1.5863700936748142e-06, + "loss": 0.1714, + "num_input_tokens_seen": 161569024, + "step": 51330 + }, + { + "epoch": 3.2862812880097305, + "grad_norm": 9.416687965393066, + "learning_rate": 1.585850106012467e-06, + "loss": 0.1252, + "num_input_tokens_seen": 161584000, + "step": 51335 + }, + { + "epoch": 3.2866013699507075, + "grad_norm": 13.736821174621582, + "learning_rate": 1.5853301639980046e-06, + "loss": 0.2069, + "num_input_tokens_seen": 161600128, + "step": 51340 + }, + { + "epoch": 3.2869214518916845, + "grad_norm": 6.6939897537231445, + "learning_rate": 1.584810267657389e-06, + "loss": 0.17, + "num_input_tokens_seen": 161615744, + "step": 51345 + }, + { + "epoch": 3.287241533832661, + "grad_norm": 7.850003242492676, + "learning_rate": 1.5842904170165825e-06, + "loss": 0.1748, + "num_input_tokens_seen": 161631040, + "step": 51350 + }, + { + "epoch": 3.287561615773638, + "grad_norm": 5.772465705871582, + "learning_rate": 1.5837706121015418e-06, + "loss": 0.1542, + "num_input_tokens_seen": 161647424, + "step": 51355 + }, + { + "epoch": 3.287881697714615, + "grad_norm": 6.731820583343506, + "learning_rate": 1.5832508529382251e-06, + "loss": 0.1548, + "num_input_tokens_seen": 161663424, + "step": 51360 + }, + { + "epoch": 3.288201779655592, + "grad_norm": 12.970227241516113, + "learning_rate": 1.582731139552585e-06, + "loss": 0.2099, + "num_input_tokens_seen": 161678720, + "step": 51365 + }, + { + "epoch": 3.2885218615965686, + "grad_norm": 7.684327125549316, + "learning_rate": 1.5822114719705738e-06, + "loss": 0.1522, + "num_input_tokens_seen": 161694720, + "step": 51370 + }, + { + "epoch": 3.2888419435375456, + "grad_norm": 7.027535915374756, + "learning_rate": 1.5816918502181412e-06, + "loss": 0.2042, + "num_input_tokens_seen": 161711104, + "step": 51375 + }, + { + "epoch": 3.2891620254785225, + "grad_norm": 5.066629409790039, + "learning_rate": 1.5811722743212351e-06, + "loss": 0.1617, + "num_input_tokens_seen": 161727168, + "step": 51380 + }, + { + "epoch": 3.2894821074194995, + "grad_norm": 6.358968257904053, + "learning_rate": 1.5806527443057987e-06, + "loss": 0.1495, + "num_input_tokens_seen": 161742656, + "step": 51385 + }, + { + "epoch": 3.2898021893604765, + "grad_norm": 6.430131912231445, + "learning_rate": 1.580133260197776e-06, + "loss": 0.1643, + "num_input_tokens_seen": 161759104, + "step": 51390 + }, + { + "epoch": 3.290122271301453, + "grad_norm": 10.545416831970215, + "learning_rate": 1.5796138220231065e-06, + "loss": 0.1352, + "num_input_tokens_seen": 161774720, + "step": 51395 + }, + { + "epoch": 3.29044235324243, + "grad_norm": 9.921621322631836, + "learning_rate": 1.5790944298077281e-06, + "loss": 0.1548, + "num_input_tokens_seen": 161790336, + "step": 51400 + }, + { + "epoch": 3.290762435183407, + "grad_norm": 8.314552307128906, + "learning_rate": 1.5785750835775788e-06, + "loss": 0.1835, + "num_input_tokens_seen": 161805568, + "step": 51405 + }, + { + "epoch": 3.2910825171243836, + "grad_norm": 86.82206726074219, + "learning_rate": 1.5780557833585886e-06, + "loss": 0.2432, + "num_input_tokens_seen": 161821376, + "step": 51410 + }, + { + "epoch": 3.2914025990653606, + "grad_norm": 3.549093246459961, + "learning_rate": 1.577536529176692e-06, + "loss": 0.1006, + "num_input_tokens_seen": 161836992, + "step": 51415 + }, + { + "epoch": 3.2917226810063376, + "grad_norm": 5.708745956420898, + "learning_rate": 1.5770173210578146e-06, + "loss": 0.2084, + "num_input_tokens_seen": 161852608, + "step": 51420 + }, + { + "epoch": 3.2920427629473146, + "grad_norm": 3.8569540977478027, + "learning_rate": 1.5764981590278854e-06, + "loss": 0.1015, + "num_input_tokens_seen": 161868480, + "step": 51425 + }, + { + "epoch": 3.2923628448882916, + "grad_norm": 13.511311531066895, + "learning_rate": 1.5759790431128273e-06, + "loss": 0.1725, + "num_input_tokens_seen": 161884416, + "step": 51430 + }, + { + "epoch": 3.292682926829268, + "grad_norm": 18.962574005126953, + "learning_rate": 1.5754599733385635e-06, + "loss": 0.1928, + "num_input_tokens_seen": 161900288, + "step": 51435 + }, + { + "epoch": 3.293003008770245, + "grad_norm": 8.928439140319824, + "learning_rate": 1.574940949731012e-06, + "loss": 0.1744, + "num_input_tokens_seen": 161915712, + "step": 51440 + }, + { + "epoch": 3.293323090711222, + "grad_norm": 4.657034397125244, + "learning_rate": 1.5744219723160913e-06, + "loss": 0.1224, + "num_input_tokens_seen": 161931648, + "step": 51445 + }, + { + "epoch": 3.293643172652199, + "grad_norm": 51.14805603027344, + "learning_rate": 1.5739030411197162e-06, + "loss": 0.3029, + "num_input_tokens_seen": 161947072, + "step": 51450 + }, + { + "epoch": 3.2939632545931756, + "grad_norm": 18.115123748779297, + "learning_rate": 1.573384156167799e-06, + "loss": 0.2028, + "num_input_tokens_seen": 161961472, + "step": 51455 + }, + { + "epoch": 3.2942833365341526, + "grad_norm": 5.862053871154785, + "learning_rate": 1.5728653174862516e-06, + "loss": 0.2772, + "num_input_tokens_seen": 161976320, + "step": 51460 + }, + { + "epoch": 3.2946034184751296, + "grad_norm": 10.04053020477295, + "learning_rate": 1.5723465251009797e-06, + "loss": 0.2452, + "num_input_tokens_seen": 161991488, + "step": 51465 + }, + { + "epoch": 3.2949235004161066, + "grad_norm": 4.231895923614502, + "learning_rate": 1.5718277790378916e-06, + "loss": 0.1274, + "num_input_tokens_seen": 162007552, + "step": 51470 + }, + { + "epoch": 3.2952435823570836, + "grad_norm": 6.932429313659668, + "learning_rate": 1.5713090793228886e-06, + "loss": 0.1282, + "num_input_tokens_seen": 162023616, + "step": 51475 + }, + { + "epoch": 3.29556366429806, + "grad_norm": 4.809487342834473, + "learning_rate": 1.5707904259818731e-06, + "loss": 0.1591, + "num_input_tokens_seen": 162038848, + "step": 51480 + }, + { + "epoch": 3.295883746239037, + "grad_norm": 5.184708118438721, + "learning_rate": 1.5702718190407435e-06, + "loss": 0.1127, + "num_input_tokens_seen": 162053824, + "step": 51485 + }, + { + "epoch": 3.296203828180014, + "grad_norm": 8.855317115783691, + "learning_rate": 1.5697532585253972e-06, + "loss": 0.1609, + "num_input_tokens_seen": 162069120, + "step": 51490 + }, + { + "epoch": 3.296523910120991, + "grad_norm": 7.273053169250488, + "learning_rate": 1.5692347444617267e-06, + "loss": 0.1923, + "num_input_tokens_seen": 162084800, + "step": 51495 + }, + { + "epoch": 3.2968439920619677, + "grad_norm": 7.493747234344482, + "learning_rate": 1.5687162768756259e-06, + "loss": 0.1272, + "num_input_tokens_seen": 162100992, + "step": 51500 + }, + { + "epoch": 3.2971640740029446, + "grad_norm": 7.362309455871582, + "learning_rate": 1.5681978557929823e-06, + "loss": 0.1235, + "num_input_tokens_seen": 162115904, + "step": 51505 + }, + { + "epoch": 3.2974841559439216, + "grad_norm": 10.430526733398438, + "learning_rate": 1.5676794812396843e-06, + "loss": 0.2148, + "num_input_tokens_seen": 162131712, + "step": 51510 + }, + { + "epoch": 3.2978042378848986, + "grad_norm": 5.8010663986206055, + "learning_rate": 1.567161153241617e-06, + "loss": 0.1377, + "num_input_tokens_seen": 162147584, + "step": 51515 + }, + { + "epoch": 3.2981243198258756, + "grad_norm": 5.7173895835876465, + "learning_rate": 1.5666428718246623e-06, + "loss": 0.1467, + "num_input_tokens_seen": 162162496, + "step": 51520 + }, + { + "epoch": 3.298444401766852, + "grad_norm": 10.39479923248291, + "learning_rate": 1.566124637014702e-06, + "loss": 0.2769, + "num_input_tokens_seen": 162177408, + "step": 51525 + }, + { + "epoch": 3.298764483707829, + "grad_norm": 5.0975165367126465, + "learning_rate": 1.5656064488376122e-06, + "loss": 0.1259, + "num_input_tokens_seen": 162193216, + "step": 51530 + }, + { + "epoch": 3.299084565648806, + "grad_norm": 6.972218990325928, + "learning_rate": 1.5650883073192697e-06, + "loss": 0.1456, + "num_input_tokens_seen": 162208960, + "step": 51535 + }, + { + "epoch": 3.299404647589783, + "grad_norm": 5.528942108154297, + "learning_rate": 1.5645702124855466e-06, + "loss": 0.154, + "num_input_tokens_seen": 162226944, + "step": 51540 + }, + { + "epoch": 3.2997247295307597, + "grad_norm": 10.468759536743164, + "learning_rate": 1.5640521643623157e-06, + "loss": 0.2134, + "num_input_tokens_seen": 162242816, + "step": 51545 + }, + { + "epoch": 3.3000448114717367, + "grad_norm": 11.094947814941406, + "learning_rate": 1.5635341629754435e-06, + "loss": 0.1578, + "num_input_tokens_seen": 162258880, + "step": 51550 + }, + { + "epoch": 3.3003648934127137, + "grad_norm": 10.135830879211426, + "learning_rate": 1.5630162083507983e-06, + "loss": 0.2403, + "num_input_tokens_seen": 162274752, + "step": 51555 + }, + { + "epoch": 3.3006849753536907, + "grad_norm": 9.095956802368164, + "learning_rate": 1.5624983005142422e-06, + "loss": 0.2098, + "num_input_tokens_seen": 162289856, + "step": 51560 + }, + { + "epoch": 3.3010050572946676, + "grad_norm": 8.490371704101562, + "learning_rate": 1.5619804394916382e-06, + "loss": 0.211, + "num_input_tokens_seen": 162305856, + "step": 51565 + }, + { + "epoch": 3.301325139235644, + "grad_norm": 8.443197250366211, + "learning_rate": 1.5614626253088444e-06, + "loss": 0.1482, + "num_input_tokens_seen": 162321920, + "step": 51570 + }, + { + "epoch": 3.301645221176621, + "grad_norm": 7.3229851722717285, + "learning_rate": 1.5609448579917186e-06, + "loss": 0.2047, + "num_input_tokens_seen": 162338304, + "step": 51575 + }, + { + "epoch": 3.301965303117598, + "grad_norm": 6.452525615692139, + "learning_rate": 1.5604271375661162e-06, + "loss": 0.1453, + "num_input_tokens_seen": 162352768, + "step": 51580 + }, + { + "epoch": 3.302285385058575, + "grad_norm": 11.709814071655273, + "learning_rate": 1.5599094640578876e-06, + "loss": 0.2241, + "num_input_tokens_seen": 162369536, + "step": 51585 + }, + { + "epoch": 3.3026054669995517, + "grad_norm": 5.487593650817871, + "learning_rate": 1.5593918374928846e-06, + "loss": 0.1666, + "num_input_tokens_seen": 162384704, + "step": 51590 + }, + { + "epoch": 3.3029255489405287, + "grad_norm": 8.46298599243164, + "learning_rate": 1.5588742578969523e-06, + "loss": 0.2035, + "num_input_tokens_seen": 162400832, + "step": 51595 + }, + { + "epoch": 3.3032456308815057, + "grad_norm": 7.703113555908203, + "learning_rate": 1.5583567252959387e-06, + "loss": 0.125, + "num_input_tokens_seen": 162415552, + "step": 51600 + }, + { + "epoch": 3.3035657128224827, + "grad_norm": 6.132352828979492, + "learning_rate": 1.5578392397156845e-06, + "loss": 0.1395, + "num_input_tokens_seen": 162431680, + "step": 51605 + }, + { + "epoch": 3.3038857947634597, + "grad_norm": 13.416685104370117, + "learning_rate": 1.5573218011820317e-06, + "loss": 0.2007, + "num_input_tokens_seen": 162446912, + "step": 51610 + }, + { + "epoch": 3.304205876704436, + "grad_norm": 7.394120216369629, + "learning_rate": 1.5568044097208177e-06, + "loss": 0.1321, + "num_input_tokens_seen": 162463744, + "step": 51615 + }, + { + "epoch": 3.304525958645413, + "grad_norm": 7.468619346618652, + "learning_rate": 1.5562870653578788e-06, + "loss": 0.1678, + "num_input_tokens_seen": 162479232, + "step": 51620 + }, + { + "epoch": 3.30484604058639, + "grad_norm": 25.954212188720703, + "learning_rate": 1.5557697681190475e-06, + "loss": 0.1444, + "num_input_tokens_seen": 162494464, + "step": 51625 + }, + { + "epoch": 3.305166122527367, + "grad_norm": 8.051196098327637, + "learning_rate": 1.555252518030156e-06, + "loss": 0.1948, + "num_input_tokens_seen": 162510784, + "step": 51630 + }, + { + "epoch": 3.3054862044683437, + "grad_norm": 4.203535556793213, + "learning_rate": 1.5547353151170333e-06, + "loss": 0.1419, + "num_input_tokens_seen": 162527104, + "step": 51635 + }, + { + "epoch": 3.3058062864093207, + "grad_norm": 18.51481819152832, + "learning_rate": 1.5542181594055045e-06, + "loss": 0.1725, + "num_input_tokens_seen": 162544064, + "step": 51640 + }, + { + "epoch": 3.3061263683502977, + "grad_norm": 7.096724987030029, + "learning_rate": 1.5537010509213955e-06, + "loss": 0.1908, + "num_input_tokens_seen": 162558848, + "step": 51645 + }, + { + "epoch": 3.3064464502912747, + "grad_norm": 12.726764678955078, + "learning_rate": 1.5531839896905257e-06, + "loss": 0.1431, + "num_input_tokens_seen": 162574400, + "step": 51650 + }, + { + "epoch": 3.3067665322322517, + "grad_norm": 8.307439804077148, + "learning_rate": 1.5526669757387164e-06, + "loss": 0.146, + "num_input_tokens_seen": 162590464, + "step": 51655 + }, + { + "epoch": 3.3070866141732282, + "grad_norm": 4.492352485656738, + "learning_rate": 1.5521500090917832e-06, + "loss": 0.1786, + "num_input_tokens_seen": 162607424, + "step": 51660 + }, + { + "epoch": 3.3074066961142052, + "grad_norm": 7.41853666305542, + "learning_rate": 1.5516330897755427e-06, + "loss": 0.259, + "num_input_tokens_seen": 162623296, + "step": 51665 + }, + { + "epoch": 3.3077267780551822, + "grad_norm": 15.218579292297363, + "learning_rate": 1.5511162178158047e-06, + "loss": 0.1724, + "num_input_tokens_seen": 162639296, + "step": 51670 + }, + { + "epoch": 3.3080468599961588, + "grad_norm": 7.030643463134766, + "learning_rate": 1.5505993932383803e-06, + "loss": 0.1288, + "num_input_tokens_seen": 162654912, + "step": 51675 + }, + { + "epoch": 3.3083669419371358, + "grad_norm": 3.734372615814209, + "learning_rate": 1.5500826160690768e-06, + "loss": 0.1219, + "num_input_tokens_seen": 162669568, + "step": 51680 + }, + { + "epoch": 3.3086870238781128, + "grad_norm": 8.638437271118164, + "learning_rate": 1.5495658863336993e-06, + "loss": 0.1946, + "num_input_tokens_seen": 162685184, + "step": 51685 + }, + { + "epoch": 3.3090071058190897, + "grad_norm": 8.215057373046875, + "learning_rate": 1.5490492040580521e-06, + "loss": 0.1778, + "num_input_tokens_seen": 162700800, + "step": 51690 + }, + { + "epoch": 3.3093271877600667, + "grad_norm": 15.103486061096191, + "learning_rate": 1.5485325692679332e-06, + "loss": 0.2389, + "num_input_tokens_seen": 162715712, + "step": 51695 + }, + { + "epoch": 3.3096472697010433, + "grad_norm": 20.291696548461914, + "learning_rate": 1.5480159819891427e-06, + "loss": 0.1923, + "num_input_tokens_seen": 162731328, + "step": 51700 + }, + { + "epoch": 3.3099673516420203, + "grad_norm": 5.688510894775391, + "learning_rate": 1.5474994422474745e-06, + "loss": 0.1421, + "num_input_tokens_seen": 162746304, + "step": 51705 + }, + { + "epoch": 3.3102874335829973, + "grad_norm": 4.60178804397583, + "learning_rate": 1.5469829500687233e-06, + "loss": 0.1303, + "num_input_tokens_seen": 162762368, + "step": 51710 + }, + { + "epoch": 3.3106075155239743, + "grad_norm": 8.641544342041016, + "learning_rate": 1.5464665054786787e-06, + "loss": 0.1704, + "num_input_tokens_seen": 162777728, + "step": 51715 + }, + { + "epoch": 3.310927597464951, + "grad_norm": 3.4092397689819336, + "learning_rate": 1.5459501085031314e-06, + "loss": 0.0864, + "num_input_tokens_seen": 162794944, + "step": 51720 + }, + { + "epoch": 3.311247679405928, + "grad_norm": 6.543027400970459, + "learning_rate": 1.545433759167865e-06, + "loss": 0.1593, + "num_input_tokens_seen": 162810304, + "step": 51725 + }, + { + "epoch": 3.311567761346905, + "grad_norm": 6.434479713439941, + "learning_rate": 1.5449174574986658e-06, + "loss": 0.1378, + "num_input_tokens_seen": 162826176, + "step": 51730 + }, + { + "epoch": 3.3118878432878818, + "grad_norm": 3.6637415885925293, + "learning_rate": 1.5444012035213124e-06, + "loss": 0.2269, + "num_input_tokens_seen": 162841152, + "step": 51735 + }, + { + "epoch": 3.3122079252288588, + "grad_norm": 5.140065670013428, + "learning_rate": 1.5438849972615863e-06, + "loss": 0.1379, + "num_input_tokens_seen": 162856896, + "step": 51740 + }, + { + "epoch": 3.3125280071698353, + "grad_norm": 8.583602905273438, + "learning_rate": 1.5433688387452627e-06, + "loss": 0.112, + "num_input_tokens_seen": 162872768, + "step": 51745 + }, + { + "epoch": 3.3128480891108123, + "grad_norm": 7.297093391418457, + "learning_rate": 1.5428527279981161e-06, + "loss": 0.164, + "num_input_tokens_seen": 162888832, + "step": 51750 + }, + { + "epoch": 3.3131681710517893, + "grad_norm": 7.232794284820557, + "learning_rate": 1.54233666504592e-06, + "loss": 0.1244, + "num_input_tokens_seen": 162904512, + "step": 51755 + }, + { + "epoch": 3.3134882529927663, + "grad_norm": 4.6484880447387695, + "learning_rate": 1.5418206499144411e-06, + "loss": 0.1858, + "num_input_tokens_seen": 162920000, + "step": 51760 + }, + { + "epoch": 3.313808334933743, + "grad_norm": 4.847228050231934, + "learning_rate": 1.541304682629449e-06, + "loss": 0.1148, + "num_input_tokens_seen": 162934912, + "step": 51765 + }, + { + "epoch": 3.31412841687472, + "grad_norm": 5.570897579193115, + "learning_rate": 1.5407887632167065e-06, + "loss": 0.1397, + "num_input_tokens_seen": 162949824, + "step": 51770 + }, + { + "epoch": 3.314448498815697, + "grad_norm": 6.848281383514404, + "learning_rate": 1.540272891701978e-06, + "loss": 0.1157, + "num_input_tokens_seen": 162965632, + "step": 51775 + }, + { + "epoch": 3.314768580756674, + "grad_norm": 11.86388111114502, + "learning_rate": 1.5397570681110214e-06, + "loss": 0.2852, + "num_input_tokens_seen": 162980672, + "step": 51780 + }, + { + "epoch": 3.315088662697651, + "grad_norm": 7.68745756149292, + "learning_rate": 1.5392412924695958e-06, + "loss": 0.1575, + "num_input_tokens_seen": 162995392, + "step": 51785 + }, + { + "epoch": 3.3154087446386273, + "grad_norm": 5.540602207183838, + "learning_rate": 1.5387255648034545e-06, + "loss": 0.1573, + "num_input_tokens_seen": 163010624, + "step": 51790 + }, + { + "epoch": 3.3157288265796043, + "grad_norm": 6.346529483795166, + "learning_rate": 1.5382098851383524e-06, + "loss": 0.0979, + "num_input_tokens_seen": 163026304, + "step": 51795 + }, + { + "epoch": 3.3160489085205813, + "grad_norm": 12.972993850708008, + "learning_rate": 1.537694253500038e-06, + "loss": 0.1413, + "num_input_tokens_seen": 163043136, + "step": 51800 + }, + { + "epoch": 3.3163689904615583, + "grad_norm": 8.878958702087402, + "learning_rate": 1.5371786699142604e-06, + "loss": 0.1279, + "num_input_tokens_seen": 163058432, + "step": 51805 + }, + { + "epoch": 3.316689072402535, + "grad_norm": 37.72602081298828, + "learning_rate": 1.5366631344067657e-06, + "loss": 0.1982, + "num_input_tokens_seen": 163074048, + "step": 51810 + }, + { + "epoch": 3.317009154343512, + "grad_norm": 7.972975730895996, + "learning_rate": 1.5361476470032949e-06, + "loss": 0.1727, + "num_input_tokens_seen": 163089216, + "step": 51815 + }, + { + "epoch": 3.317329236284489, + "grad_norm": 6.455933094024658, + "learning_rate": 1.535632207729591e-06, + "loss": 0.1093, + "num_input_tokens_seen": 163106176, + "step": 51820 + }, + { + "epoch": 3.317649318225466, + "grad_norm": 7.900494575500488, + "learning_rate": 1.5351168166113915e-06, + "loss": 0.1487, + "num_input_tokens_seen": 163122816, + "step": 51825 + }, + { + "epoch": 3.317969400166443, + "grad_norm": 22.393447875976562, + "learning_rate": 1.5346014736744318e-06, + "loss": 0.1041, + "num_input_tokens_seen": 163138240, + "step": 51830 + }, + { + "epoch": 3.3182894821074194, + "grad_norm": 6.244485855102539, + "learning_rate": 1.534086178944446e-06, + "loss": 0.144, + "num_input_tokens_seen": 163153600, + "step": 51835 + }, + { + "epoch": 3.3186095640483964, + "grad_norm": 65.55116271972656, + "learning_rate": 1.5335709324471659e-06, + "loss": 0.1569, + "num_input_tokens_seen": 163168832, + "step": 51840 + }, + { + "epoch": 3.3189296459893733, + "grad_norm": 10.714997291564941, + "learning_rate": 1.5330557342083186e-06, + "loss": 0.2149, + "num_input_tokens_seen": 163183616, + "step": 51845 + }, + { + "epoch": 3.3192497279303503, + "grad_norm": 55.372310638427734, + "learning_rate": 1.5325405842536317e-06, + "loss": 0.2786, + "num_input_tokens_seen": 163199040, + "step": 51850 + }, + { + "epoch": 3.319569809871327, + "grad_norm": 7.706201076507568, + "learning_rate": 1.5320254826088282e-06, + "loss": 0.1501, + "num_input_tokens_seen": 163214784, + "step": 51855 + }, + { + "epoch": 3.319889891812304, + "grad_norm": 25.658390045166016, + "learning_rate": 1.5315104292996302e-06, + "loss": 0.1559, + "num_input_tokens_seen": 163231360, + "step": 51860 + }, + { + "epoch": 3.320209973753281, + "grad_norm": 9.078184127807617, + "learning_rate": 1.5309954243517575e-06, + "loss": 0.2074, + "num_input_tokens_seen": 163246656, + "step": 51865 + }, + { + "epoch": 3.320530055694258, + "grad_norm": 5.237385272979736, + "learning_rate": 1.5304804677909253e-06, + "loss": 0.2297, + "num_input_tokens_seen": 163262016, + "step": 51870 + }, + { + "epoch": 3.320850137635235, + "grad_norm": 8.072219848632812, + "learning_rate": 1.529965559642849e-06, + "loss": 0.1756, + "num_input_tokens_seen": 163276288, + "step": 51875 + }, + { + "epoch": 3.3211702195762114, + "grad_norm": 13.562582015991211, + "learning_rate": 1.5294506999332392e-06, + "loss": 0.1887, + "num_input_tokens_seen": 163291904, + "step": 51880 + }, + { + "epoch": 3.3214903015171884, + "grad_norm": 6.595180511474609, + "learning_rate": 1.5289358886878065e-06, + "loss": 0.1598, + "num_input_tokens_seen": 163306304, + "step": 51885 + }, + { + "epoch": 3.3218103834581654, + "grad_norm": 36.91824722290039, + "learning_rate": 1.5284211259322569e-06, + "loss": 0.1919, + "num_input_tokens_seen": 163323072, + "step": 51890 + }, + { + "epoch": 3.3221304653991424, + "grad_norm": 53.745548248291016, + "learning_rate": 1.5279064116922964e-06, + "loss": 0.1985, + "num_input_tokens_seen": 163337344, + "step": 51895 + }, + { + "epoch": 3.322450547340119, + "grad_norm": 6.135787010192871, + "learning_rate": 1.5273917459936255e-06, + "loss": 0.1853, + "num_input_tokens_seen": 163353984, + "step": 51900 + }, + { + "epoch": 3.322770629281096, + "grad_norm": 55.04901123046875, + "learning_rate": 1.526877128861945e-06, + "loss": 0.1524, + "num_input_tokens_seen": 163368960, + "step": 51905 + }, + { + "epoch": 3.323090711222073, + "grad_norm": 3.4666614532470703, + "learning_rate": 1.5263625603229515e-06, + "loss": 0.2759, + "num_input_tokens_seen": 163383616, + "step": 51910 + }, + { + "epoch": 3.32341079316305, + "grad_norm": 6.01698637008667, + "learning_rate": 1.5258480404023413e-06, + "loss": 0.1216, + "num_input_tokens_seen": 163398464, + "step": 51915 + }, + { + "epoch": 3.323730875104027, + "grad_norm": 17.592363357543945, + "learning_rate": 1.5253335691258049e-06, + "loss": 0.2778, + "num_input_tokens_seen": 163413504, + "step": 51920 + }, + { + "epoch": 3.3240509570450034, + "grad_norm": 5.765629768371582, + "learning_rate": 1.5248191465190326e-06, + "loss": 0.1836, + "num_input_tokens_seen": 163429888, + "step": 51925 + }, + { + "epoch": 3.3243710389859804, + "grad_norm": 8.308171272277832, + "learning_rate": 1.524304772607714e-06, + "loss": 0.1867, + "num_input_tokens_seen": 163444288, + "step": 51930 + }, + { + "epoch": 3.3246911209269574, + "grad_norm": 5.821718215942383, + "learning_rate": 1.5237904474175319e-06, + "loss": 0.1265, + "num_input_tokens_seen": 163461504, + "step": 51935 + }, + { + "epoch": 3.325011202867934, + "grad_norm": 7.616827487945557, + "learning_rate": 1.5232761709741703e-06, + "loss": 0.1699, + "num_input_tokens_seen": 163477952, + "step": 51940 + }, + { + "epoch": 3.325331284808911, + "grad_norm": 49.360145568847656, + "learning_rate": 1.5227619433033087e-06, + "loss": 0.3431, + "num_input_tokens_seen": 163493760, + "step": 51945 + }, + { + "epoch": 3.325651366749888, + "grad_norm": 7.22015905380249, + "learning_rate": 1.5222477644306265e-06, + "loss": 0.1206, + "num_input_tokens_seen": 163509568, + "step": 51950 + }, + { + "epoch": 3.325971448690865, + "grad_norm": 5.262717247009277, + "learning_rate": 1.5217336343817967e-06, + "loss": 0.1551, + "num_input_tokens_seen": 163525888, + "step": 51955 + }, + { + "epoch": 3.326291530631842, + "grad_norm": 10.68807601928711, + "learning_rate": 1.5212195531824952e-06, + "loss": 0.2371, + "num_input_tokens_seen": 163542016, + "step": 51960 + }, + { + "epoch": 3.3266116125728185, + "grad_norm": 6.770501136779785, + "learning_rate": 1.5207055208583893e-06, + "loss": 0.1392, + "num_input_tokens_seen": 163559872, + "step": 51965 + }, + { + "epoch": 3.3269316945137954, + "grad_norm": 15.291889190673828, + "learning_rate": 1.5201915374351498e-06, + "loss": 0.2296, + "num_input_tokens_seen": 163576832, + "step": 51970 + }, + { + "epoch": 3.3272517764547724, + "grad_norm": 62.57432174682617, + "learning_rate": 1.5196776029384402e-06, + "loss": 0.1343, + "num_input_tokens_seen": 163592448, + "step": 51975 + }, + { + "epoch": 3.3275718583957494, + "grad_norm": 6.300469875335693, + "learning_rate": 1.5191637173939254e-06, + "loss": 0.1818, + "num_input_tokens_seen": 163607680, + "step": 51980 + }, + { + "epoch": 3.327891940336726, + "grad_norm": 3.2493011951446533, + "learning_rate": 1.5186498808272662e-06, + "loss": 0.102, + "num_input_tokens_seen": 163623104, + "step": 51985 + }, + { + "epoch": 3.328212022277703, + "grad_norm": 7.6488213539123535, + "learning_rate": 1.5181360932641194e-06, + "loss": 0.2705, + "num_input_tokens_seen": 163638272, + "step": 51990 + }, + { + "epoch": 3.32853210421868, + "grad_norm": 8.778740882873535, + "learning_rate": 1.5176223547301423e-06, + "loss": 0.1754, + "num_input_tokens_seen": 163654720, + "step": 51995 + }, + { + "epoch": 3.328852186159657, + "grad_norm": 8.984928131103516, + "learning_rate": 1.517108665250987e-06, + "loss": 0.1243, + "num_input_tokens_seen": 163670400, + "step": 52000 + }, + { + "epoch": 3.329172268100634, + "grad_norm": 4.451147556304932, + "learning_rate": 1.5165950248523065e-06, + "loss": 0.1807, + "num_input_tokens_seen": 163686400, + "step": 52005 + }, + { + "epoch": 3.3294923500416105, + "grad_norm": 44.752960205078125, + "learning_rate": 1.5160814335597468e-06, + "loss": 0.1709, + "num_input_tokens_seen": 163702208, + "step": 52010 + }, + { + "epoch": 3.3298124319825875, + "grad_norm": 7.563310146331787, + "learning_rate": 1.5155678913989568e-06, + "loss": 0.1957, + "num_input_tokens_seen": 163719296, + "step": 52015 + }, + { + "epoch": 3.3301325139235645, + "grad_norm": 4.633562088012695, + "learning_rate": 1.5150543983955774e-06, + "loss": 0.2161, + "num_input_tokens_seen": 163735616, + "step": 52020 + }, + { + "epoch": 3.3304525958645415, + "grad_norm": 28.623489379882812, + "learning_rate": 1.514540954575251e-06, + "loss": 0.2133, + "num_input_tokens_seen": 163749952, + "step": 52025 + }, + { + "epoch": 3.330772677805518, + "grad_norm": 11.759675979614258, + "learning_rate": 1.5140275599636162e-06, + "loss": 0.1288, + "num_input_tokens_seen": 163765952, + "step": 52030 + }, + { + "epoch": 3.331092759746495, + "grad_norm": 7.8506879806518555, + "learning_rate": 1.513514214586309e-06, + "loss": 0.1351, + "num_input_tokens_seen": 163781056, + "step": 52035 + }, + { + "epoch": 3.331412841687472, + "grad_norm": 4.865603446960449, + "learning_rate": 1.5130009184689648e-06, + "loss": 0.15, + "num_input_tokens_seen": 163795584, + "step": 52040 + }, + { + "epoch": 3.331732923628449, + "grad_norm": 6.936765193939209, + "learning_rate": 1.5124876716372127e-06, + "loss": 0.2191, + "num_input_tokens_seen": 163811392, + "step": 52045 + }, + { + "epoch": 3.332053005569426, + "grad_norm": 6.909383773803711, + "learning_rate": 1.511974474116683e-06, + "loss": 0.1737, + "num_input_tokens_seen": 163827904, + "step": 52050 + }, + { + "epoch": 3.3323730875104025, + "grad_norm": 6.936972141265869, + "learning_rate": 1.5114613259330007e-06, + "loss": 0.1451, + "num_input_tokens_seen": 163843072, + "step": 52055 + }, + { + "epoch": 3.3326931694513795, + "grad_norm": 8.404257774353027, + "learning_rate": 1.510948227111792e-06, + "loss": 0.1819, + "num_input_tokens_seen": 163858240, + "step": 52060 + }, + { + "epoch": 3.3330132513923565, + "grad_norm": 4.592989444732666, + "learning_rate": 1.5104351776786762e-06, + "loss": 0.1353, + "num_input_tokens_seen": 163874176, + "step": 52065 + }, + { + "epoch": 3.3333333333333335, + "grad_norm": 9.449913024902344, + "learning_rate": 1.509922177659274e-06, + "loss": 0.147, + "num_input_tokens_seen": 163890176, + "step": 52070 + }, + { + "epoch": 3.33365341527431, + "grad_norm": 6.0767741203308105, + "learning_rate": 1.5094092270792001e-06, + "loss": 0.1087, + "num_input_tokens_seen": 163905472, + "step": 52075 + }, + { + "epoch": 3.333973497215287, + "grad_norm": 22.75580406188965, + "learning_rate": 1.5088963259640702e-06, + "loss": 0.2845, + "num_input_tokens_seen": 163921600, + "step": 52080 + }, + { + "epoch": 3.334293579156264, + "grad_norm": 5.116380214691162, + "learning_rate": 1.5083834743394948e-06, + "loss": 0.1563, + "num_input_tokens_seen": 163937280, + "step": 52085 + }, + { + "epoch": 3.334613661097241, + "grad_norm": 4.443319797515869, + "learning_rate": 1.5078706722310843e-06, + "loss": 0.1064, + "num_input_tokens_seen": 163952960, + "step": 52090 + }, + { + "epoch": 3.334933743038218, + "grad_norm": 6.605875492095947, + "learning_rate": 1.5073579196644439e-06, + "loss": 0.1586, + "num_input_tokens_seen": 163967680, + "step": 52095 + }, + { + "epoch": 3.3352538249791945, + "grad_norm": 5.285025596618652, + "learning_rate": 1.5068452166651784e-06, + "loss": 0.1476, + "num_input_tokens_seen": 163983616, + "step": 52100 + }, + { + "epoch": 3.3355739069201715, + "grad_norm": 6.211344242095947, + "learning_rate": 1.5063325632588905e-06, + "loss": 0.1312, + "num_input_tokens_seen": 163998528, + "step": 52105 + }, + { + "epoch": 3.3358939888611485, + "grad_norm": 6.72028112411499, + "learning_rate": 1.5058199594711779e-06, + "loss": 0.1513, + "num_input_tokens_seen": 164013952, + "step": 52110 + }, + { + "epoch": 3.3362140708021255, + "grad_norm": 7.338369369506836, + "learning_rate": 1.5053074053276384e-06, + "loss": 0.1451, + "num_input_tokens_seen": 164028416, + "step": 52115 + }, + { + "epoch": 3.336534152743102, + "grad_norm": 7.59047794342041, + "learning_rate": 1.504794900853865e-06, + "loss": 0.1569, + "num_input_tokens_seen": 164043456, + "step": 52120 + }, + { + "epoch": 3.336854234684079, + "grad_norm": 8.283979415893555, + "learning_rate": 1.5042824460754518e-06, + "loss": 0.1808, + "num_input_tokens_seen": 164058816, + "step": 52125 + }, + { + "epoch": 3.337174316625056, + "grad_norm": 11.002339363098145, + "learning_rate": 1.5037700410179856e-06, + "loss": 0.2024, + "num_input_tokens_seen": 164074624, + "step": 52130 + }, + { + "epoch": 3.337494398566033, + "grad_norm": 9.89813232421875, + "learning_rate": 1.5032576857070552e-06, + "loss": 0.2193, + "num_input_tokens_seen": 164090112, + "step": 52135 + }, + { + "epoch": 3.33781448050701, + "grad_norm": 9.145771026611328, + "learning_rate": 1.5027453801682435e-06, + "loss": 0.2415, + "num_input_tokens_seen": 164105664, + "step": 52140 + }, + { + "epoch": 3.3381345624479866, + "grad_norm": 5.869986057281494, + "learning_rate": 1.5022331244271338e-06, + "loss": 0.157, + "num_input_tokens_seen": 164120512, + "step": 52145 + }, + { + "epoch": 3.3384546443889636, + "grad_norm": 7.252628326416016, + "learning_rate": 1.5017209185093038e-06, + "loss": 0.1663, + "num_input_tokens_seen": 164136576, + "step": 52150 + }, + { + "epoch": 3.3387747263299405, + "grad_norm": 16.247154235839844, + "learning_rate": 1.5012087624403313e-06, + "loss": 0.2125, + "num_input_tokens_seen": 164151552, + "step": 52155 + }, + { + "epoch": 3.3390948082709175, + "grad_norm": 5.897877216339111, + "learning_rate": 1.5006966562457916e-06, + "loss": 0.2435, + "num_input_tokens_seen": 164169088, + "step": 52160 + }, + { + "epoch": 3.339414890211894, + "grad_norm": 7.50331449508667, + "learning_rate": 1.5001845999512548e-06, + "loss": 0.119, + "num_input_tokens_seen": 164184448, + "step": 52165 + }, + { + "epoch": 3.339734972152871, + "grad_norm": 11.284799575805664, + "learning_rate": 1.4996725935822918e-06, + "loss": 0.1432, + "num_input_tokens_seen": 164201664, + "step": 52170 + }, + { + "epoch": 3.340055054093848, + "grad_norm": 5.610910415649414, + "learning_rate": 1.4991606371644684e-06, + "loss": 0.1176, + "num_input_tokens_seen": 164216640, + "step": 52175 + }, + { + "epoch": 3.340375136034825, + "grad_norm": 6.774298191070557, + "learning_rate": 1.4986487307233506e-06, + "loss": 0.2017, + "num_input_tokens_seen": 164231616, + "step": 52180 + }, + { + "epoch": 3.340695217975802, + "grad_norm": 6.688351154327393, + "learning_rate": 1.498136874284498e-06, + "loss": 0.1749, + "num_input_tokens_seen": 164247040, + "step": 52185 + }, + { + "epoch": 3.3410152999167786, + "grad_norm": 6.775697708129883, + "learning_rate": 1.4976250678734727e-06, + "loss": 0.1614, + "num_input_tokens_seen": 164262080, + "step": 52190 + }, + { + "epoch": 3.3413353818577556, + "grad_norm": 6.962528228759766, + "learning_rate": 1.4971133115158297e-06, + "loss": 0.1396, + "num_input_tokens_seen": 164277312, + "step": 52195 + }, + { + "epoch": 3.3416554637987326, + "grad_norm": 10.284735679626465, + "learning_rate": 1.4966016052371241e-06, + "loss": 0.2273, + "num_input_tokens_seen": 164293312, + "step": 52200 + }, + { + "epoch": 3.341975545739709, + "grad_norm": 6.098212242126465, + "learning_rate": 1.4960899490629073e-06, + "loss": 0.2192, + "num_input_tokens_seen": 164309440, + "step": 52205 + }, + { + "epoch": 3.342295627680686, + "grad_norm": 5.664607524871826, + "learning_rate": 1.4955783430187305e-06, + "loss": 0.1269, + "num_input_tokens_seen": 164323968, + "step": 52210 + }, + { + "epoch": 3.342615709621663, + "grad_norm": 16.880094528198242, + "learning_rate": 1.4950667871301384e-06, + "loss": 0.2158, + "num_input_tokens_seen": 164338752, + "step": 52215 + }, + { + "epoch": 3.34293579156264, + "grad_norm": 3.5367043018341064, + "learning_rate": 1.494555281422676e-06, + "loss": 0.1074, + "num_input_tokens_seen": 164354560, + "step": 52220 + }, + { + "epoch": 3.343255873503617, + "grad_norm": 9.95263671875, + "learning_rate": 1.4940438259218868e-06, + "loss": 0.2454, + "num_input_tokens_seen": 164369920, + "step": 52225 + }, + { + "epoch": 3.3435759554445936, + "grad_norm": 7.048960208892822, + "learning_rate": 1.493532420653308e-06, + "loss": 0.1124, + "num_input_tokens_seen": 164384640, + "step": 52230 + }, + { + "epoch": 3.3438960373855706, + "grad_norm": 10.528343200683594, + "learning_rate": 1.4930210656424794e-06, + "loss": 0.2178, + "num_input_tokens_seen": 164400448, + "step": 52235 + }, + { + "epoch": 3.3442161193265476, + "grad_norm": 7.305088996887207, + "learning_rate": 1.4925097609149319e-06, + "loss": 0.1355, + "num_input_tokens_seen": 164416064, + "step": 52240 + }, + { + "epoch": 3.3445362012675246, + "grad_norm": 6.093564033508301, + "learning_rate": 1.4919985064962003e-06, + "loss": 0.1537, + "num_input_tokens_seen": 164431680, + "step": 52245 + }, + { + "epoch": 3.344856283208501, + "grad_norm": 16.88485336303711, + "learning_rate": 1.491487302411812e-06, + "loss": 0.2147, + "num_input_tokens_seen": 164446464, + "step": 52250 + }, + { + "epoch": 3.345176365149478, + "grad_norm": 4.686936855316162, + "learning_rate": 1.4909761486872947e-06, + "loss": 0.1536, + "num_input_tokens_seen": 164462656, + "step": 52255 + }, + { + "epoch": 3.345496447090455, + "grad_norm": 8.72258472442627, + "learning_rate": 1.4904650453481728e-06, + "loss": 0.1829, + "num_input_tokens_seen": 164477248, + "step": 52260 + }, + { + "epoch": 3.345816529031432, + "grad_norm": 6.50726842880249, + "learning_rate": 1.4899539924199688e-06, + "loss": 0.119, + "num_input_tokens_seen": 164494272, + "step": 52265 + }, + { + "epoch": 3.346136610972409, + "grad_norm": 10.56436538696289, + "learning_rate": 1.4894429899282004e-06, + "loss": 0.1539, + "num_input_tokens_seen": 164509312, + "step": 52270 + }, + { + "epoch": 3.3464566929133857, + "grad_norm": 0.5926663279533386, + "learning_rate": 1.488932037898385e-06, + "loss": 0.0911, + "num_input_tokens_seen": 164525120, + "step": 52275 + }, + { + "epoch": 3.3467767748543626, + "grad_norm": 7.890963077545166, + "learning_rate": 1.488421136356038e-06, + "loss": 0.2595, + "num_input_tokens_seen": 164542016, + "step": 52280 + }, + { + "epoch": 3.3470968567953396, + "grad_norm": 4.636873722076416, + "learning_rate": 1.4879102853266696e-06, + "loss": 0.1644, + "num_input_tokens_seen": 164557056, + "step": 52285 + }, + { + "epoch": 3.3474169387363166, + "grad_norm": 5.380117893218994, + "learning_rate": 1.4873994848357909e-06, + "loss": 0.1882, + "num_input_tokens_seen": 164572608, + "step": 52290 + }, + { + "epoch": 3.347737020677293, + "grad_norm": 4.968493938446045, + "learning_rate": 1.4868887349089065e-06, + "loss": 0.1258, + "num_input_tokens_seen": 164589184, + "step": 52295 + }, + { + "epoch": 3.34805710261827, + "grad_norm": 6.0164361000061035, + "learning_rate": 1.4863780355715225e-06, + "loss": 0.1126, + "num_input_tokens_seen": 164604160, + "step": 52300 + }, + { + "epoch": 3.348377184559247, + "grad_norm": 8.423675537109375, + "learning_rate": 1.4858673868491386e-06, + "loss": 0.2105, + "num_input_tokens_seen": 164619712, + "step": 52305 + }, + { + "epoch": 3.348697266500224, + "grad_norm": 30.411907196044922, + "learning_rate": 1.4853567887672555e-06, + "loss": 0.2556, + "num_input_tokens_seen": 164635904, + "step": 52310 + }, + { + "epoch": 3.349017348441201, + "grad_norm": 5.207770347595215, + "learning_rate": 1.4848462413513686e-06, + "loss": 0.1156, + "num_input_tokens_seen": 164652288, + "step": 52315 + }, + { + "epoch": 3.3493374303821777, + "grad_norm": 7.884646415710449, + "learning_rate": 1.484335744626974e-06, + "loss": 0.1689, + "num_input_tokens_seen": 164667776, + "step": 52320 + }, + { + "epoch": 3.3496575123231547, + "grad_norm": 36.185035705566406, + "learning_rate": 1.4838252986195605e-06, + "loss": 0.1575, + "num_input_tokens_seen": 164682752, + "step": 52325 + }, + { + "epoch": 3.3499775942641317, + "grad_norm": 6.599663257598877, + "learning_rate": 1.483314903354619e-06, + "loss": 0.1663, + "num_input_tokens_seen": 164698688, + "step": 52330 + }, + { + "epoch": 3.3502976762051087, + "grad_norm": 24.01430320739746, + "learning_rate": 1.4828045588576361e-06, + "loss": 0.1392, + "num_input_tokens_seen": 164714560, + "step": 52335 + }, + { + "epoch": 3.350617758146085, + "grad_norm": 8.514653205871582, + "learning_rate": 1.4822942651540944e-06, + "loss": 0.1694, + "num_input_tokens_seen": 164729920, + "step": 52340 + }, + { + "epoch": 3.350937840087062, + "grad_norm": 6.878607749938965, + "learning_rate": 1.4817840222694768e-06, + "loss": 0.1632, + "num_input_tokens_seen": 164745920, + "step": 52345 + }, + { + "epoch": 3.351257922028039, + "grad_norm": 8.421953201293945, + "learning_rate": 1.4812738302292605e-06, + "loss": 0.1718, + "num_input_tokens_seen": 164761344, + "step": 52350 + }, + { + "epoch": 3.351578003969016, + "grad_norm": 4.789196014404297, + "learning_rate": 1.4807636890589238e-06, + "loss": 0.186, + "num_input_tokens_seen": 164777728, + "step": 52355 + }, + { + "epoch": 3.351898085909993, + "grad_norm": 6.798847198486328, + "learning_rate": 1.480253598783939e-06, + "loss": 0.1383, + "num_input_tokens_seen": 164792896, + "step": 52360 + }, + { + "epoch": 3.3522181678509697, + "grad_norm": 3.3059024810791016, + "learning_rate": 1.4797435594297777e-06, + "loss": 0.1454, + "num_input_tokens_seen": 164809216, + "step": 52365 + }, + { + "epoch": 3.3525382497919467, + "grad_norm": 2.6689295768737793, + "learning_rate": 1.4792335710219086e-06, + "loss": 0.1029, + "num_input_tokens_seen": 164824000, + "step": 52370 + }, + { + "epoch": 3.3528583317329237, + "grad_norm": 4.012296676635742, + "learning_rate": 1.4787236335857984e-06, + "loss": 0.1583, + "num_input_tokens_seen": 164838784, + "step": 52375 + }, + { + "epoch": 3.3531784136739007, + "grad_norm": 8.640170097351074, + "learning_rate": 1.47821374714691e-06, + "loss": 0.1767, + "num_input_tokens_seen": 164853376, + "step": 52380 + }, + { + "epoch": 3.3534984956148772, + "grad_norm": 34.24935531616211, + "learning_rate": 1.4777039117307054e-06, + "loss": 0.155, + "num_input_tokens_seen": 164868416, + "step": 52385 + }, + { + "epoch": 3.353818577555854, + "grad_norm": 19.316692352294922, + "learning_rate": 1.4771941273626417e-06, + "loss": 0.1631, + "num_input_tokens_seen": 164883392, + "step": 52390 + }, + { + "epoch": 3.354138659496831, + "grad_norm": 6.849915027618408, + "learning_rate": 1.4766843940681755e-06, + "loss": 0.1535, + "num_input_tokens_seen": 164900224, + "step": 52395 + }, + { + "epoch": 3.354458741437808, + "grad_norm": 4.658212661743164, + "learning_rate": 1.476174711872761e-06, + "loss": 0.1386, + "num_input_tokens_seen": 164915456, + "step": 52400 + }, + { + "epoch": 3.354778823378785, + "grad_norm": 7.365574836730957, + "learning_rate": 1.475665080801848e-06, + "loss": 0.158, + "num_input_tokens_seen": 164932096, + "step": 52405 + }, + { + "epoch": 3.3550989053197617, + "grad_norm": 11.032416343688965, + "learning_rate": 1.475155500880886e-06, + "loss": 0.1584, + "num_input_tokens_seen": 164951168, + "step": 52410 + }, + { + "epoch": 3.3554189872607387, + "grad_norm": 9.69088077545166, + "learning_rate": 1.4746459721353196e-06, + "loss": 0.1488, + "num_input_tokens_seen": 164965568, + "step": 52415 + }, + { + "epoch": 3.3557390692017157, + "grad_norm": 8.568215370178223, + "learning_rate": 1.4741364945905925e-06, + "loss": 0.1788, + "num_input_tokens_seen": 164981504, + "step": 52420 + }, + { + "epoch": 3.3560591511426927, + "grad_norm": 7.354351043701172, + "learning_rate": 1.4736270682721454e-06, + "loss": 0.2274, + "num_input_tokens_seen": 164998400, + "step": 52425 + }, + { + "epoch": 3.3563792330836693, + "grad_norm": 7.179473876953125, + "learning_rate": 1.4731176932054163e-06, + "loss": 0.1673, + "num_input_tokens_seen": 165012736, + "step": 52430 + }, + { + "epoch": 3.3566993150246462, + "grad_norm": 6.111384868621826, + "learning_rate": 1.4726083694158407e-06, + "loss": 0.1393, + "num_input_tokens_seen": 165027968, + "step": 52435 + }, + { + "epoch": 3.3570193969656232, + "grad_norm": 4.723777770996094, + "learning_rate": 1.4720990969288523e-06, + "loss": 0.167, + "num_input_tokens_seen": 165044096, + "step": 52440 + }, + { + "epoch": 3.3573394789066002, + "grad_norm": 6.645390033721924, + "learning_rate": 1.47158987576988e-06, + "loss": 0.1592, + "num_input_tokens_seen": 165060096, + "step": 52445 + }, + { + "epoch": 3.357659560847577, + "grad_norm": 8.98142147064209, + "learning_rate": 1.4710807059643523e-06, + "loss": 0.1531, + "num_input_tokens_seen": 165075904, + "step": 52450 + }, + { + "epoch": 3.3579796427885538, + "grad_norm": 7.071345329284668, + "learning_rate": 1.4705715875376956e-06, + "loss": 0.1206, + "num_input_tokens_seen": 165090432, + "step": 52455 + }, + { + "epoch": 3.3582997247295308, + "grad_norm": 7.29410982131958, + "learning_rate": 1.4700625205153307e-06, + "loss": 0.1642, + "num_input_tokens_seen": 165105344, + "step": 52460 + }, + { + "epoch": 3.3586198066705077, + "grad_norm": 2.589694023132324, + "learning_rate": 1.46955350492268e-06, + "loss": 0.1409, + "num_input_tokens_seen": 165120896, + "step": 52465 + }, + { + "epoch": 3.3589398886114843, + "grad_norm": 10.453875541687012, + "learning_rate": 1.469044540785159e-06, + "loss": 0.1729, + "num_input_tokens_seen": 165137408, + "step": 52470 + }, + { + "epoch": 3.3592599705524613, + "grad_norm": 4.329784393310547, + "learning_rate": 1.4685356281281845e-06, + "loss": 0.1621, + "num_input_tokens_seen": 165152896, + "step": 52475 + }, + { + "epoch": 3.3595800524934383, + "grad_norm": 8.911556243896484, + "learning_rate": 1.468026766977167e-06, + "loss": 0.2257, + "num_input_tokens_seen": 165169600, + "step": 52480 + }, + { + "epoch": 3.3599001344344153, + "grad_norm": 8.065875053405762, + "learning_rate": 1.467517957357518e-06, + "loss": 0.1312, + "num_input_tokens_seen": 165185920, + "step": 52485 + }, + { + "epoch": 3.3602202163753923, + "grad_norm": 5.164493083953857, + "learning_rate": 1.4670091992946433e-06, + "loss": 0.1837, + "num_input_tokens_seen": 165201728, + "step": 52490 + }, + { + "epoch": 3.360540298316369, + "grad_norm": 8.224647521972656, + "learning_rate": 1.4665004928139501e-06, + "loss": 0.1726, + "num_input_tokens_seen": 165217216, + "step": 52495 + }, + { + "epoch": 3.360860380257346, + "grad_norm": 6.222757339477539, + "learning_rate": 1.4659918379408378e-06, + "loss": 0.1053, + "num_input_tokens_seen": 165233088, + "step": 52500 + }, + { + "epoch": 3.361180462198323, + "grad_norm": 2.6394641399383545, + "learning_rate": 1.4654832347007069e-06, + "loss": 0.1588, + "num_input_tokens_seen": 165249472, + "step": 52505 + }, + { + "epoch": 3.3615005441392998, + "grad_norm": 17.073667526245117, + "learning_rate": 1.4649746831189553e-06, + "loss": 0.1919, + "num_input_tokens_seen": 165265664, + "step": 52510 + }, + { + "epoch": 3.3618206260802763, + "grad_norm": 14.488227844238281, + "learning_rate": 1.4644661832209767e-06, + "loss": 0.2331, + "num_input_tokens_seen": 165281856, + "step": 52515 + }, + { + "epoch": 3.3621407080212533, + "grad_norm": 9.12533950805664, + "learning_rate": 1.4639577350321635e-06, + "loss": 0.1156, + "num_input_tokens_seen": 165297280, + "step": 52520 + }, + { + "epoch": 3.3624607899622303, + "grad_norm": 5.99130916595459, + "learning_rate": 1.4634493385779036e-06, + "loss": 0.1516, + "num_input_tokens_seen": 165313664, + "step": 52525 + }, + { + "epoch": 3.3627808719032073, + "grad_norm": 5.668313026428223, + "learning_rate": 1.4629409938835853e-06, + "loss": 0.1599, + "num_input_tokens_seen": 165328960, + "step": 52530 + }, + { + "epoch": 3.3631009538441843, + "grad_norm": 8.518805503845215, + "learning_rate": 1.4624327009745915e-06, + "loss": 0.139, + "num_input_tokens_seen": 165344960, + "step": 52535 + }, + { + "epoch": 3.363421035785161, + "grad_norm": 5.254278659820557, + "learning_rate": 1.4619244598763041e-06, + "loss": 0.1452, + "num_input_tokens_seen": 165361536, + "step": 52540 + }, + { + "epoch": 3.363741117726138, + "grad_norm": 12.307120323181152, + "learning_rate": 1.4614162706141017e-06, + "loss": 0.15, + "num_input_tokens_seen": 165376320, + "step": 52545 + }, + { + "epoch": 3.364061199667115, + "grad_norm": 5.17288064956665, + "learning_rate": 1.4609081332133621e-06, + "loss": 0.1734, + "num_input_tokens_seen": 165392128, + "step": 52550 + }, + { + "epoch": 3.364381281608092, + "grad_norm": 7.5507612228393555, + "learning_rate": 1.4604000476994568e-06, + "loss": 0.1577, + "num_input_tokens_seen": 165408704, + "step": 52555 + }, + { + "epoch": 3.3647013635490683, + "grad_norm": 8.537002563476562, + "learning_rate": 1.459892014097759e-06, + "loss": 0.1585, + "num_input_tokens_seen": 165423872, + "step": 52560 + }, + { + "epoch": 3.3650214454900453, + "grad_norm": 48.527530670166016, + "learning_rate": 1.459384032433635e-06, + "loss": 0.1832, + "num_input_tokens_seen": 165439168, + "step": 52565 + }, + { + "epoch": 3.3653415274310223, + "grad_norm": 7.943100929260254, + "learning_rate": 1.458876102732452e-06, + "loss": 0.2009, + "num_input_tokens_seen": 165455040, + "step": 52570 + }, + { + "epoch": 3.3656616093719993, + "grad_norm": 7.949818134307861, + "learning_rate": 1.4583682250195753e-06, + "loss": 0.1912, + "num_input_tokens_seen": 165470656, + "step": 52575 + }, + { + "epoch": 3.3659816913129763, + "grad_norm": 5.012765407562256, + "learning_rate": 1.4578603993203617e-06, + "loss": 0.1539, + "num_input_tokens_seen": 165486080, + "step": 52580 + }, + { + "epoch": 3.366301773253953, + "grad_norm": 5.306439399719238, + "learning_rate": 1.4573526256601728e-06, + "loss": 0.167, + "num_input_tokens_seen": 165501568, + "step": 52585 + }, + { + "epoch": 3.36662185519493, + "grad_norm": 12.11042594909668, + "learning_rate": 1.4568449040643622e-06, + "loss": 0.1771, + "num_input_tokens_seen": 165516800, + "step": 52590 + }, + { + "epoch": 3.366941937135907, + "grad_norm": 6.344541072845459, + "learning_rate": 1.4563372345582838e-06, + "loss": 0.1523, + "num_input_tokens_seen": 165533248, + "step": 52595 + }, + { + "epoch": 3.367262019076884, + "grad_norm": 10.677865982055664, + "learning_rate": 1.4558296171672878e-06, + "loss": 0.2097, + "num_input_tokens_seen": 165548288, + "step": 52600 + }, + { + "epoch": 3.3675821010178604, + "grad_norm": 6.83715295791626, + "learning_rate": 1.4553220519167216e-06, + "loss": 0.2918, + "num_input_tokens_seen": 165563456, + "step": 52605 + }, + { + "epoch": 3.3679021829588374, + "grad_norm": 5.001124858856201, + "learning_rate": 1.4548145388319306e-06, + "loss": 0.1531, + "num_input_tokens_seen": 165581248, + "step": 52610 + }, + { + "epoch": 3.3682222648998144, + "grad_norm": 9.56906795501709, + "learning_rate": 1.454307077938258e-06, + "loss": 0.2317, + "num_input_tokens_seen": 165597824, + "step": 52615 + }, + { + "epoch": 3.3685423468407913, + "grad_norm": 7.712647438049316, + "learning_rate": 1.4537996692610413e-06, + "loss": 0.1681, + "num_input_tokens_seen": 165614016, + "step": 52620 + }, + { + "epoch": 3.3688624287817683, + "grad_norm": 29.82527732849121, + "learning_rate": 1.453292312825621e-06, + "loss": 0.1565, + "num_input_tokens_seen": 165629568, + "step": 52625 + }, + { + "epoch": 3.369182510722745, + "grad_norm": 3.6935713291168213, + "learning_rate": 1.4527850086573303e-06, + "loss": 0.1623, + "num_input_tokens_seen": 165645120, + "step": 52630 + }, + { + "epoch": 3.369502592663722, + "grad_norm": 5.653095245361328, + "learning_rate": 1.4522777567815017e-06, + "loss": 0.1302, + "num_input_tokens_seen": 165660352, + "step": 52635 + }, + { + "epoch": 3.369822674604699, + "grad_norm": 5.632166385650635, + "learning_rate": 1.4517705572234647e-06, + "loss": 0.1674, + "num_input_tokens_seen": 165676160, + "step": 52640 + }, + { + "epoch": 3.370142756545676, + "grad_norm": 6.431558609008789, + "learning_rate": 1.4512634100085449e-06, + "loss": 0.1841, + "num_input_tokens_seen": 165691968, + "step": 52645 + }, + { + "epoch": 3.3704628384866524, + "grad_norm": 6.859799861907959, + "learning_rate": 1.4507563151620696e-06, + "loss": 0.1902, + "num_input_tokens_seen": 165707968, + "step": 52650 + }, + { + "epoch": 3.3707829204276294, + "grad_norm": 10.69072151184082, + "learning_rate": 1.4502492727093565e-06, + "loss": 0.1807, + "num_input_tokens_seen": 165723456, + "step": 52655 + }, + { + "epoch": 3.3711030023686064, + "grad_norm": 10.408520698547363, + "learning_rate": 1.449742282675729e-06, + "loss": 0.1911, + "num_input_tokens_seen": 165738432, + "step": 52660 + }, + { + "epoch": 3.3714230843095834, + "grad_norm": 15.228877067565918, + "learning_rate": 1.4492353450864992e-06, + "loss": 0.1701, + "num_input_tokens_seen": 165754624, + "step": 52665 + }, + { + "epoch": 3.3717431662505604, + "grad_norm": 12.67880916595459, + "learning_rate": 1.4487284599669842e-06, + "loss": 0.1334, + "num_input_tokens_seen": 165770432, + "step": 52670 + }, + { + "epoch": 3.372063248191537, + "grad_norm": 5.158274173736572, + "learning_rate": 1.448221627342494e-06, + "loss": 0.1972, + "num_input_tokens_seen": 165786368, + "step": 52675 + }, + { + "epoch": 3.372383330132514, + "grad_norm": 7.611132621765137, + "learning_rate": 1.4477148472383363e-06, + "loss": 0.162, + "num_input_tokens_seen": 165801088, + "step": 52680 + }, + { + "epoch": 3.372703412073491, + "grad_norm": 11.288822174072266, + "learning_rate": 1.44720811967982e-06, + "loss": 0.249, + "num_input_tokens_seen": 165815744, + "step": 52685 + }, + { + "epoch": 3.373023494014468, + "grad_norm": 5.674350738525391, + "learning_rate": 1.446701444692244e-06, + "loss": 0.1416, + "num_input_tokens_seen": 165831104, + "step": 52690 + }, + { + "epoch": 3.3733435759554444, + "grad_norm": 10.62064266204834, + "learning_rate": 1.4461948223009126e-06, + "loss": 0.1625, + "num_input_tokens_seen": 165845760, + "step": 52695 + }, + { + "epoch": 3.3736636578964214, + "grad_norm": 8.597569465637207, + "learning_rate": 1.4456882525311228e-06, + "loss": 0.1882, + "num_input_tokens_seen": 165861952, + "step": 52700 + }, + { + "epoch": 3.3739837398373984, + "grad_norm": 7.79884147644043, + "learning_rate": 1.4451817354081698e-06, + "loss": 0.2035, + "num_input_tokens_seen": 165877760, + "step": 52705 + }, + { + "epoch": 3.3743038217783754, + "grad_norm": 35.33118438720703, + "learning_rate": 1.444675270957347e-06, + "loss": 0.1695, + "num_input_tokens_seen": 165892288, + "step": 52710 + }, + { + "epoch": 3.3746239037193524, + "grad_norm": 7.009253978729248, + "learning_rate": 1.4441688592039438e-06, + "loss": 0.1716, + "num_input_tokens_seen": 165906880, + "step": 52715 + }, + { + "epoch": 3.374943985660329, + "grad_norm": 8.640995025634766, + "learning_rate": 1.4436625001732483e-06, + "loss": 0.2366, + "num_input_tokens_seen": 165923456, + "step": 52720 + }, + { + "epoch": 3.375264067601306, + "grad_norm": 7.069134712219238, + "learning_rate": 1.4431561938905453e-06, + "loss": 0.1386, + "num_input_tokens_seen": 165939584, + "step": 52725 + }, + { + "epoch": 3.375584149542283, + "grad_norm": 41.410797119140625, + "learning_rate": 1.4426499403811156e-06, + "loss": 0.1757, + "num_input_tokens_seen": 165955392, + "step": 52730 + }, + { + "epoch": 3.3759042314832595, + "grad_norm": 6.931673526763916, + "learning_rate": 1.442143739670243e-06, + "loss": 0.1456, + "num_input_tokens_seen": 165971840, + "step": 52735 + }, + { + "epoch": 3.3762243134242365, + "grad_norm": 9.599045753479004, + "learning_rate": 1.4416375917831995e-06, + "loss": 0.1885, + "num_input_tokens_seen": 165987392, + "step": 52740 + }, + { + "epoch": 3.3765443953652134, + "grad_norm": 22.78236198425293, + "learning_rate": 1.441131496745263e-06, + "loss": 0.1549, + "num_input_tokens_seen": 166002432, + "step": 52745 + }, + { + "epoch": 3.3768644773061904, + "grad_norm": 3.934792995452881, + "learning_rate": 1.4406254545817041e-06, + "loss": 0.1498, + "num_input_tokens_seen": 166017856, + "step": 52750 + }, + { + "epoch": 3.3771845592471674, + "grad_norm": 5.5728230476379395, + "learning_rate": 1.440119465317792e-06, + "loss": 0.1363, + "num_input_tokens_seen": 166032192, + "step": 52755 + }, + { + "epoch": 3.377504641188144, + "grad_norm": 81.8727035522461, + "learning_rate": 1.4396135289787926e-06, + "loss": 0.1525, + "num_input_tokens_seen": 166047424, + "step": 52760 + }, + { + "epoch": 3.377824723129121, + "grad_norm": 6.726666450500488, + "learning_rate": 1.4391076455899699e-06, + "loss": 0.1357, + "num_input_tokens_seen": 166062976, + "step": 52765 + }, + { + "epoch": 3.378144805070098, + "grad_norm": 9.817558288574219, + "learning_rate": 1.438601815176587e-06, + "loss": 0.2519, + "num_input_tokens_seen": 166078720, + "step": 52770 + }, + { + "epoch": 3.378464887011075, + "grad_norm": 8.090876579284668, + "learning_rate": 1.4380960377638992e-06, + "loss": 0.1971, + "num_input_tokens_seen": 166094720, + "step": 52775 + }, + { + "epoch": 3.3787849689520515, + "grad_norm": 11.798617362976074, + "learning_rate": 1.4375903133771646e-06, + "loss": 0.166, + "num_input_tokens_seen": 166110208, + "step": 52780 + }, + { + "epoch": 3.3791050508930285, + "grad_norm": 8.463799476623535, + "learning_rate": 1.4370846420416359e-06, + "loss": 0.223, + "num_input_tokens_seen": 166125184, + "step": 52785 + }, + { + "epoch": 3.3794251328340055, + "grad_norm": 9.578483581542969, + "learning_rate": 1.436579023782564e-06, + "loss": 0.1478, + "num_input_tokens_seen": 166140992, + "step": 52790 + }, + { + "epoch": 3.3797452147749825, + "grad_norm": 5.057973861694336, + "learning_rate": 1.4360734586251958e-06, + "loss": 0.1542, + "num_input_tokens_seen": 166156672, + "step": 52795 + }, + { + "epoch": 3.3800652967159595, + "grad_norm": 8.879012107849121, + "learning_rate": 1.4355679465947768e-06, + "loss": 0.1927, + "num_input_tokens_seen": 166172224, + "step": 52800 + }, + { + "epoch": 3.380385378656936, + "grad_norm": 6.357833385467529, + "learning_rate": 1.435062487716552e-06, + "loss": 0.1953, + "num_input_tokens_seen": 166187904, + "step": 52805 + }, + { + "epoch": 3.380705460597913, + "grad_norm": 4.5565080642700195, + "learning_rate": 1.4345570820157577e-06, + "loss": 0.1237, + "num_input_tokens_seen": 166203840, + "step": 52810 + }, + { + "epoch": 3.38102554253889, + "grad_norm": 7.15064811706543, + "learning_rate": 1.4340517295176338e-06, + "loss": 0.1942, + "num_input_tokens_seen": 166219712, + "step": 52815 + }, + { + "epoch": 3.381345624479867, + "grad_norm": 7.9878644943237305, + "learning_rate": 1.4335464302474145e-06, + "loss": 0.1575, + "num_input_tokens_seen": 166234880, + "step": 52820 + }, + { + "epoch": 3.3816657064208435, + "grad_norm": 19.653818130493164, + "learning_rate": 1.4330411842303311e-06, + "loss": 0.1432, + "num_input_tokens_seen": 166251648, + "step": 52825 + }, + { + "epoch": 3.3819857883618205, + "grad_norm": 7.860955238342285, + "learning_rate": 1.4325359914916137e-06, + "loss": 0.2069, + "num_input_tokens_seen": 166266240, + "step": 52830 + }, + { + "epoch": 3.3823058703027975, + "grad_norm": 9.85230827331543, + "learning_rate": 1.4320308520564885e-06, + "loss": 0.2206, + "num_input_tokens_seen": 166281280, + "step": 52835 + }, + { + "epoch": 3.3826259522437745, + "grad_norm": 9.559427261352539, + "learning_rate": 1.4315257659501797e-06, + "loss": 0.1591, + "num_input_tokens_seen": 166296960, + "step": 52840 + }, + { + "epoch": 3.3829460341847515, + "grad_norm": 88.75778198242188, + "learning_rate": 1.4310207331979087e-06, + "loss": 0.1698, + "num_input_tokens_seen": 166312832, + "step": 52845 + }, + { + "epoch": 3.383266116125728, + "grad_norm": 7.4415435791015625, + "learning_rate": 1.4305157538248932e-06, + "loss": 0.1811, + "num_input_tokens_seen": 166328704, + "step": 52850 + }, + { + "epoch": 3.383586198066705, + "grad_norm": 40.740055084228516, + "learning_rate": 1.4300108278563512e-06, + "loss": 0.1651, + "num_input_tokens_seen": 166345280, + "step": 52855 + }, + { + "epoch": 3.383906280007682, + "grad_norm": 7.879505157470703, + "learning_rate": 1.4295059553174954e-06, + "loss": 0.1416, + "num_input_tokens_seen": 166360704, + "step": 52860 + }, + { + "epoch": 3.384226361948659, + "grad_norm": 10.66338062286377, + "learning_rate": 1.4290011362335358e-06, + "loss": 0.1792, + "num_input_tokens_seen": 166377408, + "step": 52865 + }, + { + "epoch": 3.3845464438896355, + "grad_norm": 6.844233989715576, + "learning_rate": 1.4284963706296808e-06, + "loss": 0.1122, + "num_input_tokens_seen": 166392768, + "step": 52870 + }, + { + "epoch": 3.3848665258306125, + "grad_norm": 12.088848114013672, + "learning_rate": 1.427991658531135e-06, + "loss": 0.212, + "num_input_tokens_seen": 166408960, + "step": 52875 + }, + { + "epoch": 3.3851866077715895, + "grad_norm": 8.532552719116211, + "learning_rate": 1.4274869999631036e-06, + "loss": 0.1707, + "num_input_tokens_seen": 166424320, + "step": 52880 + }, + { + "epoch": 3.3855066897125665, + "grad_norm": 4.53595495223999, + "learning_rate": 1.4269823949507832e-06, + "loss": 0.1575, + "num_input_tokens_seen": 166439616, + "step": 52885 + }, + { + "epoch": 3.3858267716535435, + "grad_norm": 7.372142314910889, + "learning_rate": 1.426477843519375e-06, + "loss": 0.116, + "num_input_tokens_seen": 166455552, + "step": 52890 + }, + { + "epoch": 3.38614685359452, + "grad_norm": 12.786469459533691, + "learning_rate": 1.4259733456940691e-06, + "loss": 0.2972, + "num_input_tokens_seen": 166471232, + "step": 52895 + }, + { + "epoch": 3.386466935535497, + "grad_norm": 7.684811592102051, + "learning_rate": 1.425468901500061e-06, + "loss": 0.1586, + "num_input_tokens_seen": 166486144, + "step": 52900 + }, + { + "epoch": 3.386787017476474, + "grad_norm": 3.043757915496826, + "learning_rate": 1.4249645109625394e-06, + "loss": 0.1247, + "num_input_tokens_seen": 166500608, + "step": 52905 + }, + { + "epoch": 3.387107099417451, + "grad_norm": 7.734921932220459, + "learning_rate": 1.42446017410669e-06, + "loss": 0.1257, + "num_input_tokens_seen": 166517504, + "step": 52910 + }, + { + "epoch": 3.3874271813584276, + "grad_norm": 4.613709926605225, + "learning_rate": 1.4239558909576973e-06, + "loss": 0.1979, + "num_input_tokens_seen": 166532608, + "step": 52915 + }, + { + "epoch": 3.3877472632994046, + "grad_norm": 8.730304718017578, + "learning_rate": 1.4234516615407413e-06, + "loss": 0.1649, + "num_input_tokens_seen": 166550784, + "step": 52920 + }, + { + "epoch": 3.3880673452403816, + "grad_norm": 4.179198741912842, + "learning_rate": 1.422947485881003e-06, + "loss": 0.0975, + "num_input_tokens_seen": 166566464, + "step": 52925 + }, + { + "epoch": 3.3883874271813585, + "grad_norm": 7.318863868713379, + "learning_rate": 1.4224433640036572e-06, + "loss": 0.1885, + "num_input_tokens_seen": 166581952, + "step": 52930 + }, + { + "epoch": 3.3887075091223355, + "grad_norm": 6.25510311126709, + "learning_rate": 1.4219392959338767e-06, + "loss": 0.1934, + "num_input_tokens_seen": 166598080, + "step": 52935 + }, + { + "epoch": 3.389027591063312, + "grad_norm": 11.344722747802734, + "learning_rate": 1.4214352816968327e-06, + "loss": 0.2168, + "num_input_tokens_seen": 166613120, + "step": 52940 + }, + { + "epoch": 3.389347673004289, + "grad_norm": 11.541083335876465, + "learning_rate": 1.4209313213176922e-06, + "loss": 0.1545, + "num_input_tokens_seen": 166628032, + "step": 52945 + }, + { + "epoch": 3.389667754945266, + "grad_norm": 11.00655746459961, + "learning_rate": 1.4204274148216217e-06, + "loss": 0.1895, + "num_input_tokens_seen": 166643328, + "step": 52950 + }, + { + "epoch": 3.389987836886243, + "grad_norm": 8.426525115966797, + "learning_rate": 1.419923562233782e-06, + "loss": 0.2368, + "num_input_tokens_seen": 166659328, + "step": 52955 + }, + { + "epoch": 3.3903079188272196, + "grad_norm": 6.389349460601807, + "learning_rate": 1.419419763579333e-06, + "loss": 0.1384, + "num_input_tokens_seen": 166674176, + "step": 52960 + }, + { + "epoch": 3.3906280007681966, + "grad_norm": 6.071285724639893, + "learning_rate": 1.4189160188834344e-06, + "loss": 0.1512, + "num_input_tokens_seen": 166690816, + "step": 52965 + }, + { + "epoch": 3.3909480827091736, + "grad_norm": 32.24332809448242, + "learning_rate": 1.418412328171237e-06, + "loss": 0.2621, + "num_input_tokens_seen": 166707264, + "step": 52970 + }, + { + "epoch": 3.3912681646501506, + "grad_norm": 11.708462715148926, + "learning_rate": 1.4179086914678947e-06, + "loss": 0.1546, + "num_input_tokens_seen": 166722432, + "step": 52975 + }, + { + "epoch": 3.3915882465911276, + "grad_norm": 9.044788360595703, + "learning_rate": 1.4174051087985563e-06, + "loss": 0.1353, + "num_input_tokens_seen": 166737536, + "step": 52980 + }, + { + "epoch": 3.391908328532104, + "grad_norm": 10.022064208984375, + "learning_rate": 1.4169015801883674e-06, + "loss": 0.1705, + "num_input_tokens_seen": 166752768, + "step": 52985 + }, + { + "epoch": 3.392228410473081, + "grad_norm": 10.947242736816406, + "learning_rate": 1.416398105662472e-06, + "loss": 0.1519, + "num_input_tokens_seen": 166767168, + "step": 52990 + }, + { + "epoch": 3.392548492414058, + "grad_norm": 4.857823371887207, + "learning_rate": 1.41589468524601e-06, + "loss": 0.1357, + "num_input_tokens_seen": 166782848, + "step": 52995 + }, + { + "epoch": 3.392868574355035, + "grad_norm": 65.0591812133789, + "learning_rate": 1.4153913189641226e-06, + "loss": 0.2156, + "num_input_tokens_seen": 166799552, + "step": 53000 + }, + { + "epoch": 3.3931886562960116, + "grad_norm": 7.288938045501709, + "learning_rate": 1.4148880068419413e-06, + "loss": 0.141, + "num_input_tokens_seen": 166815936, + "step": 53005 + }, + { + "epoch": 3.3935087382369886, + "grad_norm": 5.02947998046875, + "learning_rate": 1.4143847489046014e-06, + "loss": 0.156, + "num_input_tokens_seen": 166832320, + "step": 53010 + }, + { + "epoch": 3.3938288201779656, + "grad_norm": 8.915199279785156, + "learning_rate": 1.4138815451772325e-06, + "loss": 0.1561, + "num_input_tokens_seen": 166847680, + "step": 53015 + }, + { + "epoch": 3.3941489021189426, + "grad_norm": 6.672360897064209, + "learning_rate": 1.4133783956849623e-06, + "loss": 0.1001, + "num_input_tokens_seen": 166862400, + "step": 53020 + }, + { + "epoch": 3.394468984059919, + "grad_norm": 5.17584753036499, + "learning_rate": 1.4128753004529145e-06, + "loss": 0.1729, + "num_input_tokens_seen": 166878144, + "step": 53025 + }, + { + "epoch": 3.394789066000896, + "grad_norm": 7.020318031311035, + "learning_rate": 1.4123722595062107e-06, + "loss": 0.1541, + "num_input_tokens_seen": 166893120, + "step": 53030 + }, + { + "epoch": 3.395109147941873, + "grad_norm": 2.1254725456237793, + "learning_rate": 1.4118692728699733e-06, + "loss": 0.1637, + "num_input_tokens_seen": 166910976, + "step": 53035 + }, + { + "epoch": 3.39542922988285, + "grad_norm": 11.566593170166016, + "learning_rate": 1.411366340569314e-06, + "loss": 0.1321, + "num_input_tokens_seen": 166928832, + "step": 53040 + }, + { + "epoch": 3.3957493118238267, + "grad_norm": 4.043353080749512, + "learning_rate": 1.4108634626293505e-06, + "loss": 0.1172, + "num_input_tokens_seen": 166944576, + "step": 53045 + }, + { + "epoch": 3.3960693937648037, + "grad_norm": 16.211267471313477, + "learning_rate": 1.4103606390751925e-06, + "loss": 0.1946, + "num_input_tokens_seen": 166959936, + "step": 53050 + }, + { + "epoch": 3.3963894757057806, + "grad_norm": 6.2671051025390625, + "learning_rate": 1.4098578699319486e-06, + "loss": 0.142, + "num_input_tokens_seen": 166975424, + "step": 53055 + }, + { + "epoch": 3.3967095576467576, + "grad_norm": 8.952936172485352, + "learning_rate": 1.4093551552247243e-06, + "loss": 0.1515, + "num_input_tokens_seen": 166989952, + "step": 53060 + }, + { + "epoch": 3.3970296395877346, + "grad_norm": 6.771392822265625, + "learning_rate": 1.4088524949786225e-06, + "loss": 0.1446, + "num_input_tokens_seen": 167004864, + "step": 53065 + }, + { + "epoch": 3.397349721528711, + "grad_norm": 6.633424758911133, + "learning_rate": 1.4083498892187424e-06, + "loss": 0.1743, + "num_input_tokens_seen": 167020352, + "step": 53070 + }, + { + "epoch": 3.397669803469688, + "grad_norm": 7.5407938957214355, + "learning_rate": 1.407847337970185e-06, + "loss": 0.1583, + "num_input_tokens_seen": 167036032, + "step": 53075 + }, + { + "epoch": 3.397989885410665, + "grad_norm": 7.733602523803711, + "learning_rate": 1.4073448412580403e-06, + "loss": 0.2474, + "num_input_tokens_seen": 167051520, + "step": 53080 + }, + { + "epoch": 3.398309967351642, + "grad_norm": 8.248526573181152, + "learning_rate": 1.406842399107405e-06, + "loss": 0.1208, + "num_input_tokens_seen": 167067392, + "step": 53085 + }, + { + "epoch": 3.3986300492926187, + "grad_norm": 8.061452865600586, + "learning_rate": 1.4063400115433642e-06, + "loss": 0.1728, + "num_input_tokens_seen": 167082816, + "step": 53090 + }, + { + "epoch": 3.3989501312335957, + "grad_norm": 15.732970237731934, + "learning_rate": 1.4058376785910078e-06, + "loss": 0.1649, + "num_input_tokens_seen": 167097920, + "step": 53095 + }, + { + "epoch": 3.3992702131745727, + "grad_norm": 8.213377952575684, + "learning_rate": 1.4053354002754183e-06, + "loss": 0.1787, + "num_input_tokens_seen": 167113152, + "step": 53100 + }, + { + "epoch": 3.3995902951155497, + "grad_norm": 5.313148021697998, + "learning_rate": 1.4048331766216757e-06, + "loss": 0.1523, + "num_input_tokens_seen": 167129472, + "step": 53105 + }, + { + "epoch": 3.3999103770565267, + "grad_norm": 20.92670440673828, + "learning_rate": 1.4043310076548621e-06, + "loss": 0.2269, + "num_input_tokens_seen": 167144192, + "step": 53110 + }, + { + "epoch": 3.400230458997503, + "grad_norm": 9.842975616455078, + "learning_rate": 1.403828893400049e-06, + "loss": 0.1375, + "num_input_tokens_seen": 167158976, + "step": 53115 + }, + { + "epoch": 3.40055054093848, + "grad_norm": 77.7071762084961, + "learning_rate": 1.4033268338823125e-06, + "loss": 0.1697, + "num_input_tokens_seen": 167174656, + "step": 53120 + }, + { + "epoch": 3.400870622879457, + "grad_norm": 8.534131050109863, + "learning_rate": 1.4028248291267203e-06, + "loss": 0.1128, + "num_input_tokens_seen": 167189632, + "step": 53125 + }, + { + "epoch": 3.401190704820434, + "grad_norm": 3.82572078704834, + "learning_rate": 1.4023228791583416e-06, + "loss": 0.1365, + "num_input_tokens_seen": 167205440, + "step": 53130 + }, + { + "epoch": 3.4015107867614107, + "grad_norm": 8.110597610473633, + "learning_rate": 1.4018209840022412e-06, + "loss": 0.1522, + "num_input_tokens_seen": 167221120, + "step": 53135 + }, + { + "epoch": 3.4018308687023877, + "grad_norm": 7.503003120422363, + "learning_rate": 1.4013191436834805e-06, + "loss": 0.1633, + "num_input_tokens_seen": 167236160, + "step": 53140 + }, + { + "epoch": 3.4021509506433647, + "grad_norm": 7.4529337882995605, + "learning_rate": 1.4008173582271186e-06, + "loss": 0.2166, + "num_input_tokens_seen": 167252224, + "step": 53145 + }, + { + "epoch": 3.4024710325843417, + "grad_norm": 8.569381713867188, + "learning_rate": 1.400315627658212e-06, + "loss": 0.1682, + "num_input_tokens_seen": 167268352, + "step": 53150 + }, + { + "epoch": 3.4027911145253187, + "grad_norm": 6.4609150886535645, + "learning_rate": 1.3998139520018155e-06, + "loss": 0.1205, + "num_input_tokens_seen": 167283072, + "step": 53155 + }, + { + "epoch": 3.4031111964662952, + "grad_norm": 9.514110565185547, + "learning_rate": 1.3993123312829796e-06, + "loss": 0.1569, + "num_input_tokens_seen": 167300736, + "step": 53160 + }, + { + "epoch": 3.403431278407272, + "grad_norm": 4.188767910003662, + "learning_rate": 1.3988107655267527e-06, + "loss": 0.1232, + "num_input_tokens_seen": 167316800, + "step": 53165 + }, + { + "epoch": 3.403751360348249, + "grad_norm": 40.93212127685547, + "learning_rate": 1.39830925475818e-06, + "loss": 0.2691, + "num_input_tokens_seen": 167331520, + "step": 53170 + }, + { + "epoch": 3.404071442289226, + "grad_norm": 7.82546329498291, + "learning_rate": 1.397807799002305e-06, + "loss": 0.1229, + "num_input_tokens_seen": 167347072, + "step": 53175 + }, + { + "epoch": 3.4043915242302027, + "grad_norm": 7.184103965759277, + "learning_rate": 1.3973063982841673e-06, + "loss": 0.155, + "num_input_tokens_seen": 167362880, + "step": 53180 + }, + { + "epoch": 3.4047116061711797, + "grad_norm": 13.493721961975098, + "learning_rate": 1.3968050526288042e-06, + "loss": 0.135, + "num_input_tokens_seen": 167378304, + "step": 53185 + }, + { + "epoch": 3.4050316881121567, + "grad_norm": 8.638809204101562, + "learning_rate": 1.3963037620612496e-06, + "loss": 0.1764, + "num_input_tokens_seen": 167393408, + "step": 53190 + }, + { + "epoch": 3.4053517700531337, + "grad_norm": 4.198339939117432, + "learning_rate": 1.3958025266065384e-06, + "loss": 0.1792, + "num_input_tokens_seen": 167408960, + "step": 53195 + }, + { + "epoch": 3.4056718519941107, + "grad_norm": 5.847392559051514, + "learning_rate": 1.3953013462896952e-06, + "loss": 0.1613, + "num_input_tokens_seen": 167424896, + "step": 53200 + }, + { + "epoch": 3.4059919339350873, + "grad_norm": 5.222421169281006, + "learning_rate": 1.3948002211357496e-06, + "loss": 0.1138, + "num_input_tokens_seen": 167441088, + "step": 53205 + }, + { + "epoch": 3.4063120158760642, + "grad_norm": 11.625004768371582, + "learning_rate": 1.394299151169724e-06, + "loss": 0.1472, + "num_input_tokens_seen": 167457024, + "step": 53210 + }, + { + "epoch": 3.4066320978170412, + "grad_norm": 7.521533012390137, + "learning_rate": 1.3937981364166397e-06, + "loss": 0.121, + "num_input_tokens_seen": 167471936, + "step": 53215 + }, + { + "epoch": 3.4069521797580182, + "grad_norm": 9.69710636138916, + "learning_rate": 1.3932971769015143e-06, + "loss": 0.1657, + "num_input_tokens_seen": 167487104, + "step": 53220 + }, + { + "epoch": 3.4072722616989948, + "grad_norm": 4.260190486907959, + "learning_rate": 1.3927962726493624e-06, + "loss": 0.1632, + "num_input_tokens_seen": 167502848, + "step": 53225 + }, + { + "epoch": 3.4075923436399718, + "grad_norm": 13.110208511352539, + "learning_rate": 1.3922954236851993e-06, + "loss": 0.2176, + "num_input_tokens_seen": 167520192, + "step": 53230 + }, + { + "epoch": 3.4079124255809488, + "grad_norm": 8.19334888458252, + "learning_rate": 1.3917946300340312e-06, + "loss": 0.1436, + "num_input_tokens_seen": 167535680, + "step": 53235 + }, + { + "epoch": 3.4082325075219257, + "grad_norm": 30.202672958374023, + "learning_rate": 1.3912938917208677e-06, + "loss": 0.248, + "num_input_tokens_seen": 167552768, + "step": 53240 + }, + { + "epoch": 3.4085525894629027, + "grad_norm": 6.981102466583252, + "learning_rate": 1.3907932087707121e-06, + "loss": 0.1496, + "num_input_tokens_seen": 167568320, + "step": 53245 + }, + { + "epoch": 3.4088726714038793, + "grad_norm": 181.1956024169922, + "learning_rate": 1.3902925812085661e-06, + "loss": 0.172, + "num_input_tokens_seen": 167583616, + "step": 53250 + }, + { + "epoch": 3.4091927533448563, + "grad_norm": 4.923469066619873, + "learning_rate": 1.389792009059428e-06, + "loss": 0.1069, + "num_input_tokens_seen": 167599616, + "step": 53255 + }, + { + "epoch": 3.4095128352858333, + "grad_norm": 9.575169563293457, + "learning_rate": 1.3892914923482947e-06, + "loss": 0.2235, + "num_input_tokens_seen": 167615616, + "step": 53260 + }, + { + "epoch": 3.4098329172268103, + "grad_norm": 1.2735304832458496, + "learning_rate": 1.3887910311001584e-06, + "loss": 0.1071, + "num_input_tokens_seen": 167630464, + "step": 53265 + }, + { + "epoch": 3.410152999167787, + "grad_norm": 56.8235969543457, + "learning_rate": 1.388290625340009e-06, + "loss": 0.2736, + "num_input_tokens_seen": 167647424, + "step": 53270 + }, + { + "epoch": 3.410473081108764, + "grad_norm": 10.273002624511719, + "learning_rate": 1.3877902750928364e-06, + "loss": 0.1264, + "num_input_tokens_seen": 167663424, + "step": 53275 + }, + { + "epoch": 3.410793163049741, + "grad_norm": 16.932723999023438, + "learning_rate": 1.3872899803836237e-06, + "loss": 0.2118, + "num_input_tokens_seen": 167678208, + "step": 53280 + }, + { + "epoch": 3.4111132449907178, + "grad_norm": 2.9764275550842285, + "learning_rate": 1.3867897412373538e-06, + "loss": 0.1357, + "num_input_tokens_seen": 167693696, + "step": 53285 + }, + { + "epoch": 3.4114333269316943, + "grad_norm": 3.7616560459136963, + "learning_rate": 1.3862895576790052e-06, + "loss": 0.1305, + "num_input_tokens_seen": 167708608, + "step": 53290 + }, + { + "epoch": 3.4117534088726713, + "grad_norm": 24.57952117919922, + "learning_rate": 1.3857894297335555e-06, + "loss": 0.2431, + "num_input_tokens_seen": 167724992, + "step": 53295 + }, + { + "epoch": 3.4120734908136483, + "grad_norm": 8.870489120483398, + "learning_rate": 1.385289357425977e-06, + "loss": 0.1511, + "num_input_tokens_seen": 167740288, + "step": 53300 + }, + { + "epoch": 3.4123935727546253, + "grad_norm": 5.203019618988037, + "learning_rate": 1.3847893407812434e-06, + "loss": 0.1275, + "num_input_tokens_seen": 167755328, + "step": 53305 + }, + { + "epoch": 3.412713654695602, + "grad_norm": 3.7271480560302734, + "learning_rate": 1.3842893798243195e-06, + "loss": 0.1331, + "num_input_tokens_seen": 167770752, + "step": 53310 + }, + { + "epoch": 3.413033736636579, + "grad_norm": 8.697669982910156, + "learning_rate": 1.3837894745801745e-06, + "loss": 0.2498, + "num_input_tokens_seen": 167785984, + "step": 53315 + }, + { + "epoch": 3.413353818577556, + "grad_norm": 8.170347213745117, + "learning_rate": 1.3832896250737665e-06, + "loss": 0.1911, + "num_input_tokens_seen": 167801792, + "step": 53320 + }, + { + "epoch": 3.413673900518533, + "grad_norm": 13.35934829711914, + "learning_rate": 1.382789831330059e-06, + "loss": 0.1828, + "num_input_tokens_seen": 167816896, + "step": 53325 + }, + { + "epoch": 3.41399398245951, + "grad_norm": 7.139914512634277, + "learning_rate": 1.3822900933740085e-06, + "loss": 0.1473, + "num_input_tokens_seen": 167832320, + "step": 53330 + }, + { + "epoch": 3.4143140644004863, + "grad_norm": 7.172038555145264, + "learning_rate": 1.3817904112305676e-06, + "loss": 0.2207, + "num_input_tokens_seen": 167848512, + "step": 53335 + }, + { + "epoch": 3.4146341463414633, + "grad_norm": 6.430922031402588, + "learning_rate": 1.3812907849246907e-06, + "loss": 0.2879, + "num_input_tokens_seen": 167863680, + "step": 53340 + }, + { + "epoch": 3.4149542282824403, + "grad_norm": 16.44270896911621, + "learning_rate": 1.380791214481323e-06, + "loss": 0.1084, + "num_input_tokens_seen": 167879488, + "step": 53345 + }, + { + "epoch": 3.4152743102234173, + "grad_norm": 9.214180946350098, + "learning_rate": 1.3802916999254146e-06, + "loss": 0.2407, + "num_input_tokens_seen": 167895296, + "step": 53350 + }, + { + "epoch": 3.415594392164394, + "grad_norm": 9.434697151184082, + "learning_rate": 1.379792241281904e-06, + "loss": 0.2134, + "num_input_tokens_seen": 167910528, + "step": 53355 + }, + { + "epoch": 3.415914474105371, + "grad_norm": 10.02392292022705, + "learning_rate": 1.3792928385757348e-06, + "loss": 0.1455, + "num_input_tokens_seen": 167926400, + "step": 53360 + }, + { + "epoch": 3.416234556046348, + "grad_norm": 6.711104393005371, + "learning_rate": 1.378793491831844e-06, + "loss": 0.1733, + "num_input_tokens_seen": 167942976, + "step": 53365 + }, + { + "epoch": 3.416554637987325, + "grad_norm": 3.717214345932007, + "learning_rate": 1.378294201075166e-06, + "loss": 0.1124, + "num_input_tokens_seen": 167958656, + "step": 53370 + }, + { + "epoch": 3.416874719928302, + "grad_norm": 6.8364973068237305, + "learning_rate": 1.3777949663306329e-06, + "loss": 0.1323, + "num_input_tokens_seen": 167975552, + "step": 53375 + }, + { + "epoch": 3.4171948018692784, + "grad_norm": 9.273433685302734, + "learning_rate": 1.377295787623173e-06, + "loss": 0.1302, + "num_input_tokens_seen": 167990720, + "step": 53380 + }, + { + "epoch": 3.4175148838102554, + "grad_norm": 5.762359619140625, + "learning_rate": 1.3767966649777143e-06, + "loss": 0.1207, + "num_input_tokens_seen": 168005632, + "step": 53385 + }, + { + "epoch": 3.4178349657512324, + "grad_norm": 6.688249111175537, + "learning_rate": 1.3762975984191795e-06, + "loss": 0.0946, + "num_input_tokens_seen": 168020992, + "step": 53390 + }, + { + "epoch": 3.4181550476922093, + "grad_norm": 7.25406551361084, + "learning_rate": 1.3757985879724894e-06, + "loss": 0.1956, + "num_input_tokens_seen": 168037440, + "step": 53395 + }, + { + "epoch": 3.418475129633186, + "grad_norm": 15.391536712646484, + "learning_rate": 1.3752996336625623e-06, + "loss": 0.1871, + "num_input_tokens_seen": 168054528, + "step": 53400 + }, + { + "epoch": 3.418795211574163, + "grad_norm": 26.783985137939453, + "learning_rate": 1.3748007355143129e-06, + "loss": 0.1581, + "num_input_tokens_seen": 168071744, + "step": 53405 + }, + { + "epoch": 3.41911529351514, + "grad_norm": 6.580310821533203, + "learning_rate": 1.3743018935526542e-06, + "loss": 0.1422, + "num_input_tokens_seen": 168087744, + "step": 53410 + }, + { + "epoch": 3.419435375456117, + "grad_norm": 6.0127129554748535, + "learning_rate": 1.373803107802495e-06, + "loss": 0.1392, + "num_input_tokens_seen": 168104256, + "step": 53415 + }, + { + "epoch": 3.419755457397094, + "grad_norm": 6.207236289978027, + "learning_rate": 1.3733043782887417e-06, + "loss": 0.1463, + "num_input_tokens_seen": 168121472, + "step": 53420 + }, + { + "epoch": 3.4200755393380704, + "grad_norm": 10.241035461425781, + "learning_rate": 1.372805705036301e-06, + "loss": 0.1322, + "num_input_tokens_seen": 168137344, + "step": 53425 + }, + { + "epoch": 3.4203956212790474, + "grad_norm": 3.5068910121917725, + "learning_rate": 1.37230708807007e-06, + "loss": 0.2022, + "num_input_tokens_seen": 168152768, + "step": 53430 + }, + { + "epoch": 3.4207157032200244, + "grad_norm": 5.184983253479004, + "learning_rate": 1.37180852741495e-06, + "loss": 0.1392, + "num_input_tokens_seen": 168169984, + "step": 53435 + }, + { + "epoch": 3.4210357851610014, + "grad_norm": 6.798260688781738, + "learning_rate": 1.3713100230958356e-06, + "loss": 0.1241, + "num_input_tokens_seen": 168186560, + "step": 53440 + }, + { + "epoch": 3.421355867101978, + "grad_norm": 11.477228164672852, + "learning_rate": 1.3708115751376194e-06, + "loss": 0.1573, + "num_input_tokens_seen": 168201728, + "step": 53445 + }, + { + "epoch": 3.421675949042955, + "grad_norm": 7.581920146942139, + "learning_rate": 1.3703131835651917e-06, + "loss": 0.136, + "num_input_tokens_seen": 168217408, + "step": 53450 + }, + { + "epoch": 3.421996030983932, + "grad_norm": 12.966188430786133, + "learning_rate": 1.3698148484034385e-06, + "loss": 0.1847, + "num_input_tokens_seen": 168233408, + "step": 53455 + }, + { + "epoch": 3.422316112924909, + "grad_norm": 12.449392318725586, + "learning_rate": 1.3693165696772465e-06, + "loss": 0.1363, + "num_input_tokens_seen": 168249472, + "step": 53460 + }, + { + "epoch": 3.422636194865886, + "grad_norm": 6.544641971588135, + "learning_rate": 1.3688183474114936e-06, + "loss": 0.1404, + "num_input_tokens_seen": 168263808, + "step": 53465 + }, + { + "epoch": 3.4229562768068624, + "grad_norm": 6.519454479217529, + "learning_rate": 1.3683201816310609e-06, + "loss": 0.2012, + "num_input_tokens_seen": 168279104, + "step": 53470 + }, + { + "epoch": 3.4232763587478394, + "grad_norm": 7.783103942871094, + "learning_rate": 1.367822072360824e-06, + "loss": 0.147, + "num_input_tokens_seen": 168294720, + "step": 53475 + }, + { + "epoch": 3.4235964406888164, + "grad_norm": 7.010312557220459, + "learning_rate": 1.3673240196256554e-06, + "loss": 0.265, + "num_input_tokens_seen": 168310848, + "step": 53480 + }, + { + "epoch": 3.4239165226297934, + "grad_norm": 89.81525421142578, + "learning_rate": 1.3668260234504255e-06, + "loss": 0.1805, + "num_input_tokens_seen": 168326208, + "step": 53485 + }, + { + "epoch": 3.42423660457077, + "grad_norm": 9.163293838500977, + "learning_rate": 1.3663280838600015e-06, + "loss": 0.1349, + "num_input_tokens_seen": 168341568, + "step": 53490 + }, + { + "epoch": 3.424556686511747, + "grad_norm": 8.70788860321045, + "learning_rate": 1.365830200879248e-06, + "loss": 0.1316, + "num_input_tokens_seen": 168358016, + "step": 53495 + }, + { + "epoch": 3.424876768452724, + "grad_norm": 8.45548152923584, + "learning_rate": 1.3653323745330255e-06, + "loss": 0.1207, + "num_input_tokens_seen": 168372672, + "step": 53500 + }, + { + "epoch": 3.425196850393701, + "grad_norm": 10.575445175170898, + "learning_rate": 1.3648346048461946e-06, + "loss": 0.1532, + "num_input_tokens_seen": 168387584, + "step": 53505 + }, + { + "epoch": 3.425516932334678, + "grad_norm": 15.10100269317627, + "learning_rate": 1.3643368918436112e-06, + "loss": 0.1866, + "num_input_tokens_seen": 168403264, + "step": 53510 + }, + { + "epoch": 3.4258370142756545, + "grad_norm": 13.628291130065918, + "learning_rate": 1.3638392355501279e-06, + "loss": 0.127, + "num_input_tokens_seen": 168417856, + "step": 53515 + }, + { + "epoch": 3.4261570962166314, + "grad_norm": 6.126720428466797, + "learning_rate": 1.363341635990595e-06, + "loss": 0.1511, + "num_input_tokens_seen": 168436224, + "step": 53520 + }, + { + "epoch": 3.4264771781576084, + "grad_norm": 8.892020225524902, + "learning_rate": 1.3628440931898602e-06, + "loss": 0.2167, + "num_input_tokens_seen": 168450816, + "step": 53525 + }, + { + "epoch": 3.4267972600985854, + "grad_norm": 5.5088019371032715, + "learning_rate": 1.362346607172767e-06, + "loss": 0.1586, + "num_input_tokens_seen": 168466880, + "step": 53530 + }, + { + "epoch": 3.427117342039562, + "grad_norm": 7.919561386108398, + "learning_rate": 1.3618491779641603e-06, + "loss": 0.1427, + "num_input_tokens_seen": 168483648, + "step": 53535 + }, + { + "epoch": 3.427437423980539, + "grad_norm": 8.890533447265625, + "learning_rate": 1.3613518055888755e-06, + "loss": 0.1957, + "num_input_tokens_seen": 168499456, + "step": 53540 + }, + { + "epoch": 3.427757505921516, + "grad_norm": 4.429718494415283, + "learning_rate": 1.3608544900717524e-06, + "loss": 0.1738, + "num_input_tokens_seen": 168515008, + "step": 53545 + }, + { + "epoch": 3.428077587862493, + "grad_norm": 5.249697208404541, + "learning_rate": 1.3603572314376207e-06, + "loss": 0.1664, + "num_input_tokens_seen": 168531200, + "step": 53550 + }, + { + "epoch": 3.4283976698034695, + "grad_norm": 5.978188514709473, + "learning_rate": 1.3598600297113135e-06, + "loss": 0.1422, + "num_input_tokens_seen": 168547328, + "step": 53555 + }, + { + "epoch": 3.4287177517444465, + "grad_norm": 18.365747451782227, + "learning_rate": 1.3593628849176576e-06, + "loss": 0.159, + "num_input_tokens_seen": 168563328, + "step": 53560 + }, + { + "epoch": 3.4290378336854235, + "grad_norm": 6.350849628448486, + "learning_rate": 1.3588657970814769e-06, + "loss": 0.1592, + "num_input_tokens_seen": 168579264, + "step": 53565 + }, + { + "epoch": 3.4293579156264005, + "grad_norm": 9.559139251708984, + "learning_rate": 1.3583687662275962e-06, + "loss": 0.1542, + "num_input_tokens_seen": 168595712, + "step": 53570 + }, + { + "epoch": 3.429677997567377, + "grad_norm": 6.2492804527282715, + "learning_rate": 1.3578717923808312e-06, + "loss": 0.1631, + "num_input_tokens_seen": 168612672, + "step": 53575 + }, + { + "epoch": 3.429998079508354, + "grad_norm": 11.267230987548828, + "learning_rate": 1.3573748755660015e-06, + "loss": 0.1705, + "num_input_tokens_seen": 168627776, + "step": 53580 + }, + { + "epoch": 3.430318161449331, + "grad_norm": 7.825589179992676, + "learning_rate": 1.356878015807917e-06, + "loss": 0.1229, + "num_input_tokens_seen": 168642816, + "step": 53585 + }, + { + "epoch": 3.430638243390308, + "grad_norm": 10.294879913330078, + "learning_rate": 1.3563812131313909e-06, + "loss": 0.2726, + "num_input_tokens_seen": 168658432, + "step": 53590 + }, + { + "epoch": 3.430958325331285, + "grad_norm": 2.036364793777466, + "learning_rate": 1.3558844675612297e-06, + "loss": 0.2198, + "num_input_tokens_seen": 168673472, + "step": 53595 + }, + { + "epoch": 3.4312784072722615, + "grad_norm": 6.826594829559326, + "learning_rate": 1.3553877791222394e-06, + "loss": 0.1386, + "num_input_tokens_seen": 168689664, + "step": 53600 + }, + { + "epoch": 3.4315984892132385, + "grad_norm": 62.02061080932617, + "learning_rate": 1.3548911478392213e-06, + "loss": 0.1974, + "num_input_tokens_seen": 168706112, + "step": 53605 + }, + { + "epoch": 3.4319185711542155, + "grad_norm": 8.498767852783203, + "learning_rate": 1.354394573736974e-06, + "loss": 0.2249, + "num_input_tokens_seen": 168721024, + "step": 53610 + }, + { + "epoch": 3.4322386530951925, + "grad_norm": 5.36261510848999, + "learning_rate": 1.353898056840294e-06, + "loss": 0.1364, + "num_input_tokens_seen": 168736448, + "step": 53615 + }, + { + "epoch": 3.432558735036169, + "grad_norm": 6.163771629333496, + "learning_rate": 1.3534015971739761e-06, + "loss": 0.1433, + "num_input_tokens_seen": 168751936, + "step": 53620 + }, + { + "epoch": 3.432878816977146, + "grad_norm": 6.478553771972656, + "learning_rate": 1.35290519476281e-06, + "loss": 0.2215, + "num_input_tokens_seen": 168767360, + "step": 53625 + }, + { + "epoch": 3.433198898918123, + "grad_norm": 8.190142631530762, + "learning_rate": 1.3524088496315835e-06, + "loss": 0.2581, + "num_input_tokens_seen": 168783104, + "step": 53630 + }, + { + "epoch": 3.4335189808591, + "grad_norm": 10.46129035949707, + "learning_rate": 1.3519125618050814e-06, + "loss": 0.2828, + "num_input_tokens_seen": 168799616, + "step": 53635 + }, + { + "epoch": 3.433839062800077, + "grad_norm": 47.46709442138672, + "learning_rate": 1.3514163313080857e-06, + "loss": 0.1831, + "num_input_tokens_seen": 168814656, + "step": 53640 + }, + { + "epoch": 3.4341591447410535, + "grad_norm": 4.7494611740112305, + "learning_rate": 1.350920158165376e-06, + "loss": 0.1593, + "num_input_tokens_seen": 168833152, + "step": 53645 + }, + { + "epoch": 3.4344792266820305, + "grad_norm": 7.999111652374268, + "learning_rate": 1.3504240424017268e-06, + "loss": 0.1567, + "num_input_tokens_seen": 168849216, + "step": 53650 + }, + { + "epoch": 3.4347993086230075, + "grad_norm": 3.994631767272949, + "learning_rate": 1.3499279840419155e-06, + "loss": 0.1661, + "num_input_tokens_seen": 168864512, + "step": 53655 + }, + { + "epoch": 3.4351193905639845, + "grad_norm": 91.12308502197266, + "learning_rate": 1.3494319831107079e-06, + "loss": 0.1892, + "num_input_tokens_seen": 168879616, + "step": 53660 + }, + { + "epoch": 3.435439472504961, + "grad_norm": 27.043670654296875, + "learning_rate": 1.348936039632875e-06, + "loss": 0.1741, + "num_input_tokens_seen": 168895616, + "step": 53665 + }, + { + "epoch": 3.435759554445938, + "grad_norm": 5.760595321655273, + "learning_rate": 1.3484401536331804e-06, + "loss": 0.1418, + "num_input_tokens_seen": 168910912, + "step": 53670 + }, + { + "epoch": 3.436079636386915, + "grad_norm": 12.078997611999512, + "learning_rate": 1.3479443251363867e-06, + "loss": 0.257, + "num_input_tokens_seen": 168926336, + "step": 53675 + }, + { + "epoch": 3.436399718327892, + "grad_norm": 10.701131820678711, + "learning_rate": 1.3474485541672521e-06, + "loss": 0.1206, + "num_input_tokens_seen": 168940800, + "step": 53680 + }, + { + "epoch": 3.436719800268869, + "grad_norm": 8.503337860107422, + "learning_rate": 1.3469528407505326e-06, + "loss": 0.2162, + "num_input_tokens_seen": 168956544, + "step": 53685 + }, + { + "epoch": 3.4370398822098456, + "grad_norm": 7.095001697540283, + "learning_rate": 1.3464571849109842e-06, + "loss": 0.1356, + "num_input_tokens_seen": 168971776, + "step": 53690 + }, + { + "epoch": 3.4373599641508226, + "grad_norm": 8.267680168151855, + "learning_rate": 1.3459615866733534e-06, + "loss": 0.1781, + "num_input_tokens_seen": 168986560, + "step": 53695 + }, + { + "epoch": 3.4376800460917996, + "grad_norm": 8.07947826385498, + "learning_rate": 1.3454660460623905e-06, + "loss": 0.1384, + "num_input_tokens_seen": 169001344, + "step": 53700 + }, + { + "epoch": 3.4380001280327765, + "grad_norm": 12.671607971191406, + "learning_rate": 1.3449705631028398e-06, + "loss": 0.1207, + "num_input_tokens_seen": 169017856, + "step": 53705 + }, + { + "epoch": 3.438320209973753, + "grad_norm": 15.12105655670166, + "learning_rate": 1.3444751378194427e-06, + "loss": 0.2258, + "num_input_tokens_seen": 169034816, + "step": 53710 + }, + { + "epoch": 3.43864029191473, + "grad_norm": 7.114831924438477, + "learning_rate": 1.3439797702369385e-06, + "loss": 0.1529, + "num_input_tokens_seen": 169049920, + "step": 53715 + }, + { + "epoch": 3.438960373855707, + "grad_norm": 5.805447101593018, + "learning_rate": 1.343484460380063e-06, + "loss": 0.2182, + "num_input_tokens_seen": 169065664, + "step": 53720 + }, + { + "epoch": 3.439280455796684, + "grad_norm": 8.022381782531738, + "learning_rate": 1.3429892082735496e-06, + "loss": 0.2003, + "num_input_tokens_seen": 169081216, + "step": 53725 + }, + { + "epoch": 3.439600537737661, + "grad_norm": 5.979063510894775, + "learning_rate": 1.3424940139421273e-06, + "loss": 0.2119, + "num_input_tokens_seen": 169095296, + "step": 53730 + }, + { + "epoch": 3.4399206196786376, + "grad_norm": 3.2248504161834717, + "learning_rate": 1.3419988774105258e-06, + "loss": 0.1635, + "num_input_tokens_seen": 169111680, + "step": 53735 + }, + { + "epoch": 3.4402407016196146, + "grad_norm": 6.371617794036865, + "learning_rate": 1.3415037987034685e-06, + "loss": 0.0987, + "num_input_tokens_seen": 169127872, + "step": 53740 + }, + { + "epoch": 3.4405607835605916, + "grad_norm": 6.488640308380127, + "learning_rate": 1.341008777845677e-06, + "loss": 0.1418, + "num_input_tokens_seen": 169142848, + "step": 53745 + }, + { + "epoch": 3.4408808655015686, + "grad_norm": 10.399250984191895, + "learning_rate": 1.3405138148618703e-06, + "loss": 0.1778, + "num_input_tokens_seen": 169160256, + "step": 53750 + }, + { + "epoch": 3.441200947442545, + "grad_norm": 4.571449279785156, + "learning_rate": 1.3400189097767641e-06, + "loss": 0.1404, + "num_input_tokens_seen": 169175936, + "step": 53755 + }, + { + "epoch": 3.441521029383522, + "grad_norm": 20.715045928955078, + "learning_rate": 1.3395240626150707e-06, + "loss": 0.1682, + "num_input_tokens_seen": 169190912, + "step": 53760 + }, + { + "epoch": 3.441841111324499, + "grad_norm": 5.437716007232666, + "learning_rate": 1.3390292734015025e-06, + "loss": 0.1384, + "num_input_tokens_seen": 169205760, + "step": 53765 + }, + { + "epoch": 3.442161193265476, + "grad_norm": 3.3705995082855225, + "learning_rate": 1.3385345421607632e-06, + "loss": 0.2142, + "num_input_tokens_seen": 169222144, + "step": 53770 + }, + { + "epoch": 3.442481275206453, + "grad_norm": 9.116323471069336, + "learning_rate": 1.338039868917561e-06, + "loss": 0.1446, + "num_input_tokens_seen": 169239104, + "step": 53775 + }, + { + "epoch": 3.4428013571474296, + "grad_norm": 8.259223937988281, + "learning_rate": 1.3375452536965938e-06, + "loss": 0.2169, + "num_input_tokens_seen": 169254400, + "step": 53780 + }, + { + "epoch": 3.4431214390884066, + "grad_norm": 4.188472747802734, + "learning_rate": 1.3370506965225621e-06, + "loss": 0.1781, + "num_input_tokens_seen": 169269632, + "step": 53785 + }, + { + "epoch": 3.4434415210293836, + "grad_norm": 32.33436965942383, + "learning_rate": 1.3365561974201612e-06, + "loss": 0.1089, + "num_input_tokens_seen": 169284800, + "step": 53790 + }, + { + "epoch": 3.4437616029703606, + "grad_norm": 10.330961227416992, + "learning_rate": 1.3360617564140826e-06, + "loss": 0.1775, + "num_input_tokens_seen": 169299648, + "step": 53795 + }, + { + "epoch": 3.444081684911337, + "grad_norm": 5.95247220993042, + "learning_rate": 1.3355673735290192e-06, + "loss": 0.2354, + "num_input_tokens_seen": 169315200, + "step": 53800 + }, + { + "epoch": 3.444401766852314, + "grad_norm": 2.505462408065796, + "learning_rate": 1.335073048789654e-06, + "loss": 0.1418, + "num_input_tokens_seen": 169331904, + "step": 53805 + }, + { + "epoch": 3.444721848793291, + "grad_norm": 6.227570533752441, + "learning_rate": 1.3345787822206745e-06, + "loss": 0.1935, + "num_input_tokens_seen": 169348032, + "step": 53810 + }, + { + "epoch": 3.445041930734268, + "grad_norm": 6.16571569442749, + "learning_rate": 1.3340845738467584e-06, + "loss": 0.1424, + "num_input_tokens_seen": 169363776, + "step": 53815 + }, + { + "epoch": 3.445362012675245, + "grad_norm": 7.238463878631592, + "learning_rate": 1.3335904236925868e-06, + "loss": 0.19, + "num_input_tokens_seen": 169378368, + "step": 53820 + }, + { + "epoch": 3.4456820946162217, + "grad_norm": 8.910646438598633, + "learning_rate": 1.3330963317828333e-06, + "loss": 0.1903, + "num_input_tokens_seen": 169393280, + "step": 53825 + }, + { + "epoch": 3.4460021765571986, + "grad_norm": 7.738539218902588, + "learning_rate": 1.3326022981421713e-06, + "loss": 0.1758, + "num_input_tokens_seen": 169409984, + "step": 53830 + }, + { + "epoch": 3.4463222584981756, + "grad_norm": 4.228361129760742, + "learning_rate": 1.3321083227952697e-06, + "loss": 0.1885, + "num_input_tokens_seen": 169425024, + "step": 53835 + }, + { + "epoch": 3.446642340439152, + "grad_norm": 38.32769775390625, + "learning_rate": 1.3316144057667951e-06, + "loss": 0.1566, + "num_input_tokens_seen": 169440960, + "step": 53840 + }, + { + "epoch": 3.446962422380129, + "grad_norm": 6.792619228363037, + "learning_rate": 1.3311205470814102e-06, + "loss": 0.1346, + "num_input_tokens_seen": 169455936, + "step": 53845 + }, + { + "epoch": 3.447282504321106, + "grad_norm": 5.762736797332764, + "learning_rate": 1.3306267467637773e-06, + "loss": 0.1984, + "num_input_tokens_seen": 169471168, + "step": 53850 + }, + { + "epoch": 3.447602586262083, + "grad_norm": 28.17193031311035, + "learning_rate": 1.3301330048385542e-06, + "loss": 0.1497, + "num_input_tokens_seen": 169486336, + "step": 53855 + }, + { + "epoch": 3.44792266820306, + "grad_norm": 8.623478889465332, + "learning_rate": 1.329639321330395e-06, + "loss": 0.163, + "num_input_tokens_seen": 169501440, + "step": 53860 + }, + { + "epoch": 3.4482427501440367, + "grad_norm": 7.940911293029785, + "learning_rate": 1.3291456962639518e-06, + "loss": 0.0954, + "num_input_tokens_seen": 169516480, + "step": 53865 + }, + { + "epoch": 3.4485628320850137, + "grad_norm": 6.4755377769470215, + "learning_rate": 1.328652129663874e-06, + "loss": 0.1782, + "num_input_tokens_seen": 169532864, + "step": 53870 + }, + { + "epoch": 3.4488829140259907, + "grad_norm": 20.055137634277344, + "learning_rate": 1.3281586215548072e-06, + "loss": 0.2489, + "num_input_tokens_seen": 169548032, + "step": 53875 + }, + { + "epoch": 3.4492029959669677, + "grad_norm": 8.698409080505371, + "learning_rate": 1.3276651719613942e-06, + "loss": 0.166, + "num_input_tokens_seen": 169564352, + "step": 53880 + }, + { + "epoch": 3.449523077907944, + "grad_norm": 9.435842514038086, + "learning_rate": 1.327171780908278e-06, + "loss": 0.1573, + "num_input_tokens_seen": 169579456, + "step": 53885 + }, + { + "epoch": 3.449843159848921, + "grad_norm": 5.806588649749756, + "learning_rate": 1.326678448420092e-06, + "loss": 0.1914, + "num_input_tokens_seen": 169595392, + "step": 53890 + }, + { + "epoch": 3.450163241789898, + "grad_norm": 7.715129852294922, + "learning_rate": 1.3261851745214738e-06, + "loss": 0.1444, + "num_input_tokens_seen": 169612928, + "step": 53895 + }, + { + "epoch": 3.450483323730875, + "grad_norm": 7.310022830963135, + "learning_rate": 1.3256919592370538e-06, + "loss": 0.1282, + "num_input_tokens_seen": 169628608, + "step": 53900 + }, + { + "epoch": 3.450803405671852, + "grad_norm": 5.408576011657715, + "learning_rate": 1.3251988025914604e-06, + "loss": 0.1949, + "num_input_tokens_seen": 169643840, + "step": 53905 + }, + { + "epoch": 3.4511234876128287, + "grad_norm": 3.863081932067871, + "learning_rate": 1.3247057046093192e-06, + "loss": 0.0894, + "num_input_tokens_seen": 169659904, + "step": 53910 + }, + { + "epoch": 3.4514435695538057, + "grad_norm": 6.273022651672363, + "learning_rate": 1.3242126653152526e-06, + "loss": 0.1221, + "num_input_tokens_seen": 169676288, + "step": 53915 + }, + { + "epoch": 3.4517636514947827, + "grad_norm": 6.877509593963623, + "learning_rate": 1.3237196847338833e-06, + "loss": 0.2169, + "num_input_tokens_seen": 169692224, + "step": 53920 + }, + { + "epoch": 3.4520837334357597, + "grad_norm": 6.197966575622559, + "learning_rate": 1.3232267628898238e-06, + "loss": 0.1808, + "num_input_tokens_seen": 169708864, + "step": 53925 + }, + { + "epoch": 3.4524038153767362, + "grad_norm": 6.659448623657227, + "learning_rate": 1.3227338998076907e-06, + "loss": 0.1437, + "num_input_tokens_seen": 169724288, + "step": 53930 + }, + { + "epoch": 3.4527238973177132, + "grad_norm": 8.578710556030273, + "learning_rate": 1.322241095512095e-06, + "loss": 0.1715, + "num_input_tokens_seen": 169739776, + "step": 53935 + }, + { + "epoch": 3.45304397925869, + "grad_norm": 10.475442886352539, + "learning_rate": 1.321748350027644e-06, + "loss": 0.1788, + "num_input_tokens_seen": 169755904, + "step": 53940 + }, + { + "epoch": 3.453364061199667, + "grad_norm": 11.665395736694336, + "learning_rate": 1.321255663378943e-06, + "loss": 0.174, + "num_input_tokens_seen": 169771200, + "step": 53945 + }, + { + "epoch": 3.453684143140644, + "grad_norm": 8.764792442321777, + "learning_rate": 1.3207630355905948e-06, + "loss": 0.1106, + "num_input_tokens_seen": 169785984, + "step": 53950 + }, + { + "epoch": 3.4540042250816207, + "grad_norm": 50.822959899902344, + "learning_rate": 1.320270466687198e-06, + "loss": 0.2024, + "num_input_tokens_seen": 169802560, + "step": 53955 + }, + { + "epoch": 3.4543243070225977, + "grad_norm": 6.225214004516602, + "learning_rate": 1.319777956693349e-06, + "loss": 0.0919, + "num_input_tokens_seen": 169817152, + "step": 53960 + }, + { + "epoch": 3.4546443889635747, + "grad_norm": 14.35523796081543, + "learning_rate": 1.3192855056336402e-06, + "loss": 0.1311, + "num_input_tokens_seen": 169833408, + "step": 53965 + }, + { + "epoch": 3.4549644709045517, + "grad_norm": 36.66970443725586, + "learning_rate": 1.3187931135326642e-06, + "loss": 0.1746, + "num_input_tokens_seen": 169849088, + "step": 53970 + }, + { + "epoch": 3.4552845528455283, + "grad_norm": 8.093793869018555, + "learning_rate": 1.3183007804150074e-06, + "loss": 0.1401, + "num_input_tokens_seen": 169864192, + "step": 53975 + }, + { + "epoch": 3.4556046347865053, + "grad_norm": 56.13145446777344, + "learning_rate": 1.3178085063052546e-06, + "loss": 0.2488, + "num_input_tokens_seen": 169879488, + "step": 53980 + }, + { + "epoch": 3.4559247167274822, + "grad_norm": 6.950430870056152, + "learning_rate": 1.317316291227987e-06, + "loss": 0.1743, + "num_input_tokens_seen": 169894976, + "step": 53985 + }, + { + "epoch": 3.4562447986684592, + "grad_norm": 11.305705070495605, + "learning_rate": 1.3168241352077823e-06, + "loss": 0.1497, + "num_input_tokens_seen": 169910080, + "step": 53990 + }, + { + "epoch": 3.4565648806094362, + "grad_norm": 7.557485103607178, + "learning_rate": 1.3163320382692196e-06, + "loss": 0.1819, + "num_input_tokens_seen": 169925120, + "step": 53995 + }, + { + "epoch": 3.4568849625504128, + "grad_norm": 5.793562889099121, + "learning_rate": 1.3158400004368672e-06, + "loss": 0.1647, + "num_input_tokens_seen": 169940096, + "step": 54000 + }, + { + "epoch": 3.4572050444913898, + "grad_norm": 4.147988319396973, + "learning_rate": 1.3153480217352993e-06, + "loss": 0.1532, + "num_input_tokens_seen": 169955520, + "step": 54005 + }, + { + "epoch": 3.4575251264323668, + "grad_norm": 6.0254316329956055, + "learning_rate": 1.3148561021890784e-06, + "loss": 0.1127, + "num_input_tokens_seen": 169972160, + "step": 54010 + }, + { + "epoch": 3.4578452083733437, + "grad_norm": 7.981163501739502, + "learning_rate": 1.314364241822772e-06, + "loss": 0.2193, + "num_input_tokens_seen": 169987712, + "step": 54015 + }, + { + "epoch": 3.4581652903143203, + "grad_norm": 9.44925594329834, + "learning_rate": 1.313872440660939e-06, + "loss": 0.1667, + "num_input_tokens_seen": 170003968, + "step": 54020 + }, + { + "epoch": 3.4584853722552973, + "grad_norm": 2.3284804821014404, + "learning_rate": 1.3133806987281373e-06, + "loss": 0.1623, + "num_input_tokens_seen": 170019008, + "step": 54025 + }, + { + "epoch": 3.4588054541962743, + "grad_norm": 14.280406951904297, + "learning_rate": 1.3128890160489248e-06, + "loss": 0.1978, + "num_input_tokens_seen": 170034368, + "step": 54030 + }, + { + "epoch": 3.4591255361372513, + "grad_norm": 6.121983528137207, + "learning_rate": 1.312397392647849e-06, + "loss": 0.1493, + "num_input_tokens_seen": 170050368, + "step": 54035 + }, + { + "epoch": 3.4594456180782283, + "grad_norm": 6.383867263793945, + "learning_rate": 1.3119058285494638e-06, + "loss": 0.2024, + "num_input_tokens_seen": 170066560, + "step": 54040 + }, + { + "epoch": 3.459765700019205, + "grad_norm": 4.258298397064209, + "learning_rate": 1.3114143237783106e-06, + "loss": 0.1027, + "num_input_tokens_seen": 170083520, + "step": 54045 + }, + { + "epoch": 3.460085781960182, + "grad_norm": 6.881887912750244, + "learning_rate": 1.3109228783589361e-06, + "loss": 0.1372, + "num_input_tokens_seen": 170099904, + "step": 54050 + }, + { + "epoch": 3.460405863901159, + "grad_norm": 8.09726333618164, + "learning_rate": 1.3104314923158794e-06, + "loss": 0.1701, + "num_input_tokens_seen": 170116736, + "step": 54055 + }, + { + "epoch": 3.4607259458421358, + "grad_norm": 9.061049461364746, + "learning_rate": 1.309940165673678e-06, + "loss": 0.1479, + "num_input_tokens_seen": 170131328, + "step": 54060 + }, + { + "epoch": 3.4610460277831123, + "grad_norm": 0.5233128070831299, + "learning_rate": 1.3094488984568657e-06, + "loss": 0.1405, + "num_input_tokens_seen": 170147584, + "step": 54065 + }, + { + "epoch": 3.4613661097240893, + "grad_norm": 7.557052135467529, + "learning_rate": 1.308957690689974e-06, + "loss": 0.1724, + "num_input_tokens_seen": 170164288, + "step": 54070 + }, + { + "epoch": 3.4616861916650663, + "grad_norm": 25.838651657104492, + "learning_rate": 1.3084665423975304e-06, + "loss": 0.2134, + "num_input_tokens_seen": 170179776, + "step": 54075 + }, + { + "epoch": 3.4620062736060433, + "grad_norm": 12.085880279541016, + "learning_rate": 1.3079754536040624e-06, + "loss": 0.1867, + "num_input_tokens_seen": 170195648, + "step": 54080 + }, + { + "epoch": 3.4623263555470203, + "grad_norm": 4.754971981048584, + "learning_rate": 1.3074844243340906e-06, + "loss": 0.0758, + "num_input_tokens_seen": 170210816, + "step": 54085 + }, + { + "epoch": 3.462646437487997, + "grad_norm": 8.748384475708008, + "learning_rate": 1.3069934546121354e-06, + "loss": 0.2233, + "num_input_tokens_seen": 170225408, + "step": 54090 + }, + { + "epoch": 3.462966519428974, + "grad_norm": 18.211225509643555, + "learning_rate": 1.3065025444627134e-06, + "loss": 0.1863, + "num_input_tokens_seen": 170241088, + "step": 54095 + }, + { + "epoch": 3.463286601369951, + "grad_norm": 6.175481796264648, + "learning_rate": 1.3060116939103368e-06, + "loss": 0.1855, + "num_input_tokens_seen": 170255936, + "step": 54100 + }, + { + "epoch": 3.4636066833109274, + "grad_norm": 6.4635539054870605, + "learning_rate": 1.3055209029795174e-06, + "loss": 0.0975, + "num_input_tokens_seen": 170271040, + "step": 54105 + }, + { + "epoch": 3.4639267652519043, + "grad_norm": 5.007818222045898, + "learning_rate": 1.3050301716947613e-06, + "loss": 0.1701, + "num_input_tokens_seen": 170287552, + "step": 54110 + }, + { + "epoch": 3.4642468471928813, + "grad_norm": 8.480005264282227, + "learning_rate": 1.304539500080576e-06, + "loss": 0.1511, + "num_input_tokens_seen": 170303744, + "step": 54115 + }, + { + "epoch": 3.4645669291338583, + "grad_norm": 5.046184539794922, + "learning_rate": 1.3040488881614593e-06, + "loss": 0.1382, + "num_input_tokens_seen": 170321728, + "step": 54120 + }, + { + "epoch": 3.4648870110748353, + "grad_norm": 10.968684196472168, + "learning_rate": 1.3035583359619125e-06, + "loss": 0.2174, + "num_input_tokens_seen": 170336768, + "step": 54125 + }, + { + "epoch": 3.465207093015812, + "grad_norm": 13.400850296020508, + "learning_rate": 1.3030678435064304e-06, + "loss": 0.2128, + "num_input_tokens_seen": 170352576, + "step": 54130 + }, + { + "epoch": 3.465527174956789, + "grad_norm": 7.730222225189209, + "learning_rate": 1.3025774108195055e-06, + "loss": 0.1207, + "num_input_tokens_seen": 170368832, + "step": 54135 + }, + { + "epoch": 3.465847256897766, + "grad_norm": 7.529388904571533, + "learning_rate": 1.3020870379256273e-06, + "loss": 0.1648, + "num_input_tokens_seen": 170384256, + "step": 54140 + }, + { + "epoch": 3.466167338838743, + "grad_norm": 6.173052787780762, + "learning_rate": 1.3015967248492822e-06, + "loss": 0.2808, + "num_input_tokens_seen": 170399360, + "step": 54145 + }, + { + "epoch": 3.4664874207797194, + "grad_norm": 2.647343397140503, + "learning_rate": 1.3011064716149563e-06, + "loss": 0.1558, + "num_input_tokens_seen": 170415360, + "step": 54150 + }, + { + "epoch": 3.4668075027206964, + "grad_norm": 5.751936435699463, + "learning_rate": 1.3006162782471264e-06, + "loss": 0.1096, + "num_input_tokens_seen": 170430528, + "step": 54155 + }, + { + "epoch": 3.4671275846616734, + "grad_norm": 9.07454776763916, + "learning_rate": 1.300126144770273e-06, + "loss": 0.1795, + "num_input_tokens_seen": 170448000, + "step": 54160 + }, + { + "epoch": 3.4674476666026504, + "grad_norm": 5.940525054931641, + "learning_rate": 1.2996360712088702e-06, + "loss": 0.1462, + "num_input_tokens_seen": 170463488, + "step": 54165 + }, + { + "epoch": 3.4677677485436273, + "grad_norm": 13.774595260620117, + "learning_rate": 1.2991460575873893e-06, + "loss": 0.1631, + "num_input_tokens_seen": 170479680, + "step": 54170 + }, + { + "epoch": 3.468087830484604, + "grad_norm": 10.769243240356445, + "learning_rate": 1.2986561039302997e-06, + "loss": 0.2201, + "num_input_tokens_seen": 170495616, + "step": 54175 + }, + { + "epoch": 3.468407912425581, + "grad_norm": 8.523555755615234, + "learning_rate": 1.2981662102620662e-06, + "loss": 0.1784, + "num_input_tokens_seen": 170512960, + "step": 54180 + }, + { + "epoch": 3.468727994366558, + "grad_norm": 38.95656204223633, + "learning_rate": 1.2976763766071525e-06, + "loss": 0.2556, + "num_input_tokens_seen": 170529024, + "step": 54185 + }, + { + "epoch": 3.469048076307535, + "grad_norm": 6.709425449371338, + "learning_rate": 1.2971866029900176e-06, + "loss": 0.1448, + "num_input_tokens_seen": 170544832, + "step": 54190 + }, + { + "epoch": 3.4693681582485114, + "grad_norm": 6.289869785308838, + "learning_rate": 1.2966968894351175e-06, + "loss": 0.1645, + "num_input_tokens_seen": 170559808, + "step": 54195 + }, + { + "epoch": 3.4696882401894884, + "grad_norm": 10.717452049255371, + "learning_rate": 1.296207235966908e-06, + "loss": 0.1558, + "num_input_tokens_seen": 170575424, + "step": 54200 + }, + { + "epoch": 3.4700083221304654, + "grad_norm": 6.226255416870117, + "learning_rate": 1.295717642609839e-06, + "loss": 0.1591, + "num_input_tokens_seen": 170591232, + "step": 54205 + }, + { + "epoch": 3.4703284040714424, + "grad_norm": 10.230362892150879, + "learning_rate": 1.295228109388358e-06, + "loss": 0.1287, + "num_input_tokens_seen": 170607232, + "step": 54210 + }, + { + "epoch": 3.4706484860124194, + "grad_norm": 7.8446736335754395, + "learning_rate": 1.2947386363269097e-06, + "loss": 0.1459, + "num_input_tokens_seen": 170622272, + "step": 54215 + }, + { + "epoch": 3.470968567953396, + "grad_norm": 5.72114372253418, + "learning_rate": 1.2942492234499348e-06, + "loss": 0.1371, + "num_input_tokens_seen": 170637824, + "step": 54220 + }, + { + "epoch": 3.471288649894373, + "grad_norm": 14.693325996398926, + "learning_rate": 1.2937598707818755e-06, + "loss": 0.1282, + "num_input_tokens_seen": 170652928, + "step": 54225 + }, + { + "epoch": 3.47160873183535, + "grad_norm": 50.40850067138672, + "learning_rate": 1.2932705783471628e-06, + "loss": 0.1599, + "num_input_tokens_seen": 170668864, + "step": 54230 + }, + { + "epoch": 3.471928813776327, + "grad_norm": 10.043702125549316, + "learning_rate": 1.2927813461702337e-06, + "loss": 0.134, + "num_input_tokens_seen": 170684416, + "step": 54235 + }, + { + "epoch": 3.4722488957173034, + "grad_norm": 10.823333740234375, + "learning_rate": 1.2922921742755145e-06, + "loss": 0.1454, + "num_input_tokens_seen": 170702720, + "step": 54240 + }, + { + "epoch": 3.4725689776582804, + "grad_norm": 8.208476066589355, + "learning_rate": 1.2918030626874339e-06, + "loss": 0.1346, + "num_input_tokens_seen": 170717568, + "step": 54245 + }, + { + "epoch": 3.4728890595992574, + "grad_norm": 20.151227951049805, + "learning_rate": 1.2913140114304157e-06, + "loss": 0.1632, + "num_input_tokens_seen": 170732416, + "step": 54250 + }, + { + "epoch": 3.4732091415402344, + "grad_norm": 7.197824478149414, + "learning_rate": 1.2908250205288786e-06, + "loss": 0.1729, + "num_input_tokens_seen": 170747008, + "step": 54255 + }, + { + "epoch": 3.4735292234812114, + "grad_norm": 4.025030612945557, + "learning_rate": 1.2903360900072434e-06, + "loss": 0.1996, + "num_input_tokens_seen": 170762624, + "step": 54260 + }, + { + "epoch": 3.473849305422188, + "grad_norm": 7.491279602050781, + "learning_rate": 1.2898472198899212e-06, + "loss": 0.1166, + "num_input_tokens_seen": 170777216, + "step": 54265 + }, + { + "epoch": 3.474169387363165, + "grad_norm": 12.970098495483398, + "learning_rate": 1.2893584102013262e-06, + "loss": 0.1858, + "num_input_tokens_seen": 170792128, + "step": 54270 + }, + { + "epoch": 3.474489469304142, + "grad_norm": 13.22617244720459, + "learning_rate": 1.2888696609658663e-06, + "loss": 0.1894, + "num_input_tokens_seen": 170808128, + "step": 54275 + }, + { + "epoch": 3.474809551245119, + "grad_norm": 6.4993743896484375, + "learning_rate": 1.288380972207947e-06, + "loss": 0.1623, + "num_input_tokens_seen": 170824448, + "step": 54280 + }, + { + "epoch": 3.4751296331860955, + "grad_norm": 5.669597148895264, + "learning_rate": 1.2878923439519709e-06, + "loss": 0.224, + "num_input_tokens_seen": 170840512, + "step": 54285 + }, + { + "epoch": 3.4754497151270725, + "grad_norm": 8.18204402923584, + "learning_rate": 1.2874037762223373e-06, + "loss": 0.1499, + "num_input_tokens_seen": 170857088, + "step": 54290 + }, + { + "epoch": 3.4757697970680494, + "grad_norm": 4.03633451461792, + "learning_rate": 1.2869152690434434e-06, + "loss": 0.1297, + "num_input_tokens_seen": 170872576, + "step": 54295 + }, + { + "epoch": 3.4760898790090264, + "grad_norm": 3.072360038757324, + "learning_rate": 1.286426822439682e-06, + "loss": 0.1237, + "num_input_tokens_seen": 170887872, + "step": 54300 + }, + { + "epoch": 3.4764099609500034, + "grad_norm": 6.638493537902832, + "learning_rate": 1.2859384364354426e-06, + "loss": 0.1636, + "num_input_tokens_seen": 170903168, + "step": 54305 + }, + { + "epoch": 3.47673004289098, + "grad_norm": 6.378756046295166, + "learning_rate": 1.285450111055116e-06, + "loss": 0.1947, + "num_input_tokens_seen": 170919424, + "step": 54310 + }, + { + "epoch": 3.477050124831957, + "grad_norm": 22.806344985961914, + "learning_rate": 1.2849618463230826e-06, + "loss": 0.1981, + "num_input_tokens_seen": 170934336, + "step": 54315 + }, + { + "epoch": 3.477370206772934, + "grad_norm": 10.023478507995605, + "learning_rate": 1.2844736422637267e-06, + "loss": 0.199, + "num_input_tokens_seen": 170953024, + "step": 54320 + }, + { + "epoch": 3.477690288713911, + "grad_norm": 11.75989818572998, + "learning_rate": 1.2839854989014256e-06, + "loss": 0.1898, + "num_input_tokens_seen": 170967808, + "step": 54325 + }, + { + "epoch": 3.4780103706548875, + "grad_norm": 3.5723876953125, + "learning_rate": 1.2834974162605546e-06, + "loss": 0.111, + "num_input_tokens_seen": 170983360, + "step": 54330 + }, + { + "epoch": 3.4783304525958645, + "grad_norm": 7.460383892059326, + "learning_rate": 1.2830093943654858e-06, + "loss": 0.0923, + "num_input_tokens_seen": 170998784, + "step": 54335 + }, + { + "epoch": 3.4786505345368415, + "grad_norm": 7.625888824462891, + "learning_rate": 1.2825214332405884e-06, + "loss": 0.128, + "num_input_tokens_seen": 171013440, + "step": 54340 + }, + { + "epoch": 3.4789706164778185, + "grad_norm": 7.242503643035889, + "learning_rate": 1.2820335329102308e-06, + "loss": 0.1324, + "num_input_tokens_seen": 171028928, + "step": 54345 + }, + { + "epoch": 3.4792906984187955, + "grad_norm": 59.40541458129883, + "learning_rate": 1.2815456933987725e-06, + "loss": 0.1495, + "num_input_tokens_seen": 171044672, + "step": 54350 + }, + { + "epoch": 3.479610780359772, + "grad_norm": 4.307080268859863, + "learning_rate": 1.2810579147305762e-06, + "loss": 0.1096, + "num_input_tokens_seen": 171060480, + "step": 54355 + }, + { + "epoch": 3.479930862300749, + "grad_norm": 7.514297008514404, + "learning_rate": 1.280570196929999e-06, + "loss": 0.1723, + "num_input_tokens_seen": 171075968, + "step": 54360 + }, + { + "epoch": 3.480250944241726, + "grad_norm": 6.617292404174805, + "learning_rate": 1.2800825400213937e-06, + "loss": 0.1517, + "num_input_tokens_seen": 171091840, + "step": 54365 + }, + { + "epoch": 3.4805710261827025, + "grad_norm": 14.457925796508789, + "learning_rate": 1.2795949440291122e-06, + "loss": 0.1621, + "num_input_tokens_seen": 171107008, + "step": 54370 + }, + { + "epoch": 3.4808911081236795, + "grad_norm": 5.516589164733887, + "learning_rate": 1.2791074089775018e-06, + "loss": 0.123, + "num_input_tokens_seen": 171122176, + "step": 54375 + }, + { + "epoch": 3.4812111900646565, + "grad_norm": 8.064054489135742, + "learning_rate": 1.2786199348909095e-06, + "loss": 0.1391, + "num_input_tokens_seen": 171137600, + "step": 54380 + }, + { + "epoch": 3.4815312720056335, + "grad_norm": 4.474936485290527, + "learning_rate": 1.2781325217936741e-06, + "loss": 0.123, + "num_input_tokens_seen": 171153536, + "step": 54385 + }, + { + "epoch": 3.4818513539466105, + "grad_norm": 5.524846076965332, + "learning_rate": 1.2776451697101367e-06, + "loss": 0.0861, + "num_input_tokens_seen": 171170048, + "step": 54390 + }, + { + "epoch": 3.482171435887587, + "grad_norm": 17.236263275146484, + "learning_rate": 1.2771578786646328e-06, + "loss": 0.1005, + "num_input_tokens_seen": 171185664, + "step": 54395 + }, + { + "epoch": 3.482491517828564, + "grad_norm": 5.803300380706787, + "learning_rate": 1.2766706486814948e-06, + "loss": 0.1379, + "num_input_tokens_seen": 171200448, + "step": 54400 + }, + { + "epoch": 3.482811599769541, + "grad_norm": 6.408257961273193, + "learning_rate": 1.2761834797850524e-06, + "loss": 0.19, + "num_input_tokens_seen": 171216000, + "step": 54405 + }, + { + "epoch": 3.483131681710518, + "grad_norm": 7.625069618225098, + "learning_rate": 1.2756963719996323e-06, + "loss": 0.109, + "num_input_tokens_seen": 171231424, + "step": 54410 + }, + { + "epoch": 3.4834517636514946, + "grad_norm": 8.229514122009277, + "learning_rate": 1.2752093253495584e-06, + "loss": 0.146, + "num_input_tokens_seen": 171246272, + "step": 54415 + }, + { + "epoch": 3.4837718455924715, + "grad_norm": 13.41826057434082, + "learning_rate": 1.2747223398591513e-06, + "loss": 0.172, + "num_input_tokens_seen": 171261440, + "step": 54420 + }, + { + "epoch": 3.4840919275334485, + "grad_norm": 3.7026875019073486, + "learning_rate": 1.274235415552727e-06, + "loss": 0.1479, + "num_input_tokens_seen": 171275264, + "step": 54425 + }, + { + "epoch": 3.4844120094744255, + "grad_norm": 5.665170192718506, + "learning_rate": 1.2737485524546025e-06, + "loss": 0.2251, + "num_input_tokens_seen": 171291392, + "step": 54430 + }, + { + "epoch": 3.4847320914154025, + "grad_norm": 7.663479328155518, + "learning_rate": 1.2732617505890878e-06, + "loss": 0.1986, + "num_input_tokens_seen": 171306944, + "step": 54435 + }, + { + "epoch": 3.485052173356379, + "grad_norm": 8.687122344970703, + "learning_rate": 1.2727750099804915e-06, + "loss": 0.2145, + "num_input_tokens_seen": 171323136, + "step": 54440 + }, + { + "epoch": 3.485372255297356, + "grad_norm": 25.693119049072266, + "learning_rate": 1.2722883306531188e-06, + "loss": 0.1658, + "num_input_tokens_seen": 171339264, + "step": 54445 + }, + { + "epoch": 3.485692337238333, + "grad_norm": 9.72400951385498, + "learning_rate": 1.271801712631271e-06, + "loss": 0.1896, + "num_input_tokens_seen": 171355392, + "step": 54450 + }, + { + "epoch": 3.48601241917931, + "grad_norm": 56.094093322753906, + "learning_rate": 1.27131515593925e-06, + "loss": 0.2414, + "num_input_tokens_seen": 171371456, + "step": 54455 + }, + { + "epoch": 3.4863325011202866, + "grad_norm": 7.483601093292236, + "learning_rate": 1.2708286606013487e-06, + "loss": 0.1557, + "num_input_tokens_seen": 171386304, + "step": 54460 + }, + { + "epoch": 3.4866525830612636, + "grad_norm": 7.957963943481445, + "learning_rate": 1.270342226641863e-06, + "loss": 0.1722, + "num_input_tokens_seen": 171400896, + "step": 54465 + }, + { + "epoch": 3.4869726650022406, + "grad_norm": 5.943670272827148, + "learning_rate": 1.2698558540850797e-06, + "loss": 0.1206, + "num_input_tokens_seen": 171415488, + "step": 54470 + }, + { + "epoch": 3.4872927469432176, + "grad_norm": 4.708800792694092, + "learning_rate": 1.2693695429552883e-06, + "loss": 0.1393, + "num_input_tokens_seen": 171430976, + "step": 54475 + }, + { + "epoch": 3.4876128288841945, + "grad_norm": 23.689510345458984, + "learning_rate": 1.268883293276772e-06, + "loss": 0.1704, + "num_input_tokens_seen": 171447040, + "step": 54480 + }, + { + "epoch": 3.487932910825171, + "grad_norm": 6.109200954437256, + "learning_rate": 1.268397105073811e-06, + "loss": 0.1885, + "num_input_tokens_seen": 171463616, + "step": 54485 + }, + { + "epoch": 3.488252992766148, + "grad_norm": 13.895539283752441, + "learning_rate": 1.2679109783706838e-06, + "loss": 0.2525, + "num_input_tokens_seen": 171480128, + "step": 54490 + }, + { + "epoch": 3.488573074707125, + "grad_norm": 9.085663795471191, + "learning_rate": 1.2674249131916636e-06, + "loss": 0.1436, + "num_input_tokens_seen": 171495744, + "step": 54495 + }, + { + "epoch": 3.488893156648102, + "grad_norm": 10.335644721984863, + "learning_rate": 1.2669389095610237e-06, + "loss": 0.183, + "num_input_tokens_seen": 171510272, + "step": 54500 + }, + { + "epoch": 3.4892132385890786, + "grad_norm": 4.133541584014893, + "learning_rate": 1.2664529675030321e-06, + "loss": 0.1267, + "num_input_tokens_seen": 171525568, + "step": 54505 + }, + { + "epoch": 3.4895333205300556, + "grad_norm": 8.66139030456543, + "learning_rate": 1.2659670870419537e-06, + "loss": 0.2177, + "num_input_tokens_seen": 171541376, + "step": 54510 + }, + { + "epoch": 3.4898534024710326, + "grad_norm": 7.197035789489746, + "learning_rate": 1.2654812682020517e-06, + "loss": 0.1468, + "num_input_tokens_seen": 171557312, + "step": 54515 + }, + { + "epoch": 3.4901734844120096, + "grad_norm": 5.347679138183594, + "learning_rate": 1.2649955110075846e-06, + "loss": 0.0905, + "num_input_tokens_seen": 171573696, + "step": 54520 + }, + { + "epoch": 3.4904935663529866, + "grad_norm": 6.566662311553955, + "learning_rate": 1.2645098154828084e-06, + "loss": 0.1485, + "num_input_tokens_seen": 171588928, + "step": 54525 + }, + { + "epoch": 3.490813648293963, + "grad_norm": 3.677280902862549, + "learning_rate": 1.2640241816519765e-06, + "loss": 0.1302, + "num_input_tokens_seen": 171604160, + "step": 54530 + }, + { + "epoch": 3.49113373023494, + "grad_norm": 17.880069732666016, + "learning_rate": 1.2635386095393387e-06, + "loss": 0.1761, + "num_input_tokens_seen": 171619584, + "step": 54535 + }, + { + "epoch": 3.491453812175917, + "grad_norm": 23.331031799316406, + "learning_rate": 1.2630530991691436e-06, + "loss": 0.2513, + "num_input_tokens_seen": 171635456, + "step": 54540 + }, + { + "epoch": 3.491773894116894, + "grad_norm": 10.289852142333984, + "learning_rate": 1.262567650565632e-06, + "loss": 0.1897, + "num_input_tokens_seen": 171650880, + "step": 54545 + }, + { + "epoch": 3.4920939760578706, + "grad_norm": 9.117496490478516, + "learning_rate": 1.2620822637530475e-06, + "loss": 0.1038, + "num_input_tokens_seen": 171666304, + "step": 54550 + }, + { + "epoch": 3.4924140579988476, + "grad_norm": 6.866469383239746, + "learning_rate": 1.2615969387556265e-06, + "loss": 0.1508, + "num_input_tokens_seen": 171681856, + "step": 54555 + }, + { + "epoch": 3.4927341399398246, + "grad_norm": 3.5174481868743896, + "learning_rate": 1.2611116755976037e-06, + "loss": 0.2129, + "num_input_tokens_seen": 171697600, + "step": 54560 + }, + { + "epoch": 3.4930542218808016, + "grad_norm": 9.414402961730957, + "learning_rate": 1.2606264743032108e-06, + "loss": 0.1515, + "num_input_tokens_seen": 171713024, + "step": 54565 + }, + { + "epoch": 3.4933743038217786, + "grad_norm": 10.448552131652832, + "learning_rate": 1.260141334896675e-06, + "loss": 0.2095, + "num_input_tokens_seen": 171728128, + "step": 54570 + }, + { + "epoch": 3.493694385762755, + "grad_norm": 9.364716529846191, + "learning_rate": 1.259656257402225e-06, + "loss": 0.1379, + "num_input_tokens_seen": 171743744, + "step": 54575 + }, + { + "epoch": 3.494014467703732, + "grad_norm": 6.057508945465088, + "learning_rate": 1.2591712418440788e-06, + "loss": 0.2119, + "num_input_tokens_seen": 171759616, + "step": 54580 + }, + { + "epoch": 3.494334549644709, + "grad_norm": 7.28433084487915, + "learning_rate": 1.2586862882464584e-06, + "loss": 0.1256, + "num_input_tokens_seen": 171776192, + "step": 54585 + }, + { + "epoch": 3.494654631585686, + "grad_norm": 3.2514150142669678, + "learning_rate": 1.2582013966335788e-06, + "loss": 0.1207, + "num_input_tokens_seen": 171791168, + "step": 54590 + }, + { + "epoch": 3.4949747135266627, + "grad_norm": 5.7048444747924805, + "learning_rate": 1.2577165670296535e-06, + "loss": 0.1626, + "num_input_tokens_seen": 171806528, + "step": 54595 + }, + { + "epoch": 3.4952947954676397, + "grad_norm": 19.845829010009766, + "learning_rate": 1.2572317994588918e-06, + "loss": 0.1652, + "num_input_tokens_seen": 171821504, + "step": 54600 + }, + { + "epoch": 3.4956148774086166, + "grad_norm": 37.32059097290039, + "learning_rate": 1.2567470939454995e-06, + "loss": 0.1281, + "num_input_tokens_seen": 171837312, + "step": 54605 + }, + { + "epoch": 3.4959349593495936, + "grad_norm": 12.959693908691406, + "learning_rate": 1.2562624505136839e-06, + "loss": 0.164, + "num_input_tokens_seen": 171852416, + "step": 54610 + }, + { + "epoch": 3.4962550412905706, + "grad_norm": 10.574673652648926, + "learning_rate": 1.255777869187641e-06, + "loss": 0.1751, + "num_input_tokens_seen": 171867840, + "step": 54615 + }, + { + "epoch": 3.496575123231547, + "grad_norm": 10.810070037841797, + "learning_rate": 1.2552933499915715e-06, + "loss": 0.2423, + "num_input_tokens_seen": 171882816, + "step": 54620 + }, + { + "epoch": 3.496895205172524, + "grad_norm": 6.985939025878906, + "learning_rate": 1.2548088929496682e-06, + "loss": 0.2101, + "num_input_tokens_seen": 171899520, + "step": 54625 + }, + { + "epoch": 3.497215287113501, + "grad_norm": 3.815375328063965, + "learning_rate": 1.254324498086123e-06, + "loss": 0.1913, + "num_input_tokens_seen": 171915584, + "step": 54630 + }, + { + "epoch": 3.4975353690544777, + "grad_norm": 5.155181884765625, + "learning_rate": 1.2538401654251242e-06, + "loss": 0.1486, + "num_input_tokens_seen": 171930624, + "step": 54635 + }, + { + "epoch": 3.4978554509954547, + "grad_norm": 3.693718671798706, + "learning_rate": 1.2533558949908564e-06, + "loss": 0.1292, + "num_input_tokens_seen": 171946112, + "step": 54640 + }, + { + "epoch": 3.4981755329364317, + "grad_norm": 9.416372299194336, + "learning_rate": 1.2528716868075014e-06, + "loss": 0.183, + "num_input_tokens_seen": 171961856, + "step": 54645 + }, + { + "epoch": 3.4984956148774087, + "grad_norm": 6.98086404800415, + "learning_rate": 1.2523875408992387e-06, + "loss": 0.1333, + "num_input_tokens_seen": 171978624, + "step": 54650 + }, + { + "epoch": 3.4988156968183857, + "grad_norm": 7.8055925369262695, + "learning_rate": 1.2519034572902422e-06, + "loss": 0.1237, + "num_input_tokens_seen": 171994688, + "step": 54655 + }, + { + "epoch": 3.499135778759362, + "grad_norm": 10.854934692382812, + "learning_rate": 1.2514194360046883e-06, + "loss": 0.1462, + "num_input_tokens_seen": 172010048, + "step": 54660 + }, + { + "epoch": 3.499455860700339, + "grad_norm": 10.010387420654297, + "learning_rate": 1.250935477066742e-06, + "loss": 0.19, + "num_input_tokens_seen": 172025152, + "step": 54665 + }, + { + "epoch": 3.499775942641316, + "grad_norm": 8.871697425842285, + "learning_rate": 1.250451580500573e-06, + "loss": 0.169, + "num_input_tokens_seen": 172041856, + "step": 54670 + }, + { + "epoch": 3.500096024582293, + "grad_norm": 6.262462139129639, + "learning_rate": 1.249967746330343e-06, + "loss": 0.1667, + "num_input_tokens_seen": 172056576, + "step": 54675 + }, + { + "epoch": 3.5004161065232697, + "grad_norm": 3.5407485961914062, + "learning_rate": 1.249483974580212e-06, + "loss": 0.159, + "num_input_tokens_seen": 172072256, + "step": 54680 + }, + { + "epoch": 3.5006721720760514, + "eval_loss": 0.5747029781341553, + "eval_runtime": 51.0151, + "eval_samples_per_second": 272.194, + "eval_steps_per_second": 34.029, + "num_input_tokens_seen": 172084032, + "step": 54684 + }, + { + "epoch": 3.5007361884642467, + "grad_norm": 10.29598331451416, + "learning_rate": 1.2490002652743394e-06, + "loss": 0.1904, + "num_input_tokens_seen": 172086912, + "step": 54685 + }, + { + "epoch": 3.5010562704052237, + "grad_norm": 14.774727821350098, + "learning_rate": 1.2485166184368756e-06, + "loss": 0.1551, + "num_input_tokens_seen": 172102144, + "step": 54690 + }, + { + "epoch": 3.5013763523462007, + "grad_norm": 5.197239875793457, + "learning_rate": 1.2480330340919746e-06, + "loss": 0.1735, + "num_input_tokens_seen": 172117184, + "step": 54695 + }, + { + "epoch": 3.5016964342871777, + "grad_norm": 5.285827159881592, + "learning_rate": 1.2475495122637813e-06, + "loss": 0.1312, + "num_input_tokens_seen": 172132736, + "step": 54700 + }, + { + "epoch": 3.5020165162281542, + "grad_norm": 11.04332447052002, + "learning_rate": 1.247066052976442e-06, + "loss": 0.1723, + "num_input_tokens_seen": 172148160, + "step": 54705 + }, + { + "epoch": 3.5023365981691312, + "grad_norm": 17.59695816040039, + "learning_rate": 1.246582656254098e-06, + "loss": 0.2084, + "num_input_tokens_seen": 172163392, + "step": 54710 + }, + { + "epoch": 3.5026566801101082, + "grad_norm": 14.077553749084473, + "learning_rate": 1.2460993221208872e-06, + "loss": 0.2715, + "num_input_tokens_seen": 172178688, + "step": 54715 + }, + { + "epoch": 3.502976762051085, + "grad_norm": 6.678267002105713, + "learning_rate": 1.245616050600945e-06, + "loss": 0.1911, + "num_input_tokens_seen": 172195712, + "step": 54720 + }, + { + "epoch": 3.5032968439920618, + "grad_norm": 4.643550872802734, + "learning_rate": 1.2451328417184025e-06, + "loss": 0.1843, + "num_input_tokens_seen": 172211712, + "step": 54725 + }, + { + "epoch": 3.5036169259330388, + "grad_norm": 8.544946670532227, + "learning_rate": 1.2446496954973903e-06, + "loss": 0.2073, + "num_input_tokens_seen": 172228288, + "step": 54730 + }, + { + "epoch": 3.5039370078740157, + "grad_norm": 5.980713844299316, + "learning_rate": 1.2441666119620335e-06, + "loss": 0.185, + "num_input_tokens_seen": 172244864, + "step": 54735 + }, + { + "epoch": 3.5042570898149927, + "grad_norm": 8.895001411437988, + "learning_rate": 1.2436835911364548e-06, + "loss": 0.3193, + "num_input_tokens_seen": 172260288, + "step": 54740 + }, + { + "epoch": 3.5045771717559697, + "grad_norm": 9.68055248260498, + "learning_rate": 1.2432006330447736e-06, + "loss": 0.2089, + "num_input_tokens_seen": 172275008, + "step": 54745 + }, + { + "epoch": 3.5048972536969463, + "grad_norm": 10.16061782836914, + "learning_rate": 1.2427177377111062e-06, + "loss": 0.1869, + "num_input_tokens_seen": 172291520, + "step": 54750 + }, + { + "epoch": 3.5052173356379233, + "grad_norm": 10.147220611572266, + "learning_rate": 1.2422349051595662e-06, + "loss": 0.1421, + "num_input_tokens_seen": 172306816, + "step": 54755 + }, + { + "epoch": 3.5055374175789002, + "grad_norm": 30.063499450683594, + "learning_rate": 1.2417521354142637e-06, + "loss": 0.1578, + "num_input_tokens_seen": 172322752, + "step": 54760 + }, + { + "epoch": 3.5058574995198772, + "grad_norm": 8.466110229492188, + "learning_rate": 1.2412694284993043e-06, + "loss": 0.2266, + "num_input_tokens_seen": 172337984, + "step": 54765 + }, + { + "epoch": 3.506177581460854, + "grad_norm": 10.006741523742676, + "learning_rate": 1.2407867844387952e-06, + "loss": 0.1267, + "num_input_tokens_seen": 172353728, + "step": 54770 + }, + { + "epoch": 3.5064976634018308, + "grad_norm": 8.67696475982666, + "learning_rate": 1.2403042032568332e-06, + "loss": 0.1485, + "num_input_tokens_seen": 172369472, + "step": 54775 + }, + { + "epoch": 3.5068177453428078, + "grad_norm": 7.116778373718262, + "learning_rate": 1.2398216849775189e-06, + "loss": 0.1988, + "num_input_tokens_seen": 172384768, + "step": 54780 + }, + { + "epoch": 3.5071378272837848, + "grad_norm": 15.94039535522461, + "learning_rate": 1.2393392296249454e-06, + "loss": 0.1131, + "num_input_tokens_seen": 172399680, + "step": 54785 + }, + { + "epoch": 3.5074579092247617, + "grad_norm": 34.5817985534668, + "learning_rate": 1.2388568372232043e-06, + "loss": 0.2084, + "num_input_tokens_seen": 172415040, + "step": 54790 + }, + { + "epoch": 3.5077779911657383, + "grad_norm": 5.602054119110107, + "learning_rate": 1.2383745077963836e-06, + "loss": 0.2005, + "num_input_tokens_seen": 172429888, + "step": 54795 + }, + { + "epoch": 3.5080980731067153, + "grad_norm": 5.061655044555664, + "learning_rate": 1.2378922413685677e-06, + "loss": 0.1536, + "num_input_tokens_seen": 172444416, + "step": 54800 + }, + { + "epoch": 3.5084181550476923, + "grad_norm": 9.544976234436035, + "learning_rate": 1.2374100379638406e-06, + "loss": 0.2097, + "num_input_tokens_seen": 172459968, + "step": 54805 + }, + { + "epoch": 3.508738236988669, + "grad_norm": 6.207693099975586, + "learning_rate": 1.2369278976062783e-06, + "loss": 0.1501, + "num_input_tokens_seen": 172474816, + "step": 54810 + }, + { + "epoch": 3.509058318929646, + "grad_norm": 10.5822172164917, + "learning_rate": 1.2364458203199583e-06, + "loss": 0.162, + "num_input_tokens_seen": 172491392, + "step": 54815 + }, + { + "epoch": 3.509378400870623, + "grad_norm": 9.400445938110352, + "learning_rate": 1.2359638061289524e-06, + "loss": 0.1621, + "num_input_tokens_seen": 172507072, + "step": 54820 + }, + { + "epoch": 3.5096984828116, + "grad_norm": 3.2155585289001465, + "learning_rate": 1.23548185505733e-06, + "loss": 0.1915, + "num_input_tokens_seen": 172522368, + "step": 54825 + }, + { + "epoch": 3.510018564752577, + "grad_norm": 7.802979469299316, + "learning_rate": 1.234999967129157e-06, + "loss": 0.1609, + "num_input_tokens_seen": 172540032, + "step": 54830 + }, + { + "epoch": 3.5103386466935538, + "grad_norm": 4.4707794189453125, + "learning_rate": 1.2345181423684968e-06, + "loss": 0.1412, + "num_input_tokens_seen": 172555840, + "step": 54835 + }, + { + "epoch": 3.5106587286345303, + "grad_norm": 6.313077926635742, + "learning_rate": 1.2340363807994087e-06, + "loss": 0.1772, + "num_input_tokens_seen": 172571712, + "step": 54840 + }, + { + "epoch": 3.5109788105755073, + "grad_norm": 6.58981990814209, + "learning_rate": 1.2335546824459487e-06, + "loss": 0.1899, + "num_input_tokens_seen": 172588032, + "step": 54845 + }, + { + "epoch": 3.5112988925164843, + "grad_norm": 5.368871212005615, + "learning_rate": 1.233073047332172e-06, + "loss": 0.1741, + "num_input_tokens_seen": 172603200, + "step": 54850 + }, + { + "epoch": 3.511618974457461, + "grad_norm": 6.114776134490967, + "learning_rate": 1.2325914754821284e-06, + "loss": 0.1684, + "num_input_tokens_seen": 172618880, + "step": 54855 + }, + { + "epoch": 3.511939056398438, + "grad_norm": 7.942864894866943, + "learning_rate": 1.2321099669198646e-06, + "loss": 0.1823, + "num_input_tokens_seen": 172635456, + "step": 54860 + }, + { + "epoch": 3.512259138339415, + "grad_norm": 5.1205315589904785, + "learning_rate": 1.231628521669425e-06, + "loss": 0.0929, + "num_input_tokens_seen": 172651456, + "step": 54865 + }, + { + "epoch": 3.512579220280392, + "grad_norm": 13.773468971252441, + "learning_rate": 1.2311471397548503e-06, + "loss": 0.2091, + "num_input_tokens_seen": 172668096, + "step": 54870 + }, + { + "epoch": 3.512899302221369, + "grad_norm": 5.260471343994141, + "learning_rate": 1.230665821200178e-06, + "loss": 0.1772, + "num_input_tokens_seen": 172683456, + "step": 54875 + }, + { + "epoch": 3.513219384162346, + "grad_norm": 5.582279205322266, + "learning_rate": 1.2301845660294429e-06, + "loss": 0.1792, + "num_input_tokens_seen": 172699712, + "step": 54880 + }, + { + "epoch": 3.5135394661033224, + "grad_norm": 7.941822052001953, + "learning_rate": 1.2297033742666756e-06, + "loss": 0.1578, + "num_input_tokens_seen": 172715200, + "step": 54885 + }, + { + "epoch": 3.5138595480442993, + "grad_norm": 8.822956085205078, + "learning_rate": 1.2292222459359068e-06, + "loss": 0.2734, + "num_input_tokens_seen": 172730176, + "step": 54890 + }, + { + "epoch": 3.5141796299852763, + "grad_norm": 31.25523567199707, + "learning_rate": 1.2287411810611579e-06, + "loss": 0.1462, + "num_input_tokens_seen": 172745728, + "step": 54895 + }, + { + "epoch": 3.514499711926253, + "grad_norm": 7.119717121124268, + "learning_rate": 1.228260179666454e-06, + "loss": 0.1788, + "num_input_tokens_seen": 172761536, + "step": 54900 + }, + { + "epoch": 3.51481979386723, + "grad_norm": 5.9354705810546875, + "learning_rate": 1.227779241775812e-06, + "loss": 0.1697, + "num_input_tokens_seen": 172777280, + "step": 54905 + }, + { + "epoch": 3.515139875808207, + "grad_norm": 9.665034294128418, + "learning_rate": 1.2272983674132472e-06, + "loss": 0.2206, + "num_input_tokens_seen": 172792704, + "step": 54910 + }, + { + "epoch": 3.515459957749184, + "grad_norm": 5.339786052703857, + "learning_rate": 1.2268175566027745e-06, + "loss": 0.1639, + "num_input_tokens_seen": 172809088, + "step": 54915 + }, + { + "epoch": 3.515780039690161, + "grad_norm": 4.644982814788818, + "learning_rate": 1.2263368093683994e-06, + "loss": 0.11, + "num_input_tokens_seen": 172823744, + "step": 54920 + }, + { + "epoch": 3.516100121631138, + "grad_norm": 55.43913650512695, + "learning_rate": 1.2258561257341317e-06, + "loss": 0.2312, + "num_input_tokens_seen": 172840896, + "step": 54925 + }, + { + "epoch": 3.5164202035721144, + "grad_norm": 4.229804515838623, + "learning_rate": 1.2253755057239702e-06, + "loss": 0.1506, + "num_input_tokens_seen": 172857472, + "step": 54930 + }, + { + "epoch": 3.5167402855130914, + "grad_norm": 6.3584980964660645, + "learning_rate": 1.2248949493619178e-06, + "loss": 0.1447, + "num_input_tokens_seen": 172872384, + "step": 54935 + }, + { + "epoch": 3.5170603674540684, + "grad_norm": 8.196375846862793, + "learning_rate": 1.2244144566719699e-06, + "loss": 0.2104, + "num_input_tokens_seen": 172887680, + "step": 54940 + }, + { + "epoch": 3.517380449395045, + "grad_norm": 17.291404724121094, + "learning_rate": 1.2239340276781195e-06, + "loss": 0.1787, + "num_input_tokens_seen": 172903488, + "step": 54945 + }, + { + "epoch": 3.517700531336022, + "grad_norm": 12.089887619018555, + "learning_rate": 1.2234536624043573e-06, + "loss": 0.1806, + "num_input_tokens_seen": 172918464, + "step": 54950 + }, + { + "epoch": 3.518020613276999, + "grad_norm": 8.141754150390625, + "learning_rate": 1.2229733608746696e-06, + "loss": 0.2439, + "num_input_tokens_seen": 172935488, + "step": 54955 + }, + { + "epoch": 3.518340695217976, + "grad_norm": 6.696374416351318, + "learning_rate": 1.2224931231130396e-06, + "loss": 0.2157, + "num_input_tokens_seen": 172951488, + "step": 54960 + }, + { + "epoch": 3.518660777158953, + "grad_norm": 7.853329658508301, + "learning_rate": 1.2220129491434499e-06, + "loss": 0.2159, + "num_input_tokens_seen": 172967360, + "step": 54965 + }, + { + "epoch": 3.51898085909993, + "grad_norm": 51.24517059326172, + "learning_rate": 1.2215328389898766e-06, + "loss": 0.1599, + "num_input_tokens_seen": 172982336, + "step": 54970 + }, + { + "epoch": 3.5193009410409064, + "grad_norm": 13.48294734954834, + "learning_rate": 1.2210527926762939e-06, + "loss": 0.2029, + "num_input_tokens_seen": 172998464, + "step": 54975 + }, + { + "epoch": 3.5196210229818834, + "grad_norm": 5.815339088439941, + "learning_rate": 1.220572810226673e-06, + "loss": 0.1688, + "num_input_tokens_seen": 173013824, + "step": 54980 + }, + { + "epoch": 3.5199411049228604, + "grad_norm": 18.022314071655273, + "learning_rate": 1.2200928916649818e-06, + "loss": 0.148, + "num_input_tokens_seen": 173029504, + "step": 54985 + }, + { + "epoch": 3.520261186863837, + "grad_norm": 6.194011688232422, + "learning_rate": 1.2196130370151847e-06, + "loss": 0.1436, + "num_input_tokens_seen": 173045056, + "step": 54990 + }, + { + "epoch": 3.520581268804814, + "grad_norm": 7.150049209594727, + "learning_rate": 1.2191332463012424e-06, + "loss": 0.1622, + "num_input_tokens_seen": 173062144, + "step": 54995 + }, + { + "epoch": 3.520901350745791, + "grad_norm": 4.68004846572876, + "learning_rate": 1.2186535195471159e-06, + "loss": 0.0933, + "num_input_tokens_seen": 173076544, + "step": 55000 + }, + { + "epoch": 3.521221432686768, + "grad_norm": 10.329951286315918, + "learning_rate": 1.2181738567767562e-06, + "loss": 0.1678, + "num_input_tokens_seen": 173092864, + "step": 55005 + }, + { + "epoch": 3.521541514627745, + "grad_norm": 19.056711196899414, + "learning_rate": 1.2176942580141193e-06, + "loss": 0.1625, + "num_input_tokens_seen": 173108736, + "step": 55010 + }, + { + "epoch": 3.5218615965687214, + "grad_norm": 3.617506742477417, + "learning_rate": 1.2172147232831499e-06, + "loss": 0.1287, + "num_input_tokens_seen": 173125952, + "step": 55015 + }, + { + "epoch": 3.5221816785096984, + "grad_norm": 6.612812042236328, + "learning_rate": 1.2167352526077967e-06, + "loss": 0.1979, + "num_input_tokens_seen": 173141376, + "step": 55020 + }, + { + "epoch": 3.5225017604506754, + "grad_norm": 6.486300945281982, + "learning_rate": 1.2162558460120002e-06, + "loss": 0.1789, + "num_input_tokens_seen": 173157312, + "step": 55025 + }, + { + "epoch": 3.5228218423916524, + "grad_norm": 3.1535696983337402, + "learning_rate": 1.2157765035196995e-06, + "loss": 0.1455, + "num_input_tokens_seen": 173174464, + "step": 55030 + }, + { + "epoch": 3.523141924332629, + "grad_norm": 20.226350784301758, + "learning_rate": 1.2152972251548326e-06, + "loss": 0.1821, + "num_input_tokens_seen": 173189824, + "step": 55035 + }, + { + "epoch": 3.523462006273606, + "grad_norm": 14.614562034606934, + "learning_rate": 1.2148180109413288e-06, + "loss": 0.1983, + "num_input_tokens_seen": 173205056, + "step": 55040 + }, + { + "epoch": 3.523782088214583, + "grad_norm": 8.21279239654541, + "learning_rate": 1.2143388609031202e-06, + "loss": 0.1334, + "num_input_tokens_seen": 173220288, + "step": 55045 + }, + { + "epoch": 3.52410217015556, + "grad_norm": 6.633750915527344, + "learning_rate": 1.2138597750641319e-06, + "loss": 0.2197, + "num_input_tokens_seen": 173235904, + "step": 55050 + }, + { + "epoch": 3.524422252096537, + "grad_norm": 12.462386131286621, + "learning_rate": 1.2133807534482872e-06, + "loss": 0.1482, + "num_input_tokens_seen": 173250816, + "step": 55055 + }, + { + "epoch": 3.5247423340375135, + "grad_norm": 9.040291786193848, + "learning_rate": 1.212901796079506e-06, + "loss": 0.1846, + "num_input_tokens_seen": 173266048, + "step": 55060 + }, + { + "epoch": 3.5250624159784905, + "grad_norm": 7.322051525115967, + "learning_rate": 1.2124229029817047e-06, + "loss": 0.1462, + "num_input_tokens_seen": 173281408, + "step": 55065 + }, + { + "epoch": 3.5253824979194675, + "grad_norm": 7.900232315063477, + "learning_rate": 1.2119440741787972e-06, + "loss": 0.1719, + "num_input_tokens_seen": 173298560, + "step": 55070 + }, + { + "epoch": 3.525702579860444, + "grad_norm": 16.838836669921875, + "learning_rate": 1.2114653096946922e-06, + "loss": 0.1421, + "num_input_tokens_seen": 173313920, + "step": 55075 + }, + { + "epoch": 3.526022661801421, + "grad_norm": 6.288451671600342, + "learning_rate": 1.2109866095532988e-06, + "loss": 0.1484, + "num_input_tokens_seen": 173330304, + "step": 55080 + }, + { + "epoch": 3.526342743742398, + "grad_norm": 16.80400276184082, + "learning_rate": 1.21050797377852e-06, + "loss": 0.1517, + "num_input_tokens_seen": 173345664, + "step": 55085 + }, + { + "epoch": 3.526662825683375, + "grad_norm": 7.934755802154541, + "learning_rate": 1.2100294023942563e-06, + "loss": 0.1688, + "num_input_tokens_seen": 173361408, + "step": 55090 + }, + { + "epoch": 3.526982907624352, + "grad_norm": 22.663944244384766, + "learning_rate": 1.2095508954244051e-06, + "loss": 0.2171, + "num_input_tokens_seen": 173378112, + "step": 55095 + }, + { + "epoch": 3.527302989565329, + "grad_norm": 7.357274055480957, + "learning_rate": 1.2090724528928604e-06, + "loss": 0.1459, + "num_input_tokens_seen": 173393856, + "step": 55100 + }, + { + "epoch": 3.5276230715063055, + "grad_norm": 7.500917434692383, + "learning_rate": 1.208594074823513e-06, + "loss": 0.1535, + "num_input_tokens_seen": 173409920, + "step": 55105 + }, + { + "epoch": 3.5279431534472825, + "grad_norm": 9.68139934539795, + "learning_rate": 1.2081157612402512e-06, + "loss": 0.1757, + "num_input_tokens_seen": 173425472, + "step": 55110 + }, + { + "epoch": 3.5282632353882595, + "grad_norm": 3.834547758102417, + "learning_rate": 1.2076375121669578e-06, + "loss": 0.1457, + "num_input_tokens_seen": 173441408, + "step": 55115 + }, + { + "epoch": 3.528583317329236, + "grad_norm": 9.72586441040039, + "learning_rate": 1.2071593276275176e-06, + "loss": 0.1543, + "num_input_tokens_seen": 173458688, + "step": 55120 + }, + { + "epoch": 3.528903399270213, + "grad_norm": 5.256271839141846, + "learning_rate": 1.2066812076458043e-06, + "loss": 0.1125, + "num_input_tokens_seen": 173474048, + "step": 55125 + }, + { + "epoch": 3.52922348121119, + "grad_norm": 8.334349632263184, + "learning_rate": 1.206203152245696e-06, + "loss": 0.1115, + "num_input_tokens_seen": 173488704, + "step": 55130 + }, + { + "epoch": 3.529543563152167, + "grad_norm": 4.168070316314697, + "learning_rate": 1.2057251614510629e-06, + "loss": 0.1522, + "num_input_tokens_seen": 173503808, + "step": 55135 + }, + { + "epoch": 3.529863645093144, + "grad_norm": 8.319173812866211, + "learning_rate": 1.2052472352857727e-06, + "loss": 0.2235, + "num_input_tokens_seen": 173520256, + "step": 55140 + }, + { + "epoch": 3.530183727034121, + "grad_norm": 37.06742477416992, + "learning_rate": 1.2047693737736935e-06, + "loss": 0.1624, + "num_input_tokens_seen": 173538816, + "step": 55145 + }, + { + "epoch": 3.5305038089750975, + "grad_norm": 5.869406700134277, + "learning_rate": 1.2042915769386832e-06, + "loss": 0.1122, + "num_input_tokens_seen": 173554624, + "step": 55150 + }, + { + "epoch": 3.5308238909160745, + "grad_norm": 9.994107246398926, + "learning_rate": 1.203813844804604e-06, + "loss": 0.1504, + "num_input_tokens_seen": 173570880, + "step": 55155 + }, + { + "epoch": 3.5311439728570515, + "grad_norm": 10.48332405090332, + "learning_rate": 1.2033361773953084e-06, + "loss": 0.2002, + "num_input_tokens_seen": 173587008, + "step": 55160 + }, + { + "epoch": 3.531464054798028, + "grad_norm": 7.140283107757568, + "learning_rate": 1.2028585747346508e-06, + "loss": 0.1973, + "num_input_tokens_seen": 173602432, + "step": 55165 + }, + { + "epoch": 3.531784136739005, + "grad_norm": 3.2235021591186523, + "learning_rate": 1.2023810368464791e-06, + "loss": 0.1014, + "num_input_tokens_seen": 173618560, + "step": 55170 + }, + { + "epoch": 3.532104218679982, + "grad_norm": 6.795405387878418, + "learning_rate": 1.2019035637546396e-06, + "loss": 0.1761, + "num_input_tokens_seen": 173633664, + "step": 55175 + }, + { + "epoch": 3.532424300620959, + "grad_norm": 6.759466171264648, + "learning_rate": 1.2014261554829748e-06, + "loss": 0.1754, + "num_input_tokens_seen": 173650240, + "step": 55180 + }, + { + "epoch": 3.532744382561936, + "grad_norm": 3.2738418579101562, + "learning_rate": 1.2009488120553236e-06, + "loss": 0.1137, + "num_input_tokens_seen": 173665088, + "step": 55185 + }, + { + "epoch": 3.533064464502913, + "grad_norm": 5.2755842208862305, + "learning_rate": 1.2004715334955214e-06, + "loss": 0.154, + "num_input_tokens_seen": 173680256, + "step": 55190 + }, + { + "epoch": 3.5333845464438896, + "grad_norm": 7.58892297744751, + "learning_rate": 1.1999943198274027e-06, + "loss": 0.2658, + "num_input_tokens_seen": 173695872, + "step": 55195 + }, + { + "epoch": 3.5337046283848665, + "grad_norm": 2.7404839992523193, + "learning_rate": 1.199517171074796e-06, + "loss": 0.1829, + "num_input_tokens_seen": 173711104, + "step": 55200 + }, + { + "epoch": 3.5340247103258435, + "grad_norm": 3.1182312965393066, + "learning_rate": 1.199040087261528e-06, + "loss": 0.1435, + "num_input_tokens_seen": 173728128, + "step": 55205 + }, + { + "epoch": 3.53434479226682, + "grad_norm": 5.029367923736572, + "learning_rate": 1.1985630684114216e-06, + "loss": 0.1638, + "num_input_tokens_seen": 173744640, + "step": 55210 + }, + { + "epoch": 3.534664874207797, + "grad_norm": 6.876863956451416, + "learning_rate": 1.1980861145482968e-06, + "loss": 0.1891, + "num_input_tokens_seen": 173760384, + "step": 55215 + }, + { + "epoch": 3.534984956148774, + "grad_norm": 5.6031975746154785, + "learning_rate": 1.1976092256959696e-06, + "loss": 0.1534, + "num_input_tokens_seen": 173775680, + "step": 55220 + }, + { + "epoch": 3.535305038089751, + "grad_norm": 6.127768516540527, + "learning_rate": 1.197132401878253e-06, + "loss": 0.1374, + "num_input_tokens_seen": 173793472, + "step": 55225 + }, + { + "epoch": 3.535625120030728, + "grad_norm": 6.348074913024902, + "learning_rate": 1.1966556431189597e-06, + "loss": 0.1977, + "num_input_tokens_seen": 173809664, + "step": 55230 + }, + { + "epoch": 3.535945201971705, + "grad_norm": 5.016312599182129, + "learning_rate": 1.196178949441893e-06, + "loss": 0.1825, + "num_input_tokens_seen": 173824832, + "step": 55235 + }, + { + "epoch": 3.5362652839126816, + "grad_norm": 5.802535057067871, + "learning_rate": 1.1957023208708598e-06, + "loss": 0.1694, + "num_input_tokens_seen": 173839808, + "step": 55240 + }, + { + "epoch": 3.5365853658536586, + "grad_norm": 57.542991638183594, + "learning_rate": 1.1952257574296572e-06, + "loss": 0.1843, + "num_input_tokens_seen": 173855424, + "step": 55245 + }, + { + "epoch": 3.5369054477946356, + "grad_norm": 4.2082390785217285, + "learning_rate": 1.1947492591420847e-06, + "loss": 0.1045, + "num_input_tokens_seen": 173870144, + "step": 55250 + }, + { + "epoch": 3.537225529735612, + "grad_norm": 6.609803199768066, + "learning_rate": 1.1942728260319355e-06, + "loss": 0.2019, + "num_input_tokens_seen": 173885824, + "step": 55255 + }, + { + "epoch": 3.537545611676589, + "grad_norm": 7.977047920227051, + "learning_rate": 1.1937964581229993e-06, + "loss": 0.1624, + "num_input_tokens_seen": 173901056, + "step": 55260 + }, + { + "epoch": 3.537865693617566, + "grad_norm": 9.85469913482666, + "learning_rate": 1.1933201554390661e-06, + "loss": 0.2457, + "num_input_tokens_seen": 173917376, + "step": 55265 + }, + { + "epoch": 3.538185775558543, + "grad_norm": 5.775628566741943, + "learning_rate": 1.192843918003916e-06, + "loss": 0.0853, + "num_input_tokens_seen": 173932224, + "step": 55270 + }, + { + "epoch": 3.53850585749952, + "grad_norm": 3.7012908458709717, + "learning_rate": 1.192367745841333e-06, + "loss": 0.1423, + "num_input_tokens_seen": 173947520, + "step": 55275 + }, + { + "epoch": 3.5388259394404966, + "grad_norm": 5.661716938018799, + "learning_rate": 1.1918916389750934e-06, + "loss": 0.2957, + "num_input_tokens_seen": 173962944, + "step": 55280 + }, + { + "epoch": 3.5391460213814736, + "grad_norm": 10.782541275024414, + "learning_rate": 1.1914155974289719e-06, + "loss": 0.1717, + "num_input_tokens_seen": 173977472, + "step": 55285 + }, + { + "epoch": 3.5394661033224506, + "grad_norm": 6.029150009155273, + "learning_rate": 1.190939621226739e-06, + "loss": 0.1313, + "num_input_tokens_seen": 173993408, + "step": 55290 + }, + { + "epoch": 3.5397861852634276, + "grad_norm": 11.015084266662598, + "learning_rate": 1.1904637103921629e-06, + "loss": 0.1651, + "num_input_tokens_seen": 174009536, + "step": 55295 + }, + { + "epoch": 3.540106267204404, + "grad_norm": 6.790406227111816, + "learning_rate": 1.189987864949008e-06, + "loss": 0.2041, + "num_input_tokens_seen": 174025024, + "step": 55300 + }, + { + "epoch": 3.540426349145381, + "grad_norm": 4.357156276702881, + "learning_rate": 1.1895120849210355e-06, + "loss": 0.157, + "num_input_tokens_seen": 174041536, + "step": 55305 + }, + { + "epoch": 3.540746431086358, + "grad_norm": 16.507923126220703, + "learning_rate": 1.1890363703320024e-06, + "loss": 0.2251, + "num_input_tokens_seen": 174061184, + "step": 55310 + }, + { + "epoch": 3.541066513027335, + "grad_norm": 7.175117015838623, + "learning_rate": 1.1885607212056654e-06, + "loss": 0.162, + "num_input_tokens_seen": 174076864, + "step": 55315 + }, + { + "epoch": 3.541386594968312, + "grad_norm": 9.170829772949219, + "learning_rate": 1.1880851375657751e-06, + "loss": 0.1838, + "num_input_tokens_seen": 174091776, + "step": 55320 + }, + { + "epoch": 3.5417066769092886, + "grad_norm": 9.541138648986816, + "learning_rate": 1.1876096194360792e-06, + "loss": 0.1919, + "num_input_tokens_seen": 174109632, + "step": 55325 + }, + { + "epoch": 3.5420267588502656, + "grad_norm": 5.457056045532227, + "learning_rate": 1.1871341668403233e-06, + "loss": 0.1432, + "num_input_tokens_seen": 174124992, + "step": 55330 + }, + { + "epoch": 3.5423468407912426, + "grad_norm": 23.21512794494629, + "learning_rate": 1.1866587798022474e-06, + "loss": 0.221, + "num_input_tokens_seen": 174139968, + "step": 55335 + }, + { + "epoch": 3.5426669227322196, + "grad_norm": 8.863643646240234, + "learning_rate": 1.1861834583455931e-06, + "loss": 0.1514, + "num_input_tokens_seen": 174156224, + "step": 55340 + }, + { + "epoch": 3.542987004673196, + "grad_norm": 5.984522819519043, + "learning_rate": 1.1857082024940917e-06, + "loss": 0.136, + "num_input_tokens_seen": 174174144, + "step": 55345 + }, + { + "epoch": 3.543307086614173, + "grad_norm": 5.5082221031188965, + "learning_rate": 1.1852330122714783e-06, + "loss": 0.1773, + "num_input_tokens_seen": 174191168, + "step": 55350 + }, + { + "epoch": 3.54362716855515, + "grad_norm": 9.665388107299805, + "learning_rate": 1.184757887701478e-06, + "loss": 0.226, + "num_input_tokens_seen": 174206208, + "step": 55355 + }, + { + "epoch": 3.543947250496127, + "grad_norm": 9.698586463928223, + "learning_rate": 1.1842828288078193e-06, + "loss": 0.1229, + "num_input_tokens_seen": 174222016, + "step": 55360 + }, + { + "epoch": 3.544267332437104, + "grad_norm": 7.285982131958008, + "learning_rate": 1.1838078356142227e-06, + "loss": 0.1571, + "num_input_tokens_seen": 174239104, + "step": 55365 + }, + { + "epoch": 3.5445874143780807, + "grad_norm": 5.296481132507324, + "learning_rate": 1.1833329081444059e-06, + "loss": 0.1274, + "num_input_tokens_seen": 174254784, + "step": 55370 + }, + { + "epoch": 3.5449074963190577, + "grad_norm": 5.021642684936523, + "learning_rate": 1.1828580464220873e-06, + "loss": 0.1939, + "num_input_tokens_seen": 174270336, + "step": 55375 + }, + { + "epoch": 3.5452275782600347, + "grad_norm": 6.7375946044921875, + "learning_rate": 1.1823832504709754e-06, + "loss": 0.1964, + "num_input_tokens_seen": 174285696, + "step": 55380 + }, + { + "epoch": 3.545547660201011, + "grad_norm": 5.394513130187988, + "learning_rate": 1.1819085203147823e-06, + "loss": 0.1029, + "num_input_tokens_seen": 174300672, + "step": 55385 + }, + { + "epoch": 3.545867742141988, + "grad_norm": 7.605320930480957, + "learning_rate": 1.1814338559772104e-06, + "loss": 0.1274, + "num_input_tokens_seen": 174316672, + "step": 55390 + }, + { + "epoch": 3.546187824082965, + "grad_norm": 7.768385887145996, + "learning_rate": 1.1809592574819644e-06, + "loss": 0.1423, + "num_input_tokens_seen": 174332928, + "step": 55395 + }, + { + "epoch": 3.546507906023942, + "grad_norm": 8.756219863891602, + "learning_rate": 1.1804847248527426e-06, + "loss": 0.1825, + "num_input_tokens_seen": 174347712, + "step": 55400 + }, + { + "epoch": 3.546827987964919, + "grad_norm": 6.778501033782959, + "learning_rate": 1.1800102581132405e-06, + "loss": 0.1215, + "num_input_tokens_seen": 174362752, + "step": 55405 + }, + { + "epoch": 3.547148069905896, + "grad_norm": 25.564495086669922, + "learning_rate": 1.1795358572871504e-06, + "loss": 0.159, + "num_input_tokens_seen": 174378880, + "step": 55410 + }, + { + "epoch": 3.5474681518468727, + "grad_norm": 6.9304609298706055, + "learning_rate": 1.1790615223981616e-06, + "loss": 0.1177, + "num_input_tokens_seen": 174395520, + "step": 55415 + }, + { + "epoch": 3.5477882337878497, + "grad_norm": 6.254627704620361, + "learning_rate": 1.1785872534699591e-06, + "loss": 0.1959, + "num_input_tokens_seen": 174412672, + "step": 55420 + }, + { + "epoch": 3.5481083157288267, + "grad_norm": 6.540167331695557, + "learning_rate": 1.178113050526227e-06, + "loss": 0.1277, + "num_input_tokens_seen": 174428096, + "step": 55425 + }, + { + "epoch": 3.5484283976698032, + "grad_norm": 6.365068435668945, + "learning_rate": 1.1776389135906435e-06, + "loss": 0.1724, + "num_input_tokens_seen": 174442880, + "step": 55430 + }, + { + "epoch": 3.54874847961078, + "grad_norm": 15.037252426147461, + "learning_rate": 1.1771648426868847e-06, + "loss": 0.2434, + "num_input_tokens_seen": 174457600, + "step": 55435 + }, + { + "epoch": 3.549068561551757, + "grad_norm": 4.768892288208008, + "learning_rate": 1.1766908378386236e-06, + "loss": 0.1841, + "num_input_tokens_seen": 174473152, + "step": 55440 + }, + { + "epoch": 3.549388643492734, + "grad_norm": 8.228860855102539, + "learning_rate": 1.1762168990695292e-06, + "loss": 0.1522, + "num_input_tokens_seen": 174489728, + "step": 55445 + }, + { + "epoch": 3.549708725433711, + "grad_norm": 13.110769271850586, + "learning_rate": 1.1757430264032671e-06, + "loss": 0.2114, + "num_input_tokens_seen": 174505792, + "step": 55450 + }, + { + "epoch": 3.550028807374688, + "grad_norm": 8.796831130981445, + "learning_rate": 1.1752692198635e-06, + "loss": 0.1655, + "num_input_tokens_seen": 174520896, + "step": 55455 + }, + { + "epoch": 3.5503488893156647, + "grad_norm": 7.56244421005249, + "learning_rate": 1.1747954794738895e-06, + "loss": 0.1716, + "num_input_tokens_seen": 174535488, + "step": 55460 + }, + { + "epoch": 3.5506689712566417, + "grad_norm": 5.048981666564941, + "learning_rate": 1.174321805258088e-06, + "loss": 0.1682, + "num_input_tokens_seen": 174550208, + "step": 55465 + }, + { + "epoch": 3.5509890531976187, + "grad_norm": 4.733469486236572, + "learning_rate": 1.1738481972397522e-06, + "loss": 0.1313, + "num_input_tokens_seen": 174565824, + "step": 55470 + }, + { + "epoch": 3.5513091351385953, + "grad_norm": 5.527329444885254, + "learning_rate": 1.1733746554425278e-06, + "loss": 0.1355, + "num_input_tokens_seen": 174582208, + "step": 55475 + }, + { + "epoch": 3.5516292170795722, + "grad_norm": 7.4515252113342285, + "learning_rate": 1.172901179890064e-06, + "loss": 0.2098, + "num_input_tokens_seen": 174599232, + "step": 55480 + }, + { + "epoch": 3.5519492990205492, + "grad_norm": 5.209251403808594, + "learning_rate": 1.172427770606003e-06, + "loss": 0.1394, + "num_input_tokens_seen": 174616320, + "step": 55485 + }, + { + "epoch": 3.5522693809615262, + "grad_norm": 5.726590156555176, + "learning_rate": 1.171954427613983e-06, + "loss": 0.1705, + "num_input_tokens_seen": 174631808, + "step": 55490 + }, + { + "epoch": 3.552589462902503, + "grad_norm": 4.845456123352051, + "learning_rate": 1.1714811509376432e-06, + "loss": 0.1855, + "num_input_tokens_seen": 174646784, + "step": 55495 + }, + { + "epoch": 3.55290954484348, + "grad_norm": 5.783003807067871, + "learning_rate": 1.1710079406006125e-06, + "loss": 0.1401, + "num_input_tokens_seen": 174662592, + "step": 55500 + }, + { + "epoch": 3.5532296267844568, + "grad_norm": 3.762643337249756, + "learning_rate": 1.170534796626524e-06, + "loss": 0.1048, + "num_input_tokens_seen": 174678720, + "step": 55505 + }, + { + "epoch": 3.5535497087254337, + "grad_norm": 7.809393405914307, + "learning_rate": 1.1700617190390029e-06, + "loss": 0.157, + "num_input_tokens_seen": 174694848, + "step": 55510 + }, + { + "epoch": 3.5538697906664107, + "grad_norm": 3.5926361083984375, + "learning_rate": 1.1695887078616718e-06, + "loss": 0.1073, + "num_input_tokens_seen": 174711488, + "step": 55515 + }, + { + "epoch": 3.5541898726073873, + "grad_norm": 6.797061920166016, + "learning_rate": 1.169115763118151e-06, + "loss": 0.195, + "num_input_tokens_seen": 174730240, + "step": 55520 + }, + { + "epoch": 3.5545099545483643, + "grad_norm": 45.49135208129883, + "learning_rate": 1.1686428848320568e-06, + "loss": 0.2061, + "num_input_tokens_seen": 174746112, + "step": 55525 + }, + { + "epoch": 3.5548300364893413, + "grad_norm": 4.766177654266357, + "learning_rate": 1.168170073027002e-06, + "loss": 0.1502, + "num_input_tokens_seen": 174762048, + "step": 55530 + }, + { + "epoch": 3.5551501184303183, + "grad_norm": 4.730963706970215, + "learning_rate": 1.1676973277265965e-06, + "loss": 0.2336, + "num_input_tokens_seen": 174777344, + "step": 55535 + }, + { + "epoch": 3.5554702003712952, + "grad_norm": 10.005858421325684, + "learning_rate": 1.167224648954446e-06, + "loss": 0.1855, + "num_input_tokens_seen": 174793792, + "step": 55540 + }, + { + "epoch": 3.555790282312272, + "grad_norm": 34.17401885986328, + "learning_rate": 1.166752036734155e-06, + "loss": 0.2353, + "num_input_tokens_seen": 174810112, + "step": 55545 + }, + { + "epoch": 3.556110364253249, + "grad_norm": 7.596006393432617, + "learning_rate": 1.1662794910893228e-06, + "loss": 0.1519, + "num_input_tokens_seen": 174825344, + "step": 55550 + }, + { + "epoch": 3.5564304461942258, + "grad_norm": 8.14116096496582, + "learning_rate": 1.1658070120435457e-06, + "loss": 0.1605, + "num_input_tokens_seen": 174840448, + "step": 55555 + }, + { + "epoch": 3.5567505281352028, + "grad_norm": 5.413498401641846, + "learning_rate": 1.1653345996204168e-06, + "loss": 0.2179, + "num_input_tokens_seen": 174855168, + "step": 55560 + }, + { + "epoch": 3.5570706100761793, + "grad_norm": 11.899226188659668, + "learning_rate": 1.1648622538435252e-06, + "loss": 0.176, + "num_input_tokens_seen": 174870400, + "step": 55565 + }, + { + "epoch": 3.5573906920171563, + "grad_norm": 9.81383228302002, + "learning_rate": 1.1643899747364599e-06, + "loss": 0.2301, + "num_input_tokens_seen": 174885312, + "step": 55570 + }, + { + "epoch": 3.5577107739581333, + "grad_norm": 4.6800408363342285, + "learning_rate": 1.1639177623228005e-06, + "loss": 0.1409, + "num_input_tokens_seen": 174900288, + "step": 55575 + }, + { + "epoch": 3.5580308558991103, + "grad_norm": 5.698324203491211, + "learning_rate": 1.1634456166261304e-06, + "loss": 0.202, + "num_input_tokens_seen": 174916672, + "step": 55580 + }, + { + "epoch": 3.5583509378400873, + "grad_norm": 5.208603382110596, + "learning_rate": 1.1629735376700227e-06, + "loss": 0.1512, + "num_input_tokens_seen": 174932800, + "step": 55585 + }, + { + "epoch": 3.558671019781064, + "grad_norm": 10.383502960205078, + "learning_rate": 1.1625015254780531e-06, + "loss": 0.1533, + "num_input_tokens_seen": 174948608, + "step": 55590 + }, + { + "epoch": 3.558991101722041, + "grad_norm": 4.410126686096191, + "learning_rate": 1.1620295800737905e-06, + "loss": 0.1162, + "num_input_tokens_seen": 174962816, + "step": 55595 + }, + { + "epoch": 3.559311183663018, + "grad_norm": 7.467769622802734, + "learning_rate": 1.1615577014808005e-06, + "loss": 0.2085, + "num_input_tokens_seen": 174979200, + "step": 55600 + }, + { + "epoch": 3.559631265603995, + "grad_norm": 27.914613723754883, + "learning_rate": 1.161085889722649e-06, + "loss": 0.1848, + "num_input_tokens_seen": 174995008, + "step": 55605 + }, + { + "epoch": 3.5599513475449713, + "grad_norm": 7.078128814697266, + "learning_rate": 1.160614144822892e-06, + "loss": 0.0892, + "num_input_tokens_seen": 175010560, + "step": 55610 + }, + { + "epoch": 3.5602714294859483, + "grad_norm": 8.26203441619873, + "learning_rate": 1.16014246680509e-06, + "loss": 0.1707, + "num_input_tokens_seen": 175026240, + "step": 55615 + }, + { + "epoch": 3.5605915114269253, + "grad_norm": 8.092947006225586, + "learning_rate": 1.1596708556927924e-06, + "loss": 0.1431, + "num_input_tokens_seen": 175042752, + "step": 55620 + }, + { + "epoch": 3.5609115933679023, + "grad_norm": 9.648736953735352, + "learning_rate": 1.1591993115095517e-06, + "loss": 0.166, + "num_input_tokens_seen": 175057856, + "step": 55625 + }, + { + "epoch": 3.5612316753088793, + "grad_norm": 6.110933780670166, + "learning_rate": 1.1587278342789135e-06, + "loss": 0.2111, + "num_input_tokens_seen": 175073472, + "step": 55630 + }, + { + "epoch": 3.561551757249856, + "grad_norm": 5.921648025512695, + "learning_rate": 1.1582564240244205e-06, + "loss": 0.1581, + "num_input_tokens_seen": 175090880, + "step": 55635 + }, + { + "epoch": 3.561871839190833, + "grad_norm": 10.568117141723633, + "learning_rate": 1.1577850807696131e-06, + "loss": 0.1837, + "num_input_tokens_seen": 175107072, + "step": 55640 + }, + { + "epoch": 3.56219192113181, + "grad_norm": 6.133892059326172, + "learning_rate": 1.1573138045380273e-06, + "loss": 0.1625, + "num_input_tokens_seen": 175123008, + "step": 55645 + }, + { + "epoch": 3.5625120030727864, + "grad_norm": 6.541493892669678, + "learning_rate": 1.1568425953531953e-06, + "loss": 0.1638, + "num_input_tokens_seen": 175139584, + "step": 55650 + }, + { + "epoch": 3.5628320850137634, + "grad_norm": 50.026344299316406, + "learning_rate": 1.1563714532386498e-06, + "loss": 0.2076, + "num_input_tokens_seen": 175154240, + "step": 55655 + }, + { + "epoch": 3.5631521669547404, + "grad_norm": 28.825830459594727, + "learning_rate": 1.1559003782179131e-06, + "loss": 0.2162, + "num_input_tokens_seen": 175168896, + "step": 55660 + }, + { + "epoch": 3.5634722488957173, + "grad_norm": 4.398725986480713, + "learning_rate": 1.1554293703145114e-06, + "loss": 0.0951, + "num_input_tokens_seen": 175183552, + "step": 55665 + }, + { + "epoch": 3.5637923308366943, + "grad_norm": 6.580506801605225, + "learning_rate": 1.1549584295519635e-06, + "loss": 0.1163, + "num_input_tokens_seen": 175198528, + "step": 55670 + }, + { + "epoch": 3.5641124127776713, + "grad_norm": 7.239826679229736, + "learning_rate": 1.1544875559537856e-06, + "loss": 0.1664, + "num_input_tokens_seen": 175213952, + "step": 55675 + }, + { + "epoch": 3.564432494718648, + "grad_norm": 6.353450298309326, + "learning_rate": 1.1540167495434904e-06, + "loss": 0.1836, + "num_input_tokens_seen": 175229184, + "step": 55680 + }, + { + "epoch": 3.564752576659625, + "grad_norm": 5.457586765289307, + "learning_rate": 1.153546010344587e-06, + "loss": 0.1354, + "num_input_tokens_seen": 175245312, + "step": 55685 + }, + { + "epoch": 3.565072658600602, + "grad_norm": 7.629557132720947, + "learning_rate": 1.1530753383805843e-06, + "loss": 0.1861, + "num_input_tokens_seen": 175260416, + "step": 55690 + }, + { + "epoch": 3.5653927405415784, + "grad_norm": 7.123376369476318, + "learning_rate": 1.1526047336749812e-06, + "loss": 0.2018, + "num_input_tokens_seen": 175275200, + "step": 55695 + }, + { + "epoch": 3.5657128224825554, + "grad_norm": 6.369529724121094, + "learning_rate": 1.1521341962512805e-06, + "loss": 0.1482, + "num_input_tokens_seen": 175291072, + "step": 55700 + }, + { + "epoch": 3.5660329044235324, + "grad_norm": 8.538448333740234, + "learning_rate": 1.1516637261329774e-06, + "loss": 0.15, + "num_input_tokens_seen": 175307968, + "step": 55705 + }, + { + "epoch": 3.5663529863645094, + "grad_norm": 6.7503533363342285, + "learning_rate": 1.1511933233435643e-06, + "loss": 0.2098, + "num_input_tokens_seen": 175322496, + "step": 55710 + }, + { + "epoch": 3.5666730683054864, + "grad_norm": 8.782033920288086, + "learning_rate": 1.1507229879065312e-06, + "loss": 0.238, + "num_input_tokens_seen": 175338368, + "step": 55715 + }, + { + "epoch": 3.5669931502464634, + "grad_norm": 5.867002010345459, + "learning_rate": 1.1502527198453632e-06, + "loss": 0.2193, + "num_input_tokens_seen": 175354688, + "step": 55720 + }, + { + "epoch": 3.56731323218744, + "grad_norm": 4.679290771484375, + "learning_rate": 1.1497825191835453e-06, + "loss": 0.2203, + "num_input_tokens_seen": 175372032, + "step": 55725 + }, + { + "epoch": 3.567633314128417, + "grad_norm": 6.1966166496276855, + "learning_rate": 1.1493123859445534e-06, + "loss": 0.1978, + "num_input_tokens_seen": 175387392, + "step": 55730 + }, + { + "epoch": 3.567953396069394, + "grad_norm": 7.691241264343262, + "learning_rate": 1.1488423201518668e-06, + "loss": 0.1106, + "num_input_tokens_seen": 175402624, + "step": 55735 + }, + { + "epoch": 3.5682734780103704, + "grad_norm": 7.44338846206665, + "learning_rate": 1.1483723218289564e-06, + "loss": 0.2546, + "num_input_tokens_seen": 175418368, + "step": 55740 + }, + { + "epoch": 3.5685935599513474, + "grad_norm": 9.047250747680664, + "learning_rate": 1.147902390999292e-06, + "loss": 0.1019, + "num_input_tokens_seen": 175434112, + "step": 55745 + }, + { + "epoch": 3.5689136418923244, + "grad_norm": 9.142672538757324, + "learning_rate": 1.1474325276863395e-06, + "loss": 0.1804, + "num_input_tokens_seen": 175450752, + "step": 55750 + }, + { + "epoch": 3.5692337238333014, + "grad_norm": 5.634812355041504, + "learning_rate": 1.1469627319135612e-06, + "loss": 0.232, + "num_input_tokens_seen": 175467008, + "step": 55755 + }, + { + "epoch": 3.5695538057742784, + "grad_norm": 6.576854705810547, + "learning_rate": 1.1464930037044164e-06, + "loss": 0.1739, + "num_input_tokens_seen": 175482752, + "step": 55760 + }, + { + "epoch": 3.5698738877152554, + "grad_norm": 9.411863327026367, + "learning_rate": 1.1460233430823608e-06, + "loss": 0.1691, + "num_input_tokens_seen": 175498368, + "step": 55765 + }, + { + "epoch": 3.570193969656232, + "grad_norm": 7.785600185394287, + "learning_rate": 1.145553750070846e-06, + "loss": 0.1854, + "num_input_tokens_seen": 175518400, + "step": 55770 + }, + { + "epoch": 3.570514051597209, + "grad_norm": 5.8174920082092285, + "learning_rate": 1.1450842246933227e-06, + "loss": 0.1446, + "num_input_tokens_seen": 175534144, + "step": 55775 + }, + { + "epoch": 3.570834133538186, + "grad_norm": 5.151172161102295, + "learning_rate": 1.144614766973236e-06, + "loss": 0.1739, + "num_input_tokens_seen": 175549952, + "step": 55780 + }, + { + "epoch": 3.5711542154791625, + "grad_norm": 7.581540107727051, + "learning_rate": 1.1441453769340275e-06, + "loss": 0.1334, + "num_input_tokens_seen": 175566336, + "step": 55785 + }, + { + "epoch": 3.5714742974201394, + "grad_norm": 4.7871012687683105, + "learning_rate": 1.1436760545991368e-06, + "loss": 0.1545, + "num_input_tokens_seen": 175582400, + "step": 55790 + }, + { + "epoch": 3.5717943793611164, + "grad_norm": 6.3465166091918945, + "learning_rate": 1.1432067999919979e-06, + "loss": 0.166, + "num_input_tokens_seen": 175598784, + "step": 55795 + }, + { + "epoch": 3.5721144613020934, + "grad_norm": 22.846715927124023, + "learning_rate": 1.1427376131360462e-06, + "loss": 0.2502, + "num_input_tokens_seen": 175617152, + "step": 55800 + }, + { + "epoch": 3.5724345432430704, + "grad_norm": 7.618628025054932, + "learning_rate": 1.1422684940547064e-06, + "loss": 0.1629, + "num_input_tokens_seen": 175633664, + "step": 55805 + }, + { + "epoch": 3.572754625184047, + "grad_norm": 5.809410572052002, + "learning_rate": 1.1417994427714076e-06, + "loss": 0.1444, + "num_input_tokens_seen": 175648448, + "step": 55810 + }, + { + "epoch": 3.573074707125024, + "grad_norm": 5.560142517089844, + "learning_rate": 1.1413304593095683e-06, + "loss": 0.1474, + "num_input_tokens_seen": 175664640, + "step": 55815 + }, + { + "epoch": 3.573394789066001, + "grad_norm": 7.082676410675049, + "learning_rate": 1.1408615436926098e-06, + "loss": 0.0936, + "num_input_tokens_seen": 175679488, + "step": 55820 + }, + { + "epoch": 3.573714871006978, + "grad_norm": 8.253941535949707, + "learning_rate": 1.1403926959439462e-06, + "loss": 0.1603, + "num_input_tokens_seen": 175695744, + "step": 55825 + }, + { + "epoch": 3.5740349529479545, + "grad_norm": 5.120306491851807, + "learning_rate": 1.1399239160869897e-06, + "loss": 0.1229, + "num_input_tokens_seen": 175710848, + "step": 55830 + }, + { + "epoch": 3.5743550348889315, + "grad_norm": 7.193739891052246, + "learning_rate": 1.1394552041451482e-06, + "loss": 0.1613, + "num_input_tokens_seen": 175726592, + "step": 55835 + }, + { + "epoch": 3.5746751168299085, + "grad_norm": 6.914603233337402, + "learning_rate": 1.138986560141826e-06, + "loss": 0.2505, + "num_input_tokens_seen": 175741632, + "step": 55840 + }, + { + "epoch": 3.5749951987708855, + "grad_norm": 9.756719589233398, + "learning_rate": 1.1385179841004276e-06, + "loss": 0.1542, + "num_input_tokens_seen": 175756608, + "step": 55845 + }, + { + "epoch": 3.5753152807118624, + "grad_norm": 12.760088920593262, + "learning_rate": 1.1380494760443475e-06, + "loss": 0.2027, + "num_input_tokens_seen": 175771904, + "step": 55850 + }, + { + "epoch": 3.575635362652839, + "grad_norm": 5.225096702575684, + "learning_rate": 1.1375810359969834e-06, + "loss": 0.2972, + "num_input_tokens_seen": 175787264, + "step": 55855 + }, + { + "epoch": 3.575955444593816, + "grad_norm": 5.372066497802734, + "learning_rate": 1.137112663981726e-06, + "loss": 0.2696, + "num_input_tokens_seen": 175802240, + "step": 55860 + }, + { + "epoch": 3.576275526534793, + "grad_norm": 6.744933128356934, + "learning_rate": 1.136644360021963e-06, + "loss": 0.1377, + "num_input_tokens_seen": 175817728, + "step": 55865 + }, + { + "epoch": 3.57659560847577, + "grad_norm": 6.461648941040039, + "learning_rate": 1.136176124141079e-06, + "loss": 0.1294, + "num_input_tokens_seen": 175833344, + "step": 55870 + }, + { + "epoch": 3.5769156904167465, + "grad_norm": 7.122200965881348, + "learning_rate": 1.1357079563624556e-06, + "loss": 0.1668, + "num_input_tokens_seen": 175849600, + "step": 55875 + }, + { + "epoch": 3.5772357723577235, + "grad_norm": 11.675095558166504, + "learning_rate": 1.1352398567094696e-06, + "loss": 0.1578, + "num_input_tokens_seen": 175864064, + "step": 55880 + }, + { + "epoch": 3.5775558542987005, + "grad_norm": 6.638768196105957, + "learning_rate": 1.1347718252054981e-06, + "loss": 0.1167, + "num_input_tokens_seen": 175879104, + "step": 55885 + }, + { + "epoch": 3.5778759362396775, + "grad_norm": 10.968093872070312, + "learning_rate": 1.134303861873909e-06, + "loss": 0.1646, + "num_input_tokens_seen": 175896320, + "step": 55890 + }, + { + "epoch": 3.5781960181806545, + "grad_norm": 13.002299308776855, + "learning_rate": 1.133835966738072e-06, + "loss": 0.2506, + "num_input_tokens_seen": 175910784, + "step": 55895 + }, + { + "epoch": 3.578516100121631, + "grad_norm": 8.474078178405762, + "learning_rate": 1.1333681398213509e-06, + "loss": 0.1526, + "num_input_tokens_seen": 175925760, + "step": 55900 + }, + { + "epoch": 3.578836182062608, + "grad_norm": 5.985116481781006, + "learning_rate": 1.1329003811471061e-06, + "loss": 0.1295, + "num_input_tokens_seen": 175941568, + "step": 55905 + }, + { + "epoch": 3.579156264003585, + "grad_norm": 9.519434928894043, + "learning_rate": 1.1324326907386955e-06, + "loss": 0.1643, + "num_input_tokens_seen": 175956352, + "step": 55910 + }, + { + "epoch": 3.5794763459445615, + "grad_norm": 4.607539653778076, + "learning_rate": 1.1319650686194719e-06, + "loss": 0.1576, + "num_input_tokens_seen": 175971648, + "step": 55915 + }, + { + "epoch": 3.5797964278855385, + "grad_norm": 7.007773399353027, + "learning_rate": 1.1314975148127889e-06, + "loss": 0.1446, + "num_input_tokens_seen": 175986752, + "step": 55920 + }, + { + "epoch": 3.5801165098265155, + "grad_norm": 8.177225112915039, + "learning_rate": 1.1310300293419898e-06, + "loss": 0.15, + "num_input_tokens_seen": 176002368, + "step": 55925 + }, + { + "epoch": 3.5804365917674925, + "grad_norm": 6.461019039154053, + "learning_rate": 1.130562612230421e-06, + "loss": 0.1144, + "num_input_tokens_seen": 176018304, + "step": 55930 + }, + { + "epoch": 3.5807566737084695, + "grad_norm": 5.237262725830078, + "learning_rate": 1.1300952635014226e-06, + "loss": 0.1389, + "num_input_tokens_seen": 176034560, + "step": 55935 + }, + { + "epoch": 3.5810767556494465, + "grad_norm": 25.421022415161133, + "learning_rate": 1.129627983178331e-06, + "loss": 0.2644, + "num_input_tokens_seen": 176049472, + "step": 55940 + }, + { + "epoch": 3.581396837590423, + "grad_norm": 2.77398943901062, + "learning_rate": 1.12916077128448e-06, + "loss": 0.1385, + "num_input_tokens_seen": 176066560, + "step": 55945 + }, + { + "epoch": 3.5817169195314, + "grad_norm": 6.5892438888549805, + "learning_rate": 1.1286936278431986e-06, + "loss": 0.1688, + "num_input_tokens_seen": 176082432, + "step": 55950 + }, + { + "epoch": 3.582037001472377, + "grad_norm": 6.407619476318359, + "learning_rate": 1.1282265528778167e-06, + "loss": 0.161, + "num_input_tokens_seen": 176097664, + "step": 55955 + }, + { + "epoch": 3.5823570834133536, + "grad_norm": 5.30725622177124, + "learning_rate": 1.1277595464116534e-06, + "loss": 0.1433, + "num_input_tokens_seen": 176113024, + "step": 55960 + }, + { + "epoch": 3.5826771653543306, + "grad_norm": 9.833479881286621, + "learning_rate": 1.1272926084680317e-06, + "loss": 0.1655, + "num_input_tokens_seen": 176128064, + "step": 55965 + }, + { + "epoch": 3.5829972472953076, + "grad_norm": 14.332094192504883, + "learning_rate": 1.1268257390702667e-06, + "loss": 0.216, + "num_input_tokens_seen": 176144512, + "step": 55970 + }, + { + "epoch": 3.5833173292362845, + "grad_norm": 7.338766574859619, + "learning_rate": 1.1263589382416717e-06, + "loss": 0.2733, + "num_input_tokens_seen": 176160128, + "step": 55975 + }, + { + "epoch": 3.5836374111772615, + "grad_norm": 9.22124195098877, + "learning_rate": 1.1258922060055563e-06, + "loss": 0.1822, + "num_input_tokens_seen": 176175616, + "step": 55980 + }, + { + "epoch": 3.5839574931182385, + "grad_norm": 5.999307632446289, + "learning_rate": 1.1254255423852267e-06, + "loss": 0.1332, + "num_input_tokens_seen": 176191616, + "step": 55985 + }, + { + "epoch": 3.584277575059215, + "grad_norm": 6.551064968109131, + "learning_rate": 1.1249589474039857e-06, + "loss": 0.1587, + "num_input_tokens_seen": 176206976, + "step": 55990 + }, + { + "epoch": 3.584597657000192, + "grad_norm": 7.974338054656982, + "learning_rate": 1.1244924210851324e-06, + "loss": 0.1667, + "num_input_tokens_seen": 176223552, + "step": 55995 + }, + { + "epoch": 3.584917738941169, + "grad_norm": 2.896372079849243, + "learning_rate": 1.1240259634519622e-06, + "loss": 0.1975, + "num_input_tokens_seen": 176238400, + "step": 56000 + }, + { + "epoch": 3.5852378208821456, + "grad_norm": 7.04586935043335, + "learning_rate": 1.1235595745277697e-06, + "loss": 0.1504, + "num_input_tokens_seen": 176254208, + "step": 56005 + }, + { + "epoch": 3.5855579028231226, + "grad_norm": 4.960413932800293, + "learning_rate": 1.1230932543358405e-06, + "loss": 0.1632, + "num_input_tokens_seen": 176270208, + "step": 56010 + }, + { + "epoch": 3.5858779847640996, + "grad_norm": 7.402371406555176, + "learning_rate": 1.1226270028994634e-06, + "loss": 0.1512, + "num_input_tokens_seen": 176284736, + "step": 56015 + }, + { + "epoch": 3.5861980667050766, + "grad_norm": 5.863852500915527, + "learning_rate": 1.1221608202419194e-06, + "loss": 0.1237, + "num_input_tokens_seen": 176299968, + "step": 56020 + }, + { + "epoch": 3.5865181486460536, + "grad_norm": 7.565903663635254, + "learning_rate": 1.1216947063864857e-06, + "loss": 0.175, + "num_input_tokens_seen": 176317056, + "step": 56025 + }, + { + "epoch": 3.5868382305870306, + "grad_norm": 6.965278625488281, + "learning_rate": 1.1212286613564413e-06, + "loss": 0.1639, + "num_input_tokens_seen": 176332288, + "step": 56030 + }, + { + "epoch": 3.587158312528007, + "grad_norm": 9.116934776306152, + "learning_rate": 1.1207626851750535e-06, + "loss": 0.1594, + "num_input_tokens_seen": 176349440, + "step": 56035 + }, + { + "epoch": 3.587478394468984, + "grad_norm": 44.235172271728516, + "learning_rate": 1.1202967778655954e-06, + "loss": 0.2285, + "num_input_tokens_seen": 176364672, + "step": 56040 + }, + { + "epoch": 3.587798476409961, + "grad_norm": 9.036544799804688, + "learning_rate": 1.1198309394513277e-06, + "loss": 0.1599, + "num_input_tokens_seen": 176380480, + "step": 56045 + }, + { + "epoch": 3.5881185583509376, + "grad_norm": 6.472044467926025, + "learning_rate": 1.1193651699555144e-06, + "loss": 0.1678, + "num_input_tokens_seen": 176395712, + "step": 56050 + }, + { + "epoch": 3.5884386402919146, + "grad_norm": 63.86207962036133, + "learning_rate": 1.1188994694014132e-06, + "loss": 0.18, + "num_input_tokens_seen": 176410880, + "step": 56055 + }, + { + "epoch": 3.5887587222328916, + "grad_norm": 7.6953816413879395, + "learning_rate": 1.1184338378122785e-06, + "loss": 0.0902, + "num_input_tokens_seen": 176426496, + "step": 56060 + }, + { + "epoch": 3.5890788041738686, + "grad_norm": 17.090803146362305, + "learning_rate": 1.1179682752113622e-06, + "loss": 0.1495, + "num_input_tokens_seen": 176441472, + "step": 56065 + }, + { + "epoch": 3.5893988861148456, + "grad_norm": 7.642971038818359, + "learning_rate": 1.11750278162191e-06, + "loss": 0.1449, + "num_input_tokens_seen": 176458176, + "step": 56070 + }, + { + "epoch": 3.589718968055822, + "grad_norm": 99.18804168701172, + "learning_rate": 1.1170373570671697e-06, + "loss": 0.2215, + "num_input_tokens_seen": 176473856, + "step": 56075 + }, + { + "epoch": 3.590039049996799, + "grad_norm": 9.724260330200195, + "learning_rate": 1.1165720015703784e-06, + "loss": 0.1884, + "num_input_tokens_seen": 176489216, + "step": 56080 + }, + { + "epoch": 3.590359131937776, + "grad_norm": 6.645610332489014, + "learning_rate": 1.1161067151547761e-06, + "loss": 0.1233, + "num_input_tokens_seen": 176504192, + "step": 56085 + }, + { + "epoch": 3.590679213878753, + "grad_norm": 6.885283470153809, + "learning_rate": 1.1156414978435961e-06, + "loss": 0.1553, + "num_input_tokens_seen": 176518912, + "step": 56090 + }, + { + "epoch": 3.5909992958197297, + "grad_norm": 5.146466255187988, + "learning_rate": 1.115176349660069e-06, + "loss": 0.1501, + "num_input_tokens_seen": 176535744, + "step": 56095 + }, + { + "epoch": 3.5913193777607066, + "grad_norm": 52.238548278808594, + "learning_rate": 1.1147112706274216e-06, + "loss": 0.2381, + "num_input_tokens_seen": 176550784, + "step": 56100 + }, + { + "epoch": 3.5916394597016836, + "grad_norm": 6.318117618560791, + "learning_rate": 1.1142462607688776e-06, + "loss": 0.3078, + "num_input_tokens_seen": 176566464, + "step": 56105 + }, + { + "epoch": 3.5919595416426606, + "grad_norm": 4.70468282699585, + "learning_rate": 1.1137813201076564e-06, + "loss": 0.1107, + "num_input_tokens_seen": 176582592, + "step": 56110 + }, + { + "epoch": 3.5922796235836376, + "grad_norm": 2.5041089057922363, + "learning_rate": 1.113316448666977e-06, + "loss": 0.1347, + "num_input_tokens_seen": 176598528, + "step": 56115 + }, + { + "epoch": 3.592599705524614, + "grad_norm": 8.947285652160645, + "learning_rate": 1.1128516464700498e-06, + "loss": 0.1809, + "num_input_tokens_seen": 176615552, + "step": 56120 + }, + { + "epoch": 3.592919787465591, + "grad_norm": 8.668355941772461, + "learning_rate": 1.1123869135400866e-06, + "loss": 0.1755, + "num_input_tokens_seen": 176631232, + "step": 56125 + }, + { + "epoch": 3.593239869406568, + "grad_norm": 8.011661529541016, + "learning_rate": 1.1119222499002931e-06, + "loss": 0.238, + "num_input_tokens_seen": 176646400, + "step": 56130 + }, + { + "epoch": 3.593559951347545, + "grad_norm": 9.344932556152344, + "learning_rate": 1.1114576555738725e-06, + "loss": 0.2218, + "num_input_tokens_seen": 176662528, + "step": 56135 + }, + { + "epoch": 3.5938800332885217, + "grad_norm": 5.591253280639648, + "learning_rate": 1.1109931305840238e-06, + "loss": 0.1394, + "num_input_tokens_seen": 176678720, + "step": 56140 + }, + { + "epoch": 3.5942001152294987, + "grad_norm": 7.516134738922119, + "learning_rate": 1.110528674953942e-06, + "loss": 0.1446, + "num_input_tokens_seen": 176695552, + "step": 56145 + }, + { + "epoch": 3.5945201971704757, + "grad_norm": 7.601511001586914, + "learning_rate": 1.1100642887068228e-06, + "loss": 0.1611, + "num_input_tokens_seen": 176709760, + "step": 56150 + }, + { + "epoch": 3.5948402791114527, + "grad_norm": 9.157389640808105, + "learning_rate": 1.1095999718658512e-06, + "loss": 0.1788, + "num_input_tokens_seen": 176724544, + "step": 56155 + }, + { + "epoch": 3.5951603610524296, + "grad_norm": 5.9692254066467285, + "learning_rate": 1.1091357244542156e-06, + "loss": 0.1416, + "num_input_tokens_seen": 176740864, + "step": 56160 + }, + { + "epoch": 3.595480442993406, + "grad_norm": 8.152477264404297, + "learning_rate": 1.108671546495097e-06, + "loss": 0.1585, + "num_input_tokens_seen": 176757504, + "step": 56165 + }, + { + "epoch": 3.595800524934383, + "grad_norm": 9.135732650756836, + "learning_rate": 1.1082074380116746e-06, + "loss": 0.2267, + "num_input_tokens_seen": 176773888, + "step": 56170 + }, + { + "epoch": 3.59612060687536, + "grad_norm": 4.457187652587891, + "learning_rate": 1.1077433990271228e-06, + "loss": 0.1493, + "num_input_tokens_seen": 176789568, + "step": 56175 + }, + { + "epoch": 3.5964406888163367, + "grad_norm": 11.876298904418945, + "learning_rate": 1.1072794295646135e-06, + "loss": 0.1922, + "num_input_tokens_seen": 176805696, + "step": 56180 + }, + { + "epoch": 3.5967607707573137, + "grad_norm": 6.563093662261963, + "learning_rate": 1.1068155296473153e-06, + "loss": 0.1178, + "num_input_tokens_seen": 176824064, + "step": 56185 + }, + { + "epoch": 3.5970808526982907, + "grad_norm": 5.915838718414307, + "learning_rate": 1.1063516992983918e-06, + "loss": 0.1523, + "num_input_tokens_seen": 176839040, + "step": 56190 + }, + { + "epoch": 3.5974009346392677, + "grad_norm": 6.513148784637451, + "learning_rate": 1.1058879385410062e-06, + "loss": 0.1325, + "num_input_tokens_seen": 176854720, + "step": 56195 + }, + { + "epoch": 3.5977210165802447, + "grad_norm": 6.98793363571167, + "learning_rate": 1.1054242473983151e-06, + "loss": 0.2025, + "num_input_tokens_seen": 176869376, + "step": 56200 + }, + { + "epoch": 3.5980410985212217, + "grad_norm": 7.184045791625977, + "learning_rate": 1.104960625893473e-06, + "loss": 0.0986, + "num_input_tokens_seen": 176886784, + "step": 56205 + }, + { + "epoch": 3.598361180462198, + "grad_norm": 5.955117702484131, + "learning_rate": 1.1044970740496308e-06, + "loss": 0.1185, + "num_input_tokens_seen": 176901376, + "step": 56210 + }, + { + "epoch": 3.598681262403175, + "grad_norm": 5.262508392333984, + "learning_rate": 1.104033591889936e-06, + "loss": 0.148, + "num_input_tokens_seen": 176916928, + "step": 56215 + }, + { + "epoch": 3.599001344344152, + "grad_norm": 6.019082069396973, + "learning_rate": 1.1035701794375322e-06, + "loss": 0.184, + "num_input_tokens_seen": 176932288, + "step": 56220 + }, + { + "epoch": 3.5993214262851287, + "grad_norm": 7.054994583129883, + "learning_rate": 1.1031068367155598e-06, + "loss": 0.1429, + "num_input_tokens_seen": 176948800, + "step": 56225 + }, + { + "epoch": 3.5996415082261057, + "grad_norm": 64.26366424560547, + "learning_rate": 1.102643563747155e-06, + "loss": 0.2124, + "num_input_tokens_seen": 176963648, + "step": 56230 + }, + { + "epoch": 3.5999615901670827, + "grad_norm": 4.432478904724121, + "learning_rate": 1.1021803605554538e-06, + "loss": 0.1687, + "num_input_tokens_seen": 176979200, + "step": 56235 + }, + { + "epoch": 3.6002816721080597, + "grad_norm": 9.661032676696777, + "learning_rate": 1.1017172271635825e-06, + "loss": 0.1796, + "num_input_tokens_seen": 176994880, + "step": 56240 + }, + { + "epoch": 3.6006017540490367, + "grad_norm": 9.059343338012695, + "learning_rate": 1.1012541635946705e-06, + "loss": 0.1608, + "num_input_tokens_seen": 177009792, + "step": 56245 + }, + { + "epoch": 3.6009218359900137, + "grad_norm": 5.524026393890381, + "learning_rate": 1.10079116987184e-06, + "loss": 0.1098, + "num_input_tokens_seen": 177025024, + "step": 56250 + }, + { + "epoch": 3.6012419179309902, + "grad_norm": 6.672245025634766, + "learning_rate": 1.1003282460182091e-06, + "loss": 0.2529, + "num_input_tokens_seen": 177039872, + "step": 56255 + }, + { + "epoch": 3.6015619998719672, + "grad_norm": 8.333463668823242, + "learning_rate": 1.099865392056897e-06, + "loss": 0.1617, + "num_input_tokens_seen": 177055872, + "step": 56260 + }, + { + "epoch": 3.6018820818129442, + "grad_norm": 6.0383124351501465, + "learning_rate": 1.099402608011012e-06, + "loss": 0.204, + "num_input_tokens_seen": 177072320, + "step": 56265 + }, + { + "epoch": 3.6022021637539208, + "grad_norm": 7.073526382446289, + "learning_rate": 1.0989398939036674e-06, + "loss": 0.1869, + "num_input_tokens_seen": 177088512, + "step": 56270 + }, + { + "epoch": 3.6025222456948978, + "grad_norm": 6.889311790466309, + "learning_rate": 1.0984772497579648e-06, + "loss": 0.2186, + "num_input_tokens_seen": 177103744, + "step": 56275 + }, + { + "epoch": 3.6028423276358748, + "grad_norm": 4.545431613922119, + "learning_rate": 1.0980146755970088e-06, + "loss": 0.1175, + "num_input_tokens_seen": 177118400, + "step": 56280 + }, + { + "epoch": 3.6031624095768517, + "grad_norm": 6.1740946769714355, + "learning_rate": 1.0975521714438975e-06, + "loss": 0.1222, + "num_input_tokens_seen": 177133696, + "step": 56285 + }, + { + "epoch": 3.6034824915178287, + "grad_norm": 7.999080181121826, + "learning_rate": 1.0970897373217258e-06, + "loss": 0.1266, + "num_input_tokens_seen": 177148608, + "step": 56290 + }, + { + "epoch": 3.6038025734588057, + "grad_norm": 9.812166213989258, + "learning_rate": 1.0966273732535846e-06, + "loss": 0.2266, + "num_input_tokens_seen": 177163904, + "step": 56295 + }, + { + "epoch": 3.6041226553997823, + "grad_norm": 59.51697540283203, + "learning_rate": 1.0961650792625618e-06, + "loss": 0.21, + "num_input_tokens_seen": 177179008, + "step": 56300 + }, + { + "epoch": 3.6044427373407593, + "grad_norm": 13.518210411071777, + "learning_rate": 1.0957028553717448e-06, + "loss": 0.2096, + "num_input_tokens_seen": 177195584, + "step": 56305 + }, + { + "epoch": 3.6047628192817363, + "grad_norm": 8.815262794494629, + "learning_rate": 1.0952407016042103e-06, + "loss": 0.1885, + "num_input_tokens_seen": 177211200, + "step": 56310 + }, + { + "epoch": 3.605082901222713, + "grad_norm": 9.909342765808105, + "learning_rate": 1.0947786179830388e-06, + "loss": 0.1208, + "num_input_tokens_seen": 177226624, + "step": 56315 + }, + { + "epoch": 3.60540298316369, + "grad_norm": 6.044792652130127, + "learning_rate": 1.0943166045313039e-06, + "loss": 0.1208, + "num_input_tokens_seen": 177242240, + "step": 56320 + }, + { + "epoch": 3.605723065104667, + "grad_norm": 8.950325965881348, + "learning_rate": 1.0938546612720756e-06, + "loss": 0.1427, + "num_input_tokens_seen": 177258944, + "step": 56325 + }, + { + "epoch": 3.6060431470456438, + "grad_norm": 9.946361541748047, + "learning_rate": 1.0933927882284216e-06, + "loss": 0.1503, + "num_input_tokens_seen": 177274240, + "step": 56330 + }, + { + "epoch": 3.6063632289866208, + "grad_norm": 67.79535675048828, + "learning_rate": 1.0929309854234047e-06, + "loss": 0.1458, + "num_input_tokens_seen": 177289024, + "step": 56335 + }, + { + "epoch": 3.6066833109275973, + "grad_norm": 7.494733810424805, + "learning_rate": 1.0924692528800844e-06, + "loss": 0.1496, + "num_input_tokens_seen": 177303744, + "step": 56340 + }, + { + "epoch": 3.6070033928685743, + "grad_norm": 11.137843132019043, + "learning_rate": 1.0920075906215196e-06, + "loss": 0.1769, + "num_input_tokens_seen": 177319488, + "step": 56345 + }, + { + "epoch": 3.6073234748095513, + "grad_norm": 5.92828369140625, + "learning_rate": 1.0915459986707601e-06, + "loss": 0.1742, + "num_input_tokens_seen": 177334400, + "step": 56350 + }, + { + "epoch": 3.6076435567505283, + "grad_norm": 46.93727493286133, + "learning_rate": 1.0910844770508588e-06, + "loss": 0.2224, + "num_input_tokens_seen": 177350336, + "step": 56355 + }, + { + "epoch": 3.607963638691505, + "grad_norm": 4.242053985595703, + "learning_rate": 1.0906230257848586e-06, + "loss": 0.1737, + "num_input_tokens_seen": 177366784, + "step": 56360 + }, + { + "epoch": 3.608283720632482, + "grad_norm": 8.30048942565918, + "learning_rate": 1.0901616448958036e-06, + "loss": 0.1851, + "num_input_tokens_seen": 177381504, + "step": 56365 + }, + { + "epoch": 3.608603802573459, + "grad_norm": 13.444388389587402, + "learning_rate": 1.0897003344067328e-06, + "loss": 0.23, + "num_input_tokens_seen": 177396864, + "step": 56370 + }, + { + "epoch": 3.608923884514436, + "grad_norm": 4.7966108322143555, + "learning_rate": 1.0892390943406803e-06, + "loss": 0.124, + "num_input_tokens_seen": 177412800, + "step": 56375 + }, + { + "epoch": 3.609243966455413, + "grad_norm": 12.583475112915039, + "learning_rate": 1.0887779247206809e-06, + "loss": 0.1778, + "num_input_tokens_seen": 177428736, + "step": 56380 + }, + { + "epoch": 3.6095640483963893, + "grad_norm": 7.680540561676025, + "learning_rate": 1.0883168255697596e-06, + "loss": 0.114, + "num_input_tokens_seen": 177444544, + "step": 56385 + }, + { + "epoch": 3.6098841303373663, + "grad_norm": 4.795150279998779, + "learning_rate": 1.0878557969109432e-06, + "loss": 0.1327, + "num_input_tokens_seen": 177460224, + "step": 56390 + }, + { + "epoch": 3.6102042122783433, + "grad_norm": 7.39362096786499, + "learning_rate": 1.0873948387672531e-06, + "loss": 0.1233, + "num_input_tokens_seen": 177475776, + "step": 56395 + }, + { + "epoch": 3.6105242942193203, + "grad_norm": 16.939613342285156, + "learning_rate": 1.0869339511617066e-06, + "loss": 0.1733, + "num_input_tokens_seen": 177491520, + "step": 56400 + }, + { + "epoch": 3.610844376160297, + "grad_norm": 7.0870232582092285, + "learning_rate": 1.0864731341173186e-06, + "loss": 0.1546, + "num_input_tokens_seen": 177506048, + "step": 56405 + }, + { + "epoch": 3.611164458101274, + "grad_norm": 10.378623008728027, + "learning_rate": 1.0860123876570991e-06, + "loss": 0.1158, + "num_input_tokens_seen": 177523904, + "step": 56410 + }, + { + "epoch": 3.611484540042251, + "grad_norm": 5.1644978523254395, + "learning_rate": 1.0855517118040556e-06, + "loss": 0.162, + "num_input_tokens_seen": 177539264, + "step": 56415 + }, + { + "epoch": 3.611804621983228, + "grad_norm": 49.593772888183594, + "learning_rate": 1.0850911065811914e-06, + "loss": 0.255, + "num_input_tokens_seen": 177555392, + "step": 56420 + }, + { + "epoch": 3.612124703924205, + "grad_norm": 8.5261869430542, + "learning_rate": 1.0846305720115083e-06, + "loss": 0.1543, + "num_input_tokens_seen": 177570816, + "step": 56425 + }, + { + "epoch": 3.6124447858651814, + "grad_norm": 8.893317222595215, + "learning_rate": 1.0841701081180018e-06, + "loss": 0.136, + "num_input_tokens_seen": 177587136, + "step": 56430 + }, + { + "epoch": 3.6127648678061584, + "grad_norm": 14.10189151763916, + "learning_rate": 1.0837097149236655e-06, + "loss": 0.2711, + "num_input_tokens_seen": 177602752, + "step": 56435 + }, + { + "epoch": 3.6130849497471353, + "grad_norm": 28.21265411376953, + "learning_rate": 1.0832493924514886e-06, + "loss": 0.2058, + "num_input_tokens_seen": 177619712, + "step": 56440 + }, + { + "epoch": 3.613405031688112, + "grad_norm": 54.49020767211914, + "learning_rate": 1.0827891407244578e-06, + "loss": 0.183, + "num_input_tokens_seen": 177635008, + "step": 56445 + }, + { + "epoch": 3.613725113629089, + "grad_norm": 7.170092582702637, + "learning_rate": 1.082328959765555e-06, + "loss": 0.1528, + "num_input_tokens_seen": 177650688, + "step": 56450 + }, + { + "epoch": 3.614045195570066, + "grad_norm": 6.909509658813477, + "learning_rate": 1.08186884959776e-06, + "loss": 0.1042, + "num_input_tokens_seen": 177665792, + "step": 56455 + }, + { + "epoch": 3.614365277511043, + "grad_norm": 5.740866184234619, + "learning_rate": 1.0814088102440464e-06, + "loss": 0.14, + "num_input_tokens_seen": 177681792, + "step": 56460 + }, + { + "epoch": 3.61468535945202, + "grad_norm": 5.829693794250488, + "learning_rate": 1.08094884172739e-06, + "loss": 0.1633, + "num_input_tokens_seen": 177696128, + "step": 56465 + }, + { + "epoch": 3.615005441392997, + "grad_norm": 7.156543731689453, + "learning_rate": 1.0804889440707548e-06, + "loss": 0.1264, + "num_input_tokens_seen": 177712960, + "step": 56470 + }, + { + "epoch": 3.6153255233339734, + "grad_norm": 7.742795467376709, + "learning_rate": 1.0800291172971085e-06, + "loss": 0.1892, + "num_input_tokens_seen": 177728128, + "step": 56475 + }, + { + "epoch": 3.6156456052749504, + "grad_norm": 4.450780868530273, + "learning_rate": 1.0795693614294122e-06, + "loss": 0.1293, + "num_input_tokens_seen": 177744832, + "step": 56480 + }, + { + "epoch": 3.6159656872159274, + "grad_norm": 3.8088700771331787, + "learning_rate": 1.079109676490622e-06, + "loss": 0.1196, + "num_input_tokens_seen": 177760512, + "step": 56485 + }, + { + "epoch": 3.616285769156904, + "grad_norm": 21.40488624572754, + "learning_rate": 1.0786500625036952e-06, + "loss": 0.136, + "num_input_tokens_seen": 177776896, + "step": 56490 + }, + { + "epoch": 3.616605851097881, + "grad_norm": 26.229644775390625, + "learning_rate": 1.0781905194915793e-06, + "loss": 0.2233, + "num_input_tokens_seen": 177792256, + "step": 56495 + }, + { + "epoch": 3.616925933038858, + "grad_norm": 42.455169677734375, + "learning_rate": 1.0777310474772242e-06, + "loss": 0.1567, + "num_input_tokens_seen": 177807616, + "step": 56500 + }, + { + "epoch": 3.617246014979835, + "grad_norm": 4.53291654586792, + "learning_rate": 1.0772716464835708e-06, + "loss": 0.1297, + "num_input_tokens_seen": 177822848, + "step": 56505 + }, + { + "epoch": 3.617566096920812, + "grad_norm": 3.8168232440948486, + "learning_rate": 1.0768123165335615e-06, + "loss": 0.177, + "num_input_tokens_seen": 177838656, + "step": 56510 + }, + { + "epoch": 3.617886178861789, + "grad_norm": 4.978646278381348, + "learning_rate": 1.076353057650132e-06, + "loss": 0.1451, + "num_input_tokens_seen": 177855744, + "step": 56515 + }, + { + "epoch": 3.6182062608027654, + "grad_norm": 4.326525688171387, + "learning_rate": 1.0758938698562152e-06, + "loss": 0.1705, + "num_input_tokens_seen": 177870720, + "step": 56520 + }, + { + "epoch": 3.6185263427437424, + "grad_norm": 79.56486511230469, + "learning_rate": 1.0754347531747406e-06, + "loss": 0.2157, + "num_input_tokens_seen": 177886144, + "step": 56525 + }, + { + "epoch": 3.6188464246847194, + "grad_norm": 12.456022262573242, + "learning_rate": 1.0749757076286343e-06, + "loss": 0.2933, + "num_input_tokens_seen": 177902208, + "step": 56530 + }, + { + "epoch": 3.619166506625696, + "grad_norm": 36.655479431152344, + "learning_rate": 1.0745167332408175e-06, + "loss": 0.1728, + "num_input_tokens_seen": 177918272, + "step": 56535 + }, + { + "epoch": 3.619486588566673, + "grad_norm": 0.40591585636138916, + "learning_rate": 1.0740578300342107e-06, + "loss": 0.0814, + "num_input_tokens_seen": 177934592, + "step": 56540 + }, + { + "epoch": 3.61980667050765, + "grad_norm": 15.168286323547363, + "learning_rate": 1.0735989980317287e-06, + "loss": 0.1128, + "num_input_tokens_seen": 177950528, + "step": 56545 + }, + { + "epoch": 3.620126752448627, + "grad_norm": 5.198947429656982, + "learning_rate": 1.0731402372562826e-06, + "loss": 0.1652, + "num_input_tokens_seen": 177965696, + "step": 56550 + }, + { + "epoch": 3.620446834389604, + "grad_norm": 11.826290130615234, + "learning_rate": 1.072681547730781e-06, + "loss": 0.1763, + "num_input_tokens_seen": 177981056, + "step": 56555 + }, + { + "epoch": 3.620766916330581, + "grad_norm": 6.345130920410156, + "learning_rate": 1.0722229294781284e-06, + "loss": 0.1637, + "num_input_tokens_seen": 177997696, + "step": 56560 + }, + { + "epoch": 3.6210869982715574, + "grad_norm": 22.610910415649414, + "learning_rate": 1.0717643825212253e-06, + "loss": 0.1245, + "num_input_tokens_seen": 178013760, + "step": 56565 + }, + { + "epoch": 3.6214070802125344, + "grad_norm": 6.8308796882629395, + "learning_rate": 1.071305906882969e-06, + "loss": 0.1651, + "num_input_tokens_seen": 178029952, + "step": 56570 + }, + { + "epoch": 3.6217271621535114, + "grad_norm": 12.732839584350586, + "learning_rate": 1.0708475025862558e-06, + "loss": 0.1644, + "num_input_tokens_seen": 178046976, + "step": 56575 + }, + { + "epoch": 3.622047244094488, + "grad_norm": 5.264812469482422, + "learning_rate": 1.070389169653972e-06, + "loss": 0.0867, + "num_input_tokens_seen": 178063040, + "step": 56580 + }, + { + "epoch": 3.622367326035465, + "grad_norm": 13.393782615661621, + "learning_rate": 1.0699309081090085e-06, + "loss": 0.1882, + "num_input_tokens_seen": 178078912, + "step": 56585 + }, + { + "epoch": 3.622687407976442, + "grad_norm": 5.568122863769531, + "learning_rate": 1.0694727179742445e-06, + "loss": 0.1782, + "num_input_tokens_seen": 178093888, + "step": 56590 + }, + { + "epoch": 3.623007489917419, + "grad_norm": 6.223817825317383, + "learning_rate": 1.0690145992725625e-06, + "loss": 0.1866, + "num_input_tokens_seen": 178108992, + "step": 56595 + }, + { + "epoch": 3.623327571858396, + "grad_norm": 5.858948707580566, + "learning_rate": 1.068556552026838e-06, + "loss": 0.2354, + "num_input_tokens_seen": 178124352, + "step": 56600 + }, + { + "epoch": 3.6236476537993725, + "grad_norm": 11.162989616394043, + "learning_rate": 1.0680985762599418e-06, + "loss": 0.1695, + "num_input_tokens_seen": 178141568, + "step": 56605 + }, + { + "epoch": 3.6239677357403495, + "grad_norm": 16.831180572509766, + "learning_rate": 1.0676406719947466e-06, + "loss": 0.2273, + "num_input_tokens_seen": 178158720, + "step": 56610 + }, + { + "epoch": 3.6242878176813265, + "grad_norm": 5.402773380279541, + "learning_rate": 1.067182839254113e-06, + "loss": 0.1764, + "num_input_tokens_seen": 178175424, + "step": 56615 + }, + { + "epoch": 3.6246078996223035, + "grad_norm": 7.3056817054748535, + "learning_rate": 1.0667250780609065e-06, + "loss": 0.2011, + "num_input_tokens_seen": 178192000, + "step": 56620 + }, + { + "epoch": 3.62492798156328, + "grad_norm": 45.056243896484375, + "learning_rate": 1.0662673884379837e-06, + "loss": 0.2361, + "num_input_tokens_seen": 178206528, + "step": 56625 + }, + { + "epoch": 3.625248063504257, + "grad_norm": 4.484217643737793, + "learning_rate": 1.0658097704081993e-06, + "loss": 0.2, + "num_input_tokens_seen": 178222528, + "step": 56630 + }, + { + "epoch": 3.625568145445234, + "grad_norm": 9.775680541992188, + "learning_rate": 1.0653522239944048e-06, + "loss": 0.114, + "num_input_tokens_seen": 178237696, + "step": 56635 + }, + { + "epoch": 3.625888227386211, + "grad_norm": 7.958801746368408, + "learning_rate": 1.0648947492194473e-06, + "loss": 0.1414, + "num_input_tokens_seen": 178253376, + "step": 56640 + }, + { + "epoch": 3.626208309327188, + "grad_norm": 5.47607421875, + "learning_rate": 1.064437346106171e-06, + "loss": 0.138, + "num_input_tokens_seen": 178274368, + "step": 56645 + }, + { + "epoch": 3.6265283912681645, + "grad_norm": 4.102622032165527, + "learning_rate": 1.063980014677415e-06, + "loss": 0.1887, + "num_input_tokens_seen": 178289664, + "step": 56650 + }, + { + "epoch": 3.6268484732091415, + "grad_norm": 9.416786193847656, + "learning_rate": 1.0635227549560182e-06, + "loss": 0.1833, + "num_input_tokens_seen": 178306816, + "step": 56655 + }, + { + "epoch": 3.6271685551501185, + "grad_norm": 4.53117561340332, + "learning_rate": 1.0630655669648123e-06, + "loss": 0.1544, + "num_input_tokens_seen": 178322688, + "step": 56660 + }, + { + "epoch": 3.6274886370910955, + "grad_norm": 10.541534423828125, + "learning_rate": 1.0626084507266276e-06, + "loss": 0.2436, + "num_input_tokens_seen": 178338752, + "step": 56665 + }, + { + "epoch": 3.627808719032072, + "grad_norm": 6.01800012588501, + "learning_rate": 1.0621514062642898e-06, + "loss": 0.1756, + "num_input_tokens_seen": 178354496, + "step": 56670 + }, + { + "epoch": 3.628128800973049, + "grad_norm": 86.18816375732422, + "learning_rate": 1.061694433600621e-06, + "loss": 0.1773, + "num_input_tokens_seen": 178369984, + "step": 56675 + }, + { + "epoch": 3.628448882914026, + "grad_norm": 9.752937316894531, + "learning_rate": 1.0612375327584406e-06, + "loss": 0.197, + "num_input_tokens_seen": 178387392, + "step": 56680 + }, + { + "epoch": 3.628768964855003, + "grad_norm": 7.683132171630859, + "learning_rate": 1.0607807037605637e-06, + "loss": 0.1712, + "num_input_tokens_seen": 178403072, + "step": 56685 + }, + { + "epoch": 3.62908904679598, + "grad_norm": 12.467867851257324, + "learning_rate": 1.0603239466298004e-06, + "loss": 0.1992, + "num_input_tokens_seen": 178418240, + "step": 56690 + }, + { + "epoch": 3.6294091287369565, + "grad_norm": 5.007548809051514, + "learning_rate": 1.0598672613889623e-06, + "loss": 0.1193, + "num_input_tokens_seen": 178433280, + "step": 56695 + }, + { + "epoch": 3.6297292106779335, + "grad_norm": 6.367004871368408, + "learning_rate": 1.05941064806085e-06, + "loss": 0.1284, + "num_input_tokens_seen": 178448512, + "step": 56700 + }, + { + "epoch": 3.6300492926189105, + "grad_norm": 28.44603729248047, + "learning_rate": 1.0589541066682674e-06, + "loss": 0.2032, + "num_input_tokens_seen": 178464320, + "step": 56705 + }, + { + "epoch": 3.630369374559887, + "grad_norm": 6.198697090148926, + "learning_rate": 1.05849763723401e-06, + "loss": 0.1425, + "num_input_tokens_seen": 178479488, + "step": 56710 + }, + { + "epoch": 3.630689456500864, + "grad_norm": 11.972555160522461, + "learning_rate": 1.0580412397808715e-06, + "loss": 0.1901, + "num_input_tokens_seen": 178493632, + "step": 56715 + }, + { + "epoch": 3.631009538441841, + "grad_norm": 24.540124893188477, + "learning_rate": 1.0575849143316445e-06, + "loss": 0.176, + "num_input_tokens_seen": 178508608, + "step": 56720 + }, + { + "epoch": 3.631329620382818, + "grad_norm": 46.157318115234375, + "learning_rate": 1.0571286609091116e-06, + "loss": 0.1989, + "num_input_tokens_seen": 178523392, + "step": 56725 + }, + { + "epoch": 3.631649702323795, + "grad_norm": 9.977128028869629, + "learning_rate": 1.0566724795360594e-06, + "loss": 0.2151, + "num_input_tokens_seen": 178540096, + "step": 56730 + }, + { + "epoch": 3.631969784264772, + "grad_norm": 6.495114803314209, + "learning_rate": 1.056216370235264e-06, + "loss": 0.203, + "num_input_tokens_seen": 178555904, + "step": 56735 + }, + { + "epoch": 3.6322898662057486, + "grad_norm": 20.234315872192383, + "learning_rate": 1.0557603330295035e-06, + "loss": 0.2156, + "num_input_tokens_seen": 178571200, + "step": 56740 + }, + { + "epoch": 3.6326099481467256, + "grad_norm": 9.4321928024292, + "learning_rate": 1.0553043679415493e-06, + "loss": 0.2445, + "num_input_tokens_seen": 178589696, + "step": 56745 + }, + { + "epoch": 3.6329300300877025, + "grad_norm": 7.995316028594971, + "learning_rate": 1.05484847499417e-06, + "loss": 0.1446, + "num_input_tokens_seen": 178605376, + "step": 56750 + }, + { + "epoch": 3.633250112028679, + "grad_norm": 6.564454078674316, + "learning_rate": 1.0543926542101299e-06, + "loss": 0.1405, + "num_input_tokens_seen": 178621440, + "step": 56755 + }, + { + "epoch": 3.633570193969656, + "grad_norm": 8.881266593933105, + "learning_rate": 1.0539369056121909e-06, + "loss": 0.1387, + "num_input_tokens_seen": 178636864, + "step": 56760 + }, + { + "epoch": 3.633890275910633, + "grad_norm": 7.420421600341797, + "learning_rate": 1.0534812292231097e-06, + "loss": 0.1275, + "num_input_tokens_seen": 178651328, + "step": 56765 + }, + { + "epoch": 3.63421035785161, + "grad_norm": 8.05634880065918, + "learning_rate": 1.053025625065642e-06, + "loss": 0.1511, + "num_input_tokens_seen": 178666624, + "step": 56770 + }, + { + "epoch": 3.634530439792587, + "grad_norm": 5.215527534484863, + "learning_rate": 1.052570093162538e-06, + "loss": 0.1657, + "num_input_tokens_seen": 178681536, + "step": 56775 + }, + { + "epoch": 3.634850521733564, + "grad_norm": 10.101435661315918, + "learning_rate": 1.0521146335365438e-06, + "loss": 0.1717, + "num_input_tokens_seen": 178696512, + "step": 56780 + }, + { + "epoch": 3.6351706036745406, + "grad_norm": 5.791764736175537, + "learning_rate": 1.051659246210403e-06, + "loss": 0.1864, + "num_input_tokens_seen": 178711680, + "step": 56785 + }, + { + "epoch": 3.6354906856155176, + "grad_norm": 7.248486518859863, + "learning_rate": 1.0512039312068556e-06, + "loss": 0.1241, + "num_input_tokens_seen": 178728832, + "step": 56790 + }, + { + "epoch": 3.6358107675564946, + "grad_norm": 10.907119750976562, + "learning_rate": 1.0507486885486374e-06, + "loss": 0.2808, + "num_input_tokens_seen": 178743872, + "step": 56795 + }, + { + "epoch": 3.636130849497471, + "grad_norm": 10.380021095275879, + "learning_rate": 1.0502935182584794e-06, + "loss": 0.1577, + "num_input_tokens_seen": 178760192, + "step": 56800 + }, + { + "epoch": 3.636450931438448, + "grad_norm": 6.9404296875, + "learning_rate": 1.0498384203591141e-06, + "loss": 0.0967, + "num_input_tokens_seen": 178774784, + "step": 56805 + }, + { + "epoch": 3.636771013379425, + "grad_norm": 7.360585689544678, + "learning_rate": 1.0493833948732626e-06, + "loss": 0.1555, + "num_input_tokens_seen": 178790208, + "step": 56810 + }, + { + "epoch": 3.637091095320402, + "grad_norm": 6.903907775878906, + "learning_rate": 1.0489284418236504e-06, + "loss": 0.1846, + "num_input_tokens_seen": 178806016, + "step": 56815 + }, + { + "epoch": 3.637411177261379, + "grad_norm": 7.932101726531982, + "learning_rate": 1.0484735612329914e-06, + "loss": 0.1632, + "num_input_tokens_seen": 178821760, + "step": 56820 + }, + { + "epoch": 3.637731259202356, + "grad_norm": 7.688581466674805, + "learning_rate": 1.0480187531240028e-06, + "loss": 0.1746, + "num_input_tokens_seen": 178836160, + "step": 56825 + }, + { + "epoch": 3.6380513411433326, + "grad_norm": 20.46732521057129, + "learning_rate": 1.0475640175193946e-06, + "loss": 0.2226, + "num_input_tokens_seen": 178850880, + "step": 56830 + }, + { + "epoch": 3.6383714230843096, + "grad_norm": 4.909759521484375, + "learning_rate": 1.047109354441873e-06, + "loss": 0.1764, + "num_input_tokens_seen": 178866816, + "step": 56835 + }, + { + "epoch": 3.6386915050252866, + "grad_norm": 9.446880340576172, + "learning_rate": 1.0466547639141444e-06, + "loss": 0.1911, + "num_input_tokens_seen": 178882752, + "step": 56840 + }, + { + "epoch": 3.639011586966263, + "grad_norm": 10.348388671875, + "learning_rate": 1.046200245958905e-06, + "loss": 0.1797, + "num_input_tokens_seen": 178897664, + "step": 56845 + }, + { + "epoch": 3.63933166890724, + "grad_norm": 3.1736907958984375, + "learning_rate": 1.0457458005988533e-06, + "loss": 0.1386, + "num_input_tokens_seen": 178915072, + "step": 56850 + }, + { + "epoch": 3.639651750848217, + "grad_norm": 8.010270118713379, + "learning_rate": 1.0452914278566816e-06, + "loss": 0.1424, + "num_input_tokens_seen": 178930368, + "step": 56855 + }, + { + "epoch": 3.639971832789194, + "grad_norm": 6.551013469696045, + "learning_rate": 1.0448371277550787e-06, + "loss": 0.1593, + "num_input_tokens_seen": 178945984, + "step": 56860 + }, + { + "epoch": 3.640291914730171, + "grad_norm": 8.83361530303955, + "learning_rate": 1.0443829003167299e-06, + "loss": 0.1889, + "num_input_tokens_seen": 178965632, + "step": 56865 + }, + { + "epoch": 3.6406119966711477, + "grad_norm": 7.95425271987915, + "learning_rate": 1.043928745564317e-06, + "loss": 0.199, + "num_input_tokens_seen": 178980864, + "step": 56870 + }, + { + "epoch": 3.6409320786121246, + "grad_norm": 6.003549098968506, + "learning_rate": 1.0434746635205182e-06, + "loss": 0.1782, + "num_input_tokens_seen": 178996864, + "step": 56875 + }, + { + "epoch": 3.6412521605531016, + "grad_norm": 4.501711845397949, + "learning_rate": 1.043020654208008e-06, + "loss": 0.0972, + "num_input_tokens_seen": 179012288, + "step": 56880 + }, + { + "epoch": 3.6415722424940786, + "grad_norm": 5.475512981414795, + "learning_rate": 1.042566717649456e-06, + "loss": 0.1872, + "num_input_tokens_seen": 179028224, + "step": 56885 + }, + { + "epoch": 3.641892324435055, + "grad_norm": 7.269951343536377, + "learning_rate": 1.0421128538675319e-06, + "loss": 0.1648, + "num_input_tokens_seen": 179044992, + "step": 56890 + }, + { + "epoch": 3.642212406376032, + "grad_norm": 7.469634532928467, + "learning_rate": 1.0416590628848977e-06, + "loss": 0.1396, + "num_input_tokens_seen": 179060032, + "step": 56895 + }, + { + "epoch": 3.642532488317009, + "grad_norm": 6.794495582580566, + "learning_rate": 1.041205344724214e-06, + "loss": 0.2087, + "num_input_tokens_seen": 179076160, + "step": 56900 + }, + { + "epoch": 3.642852570257986, + "grad_norm": 10.339655876159668, + "learning_rate": 1.0407516994081365e-06, + "loss": 0.2219, + "num_input_tokens_seen": 179091584, + "step": 56905 + }, + { + "epoch": 3.643172652198963, + "grad_norm": 5.531235218048096, + "learning_rate": 1.0402981269593184e-06, + "loss": 0.1018, + "num_input_tokens_seen": 179107520, + "step": 56910 + }, + { + "epoch": 3.6434927341399397, + "grad_norm": 10.75761890411377, + "learning_rate": 1.0398446274004083e-06, + "loss": 0.1515, + "num_input_tokens_seen": 179123712, + "step": 56915 + }, + { + "epoch": 3.6438128160809167, + "grad_norm": 8.059671401977539, + "learning_rate": 1.0393912007540514e-06, + "loss": 0.1506, + "num_input_tokens_seen": 179138688, + "step": 56920 + }, + { + "epoch": 3.6441328980218937, + "grad_norm": 8.858016014099121, + "learning_rate": 1.0389378470428913e-06, + "loss": 0.1346, + "num_input_tokens_seen": 179154368, + "step": 56925 + }, + { + "epoch": 3.6444529799628707, + "grad_norm": 18.25931739807129, + "learning_rate": 1.0384845662895629e-06, + "loss": 0.1626, + "num_input_tokens_seen": 179169728, + "step": 56930 + }, + { + "epoch": 3.644773061903847, + "grad_norm": 5.752455711364746, + "learning_rate": 1.0380313585167037e-06, + "loss": 0.1644, + "num_input_tokens_seen": 179185536, + "step": 56935 + }, + { + "epoch": 3.645093143844824, + "grad_norm": 7.700901031494141, + "learning_rate": 1.0375782237469432e-06, + "loss": 0.184, + "num_input_tokens_seen": 179202048, + "step": 56940 + }, + { + "epoch": 3.645413225785801, + "grad_norm": 8.418062210083008, + "learning_rate": 1.0371251620029076e-06, + "loss": 0.1639, + "num_input_tokens_seen": 179217920, + "step": 56945 + }, + { + "epoch": 3.645733307726778, + "grad_norm": 10.121163368225098, + "learning_rate": 1.0366721733072239e-06, + "loss": 0.2135, + "num_input_tokens_seen": 179234752, + "step": 56950 + }, + { + "epoch": 3.646053389667755, + "grad_norm": 2.9682793617248535, + "learning_rate": 1.0362192576825072e-06, + "loss": 0.1203, + "num_input_tokens_seen": 179251264, + "step": 56955 + }, + { + "epoch": 3.6463734716087317, + "grad_norm": 9.139573097229004, + "learning_rate": 1.0357664151513785e-06, + "loss": 0.1495, + "num_input_tokens_seen": 179267712, + "step": 56960 + }, + { + "epoch": 3.6466935535497087, + "grad_norm": 3.9945273399353027, + "learning_rate": 1.035313645736446e-06, + "loss": 0.1079, + "num_input_tokens_seen": 179283072, + "step": 56965 + }, + { + "epoch": 3.6470136354906857, + "grad_norm": 7.772240161895752, + "learning_rate": 1.0348609494603221e-06, + "loss": 0.1446, + "num_input_tokens_seen": 179298560, + "step": 56970 + }, + { + "epoch": 3.6473337174316622, + "grad_norm": 24.20061683654785, + "learning_rate": 1.0344083263456103e-06, + "loss": 0.203, + "num_input_tokens_seen": 179313152, + "step": 56975 + }, + { + "epoch": 3.6476537993726392, + "grad_norm": 6.173045635223389, + "learning_rate": 1.033955776414913e-06, + "loss": 0.1554, + "num_input_tokens_seen": 179329024, + "step": 56980 + }, + { + "epoch": 3.647973881313616, + "grad_norm": 9.71008014678955, + "learning_rate": 1.0335032996908275e-06, + "loss": 0.1955, + "num_input_tokens_seen": 179344640, + "step": 56985 + }, + { + "epoch": 3.648293963254593, + "grad_norm": 5.530416011810303, + "learning_rate": 1.033050896195949e-06, + "loss": 0.1811, + "num_input_tokens_seen": 179361216, + "step": 56990 + }, + { + "epoch": 3.64861404519557, + "grad_norm": 6.97974967956543, + "learning_rate": 1.0325985659528664e-06, + "loss": 0.1391, + "num_input_tokens_seen": 179376128, + "step": 56995 + }, + { + "epoch": 3.648934127136547, + "grad_norm": 11.298145294189453, + "learning_rate": 1.0321463089841692e-06, + "loss": 0.1746, + "num_input_tokens_seen": 179392064, + "step": 57000 + }, + { + "epoch": 3.6492542090775237, + "grad_norm": 7.380802154541016, + "learning_rate": 1.0316941253124393e-06, + "loss": 0.1963, + "num_input_tokens_seen": 179407616, + "step": 57005 + }, + { + "epoch": 3.6495742910185007, + "grad_norm": 5.051912784576416, + "learning_rate": 1.0312420149602568e-06, + "loss": 0.1602, + "num_input_tokens_seen": 179423872, + "step": 57010 + }, + { + "epoch": 3.6498943729594777, + "grad_norm": 9.80338191986084, + "learning_rate": 1.0307899779501976e-06, + "loss": 0.1398, + "num_input_tokens_seen": 179438656, + "step": 57015 + }, + { + "epoch": 3.6502144549004543, + "grad_norm": 8.5582857131958, + "learning_rate": 1.030338014304834e-06, + "loss": 0.1756, + "num_input_tokens_seen": 179453184, + "step": 57020 + }, + { + "epoch": 3.6505345368414313, + "grad_norm": 10.474893569946289, + "learning_rate": 1.0298861240467348e-06, + "loss": 0.1713, + "num_input_tokens_seen": 179469120, + "step": 57025 + }, + { + "epoch": 3.6508546187824082, + "grad_norm": 6.7106475830078125, + "learning_rate": 1.0294343071984641e-06, + "loss": 0.1347, + "num_input_tokens_seen": 179485952, + "step": 57030 + }, + { + "epoch": 3.6511747007233852, + "grad_norm": 5.739043235778809, + "learning_rate": 1.028982563782586e-06, + "loss": 0.1821, + "num_input_tokens_seen": 179501696, + "step": 57035 + }, + { + "epoch": 3.6514947826643622, + "grad_norm": 9.675146102905273, + "learning_rate": 1.0285308938216545e-06, + "loss": 0.1825, + "num_input_tokens_seen": 179517568, + "step": 57040 + }, + { + "epoch": 3.651814864605339, + "grad_norm": 7.474940776824951, + "learning_rate": 1.0280792973382276e-06, + "loss": 0.2007, + "num_input_tokens_seen": 179534912, + "step": 57045 + }, + { + "epoch": 3.6521349465463158, + "grad_norm": 20.961973190307617, + "learning_rate": 1.0276277743548518e-06, + "loss": 0.1787, + "num_input_tokens_seen": 179550656, + "step": 57050 + }, + { + "epoch": 3.6524550284872928, + "grad_norm": 9.773758888244629, + "learning_rate": 1.0271763248940763e-06, + "loss": 0.2023, + "num_input_tokens_seen": 179566912, + "step": 57055 + }, + { + "epoch": 3.6527751104282697, + "grad_norm": 10.347891807556152, + "learning_rate": 1.0267249489784437e-06, + "loss": 0.2051, + "num_input_tokens_seen": 179583040, + "step": 57060 + }, + { + "epoch": 3.6530951923692463, + "grad_norm": 10.040665626525879, + "learning_rate": 1.0262736466304923e-06, + "loss": 0.2087, + "num_input_tokens_seen": 179599424, + "step": 57065 + }, + { + "epoch": 3.6534152743102233, + "grad_norm": 7.468776226043701, + "learning_rate": 1.0258224178727602e-06, + "loss": 0.1832, + "num_input_tokens_seen": 179614272, + "step": 57070 + }, + { + "epoch": 3.6537353562512003, + "grad_norm": 7.268024444580078, + "learning_rate": 1.0253712627277761e-06, + "loss": 0.2102, + "num_input_tokens_seen": 179630144, + "step": 57075 + }, + { + "epoch": 3.6540554381921773, + "grad_norm": 2.903226137161255, + "learning_rate": 1.0249201812180711e-06, + "loss": 0.1381, + "num_input_tokens_seen": 179647744, + "step": 57080 + }, + { + "epoch": 3.6543755201331543, + "grad_norm": 9.446502685546875, + "learning_rate": 1.0244691733661687e-06, + "loss": 0.2123, + "num_input_tokens_seen": 179663744, + "step": 57085 + }, + { + "epoch": 3.6546956020741312, + "grad_norm": 2.953901529312134, + "learning_rate": 1.0240182391945902e-06, + "loss": 0.1327, + "num_input_tokens_seen": 179679360, + "step": 57090 + }, + { + "epoch": 3.655015684015108, + "grad_norm": 6.327146053314209, + "learning_rate": 1.0235673787258527e-06, + "loss": 0.2272, + "num_input_tokens_seen": 179694272, + "step": 57095 + }, + { + "epoch": 3.655335765956085, + "grad_norm": 12.916123390197754, + "learning_rate": 1.0231165919824699e-06, + "loss": 0.2296, + "num_input_tokens_seen": 179709632, + "step": 57100 + }, + { + "epoch": 3.6556558478970618, + "grad_norm": 8.329012870788574, + "learning_rate": 1.0226658789869514e-06, + "loss": 0.2083, + "num_input_tokens_seen": 179724736, + "step": 57105 + }, + { + "epoch": 3.6559759298380383, + "grad_norm": 6.850568771362305, + "learning_rate": 1.0222152397618037e-06, + "loss": 0.1711, + "num_input_tokens_seen": 179741440, + "step": 57110 + }, + { + "epoch": 3.6562960117790153, + "grad_norm": 7.766393184661865, + "learning_rate": 1.021764674329529e-06, + "loss": 0.1709, + "num_input_tokens_seen": 179756992, + "step": 57115 + }, + { + "epoch": 3.6566160937199923, + "grad_norm": 20.72284698486328, + "learning_rate": 1.021314182712627e-06, + "loss": 0.1895, + "num_input_tokens_seen": 179771968, + "step": 57120 + }, + { + "epoch": 3.6569361756609693, + "grad_norm": 5.7537384033203125, + "learning_rate": 1.0208637649335928e-06, + "loss": 0.1812, + "num_input_tokens_seen": 179788096, + "step": 57125 + }, + { + "epoch": 3.6572562576019463, + "grad_norm": 17.517906188964844, + "learning_rate": 1.0204134210149178e-06, + "loss": 0.1849, + "num_input_tokens_seen": 179804864, + "step": 57130 + }, + { + "epoch": 3.657576339542923, + "grad_norm": 9.434067726135254, + "learning_rate": 1.0199631509790893e-06, + "loss": 0.1623, + "num_input_tokens_seen": 179820544, + "step": 57135 + }, + { + "epoch": 3.6578964214839, + "grad_norm": 6.318309783935547, + "learning_rate": 1.019512954848592e-06, + "loss": 0.119, + "num_input_tokens_seen": 179836544, + "step": 57140 + }, + { + "epoch": 3.658216503424877, + "grad_norm": 8.289783477783203, + "learning_rate": 1.0190628326459062e-06, + "loss": 0.1459, + "num_input_tokens_seen": 179851648, + "step": 57145 + }, + { + "epoch": 3.658536585365854, + "grad_norm": 7.803210258483887, + "learning_rate": 1.0186127843935079e-06, + "loss": 0.1805, + "num_input_tokens_seen": 179867072, + "step": 57150 + }, + { + "epoch": 3.6588566673068303, + "grad_norm": 4.523586750030518, + "learning_rate": 1.0181628101138724e-06, + "loss": 0.1724, + "num_input_tokens_seen": 179883008, + "step": 57155 + }, + { + "epoch": 3.6591767492478073, + "grad_norm": 8.79435920715332, + "learning_rate": 1.0177129098294658e-06, + "loss": 0.1869, + "num_input_tokens_seen": 179898560, + "step": 57160 + }, + { + "epoch": 3.6594968311887843, + "grad_norm": 6.774174690246582, + "learning_rate": 1.0172630835627564e-06, + "loss": 0.1669, + "num_input_tokens_seen": 179913856, + "step": 57165 + }, + { + "epoch": 3.6598169131297613, + "grad_norm": 6.6404218673706055, + "learning_rate": 1.0168133313362053e-06, + "loss": 0.1365, + "num_input_tokens_seen": 179931008, + "step": 57170 + }, + { + "epoch": 3.6601369950707383, + "grad_norm": 11.976490020751953, + "learning_rate": 1.01636365317227e-06, + "loss": 0.1969, + "num_input_tokens_seen": 179946240, + "step": 57175 + }, + { + "epoch": 3.660457077011715, + "grad_norm": 9.686668395996094, + "learning_rate": 1.0159140490934077e-06, + "loss": 0.1953, + "num_input_tokens_seen": 179961728, + "step": 57180 + }, + { + "epoch": 3.660777158952692, + "grad_norm": 3.706397533416748, + "learning_rate": 1.0154645191220655e-06, + "loss": 0.1594, + "num_input_tokens_seen": 179976576, + "step": 57185 + }, + { + "epoch": 3.661097240893669, + "grad_norm": 6.8864593505859375, + "learning_rate": 1.0150150632806944e-06, + "loss": 0.1153, + "num_input_tokens_seen": 179992384, + "step": 57190 + }, + { + "epoch": 3.661417322834646, + "grad_norm": 35.22498321533203, + "learning_rate": 1.0145656815917344e-06, + "loss": 0.144, + "num_input_tokens_seen": 180009600, + "step": 57195 + }, + { + "epoch": 3.6617374047756224, + "grad_norm": 6.451000213623047, + "learning_rate": 1.0141163740776277e-06, + "loss": 0.1512, + "num_input_tokens_seen": 180025152, + "step": 57200 + }, + { + "epoch": 3.6620574867165994, + "grad_norm": 6.010853290557861, + "learning_rate": 1.0136671407608096e-06, + "loss": 0.1425, + "num_input_tokens_seen": 180040320, + "step": 57205 + }, + { + "epoch": 3.6623775686575764, + "grad_norm": 8.376956939697266, + "learning_rate": 1.0132179816637126e-06, + "loss": 0.117, + "num_input_tokens_seen": 180058112, + "step": 57210 + }, + { + "epoch": 3.6626976505985533, + "grad_norm": 9.790850639343262, + "learning_rate": 1.0127688968087647e-06, + "loss": 0.1732, + "num_input_tokens_seen": 180072640, + "step": 57215 + }, + { + "epoch": 3.6630177325395303, + "grad_norm": 11.479248046875, + "learning_rate": 1.012319886218392e-06, + "loss": 0.2144, + "num_input_tokens_seen": 180087424, + "step": 57220 + }, + { + "epoch": 3.663337814480507, + "grad_norm": 9.41163158416748, + "learning_rate": 1.0118709499150139e-06, + "loss": 0.1504, + "num_input_tokens_seen": 180106176, + "step": 57225 + }, + { + "epoch": 3.663657896421484, + "grad_norm": 8.101212501525879, + "learning_rate": 1.0114220879210513e-06, + "loss": 0.1149, + "num_input_tokens_seen": 180121408, + "step": 57230 + }, + { + "epoch": 3.663977978362461, + "grad_norm": 1.7943185567855835, + "learning_rate": 1.0109733002589137e-06, + "loss": 0.1256, + "num_input_tokens_seen": 180137024, + "step": 57235 + }, + { + "epoch": 3.6642980603034374, + "grad_norm": 5.614006042480469, + "learning_rate": 1.0105245869510146e-06, + "loss": 0.1808, + "num_input_tokens_seen": 180153152, + "step": 57240 + }, + { + "epoch": 3.6646181422444144, + "grad_norm": 6.639805316925049, + "learning_rate": 1.0100759480197592e-06, + "loss": 0.1366, + "num_input_tokens_seen": 180168320, + "step": 57245 + }, + { + "epoch": 3.6649382241853914, + "grad_norm": 4.4542317390441895, + "learning_rate": 1.0096273834875502e-06, + "loss": 0.1463, + "num_input_tokens_seen": 180182784, + "step": 57250 + }, + { + "epoch": 3.6652583061263684, + "grad_norm": 6.25352668762207, + "learning_rate": 1.0091788933767865e-06, + "loss": 0.2409, + "num_input_tokens_seen": 180198848, + "step": 57255 + }, + { + "epoch": 3.6655783880673454, + "grad_norm": 6.310224533081055, + "learning_rate": 1.0087304777098628e-06, + "loss": 0.1213, + "num_input_tokens_seen": 180213568, + "step": 57260 + }, + { + "epoch": 3.6658984700083224, + "grad_norm": 6.388874053955078, + "learning_rate": 1.008282136509173e-06, + "loss": 0.1878, + "num_input_tokens_seen": 180229888, + "step": 57265 + }, + { + "epoch": 3.666218551949299, + "grad_norm": 17.800067901611328, + "learning_rate": 1.0078338697971013e-06, + "loss": 0.1587, + "num_input_tokens_seen": 180244864, + "step": 57270 + }, + { + "epoch": 3.666538633890276, + "grad_norm": 8.47903823852539, + "learning_rate": 1.0073856775960356e-06, + "loss": 0.1306, + "num_input_tokens_seen": 180261184, + "step": 57275 + }, + { + "epoch": 3.666858715831253, + "grad_norm": 6.170533180236816, + "learning_rate": 1.0069375599283526e-06, + "loss": 0.1935, + "num_input_tokens_seen": 180276224, + "step": 57280 + }, + { + "epoch": 3.6671787977722294, + "grad_norm": 13.13504695892334, + "learning_rate": 1.0064895168164319e-06, + "loss": 0.2373, + "num_input_tokens_seen": 180292096, + "step": 57285 + }, + { + "epoch": 3.6674988797132064, + "grad_norm": 6.903867721557617, + "learning_rate": 1.0060415482826449e-06, + "loss": 0.1558, + "num_input_tokens_seen": 180308160, + "step": 57290 + }, + { + "epoch": 3.6678189616541834, + "grad_norm": 6.47335147857666, + "learning_rate": 1.0055936543493609e-06, + "loss": 0.1774, + "num_input_tokens_seen": 180323200, + "step": 57295 + }, + { + "epoch": 3.6681390435951604, + "grad_norm": 6.9573822021484375, + "learning_rate": 1.005145835038947e-06, + "loss": 0.2222, + "num_input_tokens_seen": 180340032, + "step": 57300 + }, + { + "epoch": 3.6684591255361374, + "grad_norm": 10.750314712524414, + "learning_rate": 1.0046980903737621e-06, + "loss": 0.2245, + "num_input_tokens_seen": 180354752, + "step": 57305 + }, + { + "epoch": 3.6687792074771144, + "grad_norm": 4.179068565368652, + "learning_rate": 1.0042504203761667e-06, + "loss": 0.1197, + "num_input_tokens_seen": 180370944, + "step": 57310 + }, + { + "epoch": 3.669099289418091, + "grad_norm": 5.28499698638916, + "learning_rate": 1.0038028250685142e-06, + "loss": 0.1151, + "num_input_tokens_seen": 180385344, + "step": 57315 + }, + { + "epoch": 3.669419371359068, + "grad_norm": 8.2432279586792, + "learning_rate": 1.0033553044731554e-06, + "loss": 0.1282, + "num_input_tokens_seen": 180400256, + "step": 57320 + }, + { + "epoch": 3.669739453300045, + "grad_norm": 57.80598449707031, + "learning_rate": 1.0029078586124367e-06, + "loss": 0.1913, + "num_input_tokens_seen": 180417216, + "step": 57325 + }, + { + "epoch": 3.6700595352410215, + "grad_norm": 8.336015701293945, + "learning_rate": 1.0024604875087016e-06, + "loss": 0.1991, + "num_input_tokens_seen": 180433152, + "step": 57330 + }, + { + "epoch": 3.6703796171819985, + "grad_norm": 5.844956398010254, + "learning_rate": 1.0020131911842892e-06, + "loss": 0.1723, + "num_input_tokens_seen": 180450112, + "step": 57335 + }, + { + "epoch": 3.6706996991229754, + "grad_norm": 7.608006954193115, + "learning_rate": 1.0015659696615355e-06, + "loss": 0.1653, + "num_input_tokens_seen": 180466880, + "step": 57340 + }, + { + "epoch": 3.6710197810639524, + "grad_norm": 4.821533679962158, + "learning_rate": 1.001118822962771e-06, + "loss": 0.1518, + "num_input_tokens_seen": 180482944, + "step": 57345 + }, + { + "epoch": 3.6713398630049294, + "grad_norm": 9.891448020935059, + "learning_rate": 1.0006717511103264e-06, + "loss": 0.2415, + "num_input_tokens_seen": 180498112, + "step": 57350 + }, + { + "epoch": 3.6716599449459064, + "grad_norm": 24.635154724121094, + "learning_rate": 1.0002247541265247e-06, + "loss": 0.1801, + "num_input_tokens_seen": 180512704, + "step": 57355 + }, + { + "epoch": 3.671980026886883, + "grad_norm": 4.772122383117676, + "learning_rate": 9.997778320336867e-07, + "loss": 0.1716, + "num_input_tokens_seen": 180528000, + "step": 57360 + }, + { + "epoch": 3.67230010882786, + "grad_norm": 5.656070709228516, + "learning_rate": 9.993309848541295e-07, + "loss": 0.1643, + "num_input_tokens_seen": 180544768, + "step": 57365 + }, + { + "epoch": 3.672620190768837, + "grad_norm": 13.959539413452148, + "learning_rate": 9.98884212610166e-07, + "loss": 0.2062, + "num_input_tokens_seen": 180560384, + "step": 57370 + }, + { + "epoch": 3.6729402727098135, + "grad_norm": 11.769453048706055, + "learning_rate": 9.98437515324106e-07, + "loss": 0.1647, + "num_input_tokens_seen": 180577280, + "step": 57375 + }, + { + "epoch": 3.6732603546507905, + "grad_norm": 110.29047393798828, + "learning_rate": 9.979908930182543e-07, + "loss": 0.1513, + "num_input_tokens_seen": 180593920, + "step": 57380 + }, + { + "epoch": 3.6735804365917675, + "grad_norm": 10.763751029968262, + "learning_rate": 9.975443457149155e-07, + "loss": 0.1824, + "num_input_tokens_seen": 180608640, + "step": 57385 + }, + { + "epoch": 3.6739005185327445, + "grad_norm": 94.89071655273438, + "learning_rate": 9.970978734363843e-07, + "loss": 0.2315, + "num_input_tokens_seen": 180623680, + "step": 57390 + }, + { + "epoch": 3.6742206004737215, + "grad_norm": 5.493385314941406, + "learning_rate": 9.96651476204958e-07, + "loss": 0.157, + "num_input_tokens_seen": 180639296, + "step": 57395 + }, + { + "epoch": 3.674540682414698, + "grad_norm": 6.933585166931152, + "learning_rate": 9.962051540429257e-07, + "loss": 0.152, + "num_input_tokens_seen": 180655040, + "step": 57400 + }, + { + "epoch": 3.674860764355675, + "grad_norm": 7.010949611663818, + "learning_rate": 9.957589069725756e-07, + "loss": 0.1281, + "num_input_tokens_seen": 180670976, + "step": 57405 + }, + { + "epoch": 3.675180846296652, + "grad_norm": 9.981659889221191, + "learning_rate": 9.953127350161904e-07, + "loss": 0.273, + "num_input_tokens_seen": 180686400, + "step": 57410 + }, + { + "epoch": 3.675500928237629, + "grad_norm": 11.616377830505371, + "learning_rate": 9.948666381960484e-07, + "loss": 0.2331, + "num_input_tokens_seen": 180703488, + "step": 57415 + }, + { + "epoch": 3.6758210101786055, + "grad_norm": 12.218527793884277, + "learning_rate": 9.944206165344284e-07, + "loss": 0.2869, + "num_input_tokens_seen": 180719424, + "step": 57420 + }, + { + "epoch": 3.6761410921195825, + "grad_norm": 60.32040023803711, + "learning_rate": 9.939746700535987e-07, + "loss": 0.1517, + "num_input_tokens_seen": 180735168, + "step": 57425 + }, + { + "epoch": 3.6764611740605595, + "grad_norm": 9.38684368133545, + "learning_rate": 9.935287987758305e-07, + "loss": 0.1296, + "num_input_tokens_seen": 180751232, + "step": 57430 + }, + { + "epoch": 3.6767812560015365, + "grad_norm": 6.503768444061279, + "learning_rate": 9.930830027233871e-07, + "loss": 0.1411, + "num_input_tokens_seen": 180766784, + "step": 57435 + }, + { + "epoch": 3.6771013379425135, + "grad_norm": 19.643081665039062, + "learning_rate": 9.926372819185292e-07, + "loss": 0.2098, + "num_input_tokens_seen": 180781888, + "step": 57440 + }, + { + "epoch": 3.67742141988349, + "grad_norm": 4.473397731781006, + "learning_rate": 9.921916363835138e-07, + "loss": 0.1277, + "num_input_tokens_seen": 180798016, + "step": 57445 + }, + { + "epoch": 3.677741501824467, + "grad_norm": 13.242844581604004, + "learning_rate": 9.917460661405945e-07, + "loss": 0.1431, + "num_input_tokens_seen": 180814016, + "step": 57450 + }, + { + "epoch": 3.678061583765444, + "grad_norm": 6.678584098815918, + "learning_rate": 9.913005712120191e-07, + "loss": 0.1803, + "num_input_tokens_seen": 180829824, + "step": 57455 + }, + { + "epoch": 3.678381665706421, + "grad_norm": 6.027949333190918, + "learning_rate": 9.908551516200368e-07, + "loss": 0.1095, + "num_input_tokens_seen": 180844928, + "step": 57460 + }, + { + "epoch": 3.6787017476473975, + "grad_norm": 15.418444633483887, + "learning_rate": 9.904098073868854e-07, + "loss": 0.2054, + "num_input_tokens_seen": 180860800, + "step": 57465 + }, + { + "epoch": 3.6790218295883745, + "grad_norm": 11.533632278442383, + "learning_rate": 9.899645385348062e-07, + "loss": 0.1437, + "num_input_tokens_seen": 180875712, + "step": 57470 + }, + { + "epoch": 3.6793419115293515, + "grad_norm": 33.960594177246094, + "learning_rate": 9.89519345086032e-07, + "loss": 0.1594, + "num_input_tokens_seen": 180892672, + "step": 57475 + }, + { + "epoch": 3.6796619934703285, + "grad_norm": 9.621628761291504, + "learning_rate": 9.890742270627943e-07, + "loss": 0.1879, + "num_input_tokens_seen": 180907968, + "step": 57480 + }, + { + "epoch": 3.6799820754113055, + "grad_norm": 6.828265190124512, + "learning_rate": 9.886291844873194e-07, + "loss": 0.2108, + "num_input_tokens_seen": 180921984, + "step": 57485 + }, + { + "epoch": 3.680302157352282, + "grad_norm": 8.981942176818848, + "learning_rate": 9.8818421738183e-07, + "loss": 0.1362, + "num_input_tokens_seen": 180937280, + "step": 57490 + }, + { + "epoch": 3.680622239293259, + "grad_norm": 5.268496513366699, + "learning_rate": 9.877393257685474e-07, + "loss": 0.207, + "num_input_tokens_seen": 180952960, + "step": 57495 + }, + { + "epoch": 3.680942321234236, + "grad_norm": 8.865711212158203, + "learning_rate": 9.872945096696843e-07, + "loss": 0.192, + "num_input_tokens_seen": 180968768, + "step": 57500 + }, + { + "epoch": 3.6812624031752126, + "grad_norm": 17.258527755737305, + "learning_rate": 9.868497691074555e-07, + "loss": 0.1509, + "num_input_tokens_seen": 180984960, + "step": 57505 + }, + { + "epoch": 3.6815824851161896, + "grad_norm": 7.090397834777832, + "learning_rate": 9.86405104104066e-07, + "loss": 0.1594, + "num_input_tokens_seen": 181001088, + "step": 57510 + }, + { + "epoch": 3.6819025670571666, + "grad_norm": 8.969660758972168, + "learning_rate": 9.859605146817223e-07, + "loss": 0.1653, + "num_input_tokens_seen": 181016576, + "step": 57515 + }, + { + "epoch": 3.6822226489981436, + "grad_norm": 10.207901954650879, + "learning_rate": 9.85516000862624e-07, + "loss": 0.1897, + "num_input_tokens_seen": 181031488, + "step": 57520 + }, + { + "epoch": 3.6825427309391205, + "grad_norm": 9.265423774719238, + "learning_rate": 9.850715626689674e-07, + "loss": 0.1544, + "num_input_tokens_seen": 181047616, + "step": 57525 + }, + { + "epoch": 3.6828628128800975, + "grad_norm": 5.71071720123291, + "learning_rate": 9.846272001229475e-07, + "loss": 0.108, + "num_input_tokens_seen": 181062144, + "step": 57530 + }, + { + "epoch": 3.683182894821074, + "grad_norm": 9.46028995513916, + "learning_rate": 9.841829132467504e-07, + "loss": 0.1709, + "num_input_tokens_seen": 181077952, + "step": 57535 + }, + { + "epoch": 3.683502976762051, + "grad_norm": 9.006361961364746, + "learning_rate": 9.837387020625638e-07, + "loss": 0.1293, + "num_input_tokens_seen": 181094080, + "step": 57540 + }, + { + "epoch": 3.683823058703028, + "grad_norm": 7.412784099578857, + "learning_rate": 9.832945665925685e-07, + "loss": 0.1193, + "num_input_tokens_seen": 181109696, + "step": 57545 + }, + { + "epoch": 3.6841431406440046, + "grad_norm": 25.250568389892578, + "learning_rate": 9.828505068589422e-07, + "loss": 0.2667, + "num_input_tokens_seen": 181125440, + "step": 57550 + }, + { + "epoch": 3.6844632225849816, + "grad_norm": 9.990888595581055, + "learning_rate": 9.824065228838592e-07, + "loss": 0.1779, + "num_input_tokens_seen": 181139840, + "step": 57555 + }, + { + "epoch": 3.6847833045259586, + "grad_norm": 7.645722389221191, + "learning_rate": 9.819626146894895e-07, + "loss": 0.181, + "num_input_tokens_seen": 181154880, + "step": 57560 + }, + { + "epoch": 3.6851033864669356, + "grad_norm": 7.096983432769775, + "learning_rate": 9.815187822979995e-07, + "loss": 0.1546, + "num_input_tokens_seen": 181170752, + "step": 57565 + }, + { + "epoch": 3.6854234684079126, + "grad_norm": 5.773558616638184, + "learning_rate": 9.810750257315524e-07, + "loss": 0.1677, + "num_input_tokens_seen": 181186688, + "step": 57570 + }, + { + "epoch": 3.6857435503488896, + "grad_norm": 8.941823959350586, + "learning_rate": 9.806313450123056e-07, + "loss": 0.1408, + "num_input_tokens_seen": 181201984, + "step": 57575 + }, + { + "epoch": 3.686063632289866, + "grad_norm": 5.293064117431641, + "learning_rate": 9.801877401624175e-07, + "loss": 0.1191, + "num_input_tokens_seen": 181218048, + "step": 57580 + }, + { + "epoch": 3.686383714230843, + "grad_norm": 28.022645950317383, + "learning_rate": 9.797442112040349e-07, + "loss": 0.1615, + "num_input_tokens_seen": 181234368, + "step": 57585 + }, + { + "epoch": 3.68670379617182, + "grad_norm": 7.149477958679199, + "learning_rate": 9.79300758159309e-07, + "loss": 0.1587, + "num_input_tokens_seen": 181249792, + "step": 57590 + }, + { + "epoch": 3.6870238781127966, + "grad_norm": 8.634326934814453, + "learning_rate": 9.788573810503819e-07, + "loss": 0.1392, + "num_input_tokens_seen": 181265152, + "step": 57595 + }, + { + "epoch": 3.6873439600537736, + "grad_norm": 6.932226657867432, + "learning_rate": 9.784140798993929e-07, + "loss": 0.1723, + "num_input_tokens_seen": 181281728, + "step": 57600 + }, + { + "epoch": 3.6876640419947506, + "grad_norm": 9.319292068481445, + "learning_rate": 9.77970854728481e-07, + "loss": 0.1786, + "num_input_tokens_seen": 181297344, + "step": 57605 + }, + { + "epoch": 3.6879841239357276, + "grad_norm": 4.84767484664917, + "learning_rate": 9.775277055597748e-07, + "loss": 0.1465, + "num_input_tokens_seen": 181313536, + "step": 57610 + }, + { + "epoch": 3.6883042058767046, + "grad_norm": 5.3046064376831055, + "learning_rate": 9.770846324154069e-07, + "loss": 0.1422, + "num_input_tokens_seen": 181329536, + "step": 57615 + }, + { + "epoch": 3.6886242878176816, + "grad_norm": 8.818222999572754, + "learning_rate": 9.766416353174976e-07, + "loss": 0.2226, + "num_input_tokens_seen": 181344832, + "step": 57620 + }, + { + "epoch": 3.688944369758658, + "grad_norm": 6.620244026184082, + "learning_rate": 9.761987142881713e-07, + "loss": 0.1559, + "num_input_tokens_seen": 181360768, + "step": 57625 + }, + { + "epoch": 3.689264451699635, + "grad_norm": 9.165939331054688, + "learning_rate": 9.75755869349544e-07, + "loss": 0.2096, + "num_input_tokens_seen": 181376576, + "step": 57630 + }, + { + "epoch": 3.689584533640612, + "grad_norm": 7.977142333984375, + "learning_rate": 9.75313100523729e-07, + "loss": 0.146, + "num_input_tokens_seen": 181391680, + "step": 57635 + }, + { + "epoch": 3.6899046155815887, + "grad_norm": 10.525503158569336, + "learning_rate": 9.74870407832836e-07, + "loss": 0.1516, + "num_input_tokens_seen": 181406400, + "step": 57640 + }, + { + "epoch": 3.6902246975225657, + "grad_norm": 5.235145092010498, + "learning_rate": 9.744277912989697e-07, + "loss": 0.0999, + "num_input_tokens_seen": 181422144, + "step": 57645 + }, + { + "epoch": 3.6905447794635426, + "grad_norm": 9.806577682495117, + "learning_rate": 9.739852509442352e-07, + "loss": 0.1618, + "num_input_tokens_seen": 181438400, + "step": 57650 + }, + { + "epoch": 3.6908648614045196, + "grad_norm": 13.23413372039795, + "learning_rate": 9.735427867907265e-07, + "loss": 0.1733, + "num_input_tokens_seen": 181454400, + "step": 57655 + }, + { + "epoch": 3.6911849433454966, + "grad_norm": 37.96708297729492, + "learning_rate": 9.731003988605408e-07, + "loss": 0.1211, + "num_input_tokens_seen": 181470400, + "step": 57660 + }, + { + "epoch": 3.691505025286473, + "grad_norm": 5.577637672424316, + "learning_rate": 9.726580871757681e-07, + "loss": 0.1932, + "num_input_tokens_seen": 181485632, + "step": 57665 + }, + { + "epoch": 3.69182510722745, + "grad_norm": 8.387083053588867, + "learning_rate": 9.72215851758495e-07, + "loss": 0.1826, + "num_input_tokens_seen": 181502208, + "step": 57670 + }, + { + "epoch": 3.692145189168427, + "grad_norm": 32.40900421142578, + "learning_rate": 9.717736926308039e-07, + "loss": 0.2884, + "num_input_tokens_seen": 181517888, + "step": 57675 + }, + { + "epoch": 3.692465271109404, + "grad_norm": 5.1114115715026855, + "learning_rate": 9.713316098147747e-07, + "loss": 0.1474, + "num_input_tokens_seen": 181532992, + "step": 57680 + }, + { + "epoch": 3.6927853530503807, + "grad_norm": 7.501808166503906, + "learning_rate": 9.708896033324814e-07, + "loss": 0.1147, + "num_input_tokens_seen": 181548672, + "step": 57685 + }, + { + "epoch": 3.6931054349913577, + "grad_norm": 5.063461780548096, + "learning_rate": 9.70447673205998e-07, + "loss": 0.1595, + "num_input_tokens_seen": 181563328, + "step": 57690 + }, + { + "epoch": 3.6934255169323347, + "grad_norm": 7.248741149902344, + "learning_rate": 9.700058194573892e-07, + "loss": 0.1967, + "num_input_tokens_seen": 181579520, + "step": 57695 + }, + { + "epoch": 3.6937455988733117, + "grad_norm": 11.456868171691895, + "learning_rate": 9.69564042108721e-07, + "loss": 0.122, + "num_input_tokens_seen": 181594112, + "step": 57700 + }, + { + "epoch": 3.6940656808142887, + "grad_norm": 6.232638359069824, + "learning_rate": 9.691223411820528e-07, + "loss": 0.1416, + "num_input_tokens_seen": 181609536, + "step": 57705 + }, + { + "epoch": 3.694385762755265, + "grad_norm": 11.504237174987793, + "learning_rate": 9.686807166994407e-07, + "loss": 0.1628, + "num_input_tokens_seen": 181624192, + "step": 57710 + }, + { + "epoch": 3.694705844696242, + "grad_norm": 5.045443534851074, + "learning_rate": 9.682391686829376e-07, + "loss": 0.2056, + "num_input_tokens_seen": 181641216, + "step": 57715 + }, + { + "epoch": 3.695025926637219, + "grad_norm": 7.164358139038086, + "learning_rate": 9.677976971545908e-07, + "loss": 0.1575, + "num_input_tokens_seen": 181656832, + "step": 57720 + }, + { + "epoch": 3.695346008578196, + "grad_norm": 5.776247024536133, + "learning_rate": 9.673563021364476e-07, + "loss": 0.1759, + "num_input_tokens_seen": 181671808, + "step": 57725 + }, + { + "epoch": 3.6956660905191727, + "grad_norm": 10.480040550231934, + "learning_rate": 9.66914983650546e-07, + "loss": 0.1817, + "num_input_tokens_seen": 181687360, + "step": 57730 + }, + { + "epoch": 3.6959861724601497, + "grad_norm": 6.358046054840088, + "learning_rate": 9.66473741718926e-07, + "loss": 0.129, + "num_input_tokens_seen": 181702720, + "step": 57735 + }, + { + "epoch": 3.6963062544011267, + "grad_norm": 6.574100017547607, + "learning_rate": 9.66032576363618e-07, + "loss": 0.174, + "num_input_tokens_seen": 181718464, + "step": 57740 + }, + { + "epoch": 3.6966263363421037, + "grad_norm": 3.5888068675994873, + "learning_rate": 9.655914876066537e-07, + "loss": 0.1462, + "num_input_tokens_seen": 181734592, + "step": 57745 + }, + { + "epoch": 3.6969464182830807, + "grad_norm": 7.159847736358643, + "learning_rate": 9.65150475470058e-07, + "loss": 0.1505, + "num_input_tokens_seen": 181750464, + "step": 57750 + }, + { + "epoch": 3.6972665002240572, + "grad_norm": 5.425477981567383, + "learning_rate": 9.64709539975853e-07, + "loss": 0.1542, + "num_input_tokens_seen": 181765568, + "step": 57755 + }, + { + "epoch": 3.697586582165034, + "grad_norm": 3.0412492752075195, + "learning_rate": 9.642686811460565e-07, + "loss": 0.1164, + "num_input_tokens_seen": 181781888, + "step": 57760 + }, + { + "epoch": 3.697906664106011, + "grad_norm": 16.99484634399414, + "learning_rate": 9.638278990026818e-07, + "loss": 0.2066, + "num_input_tokens_seen": 181797504, + "step": 57765 + }, + { + "epoch": 3.6982267460469878, + "grad_norm": 9.320088386535645, + "learning_rate": 9.633871935677412e-07, + "loss": 0.1559, + "num_input_tokens_seen": 181812800, + "step": 57770 + }, + { + "epoch": 3.6985468279879647, + "grad_norm": 8.20836353302002, + "learning_rate": 9.6294656486324e-07, + "loss": 0.1546, + "num_input_tokens_seen": 181828544, + "step": 57775 + }, + { + "epoch": 3.6988669099289417, + "grad_norm": 8.450482368469238, + "learning_rate": 9.625060129111812e-07, + "loss": 0.1759, + "num_input_tokens_seen": 181844160, + "step": 57780 + }, + { + "epoch": 3.6991869918699187, + "grad_norm": 8.24223518371582, + "learning_rate": 9.620655377335637e-07, + "loss": 0.1689, + "num_input_tokens_seen": 181859392, + "step": 57785 + }, + { + "epoch": 3.6995070738108957, + "grad_norm": 5.640437602996826, + "learning_rate": 9.616251393523823e-07, + "loss": 0.1344, + "num_input_tokens_seen": 181875200, + "step": 57790 + }, + { + "epoch": 3.6998271557518727, + "grad_norm": 6.168683052062988, + "learning_rate": 9.611848177896286e-07, + "loss": 0.1284, + "num_input_tokens_seen": 181890496, + "step": 57795 + }, + { + "epoch": 3.7001472376928493, + "grad_norm": 4.903238296508789, + "learning_rate": 9.607445730672895e-07, + "loss": 0.1392, + "num_input_tokens_seen": 181905472, + "step": 57800 + }, + { + "epoch": 3.7004673196338262, + "grad_norm": 5.166935920715332, + "learning_rate": 9.603044052073479e-07, + "loss": 0.1796, + "num_input_tokens_seen": 181920768, + "step": 57805 + }, + { + "epoch": 3.7007874015748032, + "grad_norm": 7.894519329071045, + "learning_rate": 9.59864314231786e-07, + "loss": 0.1486, + "num_input_tokens_seen": 181935872, + "step": 57810 + }, + { + "epoch": 3.70110748351578, + "grad_norm": 17.133996963500977, + "learning_rate": 9.594243001625765e-07, + "loss": 0.2066, + "num_input_tokens_seen": 181952064, + "step": 57815 + }, + { + "epoch": 3.7014275654567568, + "grad_norm": 18.152185440063477, + "learning_rate": 9.589843630216933e-07, + "loss": 0.1551, + "num_input_tokens_seen": 181968000, + "step": 57820 + }, + { + "epoch": 3.7017476473977338, + "grad_norm": 6.7542338371276855, + "learning_rate": 9.585445028311044e-07, + "loss": 0.1295, + "num_input_tokens_seen": 181986816, + "step": 57825 + }, + { + "epoch": 3.7020677293387108, + "grad_norm": 10.558854103088379, + "learning_rate": 9.58104719612773e-07, + "loss": 0.2857, + "num_input_tokens_seen": 182002496, + "step": 57830 + }, + { + "epoch": 3.7023878112796877, + "grad_norm": 7.560016632080078, + "learning_rate": 9.576650133886623e-07, + "loss": 0.1325, + "num_input_tokens_seen": 182017728, + "step": 57835 + }, + { + "epoch": 3.7027078932206647, + "grad_norm": 7.158745765686035, + "learning_rate": 9.572253841807252e-07, + "loss": 0.1165, + "num_input_tokens_seen": 182033536, + "step": 57840 + }, + { + "epoch": 3.7030279751616413, + "grad_norm": 3.5032010078430176, + "learning_rate": 9.567858320109182e-07, + "loss": 0.1179, + "num_input_tokens_seen": 182048384, + "step": 57845 + }, + { + "epoch": 3.7033480571026183, + "grad_norm": 8.396778106689453, + "learning_rate": 9.563463569011866e-07, + "loss": 0.2408, + "num_input_tokens_seen": 182063680, + "step": 57850 + }, + { + "epoch": 3.7036681390435953, + "grad_norm": 16.914823532104492, + "learning_rate": 9.559069588734784e-07, + "loss": 0.1413, + "num_input_tokens_seen": 182079168, + "step": 57855 + }, + { + "epoch": 3.703988220984572, + "grad_norm": 5.1139397621154785, + "learning_rate": 9.554676379497338e-07, + "loss": 0.1944, + "num_input_tokens_seen": 182095360, + "step": 57860 + }, + { + "epoch": 3.704308302925549, + "grad_norm": 8.698446273803711, + "learning_rate": 9.5502839415189e-07, + "loss": 0.175, + "num_input_tokens_seen": 182112832, + "step": 57865 + }, + { + "epoch": 3.704628384866526, + "grad_norm": 19.095727920532227, + "learning_rate": 9.54589227501881e-07, + "loss": 0.2135, + "num_input_tokens_seen": 182127168, + "step": 57870 + }, + { + "epoch": 3.704948466807503, + "grad_norm": 4.550407409667969, + "learning_rate": 9.541501380216358e-07, + "loss": 0.1375, + "num_input_tokens_seen": 182142656, + "step": 57875 + }, + { + "epoch": 3.7052685487484798, + "grad_norm": 7.116178512573242, + "learning_rate": 9.53711125733081e-07, + "loss": 0.1446, + "num_input_tokens_seen": 182159680, + "step": 57880 + }, + { + "epoch": 3.7055886306894568, + "grad_norm": 85.97105407714844, + "learning_rate": 9.532721906581375e-07, + "loss": 0.233, + "num_input_tokens_seen": 182174464, + "step": 57885 + }, + { + "epoch": 3.7059087126304333, + "grad_norm": 7.606760501861572, + "learning_rate": 9.528333328187248e-07, + "loss": 0.2154, + "num_input_tokens_seen": 182190592, + "step": 57890 + }, + { + "epoch": 3.7062287945714103, + "grad_norm": 6.571393966674805, + "learning_rate": 9.523945522367564e-07, + "loss": 0.2132, + "num_input_tokens_seen": 182205888, + "step": 57895 + }, + { + "epoch": 3.7065488765123873, + "grad_norm": 8.22916030883789, + "learning_rate": 9.519558489341432e-07, + "loss": 0.1342, + "num_input_tokens_seen": 182222208, + "step": 57900 + }, + { + "epoch": 3.706868958453364, + "grad_norm": 11.055649757385254, + "learning_rate": 9.515172229327912e-07, + "loss": 0.1714, + "num_input_tokens_seen": 182237312, + "step": 57905 + }, + { + "epoch": 3.707189040394341, + "grad_norm": 29.740947723388672, + "learning_rate": 9.510786742546035e-07, + "loss": 0.1067, + "num_input_tokens_seen": 182252352, + "step": 57910 + }, + { + "epoch": 3.707509122335318, + "grad_norm": 109.11407470703125, + "learning_rate": 9.506402029214778e-07, + "loss": 0.148, + "num_input_tokens_seen": 182267328, + "step": 57915 + }, + { + "epoch": 3.707829204276295, + "grad_norm": 5.943877696990967, + "learning_rate": 9.502018089553119e-07, + "loss": 0.1358, + "num_input_tokens_seen": 182283200, + "step": 57920 + }, + { + "epoch": 3.708149286217272, + "grad_norm": 8.447360038757324, + "learning_rate": 9.497634923779933e-07, + "loss": 0.1286, + "num_input_tokens_seen": 182298432, + "step": 57925 + }, + { + "epoch": 3.7084693681582483, + "grad_norm": 8.202017784118652, + "learning_rate": 9.493252532114127e-07, + "loss": 0.1499, + "num_input_tokens_seen": 182314048, + "step": 57930 + }, + { + "epoch": 3.7087894500992253, + "grad_norm": 7.251333236694336, + "learning_rate": 9.4888709147745e-07, + "loss": 0.1604, + "num_input_tokens_seen": 182330112, + "step": 57935 + }, + { + "epoch": 3.7091095320402023, + "grad_norm": 5.721035957336426, + "learning_rate": 9.484490071979877e-07, + "loss": 0.1404, + "num_input_tokens_seen": 182347200, + "step": 57940 + }, + { + "epoch": 3.7094296139811793, + "grad_norm": 3.8197643756866455, + "learning_rate": 9.480110003948997e-07, + "loss": 0.1375, + "num_input_tokens_seen": 182362816, + "step": 57945 + }, + { + "epoch": 3.709749695922156, + "grad_norm": 7.425449848175049, + "learning_rate": 9.47573071090058e-07, + "loss": 0.0855, + "num_input_tokens_seen": 182377984, + "step": 57950 + }, + { + "epoch": 3.710069777863133, + "grad_norm": 20.986892700195312, + "learning_rate": 9.47135219305332e-07, + "loss": 0.2059, + "num_input_tokens_seen": 182393344, + "step": 57955 + }, + { + "epoch": 3.71038985980411, + "grad_norm": 5.220308303833008, + "learning_rate": 9.466974450625832e-07, + "loss": 0.1385, + "num_input_tokens_seen": 182408832, + "step": 57960 + }, + { + "epoch": 3.710709941745087, + "grad_norm": 8.114978790283203, + "learning_rate": 9.462597483836741e-07, + "loss": 0.1468, + "num_input_tokens_seen": 182426112, + "step": 57965 + }, + { + "epoch": 3.711030023686064, + "grad_norm": 14.051972389221191, + "learning_rate": 9.458221292904599e-07, + "loss": 0.217, + "num_input_tokens_seen": 182442944, + "step": 57970 + }, + { + "epoch": 3.7113501056270404, + "grad_norm": 6.940092086791992, + "learning_rate": 9.453845878047929e-07, + "loss": 0.1142, + "num_input_tokens_seen": 182458496, + "step": 57975 + }, + { + "epoch": 3.7116701875680174, + "grad_norm": 6.983901023864746, + "learning_rate": 9.449471239485222e-07, + "loss": 0.1684, + "num_input_tokens_seen": 182473920, + "step": 57980 + }, + { + "epoch": 3.7119902695089944, + "grad_norm": 8.508259773254395, + "learning_rate": 9.445097377434919e-07, + "loss": 0.1379, + "num_input_tokens_seen": 182490496, + "step": 57985 + }, + { + "epoch": 3.7123103514499713, + "grad_norm": 9.037640571594238, + "learning_rate": 9.440724292115433e-07, + "loss": 0.1118, + "num_input_tokens_seen": 182505920, + "step": 57990 + }, + { + "epoch": 3.712630433390948, + "grad_norm": 37.92066955566406, + "learning_rate": 9.436351983745121e-07, + "loss": 0.2433, + "num_input_tokens_seen": 182520832, + "step": 57995 + }, + { + "epoch": 3.712950515331925, + "grad_norm": 5.541773319244385, + "learning_rate": 9.431980452542333e-07, + "loss": 0.1167, + "num_input_tokens_seen": 182537216, + "step": 58000 + }, + { + "epoch": 3.713270597272902, + "grad_norm": 9.123044967651367, + "learning_rate": 9.427609698725349e-07, + "loss": 0.1445, + "num_input_tokens_seen": 182552320, + "step": 58005 + }, + { + "epoch": 3.713590679213879, + "grad_norm": 16.47394561767578, + "learning_rate": 9.423239722512422e-07, + "loss": 0.1567, + "num_input_tokens_seen": 182568576, + "step": 58010 + }, + { + "epoch": 3.713910761154856, + "grad_norm": 8.731366157531738, + "learning_rate": 9.418870524121768e-07, + "loss": 0.1741, + "num_input_tokens_seen": 182585408, + "step": 58015 + }, + { + "epoch": 3.7142308430958324, + "grad_norm": 4.850255012512207, + "learning_rate": 9.414502103771562e-07, + "loss": 0.1087, + "num_input_tokens_seen": 182601024, + "step": 58020 + }, + { + "epoch": 3.7145509250368094, + "grad_norm": 4.988193035125732, + "learning_rate": 9.410134461679935e-07, + "loss": 0.1428, + "num_input_tokens_seen": 182615936, + "step": 58025 + }, + { + "epoch": 3.7148710069777864, + "grad_norm": 15.326811790466309, + "learning_rate": 9.40576759806499e-07, + "loss": 0.2219, + "num_input_tokens_seen": 182632960, + "step": 58030 + }, + { + "epoch": 3.715191088918763, + "grad_norm": 2.2742607593536377, + "learning_rate": 9.401401513144778e-07, + "loss": 0.1462, + "num_input_tokens_seen": 182649216, + "step": 58035 + }, + { + "epoch": 3.71551117085974, + "grad_norm": 18.457265853881836, + "learning_rate": 9.397036207137339e-07, + "loss": 0.1371, + "num_input_tokens_seen": 182664000, + "step": 58040 + }, + { + "epoch": 3.715831252800717, + "grad_norm": 8.64289379119873, + "learning_rate": 9.392671680260623e-07, + "loss": 0.2014, + "num_input_tokens_seen": 182679296, + "step": 58045 + }, + { + "epoch": 3.716151334741694, + "grad_norm": 6.3395867347717285, + "learning_rate": 9.388307932732596e-07, + "loss": 0.1677, + "num_input_tokens_seen": 182695360, + "step": 58050 + }, + { + "epoch": 3.716471416682671, + "grad_norm": 6.373935699462891, + "learning_rate": 9.383944964771153e-07, + "loss": 0.211, + "num_input_tokens_seen": 182711040, + "step": 58055 + }, + { + "epoch": 3.716791498623648, + "grad_norm": 7.479914665222168, + "learning_rate": 9.379582776594145e-07, + "loss": 0.1923, + "num_input_tokens_seen": 182727488, + "step": 58060 + }, + { + "epoch": 3.7171115805646244, + "grad_norm": 10.332253456115723, + "learning_rate": 9.37522136841943e-07, + "loss": 0.2335, + "num_input_tokens_seen": 182742656, + "step": 58065 + }, + { + "epoch": 3.7174316625056014, + "grad_norm": 5.129547119140625, + "learning_rate": 9.370860740464757e-07, + "loss": 0.1594, + "num_input_tokens_seen": 182758528, + "step": 58070 + }, + { + "epoch": 3.7177517444465784, + "grad_norm": 30.26409339904785, + "learning_rate": 9.366500892947905e-07, + "loss": 0.1927, + "num_input_tokens_seen": 182775104, + "step": 58075 + }, + { + "epoch": 3.718071826387555, + "grad_norm": 24.374950408935547, + "learning_rate": 9.362141826086549e-07, + "loss": 0.2175, + "num_input_tokens_seen": 182789568, + "step": 58080 + }, + { + "epoch": 3.718391908328532, + "grad_norm": 9.630457878112793, + "learning_rate": 9.357783540098383e-07, + "loss": 0.1658, + "num_input_tokens_seen": 182804800, + "step": 58085 + }, + { + "epoch": 3.718711990269509, + "grad_norm": 6.192980766296387, + "learning_rate": 9.353426035201032e-07, + "loss": 0.1647, + "num_input_tokens_seen": 182821696, + "step": 58090 + }, + { + "epoch": 3.719032072210486, + "grad_norm": 7.0691304206848145, + "learning_rate": 9.349069311612086e-07, + "loss": 0.1604, + "num_input_tokens_seen": 182837312, + "step": 58095 + }, + { + "epoch": 3.719352154151463, + "grad_norm": 8.015353202819824, + "learning_rate": 9.344713369549094e-07, + "loss": 0.1641, + "num_input_tokens_seen": 182853184, + "step": 58100 + }, + { + "epoch": 3.71967223609244, + "grad_norm": 4.46745491027832, + "learning_rate": 9.34035820922957e-07, + "loss": 0.1184, + "num_input_tokens_seen": 182869824, + "step": 58105 + }, + { + "epoch": 3.7199923180334165, + "grad_norm": 4.5556960105896, + "learning_rate": 9.336003830870991e-07, + "loss": 0.1285, + "num_input_tokens_seen": 182886016, + "step": 58110 + }, + { + "epoch": 3.7203123999743934, + "grad_norm": 8.836784362792969, + "learning_rate": 9.331650234690778e-07, + "loss": 0.1813, + "num_input_tokens_seen": 182902336, + "step": 58115 + }, + { + "epoch": 3.7206324819153704, + "grad_norm": 7.500845909118652, + "learning_rate": 9.32729742090635e-07, + "loss": 0.1766, + "num_input_tokens_seen": 182919040, + "step": 58120 + }, + { + "epoch": 3.720952563856347, + "grad_norm": 3.601702928543091, + "learning_rate": 9.322945389735053e-07, + "loss": 0.1951, + "num_input_tokens_seen": 182934208, + "step": 58125 + }, + { + "epoch": 3.721272645797324, + "grad_norm": 8.906002044677734, + "learning_rate": 9.318594141394202e-07, + "loss": 0.1802, + "num_input_tokens_seen": 182949888, + "step": 58130 + }, + { + "epoch": 3.721592727738301, + "grad_norm": 7.62807035446167, + "learning_rate": 9.314243676101082e-07, + "loss": 0.1623, + "num_input_tokens_seen": 182966080, + "step": 58135 + }, + { + "epoch": 3.721912809679278, + "grad_norm": 17.641393661499023, + "learning_rate": 9.309893994072924e-07, + "loss": 0.165, + "num_input_tokens_seen": 182980928, + "step": 58140 + }, + { + "epoch": 3.722232891620255, + "grad_norm": 5.694278240203857, + "learning_rate": 9.305545095526927e-07, + "loss": 0.1839, + "num_input_tokens_seen": 182996224, + "step": 58145 + }, + { + "epoch": 3.722552973561232, + "grad_norm": 7.398188591003418, + "learning_rate": 9.301196980680277e-07, + "loss": 0.1195, + "num_input_tokens_seen": 183011008, + "step": 58150 + }, + { + "epoch": 3.7228730555022085, + "grad_norm": 9.02192211151123, + "learning_rate": 9.296849649750059e-07, + "loss": 0.1878, + "num_input_tokens_seen": 183027136, + "step": 58155 + }, + { + "epoch": 3.7231931374431855, + "grad_norm": 14.16413402557373, + "learning_rate": 9.292503102953393e-07, + "loss": 0.2403, + "num_input_tokens_seen": 183044736, + "step": 58160 + }, + { + "epoch": 3.7235132193841625, + "grad_norm": 9.835639953613281, + "learning_rate": 9.288157340507287e-07, + "loss": 0.1359, + "num_input_tokens_seen": 183073088, + "step": 58165 + }, + { + "epoch": 3.723833301325139, + "grad_norm": 5.2637619972229, + "learning_rate": 9.283812362628772e-07, + "loss": 0.1782, + "num_input_tokens_seen": 183088192, + "step": 58170 + }, + { + "epoch": 3.724153383266116, + "grad_norm": 32.11784744262695, + "learning_rate": 9.279468169534803e-07, + "loss": 0.2739, + "num_input_tokens_seen": 183103616, + "step": 58175 + }, + { + "epoch": 3.724473465207093, + "grad_norm": 5.827411651611328, + "learning_rate": 9.275124761442303e-07, + "loss": 0.1555, + "num_input_tokens_seen": 183119680, + "step": 58180 + }, + { + "epoch": 3.72479354714807, + "grad_norm": 7.594862461090088, + "learning_rate": 9.270782138568179e-07, + "loss": 0.1637, + "num_input_tokens_seen": 183135744, + "step": 58185 + }, + { + "epoch": 3.725113629089047, + "grad_norm": 7.2504777908325195, + "learning_rate": 9.266440301129248e-07, + "loss": 0.2175, + "num_input_tokens_seen": 183152000, + "step": 58190 + }, + { + "epoch": 3.7254337110300235, + "grad_norm": 10.579184532165527, + "learning_rate": 9.262099249342344e-07, + "loss": 0.1934, + "num_input_tokens_seen": 183167232, + "step": 58195 + }, + { + "epoch": 3.7257537929710005, + "grad_norm": 6.4275031089782715, + "learning_rate": 9.257758983424228e-07, + "loss": 0.1466, + "num_input_tokens_seen": 183182464, + "step": 58200 + }, + { + "epoch": 3.7260738749119775, + "grad_norm": 5.826449394226074, + "learning_rate": 9.253419503591632e-07, + "loss": 0.1633, + "num_input_tokens_seen": 183198464, + "step": 58205 + }, + { + "epoch": 3.7263939568529545, + "grad_norm": 9.936037063598633, + "learning_rate": 9.249080810061243e-07, + "loss": 0.2352, + "num_input_tokens_seen": 183214336, + "step": 58210 + }, + { + "epoch": 3.726714038793931, + "grad_norm": 4.539838790893555, + "learning_rate": 9.244742903049714e-07, + "loss": 0.1492, + "num_input_tokens_seen": 183229952, + "step": 58215 + }, + { + "epoch": 3.727034120734908, + "grad_norm": 10.823837280273438, + "learning_rate": 9.240405782773659e-07, + "loss": 0.193, + "num_input_tokens_seen": 183245120, + "step": 58220 + }, + { + "epoch": 3.727354202675885, + "grad_norm": 11.929193496704102, + "learning_rate": 9.23606944944965e-07, + "loss": 0.1448, + "num_input_tokens_seen": 183261248, + "step": 58225 + }, + { + "epoch": 3.727674284616862, + "grad_norm": 18.592687606811523, + "learning_rate": 9.231733903294213e-07, + "loss": 0.3069, + "num_input_tokens_seen": 183276352, + "step": 58230 + }, + { + "epoch": 3.727994366557839, + "grad_norm": 8.436681747436523, + "learning_rate": 9.22739914452386e-07, + "loss": 0.1813, + "num_input_tokens_seen": 183291072, + "step": 58235 + }, + { + "epoch": 3.7283144484988155, + "grad_norm": 4.838722229003906, + "learning_rate": 9.223065173355039e-07, + "loss": 0.1631, + "num_input_tokens_seen": 183306944, + "step": 58240 + }, + { + "epoch": 3.7286345304397925, + "grad_norm": 14.202372550964355, + "learning_rate": 9.218731990004162e-07, + "loss": 0.1864, + "num_input_tokens_seen": 183321664, + "step": 58245 + }, + { + "epoch": 3.7289546123807695, + "grad_norm": 8.357329368591309, + "learning_rate": 9.214399594687609e-07, + "loss": 0.2093, + "num_input_tokens_seen": 183336896, + "step": 58250 + }, + { + "epoch": 3.7292746943217465, + "grad_norm": 15.692387580871582, + "learning_rate": 9.210067987621715e-07, + "loss": 0.2773, + "num_input_tokens_seen": 183354176, + "step": 58255 + }, + { + "epoch": 3.729594776262723, + "grad_norm": 6.4604058265686035, + "learning_rate": 9.205737169022777e-07, + "loss": 0.1509, + "num_input_tokens_seen": 183370304, + "step": 58260 + }, + { + "epoch": 3.7299148582037, + "grad_norm": 4.11737060546875, + "learning_rate": 9.201407139107049e-07, + "loss": 0.1581, + "num_input_tokens_seen": 183384000, + "step": 58265 + }, + { + "epoch": 3.730234940144677, + "grad_norm": 7.42584753036499, + "learning_rate": 9.197077898090776e-07, + "loss": 0.1703, + "num_input_tokens_seen": 183401344, + "step": 58270 + }, + { + "epoch": 3.730555022085654, + "grad_norm": 6.325806617736816, + "learning_rate": 9.1927494461901e-07, + "loss": 0.2102, + "num_input_tokens_seen": 183417920, + "step": 58275 + }, + { + "epoch": 3.730875104026631, + "grad_norm": 5.260090351104736, + "learning_rate": 9.188421783621184e-07, + "loss": 0.1473, + "num_input_tokens_seen": 183436544, + "step": 58280 + }, + { + "epoch": 3.7311951859676076, + "grad_norm": 9.65290641784668, + "learning_rate": 9.18409491060013e-07, + "loss": 0.2144, + "num_input_tokens_seen": 183451520, + "step": 58285 + }, + { + "epoch": 3.7315152679085846, + "grad_norm": 5.5148210525512695, + "learning_rate": 9.179768827342983e-07, + "loss": 0.141, + "num_input_tokens_seen": 183468352, + "step": 58290 + }, + { + "epoch": 3.7318353498495616, + "grad_norm": 15.354135513305664, + "learning_rate": 9.175443534065795e-07, + "loss": 0.1781, + "num_input_tokens_seen": 183483648, + "step": 58295 + }, + { + "epoch": 3.732155431790538, + "grad_norm": 6.197383880615234, + "learning_rate": 9.17111903098451e-07, + "loss": 0.142, + "num_input_tokens_seen": 183499264, + "step": 58300 + }, + { + "epoch": 3.732475513731515, + "grad_norm": 8.24161434173584, + "learning_rate": 9.166795318315111e-07, + "loss": 0.134, + "num_input_tokens_seen": 183514816, + "step": 58305 + }, + { + "epoch": 3.732795595672492, + "grad_norm": 30.5963077545166, + "learning_rate": 9.162472396273461e-07, + "loss": 0.1455, + "num_input_tokens_seen": 183531840, + "step": 58310 + }, + { + "epoch": 3.733115677613469, + "grad_norm": 5.870702743530273, + "learning_rate": 9.158150265075458e-07, + "loss": 0.1455, + "num_input_tokens_seen": 183547712, + "step": 58315 + }, + { + "epoch": 3.733435759554446, + "grad_norm": 6.498317718505859, + "learning_rate": 9.153828924936911e-07, + "loss": 0.1859, + "num_input_tokens_seen": 183563136, + "step": 58320 + }, + { + "epoch": 3.733755841495423, + "grad_norm": 7.56807804107666, + "learning_rate": 9.149508376073607e-07, + "loss": 0.1411, + "num_input_tokens_seen": 183578176, + "step": 58325 + }, + { + "epoch": 3.7340759234363996, + "grad_norm": 24.75293731689453, + "learning_rate": 9.145188618701292e-07, + "loss": 0.1877, + "num_input_tokens_seen": 183593728, + "step": 58330 + }, + { + "epoch": 3.7343960053773766, + "grad_norm": 10.92003059387207, + "learning_rate": 9.140869653035675e-07, + "loss": 0.2938, + "num_input_tokens_seen": 183608192, + "step": 58335 + }, + { + "epoch": 3.7347160873183536, + "grad_norm": 7.811805248260498, + "learning_rate": 9.136551479292419e-07, + "loss": 0.1158, + "num_input_tokens_seen": 183623616, + "step": 58340 + }, + { + "epoch": 3.73503616925933, + "grad_norm": 20.878604888916016, + "learning_rate": 9.132234097687146e-07, + "loss": 0.2017, + "num_input_tokens_seen": 183639424, + "step": 58345 + }, + { + "epoch": 3.735356251200307, + "grad_norm": 4.830063343048096, + "learning_rate": 9.127917508435455e-07, + "loss": 0.1849, + "num_input_tokens_seen": 183655104, + "step": 58350 + }, + { + "epoch": 3.735676333141284, + "grad_norm": 6.372149467468262, + "learning_rate": 9.123601711752891e-07, + "loss": 0.1435, + "num_input_tokens_seen": 183671040, + "step": 58355 + }, + { + "epoch": 3.735996415082261, + "grad_norm": 5.058278560638428, + "learning_rate": 9.11928670785496e-07, + "loss": 0.1367, + "num_input_tokens_seen": 183685504, + "step": 58360 + }, + { + "epoch": 3.736316497023238, + "grad_norm": 6.830733299255371, + "learning_rate": 9.114972496957133e-07, + "loss": 0.1668, + "num_input_tokens_seen": 183701568, + "step": 58365 + }, + { + "epoch": 3.736636578964215, + "grad_norm": 35.1104736328125, + "learning_rate": 9.110659079274837e-07, + "loss": 0.1892, + "num_input_tokens_seen": 183717760, + "step": 58370 + }, + { + "epoch": 3.7369566609051916, + "grad_norm": 6.464717864990234, + "learning_rate": 9.106346455023452e-07, + "loss": 0.1631, + "num_input_tokens_seen": 183733376, + "step": 58375 + }, + { + "epoch": 3.7372767428461686, + "grad_norm": 6.084573268890381, + "learning_rate": 9.102034624418354e-07, + "loss": 0.1023, + "num_input_tokens_seen": 183748416, + "step": 58380 + }, + { + "epoch": 3.7375968247871456, + "grad_norm": 18.102540969848633, + "learning_rate": 9.097723587674823e-07, + "loss": 0.1932, + "num_input_tokens_seen": 183764032, + "step": 58385 + }, + { + "epoch": 3.737916906728122, + "grad_norm": 9.161663055419922, + "learning_rate": 9.093413345008159e-07, + "loss": 0.1381, + "num_input_tokens_seen": 183779008, + "step": 58390 + }, + { + "epoch": 3.738236988669099, + "grad_norm": 12.071771621704102, + "learning_rate": 9.089103896633561e-07, + "loss": 0.2475, + "num_input_tokens_seen": 183794048, + "step": 58395 + }, + { + "epoch": 3.738557070610076, + "grad_norm": 6.467938423156738, + "learning_rate": 9.084795242766248e-07, + "loss": 0.1339, + "num_input_tokens_seen": 183809664, + "step": 58400 + }, + { + "epoch": 3.738877152551053, + "grad_norm": 107.8546142578125, + "learning_rate": 9.080487383621358e-07, + "loss": 0.135, + "num_input_tokens_seen": 183825472, + "step": 58405 + }, + { + "epoch": 3.73919723449203, + "grad_norm": 7.955082416534424, + "learning_rate": 9.076180319414002e-07, + "loss": 0.1617, + "num_input_tokens_seen": 183841664, + "step": 58410 + }, + { + "epoch": 3.739517316433007, + "grad_norm": 16.42233657836914, + "learning_rate": 9.071874050359272e-07, + "loss": 0.2482, + "num_input_tokens_seen": 183857088, + "step": 58415 + }, + { + "epoch": 3.7398373983739837, + "grad_norm": 10.629131317138672, + "learning_rate": 9.067568576672167e-07, + "loss": 0.2929, + "num_input_tokens_seen": 183872384, + "step": 58420 + }, + { + "epoch": 3.7401574803149606, + "grad_norm": 7.433160305023193, + "learning_rate": 9.063263898567709e-07, + "loss": 0.1294, + "num_input_tokens_seen": 183887680, + "step": 58425 + }, + { + "epoch": 3.7404775622559376, + "grad_norm": 5.261552810668945, + "learning_rate": 9.05896001626084e-07, + "loss": 0.1621, + "num_input_tokens_seen": 183903424, + "step": 58430 + }, + { + "epoch": 3.740797644196914, + "grad_norm": 8.107470512390137, + "learning_rate": 9.054656929966474e-07, + "loss": 0.1688, + "num_input_tokens_seen": 183919296, + "step": 58435 + }, + { + "epoch": 3.741117726137891, + "grad_norm": 5.310460090637207, + "learning_rate": 9.050354639899484e-07, + "loss": 0.1352, + "num_input_tokens_seen": 183933760, + "step": 58440 + }, + { + "epoch": 3.741437808078868, + "grad_norm": 10.585951805114746, + "learning_rate": 9.046053146274702e-07, + "loss": 0.1704, + "num_input_tokens_seen": 183948992, + "step": 58445 + }, + { + "epoch": 3.741757890019845, + "grad_norm": 11.72725772857666, + "learning_rate": 9.041752449306928e-07, + "loss": 0.2059, + "num_input_tokens_seen": 183965312, + "step": 58450 + }, + { + "epoch": 3.742077971960822, + "grad_norm": 4.029614448547363, + "learning_rate": 9.037452549210915e-07, + "loss": 0.113, + "num_input_tokens_seen": 183980480, + "step": 58455 + }, + { + "epoch": 3.742398053901799, + "grad_norm": 6.748981475830078, + "learning_rate": 9.033153446201365e-07, + "loss": 0.1869, + "num_input_tokens_seen": 183996096, + "step": 58460 + }, + { + "epoch": 3.7427181358427757, + "grad_norm": 6.0325608253479, + "learning_rate": 9.02885514049297e-07, + "loss": 0.1666, + "num_input_tokens_seen": 184011264, + "step": 58465 + }, + { + "epoch": 3.7430382177837527, + "grad_norm": 9.156265258789062, + "learning_rate": 9.024557632300362e-07, + "loss": 0.1623, + "num_input_tokens_seen": 184026688, + "step": 58470 + }, + { + "epoch": 3.7433582997247297, + "grad_norm": 27.63103485107422, + "learning_rate": 9.020260921838131e-07, + "loss": 0.1249, + "num_input_tokens_seen": 184042048, + "step": 58475 + }, + { + "epoch": 3.743678381665706, + "grad_norm": 4.682126045227051, + "learning_rate": 9.015965009320834e-07, + "loss": 0.1691, + "num_input_tokens_seen": 184057472, + "step": 58480 + }, + { + "epoch": 3.743998463606683, + "grad_norm": 6.144904136657715, + "learning_rate": 9.011669894962988e-07, + "loss": 0.1338, + "num_input_tokens_seen": 184073344, + "step": 58485 + }, + { + "epoch": 3.74431854554766, + "grad_norm": 5.6010918617248535, + "learning_rate": 9.007375578979066e-07, + "loss": 0.1715, + "num_input_tokens_seen": 184088704, + "step": 58490 + }, + { + "epoch": 3.744638627488637, + "grad_norm": 10.422874450683594, + "learning_rate": 9.003082061583498e-07, + "loss": 0.1331, + "num_input_tokens_seen": 184104064, + "step": 58495 + }, + { + "epoch": 3.744958709429614, + "grad_norm": 3.8802037239074707, + "learning_rate": 8.998789342990702e-07, + "loss": 0.1823, + "num_input_tokens_seen": 184119232, + "step": 58500 + }, + { + "epoch": 3.7452787913705907, + "grad_norm": 7.273350238800049, + "learning_rate": 8.994497423415005e-07, + "loss": 0.1629, + "num_input_tokens_seen": 184135360, + "step": 58505 + }, + { + "epoch": 3.7455988733115677, + "grad_norm": 6.586413383483887, + "learning_rate": 8.990206303070742e-07, + "loss": 0.1444, + "num_input_tokens_seen": 184152192, + "step": 58510 + }, + { + "epoch": 3.7459189552525447, + "grad_norm": 6.900683879852295, + "learning_rate": 8.985915982172186e-07, + "loss": 0.1326, + "num_input_tokens_seen": 184168256, + "step": 58515 + }, + { + "epoch": 3.7462390371935217, + "grad_norm": 6.771989822387695, + "learning_rate": 8.981626460933562e-07, + "loss": 0.2129, + "num_input_tokens_seen": 184184320, + "step": 58520 + }, + { + "epoch": 3.7465591191344982, + "grad_norm": 6.6078715324401855, + "learning_rate": 8.977337739569094e-07, + "loss": 0.1328, + "num_input_tokens_seen": 184199552, + "step": 58525 + }, + { + "epoch": 3.7468792010754752, + "grad_norm": 3.531680107116699, + "learning_rate": 8.973049818292903e-07, + "loss": 0.0902, + "num_input_tokens_seen": 184215104, + "step": 58530 + }, + { + "epoch": 3.747199283016452, + "grad_norm": 4.55839729309082, + "learning_rate": 8.968762697319141e-07, + "loss": 0.2247, + "num_input_tokens_seen": 184230592, + "step": 58535 + }, + { + "epoch": 3.747519364957429, + "grad_norm": 7.954013824462891, + "learning_rate": 8.964476376861847e-07, + "loss": 0.1407, + "num_input_tokens_seen": 184246848, + "step": 58540 + }, + { + "epoch": 3.747839446898406, + "grad_norm": 6.746589183807373, + "learning_rate": 8.960190857135088e-07, + "loss": 0.2023, + "num_input_tokens_seen": 184262656, + "step": 58545 + }, + { + "epoch": 3.7481595288393827, + "grad_norm": 10.631874084472656, + "learning_rate": 8.955906138352846e-07, + "loss": 0.2425, + "num_input_tokens_seen": 184279744, + "step": 58550 + }, + { + "epoch": 3.7484796107803597, + "grad_norm": 6.643558025360107, + "learning_rate": 8.951622220729084e-07, + "loss": 0.2124, + "num_input_tokens_seen": 184294336, + "step": 58555 + }, + { + "epoch": 3.7487996927213367, + "grad_norm": 8.523418426513672, + "learning_rate": 8.947339104477712e-07, + "loss": 0.2263, + "num_input_tokens_seen": 184310336, + "step": 58560 + }, + { + "epoch": 3.7491197746623133, + "grad_norm": 5.187684059143066, + "learning_rate": 8.943056789812612e-07, + "loss": 0.1727, + "num_input_tokens_seen": 184324864, + "step": 58565 + }, + { + "epoch": 3.7494398566032903, + "grad_norm": 25.383750915527344, + "learning_rate": 8.938775276947618e-07, + "loss": 0.1008, + "num_input_tokens_seen": 184340800, + "step": 58570 + }, + { + "epoch": 3.7497599385442673, + "grad_norm": 8.599325180053711, + "learning_rate": 8.934494566096527e-07, + "loss": 0.1586, + "num_input_tokens_seen": 184356096, + "step": 58575 + }, + { + "epoch": 3.7500800204852442, + "grad_norm": 7.764920234680176, + "learning_rate": 8.930214657473085e-07, + "loss": 0.1676, + "num_input_tokens_seen": 184371968, + "step": 58580 + }, + { + "epoch": 3.7504001024262212, + "grad_norm": 8.691716194152832, + "learning_rate": 8.925935551291029e-07, + "loss": 0.1847, + "num_input_tokens_seen": 184387328, + "step": 58585 + }, + { + "epoch": 3.7507201843671982, + "grad_norm": 6.499695777893066, + "learning_rate": 8.921657247764027e-07, + "loss": 0.1324, + "num_input_tokens_seen": 184402752, + "step": 58590 + }, + { + "epoch": 3.7507201843671982, + "eval_loss": 0.5466420650482178, + "eval_runtime": 51.0173, + "eval_samples_per_second": 272.182, + "eval_steps_per_second": 34.028, + "num_input_tokens_seen": 184402752, + "step": 58590 + }, + { + "epoch": 3.7510402663081748, + "grad_norm": 5.922232627868652, + "learning_rate": 8.917379747105709e-07, + "loss": 0.1472, + "num_input_tokens_seen": 184417984, + "step": 58595 + }, + { + "epoch": 3.7513603482491518, + "grad_norm": 10.027692794799805, + "learning_rate": 8.913103049529676e-07, + "loss": 0.1734, + "num_input_tokens_seen": 184434176, + "step": 58600 + }, + { + "epoch": 3.7516804301901288, + "grad_norm": 7.129265785217285, + "learning_rate": 8.908827155249475e-07, + "loss": 0.1154, + "num_input_tokens_seen": 184449792, + "step": 58605 + }, + { + "epoch": 3.7520005121311053, + "grad_norm": 7.2519707679748535, + "learning_rate": 8.904552064478647e-07, + "loss": 0.1888, + "num_input_tokens_seen": 184465472, + "step": 58610 + }, + { + "epoch": 3.7523205940720823, + "grad_norm": 9.71768569946289, + "learning_rate": 8.900277777430636e-07, + "loss": 0.1765, + "num_input_tokens_seen": 184480832, + "step": 58615 + }, + { + "epoch": 3.7526406760130593, + "grad_norm": 10.126609802246094, + "learning_rate": 8.896004294318906e-07, + "loss": 0.1907, + "num_input_tokens_seen": 184496000, + "step": 58620 + }, + { + "epoch": 3.7529607579540363, + "grad_norm": 12.526363372802734, + "learning_rate": 8.891731615356822e-07, + "loss": 0.1515, + "num_input_tokens_seen": 184510720, + "step": 58625 + }, + { + "epoch": 3.7532808398950133, + "grad_norm": 8.690206527709961, + "learning_rate": 8.887459740757767e-07, + "loss": 0.16, + "num_input_tokens_seen": 184525120, + "step": 58630 + }, + { + "epoch": 3.7536009218359903, + "grad_norm": 8.437463760375977, + "learning_rate": 8.883188670735043e-07, + "loss": 0.1823, + "num_input_tokens_seen": 184540160, + "step": 58635 + }, + { + "epoch": 3.753921003776967, + "grad_norm": 11.459362030029297, + "learning_rate": 8.878918405501921e-07, + "loss": 0.1471, + "num_input_tokens_seen": 184554944, + "step": 58640 + }, + { + "epoch": 3.754241085717944, + "grad_norm": 4.854763984680176, + "learning_rate": 8.874648945271658e-07, + "loss": 0.156, + "num_input_tokens_seen": 184571584, + "step": 58645 + }, + { + "epoch": 3.754561167658921, + "grad_norm": 7.479836463928223, + "learning_rate": 8.870380290257416e-07, + "loss": 0.1775, + "num_input_tokens_seen": 184586240, + "step": 58650 + }, + { + "epoch": 3.7548812495998973, + "grad_norm": 6.448084354400635, + "learning_rate": 8.866112440672375e-07, + "loss": 0.1593, + "num_input_tokens_seen": 184600832, + "step": 58655 + }, + { + "epoch": 3.7552013315408743, + "grad_norm": 6.43918514251709, + "learning_rate": 8.86184539672964e-07, + "loss": 0.1339, + "num_input_tokens_seen": 184616960, + "step": 58660 + }, + { + "epoch": 3.7555214134818513, + "grad_norm": 5.552137851715088, + "learning_rate": 8.857579158642285e-07, + "loss": 0.1753, + "num_input_tokens_seen": 184632576, + "step": 58665 + }, + { + "epoch": 3.7558414954228283, + "grad_norm": 3.9686763286590576, + "learning_rate": 8.853313726623344e-07, + "loss": 0.1055, + "num_input_tokens_seen": 184648896, + "step": 58670 + }, + { + "epoch": 3.7561615773638053, + "grad_norm": 10.491434097290039, + "learning_rate": 8.849049100885812e-07, + "loss": 0.1847, + "num_input_tokens_seen": 184664256, + "step": 58675 + }, + { + "epoch": 3.7564816593047823, + "grad_norm": 6.153480052947998, + "learning_rate": 8.84478528164264e-07, + "loss": 0.4096, + "num_input_tokens_seen": 184679488, + "step": 58680 + }, + { + "epoch": 3.756801741245759, + "grad_norm": 14.517176628112793, + "learning_rate": 8.840522269106741e-07, + "loss": 0.1922, + "num_input_tokens_seen": 184695424, + "step": 58685 + }, + { + "epoch": 3.757121823186736, + "grad_norm": 9.710278511047363, + "learning_rate": 8.836260063490978e-07, + "loss": 0.1835, + "num_input_tokens_seen": 184710336, + "step": 58690 + }, + { + "epoch": 3.757441905127713, + "grad_norm": 3.0801494121551514, + "learning_rate": 8.831998665008204e-07, + "loss": 0.1775, + "num_input_tokens_seen": 184726272, + "step": 58695 + }, + { + "epoch": 3.7577619870686894, + "grad_norm": 5.0849528312683105, + "learning_rate": 8.827738073871203e-07, + "loss": 0.1546, + "num_input_tokens_seen": 184741696, + "step": 58700 + }, + { + "epoch": 3.7580820690096663, + "grad_norm": 8.897178649902344, + "learning_rate": 8.823478290292719e-07, + "loss": 0.2376, + "num_input_tokens_seen": 184756416, + "step": 58705 + }, + { + "epoch": 3.7584021509506433, + "grad_norm": 4.443512916564941, + "learning_rate": 8.819219314485472e-07, + "loss": 0.157, + "num_input_tokens_seen": 184772032, + "step": 58710 + }, + { + "epoch": 3.7587222328916203, + "grad_norm": 13.514322280883789, + "learning_rate": 8.814961146662133e-07, + "loss": 0.1521, + "num_input_tokens_seen": 184788736, + "step": 58715 + }, + { + "epoch": 3.7590423148325973, + "grad_norm": 7.382718086242676, + "learning_rate": 8.810703787035324e-07, + "loss": 0.1624, + "num_input_tokens_seen": 184804224, + "step": 58720 + }, + { + "epoch": 3.7593623967735743, + "grad_norm": 5.254143238067627, + "learning_rate": 8.806447235817633e-07, + "loss": 0.1646, + "num_input_tokens_seen": 184819584, + "step": 58725 + }, + { + "epoch": 3.759682478714551, + "grad_norm": 8.039206504821777, + "learning_rate": 8.802191493221638e-07, + "loss": 0.1719, + "num_input_tokens_seen": 184835200, + "step": 58730 + }, + { + "epoch": 3.760002560655528, + "grad_norm": 6.860873222351074, + "learning_rate": 8.79793655945981e-07, + "loss": 0.1877, + "num_input_tokens_seen": 184850816, + "step": 58735 + }, + { + "epoch": 3.760322642596505, + "grad_norm": 4.472239017486572, + "learning_rate": 8.793682434744647e-07, + "loss": 0.1455, + "num_input_tokens_seen": 184866048, + "step": 58740 + }, + { + "epoch": 3.7606427245374814, + "grad_norm": 7.497593402862549, + "learning_rate": 8.789429119288564e-07, + "loss": 0.2206, + "num_input_tokens_seen": 184880960, + "step": 58745 + }, + { + "epoch": 3.7609628064784584, + "grad_norm": 6.581731796264648, + "learning_rate": 8.785176613303955e-07, + "loss": 0.1905, + "num_input_tokens_seen": 184897344, + "step": 58750 + }, + { + "epoch": 3.7612828884194354, + "grad_norm": 10.047319412231445, + "learning_rate": 8.780924917003164e-07, + "loss": 0.162, + "num_input_tokens_seen": 184911808, + "step": 58755 + }, + { + "epoch": 3.7616029703604124, + "grad_norm": 5.149494647979736, + "learning_rate": 8.776674030598494e-07, + "loss": 0.2049, + "num_input_tokens_seen": 184926976, + "step": 58760 + }, + { + "epoch": 3.7619230523013893, + "grad_norm": 4.338736534118652, + "learning_rate": 8.772423954302237e-07, + "loss": 0.1463, + "num_input_tokens_seen": 184942656, + "step": 58765 + }, + { + "epoch": 3.762243134242366, + "grad_norm": 46.94009780883789, + "learning_rate": 8.768174688326583e-07, + "loss": 0.1993, + "num_input_tokens_seen": 184959232, + "step": 58770 + }, + { + "epoch": 3.762563216183343, + "grad_norm": 6.5091142654418945, + "learning_rate": 8.763926232883743e-07, + "loss": 0.1141, + "num_input_tokens_seen": 184974592, + "step": 58775 + }, + { + "epoch": 3.76288329812432, + "grad_norm": 7.268730640411377, + "learning_rate": 8.75967858818586e-07, + "loss": 0.1266, + "num_input_tokens_seen": 184989632, + "step": 58780 + }, + { + "epoch": 3.763203380065297, + "grad_norm": 5.365929126739502, + "learning_rate": 8.755431754445035e-07, + "loss": 0.2549, + "num_input_tokens_seen": 185006464, + "step": 58785 + }, + { + "epoch": 3.7635234620062734, + "grad_norm": 12.804243087768555, + "learning_rate": 8.751185731873329e-07, + "loss": 0.1331, + "num_input_tokens_seen": 185021376, + "step": 58790 + }, + { + "epoch": 3.7638435439472504, + "grad_norm": 6.2894606590271, + "learning_rate": 8.746940520682775e-07, + "loss": 0.2821, + "num_input_tokens_seen": 185036032, + "step": 58795 + }, + { + "epoch": 3.7641636258882274, + "grad_norm": 6.856014251708984, + "learning_rate": 8.742696121085343e-07, + "loss": 0.1156, + "num_input_tokens_seen": 185051520, + "step": 58800 + }, + { + "epoch": 3.7644837078292044, + "grad_norm": 7.967469692230225, + "learning_rate": 8.738452533293002e-07, + "loss": 0.1415, + "num_input_tokens_seen": 185067776, + "step": 58805 + }, + { + "epoch": 3.7648037897701814, + "grad_norm": 6.445944786071777, + "learning_rate": 8.73420975751762e-07, + "loss": 0.1482, + "num_input_tokens_seen": 185082944, + "step": 58810 + }, + { + "epoch": 3.765123871711158, + "grad_norm": 17.764863967895508, + "learning_rate": 8.72996779397109e-07, + "loss": 0.1886, + "num_input_tokens_seen": 185098944, + "step": 58815 + }, + { + "epoch": 3.765443953652135, + "grad_norm": 5.468904972076416, + "learning_rate": 8.725726642865215e-07, + "loss": 0.178, + "num_input_tokens_seen": 185112896, + "step": 58820 + }, + { + "epoch": 3.765764035593112, + "grad_norm": 6.897830963134766, + "learning_rate": 8.721486304411786e-07, + "loss": 0.2024, + "num_input_tokens_seen": 185127296, + "step": 58825 + }, + { + "epoch": 3.766084117534089, + "grad_norm": 10.334187507629395, + "learning_rate": 8.717246778822538e-07, + "loss": 0.1694, + "num_input_tokens_seen": 185143040, + "step": 58830 + }, + { + "epoch": 3.7664041994750654, + "grad_norm": 9.249602317810059, + "learning_rate": 8.713008066309164e-07, + "loss": 0.1773, + "num_input_tokens_seen": 185160768, + "step": 58835 + }, + { + "epoch": 3.7667242814160424, + "grad_norm": 4.9276604652404785, + "learning_rate": 8.708770167083346e-07, + "loss": 0.0715, + "num_input_tokens_seen": 185175936, + "step": 58840 + }, + { + "epoch": 3.7670443633570194, + "grad_norm": 3.8204734325408936, + "learning_rate": 8.704533081356676e-07, + "loss": 0.1279, + "num_input_tokens_seen": 185190720, + "step": 58845 + }, + { + "epoch": 3.7673644452979964, + "grad_norm": 10.252110481262207, + "learning_rate": 8.700296809340756e-07, + "loss": 0.1493, + "num_input_tokens_seen": 185207296, + "step": 58850 + }, + { + "epoch": 3.7676845272389734, + "grad_norm": 10.919593811035156, + "learning_rate": 8.696061351247095e-07, + "loss": 0.2158, + "num_input_tokens_seen": 185222272, + "step": 58855 + }, + { + "epoch": 3.76800460917995, + "grad_norm": 10.359292984008789, + "learning_rate": 8.691826707287215e-07, + "loss": 0.129, + "num_input_tokens_seen": 185237760, + "step": 58860 + }, + { + "epoch": 3.768324691120927, + "grad_norm": 7.175742149353027, + "learning_rate": 8.687592877672566e-07, + "loss": 0.1215, + "num_input_tokens_seen": 185252992, + "step": 58865 + }, + { + "epoch": 3.768644773061904, + "grad_norm": 8.253138542175293, + "learning_rate": 8.683359862614548e-07, + "loss": 0.204, + "num_input_tokens_seen": 185268288, + "step": 58870 + }, + { + "epoch": 3.7689648550028805, + "grad_norm": 7.55596923828125, + "learning_rate": 8.679127662324569e-07, + "loss": 0.1527, + "num_input_tokens_seen": 185285312, + "step": 58875 + }, + { + "epoch": 3.7692849369438575, + "grad_norm": 10.304444313049316, + "learning_rate": 8.674896277013922e-07, + "loss": 0.1456, + "num_input_tokens_seen": 185301312, + "step": 58880 + }, + { + "epoch": 3.7696050188848345, + "grad_norm": 8.269447326660156, + "learning_rate": 8.67066570689393e-07, + "loss": 0.1527, + "num_input_tokens_seen": 185315776, + "step": 58885 + }, + { + "epoch": 3.7699251008258114, + "grad_norm": 4.8624114990234375, + "learning_rate": 8.666435952175837e-07, + "loss": 0.114, + "num_input_tokens_seen": 185332032, + "step": 58890 + }, + { + "epoch": 3.7702451827667884, + "grad_norm": 8.21312141418457, + "learning_rate": 8.662207013070853e-07, + "loss": 0.1257, + "num_input_tokens_seen": 185347456, + "step": 58895 + }, + { + "epoch": 3.7705652647077654, + "grad_norm": 10.794302940368652, + "learning_rate": 8.65797888979015e-07, + "loss": 0.169, + "num_input_tokens_seen": 185363904, + "step": 58900 + }, + { + "epoch": 3.770885346648742, + "grad_norm": 6.989053726196289, + "learning_rate": 8.653751582544859e-07, + "loss": 0.1429, + "num_input_tokens_seen": 185379392, + "step": 58905 + }, + { + "epoch": 3.771205428589719, + "grad_norm": 9.433000564575195, + "learning_rate": 8.649525091546071e-07, + "loss": 0.1909, + "num_input_tokens_seen": 185395968, + "step": 58910 + }, + { + "epoch": 3.771525510530696, + "grad_norm": 8.021950721740723, + "learning_rate": 8.645299417004832e-07, + "loss": 0.2106, + "num_input_tokens_seen": 185410944, + "step": 58915 + }, + { + "epoch": 3.7718455924716725, + "grad_norm": 9.71424388885498, + "learning_rate": 8.641074559132142e-07, + "loss": 0.1437, + "num_input_tokens_seen": 185426816, + "step": 58920 + }, + { + "epoch": 3.7721656744126495, + "grad_norm": 23.32000160217285, + "learning_rate": 8.636850518138997e-07, + "loss": 0.2352, + "num_input_tokens_seen": 185441600, + "step": 58925 + }, + { + "epoch": 3.7724857563536265, + "grad_norm": 6.49536657333374, + "learning_rate": 8.632627294236287e-07, + "loss": 0.1437, + "num_input_tokens_seen": 185456192, + "step": 58930 + }, + { + "epoch": 3.7728058382946035, + "grad_norm": 3.899705171585083, + "learning_rate": 8.628404887634926e-07, + "loss": 0.1591, + "num_input_tokens_seen": 185471680, + "step": 58935 + }, + { + "epoch": 3.7731259202355805, + "grad_norm": 8.151461601257324, + "learning_rate": 8.624183298545744e-07, + "loss": 0.1462, + "num_input_tokens_seen": 185486528, + "step": 58940 + }, + { + "epoch": 3.7734460021765575, + "grad_norm": 12.920125007629395, + "learning_rate": 8.619962527179551e-07, + "loss": 0.1322, + "num_input_tokens_seen": 185502080, + "step": 58945 + }, + { + "epoch": 3.773766084117534, + "grad_norm": 11.698392868041992, + "learning_rate": 8.61574257374711e-07, + "loss": 0.2411, + "num_input_tokens_seen": 185518976, + "step": 58950 + }, + { + "epoch": 3.774086166058511, + "grad_norm": 9.13380241394043, + "learning_rate": 8.611523438459135e-07, + "loss": 0.1189, + "num_input_tokens_seen": 185534016, + "step": 58955 + }, + { + "epoch": 3.774406247999488, + "grad_norm": 5.891596794128418, + "learning_rate": 8.60730512152633e-07, + "loss": 0.1929, + "num_input_tokens_seen": 185550720, + "step": 58960 + }, + { + "epoch": 3.7747263299404645, + "grad_norm": 5.266009330749512, + "learning_rate": 8.603087623159303e-07, + "loss": 0.1363, + "num_input_tokens_seen": 185566080, + "step": 58965 + }, + { + "epoch": 3.7750464118814415, + "grad_norm": 22.958690643310547, + "learning_rate": 8.598870943568682e-07, + "loss": 0.1328, + "num_input_tokens_seen": 185581824, + "step": 58970 + }, + { + "epoch": 3.7753664938224185, + "grad_norm": 57.49211502075195, + "learning_rate": 8.594655082965017e-07, + "loss": 0.2467, + "num_input_tokens_seen": 185597056, + "step": 58975 + }, + { + "epoch": 3.7756865757633955, + "grad_norm": 9.691339492797852, + "learning_rate": 8.590440041558823e-07, + "loss": 0.1799, + "num_input_tokens_seen": 185611584, + "step": 58980 + }, + { + "epoch": 3.7760066577043725, + "grad_norm": 12.70793628692627, + "learning_rate": 8.586225819560578e-07, + "loss": 0.2062, + "num_input_tokens_seen": 185627712, + "step": 58985 + }, + { + "epoch": 3.7763267396453495, + "grad_norm": 7.667860984802246, + "learning_rate": 8.582012417180713e-07, + "loss": 0.165, + "num_input_tokens_seen": 185643520, + "step": 58990 + }, + { + "epoch": 3.776646821586326, + "grad_norm": 7.465511798858643, + "learning_rate": 8.577799834629647e-07, + "loss": 0.1311, + "num_input_tokens_seen": 185659008, + "step": 58995 + }, + { + "epoch": 3.776966903527303, + "grad_norm": 7.531183242797852, + "learning_rate": 8.573588072117697e-07, + "loss": 0.1303, + "num_input_tokens_seen": 185674368, + "step": 59000 + }, + { + "epoch": 3.77728698546828, + "grad_norm": 5.224091053009033, + "learning_rate": 8.569377129855208e-07, + "loss": 0.1685, + "num_input_tokens_seen": 185690816, + "step": 59005 + }, + { + "epoch": 3.7776070674092566, + "grad_norm": 8.431654930114746, + "learning_rate": 8.565167008052438e-07, + "loss": 0.1551, + "num_input_tokens_seen": 185705408, + "step": 59010 + }, + { + "epoch": 3.7779271493502335, + "grad_norm": 5.358313083648682, + "learning_rate": 8.560957706919626e-07, + "loss": 0.1196, + "num_input_tokens_seen": 185720448, + "step": 59015 + }, + { + "epoch": 3.7782472312912105, + "grad_norm": 6.779602527618408, + "learning_rate": 8.556749226666955e-07, + "loss": 0.1554, + "num_input_tokens_seen": 185737344, + "step": 59020 + }, + { + "epoch": 3.7785673132321875, + "grad_norm": 5.312948703765869, + "learning_rate": 8.55254156750458e-07, + "loss": 0.1578, + "num_input_tokens_seen": 185754624, + "step": 59025 + }, + { + "epoch": 3.7788873951731645, + "grad_norm": 4.795000076293945, + "learning_rate": 8.548334729642599e-07, + "loss": 0.1406, + "num_input_tokens_seen": 185769344, + "step": 59030 + }, + { + "epoch": 3.779207477114141, + "grad_norm": 6.555222988128662, + "learning_rate": 8.544128713291102e-07, + "loss": 0.1151, + "num_input_tokens_seen": 185784704, + "step": 59035 + }, + { + "epoch": 3.779527559055118, + "grad_norm": 12.626343727111816, + "learning_rate": 8.539923518660087e-07, + "loss": 0.2172, + "num_input_tokens_seen": 185799872, + "step": 59040 + }, + { + "epoch": 3.779847640996095, + "grad_norm": 54.845542907714844, + "learning_rate": 8.535719145959561e-07, + "loss": 0.2514, + "num_input_tokens_seen": 185815424, + "step": 59045 + }, + { + "epoch": 3.780167722937072, + "grad_norm": 26.484928131103516, + "learning_rate": 8.531515595399464e-07, + "loss": 0.2081, + "num_input_tokens_seen": 185830848, + "step": 59050 + }, + { + "epoch": 3.7804878048780486, + "grad_norm": 6.473377227783203, + "learning_rate": 8.527312867189694e-07, + "loss": 0.1937, + "num_input_tokens_seen": 185845760, + "step": 59055 + }, + { + "epoch": 3.7808078868190256, + "grad_norm": 10.961737632751465, + "learning_rate": 8.523110961540118e-07, + "loss": 0.2577, + "num_input_tokens_seen": 185861056, + "step": 59060 + }, + { + "epoch": 3.7811279687600026, + "grad_norm": 8.307049751281738, + "learning_rate": 8.518909878660547e-07, + "loss": 0.1585, + "num_input_tokens_seen": 185876928, + "step": 59065 + }, + { + "epoch": 3.7814480507009796, + "grad_norm": 6.90482759475708, + "learning_rate": 8.514709618760789e-07, + "loss": 0.1739, + "num_input_tokens_seen": 185892544, + "step": 59070 + }, + { + "epoch": 3.7817681326419565, + "grad_norm": 9.261107444763184, + "learning_rate": 8.510510182050544e-07, + "loss": 0.1742, + "num_input_tokens_seen": 185908608, + "step": 59075 + }, + { + "epoch": 3.782088214582933, + "grad_norm": 17.859546661376953, + "learning_rate": 8.506311568739547e-07, + "loss": 0.1917, + "num_input_tokens_seen": 185924800, + "step": 59080 + }, + { + "epoch": 3.78240829652391, + "grad_norm": 8.769471168518066, + "learning_rate": 8.502113779037421e-07, + "loss": 0.1868, + "num_input_tokens_seen": 185940864, + "step": 59085 + }, + { + "epoch": 3.782728378464887, + "grad_norm": 3.3883743286132812, + "learning_rate": 8.497916813153809e-07, + "loss": 0.1681, + "num_input_tokens_seen": 185956928, + "step": 59090 + }, + { + "epoch": 3.783048460405864, + "grad_norm": 8.025883674621582, + "learning_rate": 8.493720671298272e-07, + "loss": 0.1297, + "num_input_tokens_seen": 185972864, + "step": 59095 + }, + { + "epoch": 3.7833685423468406, + "grad_norm": 4.404480934143066, + "learning_rate": 8.489525353680348e-07, + "loss": 0.2017, + "num_input_tokens_seen": 185988416, + "step": 59100 + }, + { + "epoch": 3.7836886242878176, + "grad_norm": 49.00554656982422, + "learning_rate": 8.485330860509527e-07, + "loss": 0.1072, + "num_input_tokens_seen": 186004224, + "step": 59105 + }, + { + "epoch": 3.7840087062287946, + "grad_norm": 6.975579738616943, + "learning_rate": 8.481137191995254e-07, + "loss": 0.1486, + "num_input_tokens_seen": 186019584, + "step": 59110 + }, + { + "epoch": 3.7843287881697716, + "grad_norm": 6.622138023376465, + "learning_rate": 8.476944348346955e-07, + "loss": 0.1568, + "num_input_tokens_seen": 186034752, + "step": 59115 + }, + { + "epoch": 3.7846488701107486, + "grad_norm": 10.754019737243652, + "learning_rate": 8.472752329773989e-07, + "loss": 0.1745, + "num_input_tokens_seen": 186049984, + "step": 59120 + }, + { + "epoch": 3.784968952051725, + "grad_norm": 13.944219589233398, + "learning_rate": 8.468561136485684e-07, + "loss": 0.1704, + "num_input_tokens_seen": 186065472, + "step": 59125 + }, + { + "epoch": 3.785289033992702, + "grad_norm": 1.4396259784698486, + "learning_rate": 8.464370768691327e-07, + "loss": 0.1607, + "num_input_tokens_seen": 186081920, + "step": 59130 + }, + { + "epoch": 3.785609115933679, + "grad_norm": 8.357316970825195, + "learning_rate": 8.460181226600161e-07, + "loss": 0.1746, + "num_input_tokens_seen": 186098880, + "step": 59135 + }, + { + "epoch": 3.7859291978746556, + "grad_norm": 5.5183234214782715, + "learning_rate": 8.455992510421396e-07, + "loss": 0.1381, + "num_input_tokens_seen": 186114752, + "step": 59140 + }, + { + "epoch": 3.7862492798156326, + "grad_norm": 24.249441146850586, + "learning_rate": 8.451804620364187e-07, + "loss": 0.1937, + "num_input_tokens_seen": 186130304, + "step": 59145 + }, + { + "epoch": 3.7865693617566096, + "grad_norm": 4.675798416137695, + "learning_rate": 8.447617556637652e-07, + "loss": 0.2016, + "num_input_tokens_seen": 186145984, + "step": 59150 + }, + { + "epoch": 3.7868894436975866, + "grad_norm": 7.433457851409912, + "learning_rate": 8.443431319450893e-07, + "loss": 0.1861, + "num_input_tokens_seen": 186162240, + "step": 59155 + }, + { + "epoch": 3.7872095256385636, + "grad_norm": 5.284157752990723, + "learning_rate": 8.439245909012919e-07, + "loss": 0.13, + "num_input_tokens_seen": 186179200, + "step": 59160 + }, + { + "epoch": 3.7875296075795406, + "grad_norm": 6.652060508728027, + "learning_rate": 8.435061325532751e-07, + "loss": 0.1823, + "num_input_tokens_seen": 186193920, + "step": 59165 + }, + { + "epoch": 3.787849689520517, + "grad_norm": 6.157620429992676, + "learning_rate": 8.430877569219334e-07, + "loss": 0.1573, + "num_input_tokens_seen": 186209216, + "step": 59170 + }, + { + "epoch": 3.788169771461494, + "grad_norm": 6.298346996307373, + "learning_rate": 8.426694640281588e-07, + "loss": 0.1624, + "num_input_tokens_seen": 186225216, + "step": 59175 + }, + { + "epoch": 3.788489853402471, + "grad_norm": 6.015912055969238, + "learning_rate": 8.422512538928384e-07, + "loss": 0.1514, + "num_input_tokens_seen": 186240832, + "step": 59180 + }, + { + "epoch": 3.7888099353434477, + "grad_norm": 9.6181001663208, + "learning_rate": 8.418331265368548e-07, + "loss": 0.1732, + "num_input_tokens_seen": 186256128, + "step": 59185 + }, + { + "epoch": 3.7891300172844247, + "grad_norm": 10.43565845489502, + "learning_rate": 8.414150819810893e-07, + "loss": 0.1985, + "num_input_tokens_seen": 186273600, + "step": 59190 + }, + { + "epoch": 3.7894500992254017, + "grad_norm": 3.11832332611084, + "learning_rate": 8.409971202464138e-07, + "loss": 0.1339, + "num_input_tokens_seen": 186289920, + "step": 59195 + }, + { + "epoch": 3.7897701811663786, + "grad_norm": 4.6513824462890625, + "learning_rate": 8.405792413537014e-07, + "loss": 0.1111, + "num_input_tokens_seen": 186306816, + "step": 59200 + }, + { + "epoch": 3.7900902631073556, + "grad_norm": 4.737183570861816, + "learning_rate": 8.40161445323818e-07, + "loss": 0.18, + "num_input_tokens_seen": 186324160, + "step": 59205 + }, + { + "epoch": 3.7904103450483326, + "grad_norm": 5.155023097991943, + "learning_rate": 8.397437321776264e-07, + "loss": 0.136, + "num_input_tokens_seen": 186340224, + "step": 59210 + }, + { + "epoch": 3.790730426989309, + "grad_norm": 4.891942977905273, + "learning_rate": 8.393261019359849e-07, + "loss": 0.1257, + "num_input_tokens_seen": 186356928, + "step": 59215 + }, + { + "epoch": 3.791050508930286, + "grad_norm": 22.896841049194336, + "learning_rate": 8.389085546197467e-07, + "loss": 0.2072, + "num_input_tokens_seen": 186372288, + "step": 59220 + }, + { + "epoch": 3.791370590871263, + "grad_norm": 99.2401351928711, + "learning_rate": 8.384910902497648e-07, + "loss": 0.164, + "num_input_tokens_seen": 186388160, + "step": 59225 + }, + { + "epoch": 3.7916906728122397, + "grad_norm": 9.153043746948242, + "learning_rate": 8.380737088468815e-07, + "loss": 0.194, + "num_input_tokens_seen": 186404800, + "step": 59230 + }, + { + "epoch": 3.7920107547532167, + "grad_norm": 10.110634803771973, + "learning_rate": 8.376564104319415e-07, + "loss": 0.1868, + "num_input_tokens_seen": 186419584, + "step": 59235 + }, + { + "epoch": 3.7923308366941937, + "grad_norm": 21.842880249023438, + "learning_rate": 8.372391950257816e-07, + "loss": 0.186, + "num_input_tokens_seen": 186434816, + "step": 59240 + }, + { + "epoch": 3.7926509186351707, + "grad_norm": 7.764504909515381, + "learning_rate": 8.368220626492352e-07, + "loss": 0.176, + "num_input_tokens_seen": 186451136, + "step": 59245 + }, + { + "epoch": 3.7929710005761477, + "grad_norm": 6.741547584533691, + "learning_rate": 8.364050133231319e-07, + "loss": 0.1469, + "num_input_tokens_seen": 186466624, + "step": 59250 + }, + { + "epoch": 3.7932910825171247, + "grad_norm": 11.792462348937988, + "learning_rate": 8.359880470682969e-07, + "loss": 0.2088, + "num_input_tokens_seen": 186482048, + "step": 59255 + }, + { + "epoch": 3.793611164458101, + "grad_norm": 3.9650678634643555, + "learning_rate": 8.355711639055502e-07, + "loss": 0.1554, + "num_input_tokens_seen": 186497856, + "step": 59260 + }, + { + "epoch": 3.793931246399078, + "grad_norm": 11.226030349731445, + "learning_rate": 8.351543638557119e-07, + "loss": 0.1703, + "num_input_tokens_seen": 186514176, + "step": 59265 + }, + { + "epoch": 3.794251328340055, + "grad_norm": 9.218916893005371, + "learning_rate": 8.34737646939591e-07, + "loss": 0.2763, + "num_input_tokens_seen": 186529344, + "step": 59270 + }, + { + "epoch": 3.7945714102810317, + "grad_norm": 11.077467918395996, + "learning_rate": 8.343210131779997e-07, + "loss": 0.2097, + "num_input_tokens_seen": 186546304, + "step": 59275 + }, + { + "epoch": 3.7948914922220087, + "grad_norm": 5.5078840255737305, + "learning_rate": 8.339044625917389e-07, + "loss": 0.1346, + "num_input_tokens_seen": 186561728, + "step": 59280 + }, + { + "epoch": 3.7952115741629857, + "grad_norm": 4.52858304977417, + "learning_rate": 8.33487995201612e-07, + "loss": 0.0934, + "num_input_tokens_seen": 186577280, + "step": 59285 + }, + { + "epoch": 3.7955316561039627, + "grad_norm": 8.608771324157715, + "learning_rate": 8.330716110284137e-07, + "loss": 0.2336, + "num_input_tokens_seen": 186594048, + "step": 59290 + }, + { + "epoch": 3.7958517380449397, + "grad_norm": 8.512740135192871, + "learning_rate": 8.32655310092936e-07, + "loss": 0.182, + "num_input_tokens_seen": 186610432, + "step": 59295 + }, + { + "epoch": 3.7961718199859162, + "grad_norm": 4.559720516204834, + "learning_rate": 8.322390924159688e-07, + "loss": 0.1275, + "num_input_tokens_seen": 186625024, + "step": 59300 + }, + { + "epoch": 3.7964919019268932, + "grad_norm": 5.620969772338867, + "learning_rate": 8.318229580182924e-07, + "loss": 0.2067, + "num_input_tokens_seen": 186640256, + "step": 59305 + }, + { + "epoch": 3.79681198386787, + "grad_norm": 4.871840953826904, + "learning_rate": 8.314069069206901e-07, + "loss": 0.1439, + "num_input_tokens_seen": 186657152, + "step": 59310 + }, + { + "epoch": 3.797132065808847, + "grad_norm": 7.990790843963623, + "learning_rate": 8.309909391439339e-07, + "loss": 0.2074, + "num_input_tokens_seen": 186672640, + "step": 59315 + }, + { + "epoch": 3.7974521477498238, + "grad_norm": 6.826452255249023, + "learning_rate": 8.305750547087974e-07, + "loss": 0.1296, + "num_input_tokens_seen": 186689664, + "step": 59320 + }, + { + "epoch": 3.7977722296908007, + "grad_norm": 5.792688846588135, + "learning_rate": 8.301592536360467e-07, + "loss": 0.174, + "num_input_tokens_seen": 186704320, + "step": 59325 + }, + { + "epoch": 3.7980923116317777, + "grad_norm": 10.887834548950195, + "learning_rate": 8.297435359464453e-07, + "loss": 0.2151, + "num_input_tokens_seen": 186719040, + "step": 59330 + }, + { + "epoch": 3.7984123935727547, + "grad_norm": 3.0850284099578857, + "learning_rate": 8.293279016607514e-07, + "loss": 0.1068, + "num_input_tokens_seen": 186735680, + "step": 59335 + }, + { + "epoch": 3.7987324755137317, + "grad_norm": 7.6348676681518555, + "learning_rate": 8.289123507997193e-07, + "loss": 0.1465, + "num_input_tokens_seen": 186750720, + "step": 59340 + }, + { + "epoch": 3.7990525574547083, + "grad_norm": 6.774335861206055, + "learning_rate": 8.284968833841009e-07, + "loss": 0.1444, + "num_input_tokens_seen": 186766720, + "step": 59345 + }, + { + "epoch": 3.7993726393956853, + "grad_norm": 11.054762840270996, + "learning_rate": 8.280814994346415e-07, + "loss": 0.1979, + "num_input_tokens_seen": 186783808, + "step": 59350 + }, + { + "epoch": 3.7996927213366622, + "grad_norm": 7.61158561706543, + "learning_rate": 8.276661989720833e-07, + "loss": 0.1956, + "num_input_tokens_seen": 186799488, + "step": 59355 + }, + { + "epoch": 3.8000128032776392, + "grad_norm": 4.952817916870117, + "learning_rate": 8.27250982017164e-07, + "loss": 0.1846, + "num_input_tokens_seen": 186815488, + "step": 59360 + }, + { + "epoch": 3.800332885218616, + "grad_norm": 4.401570796966553, + "learning_rate": 8.268358485906181e-07, + "loss": 0.1437, + "num_input_tokens_seen": 186829632, + "step": 59365 + }, + { + "epoch": 3.8006529671595928, + "grad_norm": 5.70245885848999, + "learning_rate": 8.264207987131745e-07, + "loss": 0.109, + "num_input_tokens_seen": 186844736, + "step": 59370 + }, + { + "epoch": 3.8009730491005698, + "grad_norm": 8.99044418334961, + "learning_rate": 8.260058324055586e-07, + "loss": 0.1567, + "num_input_tokens_seen": 186860416, + "step": 59375 + }, + { + "epoch": 3.8012931310415468, + "grad_norm": 8.0061616897583, + "learning_rate": 8.255909496884914e-07, + "loss": 0.2166, + "num_input_tokens_seen": 186875776, + "step": 59380 + }, + { + "epoch": 3.8016132129825237, + "grad_norm": 8.185569763183594, + "learning_rate": 8.251761505826922e-07, + "loss": 0.2555, + "num_input_tokens_seen": 186891648, + "step": 59385 + }, + { + "epoch": 3.8019332949235003, + "grad_norm": 6.642343997955322, + "learning_rate": 8.247614351088703e-07, + "loss": 0.1712, + "num_input_tokens_seen": 186907072, + "step": 59390 + }, + { + "epoch": 3.8022533768644773, + "grad_norm": 3.7654874324798584, + "learning_rate": 8.243468032877375e-07, + "loss": 0.0999, + "num_input_tokens_seen": 186923264, + "step": 59395 + }, + { + "epoch": 3.8025734588054543, + "grad_norm": 6.512526988983154, + "learning_rate": 8.239322551399967e-07, + "loss": 0.1436, + "num_input_tokens_seen": 186938688, + "step": 59400 + }, + { + "epoch": 3.802893540746431, + "grad_norm": 3.925675392150879, + "learning_rate": 8.235177906863492e-07, + "loss": 0.1095, + "num_input_tokens_seen": 186954304, + "step": 59405 + }, + { + "epoch": 3.803213622687408, + "grad_norm": 7.721930980682373, + "learning_rate": 8.231034099474905e-07, + "loss": 0.2212, + "num_input_tokens_seen": 186969024, + "step": 59410 + }, + { + "epoch": 3.803533704628385, + "grad_norm": 10.285126686096191, + "learning_rate": 8.226891129441122e-07, + "loss": 0.1865, + "num_input_tokens_seen": 186984512, + "step": 59415 + }, + { + "epoch": 3.803853786569362, + "grad_norm": 18.793363571166992, + "learning_rate": 8.222748996969044e-07, + "loss": 0.167, + "num_input_tokens_seen": 187000320, + "step": 59420 + }, + { + "epoch": 3.804173868510339, + "grad_norm": 5.875027656555176, + "learning_rate": 8.218607702265474e-07, + "loss": 0.2452, + "num_input_tokens_seen": 187015936, + "step": 59425 + }, + { + "epoch": 3.8044939504513158, + "grad_norm": 7.052250385284424, + "learning_rate": 8.214467245537233e-07, + "loss": 0.1496, + "num_input_tokens_seen": 187031232, + "step": 59430 + }, + { + "epoch": 3.8048140323922923, + "grad_norm": 7.533268928527832, + "learning_rate": 8.210327626991066e-07, + "loss": 0.1514, + "num_input_tokens_seen": 187045824, + "step": 59435 + }, + { + "epoch": 3.8051341143332693, + "grad_norm": 7.170531749725342, + "learning_rate": 8.206188846833685e-07, + "loss": 0.1598, + "num_input_tokens_seen": 187062208, + "step": 59440 + }, + { + "epoch": 3.8054541962742463, + "grad_norm": 9.687552452087402, + "learning_rate": 8.202050905271752e-07, + "loss": 0.1775, + "num_input_tokens_seen": 187077888, + "step": 59445 + }, + { + "epoch": 3.805774278215223, + "grad_norm": 13.541933059692383, + "learning_rate": 8.197913802511903e-07, + "loss": 0.1985, + "num_input_tokens_seen": 187093120, + "step": 59450 + }, + { + "epoch": 3.8060943601562, + "grad_norm": 6.967306137084961, + "learning_rate": 8.193777538760719e-07, + "loss": 0.1402, + "num_input_tokens_seen": 187109056, + "step": 59455 + }, + { + "epoch": 3.806414442097177, + "grad_norm": 5.437869548797607, + "learning_rate": 8.189642114224739e-07, + "loss": 0.1896, + "num_input_tokens_seen": 187124480, + "step": 59460 + }, + { + "epoch": 3.806734524038154, + "grad_norm": 7.89445161819458, + "learning_rate": 8.185507529110473e-07, + "loss": 0.1081, + "num_input_tokens_seen": 187140864, + "step": 59465 + }, + { + "epoch": 3.807054605979131, + "grad_norm": 21.166522979736328, + "learning_rate": 8.18137378362438e-07, + "loss": 0.1726, + "num_input_tokens_seen": 187156736, + "step": 59470 + }, + { + "epoch": 3.807374687920108, + "grad_norm": 17.133169174194336, + "learning_rate": 8.177240877972875e-07, + "loss": 0.1552, + "num_input_tokens_seen": 187171968, + "step": 59475 + }, + { + "epoch": 3.8076947698610843, + "grad_norm": 7.1981682777404785, + "learning_rate": 8.173108812362334e-07, + "loss": 0.1448, + "num_input_tokens_seen": 187187648, + "step": 59480 + }, + { + "epoch": 3.8080148518020613, + "grad_norm": 4.627009391784668, + "learning_rate": 8.168977586999091e-07, + "loss": 0.1607, + "num_input_tokens_seen": 187204416, + "step": 59485 + }, + { + "epoch": 3.8083349337430383, + "grad_norm": 9.141487121582031, + "learning_rate": 8.164847202089429e-07, + "loss": 0.1403, + "num_input_tokens_seen": 187219328, + "step": 59490 + }, + { + "epoch": 3.808655015684015, + "grad_norm": 8.292530059814453, + "learning_rate": 8.160717657839625e-07, + "loss": 0.1597, + "num_input_tokens_seen": 187234560, + "step": 59495 + }, + { + "epoch": 3.808975097624992, + "grad_norm": 8.676654815673828, + "learning_rate": 8.156588954455849e-07, + "loss": 0.1466, + "num_input_tokens_seen": 187250688, + "step": 59500 + }, + { + "epoch": 3.809295179565969, + "grad_norm": 5.628675937652588, + "learning_rate": 8.152461092144306e-07, + "loss": 0.1375, + "num_input_tokens_seen": 187266432, + "step": 59505 + }, + { + "epoch": 3.809615261506946, + "grad_norm": 12.977218627929688, + "learning_rate": 8.148334071111084e-07, + "loss": 0.1379, + "num_input_tokens_seen": 187283200, + "step": 59510 + }, + { + "epoch": 3.809935343447923, + "grad_norm": 7.030035495758057, + "learning_rate": 8.144207891562287e-07, + "loss": 0.2232, + "num_input_tokens_seen": 187299136, + "step": 59515 + }, + { + "epoch": 3.8102554253889, + "grad_norm": 8.919046401977539, + "learning_rate": 8.140082553703948e-07, + "loss": 0.146, + "num_input_tokens_seen": 187314240, + "step": 59520 + }, + { + "epoch": 3.8105755073298764, + "grad_norm": 4.432891845703125, + "learning_rate": 8.135958057742061e-07, + "loss": 0.1601, + "num_input_tokens_seen": 187329984, + "step": 59525 + }, + { + "epoch": 3.8108955892708534, + "grad_norm": 7.034800052642822, + "learning_rate": 8.131834403882605e-07, + "loss": 0.1516, + "num_input_tokens_seen": 187344960, + "step": 59530 + }, + { + "epoch": 3.8112156712118304, + "grad_norm": 16.13497543334961, + "learning_rate": 8.127711592331456e-07, + "loss": 0.194, + "num_input_tokens_seen": 187360320, + "step": 59535 + }, + { + "epoch": 3.811535753152807, + "grad_norm": 9.543824195861816, + "learning_rate": 8.123589623294522e-07, + "loss": 0.1793, + "num_input_tokens_seen": 187377088, + "step": 59540 + }, + { + "epoch": 3.811855835093784, + "grad_norm": 10.17409896850586, + "learning_rate": 8.1194684969776e-07, + "loss": 0.2101, + "num_input_tokens_seen": 187394368, + "step": 59545 + }, + { + "epoch": 3.812175917034761, + "grad_norm": 6.548429012298584, + "learning_rate": 8.115348213586505e-07, + "loss": 0.1827, + "num_input_tokens_seen": 187412224, + "step": 59550 + }, + { + "epoch": 3.812495998975738, + "grad_norm": 7.233402729034424, + "learning_rate": 8.111228773326968e-07, + "loss": 0.1453, + "num_input_tokens_seen": 187428608, + "step": 59555 + }, + { + "epoch": 3.812816080916715, + "grad_norm": 6.263277053833008, + "learning_rate": 8.107110176404697e-07, + "loss": 0.1248, + "num_input_tokens_seen": 187445184, + "step": 59560 + }, + { + "epoch": 3.8131361628576914, + "grad_norm": 6.173387050628662, + "learning_rate": 8.102992423025352e-07, + "loss": 0.1271, + "num_input_tokens_seen": 187460416, + "step": 59565 + }, + { + "epoch": 3.8134562447986684, + "grad_norm": 6.394774436950684, + "learning_rate": 8.098875513394544e-07, + "loss": 0.1845, + "num_input_tokens_seen": 187475328, + "step": 59570 + }, + { + "epoch": 3.8137763267396454, + "grad_norm": 6.251878261566162, + "learning_rate": 8.094759447717865e-07, + "loss": 0.1381, + "num_input_tokens_seen": 187491392, + "step": 59575 + }, + { + "epoch": 3.8140964086806224, + "grad_norm": 12.958412170410156, + "learning_rate": 8.090644226200845e-07, + "loss": 0.2322, + "num_input_tokens_seen": 187506880, + "step": 59580 + }, + { + "epoch": 3.814416490621599, + "grad_norm": 2.0393381118774414, + "learning_rate": 8.086529849048974e-07, + "loss": 0.1146, + "num_input_tokens_seen": 187522368, + "step": 59585 + }, + { + "epoch": 3.814736572562576, + "grad_norm": 9.78134536743164, + "learning_rate": 8.082416316467703e-07, + "loss": 0.141, + "num_input_tokens_seen": 187538560, + "step": 59590 + }, + { + "epoch": 3.815056654503553, + "grad_norm": 6.924389839172363, + "learning_rate": 8.07830362866244e-07, + "loss": 0.1886, + "num_input_tokens_seen": 187556608, + "step": 59595 + }, + { + "epoch": 3.81537673644453, + "grad_norm": 6.514430046081543, + "learning_rate": 8.074191785838554e-07, + "loss": 0.1766, + "num_input_tokens_seen": 187573568, + "step": 59600 + }, + { + "epoch": 3.815696818385507, + "grad_norm": 9.501279830932617, + "learning_rate": 8.070080788201365e-07, + "loss": 0.1894, + "num_input_tokens_seen": 187590720, + "step": 59605 + }, + { + "epoch": 3.8160169003264834, + "grad_norm": 6.443179130554199, + "learning_rate": 8.065970635956147e-07, + "loss": 0.2042, + "num_input_tokens_seen": 187606144, + "step": 59610 + }, + { + "epoch": 3.8163369822674604, + "grad_norm": 14.999528884887695, + "learning_rate": 8.061861329308168e-07, + "loss": 0.2101, + "num_input_tokens_seen": 187622592, + "step": 59615 + }, + { + "epoch": 3.8166570642084374, + "grad_norm": 7.0218071937561035, + "learning_rate": 8.05775286846259e-07, + "loss": 0.1257, + "num_input_tokens_seen": 187638080, + "step": 59620 + }, + { + "epoch": 3.8169771461494144, + "grad_norm": 3.891185998916626, + "learning_rate": 8.053645253624593e-07, + "loss": 0.117, + "num_input_tokens_seen": 187654144, + "step": 59625 + }, + { + "epoch": 3.817297228090391, + "grad_norm": 5.779004096984863, + "learning_rate": 8.049538484999281e-07, + "loss": 0.176, + "num_input_tokens_seen": 187670720, + "step": 59630 + }, + { + "epoch": 3.817617310031368, + "grad_norm": 5.145244598388672, + "learning_rate": 8.045432562791727e-07, + "loss": 0.1196, + "num_input_tokens_seen": 187686016, + "step": 59635 + }, + { + "epoch": 3.817937391972345, + "grad_norm": 7.04364013671875, + "learning_rate": 8.041327487206957e-07, + "loss": 0.1555, + "num_input_tokens_seen": 187701312, + "step": 59640 + }, + { + "epoch": 3.818257473913322, + "grad_norm": 3.0372560024261475, + "learning_rate": 8.037223258449947e-07, + "loss": 0.154, + "num_input_tokens_seen": 187717440, + "step": 59645 + }, + { + "epoch": 3.818577555854299, + "grad_norm": 6.241101264953613, + "learning_rate": 8.03311987672567e-07, + "loss": 0.1606, + "num_input_tokens_seen": 187732992, + "step": 59650 + }, + { + "epoch": 3.8188976377952755, + "grad_norm": 7.089536190032959, + "learning_rate": 8.029017342238995e-07, + "loss": 0.1504, + "num_input_tokens_seen": 187748736, + "step": 59655 + }, + { + "epoch": 3.8192177197362525, + "grad_norm": 4.873488426208496, + "learning_rate": 8.024915655194801e-07, + "loss": 0.1484, + "num_input_tokens_seen": 187764160, + "step": 59660 + }, + { + "epoch": 3.8195378016772294, + "grad_norm": 10.792132377624512, + "learning_rate": 8.0208148157979e-07, + "loss": 0.1643, + "num_input_tokens_seen": 187780800, + "step": 59665 + }, + { + "epoch": 3.819857883618206, + "grad_norm": 8.647377014160156, + "learning_rate": 8.016714824253063e-07, + "loss": 0.1844, + "num_input_tokens_seen": 187796160, + "step": 59670 + }, + { + "epoch": 3.820177965559183, + "grad_norm": 6.8361310958862305, + "learning_rate": 8.012615680765029e-07, + "loss": 0.1807, + "num_input_tokens_seen": 187811840, + "step": 59675 + }, + { + "epoch": 3.82049804750016, + "grad_norm": 8.241837501525879, + "learning_rate": 8.00851738553848e-07, + "loss": 0.1676, + "num_input_tokens_seen": 187827584, + "step": 59680 + }, + { + "epoch": 3.820818129441137, + "grad_norm": 6.976298809051514, + "learning_rate": 8.004419938778071e-07, + "loss": 0.1598, + "num_input_tokens_seen": 187842816, + "step": 59685 + }, + { + "epoch": 3.821138211382114, + "grad_norm": 16.421354293823242, + "learning_rate": 8.000323340688396e-07, + "loss": 0.1937, + "num_input_tokens_seen": 187857536, + "step": 59690 + }, + { + "epoch": 3.821458293323091, + "grad_norm": 44.42338180541992, + "learning_rate": 7.996227591474029e-07, + "loss": 0.156, + "num_input_tokens_seen": 187872832, + "step": 59695 + }, + { + "epoch": 3.8217783752640675, + "grad_norm": 24.829130172729492, + "learning_rate": 7.992132691339491e-07, + "loss": 0.1605, + "num_input_tokens_seen": 187887168, + "step": 59700 + }, + { + "epoch": 3.8220984572050445, + "grad_norm": 7.066705703735352, + "learning_rate": 7.988038640489252e-07, + "loss": 0.1874, + "num_input_tokens_seen": 187902464, + "step": 59705 + }, + { + "epoch": 3.8224185391460215, + "grad_norm": 6.289402484893799, + "learning_rate": 7.983945439127753e-07, + "loss": 0.172, + "num_input_tokens_seen": 187917632, + "step": 59710 + }, + { + "epoch": 3.822738621086998, + "grad_norm": 5.94119119644165, + "learning_rate": 7.979853087459386e-07, + "loss": 0.2199, + "num_input_tokens_seen": 187933440, + "step": 59715 + }, + { + "epoch": 3.823058703027975, + "grad_norm": 8.794683456420898, + "learning_rate": 7.97576158568849e-07, + "loss": 0.1556, + "num_input_tokens_seen": 187949376, + "step": 59720 + }, + { + "epoch": 3.823378784968952, + "grad_norm": 7.583106517791748, + "learning_rate": 7.971670934019405e-07, + "loss": 0.2253, + "num_input_tokens_seen": 187964480, + "step": 59725 + }, + { + "epoch": 3.823698866909929, + "grad_norm": 7.448701858520508, + "learning_rate": 7.967581132656357e-07, + "loss": 0.2192, + "num_input_tokens_seen": 187980608, + "step": 59730 + }, + { + "epoch": 3.824018948850906, + "grad_norm": 8.520063400268555, + "learning_rate": 7.963492181803606e-07, + "loss": 0.193, + "num_input_tokens_seen": 187996416, + "step": 59735 + }, + { + "epoch": 3.824339030791883, + "grad_norm": 13.297135353088379, + "learning_rate": 7.959404081665298e-07, + "loss": 0.2356, + "num_input_tokens_seen": 188012032, + "step": 59740 + }, + { + "epoch": 3.8246591127328595, + "grad_norm": 8.26310920715332, + "learning_rate": 7.955316832445598e-07, + "loss": 0.1795, + "num_input_tokens_seen": 188028160, + "step": 59745 + }, + { + "epoch": 3.8249791946738365, + "grad_norm": 6.867763519287109, + "learning_rate": 7.951230434348595e-07, + "loss": 0.1108, + "num_input_tokens_seen": 188043264, + "step": 59750 + }, + { + "epoch": 3.8252992766148135, + "grad_norm": 74.63874816894531, + "learning_rate": 7.947144887578329e-07, + "loss": 0.2085, + "num_input_tokens_seen": 188058112, + "step": 59755 + }, + { + "epoch": 3.82561935855579, + "grad_norm": 7.781517505645752, + "learning_rate": 7.943060192338839e-07, + "loss": 0.1225, + "num_input_tokens_seen": 188073344, + "step": 59760 + }, + { + "epoch": 3.825939440496767, + "grad_norm": 25.809885025024414, + "learning_rate": 7.938976348834059e-07, + "loss": 0.1736, + "num_input_tokens_seen": 188088960, + "step": 59765 + }, + { + "epoch": 3.826259522437744, + "grad_norm": 7.54569149017334, + "learning_rate": 7.934893357267953e-07, + "loss": 0.1727, + "num_input_tokens_seen": 188105152, + "step": 59770 + }, + { + "epoch": 3.826579604378721, + "grad_norm": 4.019674301147461, + "learning_rate": 7.930811217844365e-07, + "loss": 0.2031, + "num_input_tokens_seen": 188121152, + "step": 59775 + }, + { + "epoch": 3.826899686319698, + "grad_norm": 8.522438049316406, + "learning_rate": 7.926729930767163e-07, + "loss": 0.1778, + "num_input_tokens_seen": 188136128, + "step": 59780 + }, + { + "epoch": 3.827219768260675, + "grad_norm": 8.336214065551758, + "learning_rate": 7.922649496240137e-07, + "loss": 0.1873, + "num_input_tokens_seen": 188152320, + "step": 59785 + }, + { + "epoch": 3.8275398502016516, + "grad_norm": 4.222240447998047, + "learning_rate": 7.918569914467041e-07, + "loss": 0.1792, + "num_input_tokens_seen": 188169728, + "step": 59790 + }, + { + "epoch": 3.8278599321426285, + "grad_norm": 7.577945232391357, + "learning_rate": 7.914491185651594e-07, + "loss": 0.1481, + "num_input_tokens_seen": 188185856, + "step": 59795 + }, + { + "epoch": 3.8281800140836055, + "grad_norm": 34.40736389160156, + "learning_rate": 7.910413309997456e-07, + "loss": 0.2949, + "num_input_tokens_seen": 188202176, + "step": 59800 + }, + { + "epoch": 3.828500096024582, + "grad_norm": 7.637650489807129, + "learning_rate": 7.906336287708258e-07, + "loss": 0.1485, + "num_input_tokens_seen": 188216832, + "step": 59805 + }, + { + "epoch": 3.828820177965559, + "grad_norm": 7.058324813842773, + "learning_rate": 7.902260118987592e-07, + "loss": 0.1764, + "num_input_tokens_seen": 188232064, + "step": 59810 + }, + { + "epoch": 3.829140259906536, + "grad_norm": 8.384357452392578, + "learning_rate": 7.898184804039e-07, + "loss": 0.1855, + "num_input_tokens_seen": 188246272, + "step": 59815 + }, + { + "epoch": 3.829460341847513, + "grad_norm": 6.539083480834961, + "learning_rate": 7.894110343065978e-07, + "loss": 0.1861, + "num_input_tokens_seen": 188262400, + "step": 59820 + }, + { + "epoch": 3.82978042378849, + "grad_norm": 7.2680158615112305, + "learning_rate": 7.890036736271983e-07, + "loss": 0.1767, + "num_input_tokens_seen": 188278592, + "step": 59825 + }, + { + "epoch": 3.8301005057294666, + "grad_norm": 3.2110440731048584, + "learning_rate": 7.885963983860431e-07, + "loss": 0.1602, + "num_input_tokens_seen": 188294272, + "step": 59830 + }, + { + "epoch": 3.8304205876704436, + "grad_norm": 4.199761867523193, + "learning_rate": 7.881892086034695e-07, + "loss": 0.1602, + "num_input_tokens_seen": 188308992, + "step": 59835 + }, + { + "epoch": 3.8307406696114206, + "grad_norm": 5.152889251708984, + "learning_rate": 7.877821042998093e-07, + "loss": 0.1537, + "num_input_tokens_seen": 188324224, + "step": 59840 + }, + { + "epoch": 3.8310607515523976, + "grad_norm": 5.274952411651611, + "learning_rate": 7.87375085495394e-07, + "loss": 0.1458, + "num_input_tokens_seen": 188339136, + "step": 59845 + }, + { + "epoch": 3.831380833493374, + "grad_norm": 14.279458045959473, + "learning_rate": 7.869681522105446e-07, + "loss": 0.1341, + "num_input_tokens_seen": 188354432, + "step": 59850 + }, + { + "epoch": 3.831700915434351, + "grad_norm": 6.966526031494141, + "learning_rate": 7.865613044655835e-07, + "loss": 0.2227, + "num_input_tokens_seen": 188368640, + "step": 59855 + }, + { + "epoch": 3.832020997375328, + "grad_norm": 4.748409271240234, + "learning_rate": 7.861545422808259e-07, + "loss": 0.0862, + "num_input_tokens_seen": 188385344, + "step": 59860 + }, + { + "epoch": 3.832341079316305, + "grad_norm": 5.635157585144043, + "learning_rate": 7.857478656765826e-07, + "loss": 0.1752, + "num_input_tokens_seen": 188401984, + "step": 59865 + }, + { + "epoch": 3.832661161257282, + "grad_norm": 7.925894260406494, + "learning_rate": 7.853412746731634e-07, + "loss": 0.2178, + "num_input_tokens_seen": 188417472, + "step": 59870 + }, + { + "epoch": 3.8329812431982586, + "grad_norm": 6.6017746925354, + "learning_rate": 7.84934769290868e-07, + "loss": 0.1809, + "num_input_tokens_seen": 188432192, + "step": 59875 + }, + { + "epoch": 3.8333013251392356, + "grad_norm": 5.7083282470703125, + "learning_rate": 7.845283495499981e-07, + "loss": 0.1476, + "num_input_tokens_seen": 188448576, + "step": 59880 + }, + { + "epoch": 3.8336214070802126, + "grad_norm": 9.616961479187012, + "learning_rate": 7.841220154708454e-07, + "loss": 0.1717, + "num_input_tokens_seen": 188463744, + "step": 59885 + }, + { + "epoch": 3.8339414890211896, + "grad_norm": 9.457016944885254, + "learning_rate": 7.837157670737025e-07, + "loss": 0.2148, + "num_input_tokens_seen": 188479488, + "step": 59890 + }, + { + "epoch": 3.834261570962166, + "grad_norm": 6.106605529785156, + "learning_rate": 7.833096043788543e-07, + "loss": 0.1013, + "num_input_tokens_seen": 188494464, + "step": 59895 + }, + { + "epoch": 3.834581652903143, + "grad_norm": 6.8962082862854, + "learning_rate": 7.829035274065824e-07, + "loss": 0.1325, + "num_input_tokens_seen": 188509376, + "step": 59900 + }, + { + "epoch": 3.83490173484412, + "grad_norm": 8.599390029907227, + "learning_rate": 7.824975361771644e-07, + "loss": 0.122, + "num_input_tokens_seen": 188524032, + "step": 59905 + }, + { + "epoch": 3.835221816785097, + "grad_norm": 6.747931957244873, + "learning_rate": 7.820916307108734e-07, + "loss": 0.0974, + "num_input_tokens_seen": 188539392, + "step": 59910 + }, + { + "epoch": 3.835541898726074, + "grad_norm": 8.048528671264648, + "learning_rate": 7.816858110279782e-07, + "loss": 0.1441, + "num_input_tokens_seen": 188554240, + "step": 59915 + }, + { + "epoch": 3.8358619806670506, + "grad_norm": 5.393551826477051, + "learning_rate": 7.812800771487422e-07, + "loss": 0.1766, + "num_input_tokens_seen": 188570240, + "step": 59920 + }, + { + "epoch": 3.8361820626080276, + "grad_norm": 11.722923278808594, + "learning_rate": 7.808744290934275e-07, + "loss": 0.1876, + "num_input_tokens_seen": 188585024, + "step": 59925 + }, + { + "epoch": 3.8365021445490046, + "grad_norm": 5.404016017913818, + "learning_rate": 7.804688668822893e-07, + "loss": 0.1244, + "num_input_tokens_seen": 188600896, + "step": 59930 + }, + { + "epoch": 3.836822226489981, + "grad_norm": 6.534165382385254, + "learning_rate": 7.800633905355792e-07, + "loss": 0.1395, + "num_input_tokens_seen": 188616256, + "step": 59935 + }, + { + "epoch": 3.837142308430958, + "grad_norm": 4.257538318634033, + "learning_rate": 7.796580000735443e-07, + "loss": 0.1308, + "num_input_tokens_seen": 188633088, + "step": 59940 + }, + { + "epoch": 3.837462390371935, + "grad_norm": 9.475396156311035, + "learning_rate": 7.792526955164281e-07, + "loss": 0.1823, + "num_input_tokens_seen": 188650880, + "step": 59945 + }, + { + "epoch": 3.837782472312912, + "grad_norm": 6.564140796661377, + "learning_rate": 7.788474768844686e-07, + "loss": 0.1192, + "num_input_tokens_seen": 188668096, + "step": 59950 + }, + { + "epoch": 3.838102554253889, + "grad_norm": 32.47801971435547, + "learning_rate": 7.784423441979025e-07, + "loss": 0.1912, + "num_input_tokens_seen": 188685248, + "step": 59955 + }, + { + "epoch": 3.838422636194866, + "grad_norm": 5.298294544219971, + "learning_rate": 7.780372974769565e-07, + "loss": 0.1559, + "num_input_tokens_seen": 188700992, + "step": 59960 + }, + { + "epoch": 3.8387427181358427, + "grad_norm": 7.010766983032227, + "learning_rate": 7.776323367418606e-07, + "loss": 0.1487, + "num_input_tokens_seen": 188716288, + "step": 59965 + }, + { + "epoch": 3.8390628000768197, + "grad_norm": 6.5378642082214355, + "learning_rate": 7.772274620128323e-07, + "loss": 0.1905, + "num_input_tokens_seen": 188731648, + "step": 59970 + }, + { + "epoch": 3.8393828820177966, + "grad_norm": 9.205072402954102, + "learning_rate": 7.76822673310092e-07, + "loss": 0.2068, + "num_input_tokens_seen": 188747648, + "step": 59975 + }, + { + "epoch": 3.839702963958773, + "grad_norm": 6.071143627166748, + "learning_rate": 7.764179706538516e-07, + "loss": 0.166, + "num_input_tokens_seen": 188764224, + "step": 59980 + }, + { + "epoch": 3.84002304589975, + "grad_norm": 10.870688438415527, + "learning_rate": 7.760133540643191e-07, + "loss": 0.1819, + "num_input_tokens_seen": 188782016, + "step": 59985 + }, + { + "epoch": 3.840343127840727, + "grad_norm": 62.310569763183594, + "learning_rate": 7.756088235617015e-07, + "loss": 0.2164, + "num_input_tokens_seen": 188797056, + "step": 59990 + }, + { + "epoch": 3.840663209781704, + "grad_norm": 5.135258197784424, + "learning_rate": 7.752043791661956e-07, + "loss": 0.0811, + "num_input_tokens_seen": 188813888, + "step": 59995 + }, + { + "epoch": 3.840983291722681, + "grad_norm": 12.096138000488281, + "learning_rate": 7.748000208980002e-07, + "loss": 0.188, + "num_input_tokens_seen": 188829440, + "step": 60000 + }, + { + "epoch": 3.841303373663658, + "grad_norm": 6.580257892608643, + "learning_rate": 7.743957487773043e-07, + "loss": 0.2041, + "num_input_tokens_seen": 188844352, + "step": 60005 + }, + { + "epoch": 3.8416234556046347, + "grad_norm": 6.791510581970215, + "learning_rate": 7.739915628242969e-07, + "loss": 0.1179, + "num_input_tokens_seen": 188860800, + "step": 60010 + }, + { + "epoch": 3.8419435375456117, + "grad_norm": 9.15665340423584, + "learning_rate": 7.735874630591605e-07, + "loss": 0.209, + "num_input_tokens_seen": 188875968, + "step": 60015 + }, + { + "epoch": 3.8422636194865887, + "grad_norm": 9.212127685546875, + "learning_rate": 7.731834495020738e-07, + "loss": 0.1646, + "num_input_tokens_seen": 188891776, + "step": 60020 + }, + { + "epoch": 3.8425837014275652, + "grad_norm": 6.998054027557373, + "learning_rate": 7.727795221732109e-07, + "loss": 0.2128, + "num_input_tokens_seen": 188906624, + "step": 60025 + }, + { + "epoch": 3.842903783368542, + "grad_norm": 6.608306884765625, + "learning_rate": 7.723756810927418e-07, + "loss": 0.0938, + "num_input_tokens_seen": 188922624, + "step": 60030 + }, + { + "epoch": 3.843223865309519, + "grad_norm": 5.8551716804504395, + "learning_rate": 7.719719262808314e-07, + "loss": 0.1343, + "num_input_tokens_seen": 188938624, + "step": 60035 + }, + { + "epoch": 3.843543947250496, + "grad_norm": 9.244964599609375, + "learning_rate": 7.71568257757643e-07, + "loss": 0.14, + "num_input_tokens_seen": 188953984, + "step": 60040 + }, + { + "epoch": 3.843864029191473, + "grad_norm": 7.191136360168457, + "learning_rate": 7.711646755433328e-07, + "loss": 0.1468, + "num_input_tokens_seen": 188971904, + "step": 60045 + }, + { + "epoch": 3.84418411113245, + "grad_norm": 67.89435577392578, + "learning_rate": 7.707611796580533e-07, + "loss": 0.1918, + "num_input_tokens_seen": 188987008, + "step": 60050 + }, + { + "epoch": 3.8445041930734267, + "grad_norm": 5.217496395111084, + "learning_rate": 7.703577701219537e-07, + "loss": 0.1181, + "num_input_tokens_seen": 189001920, + "step": 60055 + }, + { + "epoch": 3.8448242750144037, + "grad_norm": 8.294513702392578, + "learning_rate": 7.69954446955177e-07, + "loss": 0.1857, + "num_input_tokens_seen": 189016960, + "step": 60060 + }, + { + "epoch": 3.8451443569553807, + "grad_norm": 5.619080543518066, + "learning_rate": 7.695512101778644e-07, + "loss": 0.1907, + "num_input_tokens_seen": 189031872, + "step": 60065 + }, + { + "epoch": 3.8454644388963573, + "grad_norm": 7.1278181076049805, + "learning_rate": 7.691480598101495e-07, + "loss": 0.195, + "num_input_tokens_seen": 189047104, + "step": 60070 + }, + { + "epoch": 3.8457845208373342, + "grad_norm": 9.92073917388916, + "learning_rate": 7.687449958721665e-07, + "loss": 0.2234, + "num_input_tokens_seen": 189061376, + "step": 60075 + }, + { + "epoch": 3.8461046027783112, + "grad_norm": 6.625046730041504, + "learning_rate": 7.683420183840393e-07, + "loss": 0.1447, + "num_input_tokens_seen": 189077056, + "step": 60080 + }, + { + "epoch": 3.8464246847192882, + "grad_norm": 7.144262313842773, + "learning_rate": 7.679391273658923e-07, + "loss": 0.122, + "num_input_tokens_seen": 189092224, + "step": 60085 + }, + { + "epoch": 3.846744766660265, + "grad_norm": 3.671036720275879, + "learning_rate": 7.675363228378435e-07, + "loss": 0.1337, + "num_input_tokens_seen": 189108160, + "step": 60090 + }, + { + "epoch": 3.8470648486012418, + "grad_norm": 7.966115474700928, + "learning_rate": 7.671336048200057e-07, + "loss": 0.1809, + "num_input_tokens_seen": 189124224, + "step": 60095 + }, + { + "epoch": 3.8473849305422188, + "grad_norm": 8.134013175964355, + "learning_rate": 7.667309733324913e-07, + "loss": 0.1052, + "num_input_tokens_seen": 189138880, + "step": 60100 + }, + { + "epoch": 3.8477050124831957, + "grad_norm": 6.684310436248779, + "learning_rate": 7.66328428395402e-07, + "loss": 0.1603, + "num_input_tokens_seen": 189155520, + "step": 60105 + }, + { + "epoch": 3.8480250944241727, + "grad_norm": 49.19292068481445, + "learning_rate": 7.659259700288423e-07, + "loss": 0.248, + "num_input_tokens_seen": 189170688, + "step": 60110 + }, + { + "epoch": 3.8483451763651493, + "grad_norm": 4.957152843475342, + "learning_rate": 7.655235982529058e-07, + "loss": 0.1931, + "num_input_tokens_seen": 189185792, + "step": 60115 + }, + { + "epoch": 3.8486652583061263, + "grad_norm": 3.894009828567505, + "learning_rate": 7.651213130876867e-07, + "loss": 0.1178, + "num_input_tokens_seen": 189202240, + "step": 60120 + }, + { + "epoch": 3.8489853402471033, + "grad_norm": 25.57654571533203, + "learning_rate": 7.647191145532729e-07, + "loss": 0.2771, + "num_input_tokens_seen": 189217984, + "step": 60125 + }, + { + "epoch": 3.8493054221880803, + "grad_norm": 5.661493301391602, + "learning_rate": 7.643170026697475e-07, + "loss": 0.1143, + "num_input_tokens_seen": 189233728, + "step": 60130 + }, + { + "epoch": 3.8496255041290572, + "grad_norm": 9.691315650939941, + "learning_rate": 7.639149774571902e-07, + "loss": 0.1361, + "num_input_tokens_seen": 189249280, + "step": 60135 + }, + { + "epoch": 3.849945586070034, + "grad_norm": 8.359517097473145, + "learning_rate": 7.635130389356763e-07, + "loss": 0.1428, + "num_input_tokens_seen": 189266688, + "step": 60140 + }, + { + "epoch": 3.8502656680110108, + "grad_norm": 11.89044189453125, + "learning_rate": 7.631111871252758e-07, + "loss": 0.1778, + "num_input_tokens_seen": 189281792, + "step": 60145 + }, + { + "epoch": 3.8505857499519878, + "grad_norm": 6.880888938903809, + "learning_rate": 7.627094220460557e-07, + "loss": 0.1246, + "num_input_tokens_seen": 189296768, + "step": 60150 + }, + { + "epoch": 3.8509058318929648, + "grad_norm": 12.079642295837402, + "learning_rate": 7.623077437180771e-07, + "loss": 0.1854, + "num_input_tokens_seen": 189312384, + "step": 60155 + }, + { + "epoch": 3.8512259138339413, + "grad_norm": 7.806005001068115, + "learning_rate": 7.619061521613991e-07, + "loss": 0.138, + "num_input_tokens_seen": 189328640, + "step": 60160 + }, + { + "epoch": 3.8515459957749183, + "grad_norm": 8.198371887207031, + "learning_rate": 7.615046473960744e-07, + "loss": 0.1848, + "num_input_tokens_seen": 189343360, + "step": 60165 + }, + { + "epoch": 3.8518660777158953, + "grad_norm": 7.6838860511779785, + "learning_rate": 7.611032294421522e-07, + "loss": 0.1605, + "num_input_tokens_seen": 189358336, + "step": 60170 + }, + { + "epoch": 3.8521861596568723, + "grad_norm": 11.638063430786133, + "learning_rate": 7.607018983196773e-07, + "loss": 0.1258, + "num_input_tokens_seen": 189375168, + "step": 60175 + }, + { + "epoch": 3.8525062415978493, + "grad_norm": 8.681353569030762, + "learning_rate": 7.60300654048689e-07, + "loss": 0.214, + "num_input_tokens_seen": 189391232, + "step": 60180 + }, + { + "epoch": 3.852826323538826, + "grad_norm": 9.10726547241211, + "learning_rate": 7.59899496649226e-07, + "loss": 0.1282, + "num_input_tokens_seen": 189407424, + "step": 60185 + }, + { + "epoch": 3.853146405479803, + "grad_norm": 7.82861852645874, + "learning_rate": 7.594984261413165e-07, + "loss": 0.163, + "num_input_tokens_seen": 189423040, + "step": 60190 + }, + { + "epoch": 3.85346648742078, + "grad_norm": 8.403969764709473, + "learning_rate": 7.590974425449913e-07, + "loss": 0.2409, + "num_input_tokens_seen": 189439296, + "step": 60195 + }, + { + "epoch": 3.8537865693617563, + "grad_norm": 17.95025634765625, + "learning_rate": 7.586965458802703e-07, + "loss": 0.1665, + "num_input_tokens_seen": 189453952, + "step": 60200 + }, + { + "epoch": 3.8541066513027333, + "grad_norm": 0.9066320061683655, + "learning_rate": 7.582957361671744e-07, + "loss": 0.1404, + "num_input_tokens_seen": 189471168, + "step": 60205 + }, + { + "epoch": 3.8544267332437103, + "grad_norm": 7.671595096588135, + "learning_rate": 7.578950134257171e-07, + "loss": 0.1323, + "num_input_tokens_seen": 189486144, + "step": 60210 + }, + { + "epoch": 3.8547468151846873, + "grad_norm": 6.978868007659912, + "learning_rate": 7.57494377675908e-07, + "loss": 0.1816, + "num_input_tokens_seen": 189501376, + "step": 60215 + }, + { + "epoch": 3.8550668971256643, + "grad_norm": 8.998440742492676, + "learning_rate": 7.570938289377547e-07, + "loss": 0.1773, + "num_input_tokens_seen": 189516864, + "step": 60220 + }, + { + "epoch": 3.8553869790666413, + "grad_norm": 8.667814254760742, + "learning_rate": 7.566933672312554e-07, + "loss": 0.131, + "num_input_tokens_seen": 189531904, + "step": 60225 + }, + { + "epoch": 3.855707061007618, + "grad_norm": 2.373326301574707, + "learning_rate": 7.562929925764098e-07, + "loss": 0.1841, + "num_input_tokens_seen": 189547200, + "step": 60230 + }, + { + "epoch": 3.856027142948595, + "grad_norm": 8.536828994750977, + "learning_rate": 7.558927049932097e-07, + "loss": 0.1869, + "num_input_tokens_seen": 189563328, + "step": 60235 + }, + { + "epoch": 3.856347224889572, + "grad_norm": 63.212135314941406, + "learning_rate": 7.554925045016429e-07, + "loss": 0.1303, + "num_input_tokens_seen": 189579456, + "step": 60240 + }, + { + "epoch": 3.8566673068305484, + "grad_norm": 4.134575366973877, + "learning_rate": 7.550923911216937e-07, + "loss": 0.1375, + "num_input_tokens_seen": 189595904, + "step": 60245 + }, + { + "epoch": 3.8569873887715254, + "grad_norm": 17.70490264892578, + "learning_rate": 7.546923648733415e-07, + "loss": 0.2064, + "num_input_tokens_seen": 189612096, + "step": 60250 + }, + { + "epoch": 3.8573074707125024, + "grad_norm": 43.879947662353516, + "learning_rate": 7.542924257765616e-07, + "loss": 0.1931, + "num_input_tokens_seen": 189627584, + "step": 60255 + }, + { + "epoch": 3.8576275526534793, + "grad_norm": 4.482895374298096, + "learning_rate": 7.53892573851325e-07, + "loss": 0.1254, + "num_input_tokens_seen": 189644480, + "step": 60260 + }, + { + "epoch": 3.8579476345944563, + "grad_norm": 32.43190383911133, + "learning_rate": 7.534928091175974e-07, + "loss": 0.2058, + "num_input_tokens_seen": 189660160, + "step": 60265 + }, + { + "epoch": 3.8582677165354333, + "grad_norm": 7.647523403167725, + "learning_rate": 7.530931315953424e-07, + "loss": 0.1398, + "num_input_tokens_seen": 189675584, + "step": 60270 + }, + { + "epoch": 3.85858779847641, + "grad_norm": 8.846266746520996, + "learning_rate": 7.526935413045172e-07, + "loss": 0.1847, + "num_input_tokens_seen": 189691136, + "step": 60275 + }, + { + "epoch": 3.858907880417387, + "grad_norm": 6.385785102844238, + "learning_rate": 7.522940382650751e-07, + "loss": 0.1821, + "num_input_tokens_seen": 189707776, + "step": 60280 + }, + { + "epoch": 3.859227962358364, + "grad_norm": 6.430919170379639, + "learning_rate": 7.518946224969651e-07, + "loss": 0.1471, + "num_input_tokens_seen": 189723584, + "step": 60285 + }, + { + "epoch": 3.8595480442993404, + "grad_norm": 6.467416763305664, + "learning_rate": 7.514952940201323e-07, + "loss": 0.1466, + "num_input_tokens_seen": 189739264, + "step": 60290 + }, + { + "epoch": 3.8598681262403174, + "grad_norm": 44.10277557373047, + "learning_rate": 7.510960528545169e-07, + "loss": 0.1721, + "num_input_tokens_seen": 189754560, + "step": 60295 + }, + { + "epoch": 3.8601882081812944, + "grad_norm": 7.074823379516602, + "learning_rate": 7.506968990200539e-07, + "loss": 0.1718, + "num_input_tokens_seen": 189769600, + "step": 60300 + }, + { + "epoch": 3.8605082901222714, + "grad_norm": 9.830090522766113, + "learning_rate": 7.502978325366778e-07, + "loss": 0.1877, + "num_input_tokens_seen": 189784320, + "step": 60305 + }, + { + "epoch": 3.8608283720632484, + "grad_norm": 4.72829532623291, + "learning_rate": 7.498988534243123e-07, + "loss": 0.1328, + "num_input_tokens_seen": 189799552, + "step": 60310 + }, + { + "epoch": 3.8611484540042253, + "grad_norm": 6.9607439041137695, + "learning_rate": 7.494999617028831e-07, + "loss": 0.1716, + "num_input_tokens_seen": 189815424, + "step": 60315 + }, + { + "epoch": 3.861468535945202, + "grad_norm": 6.761481285095215, + "learning_rate": 7.491011573923077e-07, + "loss": 0.1421, + "num_input_tokens_seen": 189830208, + "step": 60320 + }, + { + "epoch": 3.861788617886179, + "grad_norm": 3.306863307952881, + "learning_rate": 7.487024405125004e-07, + "loss": 0.1343, + "num_input_tokens_seen": 189845248, + "step": 60325 + }, + { + "epoch": 3.862108699827156, + "grad_norm": 5.762723445892334, + "learning_rate": 7.48303811083371e-07, + "loss": 0.199, + "num_input_tokens_seen": 189860224, + "step": 60330 + }, + { + "epoch": 3.8624287817681324, + "grad_norm": 8.570332527160645, + "learning_rate": 7.479052691248243e-07, + "loss": 0.1605, + "num_input_tokens_seen": 189876416, + "step": 60335 + }, + { + "epoch": 3.8627488637091094, + "grad_norm": 6.751097679138184, + "learning_rate": 7.475068146567635e-07, + "loss": 0.144, + "num_input_tokens_seen": 189892032, + "step": 60340 + }, + { + "epoch": 3.8630689456500864, + "grad_norm": 8.841144561767578, + "learning_rate": 7.471084476990825e-07, + "loss": 0.2088, + "num_input_tokens_seen": 189908288, + "step": 60345 + }, + { + "epoch": 3.8633890275910634, + "grad_norm": 11.581674575805664, + "learning_rate": 7.467101682716762e-07, + "loss": 0.2278, + "num_input_tokens_seen": 189922880, + "step": 60350 + }, + { + "epoch": 3.8637091095320404, + "grad_norm": 11.028018951416016, + "learning_rate": 7.463119763944313e-07, + "loss": 0.1796, + "num_input_tokens_seen": 189938176, + "step": 60355 + }, + { + "epoch": 3.864029191473017, + "grad_norm": 5.905468940734863, + "learning_rate": 7.459138720872316e-07, + "loss": 0.1747, + "num_input_tokens_seen": 189955264, + "step": 60360 + }, + { + "epoch": 3.864349273413994, + "grad_norm": 7.92059326171875, + "learning_rate": 7.455158553699568e-07, + "loss": 0.1671, + "num_input_tokens_seen": 189971584, + "step": 60365 + }, + { + "epoch": 3.864669355354971, + "grad_norm": 5.050847053527832, + "learning_rate": 7.451179262624811e-07, + "loss": 0.1623, + "num_input_tokens_seen": 189987520, + "step": 60370 + }, + { + "epoch": 3.864989437295948, + "grad_norm": 5.892970085144043, + "learning_rate": 7.447200847846753e-07, + "loss": 0.1078, + "num_input_tokens_seen": 190003072, + "step": 60375 + }, + { + "epoch": 3.8653095192369245, + "grad_norm": 7.738960266113281, + "learning_rate": 7.443223309564057e-07, + "loss": 0.1921, + "num_input_tokens_seen": 190019584, + "step": 60380 + }, + { + "epoch": 3.8656296011779014, + "grad_norm": 3.631993293762207, + "learning_rate": 7.439246647975329e-07, + "loss": 0.0837, + "num_input_tokens_seen": 190034688, + "step": 60385 + }, + { + "epoch": 3.8659496831188784, + "grad_norm": 3.9750373363494873, + "learning_rate": 7.435270863279162e-07, + "loss": 0.1779, + "num_input_tokens_seen": 190050624, + "step": 60390 + }, + { + "epoch": 3.8662697650598554, + "grad_norm": 88.35448455810547, + "learning_rate": 7.431295955674078e-07, + "loss": 0.2802, + "num_input_tokens_seen": 190067328, + "step": 60395 + }, + { + "epoch": 3.8665898470008324, + "grad_norm": 6.095545291900635, + "learning_rate": 7.42732192535856e-07, + "loss": 0.1764, + "num_input_tokens_seen": 190082944, + "step": 60400 + }, + { + "epoch": 3.866909928941809, + "grad_norm": 5.159043312072754, + "learning_rate": 7.423348772531053e-07, + "loss": 0.1618, + "num_input_tokens_seen": 190099072, + "step": 60405 + }, + { + "epoch": 3.867230010882786, + "grad_norm": 7.251064300537109, + "learning_rate": 7.419376497389947e-07, + "loss": 0.202, + "num_input_tokens_seen": 190113984, + "step": 60410 + }, + { + "epoch": 3.867550092823763, + "grad_norm": 8.036592483520508, + "learning_rate": 7.415405100133622e-07, + "loss": 0.1491, + "num_input_tokens_seen": 190129664, + "step": 60415 + }, + { + "epoch": 3.86787017476474, + "grad_norm": 38.13129425048828, + "learning_rate": 7.411434580960353e-07, + "loss": 0.164, + "num_input_tokens_seen": 190144896, + "step": 60420 + }, + { + "epoch": 3.8681902567057165, + "grad_norm": 6.13071346282959, + "learning_rate": 7.40746494006844e-07, + "loss": 0.1218, + "num_input_tokens_seen": 190159744, + "step": 60425 + }, + { + "epoch": 3.8685103386466935, + "grad_norm": 3.634690046310425, + "learning_rate": 7.40349617765608e-07, + "loss": 0.101, + "num_input_tokens_seen": 190175040, + "step": 60430 + }, + { + "epoch": 3.8688304205876705, + "grad_norm": 11.782556533813477, + "learning_rate": 7.39952829392147e-07, + "loss": 0.1493, + "num_input_tokens_seen": 190192768, + "step": 60435 + }, + { + "epoch": 3.8691505025286475, + "grad_norm": 7.623260021209717, + "learning_rate": 7.395561289062739e-07, + "loss": 0.1294, + "num_input_tokens_seen": 190223232, + "step": 60440 + }, + { + "epoch": 3.8694705844696244, + "grad_norm": 24.41345977783203, + "learning_rate": 7.391595163277974e-07, + "loss": 0.1829, + "num_input_tokens_seen": 190240128, + "step": 60445 + }, + { + "epoch": 3.869790666410601, + "grad_norm": 6.933513641357422, + "learning_rate": 7.387629916765241e-07, + "loss": 0.1744, + "num_input_tokens_seen": 190255360, + "step": 60450 + }, + { + "epoch": 3.870110748351578, + "grad_norm": 8.329065322875977, + "learning_rate": 7.383665549722513e-07, + "loss": 0.1547, + "num_input_tokens_seen": 190270784, + "step": 60455 + }, + { + "epoch": 3.870430830292555, + "grad_norm": 12.0443696975708, + "learning_rate": 7.379702062347776e-07, + "loss": 0.1979, + "num_input_tokens_seen": 190286784, + "step": 60460 + }, + { + "epoch": 3.8707509122335315, + "grad_norm": 36.35450744628906, + "learning_rate": 7.37573945483894e-07, + "loss": 0.1682, + "num_input_tokens_seen": 190302208, + "step": 60465 + }, + { + "epoch": 3.8710709941745085, + "grad_norm": 10.536539077758789, + "learning_rate": 7.371777727393872e-07, + "loss": 0.1857, + "num_input_tokens_seen": 190318464, + "step": 60470 + }, + { + "epoch": 3.8713910761154855, + "grad_norm": 4.347920894622803, + "learning_rate": 7.367816880210404e-07, + "loss": 0.1751, + "num_input_tokens_seen": 190333824, + "step": 60475 + }, + { + "epoch": 3.8717111580564625, + "grad_norm": 2.8592803478240967, + "learning_rate": 7.363856913486317e-07, + "loss": 0.1397, + "num_input_tokens_seen": 190349632, + "step": 60480 + }, + { + "epoch": 3.8720312399974395, + "grad_norm": 7.729464530944824, + "learning_rate": 7.359897827419351e-07, + "loss": 0.152, + "num_input_tokens_seen": 190366976, + "step": 60485 + }, + { + "epoch": 3.8723513219384165, + "grad_norm": 7.897585868835449, + "learning_rate": 7.355939622207206e-07, + "loss": 0.1419, + "num_input_tokens_seen": 190382848, + "step": 60490 + }, + { + "epoch": 3.872671403879393, + "grad_norm": 7.046018600463867, + "learning_rate": 7.351982298047525e-07, + "loss": 0.1415, + "num_input_tokens_seen": 190397824, + "step": 60495 + }, + { + "epoch": 3.87299148582037, + "grad_norm": 7.629143238067627, + "learning_rate": 7.348025855137936e-07, + "loss": 0.1781, + "num_input_tokens_seen": 190415424, + "step": 60500 + }, + { + "epoch": 3.873311567761347, + "grad_norm": 6.124461650848389, + "learning_rate": 7.344070293675976e-07, + "loss": 0.1485, + "num_input_tokens_seen": 190430080, + "step": 60505 + }, + { + "epoch": 3.8736316497023235, + "grad_norm": 4.310997009277344, + "learning_rate": 7.340115613859184e-07, + "loss": 0.1483, + "num_input_tokens_seen": 190446528, + "step": 60510 + }, + { + "epoch": 3.8739517316433005, + "grad_norm": 6.944680213928223, + "learning_rate": 7.336161815885034e-07, + "loss": 0.2556, + "num_input_tokens_seen": 190462144, + "step": 60515 + }, + { + "epoch": 3.8742718135842775, + "grad_norm": 5.416937828063965, + "learning_rate": 7.332208899950955e-07, + "loss": 0.1332, + "num_input_tokens_seen": 190478720, + "step": 60520 + }, + { + "epoch": 3.8745918955252545, + "grad_norm": 8.672090530395508, + "learning_rate": 7.328256866254336e-07, + "loss": 0.1721, + "num_input_tokens_seen": 190495232, + "step": 60525 + }, + { + "epoch": 3.8749119774662315, + "grad_norm": 4.5986785888671875, + "learning_rate": 7.324305714992511e-07, + "loss": 0.1546, + "num_input_tokens_seen": 190511360, + "step": 60530 + }, + { + "epoch": 3.8752320594072085, + "grad_norm": 9.550080299377441, + "learning_rate": 7.320355446362804e-07, + "loss": 0.2121, + "num_input_tokens_seen": 190527104, + "step": 60535 + }, + { + "epoch": 3.875552141348185, + "grad_norm": 14.393152236938477, + "learning_rate": 7.316406060562442e-07, + "loss": 0.1911, + "num_input_tokens_seen": 190541632, + "step": 60540 + }, + { + "epoch": 3.875872223289162, + "grad_norm": 4.955177307128906, + "learning_rate": 7.312457557788658e-07, + "loss": 0.1714, + "num_input_tokens_seen": 190557248, + "step": 60545 + }, + { + "epoch": 3.876192305230139, + "grad_norm": 8.406546592712402, + "learning_rate": 7.308509938238612e-07, + "loss": 0.1592, + "num_input_tokens_seen": 190572864, + "step": 60550 + }, + { + "epoch": 3.8765123871711156, + "grad_norm": 8.755468368530273, + "learning_rate": 7.30456320210943e-07, + "loss": 0.1854, + "num_input_tokens_seen": 190587648, + "step": 60555 + }, + { + "epoch": 3.8768324691120926, + "grad_norm": 59.4309196472168, + "learning_rate": 7.300617349598188e-07, + "loss": 0.2262, + "num_input_tokens_seen": 190603968, + "step": 60560 + }, + { + "epoch": 3.8771525510530696, + "grad_norm": 5.818981170654297, + "learning_rate": 7.296672380901918e-07, + "loss": 0.1007, + "num_input_tokens_seen": 190619520, + "step": 60565 + }, + { + "epoch": 3.8774726329940465, + "grad_norm": 8.317026138305664, + "learning_rate": 7.292728296217627e-07, + "loss": 0.1625, + "num_input_tokens_seen": 190635072, + "step": 60570 + }, + { + "epoch": 3.8777927149350235, + "grad_norm": 10.227256774902344, + "learning_rate": 7.288785095742237e-07, + "loss": 0.1696, + "num_input_tokens_seen": 190651712, + "step": 60575 + }, + { + "epoch": 3.8781127968760005, + "grad_norm": 6.465057373046875, + "learning_rate": 7.284842779672674e-07, + "loss": 0.1997, + "num_input_tokens_seen": 190667520, + "step": 60580 + }, + { + "epoch": 3.878432878816977, + "grad_norm": 8.984538078308105, + "learning_rate": 7.280901348205788e-07, + "loss": 0.175, + "num_input_tokens_seen": 190683584, + "step": 60585 + }, + { + "epoch": 3.878752960757954, + "grad_norm": 4.658505916595459, + "learning_rate": 7.276960801538394e-07, + "loss": 0.128, + "num_input_tokens_seen": 190699968, + "step": 60590 + }, + { + "epoch": 3.879073042698931, + "grad_norm": 7.827462196350098, + "learning_rate": 7.273021139867259e-07, + "loss": 0.185, + "num_input_tokens_seen": 190715008, + "step": 60595 + }, + { + "epoch": 3.8793931246399076, + "grad_norm": 4.2868876457214355, + "learning_rate": 7.269082363389113e-07, + "loss": 0.1773, + "num_input_tokens_seen": 190730496, + "step": 60600 + }, + { + "epoch": 3.8797132065808846, + "grad_norm": 4.233025074005127, + "learning_rate": 7.265144472300636e-07, + "loss": 0.1031, + "num_input_tokens_seen": 190747776, + "step": 60605 + }, + { + "epoch": 3.8800332885218616, + "grad_norm": 5.7475128173828125, + "learning_rate": 7.261207466798467e-07, + "loss": 0.1581, + "num_input_tokens_seen": 190763712, + "step": 60610 + }, + { + "epoch": 3.8803533704628386, + "grad_norm": 8.515996932983398, + "learning_rate": 7.257271347079192e-07, + "loss": 0.1172, + "num_input_tokens_seen": 190779328, + "step": 60615 + }, + { + "epoch": 3.8806734524038156, + "grad_norm": 72.19706726074219, + "learning_rate": 7.253336113339382e-07, + "loss": 0.1333, + "num_input_tokens_seen": 190794752, + "step": 60620 + }, + { + "epoch": 3.880993534344792, + "grad_norm": 13.901391983032227, + "learning_rate": 7.249401765775513e-07, + "loss": 0.1782, + "num_input_tokens_seen": 190810496, + "step": 60625 + }, + { + "epoch": 3.881313616285769, + "grad_norm": 5.871860504150391, + "learning_rate": 7.245468304584067e-07, + "loss": 0.1172, + "num_input_tokens_seen": 190826816, + "step": 60630 + }, + { + "epoch": 3.881633698226746, + "grad_norm": 9.203563690185547, + "learning_rate": 7.241535729961455e-07, + "loss": 0.1543, + "num_input_tokens_seen": 190841792, + "step": 60635 + }, + { + "epoch": 3.881953780167723, + "grad_norm": 3.710972785949707, + "learning_rate": 7.23760404210404e-07, + "loss": 0.1781, + "num_input_tokens_seen": 190857664, + "step": 60640 + }, + { + "epoch": 3.8822738621086996, + "grad_norm": 4.491005897521973, + "learning_rate": 7.233673241208173e-07, + "loss": 0.1459, + "num_input_tokens_seen": 190874304, + "step": 60645 + }, + { + "epoch": 3.8825939440496766, + "grad_norm": 6.167999267578125, + "learning_rate": 7.229743327470109e-07, + "loss": 0.0941, + "num_input_tokens_seen": 190889472, + "step": 60650 + }, + { + "epoch": 3.8829140259906536, + "grad_norm": 9.827349662780762, + "learning_rate": 7.225814301086115e-07, + "loss": 0.1908, + "num_input_tokens_seen": 190904512, + "step": 60655 + }, + { + "epoch": 3.8832341079316306, + "grad_norm": 7.741097927093506, + "learning_rate": 7.221886162252358e-07, + "loss": 0.1529, + "num_input_tokens_seen": 190919552, + "step": 60660 + }, + { + "epoch": 3.8835541898726076, + "grad_norm": 7.226105690002441, + "learning_rate": 7.21795891116501e-07, + "loss": 0.1297, + "num_input_tokens_seen": 190934464, + "step": 60665 + }, + { + "epoch": 3.883874271813584, + "grad_norm": 4.627900123596191, + "learning_rate": 7.214032548020172e-07, + "loss": 0.1908, + "num_input_tokens_seen": 190949376, + "step": 60670 + }, + { + "epoch": 3.884194353754561, + "grad_norm": 8.390600204467773, + "learning_rate": 7.210107073013906e-07, + "loss": 0.162, + "num_input_tokens_seen": 190964672, + "step": 60675 + }, + { + "epoch": 3.884514435695538, + "grad_norm": 6.4548211097717285, + "learning_rate": 7.206182486342225e-07, + "loss": 0.2023, + "num_input_tokens_seen": 190979200, + "step": 60680 + }, + { + "epoch": 3.884834517636515, + "grad_norm": 7.485798358917236, + "learning_rate": 7.2022587882011e-07, + "loss": 0.2337, + "num_input_tokens_seen": 190995200, + "step": 60685 + }, + { + "epoch": 3.8851545995774917, + "grad_norm": 7.861607074737549, + "learning_rate": 7.198335978786475e-07, + "loss": 0.2028, + "num_input_tokens_seen": 191011584, + "step": 60690 + }, + { + "epoch": 3.8854746815184686, + "grad_norm": 8.867972373962402, + "learning_rate": 7.194414058294225e-07, + "loss": 0.2709, + "num_input_tokens_seen": 191028288, + "step": 60695 + }, + { + "epoch": 3.8857947634594456, + "grad_norm": 8.051523208618164, + "learning_rate": 7.19049302692019e-07, + "loss": 0.2329, + "num_input_tokens_seen": 191043840, + "step": 60700 + }, + { + "epoch": 3.8861148454004226, + "grad_norm": 9.768858909606934, + "learning_rate": 7.186572884860169e-07, + "loss": 0.2638, + "num_input_tokens_seen": 191059392, + "step": 60705 + }, + { + "epoch": 3.8864349273413996, + "grad_norm": 9.8037691116333, + "learning_rate": 7.182653632309908e-07, + "loss": 0.1877, + "num_input_tokens_seen": 191075264, + "step": 60710 + }, + { + "epoch": 3.886755009282376, + "grad_norm": 8.918628692626953, + "learning_rate": 7.17873526946512e-07, + "loss": 0.1368, + "num_input_tokens_seen": 191092032, + "step": 60715 + }, + { + "epoch": 3.887075091223353, + "grad_norm": 8.01812744140625, + "learning_rate": 7.174817796521466e-07, + "loss": 0.15, + "num_input_tokens_seen": 191107584, + "step": 60720 + }, + { + "epoch": 3.88739517316433, + "grad_norm": 5.360846042633057, + "learning_rate": 7.170901213674552e-07, + "loss": 0.1673, + "num_input_tokens_seen": 191123520, + "step": 60725 + }, + { + "epoch": 3.8877152551053067, + "grad_norm": 6.294430255889893, + "learning_rate": 7.166985521119982e-07, + "loss": 0.1535, + "num_input_tokens_seen": 191140096, + "step": 60730 + }, + { + "epoch": 3.8880353370462837, + "grad_norm": 13.885185241699219, + "learning_rate": 7.16307071905325e-07, + "loss": 0.1902, + "num_input_tokens_seen": 191154944, + "step": 60735 + }, + { + "epoch": 3.8883554189872607, + "grad_norm": 25.11142921447754, + "learning_rate": 7.159156807669862e-07, + "loss": 0.1822, + "num_input_tokens_seen": 191169856, + "step": 60740 + }, + { + "epoch": 3.8886755009282377, + "grad_norm": 8.010478019714355, + "learning_rate": 7.155243787165256e-07, + "loss": 0.1245, + "num_input_tokens_seen": 191185216, + "step": 60745 + }, + { + "epoch": 3.8889955828692147, + "grad_norm": 3.451695203781128, + "learning_rate": 7.151331657734825e-07, + "loss": 0.131, + "num_input_tokens_seen": 191199488, + "step": 60750 + }, + { + "epoch": 3.8893156648101916, + "grad_norm": 14.283522605895996, + "learning_rate": 7.147420419573922e-07, + "loss": 0.2288, + "num_input_tokens_seen": 191216384, + "step": 60755 + }, + { + "epoch": 3.889635746751168, + "grad_norm": 8.276209831237793, + "learning_rate": 7.143510072877844e-07, + "loss": 0.1724, + "num_input_tokens_seen": 191231680, + "step": 60760 + }, + { + "epoch": 3.889955828692145, + "grad_norm": 16.211198806762695, + "learning_rate": 7.139600617841877e-07, + "loss": 0.1791, + "num_input_tokens_seen": 191246848, + "step": 60765 + }, + { + "epoch": 3.890275910633122, + "grad_norm": 8.215794563293457, + "learning_rate": 7.135692054661206e-07, + "loss": 0.1297, + "num_input_tokens_seen": 191263552, + "step": 60770 + }, + { + "epoch": 3.8905959925740987, + "grad_norm": 6.33880090713501, + "learning_rate": 7.131784383531032e-07, + "loss": 0.1783, + "num_input_tokens_seen": 191279744, + "step": 60775 + }, + { + "epoch": 3.8909160745150757, + "grad_norm": 5.6278605461120605, + "learning_rate": 7.127877604646474e-07, + "loss": 0.1951, + "num_input_tokens_seen": 191294464, + "step": 60780 + }, + { + "epoch": 3.8912361564560527, + "grad_norm": 9.845216751098633, + "learning_rate": 7.123971718202616e-07, + "loss": 0.2041, + "num_input_tokens_seen": 191308864, + "step": 60785 + }, + { + "epoch": 3.8915562383970297, + "grad_norm": 4.6464080810546875, + "learning_rate": 7.120066724394498e-07, + "loss": 0.2145, + "num_input_tokens_seen": 191324352, + "step": 60790 + }, + { + "epoch": 3.8918763203380067, + "grad_norm": 7.291848182678223, + "learning_rate": 7.116162623417114e-07, + "loss": 0.1404, + "num_input_tokens_seen": 191340288, + "step": 60795 + }, + { + "epoch": 3.8921964022789837, + "grad_norm": 133.76376342773438, + "learning_rate": 7.112259415465414e-07, + "loss": 0.1309, + "num_input_tokens_seen": 191355584, + "step": 60800 + }, + { + "epoch": 3.89251648421996, + "grad_norm": 9.11433219909668, + "learning_rate": 7.108357100734298e-07, + "loss": 0.1976, + "num_input_tokens_seen": 191371328, + "step": 60805 + }, + { + "epoch": 3.892836566160937, + "grad_norm": 38.68040466308594, + "learning_rate": 7.104455679418642e-07, + "loss": 0.1907, + "num_input_tokens_seen": 191386368, + "step": 60810 + }, + { + "epoch": 3.893156648101914, + "grad_norm": 44.37122344970703, + "learning_rate": 7.100555151713254e-07, + "loss": 0.2097, + "num_input_tokens_seen": 191400448, + "step": 60815 + }, + { + "epoch": 3.8934767300428907, + "grad_norm": 7.337488174438477, + "learning_rate": 7.096655517812908e-07, + "loss": 0.1963, + "num_input_tokens_seen": 191416768, + "step": 60820 + }, + { + "epoch": 3.8937968119838677, + "grad_norm": 9.207269668579102, + "learning_rate": 7.09275677791233e-07, + "loss": 0.1735, + "num_input_tokens_seen": 191432128, + "step": 60825 + }, + { + "epoch": 3.8941168939248447, + "grad_norm": 10.476531982421875, + "learning_rate": 7.088858932206203e-07, + "loss": 0.0954, + "num_input_tokens_seen": 191448128, + "step": 60830 + }, + { + "epoch": 3.8944369758658217, + "grad_norm": 7.145314693450928, + "learning_rate": 7.084961980889166e-07, + "loss": 0.16, + "num_input_tokens_seen": 191465664, + "step": 60835 + }, + { + "epoch": 3.8947570578067987, + "grad_norm": 18.366863250732422, + "learning_rate": 7.081065924155811e-07, + "loss": 0.2341, + "num_input_tokens_seen": 191480256, + "step": 60840 + }, + { + "epoch": 3.8950771397477757, + "grad_norm": 8.521491050720215, + "learning_rate": 7.077170762200681e-07, + "loss": 0.1444, + "num_input_tokens_seen": 191495424, + "step": 60845 + }, + { + "epoch": 3.8953972216887522, + "grad_norm": 5.995913505554199, + "learning_rate": 7.073276495218298e-07, + "loss": 0.1778, + "num_input_tokens_seen": 191511744, + "step": 60850 + }, + { + "epoch": 3.8957173036297292, + "grad_norm": 4.1928486824035645, + "learning_rate": 7.069383123403098e-07, + "loss": 0.0944, + "num_input_tokens_seen": 191526784, + "step": 60855 + }, + { + "epoch": 3.8960373855707062, + "grad_norm": 6.0371785163879395, + "learning_rate": 7.065490646949516e-07, + "loss": 0.1969, + "num_input_tokens_seen": 191543040, + "step": 60860 + }, + { + "epoch": 3.8963574675116828, + "grad_norm": 27.920883178710938, + "learning_rate": 7.061599066051911e-07, + "loss": 0.164, + "num_input_tokens_seen": 191560512, + "step": 60865 + }, + { + "epoch": 3.8966775494526598, + "grad_norm": 4.657368183135986, + "learning_rate": 7.057708380904604e-07, + "loss": 0.1916, + "num_input_tokens_seen": 191575744, + "step": 60870 + }, + { + "epoch": 3.8969976313936368, + "grad_norm": 7.012408256530762, + "learning_rate": 7.0538185917019e-07, + "loss": 0.2014, + "num_input_tokens_seen": 191591232, + "step": 60875 + }, + { + "epoch": 3.8973177133346137, + "grad_norm": 9.141027450561523, + "learning_rate": 7.049929698638e-07, + "loss": 0.3399, + "num_input_tokens_seen": 191606848, + "step": 60880 + }, + { + "epoch": 3.8976377952755907, + "grad_norm": 5.561343193054199, + "learning_rate": 7.046041701907127e-07, + "loss": 0.1631, + "num_input_tokens_seen": 191623040, + "step": 60885 + }, + { + "epoch": 3.8979578772165673, + "grad_norm": 7.0398688316345215, + "learning_rate": 7.0421546017034e-07, + "loss": 0.1141, + "num_input_tokens_seen": 191637376, + "step": 60890 + }, + { + "epoch": 3.8982779591575443, + "grad_norm": 50.143436431884766, + "learning_rate": 7.038268398220937e-07, + "loss": 0.2313, + "num_input_tokens_seen": 191652480, + "step": 60895 + }, + { + "epoch": 3.8985980410985213, + "grad_norm": 7.5004706382751465, + "learning_rate": 7.034383091653793e-07, + "loss": 0.1317, + "num_input_tokens_seen": 191668800, + "step": 60900 + }, + { + "epoch": 3.8989181230394983, + "grad_norm": 6.408291339874268, + "learning_rate": 7.030498682195977e-07, + "loss": 0.2489, + "num_input_tokens_seen": 191685696, + "step": 60905 + }, + { + "epoch": 3.899238204980475, + "grad_norm": 6.170713424682617, + "learning_rate": 7.026615170041459e-07, + "loss": 0.1676, + "num_input_tokens_seen": 191700992, + "step": 60910 + }, + { + "epoch": 3.899558286921452, + "grad_norm": 14.964781761169434, + "learning_rate": 7.022732555384151e-07, + "loss": 0.2083, + "num_input_tokens_seen": 191717376, + "step": 60915 + }, + { + "epoch": 3.899878368862429, + "grad_norm": 23.52982521057129, + "learning_rate": 7.018850838417946e-07, + "loss": 0.1527, + "num_input_tokens_seen": 191734720, + "step": 60920 + }, + { + "epoch": 3.9001984508034058, + "grad_norm": 10.451236724853516, + "learning_rate": 7.01497001933667e-07, + "loss": 0.1515, + "num_input_tokens_seen": 191750400, + "step": 60925 + }, + { + "epoch": 3.9005185327443828, + "grad_norm": 8.051375389099121, + "learning_rate": 7.011090098334111e-07, + "loss": 0.1293, + "num_input_tokens_seen": 191766208, + "step": 60930 + }, + { + "epoch": 3.9008386146853593, + "grad_norm": 3.9295310974121094, + "learning_rate": 7.007211075604012e-07, + "loss": 0.1245, + "num_input_tokens_seen": 191782592, + "step": 60935 + }, + { + "epoch": 3.9011586966263363, + "grad_norm": 8.586870193481445, + "learning_rate": 7.003332951340069e-07, + "loss": 0.1701, + "num_input_tokens_seen": 191798720, + "step": 60940 + }, + { + "epoch": 3.9014787785673133, + "grad_norm": 23.630943298339844, + "learning_rate": 6.99945572573594e-07, + "loss": 0.1942, + "num_input_tokens_seen": 191813376, + "step": 60945 + }, + { + "epoch": 3.9017988605082903, + "grad_norm": 13.195971488952637, + "learning_rate": 6.995579398985231e-07, + "loss": 0.1709, + "num_input_tokens_seen": 191828992, + "step": 60950 + }, + { + "epoch": 3.902118942449267, + "grad_norm": 7.715383529663086, + "learning_rate": 6.991703971281494e-07, + "loss": 0.1457, + "num_input_tokens_seen": 191844288, + "step": 60955 + }, + { + "epoch": 3.902439024390244, + "grad_norm": 7.065756797790527, + "learning_rate": 6.987829442818275e-07, + "loss": 0.1594, + "num_input_tokens_seen": 191860160, + "step": 60960 + }, + { + "epoch": 3.902759106331221, + "grad_norm": 10.278454780578613, + "learning_rate": 6.983955813789018e-07, + "loss": 0.2075, + "num_input_tokens_seen": 191875392, + "step": 60965 + }, + { + "epoch": 3.903079188272198, + "grad_norm": 9.834256172180176, + "learning_rate": 6.980083084387182e-07, + "loss": 0.1591, + "num_input_tokens_seen": 191891136, + "step": 60970 + }, + { + "epoch": 3.903399270213175, + "grad_norm": 8.26651382446289, + "learning_rate": 6.976211254806118e-07, + "loss": 0.173, + "num_input_tokens_seen": 191906880, + "step": 60975 + }, + { + "epoch": 3.9037193521541513, + "grad_norm": 7.644260883331299, + "learning_rate": 6.972340325239185e-07, + "loss": 0.1801, + "num_input_tokens_seen": 191922752, + "step": 60980 + }, + { + "epoch": 3.9040394340951283, + "grad_norm": 7.626990795135498, + "learning_rate": 6.968470295879679e-07, + "loss": 0.1427, + "num_input_tokens_seen": 191941184, + "step": 60985 + }, + { + "epoch": 3.9043595160361053, + "grad_norm": 6.495023250579834, + "learning_rate": 6.964601166920834e-07, + "loss": 0.1502, + "num_input_tokens_seen": 191957184, + "step": 60990 + }, + { + "epoch": 3.904679597977082, + "grad_norm": 6.738314151763916, + "learning_rate": 6.960732938555878e-07, + "loss": 0.1786, + "num_input_tokens_seen": 191974656, + "step": 60995 + }, + { + "epoch": 3.904999679918059, + "grad_norm": 5.807941436767578, + "learning_rate": 6.956865610977942e-07, + "loss": 0.1952, + "num_input_tokens_seen": 191990528, + "step": 61000 + }, + { + "epoch": 3.905319761859036, + "grad_norm": 12.909910202026367, + "learning_rate": 6.95299918438016e-07, + "loss": 0.1903, + "num_input_tokens_seen": 192005696, + "step": 61005 + }, + { + "epoch": 3.905639843800013, + "grad_norm": 6.408806800842285, + "learning_rate": 6.949133658955598e-07, + "loss": 0.2206, + "num_input_tokens_seen": 192020480, + "step": 61010 + }, + { + "epoch": 3.90595992574099, + "grad_norm": 8.24070930480957, + "learning_rate": 6.945269034897276e-07, + "loss": 0.1778, + "num_input_tokens_seen": 192035648, + "step": 61015 + }, + { + "epoch": 3.906280007681967, + "grad_norm": 7.510499477386475, + "learning_rate": 6.941405312398175e-07, + "loss": 0.1518, + "num_input_tokens_seen": 192051968, + "step": 61020 + }, + { + "epoch": 3.9066000896229434, + "grad_norm": 5.716602802276611, + "learning_rate": 6.937542491651228e-07, + "loss": 0.1313, + "num_input_tokens_seen": 192067392, + "step": 61025 + }, + { + "epoch": 3.9069201715639204, + "grad_norm": 6.141707897186279, + "learning_rate": 6.933680572849327e-07, + "loss": 0.0996, + "num_input_tokens_seen": 192082432, + "step": 61030 + }, + { + "epoch": 3.9072402535048973, + "grad_norm": 5.73215389251709, + "learning_rate": 6.929819556185305e-07, + "loss": 0.1796, + "num_input_tokens_seen": 192098048, + "step": 61035 + }, + { + "epoch": 3.907560335445874, + "grad_norm": 6.6479268074035645, + "learning_rate": 6.925959441851979e-07, + "loss": 0.1404, + "num_input_tokens_seen": 192113152, + "step": 61040 + }, + { + "epoch": 3.907880417386851, + "grad_norm": 6.576268672943115, + "learning_rate": 6.922100230042092e-07, + "loss": 0.1074, + "num_input_tokens_seen": 192129600, + "step": 61045 + }, + { + "epoch": 3.908200499327828, + "grad_norm": 6.246692180633545, + "learning_rate": 6.918241920948357e-07, + "loss": 0.1495, + "num_input_tokens_seen": 192145344, + "step": 61050 + }, + { + "epoch": 3.908520581268805, + "grad_norm": 3.8462162017822266, + "learning_rate": 6.914384514763437e-07, + "loss": 0.1553, + "num_input_tokens_seen": 192160384, + "step": 61055 + }, + { + "epoch": 3.908840663209782, + "grad_norm": 7.376954555511475, + "learning_rate": 6.910528011679948e-07, + "loss": 0.2117, + "num_input_tokens_seen": 192177024, + "step": 61060 + }, + { + "epoch": 3.909160745150759, + "grad_norm": 7.6287760734558105, + "learning_rate": 6.906672411890459e-07, + "loss": 0.186, + "num_input_tokens_seen": 192192128, + "step": 61065 + }, + { + "epoch": 3.9094808270917354, + "grad_norm": 7.576117038726807, + "learning_rate": 6.902817715587518e-07, + "loss": 0.1418, + "num_input_tokens_seen": 192207680, + "step": 61070 + }, + { + "epoch": 3.9098009090327124, + "grad_norm": 5.015749454498291, + "learning_rate": 6.898963922963584e-07, + "loss": 0.1739, + "num_input_tokens_seen": 192223616, + "step": 61075 + }, + { + "epoch": 3.9101209909736894, + "grad_norm": 8.206592559814453, + "learning_rate": 6.895111034211119e-07, + "loss": 0.2011, + "num_input_tokens_seen": 192238720, + "step": 61080 + }, + { + "epoch": 3.910441072914666, + "grad_norm": 7.983498573303223, + "learning_rate": 6.891259049522489e-07, + "loss": 0.1026, + "num_input_tokens_seen": 192254080, + "step": 61085 + }, + { + "epoch": 3.910761154855643, + "grad_norm": 7.03717041015625, + "learning_rate": 6.887407969090066e-07, + "loss": 0.1356, + "num_input_tokens_seen": 192270720, + "step": 61090 + }, + { + "epoch": 3.91108123679662, + "grad_norm": 3.8691492080688477, + "learning_rate": 6.883557793106143e-07, + "loss": 0.1405, + "num_input_tokens_seen": 192286400, + "step": 61095 + }, + { + "epoch": 3.911401318737597, + "grad_norm": 5.187659740447998, + "learning_rate": 6.879708521762974e-07, + "loss": 0.1561, + "num_input_tokens_seen": 192301504, + "step": 61100 + }, + { + "epoch": 3.911721400678574, + "grad_norm": 8.31656551361084, + "learning_rate": 6.875860155252787e-07, + "loss": 0.1513, + "num_input_tokens_seen": 192317184, + "step": 61105 + }, + { + "epoch": 3.912041482619551, + "grad_norm": 7.123365879058838, + "learning_rate": 6.872012693767727e-07, + "loss": 0.1592, + "num_input_tokens_seen": 192333760, + "step": 61110 + }, + { + "epoch": 3.9123615645605274, + "grad_norm": 78.96114349365234, + "learning_rate": 6.86816613749994e-07, + "loss": 0.2469, + "num_input_tokens_seen": 192349120, + "step": 61115 + }, + { + "epoch": 3.9126816465015044, + "grad_norm": 5.675043106079102, + "learning_rate": 6.864320486641476e-07, + "loss": 0.2381, + "num_input_tokens_seen": 192364544, + "step": 61120 + }, + { + "epoch": 3.9130017284424814, + "grad_norm": 14.524723052978516, + "learning_rate": 6.860475741384392e-07, + "loss": 0.1527, + "num_input_tokens_seen": 192380800, + "step": 61125 + }, + { + "epoch": 3.913321810383458, + "grad_norm": 9.585533142089844, + "learning_rate": 6.856631901920662e-07, + "loss": 0.1796, + "num_input_tokens_seen": 192396288, + "step": 61130 + }, + { + "epoch": 3.913641892324435, + "grad_norm": 102.89361572265625, + "learning_rate": 6.852788968442232e-07, + "loss": 0.2359, + "num_input_tokens_seen": 192411840, + "step": 61135 + }, + { + "epoch": 3.913961974265412, + "grad_norm": 7.240891933441162, + "learning_rate": 6.848946941140991e-07, + "loss": 0.223, + "num_input_tokens_seen": 192427968, + "step": 61140 + }, + { + "epoch": 3.914282056206389, + "grad_norm": 8.28659725189209, + "learning_rate": 6.845105820208797e-07, + "loss": 0.1878, + "num_input_tokens_seen": 192443072, + "step": 61145 + }, + { + "epoch": 3.914602138147366, + "grad_norm": 5.157101631164551, + "learning_rate": 6.841265605837447e-07, + "loss": 0.1909, + "num_input_tokens_seen": 192458240, + "step": 61150 + }, + { + "epoch": 3.9149222200883425, + "grad_norm": 14.472719192504883, + "learning_rate": 6.837426298218711e-07, + "loss": 0.1758, + "num_input_tokens_seen": 192476032, + "step": 61155 + }, + { + "epoch": 3.9152423020293194, + "grad_norm": 33.10609436035156, + "learning_rate": 6.833587897544302e-07, + "loss": 0.1384, + "num_input_tokens_seen": 192491264, + "step": 61160 + }, + { + "epoch": 3.9155623839702964, + "grad_norm": 25.74319839477539, + "learning_rate": 6.829750404005888e-07, + "loss": 0.1865, + "num_input_tokens_seen": 192505728, + "step": 61165 + }, + { + "epoch": 3.9158824659112734, + "grad_norm": 2.2173361778259277, + "learning_rate": 6.825913817795096e-07, + "loss": 0.0989, + "num_input_tokens_seen": 192521344, + "step": 61170 + }, + { + "epoch": 3.91620254785225, + "grad_norm": 11.775707244873047, + "learning_rate": 6.8220781391035e-07, + "loss": 0.2037, + "num_input_tokens_seen": 192538176, + "step": 61175 + }, + { + "epoch": 3.916522629793227, + "grad_norm": 7.822396755218506, + "learning_rate": 6.818243368122637e-07, + "loss": 0.1416, + "num_input_tokens_seen": 192553024, + "step": 61180 + }, + { + "epoch": 3.916842711734204, + "grad_norm": 6.146833419799805, + "learning_rate": 6.814409505043986e-07, + "loss": 0.1883, + "num_input_tokens_seen": 192570112, + "step": 61185 + }, + { + "epoch": 3.917162793675181, + "grad_norm": 18.101667404174805, + "learning_rate": 6.810576550059014e-07, + "loss": 0.1616, + "num_input_tokens_seen": 192584768, + "step": 61190 + }, + { + "epoch": 3.917482875616158, + "grad_norm": 6.082101345062256, + "learning_rate": 6.806744503359092e-07, + "loss": 0.1655, + "num_input_tokens_seen": 192599616, + "step": 61195 + }, + { + "epoch": 3.9178029575571345, + "grad_norm": 5.728484153747559, + "learning_rate": 6.802913365135597e-07, + "loss": 0.172, + "num_input_tokens_seen": 192615168, + "step": 61200 + }, + { + "epoch": 3.9181230394981115, + "grad_norm": 7.895503997802734, + "learning_rate": 6.79908313557981e-07, + "loss": 0.2143, + "num_input_tokens_seen": 192630208, + "step": 61205 + }, + { + "epoch": 3.9184431214390885, + "grad_norm": 6.531388282775879, + "learning_rate": 6.79525381488301e-07, + "loss": 0.1218, + "num_input_tokens_seen": 192645888, + "step": 61210 + }, + { + "epoch": 3.9187632033800655, + "grad_norm": 10.227956771850586, + "learning_rate": 6.791425403236413e-07, + "loss": 0.1763, + "num_input_tokens_seen": 192661248, + "step": 61215 + }, + { + "epoch": 3.919083285321042, + "grad_norm": 6.936142921447754, + "learning_rate": 6.787597900831178e-07, + "loss": 0.1867, + "num_input_tokens_seen": 192689216, + "step": 61220 + }, + { + "epoch": 3.919403367262019, + "grad_norm": 7.6039509773254395, + "learning_rate": 6.783771307858453e-07, + "loss": 0.114, + "num_input_tokens_seen": 192705280, + "step": 61225 + }, + { + "epoch": 3.919723449202996, + "grad_norm": 10.078629493713379, + "learning_rate": 6.779945624509288e-07, + "loss": 0.0957, + "num_input_tokens_seen": 192719744, + "step": 61230 + }, + { + "epoch": 3.920043531143973, + "grad_norm": 5.736988067626953, + "learning_rate": 6.776120850974743e-07, + "loss": 0.1738, + "num_input_tokens_seen": 192735680, + "step": 61235 + }, + { + "epoch": 3.92036361308495, + "grad_norm": 23.63994789123535, + "learning_rate": 6.772296987445798e-07, + "loss": 0.1434, + "num_input_tokens_seen": 192751104, + "step": 61240 + }, + { + "epoch": 3.9206836950259265, + "grad_norm": 6.012808322906494, + "learning_rate": 6.768474034113396e-07, + "loss": 0.1488, + "num_input_tokens_seen": 192767168, + "step": 61245 + }, + { + "epoch": 3.9210037769669035, + "grad_norm": 10.639217376708984, + "learning_rate": 6.764651991168436e-07, + "loss": 0.1799, + "num_input_tokens_seen": 192782720, + "step": 61250 + }, + { + "epoch": 3.9213238589078805, + "grad_norm": 18.16587257385254, + "learning_rate": 6.760830858801773e-07, + "loss": 0.1952, + "num_input_tokens_seen": 192798976, + "step": 61255 + }, + { + "epoch": 3.921643940848857, + "grad_norm": 5.88622522354126, + "learning_rate": 6.757010637204212e-07, + "loss": 0.1964, + "num_input_tokens_seen": 192815552, + "step": 61260 + }, + { + "epoch": 3.921964022789834, + "grad_norm": 16.494815826416016, + "learning_rate": 6.753191326566508e-07, + "loss": 0.1476, + "num_input_tokens_seen": 192831744, + "step": 61265 + }, + { + "epoch": 3.922284104730811, + "grad_norm": 4.563991546630859, + "learning_rate": 6.74937292707939e-07, + "loss": 0.1225, + "num_input_tokens_seen": 192847168, + "step": 61270 + }, + { + "epoch": 3.922604186671788, + "grad_norm": 7.716944694519043, + "learning_rate": 6.745555438933527e-07, + "loss": 0.167, + "num_input_tokens_seen": 192862784, + "step": 61275 + }, + { + "epoch": 3.922924268612765, + "grad_norm": 5.3884663581848145, + "learning_rate": 6.741738862319538e-07, + "loss": 0.1852, + "num_input_tokens_seen": 192878656, + "step": 61280 + }, + { + "epoch": 3.923244350553742, + "grad_norm": 8.987110137939453, + "learning_rate": 6.737923197428009e-07, + "loss": 0.1745, + "num_input_tokens_seen": 192893376, + "step": 61285 + }, + { + "epoch": 3.9235644324947185, + "grad_norm": 7.642669677734375, + "learning_rate": 6.734108444449472e-07, + "loss": 0.1699, + "num_input_tokens_seen": 192909504, + "step": 61290 + }, + { + "epoch": 3.9238845144356955, + "grad_norm": 6.1631178855896, + "learning_rate": 6.730294603574408e-07, + "loss": 0.1383, + "num_input_tokens_seen": 192925440, + "step": 61295 + }, + { + "epoch": 3.9242045963766725, + "grad_norm": 4.649872303009033, + "learning_rate": 6.726481674993282e-07, + "loss": 0.1798, + "num_input_tokens_seen": 192941696, + "step": 61300 + }, + { + "epoch": 3.924524678317649, + "grad_norm": 7.228231906890869, + "learning_rate": 6.722669658896464e-07, + "loss": 0.1459, + "num_input_tokens_seen": 192957376, + "step": 61305 + }, + { + "epoch": 3.924844760258626, + "grad_norm": 11.396994590759277, + "learning_rate": 6.718858555474336e-07, + "loss": 0.2058, + "num_input_tokens_seen": 192973632, + "step": 61310 + }, + { + "epoch": 3.925164842199603, + "grad_norm": 8.868253707885742, + "learning_rate": 6.71504836491717e-07, + "loss": 0.2832, + "num_input_tokens_seen": 192988992, + "step": 61315 + }, + { + "epoch": 3.92548492414058, + "grad_norm": 5.654933452606201, + "learning_rate": 6.711239087415258e-07, + "loss": 0.1554, + "num_input_tokens_seen": 193004608, + "step": 61320 + }, + { + "epoch": 3.925805006081557, + "grad_norm": 4.034284591674805, + "learning_rate": 6.7074307231588e-07, + "loss": 0.1474, + "num_input_tokens_seen": 193020096, + "step": 61325 + }, + { + "epoch": 3.926125088022534, + "grad_norm": 8.771958351135254, + "learning_rate": 6.703623272337964e-07, + "loss": 0.1642, + "num_input_tokens_seen": 193036096, + "step": 61330 + }, + { + "epoch": 3.9264451699635106, + "grad_norm": 8.20996379852295, + "learning_rate": 6.699816735142891e-07, + "loss": 0.2024, + "num_input_tokens_seen": 193051264, + "step": 61335 + }, + { + "epoch": 3.9267652519044876, + "grad_norm": 10.04263973236084, + "learning_rate": 6.696011111763637e-07, + "loss": 0.162, + "num_input_tokens_seen": 193066176, + "step": 61340 + }, + { + "epoch": 3.9270853338454645, + "grad_norm": 22.811067581176758, + "learning_rate": 6.692206402390258e-07, + "loss": 0.0706, + "num_input_tokens_seen": 193081728, + "step": 61345 + }, + { + "epoch": 3.927405415786441, + "grad_norm": 6.441585063934326, + "learning_rate": 6.688402607212715e-07, + "loss": 0.1414, + "num_input_tokens_seen": 193097280, + "step": 61350 + }, + { + "epoch": 3.927725497727418, + "grad_norm": 7.48707914352417, + "learning_rate": 6.684599726420973e-07, + "loss": 0.1554, + "num_input_tokens_seen": 193112896, + "step": 61355 + }, + { + "epoch": 3.928045579668395, + "grad_norm": 10.673099517822266, + "learning_rate": 6.680797760204921e-07, + "loss": 0.1971, + "num_input_tokens_seen": 193127808, + "step": 61360 + }, + { + "epoch": 3.928365661609372, + "grad_norm": 4.043043613433838, + "learning_rate": 6.676996708754405e-07, + "loss": 0.189, + "num_input_tokens_seen": 193143744, + "step": 61365 + }, + { + "epoch": 3.928685743550349, + "grad_norm": 5.307353496551514, + "learning_rate": 6.673196572259233e-07, + "loss": 0.1005, + "num_input_tokens_seen": 193161472, + "step": 61370 + }, + { + "epoch": 3.929005825491326, + "grad_norm": 7.18705940246582, + "learning_rate": 6.669397350909162e-07, + "loss": 0.1638, + "num_input_tokens_seen": 193176512, + "step": 61375 + }, + { + "epoch": 3.9293259074323026, + "grad_norm": 9.326898574829102, + "learning_rate": 6.6655990448939e-07, + "loss": 0.2051, + "num_input_tokens_seen": 193192000, + "step": 61380 + }, + { + "epoch": 3.9296459893732796, + "grad_norm": 5.992394924163818, + "learning_rate": 6.66180165440313e-07, + "loss": 0.1869, + "num_input_tokens_seen": 193208128, + "step": 61385 + }, + { + "epoch": 3.9299660713142566, + "grad_norm": 9.805194854736328, + "learning_rate": 6.658005179626462e-07, + "loss": 0.1991, + "num_input_tokens_seen": 193222592, + "step": 61390 + }, + { + "epoch": 3.930286153255233, + "grad_norm": 7.561500549316406, + "learning_rate": 6.65420962075348e-07, + "loss": 0.1891, + "num_input_tokens_seen": 193237760, + "step": 61395 + }, + { + "epoch": 3.93060623519621, + "grad_norm": 6.980175971984863, + "learning_rate": 6.650414977973707e-07, + "loss": 0.1598, + "num_input_tokens_seen": 193254336, + "step": 61400 + }, + { + "epoch": 3.930926317137187, + "grad_norm": 6.759453773498535, + "learning_rate": 6.646621251476628e-07, + "loss": 0.1265, + "num_input_tokens_seen": 193269824, + "step": 61405 + }, + { + "epoch": 3.931246399078164, + "grad_norm": 37.663673400878906, + "learning_rate": 6.642828441451687e-07, + "loss": 0.2535, + "num_input_tokens_seen": 193286336, + "step": 61410 + }, + { + "epoch": 3.931566481019141, + "grad_norm": 9.840764999389648, + "learning_rate": 6.639036548088268e-07, + "loss": 0.1526, + "num_input_tokens_seen": 193302848, + "step": 61415 + }, + { + "epoch": 3.9318865629601176, + "grad_norm": 9.652499198913574, + "learning_rate": 6.635245571575741e-07, + "loss": 0.1592, + "num_input_tokens_seen": 193318976, + "step": 61420 + }, + { + "epoch": 3.9322066449010946, + "grad_norm": 4.271590709686279, + "learning_rate": 6.631455512103375e-07, + "loss": 0.1106, + "num_input_tokens_seen": 193333632, + "step": 61425 + }, + { + "epoch": 3.9325267268420716, + "grad_norm": 7.447033882141113, + "learning_rate": 6.62766636986045e-07, + "loss": 0.1728, + "num_input_tokens_seen": 193349568, + "step": 61430 + }, + { + "epoch": 3.9328468087830486, + "grad_norm": 8.500179290771484, + "learning_rate": 6.623878145036166e-07, + "loss": 0.124, + "num_input_tokens_seen": 193364288, + "step": 61435 + }, + { + "epoch": 3.933166890724025, + "grad_norm": 8.39038372039795, + "learning_rate": 6.620090837819693e-07, + "loss": 0.1758, + "num_input_tokens_seen": 193379648, + "step": 61440 + }, + { + "epoch": 3.933486972665002, + "grad_norm": 5.196012020111084, + "learning_rate": 6.616304448400146e-07, + "loss": 0.2264, + "num_input_tokens_seen": 193394944, + "step": 61445 + }, + { + "epoch": 3.933807054605979, + "grad_norm": 6.908936500549316, + "learning_rate": 6.612518976966589e-07, + "loss": 0.1586, + "num_input_tokens_seen": 193410432, + "step": 61450 + }, + { + "epoch": 3.934127136546956, + "grad_norm": 7.336106300354004, + "learning_rate": 6.608734423708074e-07, + "loss": 0.1256, + "num_input_tokens_seen": 193426304, + "step": 61455 + }, + { + "epoch": 3.934447218487933, + "grad_norm": 5.581843852996826, + "learning_rate": 6.604950788813549e-07, + "loss": 0.1297, + "num_input_tokens_seen": 193441664, + "step": 61460 + }, + { + "epoch": 3.9347673004289097, + "grad_norm": 4.881640434265137, + "learning_rate": 6.601168072471973e-07, + "loss": 0.1317, + "num_input_tokens_seen": 193458944, + "step": 61465 + }, + { + "epoch": 3.9350873823698866, + "grad_norm": 5.441731929779053, + "learning_rate": 6.597386274872228e-07, + "loss": 0.1677, + "num_input_tokens_seen": 193474112, + "step": 61470 + }, + { + "epoch": 3.9354074643108636, + "grad_norm": 7.458133220672607, + "learning_rate": 6.593605396203154e-07, + "loss": 0.1896, + "num_input_tokens_seen": 193489088, + "step": 61475 + }, + { + "epoch": 3.9357275462518406, + "grad_norm": 7.160656452178955, + "learning_rate": 6.589825436653554e-07, + "loss": 0.1522, + "num_input_tokens_seen": 193504384, + "step": 61480 + }, + { + "epoch": 3.936047628192817, + "grad_norm": 8.558096885681152, + "learning_rate": 6.586046396412177e-07, + "loss": 0.1566, + "num_input_tokens_seen": 193521664, + "step": 61485 + }, + { + "epoch": 3.936367710133794, + "grad_norm": 11.321138381958008, + "learning_rate": 6.582268275667725e-07, + "loss": 0.2193, + "num_input_tokens_seen": 193537920, + "step": 61490 + }, + { + "epoch": 3.936687792074771, + "grad_norm": 5.906735420227051, + "learning_rate": 6.578491074608864e-07, + "loss": 0.136, + "num_input_tokens_seen": 193553280, + "step": 61495 + }, + { + "epoch": 3.937007874015748, + "grad_norm": 6.278759956359863, + "learning_rate": 6.574714793424195e-07, + "loss": 0.1589, + "num_input_tokens_seen": 193568192, + "step": 61500 + }, + { + "epoch": 3.937327955956725, + "grad_norm": 5.373173236846924, + "learning_rate": 6.570939432302303e-07, + "loss": 0.1547, + "num_input_tokens_seen": 193584000, + "step": 61505 + }, + { + "epoch": 3.9376480378977017, + "grad_norm": 6.470065593719482, + "learning_rate": 6.567164991431704e-07, + "loss": 0.1701, + "num_input_tokens_seen": 193599872, + "step": 61510 + }, + { + "epoch": 3.9379681198386787, + "grad_norm": 5.777068614959717, + "learning_rate": 6.563391471000871e-07, + "loss": 0.2501, + "num_input_tokens_seen": 193616768, + "step": 61515 + }, + { + "epoch": 3.9382882017796557, + "grad_norm": 5.5384321212768555, + "learning_rate": 6.559618871198237e-07, + "loss": 0.1324, + "num_input_tokens_seen": 193632768, + "step": 61520 + }, + { + "epoch": 3.938608283720632, + "grad_norm": 7.2986249923706055, + "learning_rate": 6.555847192212175e-07, + "loss": 0.1442, + "num_input_tokens_seen": 193648320, + "step": 61525 + }, + { + "epoch": 3.938928365661609, + "grad_norm": 11.032890319824219, + "learning_rate": 6.552076434231047e-07, + "loss": 0.1977, + "num_input_tokens_seen": 193664128, + "step": 61530 + }, + { + "epoch": 3.939248447602586, + "grad_norm": 11.795755386352539, + "learning_rate": 6.548306597443116e-07, + "loss": 0.2114, + "num_input_tokens_seen": 193679488, + "step": 61535 + }, + { + "epoch": 3.939568529543563, + "grad_norm": 6.8732171058654785, + "learning_rate": 6.544537682036655e-07, + "loss": 0.1585, + "num_input_tokens_seen": 193694848, + "step": 61540 + }, + { + "epoch": 3.93988861148454, + "grad_norm": 14.268680572509766, + "learning_rate": 6.540769688199838e-07, + "loss": 0.1558, + "num_input_tokens_seen": 193710464, + "step": 61545 + }, + { + "epoch": 3.940208693425517, + "grad_norm": 6.862219333648682, + "learning_rate": 6.537002616120839e-07, + "loss": 0.2872, + "num_input_tokens_seen": 193725248, + "step": 61550 + }, + { + "epoch": 3.9405287753664937, + "grad_norm": 25.0738525390625, + "learning_rate": 6.53323646598776e-07, + "loss": 0.1698, + "num_input_tokens_seen": 193740672, + "step": 61555 + }, + { + "epoch": 3.9408488573074707, + "grad_norm": 12.294326782226562, + "learning_rate": 6.529471237988654e-07, + "loss": 0.1753, + "num_input_tokens_seen": 193756160, + "step": 61560 + }, + { + "epoch": 3.9411689392484477, + "grad_norm": 11.410971641540527, + "learning_rate": 6.525706932311559e-07, + "loss": 0.1949, + "num_input_tokens_seen": 193770880, + "step": 61565 + }, + { + "epoch": 3.9414890211894242, + "grad_norm": 8.97628116607666, + "learning_rate": 6.521943549144416e-07, + "loss": 0.1703, + "num_input_tokens_seen": 193786368, + "step": 61570 + }, + { + "epoch": 3.9418091031304012, + "grad_norm": 2.7551205158233643, + "learning_rate": 6.51818108867518e-07, + "loss": 0.1146, + "num_input_tokens_seen": 193802496, + "step": 61575 + }, + { + "epoch": 3.942129185071378, + "grad_norm": 7.645575523376465, + "learning_rate": 6.514419551091697e-07, + "loss": 0.3015, + "num_input_tokens_seen": 193817728, + "step": 61580 + }, + { + "epoch": 3.942449267012355, + "grad_norm": 8.436867713928223, + "learning_rate": 6.51065893658182e-07, + "loss": 0.1662, + "num_input_tokens_seen": 193833024, + "step": 61585 + }, + { + "epoch": 3.942769348953332, + "grad_norm": 7.281113147735596, + "learning_rate": 6.506899245333329e-07, + "loss": 0.1407, + "num_input_tokens_seen": 193848640, + "step": 61590 + }, + { + "epoch": 3.943089430894309, + "grad_norm": 7.008751392364502, + "learning_rate": 6.503140477533962e-07, + "loss": 0.1651, + "num_input_tokens_seen": 193863040, + "step": 61595 + }, + { + "epoch": 3.9434095128352857, + "grad_norm": 6.98078727722168, + "learning_rate": 6.499382633371412e-07, + "loss": 0.109, + "num_input_tokens_seen": 193879168, + "step": 61600 + }, + { + "epoch": 3.9437295947762627, + "grad_norm": 5.66385555267334, + "learning_rate": 6.495625713033329e-07, + "loss": 0.1581, + "num_input_tokens_seen": 193894720, + "step": 61605 + }, + { + "epoch": 3.9440496767172397, + "grad_norm": 9.59770393371582, + "learning_rate": 6.491869716707305e-07, + "loss": 0.1539, + "num_input_tokens_seen": 193910208, + "step": 61610 + }, + { + "epoch": 3.9443697586582163, + "grad_norm": 7.582624435424805, + "learning_rate": 6.488114644580909e-07, + "loss": 0.2289, + "num_input_tokens_seen": 193926528, + "step": 61615 + }, + { + "epoch": 3.9446898405991933, + "grad_norm": 4.2850141525268555, + "learning_rate": 6.484360496841641e-07, + "loss": 0.1422, + "num_input_tokens_seen": 193941760, + "step": 61620 + }, + { + "epoch": 3.9450099225401702, + "grad_norm": 6.5510053634643555, + "learning_rate": 6.480607273676967e-07, + "loss": 0.1784, + "num_input_tokens_seen": 193957376, + "step": 61625 + }, + { + "epoch": 3.9453300044811472, + "grad_norm": 6.865854263305664, + "learning_rate": 6.476854975274302e-07, + "loss": 0.1341, + "num_input_tokens_seen": 193974784, + "step": 61630 + }, + { + "epoch": 3.9456500864221242, + "grad_norm": 8.227691650390625, + "learning_rate": 6.473103601821015e-07, + "loss": 0.1264, + "num_input_tokens_seen": 193991168, + "step": 61635 + }, + { + "epoch": 3.945970168363101, + "grad_norm": 8.775341987609863, + "learning_rate": 6.469353153504431e-07, + "loss": 0.1234, + "num_input_tokens_seen": 194006144, + "step": 61640 + }, + { + "epoch": 3.9462902503040778, + "grad_norm": 6.067750453948975, + "learning_rate": 6.465603630511822e-07, + "loss": 0.2083, + "num_input_tokens_seen": 194022336, + "step": 61645 + }, + { + "epoch": 3.9466103322450548, + "grad_norm": 10.400042533874512, + "learning_rate": 6.461855033030437e-07, + "loss": 0.2178, + "num_input_tokens_seen": 194037504, + "step": 61650 + }, + { + "epoch": 3.9469304141860317, + "grad_norm": 8.032817840576172, + "learning_rate": 6.458107361247437e-07, + "loss": 0.1497, + "num_input_tokens_seen": 194052544, + "step": 61655 + }, + { + "epoch": 3.9472504961270083, + "grad_norm": 25.76153564453125, + "learning_rate": 6.454360615349983e-07, + "loss": 0.1428, + "num_input_tokens_seen": 194067968, + "step": 61660 + }, + { + "epoch": 3.9475705780679853, + "grad_norm": 5.691476821899414, + "learning_rate": 6.450614795525159e-07, + "loss": 0.2099, + "num_input_tokens_seen": 194083904, + "step": 61665 + }, + { + "epoch": 3.9478906600089623, + "grad_norm": 11.759317398071289, + "learning_rate": 6.44686990196001e-07, + "loss": 0.1603, + "num_input_tokens_seen": 194099136, + "step": 61670 + }, + { + "epoch": 3.9482107419499393, + "grad_norm": 3.324213981628418, + "learning_rate": 6.44312593484154e-07, + "loss": 0.145, + "num_input_tokens_seen": 194114176, + "step": 61675 + }, + { + "epoch": 3.9485308238909163, + "grad_norm": 7.108952045440674, + "learning_rate": 6.439382894356694e-07, + "loss": 0.1703, + "num_input_tokens_seen": 194129664, + "step": 61680 + }, + { + "epoch": 3.948850905831893, + "grad_norm": 4.018266201019287, + "learning_rate": 6.435640780692401e-07, + "loss": 0.1856, + "num_input_tokens_seen": 194144576, + "step": 61685 + }, + { + "epoch": 3.94917098777287, + "grad_norm": 4.61806058883667, + "learning_rate": 6.431899594035495e-07, + "loss": 0.2219, + "num_input_tokens_seen": 194160256, + "step": 61690 + }, + { + "epoch": 3.949491069713847, + "grad_norm": 3.8122241497039795, + "learning_rate": 6.428159334572814e-07, + "loss": 0.1516, + "num_input_tokens_seen": 194175488, + "step": 61695 + }, + { + "epoch": 3.9498111516548238, + "grad_norm": 8.367715835571289, + "learning_rate": 6.424420002491116e-07, + "loss": 0.1925, + "num_input_tokens_seen": 194190464, + "step": 61700 + }, + { + "epoch": 3.9501312335958003, + "grad_norm": 10.592341423034668, + "learning_rate": 6.420681597977129e-07, + "loss": 0.183, + "num_input_tokens_seen": 194206208, + "step": 61705 + }, + { + "epoch": 3.9504513155367773, + "grad_norm": 7.339032173156738, + "learning_rate": 6.416944121217527e-07, + "loss": 0.231, + "num_input_tokens_seen": 194222208, + "step": 61710 + }, + { + "epoch": 3.9507713974777543, + "grad_norm": 34.243350982666016, + "learning_rate": 6.413207572398938e-07, + "loss": 0.2852, + "num_input_tokens_seen": 194238784, + "step": 61715 + }, + { + "epoch": 3.9510914794187313, + "grad_norm": 8.378280639648438, + "learning_rate": 6.40947195170795e-07, + "loss": 0.2077, + "num_input_tokens_seen": 194254272, + "step": 61720 + }, + { + "epoch": 3.9514115613597083, + "grad_norm": 7.601789474487305, + "learning_rate": 6.405737259331096e-07, + "loss": 0.1523, + "num_input_tokens_seen": 194269888, + "step": 61725 + }, + { + "epoch": 3.951731643300685, + "grad_norm": 8.7802734375, + "learning_rate": 6.402003495454864e-07, + "loss": 0.1398, + "num_input_tokens_seen": 194284544, + "step": 61730 + }, + { + "epoch": 3.952051725241662, + "grad_norm": 7.522892475128174, + "learning_rate": 6.398270660265709e-07, + "loss": 0.1711, + "num_input_tokens_seen": 194298752, + "step": 61735 + }, + { + "epoch": 3.952371807182639, + "grad_norm": 7.39864444732666, + "learning_rate": 6.394538753950028e-07, + "loss": 0.1687, + "num_input_tokens_seen": 194313856, + "step": 61740 + }, + { + "epoch": 3.952691889123616, + "grad_norm": 6.398730754852295, + "learning_rate": 6.390807776694166e-07, + "loss": 0.1807, + "num_input_tokens_seen": 194329280, + "step": 61745 + }, + { + "epoch": 3.9530119710645923, + "grad_norm": 19.618350982666016, + "learning_rate": 6.387077728684432e-07, + "loss": 0.1258, + "num_input_tokens_seen": 194344320, + "step": 61750 + }, + { + "epoch": 3.9533320530055693, + "grad_norm": 5.95163631439209, + "learning_rate": 6.383348610107079e-07, + "loss": 0.1547, + "num_input_tokens_seen": 194361088, + "step": 61755 + }, + { + "epoch": 3.9536521349465463, + "grad_norm": 6.370901107788086, + "learning_rate": 6.379620421148342e-07, + "loss": 0.1649, + "num_input_tokens_seen": 194378240, + "step": 61760 + }, + { + "epoch": 3.9539722168875233, + "grad_norm": 5.236616611480713, + "learning_rate": 6.375893161994353e-07, + "loss": 0.0695, + "num_input_tokens_seen": 194393728, + "step": 61765 + }, + { + "epoch": 3.9542922988285003, + "grad_norm": 4.212461471557617, + "learning_rate": 6.372166832831267e-07, + "loss": 0.1269, + "num_input_tokens_seen": 194410432, + "step": 61770 + }, + { + "epoch": 3.954612380769477, + "grad_norm": 5.329573154449463, + "learning_rate": 6.368441433845126e-07, + "loss": 0.1109, + "num_input_tokens_seen": 194426240, + "step": 61775 + }, + { + "epoch": 3.954932462710454, + "grad_norm": 6.646602153778076, + "learning_rate": 6.364716965221978e-07, + "loss": 0.1346, + "num_input_tokens_seen": 194441408, + "step": 61780 + }, + { + "epoch": 3.955252544651431, + "grad_norm": 43.07939147949219, + "learning_rate": 6.360993427147796e-07, + "loss": 0.2338, + "num_input_tokens_seen": 194458752, + "step": 61785 + }, + { + "epoch": 3.9555726265924074, + "grad_norm": 6.863036632537842, + "learning_rate": 6.357270819808509e-07, + "loss": 0.1724, + "num_input_tokens_seen": 194475904, + "step": 61790 + }, + { + "epoch": 3.9558927085333844, + "grad_norm": 10.108332633972168, + "learning_rate": 6.353549143390025e-07, + "loss": 0.1906, + "num_input_tokens_seen": 194491264, + "step": 61795 + }, + { + "epoch": 3.9562127904743614, + "grad_norm": 30.982995986938477, + "learning_rate": 6.349828398078159e-07, + "loss": 0.1908, + "num_input_tokens_seen": 194506432, + "step": 61800 + }, + { + "epoch": 3.9565328724153384, + "grad_norm": 3.332963705062866, + "learning_rate": 6.34610858405873e-07, + "loss": 0.1164, + "num_input_tokens_seen": 194522240, + "step": 61805 + }, + { + "epoch": 3.9568529543563153, + "grad_norm": 7.152875900268555, + "learning_rate": 6.34238970151746e-07, + "loss": 0.1896, + "num_input_tokens_seen": 194537984, + "step": 61810 + }, + { + "epoch": 3.9571730362972923, + "grad_norm": 5.780649662017822, + "learning_rate": 6.338671750640068e-07, + "loss": 0.2008, + "num_input_tokens_seen": 194553344, + "step": 61815 + }, + { + "epoch": 3.957493118238269, + "grad_norm": 5.310932159423828, + "learning_rate": 6.334954731612211e-07, + "loss": 0.1522, + "num_input_tokens_seen": 194569216, + "step": 61820 + }, + { + "epoch": 3.957813200179246, + "grad_norm": 5.209805488586426, + "learning_rate": 6.331238644619489e-07, + "loss": 0.1726, + "num_input_tokens_seen": 194584448, + "step": 61825 + }, + { + "epoch": 3.958133282120223, + "grad_norm": 9.290993690490723, + "learning_rate": 6.327523489847468e-07, + "loss": 0.2159, + "num_input_tokens_seen": 194599680, + "step": 61830 + }, + { + "epoch": 3.9584533640611994, + "grad_norm": 6.083215713500977, + "learning_rate": 6.323809267481662e-07, + "loss": 0.1385, + "num_input_tokens_seen": 194615808, + "step": 61835 + }, + { + "epoch": 3.9587734460021764, + "grad_norm": 8.823843955993652, + "learning_rate": 6.320095977707533e-07, + "loss": 0.2075, + "num_input_tokens_seen": 194631232, + "step": 61840 + }, + { + "epoch": 3.9590935279431534, + "grad_norm": 7.6382365226745605, + "learning_rate": 6.316383620710526e-07, + "loss": 0.222, + "num_input_tokens_seen": 194647680, + "step": 61845 + }, + { + "epoch": 3.9594136098841304, + "grad_norm": 6.162361145019531, + "learning_rate": 6.312672196675986e-07, + "loss": 0.1296, + "num_input_tokens_seen": 194663168, + "step": 61850 + }, + { + "epoch": 3.9597336918251074, + "grad_norm": 58.67809295654297, + "learning_rate": 6.308961705789265e-07, + "loss": 0.1991, + "num_input_tokens_seen": 194679168, + "step": 61855 + }, + { + "epoch": 3.9600537737660844, + "grad_norm": 5.327035427093506, + "learning_rate": 6.305252148235638e-07, + "loss": 0.1225, + "num_input_tokens_seen": 194694656, + "step": 61860 + }, + { + "epoch": 3.960373855707061, + "grad_norm": 5.103816032409668, + "learning_rate": 6.301543524200343e-07, + "loss": 0.1924, + "num_input_tokens_seen": 194710016, + "step": 61865 + }, + { + "epoch": 3.960693937648038, + "grad_norm": 3.1403303146362305, + "learning_rate": 6.297835833868568e-07, + "loss": 0.182, + "num_input_tokens_seen": 194725376, + "step": 61870 + }, + { + "epoch": 3.961014019589015, + "grad_norm": 17.46578598022461, + "learning_rate": 6.294129077425445e-07, + "loss": 0.187, + "num_input_tokens_seen": 194741440, + "step": 61875 + }, + { + "epoch": 3.9613341015299914, + "grad_norm": 6.366185665130615, + "learning_rate": 6.2904232550561e-07, + "loss": 0.1324, + "num_input_tokens_seen": 194756800, + "step": 61880 + }, + { + "epoch": 3.9616541834709684, + "grad_norm": 11.41672134399414, + "learning_rate": 6.286718366945543e-07, + "loss": 0.1479, + "num_input_tokens_seen": 194771776, + "step": 61885 + }, + { + "epoch": 3.9619742654119454, + "grad_norm": 5.069608688354492, + "learning_rate": 6.28301441327881e-07, + "loss": 0.1099, + "num_input_tokens_seen": 194786432, + "step": 61890 + }, + { + "epoch": 3.9622943473529224, + "grad_norm": 4.035638332366943, + "learning_rate": 6.279311394240839e-07, + "loss": 0.0988, + "num_input_tokens_seen": 194801984, + "step": 61895 + }, + { + "epoch": 3.9626144292938994, + "grad_norm": 5.399405479431152, + "learning_rate": 6.275609310016544e-07, + "loss": 0.1369, + "num_input_tokens_seen": 194816064, + "step": 61900 + }, + { + "epoch": 3.9629345112348764, + "grad_norm": 7.77336311340332, + "learning_rate": 6.271908160790794e-07, + "loss": 0.1972, + "num_input_tokens_seen": 194831424, + "step": 61905 + }, + { + "epoch": 3.963254593175853, + "grad_norm": 4.090097904205322, + "learning_rate": 6.268207946748387e-07, + "loss": 0.1671, + "num_input_tokens_seen": 194846720, + "step": 61910 + }, + { + "epoch": 3.96357467511683, + "grad_norm": 11.070816040039062, + "learning_rate": 6.26450866807412e-07, + "loss": 0.1602, + "num_input_tokens_seen": 194863360, + "step": 61915 + }, + { + "epoch": 3.963894757057807, + "grad_norm": 12.875285148620605, + "learning_rate": 6.260810324952688e-07, + "loss": 0.249, + "num_input_tokens_seen": 194879104, + "step": 61920 + }, + { + "epoch": 3.9642148389987835, + "grad_norm": 7.048861503601074, + "learning_rate": 6.257112917568787e-07, + "loss": 0.1858, + "num_input_tokens_seen": 194893632, + "step": 61925 + }, + { + "epoch": 3.9645349209397605, + "grad_norm": 6.3127593994140625, + "learning_rate": 6.253416446107036e-07, + "loss": 0.1371, + "num_input_tokens_seen": 194909632, + "step": 61930 + }, + { + "epoch": 3.9648550028807374, + "grad_norm": 8.379654884338379, + "learning_rate": 6.249720910752022e-07, + "loss": 0.1251, + "num_input_tokens_seen": 194925632, + "step": 61935 + }, + { + "epoch": 3.9651750848217144, + "grad_norm": 7.618344306945801, + "learning_rate": 6.24602631168828e-07, + "loss": 0.1424, + "num_input_tokens_seen": 194942784, + "step": 61940 + }, + { + "epoch": 3.9654951667626914, + "grad_norm": 6.816864967346191, + "learning_rate": 6.242332649100297e-07, + "loss": 0.1561, + "num_input_tokens_seen": 194958720, + "step": 61945 + }, + { + "epoch": 3.9658152487036684, + "grad_norm": 13.847583770751953, + "learning_rate": 6.238639923172515e-07, + "loss": 0.1491, + "num_input_tokens_seen": 194975872, + "step": 61950 + }, + { + "epoch": 3.966135330644645, + "grad_norm": 12.300398826599121, + "learning_rate": 6.234948134089333e-07, + "loss": 0.1673, + "num_input_tokens_seen": 194991808, + "step": 61955 + }, + { + "epoch": 3.966455412585622, + "grad_norm": 5.4627532958984375, + "learning_rate": 6.231257282035088e-07, + "loss": 0.2584, + "num_input_tokens_seen": 195006976, + "step": 61960 + }, + { + "epoch": 3.966775494526599, + "grad_norm": 4.124691486358643, + "learning_rate": 6.227567367194101e-07, + "loss": 0.1614, + "num_input_tokens_seen": 195023360, + "step": 61965 + }, + { + "epoch": 3.9670955764675755, + "grad_norm": 12.06868839263916, + "learning_rate": 6.223878389750618e-07, + "loss": 0.1806, + "num_input_tokens_seen": 195039296, + "step": 61970 + }, + { + "epoch": 3.9674156584085525, + "grad_norm": 7.305934906005859, + "learning_rate": 6.220190349888843e-07, + "loss": 0.1318, + "num_input_tokens_seen": 195054336, + "step": 61975 + }, + { + "epoch": 3.9677357403495295, + "grad_norm": 10.453595161437988, + "learning_rate": 6.216503247792946e-07, + "loss": 0.1347, + "num_input_tokens_seen": 195069696, + "step": 61980 + }, + { + "epoch": 3.9680558222905065, + "grad_norm": 3.5916476249694824, + "learning_rate": 6.21281708364703e-07, + "loss": 0.1617, + "num_input_tokens_seen": 195084992, + "step": 61985 + }, + { + "epoch": 3.9683759042314835, + "grad_norm": 8.28137493133545, + "learning_rate": 6.209131857635181e-07, + "loss": 0.1866, + "num_input_tokens_seen": 195102208, + "step": 61990 + }, + { + "epoch": 3.96869598617246, + "grad_norm": 4.898381233215332, + "learning_rate": 6.205447569941398e-07, + "loss": 0.1466, + "num_input_tokens_seen": 195117056, + "step": 61995 + }, + { + "epoch": 3.969016068113437, + "grad_norm": 7.0111846923828125, + "learning_rate": 6.201764220749678e-07, + "loss": 0.1537, + "num_input_tokens_seen": 195132736, + "step": 62000 + }, + { + "epoch": 3.969336150054414, + "grad_norm": 8.864232063293457, + "learning_rate": 6.198081810243922e-07, + "loss": 0.1636, + "num_input_tokens_seen": 195149760, + "step": 62005 + }, + { + "epoch": 3.969656231995391, + "grad_norm": 8.487491607666016, + "learning_rate": 6.194400338608034e-07, + "loss": 0.1442, + "num_input_tokens_seen": 195165056, + "step": 62010 + }, + { + "epoch": 3.9699763139363675, + "grad_norm": 8.349640846252441, + "learning_rate": 6.190719806025835e-07, + "loss": 0.1662, + "num_input_tokens_seen": 195180224, + "step": 62015 + }, + { + "epoch": 3.9702963958773445, + "grad_norm": 6.714748859405518, + "learning_rate": 6.187040212681117e-07, + "loss": 0.1082, + "num_input_tokens_seen": 195199296, + "step": 62020 + }, + { + "epoch": 3.9706164778183215, + "grad_norm": 5.37772798538208, + "learning_rate": 6.183361558757619e-07, + "loss": 0.22, + "num_input_tokens_seen": 195215232, + "step": 62025 + }, + { + "epoch": 3.9709365597592985, + "grad_norm": 25.2811336517334, + "learning_rate": 6.179683844439025e-07, + "loss": 0.1818, + "num_input_tokens_seen": 195229632, + "step": 62030 + }, + { + "epoch": 3.9712566417002755, + "grad_norm": 6.722928524017334, + "learning_rate": 6.176007069909002e-07, + "loss": 0.1776, + "num_input_tokens_seen": 195245184, + "step": 62035 + }, + { + "epoch": 3.971576723641252, + "grad_norm": 5.6627936363220215, + "learning_rate": 6.17233123535112e-07, + "loss": 0.1291, + "num_input_tokens_seen": 195260416, + "step": 62040 + }, + { + "epoch": 3.971896805582229, + "grad_norm": 6.504760265350342, + "learning_rate": 6.168656340948953e-07, + "loss": 0.1825, + "num_input_tokens_seen": 195276544, + "step": 62045 + }, + { + "epoch": 3.972216887523206, + "grad_norm": 8.94705581665039, + "learning_rate": 6.164982386886001e-07, + "loss": 0.1618, + "num_input_tokens_seen": 195294528, + "step": 62050 + }, + { + "epoch": 3.9725369694641826, + "grad_norm": 6.267898082733154, + "learning_rate": 6.16130937334572e-07, + "loss": 0.1169, + "num_input_tokens_seen": 195310080, + "step": 62055 + }, + { + "epoch": 3.9728570514051595, + "grad_norm": 12.250431060791016, + "learning_rate": 6.15763730051152e-07, + "loss": 0.1439, + "num_input_tokens_seen": 195326208, + "step": 62060 + }, + { + "epoch": 3.9731771333461365, + "grad_norm": 9.921348571777344, + "learning_rate": 6.153966168566769e-07, + "loss": 0.1621, + "num_input_tokens_seen": 195341824, + "step": 62065 + }, + { + "epoch": 3.9734972152871135, + "grad_norm": 7.0948286056518555, + "learning_rate": 6.150295977694776e-07, + "loss": 0.1332, + "num_input_tokens_seen": 195357440, + "step": 62070 + }, + { + "epoch": 3.9738172972280905, + "grad_norm": 17.13677978515625, + "learning_rate": 6.14662672807883e-07, + "loss": 0.1515, + "num_input_tokens_seen": 195373312, + "step": 62075 + }, + { + "epoch": 3.9741373791690675, + "grad_norm": 5.930923938751221, + "learning_rate": 6.142958419902129e-07, + "loss": 0.1444, + "num_input_tokens_seen": 195388544, + "step": 62080 + }, + { + "epoch": 3.974457461110044, + "grad_norm": 81.13789367675781, + "learning_rate": 6.139291053347868e-07, + "loss": 0.2397, + "num_input_tokens_seen": 195403904, + "step": 62085 + }, + { + "epoch": 3.974777543051021, + "grad_norm": 6.545350551605225, + "learning_rate": 6.135624628599169e-07, + "loss": 0.148, + "num_input_tokens_seen": 195420288, + "step": 62090 + }, + { + "epoch": 3.975097624991998, + "grad_norm": 8.250507354736328, + "learning_rate": 6.131959145839114e-07, + "loss": 0.1955, + "num_input_tokens_seen": 195436608, + "step": 62095 + }, + { + "epoch": 3.9754177069329746, + "grad_norm": 8.353729248046875, + "learning_rate": 6.128294605250743e-07, + "loss": 0.1525, + "num_input_tokens_seen": 195451264, + "step": 62100 + }, + { + "epoch": 3.9757377888739516, + "grad_norm": 5.632579326629639, + "learning_rate": 6.124631007017029e-07, + "loss": 0.1367, + "num_input_tokens_seen": 195465728, + "step": 62105 + }, + { + "epoch": 3.9760578708149286, + "grad_norm": 8.744921684265137, + "learning_rate": 6.120968351320941e-07, + "loss": 0.153, + "num_input_tokens_seen": 195482112, + "step": 62110 + }, + { + "epoch": 3.9763779527559056, + "grad_norm": 8.367347717285156, + "learning_rate": 6.117306638345341e-07, + "loss": 0.1983, + "num_input_tokens_seen": 195498368, + "step": 62115 + }, + { + "epoch": 3.9766980346968825, + "grad_norm": 9.486268043518066, + "learning_rate": 6.113645868273099e-07, + "loss": 0.1494, + "num_input_tokens_seen": 195513280, + "step": 62120 + }, + { + "epoch": 3.9770181166378595, + "grad_norm": 7.909243106842041, + "learning_rate": 6.109986041287008e-07, + "loss": 0.194, + "num_input_tokens_seen": 195528512, + "step": 62125 + }, + { + "epoch": 3.977338198578836, + "grad_norm": 8.072542190551758, + "learning_rate": 6.106327157569819e-07, + "loss": 0.2468, + "num_input_tokens_seen": 195543104, + "step": 62130 + }, + { + "epoch": 3.977658280519813, + "grad_norm": 19.46561622619629, + "learning_rate": 6.102669217304236e-07, + "loss": 0.1486, + "num_input_tokens_seen": 195558272, + "step": 62135 + }, + { + "epoch": 3.97797836246079, + "grad_norm": 8.17498779296875, + "learning_rate": 6.099012220672914e-07, + "loss": 0.1698, + "num_input_tokens_seen": 195573632, + "step": 62140 + }, + { + "epoch": 3.9782984444017666, + "grad_norm": 5.849461078643799, + "learning_rate": 6.095356167858484e-07, + "loss": 0.1665, + "num_input_tokens_seen": 195590592, + "step": 62145 + }, + { + "epoch": 3.9786185263427436, + "grad_norm": 7.13538932800293, + "learning_rate": 6.091701059043484e-07, + "loss": 0.2224, + "num_input_tokens_seen": 195606592, + "step": 62150 + }, + { + "epoch": 3.9789386082837206, + "grad_norm": 6.367821216583252, + "learning_rate": 6.088046894410448e-07, + "loss": 0.151, + "num_input_tokens_seen": 195622656, + "step": 62155 + }, + { + "epoch": 3.9792586902246976, + "grad_norm": 3.6173439025878906, + "learning_rate": 6.084393674141842e-07, + "loss": 0.1325, + "num_input_tokens_seen": 195638080, + "step": 62160 + }, + { + "epoch": 3.9795787721656746, + "grad_norm": 4.473767280578613, + "learning_rate": 6.080741398420089e-07, + "loss": 0.1371, + "num_input_tokens_seen": 195654016, + "step": 62165 + }, + { + "epoch": 3.9798988541066516, + "grad_norm": 12.999070167541504, + "learning_rate": 6.077090067427563e-07, + "loss": 0.1487, + "num_input_tokens_seen": 195669504, + "step": 62170 + }, + { + "epoch": 3.980218936047628, + "grad_norm": 5.066305637359619, + "learning_rate": 6.073439681346593e-07, + "loss": 0.1872, + "num_input_tokens_seen": 195684288, + "step": 62175 + }, + { + "epoch": 3.980539017988605, + "grad_norm": 11.778057098388672, + "learning_rate": 6.069790240359461e-07, + "loss": 0.2259, + "num_input_tokens_seen": 195700224, + "step": 62180 + }, + { + "epoch": 3.980859099929582, + "grad_norm": 6.938231945037842, + "learning_rate": 6.066141744648401e-07, + "loss": 0.2132, + "num_input_tokens_seen": 195715520, + "step": 62185 + }, + { + "epoch": 3.9811791818705586, + "grad_norm": 5.6122331619262695, + "learning_rate": 6.062494194395591e-07, + "loss": 0.1509, + "num_input_tokens_seen": 195731264, + "step": 62190 + }, + { + "epoch": 3.9814992638115356, + "grad_norm": 8.324445724487305, + "learning_rate": 6.058847589783193e-07, + "loss": 0.2026, + "num_input_tokens_seen": 195746560, + "step": 62195 + }, + { + "epoch": 3.9818193457525126, + "grad_norm": 10.610299110412598, + "learning_rate": 6.055201930993274e-07, + "loss": 0.1928, + "num_input_tokens_seen": 195762304, + "step": 62200 + }, + { + "epoch": 3.9821394276934896, + "grad_norm": 4.614974021911621, + "learning_rate": 6.051557218207896e-07, + "loss": 0.1464, + "num_input_tokens_seen": 195777024, + "step": 62205 + }, + { + "epoch": 3.9824595096344666, + "grad_norm": 5.739782810211182, + "learning_rate": 6.04791345160905e-07, + "loss": 0.1745, + "num_input_tokens_seen": 195792768, + "step": 62210 + }, + { + "epoch": 3.9827795915754436, + "grad_norm": 21.525619506835938, + "learning_rate": 6.044270631378685e-07, + "loss": 0.1862, + "num_input_tokens_seen": 195807360, + "step": 62215 + }, + { + "epoch": 3.98309967351642, + "grad_norm": 50.44179153442383, + "learning_rate": 6.040628757698719e-07, + "loss": 0.2348, + "num_input_tokens_seen": 195823040, + "step": 62220 + }, + { + "epoch": 3.983419755457397, + "grad_norm": 10.870773315429688, + "learning_rate": 6.036987830750984e-07, + "loss": 0.2013, + "num_input_tokens_seen": 195839872, + "step": 62225 + }, + { + "epoch": 3.983739837398374, + "grad_norm": 3.5318315029144287, + "learning_rate": 6.033347850717316e-07, + "loss": 0.1472, + "num_input_tokens_seen": 195857152, + "step": 62230 + }, + { + "epoch": 3.9840599193393507, + "grad_norm": 6.8044233322143555, + "learning_rate": 6.029708817779447e-07, + "loss": 0.206, + "num_input_tokens_seen": 195872576, + "step": 62235 + }, + { + "epoch": 3.9843800012803277, + "grad_norm": 7.743764877319336, + "learning_rate": 6.026070732119115e-07, + "loss": 0.1288, + "num_input_tokens_seen": 195887744, + "step": 62240 + }, + { + "epoch": 3.9847000832213046, + "grad_norm": 7.547524929046631, + "learning_rate": 6.022433593917981e-07, + "loss": 0.1633, + "num_input_tokens_seen": 195903168, + "step": 62245 + }, + { + "epoch": 3.9850201651622816, + "grad_norm": 4.893002033233643, + "learning_rate": 6.018797403357662e-07, + "loss": 0.0829, + "num_input_tokens_seen": 195920192, + "step": 62250 + }, + { + "epoch": 3.9853402471032586, + "grad_norm": 7.151582717895508, + "learning_rate": 6.015162160619731e-07, + "loss": 0.1791, + "num_input_tokens_seen": 195935680, + "step": 62255 + }, + { + "epoch": 3.985660329044235, + "grad_norm": 6.887661457061768, + "learning_rate": 6.011527865885705e-07, + "loss": 0.1432, + "num_input_tokens_seen": 195950720, + "step": 62260 + }, + { + "epoch": 3.985980410985212, + "grad_norm": 7.731164932250977, + "learning_rate": 6.007894519337088e-07, + "loss": 0.186, + "num_input_tokens_seen": 195966976, + "step": 62265 + }, + { + "epoch": 3.986300492926189, + "grad_norm": 104.61258697509766, + "learning_rate": 6.004262121155275e-07, + "loss": 0.2086, + "num_input_tokens_seen": 195982080, + "step": 62270 + }, + { + "epoch": 3.986620574867166, + "grad_norm": 48.73239517211914, + "learning_rate": 6.000630671521673e-07, + "loss": 0.2099, + "num_input_tokens_seen": 195997952, + "step": 62275 + }, + { + "epoch": 3.9869406568081427, + "grad_norm": 5.536442279815674, + "learning_rate": 5.997000170617612e-07, + "loss": 0.1217, + "num_input_tokens_seen": 196014208, + "step": 62280 + }, + { + "epoch": 3.9872607387491197, + "grad_norm": 6.85652494430542, + "learning_rate": 5.993370618624378e-07, + "loss": 0.1811, + "num_input_tokens_seen": 196031296, + "step": 62285 + }, + { + "epoch": 3.9875808206900967, + "grad_norm": 6.5990681648254395, + "learning_rate": 5.989742015723215e-07, + "loss": 0.1576, + "num_input_tokens_seen": 196046592, + "step": 62290 + }, + { + "epoch": 3.9879009026310737, + "grad_norm": 8.320147514343262, + "learning_rate": 5.986114362095316e-07, + "loss": 0.1303, + "num_input_tokens_seen": 196062016, + "step": 62295 + }, + { + "epoch": 3.9882209845720507, + "grad_norm": 51.234474182128906, + "learning_rate": 5.982487657921818e-07, + "loss": 0.1843, + "num_input_tokens_seen": 196077184, + "step": 62300 + }, + { + "epoch": 3.988541066513027, + "grad_norm": 7.340851783752441, + "learning_rate": 5.978861903383843e-07, + "loss": 0.1572, + "num_input_tokens_seen": 196092864, + "step": 62305 + }, + { + "epoch": 3.988861148454004, + "grad_norm": 8.096890449523926, + "learning_rate": 5.975237098662409e-07, + "loss": 0.1924, + "num_input_tokens_seen": 196107776, + "step": 62310 + }, + { + "epoch": 3.989181230394981, + "grad_norm": 7.70768928527832, + "learning_rate": 5.971613243938548e-07, + "loss": 0.1844, + "num_input_tokens_seen": 196123712, + "step": 62315 + }, + { + "epoch": 3.9895013123359577, + "grad_norm": 9.050369262695312, + "learning_rate": 5.967990339393206e-07, + "loss": 0.17, + "num_input_tokens_seen": 196138560, + "step": 62320 + }, + { + "epoch": 3.9898213942769347, + "grad_norm": 8.857805252075195, + "learning_rate": 5.964368385207292e-07, + "loss": 0.2002, + "num_input_tokens_seen": 196153664, + "step": 62325 + }, + { + "epoch": 3.9901414762179117, + "grad_norm": 7.641764163970947, + "learning_rate": 5.960747381561669e-07, + "loss": 0.126, + "num_input_tokens_seen": 196169536, + "step": 62330 + }, + { + "epoch": 3.9904615581588887, + "grad_norm": 8.264159202575684, + "learning_rate": 5.957127328637141e-07, + "loss": 0.1623, + "num_input_tokens_seen": 196184960, + "step": 62335 + }, + { + "epoch": 3.9907816400998657, + "grad_norm": 7.124316215515137, + "learning_rate": 5.953508226614499e-07, + "loss": 0.2614, + "num_input_tokens_seen": 196201152, + "step": 62340 + }, + { + "epoch": 3.9911017220408427, + "grad_norm": 7.256993770599365, + "learning_rate": 5.949890075674433e-07, + "loss": 0.1714, + "num_input_tokens_seen": 196216960, + "step": 62345 + }, + { + "epoch": 3.9914218039818192, + "grad_norm": 5.044681072235107, + "learning_rate": 5.946272875997636e-07, + "loss": 0.153, + "num_input_tokens_seen": 196232384, + "step": 62350 + }, + { + "epoch": 3.991741885922796, + "grad_norm": 11.499190330505371, + "learning_rate": 5.942656627764723e-07, + "loss": 0.1225, + "num_input_tokens_seen": 196247040, + "step": 62355 + }, + { + "epoch": 3.992061967863773, + "grad_norm": 40.355560302734375, + "learning_rate": 5.93904133115627e-07, + "loss": 0.2269, + "num_input_tokens_seen": 196263744, + "step": 62360 + }, + { + "epoch": 3.9923820498047498, + "grad_norm": 6.880862236022949, + "learning_rate": 5.935426986352811e-07, + "loss": 0.1624, + "num_input_tokens_seen": 196279168, + "step": 62365 + }, + { + "epoch": 3.9927021317457267, + "grad_norm": 12.482935905456543, + "learning_rate": 5.931813593534821e-07, + "loss": 0.1554, + "num_input_tokens_seen": 196295168, + "step": 62370 + }, + { + "epoch": 3.9930222136867037, + "grad_norm": 8.971439361572266, + "learning_rate": 5.92820115288274e-07, + "loss": 0.1292, + "num_input_tokens_seen": 196311616, + "step": 62375 + }, + { + "epoch": 3.9933422956276807, + "grad_norm": 4.1596999168396, + "learning_rate": 5.924589664576946e-07, + "loss": 0.1403, + "num_input_tokens_seen": 196326144, + "step": 62380 + }, + { + "epoch": 3.9936623775686577, + "grad_norm": 7.650587558746338, + "learning_rate": 5.920979128797791e-07, + "loss": 0.1459, + "num_input_tokens_seen": 196342464, + "step": 62385 + }, + { + "epoch": 3.9939824595096347, + "grad_norm": 8.333703994750977, + "learning_rate": 5.917369545725557e-07, + "loss": 0.1534, + "num_input_tokens_seen": 196358400, + "step": 62390 + }, + { + "epoch": 3.9943025414506113, + "grad_norm": 8.710838317871094, + "learning_rate": 5.913760915540493e-07, + "loss": 0.1893, + "num_input_tokens_seen": 196373760, + "step": 62395 + }, + { + "epoch": 3.9946226233915882, + "grad_norm": 8.364852905273438, + "learning_rate": 5.910153238422789e-07, + "loss": 0.133, + "num_input_tokens_seen": 196389632, + "step": 62400 + }, + { + "epoch": 3.9949427053325652, + "grad_norm": 8.059948921203613, + "learning_rate": 5.906546514552599e-07, + "loss": 0.1228, + "num_input_tokens_seen": 196405312, + "step": 62405 + }, + { + "epoch": 3.995262787273542, + "grad_norm": 16.045654296875, + "learning_rate": 5.90294074411002e-07, + "loss": 0.1461, + "num_input_tokens_seen": 196420864, + "step": 62410 + }, + { + "epoch": 3.9955828692145188, + "grad_norm": 5.347639083862305, + "learning_rate": 5.89933592727511e-07, + "loss": 0.2077, + "num_input_tokens_seen": 196436224, + "step": 62415 + }, + { + "epoch": 3.9959029511554958, + "grad_norm": 8.43484115600586, + "learning_rate": 5.895732064227863e-07, + "loss": 0.1708, + "num_input_tokens_seen": 196452416, + "step": 62420 + }, + { + "epoch": 3.9962230330964728, + "grad_norm": 8.232566833496094, + "learning_rate": 5.89212915514826e-07, + "loss": 0.1581, + "num_input_tokens_seen": 196468416, + "step": 62425 + }, + { + "epoch": 3.9965431150374497, + "grad_norm": 4.917001247406006, + "learning_rate": 5.888527200216185e-07, + "loss": 0.1456, + "num_input_tokens_seen": 196483328, + "step": 62430 + }, + { + "epoch": 3.9968631969784267, + "grad_norm": 9.226170539855957, + "learning_rate": 5.884926199611521e-07, + "loss": 0.2012, + "num_input_tokens_seen": 196498752, + "step": 62435 + }, + { + "epoch": 3.9971832789194033, + "grad_norm": 6.947924613952637, + "learning_rate": 5.881326153514075e-07, + "loss": 0.2059, + "num_input_tokens_seen": 196514624, + "step": 62440 + }, + { + "epoch": 3.9975033608603803, + "grad_norm": 7.495025157928467, + "learning_rate": 5.877727062103608e-07, + "loss": 0.2552, + "num_input_tokens_seen": 196529088, + "step": 62445 + }, + { + "epoch": 3.9978234428013573, + "grad_norm": 7.639040946960449, + "learning_rate": 5.874128925559863e-07, + "loss": 0.1713, + "num_input_tokens_seen": 196544128, + "step": 62450 + }, + { + "epoch": 3.998143524742334, + "grad_norm": 10.220236778259277, + "learning_rate": 5.870531744062479e-07, + "loss": 0.2296, + "num_input_tokens_seen": 196558848, + "step": 62455 + }, + { + "epoch": 3.998463606683311, + "grad_norm": 5.132604122161865, + "learning_rate": 5.866935517791114e-07, + "loss": 0.1552, + "num_input_tokens_seen": 196574272, + "step": 62460 + }, + { + "epoch": 3.998783688624288, + "grad_norm": 6.877859592437744, + "learning_rate": 5.863340246925317e-07, + "loss": 0.2365, + "num_input_tokens_seen": 196589696, + "step": 62465 + }, + { + "epoch": 3.999103770565265, + "grad_norm": 7.713548183441162, + "learning_rate": 5.859745931644631e-07, + "loss": 0.124, + "num_input_tokens_seen": 196606656, + "step": 62470 + }, + { + "epoch": 3.9994238525062418, + "grad_norm": 7.593625545501709, + "learning_rate": 5.85615257212854e-07, + "loss": 0.1343, + "num_input_tokens_seen": 196622336, + "step": 62475 + }, + { + "epoch": 3.9997439344472188, + "grad_norm": 8.496943473815918, + "learning_rate": 5.85256016855647e-07, + "loss": 0.1703, + "num_input_tokens_seen": 196637888, + "step": 62480 + }, + { + "epoch": 4.000064016388196, + "grad_norm": 5.756143569946289, + "learning_rate": 5.848968721107812e-07, + "loss": 0.1188, + "num_input_tokens_seen": 196653696, + "step": 62485 + }, + { + "epoch": 4.000384098329172, + "grad_norm": 6.099350929260254, + "learning_rate": 5.845378229961893e-07, + "loss": 0.1062, + "num_input_tokens_seen": 196669312, + "step": 62490 + }, + { + "epoch": 4.000704180270149, + "grad_norm": 5.355601787567139, + "learning_rate": 5.841788695298017e-07, + "loss": 0.1773, + "num_input_tokens_seen": 196685312, + "step": 62495 + }, + { + "epoch": 4.000768196658345, + "eval_loss": 0.5554755926132202, + "eval_runtime": 51.0295, + "eval_samples_per_second": 272.117, + "eval_steps_per_second": 34.02, + "num_input_tokens_seen": 196687936, + "step": 62496 + }, + { + "epoch": 4.001024262211126, + "grad_norm": 6.419754981994629, + "learning_rate": 5.838200117295426e-07, + "loss": 0.1117, + "num_input_tokens_seen": 196700928, + "step": 62500 + }, + { + "epoch": 4.001344344152103, + "grad_norm": 6.43391752243042, + "learning_rate": 5.83461249613331e-07, + "loss": 0.1308, + "num_input_tokens_seen": 196717120, + "step": 62505 + }, + { + "epoch": 4.00166442609308, + "grad_norm": 3.533277988433838, + "learning_rate": 5.831025831990817e-07, + "loss": 0.1165, + "num_input_tokens_seen": 196732608, + "step": 62510 + }, + { + "epoch": 4.001984508034057, + "grad_norm": 6.483725070953369, + "learning_rate": 5.827440125047046e-07, + "loss": 0.0882, + "num_input_tokens_seen": 196748416, + "step": 62515 + }, + { + "epoch": 4.002304589975034, + "grad_norm": 6.90410041809082, + "learning_rate": 5.823855375481049e-07, + "loss": 0.1229, + "num_input_tokens_seen": 196766272, + "step": 62520 + }, + { + "epoch": 4.002624671916011, + "grad_norm": 3.740211009979248, + "learning_rate": 5.820271583471832e-07, + "loss": 0.0923, + "num_input_tokens_seen": 196781632, + "step": 62525 + }, + { + "epoch": 4.002944753856988, + "grad_norm": 6.257179260253906, + "learning_rate": 5.81668874919834e-07, + "loss": 0.1239, + "num_input_tokens_seen": 196797312, + "step": 62530 + }, + { + "epoch": 4.003264835797964, + "grad_norm": 7.552742004394531, + "learning_rate": 5.813106872839503e-07, + "loss": 0.1743, + "num_input_tokens_seen": 196813184, + "step": 62535 + }, + { + "epoch": 4.003584917738941, + "grad_norm": 8.533490180969238, + "learning_rate": 5.809525954574153e-07, + "loss": 0.1132, + "num_input_tokens_seen": 196828992, + "step": 62540 + }, + { + "epoch": 4.003904999679918, + "grad_norm": 5.961421489715576, + "learning_rate": 5.805945994581133e-07, + "loss": 0.092, + "num_input_tokens_seen": 196844160, + "step": 62545 + }, + { + "epoch": 4.004225081620895, + "grad_norm": 7.508273124694824, + "learning_rate": 5.802366993039176e-07, + "loss": 0.1455, + "num_input_tokens_seen": 196859456, + "step": 62550 + }, + { + "epoch": 4.004545163561872, + "grad_norm": 5.613559246063232, + "learning_rate": 5.798788950127024e-07, + "loss": 0.1404, + "num_input_tokens_seen": 196875072, + "step": 62555 + }, + { + "epoch": 4.004865245502849, + "grad_norm": 5.474255084991455, + "learning_rate": 5.795211866023334e-07, + "loss": 0.1236, + "num_input_tokens_seen": 196889088, + "step": 62560 + }, + { + "epoch": 4.005185327443826, + "grad_norm": 3.8023197650909424, + "learning_rate": 5.791635740906725e-07, + "loss": 0.0884, + "num_input_tokens_seen": 196903744, + "step": 62565 + }, + { + "epoch": 4.005505409384803, + "grad_norm": 8.19470500946045, + "learning_rate": 5.788060574955787e-07, + "loss": 0.1446, + "num_input_tokens_seen": 196919296, + "step": 62570 + }, + { + "epoch": 4.00582549132578, + "grad_norm": 4.941251277923584, + "learning_rate": 5.784486368349018e-07, + "loss": 0.1382, + "num_input_tokens_seen": 196934528, + "step": 62575 + }, + { + "epoch": 4.006145573266756, + "grad_norm": 6.676270008087158, + "learning_rate": 5.780913121264916e-07, + "loss": 0.1447, + "num_input_tokens_seen": 196951104, + "step": 62580 + }, + { + "epoch": 4.006465655207733, + "grad_norm": 7.296558380126953, + "learning_rate": 5.777340833881906e-07, + "loss": 0.1598, + "num_input_tokens_seen": 196966336, + "step": 62585 + }, + { + "epoch": 4.00678573714871, + "grad_norm": 6.902030944824219, + "learning_rate": 5.773769506378369e-07, + "loss": 0.1354, + "num_input_tokens_seen": 196995136, + "step": 62590 + }, + { + "epoch": 4.007105819089687, + "grad_norm": 12.5274076461792, + "learning_rate": 5.770199138932633e-07, + "loss": 0.1417, + "num_input_tokens_seen": 197010496, + "step": 62595 + }, + { + "epoch": 4.007425901030664, + "grad_norm": 6.3561177253723145, + "learning_rate": 5.76662973172299e-07, + "loss": 0.1278, + "num_input_tokens_seen": 197026560, + "step": 62600 + }, + { + "epoch": 4.007745982971641, + "grad_norm": 3.419008493423462, + "learning_rate": 5.763061284927677e-07, + "loss": 0.0678, + "num_input_tokens_seen": 197043712, + "step": 62605 + }, + { + "epoch": 4.008066064912618, + "grad_norm": 5.9596943855285645, + "learning_rate": 5.759493798724872e-07, + "loss": 0.1013, + "num_input_tokens_seen": 197060608, + "step": 62610 + }, + { + "epoch": 4.008386146853595, + "grad_norm": 5.411553859710693, + "learning_rate": 5.755927273292733e-07, + "loss": 0.1109, + "num_input_tokens_seen": 197076288, + "step": 62615 + }, + { + "epoch": 4.008706228794572, + "grad_norm": 6.783432960510254, + "learning_rate": 5.752361708809348e-07, + "loss": 0.1732, + "num_input_tokens_seen": 197091712, + "step": 62620 + }, + { + "epoch": 4.009026310735548, + "grad_norm": 6.119074821472168, + "learning_rate": 5.748797105452764e-07, + "loss": 0.1169, + "num_input_tokens_seen": 197106496, + "step": 62625 + }, + { + "epoch": 4.009346392676525, + "grad_norm": 6.275419235229492, + "learning_rate": 5.745233463400977e-07, + "loss": 0.1016, + "num_input_tokens_seen": 197122176, + "step": 62630 + }, + { + "epoch": 4.009666474617502, + "grad_norm": 3.660553216934204, + "learning_rate": 5.741670782831931e-07, + "loss": 0.1032, + "num_input_tokens_seen": 197138240, + "step": 62635 + }, + { + "epoch": 4.009986556558479, + "grad_norm": 7.217890739440918, + "learning_rate": 5.738109063923539e-07, + "loss": 0.1633, + "num_input_tokens_seen": 197153408, + "step": 62640 + }, + { + "epoch": 4.010306638499456, + "grad_norm": 7.268453121185303, + "learning_rate": 5.734548306853646e-07, + "loss": 0.1155, + "num_input_tokens_seen": 197168640, + "step": 62645 + }, + { + "epoch": 4.010626720440433, + "grad_norm": 5.136076927185059, + "learning_rate": 5.730988511800053e-07, + "loss": 0.1154, + "num_input_tokens_seen": 197184448, + "step": 62650 + }, + { + "epoch": 4.01094680238141, + "grad_norm": 6.964511394500732, + "learning_rate": 5.727429678940541e-07, + "loss": 0.1091, + "num_input_tokens_seen": 197201088, + "step": 62655 + }, + { + "epoch": 4.011266884322387, + "grad_norm": 5.514355659484863, + "learning_rate": 5.723871808452788e-07, + "loss": 0.0878, + "num_input_tokens_seen": 197216320, + "step": 62660 + }, + { + "epoch": 4.011586966263364, + "grad_norm": 5.53068208694458, + "learning_rate": 5.720314900514481e-07, + "loss": 0.1662, + "num_input_tokens_seen": 197231744, + "step": 62665 + }, + { + "epoch": 4.01190704820434, + "grad_norm": 9.88190746307373, + "learning_rate": 5.71675895530322e-07, + "loss": 0.136, + "num_input_tokens_seen": 197248128, + "step": 62670 + }, + { + "epoch": 4.012227130145317, + "grad_norm": 6.835646629333496, + "learning_rate": 5.71320397299657e-07, + "loss": 0.0906, + "num_input_tokens_seen": 197265408, + "step": 62675 + }, + { + "epoch": 4.012547212086294, + "grad_norm": 8.204851150512695, + "learning_rate": 5.709649953772064e-07, + "loss": 0.1032, + "num_input_tokens_seen": 197280384, + "step": 62680 + }, + { + "epoch": 4.012867294027271, + "grad_norm": 5.0673017501831055, + "learning_rate": 5.706096897807148e-07, + "loss": 0.1049, + "num_input_tokens_seen": 197295808, + "step": 62685 + }, + { + "epoch": 4.013187375968248, + "grad_norm": 5.1981201171875, + "learning_rate": 5.702544805279267e-07, + "loss": 0.1803, + "num_input_tokens_seen": 197310272, + "step": 62690 + }, + { + "epoch": 4.013507457909225, + "grad_norm": 7.483926296234131, + "learning_rate": 5.698993676365769e-07, + "loss": 0.1273, + "num_input_tokens_seen": 197325440, + "step": 62695 + }, + { + "epoch": 4.013827539850202, + "grad_norm": 5.820372581481934, + "learning_rate": 5.695443511243998e-07, + "loss": 0.0859, + "num_input_tokens_seen": 197340608, + "step": 62700 + }, + { + "epoch": 4.014147621791179, + "grad_norm": 8.203961372375488, + "learning_rate": 5.691894310091223e-07, + "loss": 0.1413, + "num_input_tokens_seen": 197354816, + "step": 62705 + }, + { + "epoch": 4.014467703732155, + "grad_norm": 7.194404125213623, + "learning_rate": 5.688346073084674e-07, + "loss": 0.1179, + "num_input_tokens_seen": 197370112, + "step": 62710 + }, + { + "epoch": 4.014787785673132, + "grad_norm": 4.581719398498535, + "learning_rate": 5.684798800401531e-07, + "loss": 0.1382, + "num_input_tokens_seen": 197385728, + "step": 62715 + }, + { + "epoch": 4.015107867614109, + "grad_norm": 4.358405113220215, + "learning_rate": 5.681252492218928e-07, + "loss": 0.1136, + "num_input_tokens_seen": 197401088, + "step": 62720 + }, + { + "epoch": 4.015427949555086, + "grad_norm": 8.44141674041748, + "learning_rate": 5.67770714871394e-07, + "loss": 0.1547, + "num_input_tokens_seen": 197417216, + "step": 62725 + }, + { + "epoch": 4.015748031496063, + "grad_norm": 5.5084547996521, + "learning_rate": 5.674162770063621e-07, + "loss": 0.103, + "num_input_tokens_seen": 197432768, + "step": 62730 + }, + { + "epoch": 4.01606811343704, + "grad_norm": 5.1241655349731445, + "learning_rate": 5.670619356444945e-07, + "loss": 0.1073, + "num_input_tokens_seen": 197447360, + "step": 62735 + }, + { + "epoch": 4.016388195378017, + "grad_norm": 4.578981399536133, + "learning_rate": 5.667076908034858e-07, + "loss": 0.0736, + "num_input_tokens_seen": 197463232, + "step": 62740 + }, + { + "epoch": 4.016708277318994, + "grad_norm": 8.136292457580566, + "learning_rate": 5.663535425010245e-07, + "loss": 0.1192, + "num_input_tokens_seen": 197480000, + "step": 62745 + }, + { + "epoch": 4.017028359259971, + "grad_norm": 4.228031158447266, + "learning_rate": 5.659994907547955e-07, + "loss": 0.1443, + "num_input_tokens_seen": 197495424, + "step": 62750 + }, + { + "epoch": 4.017348441200947, + "grad_norm": 5.78891658782959, + "learning_rate": 5.65645535582478e-07, + "loss": 0.1448, + "num_input_tokens_seen": 197511168, + "step": 62755 + }, + { + "epoch": 4.017668523141924, + "grad_norm": 8.938915252685547, + "learning_rate": 5.652916770017464e-07, + "loss": 0.1295, + "num_input_tokens_seen": 197527552, + "step": 62760 + }, + { + "epoch": 4.017988605082901, + "grad_norm": 5.7786736488342285, + "learning_rate": 5.64937915030272e-07, + "loss": 0.1673, + "num_input_tokens_seen": 197543488, + "step": 62765 + }, + { + "epoch": 4.018308687023878, + "grad_norm": 3.9713101387023926, + "learning_rate": 5.645842496857173e-07, + "loss": 0.1176, + "num_input_tokens_seen": 197558656, + "step": 62770 + }, + { + "epoch": 4.018628768964855, + "grad_norm": 4.676670551300049, + "learning_rate": 5.642306809857457e-07, + "loss": 0.0851, + "num_input_tokens_seen": 197573312, + "step": 62775 + }, + { + "epoch": 4.018948850905832, + "grad_norm": 6.8274664878845215, + "learning_rate": 5.63877208948009e-07, + "loss": 0.1452, + "num_input_tokens_seen": 197588672, + "step": 62780 + }, + { + "epoch": 4.019268932846809, + "grad_norm": 5.103263854980469, + "learning_rate": 5.635238335901605e-07, + "loss": 0.127, + "num_input_tokens_seen": 197604608, + "step": 62785 + }, + { + "epoch": 4.019589014787786, + "grad_norm": 4.025350570678711, + "learning_rate": 5.631705549298452e-07, + "loss": 0.1122, + "num_input_tokens_seen": 197620480, + "step": 62790 + }, + { + "epoch": 4.019909096728763, + "grad_norm": 13.347708702087402, + "learning_rate": 5.628173729847028e-07, + "loss": 0.112, + "num_input_tokens_seen": 197635456, + "step": 62795 + }, + { + "epoch": 4.020229178669739, + "grad_norm": 6.100518226623535, + "learning_rate": 5.624642877723721e-07, + "loss": 0.117, + "num_input_tokens_seen": 197650368, + "step": 62800 + }, + { + "epoch": 4.020549260610716, + "grad_norm": 6.782215118408203, + "learning_rate": 5.62111299310481e-07, + "loss": 0.179, + "num_input_tokens_seen": 197665920, + "step": 62805 + }, + { + "epoch": 4.020869342551693, + "grad_norm": 9.479937553405762, + "learning_rate": 5.617584076166583e-07, + "loss": 0.1436, + "num_input_tokens_seen": 197680704, + "step": 62810 + }, + { + "epoch": 4.02118942449267, + "grad_norm": 8.658406257629395, + "learning_rate": 5.614056127085246e-07, + "loss": 0.1605, + "num_input_tokens_seen": 197695168, + "step": 62815 + }, + { + "epoch": 4.021509506433647, + "grad_norm": 4.614177703857422, + "learning_rate": 5.610529146036969e-07, + "loss": 0.0953, + "num_input_tokens_seen": 197710016, + "step": 62820 + }, + { + "epoch": 4.021829588374624, + "grad_norm": 7.777926445007324, + "learning_rate": 5.607003133197872e-07, + "loss": 0.1414, + "num_input_tokens_seen": 197725056, + "step": 62825 + }, + { + "epoch": 4.022149670315601, + "grad_norm": 9.830087661743164, + "learning_rate": 5.603478088744024e-07, + "loss": 0.1614, + "num_input_tokens_seen": 197740672, + "step": 62830 + }, + { + "epoch": 4.022469752256578, + "grad_norm": 6.634655952453613, + "learning_rate": 5.599954012851447e-07, + "loss": 0.1611, + "num_input_tokens_seen": 197762816, + "step": 62835 + }, + { + "epoch": 4.022789834197555, + "grad_norm": 6.273291110992432, + "learning_rate": 5.596430905696107e-07, + "loss": 0.1423, + "num_input_tokens_seen": 197777472, + "step": 62840 + }, + { + "epoch": 4.023109916138531, + "grad_norm": 6.632730960845947, + "learning_rate": 5.592908767453947e-07, + "loss": 0.1156, + "num_input_tokens_seen": 197793152, + "step": 62845 + }, + { + "epoch": 4.023429998079508, + "grad_norm": 5.942710876464844, + "learning_rate": 5.589387598300832e-07, + "loss": 0.1107, + "num_input_tokens_seen": 197808256, + "step": 62850 + }, + { + "epoch": 4.023750080020485, + "grad_norm": 9.980260848999023, + "learning_rate": 5.585867398412598e-07, + "loss": 0.1397, + "num_input_tokens_seen": 197823616, + "step": 62855 + }, + { + "epoch": 4.024070161961462, + "grad_norm": 9.39174747467041, + "learning_rate": 5.582348167965021e-07, + "loss": 0.1653, + "num_input_tokens_seen": 197839168, + "step": 62860 + }, + { + "epoch": 4.024390243902439, + "grad_norm": 5.867715358734131, + "learning_rate": 5.578829907133831e-07, + "loss": 0.1231, + "num_input_tokens_seen": 197856320, + "step": 62865 + }, + { + "epoch": 4.024710325843416, + "grad_norm": 7.9700727462768555, + "learning_rate": 5.575312616094716e-07, + "loss": 0.1525, + "num_input_tokens_seen": 197871104, + "step": 62870 + }, + { + "epoch": 4.025030407784393, + "grad_norm": 9.630566596984863, + "learning_rate": 5.57179629502331e-07, + "loss": 0.1495, + "num_input_tokens_seen": 197887104, + "step": 62875 + }, + { + "epoch": 4.02535048972537, + "grad_norm": 7.399318695068359, + "learning_rate": 5.568280944095192e-07, + "loss": 0.1425, + "num_input_tokens_seen": 197902144, + "step": 62880 + }, + { + "epoch": 4.025670571666347, + "grad_norm": 5.549108505249023, + "learning_rate": 5.564766563485921e-07, + "loss": 0.0992, + "num_input_tokens_seen": 197918208, + "step": 62885 + }, + { + "epoch": 4.025990653607323, + "grad_norm": 2.4977800846099854, + "learning_rate": 5.56125315337096e-07, + "loss": 0.1317, + "num_input_tokens_seen": 197933504, + "step": 62890 + }, + { + "epoch": 4.0263107355483, + "grad_norm": 4.574124813079834, + "learning_rate": 5.557740713925771e-07, + "loss": 0.1354, + "num_input_tokens_seen": 197949248, + "step": 62895 + }, + { + "epoch": 4.026630817489277, + "grad_norm": 8.27095890045166, + "learning_rate": 5.55422924532574e-07, + "loss": 0.1555, + "num_input_tokens_seen": 197964672, + "step": 62900 + }, + { + "epoch": 4.026950899430254, + "grad_norm": 3.3083651065826416, + "learning_rate": 5.550718747746203e-07, + "loss": 0.0964, + "num_input_tokens_seen": 197980544, + "step": 62905 + }, + { + "epoch": 4.027270981371231, + "grad_norm": 6.499920845031738, + "learning_rate": 5.547209221362479e-07, + "loss": 0.0851, + "num_input_tokens_seen": 197995520, + "step": 62910 + }, + { + "epoch": 4.027591063312208, + "grad_norm": 8.416144371032715, + "learning_rate": 5.543700666349785e-07, + "loss": 0.1104, + "num_input_tokens_seen": 198014272, + "step": 62915 + }, + { + "epoch": 4.027911145253185, + "grad_norm": 6.094906330108643, + "learning_rate": 5.54019308288335e-07, + "loss": 0.0735, + "num_input_tokens_seen": 198029248, + "step": 62920 + }, + { + "epoch": 4.028231227194162, + "grad_norm": 4.837270736694336, + "learning_rate": 5.536686471138298e-07, + "loss": 0.1291, + "num_input_tokens_seen": 198044224, + "step": 62925 + }, + { + "epoch": 4.028551309135139, + "grad_norm": 8.382013320922852, + "learning_rate": 5.533180831289748e-07, + "loss": 0.1579, + "num_input_tokens_seen": 198060736, + "step": 62930 + }, + { + "epoch": 4.028871391076115, + "grad_norm": 1.6022878885269165, + "learning_rate": 5.529676163512748e-07, + "loss": 0.1227, + "num_input_tokens_seen": 198076672, + "step": 62935 + }, + { + "epoch": 4.029191473017092, + "grad_norm": 6.789572238922119, + "learning_rate": 5.526172467982305e-07, + "loss": 0.1297, + "num_input_tokens_seen": 198093312, + "step": 62940 + }, + { + "epoch": 4.029511554958069, + "grad_norm": 7.77125883102417, + "learning_rate": 5.522669744873372e-07, + "loss": 0.1562, + "num_input_tokens_seen": 198108032, + "step": 62945 + }, + { + "epoch": 4.029831636899046, + "grad_norm": 5.302130222320557, + "learning_rate": 5.519167994360858e-07, + "loss": 0.1627, + "num_input_tokens_seen": 198123904, + "step": 62950 + }, + { + "epoch": 4.030151718840023, + "grad_norm": 5.046792984008789, + "learning_rate": 5.515667216619617e-07, + "loss": 0.117, + "num_input_tokens_seen": 198140160, + "step": 62955 + }, + { + "epoch": 4.030471800781, + "grad_norm": 7.272430419921875, + "learning_rate": 5.51216741182447e-07, + "loss": 0.0972, + "num_input_tokens_seen": 198156544, + "step": 62960 + }, + { + "epoch": 4.030791882721977, + "grad_norm": 7.567429542541504, + "learning_rate": 5.508668580150175e-07, + "loss": 0.0985, + "num_input_tokens_seen": 198173504, + "step": 62965 + }, + { + "epoch": 4.031111964662954, + "grad_norm": 8.557794570922852, + "learning_rate": 5.505170721771447e-07, + "loss": 0.1219, + "num_input_tokens_seen": 198188416, + "step": 62970 + }, + { + "epoch": 4.03143204660393, + "grad_norm": 7.342204570770264, + "learning_rate": 5.501673836862945e-07, + "loss": 0.1055, + "num_input_tokens_seen": 198202944, + "step": 62975 + }, + { + "epoch": 4.031752128544907, + "grad_norm": 6.1138763427734375, + "learning_rate": 5.498177925599291e-07, + "loss": 0.1133, + "num_input_tokens_seen": 198218240, + "step": 62980 + }, + { + "epoch": 4.032072210485884, + "grad_norm": 4.884414196014404, + "learning_rate": 5.494682988155048e-07, + "loss": 0.105, + "num_input_tokens_seen": 198233984, + "step": 62985 + }, + { + "epoch": 4.032392292426861, + "grad_norm": 3.8691346645355225, + "learning_rate": 5.491189024704729e-07, + "loss": 0.065, + "num_input_tokens_seen": 198248640, + "step": 62990 + }, + { + "epoch": 4.032712374367838, + "grad_norm": 3.829803228378296, + "learning_rate": 5.487696035422829e-07, + "loss": 0.1014, + "num_input_tokens_seen": 198264256, + "step": 62995 + }, + { + "epoch": 4.033032456308815, + "grad_norm": 4.922103404998779, + "learning_rate": 5.484204020483739e-07, + "loss": 0.0645, + "num_input_tokens_seen": 198279552, + "step": 63000 + }, + { + "epoch": 4.033352538249792, + "grad_norm": 5.856963157653809, + "learning_rate": 5.480712980061858e-07, + "loss": 0.1114, + "num_input_tokens_seen": 198296192, + "step": 63005 + }, + { + "epoch": 4.033672620190769, + "grad_norm": 7.089382648468018, + "learning_rate": 5.477222914331487e-07, + "loss": 0.1338, + "num_input_tokens_seen": 198310976, + "step": 63010 + }, + { + "epoch": 4.033992702131746, + "grad_norm": 9.481048583984375, + "learning_rate": 5.47373382346692e-07, + "loss": 0.0964, + "num_input_tokens_seen": 198325824, + "step": 63015 + }, + { + "epoch": 4.034312784072722, + "grad_norm": 2.3552329540252686, + "learning_rate": 5.470245707642375e-07, + "loss": 0.1392, + "num_input_tokens_seen": 198341248, + "step": 63020 + }, + { + "epoch": 4.034632866013699, + "grad_norm": 7.6332926750183105, + "learning_rate": 5.466758567032027e-07, + "loss": 0.1401, + "num_input_tokens_seen": 198356544, + "step": 63025 + }, + { + "epoch": 4.034952947954676, + "grad_norm": 6.329472541809082, + "learning_rate": 5.463272401810025e-07, + "loss": 0.1066, + "num_input_tokens_seen": 198372416, + "step": 63030 + }, + { + "epoch": 4.035273029895653, + "grad_norm": 7.960379600524902, + "learning_rate": 5.459787212150422e-07, + "loss": 0.1044, + "num_input_tokens_seen": 198388096, + "step": 63035 + }, + { + "epoch": 4.03559311183663, + "grad_norm": 4.35312557220459, + "learning_rate": 5.456302998227273e-07, + "loss": 0.1113, + "num_input_tokens_seen": 198403136, + "step": 63040 + }, + { + "epoch": 4.035913193777607, + "grad_norm": 4.331933498382568, + "learning_rate": 5.452819760214551e-07, + "loss": 0.1056, + "num_input_tokens_seen": 198418752, + "step": 63045 + }, + { + "epoch": 4.036233275718584, + "grad_norm": 10.35120964050293, + "learning_rate": 5.449337498286195e-07, + "loss": 0.1626, + "num_input_tokens_seen": 198434240, + "step": 63050 + }, + { + "epoch": 4.036553357659561, + "grad_norm": 8.302675247192383, + "learning_rate": 5.445856212616085e-07, + "loss": 0.1379, + "num_input_tokens_seen": 198449920, + "step": 63055 + }, + { + "epoch": 4.036873439600538, + "grad_norm": 9.187786102294922, + "learning_rate": 5.442375903378064e-07, + "loss": 0.1009, + "num_input_tokens_seen": 198464576, + "step": 63060 + }, + { + "epoch": 4.037193521541514, + "grad_norm": 9.12442398071289, + "learning_rate": 5.438896570745919e-07, + "loss": 0.21, + "num_input_tokens_seen": 198480192, + "step": 63065 + }, + { + "epoch": 4.037513603482491, + "grad_norm": 7.294279098510742, + "learning_rate": 5.435418214893387e-07, + "loss": 0.1761, + "num_input_tokens_seen": 198497408, + "step": 63070 + }, + { + "epoch": 4.037833685423468, + "grad_norm": 9.7924222946167, + "learning_rate": 5.431940835994156e-07, + "loss": 0.118, + "num_input_tokens_seen": 198511808, + "step": 63075 + }, + { + "epoch": 4.038153767364445, + "grad_norm": 8.213648796081543, + "learning_rate": 5.42846443422188e-07, + "loss": 0.1109, + "num_input_tokens_seen": 198528128, + "step": 63080 + }, + { + "epoch": 4.038473849305422, + "grad_norm": 11.40200424194336, + "learning_rate": 5.424989009750145e-07, + "loss": 0.1732, + "num_input_tokens_seen": 198543424, + "step": 63085 + }, + { + "epoch": 4.038793931246399, + "grad_norm": 5.364441871643066, + "learning_rate": 5.421514562752498e-07, + "loss": 0.085, + "num_input_tokens_seen": 198558144, + "step": 63090 + }, + { + "epoch": 4.039114013187376, + "grad_norm": 7.0776214599609375, + "learning_rate": 5.418041093402432e-07, + "loss": 0.1585, + "num_input_tokens_seen": 198573760, + "step": 63095 + }, + { + "epoch": 4.039434095128353, + "grad_norm": 9.736440658569336, + "learning_rate": 5.414568601873396e-07, + "loss": 0.1162, + "num_input_tokens_seen": 198588224, + "step": 63100 + }, + { + "epoch": 4.03975417706933, + "grad_norm": 6.61515474319458, + "learning_rate": 5.411097088338785e-07, + "loss": 0.2928, + "num_input_tokens_seen": 198605312, + "step": 63105 + }, + { + "epoch": 4.040074259010306, + "grad_norm": 6.130372524261475, + "learning_rate": 5.407626552971946e-07, + "loss": 0.1135, + "num_input_tokens_seen": 198621248, + "step": 63110 + }, + { + "epoch": 4.040394340951283, + "grad_norm": 4.667547225952148, + "learning_rate": 5.404156995946197e-07, + "loss": 0.1772, + "num_input_tokens_seen": 198637440, + "step": 63115 + }, + { + "epoch": 4.04071442289226, + "grad_norm": 93.60664367675781, + "learning_rate": 5.400688417434763e-07, + "loss": 0.1699, + "num_input_tokens_seen": 198652608, + "step": 63120 + }, + { + "epoch": 4.041034504833237, + "grad_norm": 7.326137542724609, + "learning_rate": 5.397220817610866e-07, + "loss": 0.1354, + "num_input_tokens_seen": 198668992, + "step": 63125 + }, + { + "epoch": 4.041354586774214, + "grad_norm": 13.804678916931152, + "learning_rate": 5.393754196647655e-07, + "loss": 0.1053, + "num_input_tokens_seen": 198684160, + "step": 63130 + }, + { + "epoch": 4.041674668715191, + "grad_norm": 8.558884620666504, + "learning_rate": 5.390288554718229e-07, + "loss": 0.134, + "num_input_tokens_seen": 198700160, + "step": 63135 + }, + { + "epoch": 4.041994750656168, + "grad_norm": 6.409483909606934, + "learning_rate": 5.38682389199566e-07, + "loss": 0.1397, + "num_input_tokens_seen": 198715328, + "step": 63140 + }, + { + "epoch": 4.042314832597145, + "grad_norm": 5.655080795288086, + "learning_rate": 5.383360208652933e-07, + "loss": 0.1005, + "num_input_tokens_seen": 198730624, + "step": 63145 + }, + { + "epoch": 4.042634914538122, + "grad_norm": 4.533898830413818, + "learning_rate": 5.37989750486303e-07, + "loss": 0.1437, + "num_input_tokens_seen": 198745280, + "step": 63150 + }, + { + "epoch": 4.042954996479098, + "grad_norm": 10.31092643737793, + "learning_rate": 5.376435780798836e-07, + "loss": 0.1493, + "num_input_tokens_seen": 198761792, + "step": 63155 + }, + { + "epoch": 4.043275078420075, + "grad_norm": 8.224305152893066, + "learning_rate": 5.372975036633229e-07, + "loss": 0.1642, + "num_input_tokens_seen": 198778560, + "step": 63160 + }, + { + "epoch": 4.043595160361052, + "grad_norm": 3.323347330093384, + "learning_rate": 5.369515272539017e-07, + "loss": 0.0871, + "num_input_tokens_seen": 198793792, + "step": 63165 + }, + { + "epoch": 4.043915242302029, + "grad_norm": 9.247758865356445, + "learning_rate": 5.366056488688958e-07, + "loss": 0.1275, + "num_input_tokens_seen": 198810304, + "step": 63170 + }, + { + "epoch": 4.044235324243006, + "grad_norm": 3.557835340499878, + "learning_rate": 5.362598685255771e-07, + "loss": 0.1562, + "num_input_tokens_seen": 198826240, + "step": 63175 + }, + { + "epoch": 4.044555406183983, + "grad_norm": 7.218681812286377, + "learning_rate": 5.359141862412118e-07, + "loss": 0.1165, + "num_input_tokens_seen": 198841600, + "step": 63180 + }, + { + "epoch": 4.04487548812496, + "grad_norm": 3.933573007583618, + "learning_rate": 5.355686020330608e-07, + "loss": 0.1027, + "num_input_tokens_seen": 198857280, + "step": 63185 + }, + { + "epoch": 4.045195570065937, + "grad_norm": 7.484566688537598, + "learning_rate": 5.352231159183821e-07, + "loss": 0.1533, + "num_input_tokens_seen": 198875008, + "step": 63190 + }, + { + "epoch": 4.045515652006914, + "grad_norm": 11.813130378723145, + "learning_rate": 5.348777279144269e-07, + "loss": 0.1865, + "num_input_tokens_seen": 198889792, + "step": 63195 + }, + { + "epoch": 4.04583573394789, + "grad_norm": 4.174560070037842, + "learning_rate": 5.34532438038442e-07, + "loss": 0.1483, + "num_input_tokens_seen": 198904384, + "step": 63200 + }, + { + "epoch": 4.046155815888867, + "grad_norm": 21.420005798339844, + "learning_rate": 5.341872463076695e-07, + "loss": 0.1277, + "num_input_tokens_seen": 198921152, + "step": 63205 + }, + { + "epoch": 4.046475897829844, + "grad_norm": 9.86802864074707, + "learning_rate": 5.338421527393461e-07, + "loss": 0.1328, + "num_input_tokens_seen": 198939264, + "step": 63210 + }, + { + "epoch": 4.046795979770821, + "grad_norm": 5.843830108642578, + "learning_rate": 5.334971573507045e-07, + "loss": 0.1319, + "num_input_tokens_seen": 198955584, + "step": 63215 + }, + { + "epoch": 4.047116061711798, + "grad_norm": 8.252338409423828, + "learning_rate": 5.331522601589709e-07, + "loss": 0.1289, + "num_input_tokens_seen": 198970880, + "step": 63220 + }, + { + "epoch": 4.047436143652775, + "grad_norm": 6.44088077545166, + "learning_rate": 5.328074611813699e-07, + "loss": 0.118, + "num_input_tokens_seen": 198986624, + "step": 63225 + }, + { + "epoch": 4.047756225593752, + "grad_norm": 7.625007629394531, + "learning_rate": 5.324627604351163e-07, + "loss": 0.1392, + "num_input_tokens_seen": 199002112, + "step": 63230 + }, + { + "epoch": 4.048076307534729, + "grad_norm": 6.067739486694336, + "learning_rate": 5.321181579374252e-07, + "loss": 0.0864, + "num_input_tokens_seen": 199017600, + "step": 63235 + }, + { + "epoch": 4.048396389475705, + "grad_norm": 4.018966197967529, + "learning_rate": 5.317736537055012e-07, + "loss": 0.1273, + "num_input_tokens_seen": 199033536, + "step": 63240 + }, + { + "epoch": 4.048716471416682, + "grad_norm": 6.255914211273193, + "learning_rate": 5.314292477565499e-07, + "loss": 0.1282, + "num_input_tokens_seen": 199048192, + "step": 63245 + }, + { + "epoch": 4.049036553357659, + "grad_norm": 6.934030532836914, + "learning_rate": 5.310849401077683e-07, + "loss": 0.1739, + "num_input_tokens_seen": 199064192, + "step": 63250 + }, + { + "epoch": 4.049356635298636, + "grad_norm": 6.104036808013916, + "learning_rate": 5.307407307763479e-07, + "loss": 0.1056, + "num_input_tokens_seen": 199080256, + "step": 63255 + }, + { + "epoch": 4.049676717239613, + "grad_norm": 5.489689350128174, + "learning_rate": 5.303966197794797e-07, + "loss": 0.1282, + "num_input_tokens_seen": 199096192, + "step": 63260 + }, + { + "epoch": 4.04999679918059, + "grad_norm": 4.786718368530273, + "learning_rate": 5.300526071343434e-07, + "loss": 0.1329, + "num_input_tokens_seen": 199112000, + "step": 63265 + }, + { + "epoch": 4.050316881121567, + "grad_norm": 8.071839332580566, + "learning_rate": 5.297086928581196e-07, + "loss": 0.1424, + "num_input_tokens_seen": 199127040, + "step": 63270 + }, + { + "epoch": 4.050636963062544, + "grad_norm": 8.16862678527832, + "learning_rate": 5.293648769679807e-07, + "loss": 0.1404, + "num_input_tokens_seen": 199142720, + "step": 63275 + }, + { + "epoch": 4.050957045003521, + "grad_norm": 5.222912788391113, + "learning_rate": 5.290211594810954e-07, + "loss": 0.1317, + "num_input_tokens_seen": 199158848, + "step": 63280 + }, + { + "epoch": 4.051277126944497, + "grad_norm": 9.663863182067871, + "learning_rate": 5.286775404146266e-07, + "loss": 0.1368, + "num_input_tokens_seen": 199173760, + "step": 63285 + }, + { + "epoch": 4.051597208885474, + "grad_norm": 4.527390956878662, + "learning_rate": 5.283340197857334e-07, + "loss": 0.1028, + "num_input_tokens_seen": 199188608, + "step": 63290 + }, + { + "epoch": 4.051917290826451, + "grad_norm": 7.636993885040283, + "learning_rate": 5.279905976115693e-07, + "loss": 0.1147, + "num_input_tokens_seen": 199203136, + "step": 63295 + }, + { + "epoch": 4.052237372767428, + "grad_norm": 4.039955139160156, + "learning_rate": 5.276472739092828e-07, + "loss": 0.1054, + "num_input_tokens_seen": 199219008, + "step": 63300 + }, + { + "epoch": 4.052557454708405, + "grad_norm": 2.45246958732605, + "learning_rate": 5.273040486960171e-07, + "loss": 0.1534, + "num_input_tokens_seen": 199234624, + "step": 63305 + }, + { + "epoch": 4.052877536649382, + "grad_norm": 9.003640174865723, + "learning_rate": 5.269609219889127e-07, + "loss": 0.1307, + "num_input_tokens_seen": 199249408, + "step": 63310 + }, + { + "epoch": 4.053197618590359, + "grad_norm": 6.940491199493408, + "learning_rate": 5.266178938051026e-07, + "loss": 0.1606, + "num_input_tokens_seen": 199263872, + "step": 63315 + }, + { + "epoch": 4.053517700531336, + "grad_norm": 8.711896896362305, + "learning_rate": 5.262749641617157e-07, + "loss": 0.1286, + "num_input_tokens_seen": 199279936, + "step": 63320 + }, + { + "epoch": 4.053837782472313, + "grad_norm": 9.555429458618164, + "learning_rate": 5.259321330758766e-07, + "loss": 0.1539, + "num_input_tokens_seen": 199295552, + "step": 63325 + }, + { + "epoch": 4.054157864413289, + "grad_norm": 5.980725288391113, + "learning_rate": 5.255894005647033e-07, + "loss": 0.0866, + "num_input_tokens_seen": 199311104, + "step": 63330 + }, + { + "epoch": 4.054477946354266, + "grad_norm": 8.909782409667969, + "learning_rate": 5.252467666453126e-07, + "loss": 0.113, + "num_input_tokens_seen": 199326656, + "step": 63335 + }, + { + "epoch": 4.054798028295243, + "grad_norm": 8.379039764404297, + "learning_rate": 5.249042313348107e-07, + "loss": 0.0873, + "num_input_tokens_seen": 199341760, + "step": 63340 + }, + { + "epoch": 4.05511811023622, + "grad_norm": 55.13984298706055, + "learning_rate": 5.245617946503051e-07, + "loss": 0.1428, + "num_input_tokens_seen": 199356480, + "step": 63345 + }, + { + "epoch": 4.055438192177197, + "grad_norm": 7.577048301696777, + "learning_rate": 5.242194566088923e-07, + "loss": 0.1471, + "num_input_tokens_seen": 199371712, + "step": 63350 + }, + { + "epoch": 4.055758274118174, + "grad_norm": 4.0757975578308105, + "learning_rate": 5.238772172276693e-07, + "loss": 0.138, + "num_input_tokens_seen": 199387200, + "step": 63355 + }, + { + "epoch": 4.056078356059151, + "grad_norm": 7.540800094604492, + "learning_rate": 5.235350765237246e-07, + "loss": 0.1257, + "num_input_tokens_seen": 199403776, + "step": 63360 + }, + { + "epoch": 4.056398438000128, + "grad_norm": 6.616398334503174, + "learning_rate": 5.231930345141431e-07, + "loss": 0.1356, + "num_input_tokens_seen": 199419200, + "step": 63365 + }, + { + "epoch": 4.056718519941105, + "grad_norm": 4.947017192840576, + "learning_rate": 5.228510912160048e-07, + "loss": 0.1023, + "num_input_tokens_seen": 199434752, + "step": 63370 + }, + { + "epoch": 4.057038601882081, + "grad_norm": 5.677462577819824, + "learning_rate": 5.225092466463838e-07, + "loss": 0.1009, + "num_input_tokens_seen": 199450816, + "step": 63375 + }, + { + "epoch": 4.057358683823058, + "grad_norm": 6.221627712249756, + "learning_rate": 5.221675008223517e-07, + "loss": 0.1229, + "num_input_tokens_seen": 199466496, + "step": 63380 + }, + { + "epoch": 4.057678765764035, + "grad_norm": 3.477424383163452, + "learning_rate": 5.218258537609713e-07, + "loss": 0.0859, + "num_input_tokens_seen": 199482304, + "step": 63385 + }, + { + "epoch": 4.057998847705012, + "grad_norm": 4.817313194274902, + "learning_rate": 5.214843054793045e-07, + "loss": 0.0956, + "num_input_tokens_seen": 199498368, + "step": 63390 + }, + { + "epoch": 4.058318929645989, + "grad_norm": 5.168781757354736, + "learning_rate": 5.21142855994406e-07, + "loss": 0.0836, + "num_input_tokens_seen": 199514304, + "step": 63395 + }, + { + "epoch": 4.058639011586966, + "grad_norm": 7.3250579833984375, + "learning_rate": 5.208015053233253e-07, + "loss": 0.1224, + "num_input_tokens_seen": 199530048, + "step": 63400 + }, + { + "epoch": 4.058959093527943, + "grad_norm": 9.980535507202148, + "learning_rate": 5.204602534831085e-07, + "loss": 0.1053, + "num_input_tokens_seen": 199544512, + "step": 63405 + }, + { + "epoch": 4.05927917546892, + "grad_norm": 8.435853958129883, + "learning_rate": 5.201191004907955e-07, + "loss": 0.1026, + "num_input_tokens_seen": 199559616, + "step": 63410 + }, + { + "epoch": 4.059599257409897, + "grad_norm": 15.590380668640137, + "learning_rate": 5.197780463634211e-07, + "loss": 0.1402, + "num_input_tokens_seen": 199575488, + "step": 63415 + }, + { + "epoch": 4.0599193393508735, + "grad_norm": 4.803512096405029, + "learning_rate": 5.194370911180177e-07, + "loss": 0.1276, + "num_input_tokens_seen": 199591232, + "step": 63420 + }, + { + "epoch": 4.0602394212918504, + "grad_norm": 6.378673076629639, + "learning_rate": 5.190962347716086e-07, + "loss": 0.1628, + "num_input_tokens_seen": 199607232, + "step": 63425 + }, + { + "epoch": 4.060559503232827, + "grad_norm": 5.320207118988037, + "learning_rate": 5.187554773412157e-07, + "loss": 0.1143, + "num_input_tokens_seen": 199621760, + "step": 63430 + }, + { + "epoch": 4.060879585173804, + "grad_norm": 8.075942039489746, + "learning_rate": 5.184148188438545e-07, + "loss": 0.0886, + "num_input_tokens_seen": 199638016, + "step": 63435 + }, + { + "epoch": 4.061199667114781, + "grad_norm": 9.057145118713379, + "learning_rate": 5.180742592965354e-07, + "loss": 0.1261, + "num_input_tokens_seen": 199653376, + "step": 63440 + }, + { + "epoch": 4.061519749055758, + "grad_norm": 8.297286033630371, + "learning_rate": 5.177337987162645e-07, + "loss": 0.1589, + "num_input_tokens_seen": 199671680, + "step": 63445 + }, + { + "epoch": 4.061839830996735, + "grad_norm": 6.603394508361816, + "learning_rate": 5.173934371200418e-07, + "loss": 0.1513, + "num_input_tokens_seen": 199689408, + "step": 63450 + }, + { + "epoch": 4.062159912937712, + "grad_norm": 7.698631286621094, + "learning_rate": 5.17053174524865e-07, + "loss": 0.1684, + "num_input_tokens_seen": 199704576, + "step": 63455 + }, + { + "epoch": 4.062479994878689, + "grad_norm": 8.650748252868652, + "learning_rate": 5.167130109477228e-07, + "loss": 0.1274, + "num_input_tokens_seen": 199723200, + "step": 63460 + }, + { + "epoch": 4.0628000768196655, + "grad_norm": 5.789612293243408, + "learning_rate": 5.163729464056033e-07, + "loss": 0.0847, + "num_input_tokens_seen": 199737344, + "step": 63465 + }, + { + "epoch": 4.0631201587606425, + "grad_norm": 5.7805256843566895, + "learning_rate": 5.160329809154854e-07, + "loss": 0.1188, + "num_input_tokens_seen": 199753152, + "step": 63470 + }, + { + "epoch": 4.0634402407016195, + "grad_norm": 3.5948081016540527, + "learning_rate": 5.156931144943466e-07, + "loss": 0.1069, + "num_input_tokens_seen": 199767808, + "step": 63475 + }, + { + "epoch": 4.0637603226425965, + "grad_norm": 8.57376480102539, + "learning_rate": 5.153533471591582e-07, + "loss": 0.1627, + "num_input_tokens_seen": 199782080, + "step": 63480 + }, + { + "epoch": 4.0640804045835734, + "grad_norm": 7.880132675170898, + "learning_rate": 5.150136789268853e-07, + "loss": 0.1198, + "num_input_tokens_seen": 199797312, + "step": 63485 + }, + { + "epoch": 4.06440048652455, + "grad_norm": 6.964414119720459, + "learning_rate": 5.146741098144911e-07, + "loss": 0.2233, + "num_input_tokens_seen": 199812544, + "step": 63490 + }, + { + "epoch": 4.064720568465527, + "grad_norm": 3.7892439365386963, + "learning_rate": 5.143346398389292e-07, + "loss": 0.1031, + "num_input_tokens_seen": 199827776, + "step": 63495 + }, + { + "epoch": 4.065040650406504, + "grad_norm": 4.502318382263184, + "learning_rate": 5.139952690171534e-07, + "loss": 0.1403, + "num_input_tokens_seen": 199843264, + "step": 63500 + }, + { + "epoch": 4.0653607323474805, + "grad_norm": 5.661182880401611, + "learning_rate": 5.13655997366109e-07, + "loss": 0.116, + "num_input_tokens_seen": 199858752, + "step": 63505 + }, + { + "epoch": 4.0656808142884575, + "grad_norm": 7.053788185119629, + "learning_rate": 5.133168249027376e-07, + "loss": 0.1519, + "num_input_tokens_seen": 199874304, + "step": 63510 + }, + { + "epoch": 4.0660008962294345, + "grad_norm": 4.567406177520752, + "learning_rate": 5.129777516439757e-07, + "loss": 0.1366, + "num_input_tokens_seen": 199890944, + "step": 63515 + }, + { + "epoch": 4.0663209781704115, + "grad_norm": 74.30561065673828, + "learning_rate": 5.126387776067548e-07, + "loss": 0.1564, + "num_input_tokens_seen": 199905920, + "step": 63520 + }, + { + "epoch": 4.0666410601113885, + "grad_norm": 6.867386817932129, + "learning_rate": 5.122999028080017e-07, + "loss": 0.1467, + "num_input_tokens_seen": 199921728, + "step": 63525 + }, + { + "epoch": 4.0669611420523655, + "grad_norm": 6.980252742767334, + "learning_rate": 5.119611272646377e-07, + "loss": 0.132, + "num_input_tokens_seen": 199936896, + "step": 63530 + }, + { + "epoch": 4.0672812239933425, + "grad_norm": 5.977770805358887, + "learning_rate": 5.116224509935791e-07, + "loss": 0.1072, + "num_input_tokens_seen": 199951808, + "step": 63535 + }, + { + "epoch": 4.0676013059343195, + "grad_norm": 7.813695430755615, + "learning_rate": 5.112838740117396e-07, + "loss": 0.1189, + "num_input_tokens_seen": 199967360, + "step": 63540 + }, + { + "epoch": 4.0679213878752964, + "grad_norm": 3.8320064544677734, + "learning_rate": 5.109453963360234e-07, + "loss": 0.1474, + "num_input_tokens_seen": 199983360, + "step": 63545 + }, + { + "epoch": 4.0682414698162725, + "grad_norm": 8.735663414001465, + "learning_rate": 5.106070179833339e-07, + "loss": 0.1933, + "num_input_tokens_seen": 199999040, + "step": 63550 + }, + { + "epoch": 4.0685615517572495, + "grad_norm": 6.001568794250488, + "learning_rate": 5.102687389705676e-07, + "loss": 0.1199, + "num_input_tokens_seen": 200015104, + "step": 63555 + }, + { + "epoch": 4.0688816336982265, + "grad_norm": 8.535499572753906, + "learning_rate": 5.099305593146159e-07, + "loss": 0.1405, + "num_input_tokens_seen": 200031168, + "step": 63560 + }, + { + "epoch": 4.0692017156392035, + "grad_norm": 3.3347008228302, + "learning_rate": 5.095924790323672e-07, + "loss": 0.1306, + "num_input_tokens_seen": 200046208, + "step": 63565 + }, + { + "epoch": 4.0695217975801805, + "grad_norm": 7.856939792633057, + "learning_rate": 5.092544981407013e-07, + "loss": 0.1384, + "num_input_tokens_seen": 200061696, + "step": 63570 + }, + { + "epoch": 4.0698418795211575, + "grad_norm": 10.428153991699219, + "learning_rate": 5.089166166564977e-07, + "loss": 0.1234, + "num_input_tokens_seen": 200076992, + "step": 63575 + }, + { + "epoch": 4.0701619614621345, + "grad_norm": 8.3367280960083, + "learning_rate": 5.085788345966255e-07, + "loss": 0.1477, + "num_input_tokens_seen": 200092224, + "step": 63580 + }, + { + "epoch": 4.0704820434031115, + "grad_norm": 5.573425769805908, + "learning_rate": 5.082411519779542e-07, + "loss": 0.1053, + "num_input_tokens_seen": 200107776, + "step": 63585 + }, + { + "epoch": 4.0708021253440885, + "grad_norm": 4.889737129211426, + "learning_rate": 5.079035688173454e-07, + "loss": 0.0908, + "num_input_tokens_seen": 200123136, + "step": 63590 + }, + { + "epoch": 4.071122207285065, + "grad_norm": 8.57197380065918, + "learning_rate": 5.075660851316555e-07, + "loss": 0.1172, + "num_input_tokens_seen": 200138688, + "step": 63595 + }, + { + "epoch": 4.071442289226042, + "grad_norm": 4.089336395263672, + "learning_rate": 5.072287009377372e-07, + "loss": 0.0586, + "num_input_tokens_seen": 200155456, + "step": 63600 + }, + { + "epoch": 4.071762371167019, + "grad_norm": 7.386301517486572, + "learning_rate": 5.068914162524368e-07, + "loss": 0.1135, + "num_input_tokens_seen": 200170560, + "step": 63605 + }, + { + "epoch": 4.0720824531079955, + "grad_norm": 6.305893421173096, + "learning_rate": 5.065542310925992e-07, + "loss": 0.1052, + "num_input_tokens_seen": 200186048, + "step": 63610 + }, + { + "epoch": 4.0724025350489725, + "grad_norm": 6.103020668029785, + "learning_rate": 5.062171454750581e-07, + "loss": 0.1529, + "num_input_tokens_seen": 200201216, + "step": 63615 + }, + { + "epoch": 4.0727226169899495, + "grad_norm": 6.600955009460449, + "learning_rate": 5.058801594166482e-07, + "loss": 0.1382, + "num_input_tokens_seen": 200216576, + "step": 63620 + }, + { + "epoch": 4.0730426989309265, + "grad_norm": 8.650467872619629, + "learning_rate": 5.05543272934196e-07, + "loss": 0.1454, + "num_input_tokens_seen": 200232640, + "step": 63625 + }, + { + "epoch": 4.0733627808719035, + "grad_norm": 8.003135681152344, + "learning_rate": 5.052064860445241e-07, + "loss": 0.1427, + "num_input_tokens_seen": 200249088, + "step": 63630 + }, + { + "epoch": 4.0736828628128805, + "grad_norm": 10.565467834472656, + "learning_rate": 5.048697987644499e-07, + "loss": 0.1248, + "num_input_tokens_seen": 200264320, + "step": 63635 + }, + { + "epoch": 4.074002944753857, + "grad_norm": 6.684720039367676, + "learning_rate": 5.045332111107856e-07, + "loss": 0.1284, + "num_input_tokens_seen": 200280832, + "step": 63640 + }, + { + "epoch": 4.074323026694834, + "grad_norm": 7.549925804138184, + "learning_rate": 5.041967231003383e-07, + "loss": 0.157, + "num_input_tokens_seen": 200298048, + "step": 63645 + }, + { + "epoch": 4.074643108635811, + "grad_norm": 8.463425636291504, + "learning_rate": 5.038603347499121e-07, + "loss": 0.1335, + "num_input_tokens_seen": 200312768, + "step": 63650 + }, + { + "epoch": 4.074963190576788, + "grad_norm": 8.394426345825195, + "learning_rate": 5.035240460763019e-07, + "loss": 0.1238, + "num_input_tokens_seen": 200328512, + "step": 63655 + }, + { + "epoch": 4.075283272517765, + "grad_norm": 13.42997932434082, + "learning_rate": 5.031878570963019e-07, + "loss": 0.1152, + "num_input_tokens_seen": 200345152, + "step": 63660 + }, + { + "epoch": 4.075603354458742, + "grad_norm": 5.295426368713379, + "learning_rate": 5.028517678266997e-07, + "loss": 0.1495, + "num_input_tokens_seen": 200360832, + "step": 63665 + }, + { + "epoch": 4.0759234363997185, + "grad_norm": 12.845161437988281, + "learning_rate": 5.025157782842774e-07, + "loss": 0.1661, + "num_input_tokens_seen": 200376960, + "step": 63670 + }, + { + "epoch": 4.0762435183406955, + "grad_norm": 6.391480445861816, + "learning_rate": 5.021798884858126e-07, + "loss": 0.1434, + "num_input_tokens_seen": 200392064, + "step": 63675 + }, + { + "epoch": 4.0765636002816725, + "grad_norm": 3.795880079269409, + "learning_rate": 5.018440984480775e-07, + "loss": 0.1084, + "num_input_tokens_seen": 200407936, + "step": 63680 + }, + { + "epoch": 4.076883682222649, + "grad_norm": 3.281403064727783, + "learning_rate": 5.01508408187841e-07, + "loss": 0.1253, + "num_input_tokens_seen": 200424192, + "step": 63685 + }, + { + "epoch": 4.077203764163626, + "grad_norm": 4.788156986236572, + "learning_rate": 5.011728177218636e-07, + "loss": 0.1007, + "num_input_tokens_seen": 200440576, + "step": 63690 + }, + { + "epoch": 4.077523846104603, + "grad_norm": 143.15640258789062, + "learning_rate": 5.008373270669048e-07, + "loss": 0.1397, + "num_input_tokens_seen": 200455680, + "step": 63695 + }, + { + "epoch": 4.07784392804558, + "grad_norm": 8.013894081115723, + "learning_rate": 5.005019362397167e-07, + "loss": 0.1862, + "num_input_tokens_seen": 200472064, + "step": 63700 + }, + { + "epoch": 4.078164009986557, + "grad_norm": 4.242880344390869, + "learning_rate": 5.00166645257047e-07, + "loss": 0.1342, + "num_input_tokens_seen": 200487360, + "step": 63705 + }, + { + "epoch": 4.078484091927534, + "grad_norm": 8.961575508117676, + "learning_rate": 4.998314541356384e-07, + "loss": 0.1402, + "num_input_tokens_seen": 200502464, + "step": 63710 + }, + { + "epoch": 4.078804173868511, + "grad_norm": 2.448486089706421, + "learning_rate": 4.994963628922281e-07, + "loss": 0.122, + "num_input_tokens_seen": 200518144, + "step": 63715 + }, + { + "epoch": 4.079124255809488, + "grad_norm": 7.262826919555664, + "learning_rate": 4.991613715435495e-07, + "loss": 0.1182, + "num_input_tokens_seen": 200533312, + "step": 63720 + }, + { + "epoch": 4.0794443377504646, + "grad_norm": 7.479504108428955, + "learning_rate": 4.988264801063291e-07, + "loss": 0.0836, + "num_input_tokens_seen": 200549248, + "step": 63725 + }, + { + "epoch": 4.079764419691441, + "grad_norm": 6.994152545928955, + "learning_rate": 4.984916885972915e-07, + "loss": 0.112, + "num_input_tokens_seen": 200565696, + "step": 63730 + }, + { + "epoch": 4.080084501632418, + "grad_norm": 9.922218322753906, + "learning_rate": 4.981569970331534e-07, + "loss": 0.1929, + "num_input_tokens_seen": 200581696, + "step": 63735 + }, + { + "epoch": 4.080404583573395, + "grad_norm": 7.126255989074707, + "learning_rate": 4.978224054306274e-07, + "loss": 0.1161, + "num_input_tokens_seen": 200597120, + "step": 63740 + }, + { + "epoch": 4.080724665514372, + "grad_norm": 3.6785781383514404, + "learning_rate": 4.974879138064217e-07, + "loss": 0.0953, + "num_input_tokens_seen": 200611712, + "step": 63745 + }, + { + "epoch": 4.081044747455349, + "grad_norm": 7.6703691482543945, + "learning_rate": 4.971535221772391e-07, + "loss": 0.0989, + "num_input_tokens_seen": 200626560, + "step": 63750 + }, + { + "epoch": 4.081364829396326, + "grad_norm": 10.323360443115234, + "learning_rate": 4.968192305597769e-07, + "loss": 0.128, + "num_input_tokens_seen": 200641600, + "step": 63755 + }, + { + "epoch": 4.081684911337303, + "grad_norm": 6.648859977722168, + "learning_rate": 4.964850389707282e-07, + "loss": 0.138, + "num_input_tokens_seen": 200656192, + "step": 63760 + }, + { + "epoch": 4.08200499327828, + "grad_norm": 9.132231712341309, + "learning_rate": 4.9615094742678e-07, + "loss": 0.1218, + "num_input_tokens_seen": 200670848, + "step": 63765 + }, + { + "epoch": 4.082325075219256, + "grad_norm": 13.952527046203613, + "learning_rate": 4.958169559446171e-07, + "loss": 0.1446, + "num_input_tokens_seen": 200686144, + "step": 63770 + }, + { + "epoch": 4.082645157160233, + "grad_norm": 8.526734352111816, + "learning_rate": 4.954830645409147e-07, + "loss": 0.1186, + "num_input_tokens_seen": 200701248, + "step": 63775 + }, + { + "epoch": 4.08296523910121, + "grad_norm": 6.706409454345703, + "learning_rate": 4.951492732323479e-07, + "loss": 0.1228, + "num_input_tokens_seen": 200715840, + "step": 63780 + }, + { + "epoch": 4.083285321042187, + "grad_norm": 6.624173164367676, + "learning_rate": 4.948155820355832e-07, + "loss": 0.1434, + "num_input_tokens_seen": 200732672, + "step": 63785 + }, + { + "epoch": 4.083605402983164, + "grad_norm": 6.5767107009887695, + "learning_rate": 4.944819909672832e-07, + "loss": 0.1807, + "num_input_tokens_seen": 200747456, + "step": 63790 + }, + { + "epoch": 4.083925484924141, + "grad_norm": 6.605382919311523, + "learning_rate": 4.941485000441074e-07, + "loss": 0.1366, + "num_input_tokens_seen": 200763904, + "step": 63795 + }, + { + "epoch": 4.084245566865118, + "grad_norm": 5.441231727600098, + "learning_rate": 4.938151092827059e-07, + "loss": 0.1214, + "num_input_tokens_seen": 200779648, + "step": 63800 + }, + { + "epoch": 4.084565648806095, + "grad_norm": 2.749691963195801, + "learning_rate": 4.934818186997298e-07, + "loss": 0.1133, + "num_input_tokens_seen": 200795072, + "step": 63805 + }, + { + "epoch": 4.084885730747072, + "grad_norm": 2.9431703090667725, + "learning_rate": 4.931486283118183e-07, + "loss": 0.1034, + "num_input_tokens_seen": 200809536, + "step": 63810 + }, + { + "epoch": 4.085205812688048, + "grad_norm": 5.098804950714111, + "learning_rate": 4.92815538135612e-07, + "loss": 0.1396, + "num_input_tokens_seen": 200825472, + "step": 63815 + }, + { + "epoch": 4.085525894629025, + "grad_norm": 4.843893051147461, + "learning_rate": 4.924825481877427e-07, + "loss": 0.0972, + "num_input_tokens_seen": 200841088, + "step": 63820 + }, + { + "epoch": 4.085845976570002, + "grad_norm": 8.927509307861328, + "learning_rate": 4.921496584848379e-07, + "loss": 0.1445, + "num_input_tokens_seen": 200857920, + "step": 63825 + }, + { + "epoch": 4.086166058510979, + "grad_norm": 8.199912071228027, + "learning_rate": 4.918168690435208e-07, + "loss": 0.1206, + "num_input_tokens_seen": 200874880, + "step": 63830 + }, + { + "epoch": 4.086486140451956, + "grad_norm": 13.004698753356934, + "learning_rate": 4.914841798804085e-07, + "loss": 0.1534, + "num_input_tokens_seen": 200889536, + "step": 63835 + }, + { + "epoch": 4.086806222392933, + "grad_norm": 7.282524108886719, + "learning_rate": 4.911515910121153e-07, + "loss": 0.1226, + "num_input_tokens_seen": 200906304, + "step": 63840 + }, + { + "epoch": 4.08712630433391, + "grad_norm": 10.06285285949707, + "learning_rate": 4.908191024552466e-07, + "loss": 0.1881, + "num_input_tokens_seen": 200921856, + "step": 63845 + }, + { + "epoch": 4.087446386274887, + "grad_norm": 6.178557872772217, + "learning_rate": 4.904867142264074e-07, + "loss": 0.1129, + "num_input_tokens_seen": 200937088, + "step": 63850 + }, + { + "epoch": 4.087766468215864, + "grad_norm": 6.1194305419921875, + "learning_rate": 4.901544263421945e-07, + "loss": 0.0849, + "num_input_tokens_seen": 200953280, + "step": 63855 + }, + { + "epoch": 4.08808655015684, + "grad_norm": 6.021678447723389, + "learning_rate": 4.898222388192005e-07, + "loss": 0.1147, + "num_input_tokens_seen": 200968960, + "step": 63860 + }, + { + "epoch": 4.088406632097817, + "grad_norm": 4.732282638549805, + "learning_rate": 4.894901516740133e-07, + "loss": 0.1434, + "num_input_tokens_seen": 200984704, + "step": 63865 + }, + { + "epoch": 4.088726714038794, + "grad_norm": 6.697568893432617, + "learning_rate": 4.891581649232155e-07, + "loss": 0.1706, + "num_input_tokens_seen": 201000064, + "step": 63870 + }, + { + "epoch": 4.089046795979771, + "grad_norm": 7.29400110244751, + "learning_rate": 4.888262785833844e-07, + "loss": 0.1149, + "num_input_tokens_seen": 201014848, + "step": 63875 + }, + { + "epoch": 4.089366877920748, + "grad_norm": 7.677183628082275, + "learning_rate": 4.884944926710943e-07, + "loss": 0.1785, + "num_input_tokens_seen": 201030464, + "step": 63880 + }, + { + "epoch": 4.089686959861725, + "grad_norm": 9.775403022766113, + "learning_rate": 4.881628072029105e-07, + "loss": 0.1517, + "num_input_tokens_seen": 201047104, + "step": 63885 + }, + { + "epoch": 4.090007041802702, + "grad_norm": 6.241161823272705, + "learning_rate": 4.878312221953979e-07, + "loss": 0.1125, + "num_input_tokens_seen": 201062272, + "step": 63890 + }, + { + "epoch": 4.090327123743679, + "grad_norm": 8.191804885864258, + "learning_rate": 4.87499737665112e-07, + "loss": 0.1329, + "num_input_tokens_seen": 201082624, + "step": 63895 + }, + { + "epoch": 4.090647205684656, + "grad_norm": 3.571190595626831, + "learning_rate": 4.871683536286068e-07, + "loss": 0.1418, + "num_input_tokens_seen": 201098496, + "step": 63900 + }, + { + "epoch": 4.090967287625632, + "grad_norm": 4.2672600746154785, + "learning_rate": 4.868370701024299e-07, + "loss": 0.1341, + "num_input_tokens_seen": 201113152, + "step": 63905 + }, + { + "epoch": 4.091287369566609, + "grad_norm": 8.960036277770996, + "learning_rate": 4.865058871031228e-07, + "loss": 0.0994, + "num_input_tokens_seen": 201129728, + "step": 63910 + }, + { + "epoch": 4.091607451507586, + "grad_norm": 11.316998481750488, + "learning_rate": 4.86174804647225e-07, + "loss": 0.1385, + "num_input_tokens_seen": 201144640, + "step": 63915 + }, + { + "epoch": 4.091927533448563, + "grad_norm": 5.9692063331604, + "learning_rate": 4.858438227512666e-07, + "loss": 0.1163, + "num_input_tokens_seen": 201161024, + "step": 63920 + }, + { + "epoch": 4.09224761538954, + "grad_norm": 4.682860374450684, + "learning_rate": 4.855129414317769e-07, + "loss": 0.1613, + "num_input_tokens_seen": 201176512, + "step": 63925 + }, + { + "epoch": 4.092567697330517, + "grad_norm": 5.050927639007568, + "learning_rate": 4.851821607052779e-07, + "loss": 0.0998, + "num_input_tokens_seen": 201191040, + "step": 63930 + }, + { + "epoch": 4.092887779271494, + "grad_norm": 5.81198263168335, + "learning_rate": 4.84851480588287e-07, + "loss": 0.1349, + "num_input_tokens_seen": 201207296, + "step": 63935 + }, + { + "epoch": 4.093207861212471, + "grad_norm": 5.979616641998291, + "learning_rate": 4.845209010973164e-07, + "loss": 0.0892, + "num_input_tokens_seen": 201223104, + "step": 63940 + }, + { + "epoch": 4.093527943153448, + "grad_norm": 9.303556442260742, + "learning_rate": 4.841904222488741e-07, + "loss": 0.178, + "num_input_tokens_seen": 201239296, + "step": 63945 + }, + { + "epoch": 4.093848025094424, + "grad_norm": 8.367188453674316, + "learning_rate": 4.838600440594618e-07, + "loss": 0.1348, + "num_input_tokens_seen": 201254272, + "step": 63950 + }, + { + "epoch": 4.094168107035401, + "grad_norm": 9.768506050109863, + "learning_rate": 4.835297665455768e-07, + "loss": 0.1422, + "num_input_tokens_seen": 201268544, + "step": 63955 + }, + { + "epoch": 4.094488188976378, + "grad_norm": 27.486238479614258, + "learning_rate": 4.831995897237124e-07, + "loss": 0.317, + "num_input_tokens_seen": 201283840, + "step": 63960 + }, + { + "epoch": 4.094808270917355, + "grad_norm": 5.368742942810059, + "learning_rate": 4.828695136103557e-07, + "loss": 0.0856, + "num_input_tokens_seen": 201300096, + "step": 63965 + }, + { + "epoch": 4.095128352858332, + "grad_norm": 5.814819812774658, + "learning_rate": 4.825395382219883e-07, + "loss": 0.1197, + "num_input_tokens_seen": 201315968, + "step": 63970 + }, + { + "epoch": 4.095448434799309, + "grad_norm": 5.141866683959961, + "learning_rate": 4.822096635750879e-07, + "loss": 0.1351, + "num_input_tokens_seen": 201331968, + "step": 63975 + }, + { + "epoch": 4.095768516740286, + "grad_norm": 8.935876846313477, + "learning_rate": 4.818798896861265e-07, + "loss": 0.1312, + "num_input_tokens_seen": 201349760, + "step": 63980 + }, + { + "epoch": 4.096088598681263, + "grad_norm": 9.864580154418945, + "learning_rate": 4.815502165715713e-07, + "loss": 0.1428, + "num_input_tokens_seen": 201367552, + "step": 63985 + }, + { + "epoch": 4.09640868062224, + "grad_norm": 7.246709823608398, + "learning_rate": 4.812206442478848e-07, + "loss": 0.1233, + "num_input_tokens_seen": 201382272, + "step": 63990 + }, + { + "epoch": 4.096728762563216, + "grad_norm": 4.619058609008789, + "learning_rate": 4.808911727315233e-07, + "loss": 0.1355, + "num_input_tokens_seen": 201398336, + "step": 63995 + }, + { + "epoch": 4.097048844504193, + "grad_norm": 7.081596851348877, + "learning_rate": 4.805618020389405e-07, + "loss": 0.1449, + "num_input_tokens_seen": 201412992, + "step": 64000 + }, + { + "epoch": 4.09736892644517, + "grad_norm": 7.6991868019104, + "learning_rate": 4.802325321865814e-07, + "loss": 0.2353, + "num_input_tokens_seen": 201427840, + "step": 64005 + }, + { + "epoch": 4.097689008386147, + "grad_norm": 7.665463924407959, + "learning_rate": 4.799033631908894e-07, + "loss": 0.1559, + "num_input_tokens_seen": 201444096, + "step": 64010 + }, + { + "epoch": 4.098009090327124, + "grad_norm": 4.001482009887695, + "learning_rate": 4.795742950683013e-07, + "loss": 0.1141, + "num_input_tokens_seen": 201460800, + "step": 64015 + }, + { + "epoch": 4.098329172268101, + "grad_norm": 6.384839057922363, + "learning_rate": 4.792453278352485e-07, + "loss": 0.1633, + "num_input_tokens_seen": 201477440, + "step": 64020 + }, + { + "epoch": 4.098649254209078, + "grad_norm": 7.807558059692383, + "learning_rate": 4.789164615081593e-07, + "loss": 0.1256, + "num_input_tokens_seen": 201492992, + "step": 64025 + }, + { + "epoch": 4.098969336150055, + "grad_norm": 7.991443634033203, + "learning_rate": 4.785876961034533e-07, + "loss": 0.1386, + "num_input_tokens_seen": 201508608, + "step": 64030 + }, + { + "epoch": 4.099289418091031, + "grad_norm": 5.980536460876465, + "learning_rate": 4.782590316375499e-07, + "loss": 0.0883, + "num_input_tokens_seen": 201523840, + "step": 64035 + }, + { + "epoch": 4.099609500032008, + "grad_norm": 78.12960052490234, + "learning_rate": 4.779304681268584e-07, + "loss": 0.2145, + "num_input_tokens_seen": 201540672, + "step": 64040 + }, + { + "epoch": 4.099929581972985, + "grad_norm": 6.871655464172363, + "learning_rate": 4.776020055877872e-07, + "loss": 0.1174, + "num_input_tokens_seen": 201555200, + "step": 64045 + }, + { + "epoch": 4.100249663913962, + "grad_norm": 6.392416477203369, + "learning_rate": 4.772736440367379e-07, + "loss": 0.121, + "num_input_tokens_seen": 201570880, + "step": 64050 + }, + { + "epoch": 4.100569745854939, + "grad_norm": 5.649820327758789, + "learning_rate": 4.769453834901066e-07, + "loss": 0.1114, + "num_input_tokens_seen": 201585664, + "step": 64055 + }, + { + "epoch": 4.100889827795916, + "grad_norm": 6.630617618560791, + "learning_rate": 4.766172239642852e-07, + "loss": 0.1461, + "num_input_tokens_seen": 201601792, + "step": 64060 + }, + { + "epoch": 4.101209909736893, + "grad_norm": 9.692947387695312, + "learning_rate": 4.7628916547566017e-07, + "loss": 0.1063, + "num_input_tokens_seen": 201617280, + "step": 64065 + }, + { + "epoch": 4.10152999167787, + "grad_norm": 6.5548529624938965, + "learning_rate": 4.7596120804061314e-07, + "loss": 0.1303, + "num_input_tokens_seen": 201633472, + "step": 64070 + }, + { + "epoch": 4.101850073618847, + "grad_norm": 10.319610595703125, + "learning_rate": 4.756333516755199e-07, + "loss": 0.1477, + "num_input_tokens_seen": 201648512, + "step": 64075 + }, + { + "epoch": 4.102170155559823, + "grad_norm": 3.0399465560913086, + "learning_rate": 4.753055963967529e-07, + "loss": 0.1502, + "num_input_tokens_seen": 201665152, + "step": 64080 + }, + { + "epoch": 4.1024902375008, + "grad_norm": 5.851550102233887, + "learning_rate": 4.749779422206785e-07, + "loss": 0.1126, + "num_input_tokens_seen": 201680896, + "step": 64085 + }, + { + "epoch": 4.102810319441777, + "grad_norm": 10.231552124023438, + "learning_rate": 4.746503891636575e-07, + "loss": 0.1152, + "num_input_tokens_seen": 201696064, + "step": 64090 + }, + { + "epoch": 4.103130401382754, + "grad_norm": 5.460229396820068, + "learning_rate": 4.743229372420466e-07, + "loss": 0.1107, + "num_input_tokens_seen": 201711104, + "step": 64095 + }, + { + "epoch": 4.103450483323731, + "grad_norm": 7.097415924072266, + "learning_rate": 4.739955864721968e-07, + "loss": 0.1282, + "num_input_tokens_seen": 201727232, + "step": 64100 + }, + { + "epoch": 4.103770565264708, + "grad_norm": 14.54581356048584, + "learning_rate": 4.7366833687045365e-07, + "loss": 0.1236, + "num_input_tokens_seen": 201742464, + "step": 64105 + }, + { + "epoch": 4.104090647205685, + "grad_norm": 11.709148406982422, + "learning_rate": 4.7334118845316024e-07, + "loss": 0.1583, + "num_input_tokens_seen": 201757824, + "step": 64110 + }, + { + "epoch": 4.104410729146662, + "grad_norm": 8.274080276489258, + "learning_rate": 4.730141412366501e-07, + "loss": 0.1226, + "num_input_tokens_seen": 201773440, + "step": 64115 + }, + { + "epoch": 4.104730811087639, + "grad_norm": 5.246041774749756, + "learning_rate": 4.726871952372569e-07, + "loss": 0.0671, + "num_input_tokens_seen": 201789248, + "step": 64120 + }, + { + "epoch": 4.105050893028615, + "grad_norm": 6.7378621101379395, + "learning_rate": 4.7236035047130425e-07, + "loss": 0.1372, + "num_input_tokens_seen": 201804928, + "step": 64125 + }, + { + "epoch": 4.105370974969592, + "grad_norm": 7.771871566772461, + "learning_rate": 4.720336069551143e-07, + "loss": 0.1448, + "num_input_tokens_seen": 201820352, + "step": 64130 + }, + { + "epoch": 4.105691056910569, + "grad_norm": 6.194515228271484, + "learning_rate": 4.717069647050029e-07, + "loss": 0.103, + "num_input_tokens_seen": 201836160, + "step": 64135 + }, + { + "epoch": 4.106011138851546, + "grad_norm": 10.255449295043945, + "learning_rate": 4.7138042373728016e-07, + "loss": 0.1144, + "num_input_tokens_seen": 201852736, + "step": 64140 + }, + { + "epoch": 4.106331220792523, + "grad_norm": 4.92250919342041, + "learning_rate": 4.710539840682538e-07, + "loss": 0.0645, + "num_input_tokens_seen": 201868608, + "step": 64145 + }, + { + "epoch": 4.1066513027335, + "grad_norm": 7.533327579498291, + "learning_rate": 4.7072764571422127e-07, + "loss": 0.0904, + "num_input_tokens_seen": 201884864, + "step": 64150 + }, + { + "epoch": 4.106971384674477, + "grad_norm": 8.86917781829834, + "learning_rate": 4.70401408691481e-07, + "loss": 0.2092, + "num_input_tokens_seen": 201900352, + "step": 64155 + }, + { + "epoch": 4.107291466615454, + "grad_norm": 4.778404235839844, + "learning_rate": 4.7007527301632274e-07, + "loss": 0.144, + "num_input_tokens_seen": 201915968, + "step": 64160 + }, + { + "epoch": 4.107611548556431, + "grad_norm": 7.085107326507568, + "learning_rate": 4.697492387050315e-07, + "loss": 0.1251, + "num_input_tokens_seen": 201932032, + "step": 64165 + }, + { + "epoch": 4.107931630497407, + "grad_norm": 4.512434959411621, + "learning_rate": 4.6942330577388837e-07, + "loss": 0.1085, + "num_input_tokens_seen": 201947584, + "step": 64170 + }, + { + "epoch": 4.108251712438384, + "grad_norm": 5.777368068695068, + "learning_rate": 4.6909747423916826e-07, + "loss": 0.1524, + "num_input_tokens_seen": 201963264, + "step": 64175 + }, + { + "epoch": 4.108571794379361, + "grad_norm": 6.044201374053955, + "learning_rate": 4.6877174411714186e-07, + "loss": 0.1561, + "num_input_tokens_seen": 201979328, + "step": 64180 + }, + { + "epoch": 4.108891876320338, + "grad_norm": 7.18031644821167, + "learning_rate": 4.6844611542407354e-07, + "loss": 0.1357, + "num_input_tokens_seen": 201994688, + "step": 64185 + }, + { + "epoch": 4.109211958261315, + "grad_norm": 5.056606769561768, + "learning_rate": 4.681205881762249e-07, + "loss": 0.2014, + "num_input_tokens_seen": 202009536, + "step": 64190 + }, + { + "epoch": 4.109532040202292, + "grad_norm": 11.028480529785156, + "learning_rate": 4.677951623898502e-07, + "loss": 0.0846, + "num_input_tokens_seen": 202025600, + "step": 64195 + }, + { + "epoch": 4.109852122143269, + "grad_norm": 10.291851997375488, + "learning_rate": 4.674698380812001e-07, + "loss": 0.187, + "num_input_tokens_seen": 202041664, + "step": 64200 + }, + { + "epoch": 4.110172204084246, + "grad_norm": 5.782900810241699, + "learning_rate": 4.671446152665191e-07, + "loss": 0.1034, + "num_input_tokens_seen": 202057024, + "step": 64205 + }, + { + "epoch": 4.110492286025223, + "grad_norm": 10.192551612854004, + "learning_rate": 4.668194939620471e-07, + "loss": 0.1511, + "num_input_tokens_seen": 202072512, + "step": 64210 + }, + { + "epoch": 4.110812367966199, + "grad_norm": 7.422775745391846, + "learning_rate": 4.664944741840191e-07, + "loss": 0.1393, + "num_input_tokens_seen": 202088704, + "step": 64215 + }, + { + "epoch": 4.111132449907176, + "grad_norm": 6.216917991638184, + "learning_rate": 4.6616955594866507e-07, + "loss": 0.0914, + "num_input_tokens_seen": 202103744, + "step": 64220 + }, + { + "epoch": 4.111452531848153, + "grad_norm": 5.656347751617432, + "learning_rate": 4.65844739272209e-07, + "loss": 0.1151, + "num_input_tokens_seen": 202118528, + "step": 64225 + }, + { + "epoch": 4.11177261378913, + "grad_norm": 9.321889877319336, + "learning_rate": 4.6552002417087236e-07, + "loss": 0.112, + "num_input_tokens_seen": 202133696, + "step": 64230 + }, + { + "epoch": 4.112092695730107, + "grad_norm": 7.956514358520508, + "learning_rate": 4.651954106608672e-07, + "loss": 0.1342, + "num_input_tokens_seen": 202151296, + "step": 64235 + }, + { + "epoch": 4.112412777671084, + "grad_norm": 7.893286228179932, + "learning_rate": 4.6487089875840496e-07, + "loss": 0.0695, + "num_input_tokens_seen": 202167296, + "step": 64240 + }, + { + "epoch": 4.112732859612061, + "grad_norm": 6.544720649719238, + "learning_rate": 4.6454648847968935e-07, + "loss": 0.1241, + "num_input_tokens_seen": 202182272, + "step": 64245 + }, + { + "epoch": 4.113052941553038, + "grad_norm": 6.785724639892578, + "learning_rate": 4.642221798409191e-07, + "loss": 0.1315, + "num_input_tokens_seen": 202198784, + "step": 64250 + }, + { + "epoch": 4.113373023494015, + "grad_norm": 0.4086298644542694, + "learning_rate": 4.6389797285829067e-07, + "loss": 0.0888, + "num_input_tokens_seen": 202213824, + "step": 64255 + }, + { + "epoch": 4.113693105434991, + "grad_norm": 5.997555732727051, + "learning_rate": 4.635738675479906e-07, + "loss": 0.1836, + "num_input_tokens_seen": 202228416, + "step": 64260 + }, + { + "epoch": 4.114013187375968, + "grad_norm": 5.9400811195373535, + "learning_rate": 4.6324986392620533e-07, + "loss": 0.1495, + "num_input_tokens_seen": 202243648, + "step": 64265 + }, + { + "epoch": 4.114333269316945, + "grad_norm": 8.89889907836914, + "learning_rate": 4.629259620091114e-07, + "loss": 0.1623, + "num_input_tokens_seen": 202258624, + "step": 64270 + }, + { + "epoch": 4.114653351257922, + "grad_norm": 5.258963108062744, + "learning_rate": 4.626021618128851e-07, + "loss": 0.1202, + "num_input_tokens_seen": 202273792, + "step": 64275 + }, + { + "epoch": 4.114973433198899, + "grad_norm": 3.978872537612915, + "learning_rate": 4.6227846335369445e-07, + "loss": 0.2068, + "num_input_tokens_seen": 202289280, + "step": 64280 + }, + { + "epoch": 4.115293515139876, + "grad_norm": 14.61378002166748, + "learning_rate": 4.6195486664770307e-07, + "loss": 0.1566, + "num_input_tokens_seen": 202304448, + "step": 64285 + }, + { + "epoch": 4.115613597080853, + "grad_norm": 6.341430187225342, + "learning_rate": 4.6163137171106985e-07, + "loss": 0.147, + "num_input_tokens_seen": 202319808, + "step": 64290 + }, + { + "epoch": 4.11593367902183, + "grad_norm": 5.738806247711182, + "learning_rate": 4.613079785599486e-07, + "loss": 0.1479, + "num_input_tokens_seen": 202335360, + "step": 64295 + }, + { + "epoch": 4.116253760962806, + "grad_norm": 8.914966583251953, + "learning_rate": 4.609846872104876e-07, + "loss": 0.1095, + "num_input_tokens_seen": 202351488, + "step": 64300 + }, + { + "epoch": 4.116573842903783, + "grad_norm": 5.37955904006958, + "learning_rate": 4.6066149767882986e-07, + "loss": 0.107, + "num_input_tokens_seen": 202366592, + "step": 64305 + }, + { + "epoch": 4.11689392484476, + "grad_norm": 4.975404739379883, + "learning_rate": 4.603384099811151e-07, + "loss": 0.1555, + "num_input_tokens_seen": 202381760, + "step": 64310 + }, + { + "epoch": 4.117214006785737, + "grad_norm": 8.30335807800293, + "learning_rate": 4.600154241334759e-07, + "loss": 0.1152, + "num_input_tokens_seen": 202397696, + "step": 64315 + }, + { + "epoch": 4.117534088726714, + "grad_norm": 5.856732368469238, + "learning_rate": 4.596925401520405e-07, + "loss": 0.1218, + "num_input_tokens_seen": 202412608, + "step": 64320 + }, + { + "epoch": 4.117854170667691, + "grad_norm": 8.306800842285156, + "learning_rate": 4.5936975805293213e-07, + "loss": 0.1414, + "num_input_tokens_seen": 202429248, + "step": 64325 + }, + { + "epoch": 4.118174252608668, + "grad_norm": 7.704750061035156, + "learning_rate": 4.590470778522688e-07, + "loss": 0.1261, + "num_input_tokens_seen": 202445568, + "step": 64330 + }, + { + "epoch": 4.118494334549645, + "grad_norm": 3.596156120300293, + "learning_rate": 4.58724499566163e-07, + "loss": 0.1538, + "num_input_tokens_seen": 202461568, + "step": 64335 + }, + { + "epoch": 4.118814416490622, + "grad_norm": 5.88538932800293, + "learning_rate": 4.584020232107239e-07, + "loss": 0.1237, + "num_input_tokens_seen": 202476800, + "step": 64340 + }, + { + "epoch": 4.119134498431598, + "grad_norm": 9.162357330322266, + "learning_rate": 4.580796488020525e-07, + "loss": 0.1777, + "num_input_tokens_seen": 202492416, + "step": 64345 + }, + { + "epoch": 4.119454580372575, + "grad_norm": 8.24802303314209, + "learning_rate": 4.577573763562487e-07, + "loss": 0.1355, + "num_input_tokens_seen": 202507648, + "step": 64350 + }, + { + "epoch": 4.119774662313552, + "grad_norm": 7.839282989501953, + "learning_rate": 4.574352058894027e-07, + "loss": 0.0978, + "num_input_tokens_seen": 202523328, + "step": 64355 + }, + { + "epoch": 4.120094744254529, + "grad_norm": 6.798035144805908, + "learning_rate": 4.5711313741760354e-07, + "loss": 0.0897, + "num_input_tokens_seen": 202540736, + "step": 64360 + }, + { + "epoch": 4.120414826195506, + "grad_norm": 6.4673004150390625, + "learning_rate": 4.5679117095693363e-07, + "loss": 0.169, + "num_input_tokens_seen": 202556160, + "step": 64365 + }, + { + "epoch": 4.120734908136483, + "grad_norm": 8.152679443359375, + "learning_rate": 4.5646930652346904e-07, + "loss": 0.166, + "num_input_tokens_seen": 202571072, + "step": 64370 + }, + { + "epoch": 4.12105499007746, + "grad_norm": 8.43988037109375, + "learning_rate": 4.561475441332844e-07, + "loss": 0.1584, + "num_input_tokens_seen": 202586048, + "step": 64375 + }, + { + "epoch": 4.121375072018437, + "grad_norm": 6.888323783874512, + "learning_rate": 4.558258838024437e-07, + "loss": 0.1511, + "num_input_tokens_seen": 202601408, + "step": 64380 + }, + { + "epoch": 4.121695153959414, + "grad_norm": 2.4181530475616455, + "learning_rate": 4.555043255470118e-07, + "loss": 0.0973, + "num_input_tokens_seen": 202617920, + "step": 64385 + }, + { + "epoch": 4.12201523590039, + "grad_norm": 8.222871780395508, + "learning_rate": 4.551828693830443e-07, + "loss": 0.1305, + "num_input_tokens_seen": 202634176, + "step": 64390 + }, + { + "epoch": 4.122335317841367, + "grad_norm": 5.854419708251953, + "learning_rate": 4.548615153265931e-07, + "loss": 0.1261, + "num_input_tokens_seen": 202650432, + "step": 64395 + }, + { + "epoch": 4.122655399782344, + "grad_norm": 9.208224296569824, + "learning_rate": 4.5454026339370514e-07, + "loss": 0.132, + "num_input_tokens_seen": 202665280, + "step": 64400 + }, + { + "epoch": 4.122975481723321, + "grad_norm": 10.366499900817871, + "learning_rate": 4.54219113600422e-07, + "loss": 0.1161, + "num_input_tokens_seen": 202681472, + "step": 64405 + }, + { + "epoch": 4.123295563664298, + "grad_norm": 8.45064640045166, + "learning_rate": 4.5389806596278035e-07, + "loss": 0.1338, + "num_input_tokens_seen": 202697088, + "step": 64410 + }, + { + "epoch": 4.123615645605275, + "grad_norm": 10.192599296569824, + "learning_rate": 4.5357712049681145e-07, + "loss": 0.1066, + "num_input_tokens_seen": 202713536, + "step": 64415 + }, + { + "epoch": 4.123935727546252, + "grad_norm": 7.316083908081055, + "learning_rate": 4.5325627721854114e-07, + "loss": 0.1069, + "num_input_tokens_seen": 202728512, + "step": 64420 + }, + { + "epoch": 4.124255809487229, + "grad_norm": 5.12086820602417, + "learning_rate": 4.529355361439919e-07, + "loss": 0.1338, + "num_input_tokens_seen": 202744256, + "step": 64425 + }, + { + "epoch": 4.124575891428206, + "grad_norm": 9.340728759765625, + "learning_rate": 4.5261489728917917e-07, + "loss": 0.1358, + "num_input_tokens_seen": 202759424, + "step": 64430 + }, + { + "epoch": 4.124895973369182, + "grad_norm": 6.101383209228516, + "learning_rate": 4.522943606701141e-07, + "loss": 0.1301, + "num_input_tokens_seen": 202774784, + "step": 64435 + }, + { + "epoch": 4.125216055310159, + "grad_norm": 5.047685623168945, + "learning_rate": 4.519739263028025e-07, + "loss": 0.0963, + "num_input_tokens_seen": 202793536, + "step": 64440 + }, + { + "epoch": 4.125536137251136, + "grad_norm": 5.234762668609619, + "learning_rate": 4.516535942032452e-07, + "loss": 0.1323, + "num_input_tokens_seen": 202809536, + "step": 64445 + }, + { + "epoch": 4.125856219192113, + "grad_norm": 4.574209690093994, + "learning_rate": 4.5133336438743793e-07, + "loss": 0.1842, + "num_input_tokens_seen": 202825408, + "step": 64450 + }, + { + "epoch": 4.12617630113309, + "grad_norm": 7.093040466308594, + "learning_rate": 4.5101323687137074e-07, + "loss": 0.0924, + "num_input_tokens_seen": 202840448, + "step": 64455 + }, + { + "epoch": 4.126496383074067, + "grad_norm": 7.543084621429443, + "learning_rate": 4.5069321167103106e-07, + "loss": 0.1397, + "num_input_tokens_seen": 202856064, + "step": 64460 + }, + { + "epoch": 4.126816465015044, + "grad_norm": 9.838717460632324, + "learning_rate": 4.503732888023968e-07, + "loss": 0.1081, + "num_input_tokens_seen": 202871936, + "step": 64465 + }, + { + "epoch": 4.127136546956021, + "grad_norm": 12.101775169372559, + "learning_rate": 4.5005346828144477e-07, + "loss": 0.1444, + "num_input_tokens_seen": 202887232, + "step": 64470 + }, + { + "epoch": 4.127456628896998, + "grad_norm": 12.257368087768555, + "learning_rate": 4.4973375012414474e-07, + "loss": 0.1141, + "num_input_tokens_seen": 202902528, + "step": 64475 + }, + { + "epoch": 4.127776710837974, + "grad_norm": 6.710676193237305, + "learning_rate": 4.4941413434646116e-07, + "loss": 0.1175, + "num_input_tokens_seen": 202917632, + "step": 64480 + }, + { + "epoch": 4.128096792778951, + "grad_norm": 8.253647804260254, + "learning_rate": 4.4909462096435594e-07, + "loss": 0.1459, + "num_input_tokens_seen": 202933376, + "step": 64485 + }, + { + "epoch": 4.128416874719928, + "grad_norm": 7.905727863311768, + "learning_rate": 4.4877520999378115e-07, + "loss": 0.1393, + "num_input_tokens_seen": 202949056, + "step": 64490 + }, + { + "epoch": 4.128736956660905, + "grad_norm": 5.415424823760986, + "learning_rate": 4.484559014506895e-07, + "loss": 0.0822, + "num_input_tokens_seen": 202964736, + "step": 64495 + }, + { + "epoch": 4.129057038601882, + "grad_norm": 5.61149787902832, + "learning_rate": 4.4813669535102243e-07, + "loss": 0.1429, + "num_input_tokens_seen": 202979776, + "step": 64500 + }, + { + "epoch": 4.129377120542859, + "grad_norm": 8.02567195892334, + "learning_rate": 4.4781759171072194e-07, + "loss": 0.1544, + "num_input_tokens_seen": 202996096, + "step": 64505 + }, + { + "epoch": 4.129697202483836, + "grad_norm": 4.626505374908447, + "learning_rate": 4.4749859054572133e-07, + "loss": 0.1191, + "num_input_tokens_seen": 203011136, + "step": 64510 + }, + { + "epoch": 4.130017284424813, + "grad_norm": 8.771536827087402, + "learning_rate": 4.471796918719501e-07, + "loss": 0.0833, + "num_input_tokens_seen": 203026816, + "step": 64515 + }, + { + "epoch": 4.13033736636579, + "grad_norm": 8.660917282104492, + "learning_rate": 4.4686089570533247e-07, + "loss": 0.1392, + "num_input_tokens_seen": 203043456, + "step": 64520 + }, + { + "epoch": 4.130657448306766, + "grad_norm": 5.964465618133545, + "learning_rate": 4.465422020617871e-07, + "loss": 0.1316, + "num_input_tokens_seen": 203059456, + "step": 64525 + }, + { + "epoch": 4.130977530247743, + "grad_norm": 4.65256404876709, + "learning_rate": 4.462236109572279e-07, + "loss": 0.0822, + "num_input_tokens_seen": 203075328, + "step": 64530 + }, + { + "epoch": 4.13129761218872, + "grad_norm": 3.1193559169769287, + "learning_rate": 4.4590512240756334e-07, + "loss": 0.1321, + "num_input_tokens_seen": 203091136, + "step": 64535 + }, + { + "epoch": 4.131617694129697, + "grad_norm": 7.609294891357422, + "learning_rate": 4.455867364286984e-07, + "loss": 0.1423, + "num_input_tokens_seen": 203106432, + "step": 64540 + }, + { + "epoch": 4.131937776070674, + "grad_norm": 6.1045427322387695, + "learning_rate": 4.452684530365306e-07, + "loss": 0.1326, + "num_input_tokens_seen": 203121600, + "step": 64545 + }, + { + "epoch": 4.132257858011651, + "grad_norm": 8.066226959228516, + "learning_rate": 4.449502722469537e-07, + "loss": 0.1625, + "num_input_tokens_seen": 203150528, + "step": 64550 + }, + { + "epoch": 4.132577939952628, + "grad_norm": 8.309831619262695, + "learning_rate": 4.4463219407585577e-07, + "loss": 0.1392, + "num_input_tokens_seen": 203166848, + "step": 64555 + }, + { + "epoch": 4.132898021893605, + "grad_norm": 5.422299385070801, + "learning_rate": 4.4431421853912e-07, + "loss": 0.1145, + "num_input_tokens_seen": 203182336, + "step": 64560 + }, + { + "epoch": 4.133218103834581, + "grad_norm": 9.264963150024414, + "learning_rate": 4.439963456526239e-07, + "loss": 0.1261, + "num_input_tokens_seen": 203197504, + "step": 64565 + }, + { + "epoch": 4.133538185775558, + "grad_norm": 9.160274505615234, + "learning_rate": 4.436785754322423e-07, + "loss": 0.0983, + "num_input_tokens_seen": 203212032, + "step": 64570 + }, + { + "epoch": 4.133858267716535, + "grad_norm": 8.655463218688965, + "learning_rate": 4.433609078938403e-07, + "loss": 0.1492, + "num_input_tokens_seen": 203227520, + "step": 64575 + }, + { + "epoch": 4.134178349657512, + "grad_norm": 8.177088737487793, + "learning_rate": 4.4304334305328296e-07, + "loss": 0.1942, + "num_input_tokens_seen": 203242432, + "step": 64580 + }, + { + "epoch": 4.134498431598489, + "grad_norm": 8.06353759765625, + "learning_rate": 4.4272588092642596e-07, + "loss": 0.142, + "num_input_tokens_seen": 203257984, + "step": 64585 + }, + { + "epoch": 4.134818513539466, + "grad_norm": 4.903648853302002, + "learning_rate": 4.4240852152912267e-07, + "loss": 0.1, + "num_input_tokens_seen": 203272640, + "step": 64590 + }, + { + "epoch": 4.135138595480443, + "grad_norm": 5.384436130523682, + "learning_rate": 4.420912648772205e-07, + "loss": 0.0801, + "num_input_tokens_seen": 203287680, + "step": 64595 + }, + { + "epoch": 4.13545867742142, + "grad_norm": 6.577602863311768, + "learning_rate": 4.417741109865606e-07, + "loss": 0.1625, + "num_input_tokens_seen": 203303616, + "step": 64600 + }, + { + "epoch": 4.135778759362397, + "grad_norm": 4.728972434997559, + "learning_rate": 4.4145705987298197e-07, + "loss": 0.1081, + "num_input_tokens_seen": 203318784, + "step": 64605 + }, + { + "epoch": 4.136098841303373, + "grad_norm": 7.489706039428711, + "learning_rate": 4.411401115523142e-07, + "loss": 0.0931, + "num_input_tokens_seen": 203334848, + "step": 64610 + }, + { + "epoch": 4.13641892324435, + "grad_norm": 11.820605278015137, + "learning_rate": 4.4082326604038525e-07, + "loss": 0.1182, + "num_input_tokens_seen": 203349696, + "step": 64615 + }, + { + "epoch": 4.136739005185327, + "grad_norm": 5.498492240905762, + "learning_rate": 4.4050652335301676e-07, + "loss": 0.1232, + "num_input_tokens_seen": 203365376, + "step": 64620 + }, + { + "epoch": 4.137059087126304, + "grad_norm": 7.15424919128418, + "learning_rate": 4.40189883506025e-07, + "loss": 0.1279, + "num_input_tokens_seen": 203381376, + "step": 64625 + }, + { + "epoch": 4.137379169067281, + "grad_norm": 8.572880744934082, + "learning_rate": 4.3987334651522134e-07, + "loss": 0.1339, + "num_input_tokens_seen": 203398528, + "step": 64630 + }, + { + "epoch": 4.137699251008258, + "grad_norm": 2.171250343322754, + "learning_rate": 4.395569123964119e-07, + "loss": 0.1897, + "num_input_tokens_seen": 203414784, + "step": 64635 + }, + { + "epoch": 4.138019332949235, + "grad_norm": 33.961822509765625, + "learning_rate": 4.392405811653977e-07, + "loss": 0.17, + "num_input_tokens_seen": 203431168, + "step": 64640 + }, + { + "epoch": 4.138339414890212, + "grad_norm": 7.317293167114258, + "learning_rate": 4.389243528379747e-07, + "loss": 0.1441, + "num_input_tokens_seen": 203446144, + "step": 64645 + }, + { + "epoch": 4.138659496831189, + "grad_norm": 6.206246852874756, + "learning_rate": 4.386082274299333e-07, + "loss": 0.1901, + "num_input_tokens_seen": 203460992, + "step": 64650 + }, + { + "epoch": 4.138979578772165, + "grad_norm": 7.162794589996338, + "learning_rate": 4.382922049570604e-07, + "loss": 0.1195, + "num_input_tokens_seen": 203476608, + "step": 64655 + }, + { + "epoch": 4.139299660713142, + "grad_norm": 10.033892631530762, + "learning_rate": 4.3797628543513547e-07, + "loss": 0.1279, + "num_input_tokens_seen": 203492416, + "step": 64660 + }, + { + "epoch": 4.139619742654119, + "grad_norm": 5.336422443389893, + "learning_rate": 4.376604688799344e-07, + "loss": 0.154, + "num_input_tokens_seen": 203508160, + "step": 64665 + }, + { + "epoch": 4.139939824595096, + "grad_norm": 14.454167366027832, + "learning_rate": 4.3734475530722686e-07, + "loss": 0.1593, + "num_input_tokens_seen": 203524800, + "step": 64670 + }, + { + "epoch": 4.140259906536073, + "grad_norm": 5.4836344718933105, + "learning_rate": 4.370291447327785e-07, + "loss": 0.1384, + "num_input_tokens_seen": 203541952, + "step": 64675 + }, + { + "epoch": 4.14057998847705, + "grad_norm": 6.725831508636475, + "learning_rate": 4.3671363717234877e-07, + "loss": 0.156, + "num_input_tokens_seen": 203558400, + "step": 64680 + }, + { + "epoch": 4.140900070418027, + "grad_norm": 5.648711204528809, + "learning_rate": 4.363982326416924e-07, + "loss": 0.1089, + "num_input_tokens_seen": 203586816, + "step": 64685 + }, + { + "epoch": 4.141220152359004, + "grad_norm": 19.24805450439453, + "learning_rate": 4.3608293115656066e-07, + "loss": 0.1465, + "num_input_tokens_seen": 203602304, + "step": 64690 + }, + { + "epoch": 4.141540234299981, + "grad_norm": 10.382246971130371, + "learning_rate": 4.3576773273269533e-07, + "loss": 0.1297, + "num_input_tokens_seen": 203617216, + "step": 64695 + }, + { + "epoch": 4.141860316240957, + "grad_norm": 3.059929370880127, + "learning_rate": 4.3545263738583784e-07, + "loss": 0.1259, + "num_input_tokens_seen": 203632832, + "step": 64700 + }, + { + "epoch": 4.142180398181934, + "grad_norm": 4.621263027191162, + "learning_rate": 4.3513764513172183e-07, + "loss": 0.1186, + "num_input_tokens_seen": 203648256, + "step": 64705 + }, + { + "epoch": 4.142500480122911, + "grad_norm": 4.6872735023498535, + "learning_rate": 4.3482275598607596e-07, + "loss": 0.1011, + "num_input_tokens_seen": 203664448, + "step": 64710 + }, + { + "epoch": 4.142820562063888, + "grad_norm": 6.460956573486328, + "learning_rate": 4.3450796996462567e-07, + "loss": 0.083, + "num_input_tokens_seen": 203681472, + "step": 64715 + }, + { + "epoch": 4.143140644004865, + "grad_norm": 6.000147342681885, + "learning_rate": 4.3419328708308727e-07, + "loss": 0.1433, + "num_input_tokens_seen": 203698240, + "step": 64720 + }, + { + "epoch": 4.143460725945842, + "grad_norm": 9.804975509643555, + "learning_rate": 4.338787073571768e-07, + "loss": 0.1355, + "num_input_tokens_seen": 203714304, + "step": 64725 + }, + { + "epoch": 4.143780807886819, + "grad_norm": 12.227949142456055, + "learning_rate": 4.3356423080260085e-07, + "loss": 0.1808, + "num_input_tokens_seen": 203729728, + "step": 64730 + }, + { + "epoch": 4.144100889827796, + "grad_norm": 7.299482822418213, + "learning_rate": 4.332498574350638e-07, + "loss": 0.1245, + "num_input_tokens_seen": 203745792, + "step": 64735 + }, + { + "epoch": 4.144420971768773, + "grad_norm": 6.429621696472168, + "learning_rate": 4.329355872702637e-07, + "loss": 0.131, + "num_input_tokens_seen": 203761216, + "step": 64740 + }, + { + "epoch": 4.144741053709749, + "grad_norm": 7.543949127197266, + "learning_rate": 4.3262142032389347e-07, + "loss": 0.0968, + "num_input_tokens_seen": 203776640, + "step": 64745 + }, + { + "epoch": 4.145061135650726, + "grad_norm": 7.2457475662231445, + "learning_rate": 4.3230735661164093e-07, + "loss": 0.1456, + "num_input_tokens_seen": 203792640, + "step": 64750 + }, + { + "epoch": 4.145381217591703, + "grad_norm": 6.649298191070557, + "learning_rate": 4.3199339614918874e-07, + "loss": 0.1417, + "num_input_tokens_seen": 203808128, + "step": 64755 + }, + { + "epoch": 4.14570129953268, + "grad_norm": 4.617276668548584, + "learning_rate": 4.316795389522138e-07, + "loss": 0.1232, + "num_input_tokens_seen": 203824704, + "step": 64760 + }, + { + "epoch": 4.146021381473657, + "grad_norm": 5.741948127746582, + "learning_rate": 4.313657850363903e-07, + "loss": 0.1134, + "num_input_tokens_seen": 203838784, + "step": 64765 + }, + { + "epoch": 4.146341463414634, + "grad_norm": 5.253041744232178, + "learning_rate": 4.310521344173835e-07, + "loss": 0.117, + "num_input_tokens_seen": 203855616, + "step": 64770 + }, + { + "epoch": 4.146661545355611, + "grad_norm": 7.2811737060546875, + "learning_rate": 4.3073858711085656e-07, + "loss": 0.1809, + "num_input_tokens_seen": 203870912, + "step": 64775 + }, + { + "epoch": 4.146981627296588, + "grad_norm": 5.2745866775512695, + "learning_rate": 4.3042514313246656e-07, + "loss": 0.1523, + "num_input_tokens_seen": 203887360, + "step": 64780 + }, + { + "epoch": 4.147301709237565, + "grad_norm": 4.935412406921387, + "learning_rate": 4.3011180249786485e-07, + "loss": 0.153, + "num_input_tokens_seen": 203902272, + "step": 64785 + }, + { + "epoch": 4.147621791178541, + "grad_norm": 9.247057914733887, + "learning_rate": 4.2979856522269796e-07, + "loss": 0.1738, + "num_input_tokens_seen": 203917632, + "step": 64790 + }, + { + "epoch": 4.147941873119518, + "grad_norm": 3.4771335124969482, + "learning_rate": 4.2948543132260686e-07, + "loss": 0.1056, + "num_input_tokens_seen": 203932416, + "step": 64795 + }, + { + "epoch": 4.148261955060495, + "grad_norm": 7.737612724304199, + "learning_rate": 4.291724008132295e-07, + "loss": 0.1178, + "num_input_tokens_seen": 203946816, + "step": 64800 + }, + { + "epoch": 4.148582037001472, + "grad_norm": 5.26614236831665, + "learning_rate": 4.2885947371019476e-07, + "loss": 0.1153, + "num_input_tokens_seen": 203963136, + "step": 64805 + }, + { + "epoch": 4.148902118942449, + "grad_norm": 7.270917892456055, + "learning_rate": 4.2854665002913103e-07, + "loss": 0.1212, + "num_input_tokens_seen": 203978496, + "step": 64810 + }, + { + "epoch": 4.149222200883426, + "grad_norm": 11.170479774475098, + "learning_rate": 4.282339297856564e-07, + "loss": 0.1427, + "num_input_tokens_seen": 203994368, + "step": 64815 + }, + { + "epoch": 4.149542282824403, + "grad_norm": 3.4648985862731934, + "learning_rate": 4.279213129953885e-07, + "loss": 0.1349, + "num_input_tokens_seen": 204010112, + "step": 64820 + }, + { + "epoch": 4.14986236476538, + "grad_norm": 4.9487128257751465, + "learning_rate": 4.276087996739375e-07, + "loss": 0.1063, + "num_input_tokens_seen": 204026048, + "step": 64825 + }, + { + "epoch": 4.150182446706356, + "grad_norm": 6.395337104797363, + "learning_rate": 4.272963898369073e-07, + "loss": 0.1247, + "num_input_tokens_seen": 204040896, + "step": 64830 + }, + { + "epoch": 4.150502528647333, + "grad_norm": 5.406057834625244, + "learning_rate": 4.269840834999006e-07, + "loss": 0.1024, + "num_input_tokens_seen": 204056704, + "step": 64835 + }, + { + "epoch": 4.15082261058831, + "grad_norm": 7.827709674835205, + "learning_rate": 4.2667188067850943e-07, + "loss": 0.1437, + "num_input_tokens_seen": 204072832, + "step": 64840 + }, + { + "epoch": 4.151142692529287, + "grad_norm": 18.274723052978516, + "learning_rate": 4.2635978138832576e-07, + "loss": 0.1359, + "num_input_tokens_seen": 204089152, + "step": 64845 + }, + { + "epoch": 4.151462774470264, + "grad_norm": 3.4492132663726807, + "learning_rate": 4.2604778564493326e-07, + "loss": 0.0895, + "num_input_tokens_seen": 204105600, + "step": 64850 + }, + { + "epoch": 4.151782856411241, + "grad_norm": 6.174066543579102, + "learning_rate": 4.2573589346391166e-07, + "loss": 0.1332, + "num_input_tokens_seen": 204123392, + "step": 64855 + }, + { + "epoch": 4.152102938352218, + "grad_norm": 10.934636116027832, + "learning_rate": 4.2542410486083484e-07, + "loss": 0.133, + "num_input_tokens_seen": 204139008, + "step": 64860 + }, + { + "epoch": 4.152423020293195, + "grad_norm": 12.135160446166992, + "learning_rate": 4.2511241985127236e-07, + "loss": 0.187, + "num_input_tokens_seen": 204154368, + "step": 64865 + }, + { + "epoch": 4.152743102234172, + "grad_norm": 5.261432647705078, + "learning_rate": 4.248008384507879e-07, + "loss": 0.0906, + "num_input_tokens_seen": 204169792, + "step": 64870 + }, + { + "epoch": 4.153063184175148, + "grad_norm": 4.414053440093994, + "learning_rate": 4.2448936067494033e-07, + "loss": 0.1672, + "num_input_tokens_seen": 204185088, + "step": 64875 + }, + { + "epoch": 4.153383266116125, + "grad_norm": 10.005860328674316, + "learning_rate": 4.241779865392823e-07, + "loss": 0.143, + "num_input_tokens_seen": 204200576, + "step": 64880 + }, + { + "epoch": 4.153703348057102, + "grad_norm": 5.049037933349609, + "learning_rate": 4.2386671605936377e-07, + "loss": 0.1264, + "num_input_tokens_seen": 204216768, + "step": 64885 + }, + { + "epoch": 4.154023429998079, + "grad_norm": 7.040349960327148, + "learning_rate": 4.2355554925072733e-07, + "loss": 0.1559, + "num_input_tokens_seen": 204231488, + "step": 64890 + }, + { + "epoch": 4.154343511939056, + "grad_norm": 4.050569534301758, + "learning_rate": 4.232444861289109e-07, + "loss": 0.1119, + "num_input_tokens_seen": 204246592, + "step": 64895 + }, + { + "epoch": 4.154663593880033, + "grad_norm": 4.416751861572266, + "learning_rate": 4.2293352670944755e-07, + "loss": 0.1392, + "num_input_tokens_seen": 204262656, + "step": 64900 + }, + { + "epoch": 4.15498367582101, + "grad_norm": 4.7634596824646, + "learning_rate": 4.226226710078646e-07, + "loss": 0.1221, + "num_input_tokens_seen": 204278592, + "step": 64905 + }, + { + "epoch": 4.155303757761987, + "grad_norm": 5.634028434753418, + "learning_rate": 4.2231191903968487e-07, + "loss": 0.1017, + "num_input_tokens_seen": 204295360, + "step": 64910 + }, + { + "epoch": 4.155623839702964, + "grad_norm": 2.987914562225342, + "learning_rate": 4.220012708204252e-07, + "loss": 0.0691, + "num_input_tokens_seen": 204310336, + "step": 64915 + }, + { + "epoch": 4.15594392164394, + "grad_norm": 8.094100952148438, + "learning_rate": 4.216907263655992e-07, + "loss": 0.0884, + "num_input_tokens_seen": 204330432, + "step": 64920 + }, + { + "epoch": 4.156264003584917, + "grad_norm": 12.026028633117676, + "learning_rate": 4.213802856907115e-07, + "loss": 0.093, + "num_input_tokens_seen": 204347264, + "step": 64925 + }, + { + "epoch": 4.156584085525894, + "grad_norm": 6.207912921905518, + "learning_rate": 4.21069948811266e-07, + "loss": 0.1339, + "num_input_tokens_seen": 204361920, + "step": 64930 + }, + { + "epoch": 4.156904167466871, + "grad_norm": 9.043725967407227, + "learning_rate": 4.207597157427584e-07, + "loss": 0.1068, + "num_input_tokens_seen": 204376576, + "step": 64935 + }, + { + "epoch": 4.157224249407848, + "grad_norm": 8.208356857299805, + "learning_rate": 4.2044958650068024e-07, + "loss": 0.1804, + "num_input_tokens_seen": 204392640, + "step": 64940 + }, + { + "epoch": 4.157544331348825, + "grad_norm": 7.2613606452941895, + "learning_rate": 4.2013956110051766e-07, + "loss": 0.1378, + "num_input_tokens_seen": 204409216, + "step": 64945 + }, + { + "epoch": 4.157864413289802, + "grad_norm": 3.547797203063965, + "learning_rate": 4.1982963955775143e-07, + "loss": 0.1157, + "num_input_tokens_seen": 204424448, + "step": 64950 + }, + { + "epoch": 4.158184495230779, + "grad_norm": 4.094151973724365, + "learning_rate": 4.195198218878588e-07, + "loss": 0.0977, + "num_input_tokens_seen": 204439360, + "step": 64955 + }, + { + "epoch": 4.158504577171756, + "grad_norm": 5.771692752838135, + "learning_rate": 4.192101081063082e-07, + "loss": 0.1166, + "num_input_tokens_seen": 204454208, + "step": 64960 + }, + { + "epoch": 4.1588246591127325, + "grad_norm": 6.248169422149658, + "learning_rate": 4.1890049822856716e-07, + "loss": 0.1392, + "num_input_tokens_seen": 204470208, + "step": 64965 + }, + { + "epoch": 4.1591447410537095, + "grad_norm": 6.672712802886963, + "learning_rate": 4.185909922700951e-07, + "loss": 0.1047, + "num_input_tokens_seen": 204485312, + "step": 64970 + }, + { + "epoch": 4.1594648229946865, + "grad_norm": 5.553216934204102, + "learning_rate": 4.182815902463472e-07, + "loss": 0.127, + "num_input_tokens_seen": 204501440, + "step": 64975 + }, + { + "epoch": 4.159784904935663, + "grad_norm": 5.347324371337891, + "learning_rate": 4.179722921727736e-07, + "loss": 0.0809, + "num_input_tokens_seen": 204515904, + "step": 64980 + }, + { + "epoch": 4.16010498687664, + "grad_norm": 6.981020450592041, + "learning_rate": 4.1766309806481857e-07, + "loss": 0.1538, + "num_input_tokens_seen": 204531328, + "step": 64985 + }, + { + "epoch": 4.160425068817617, + "grad_norm": 6.211248397827148, + "learning_rate": 4.1735400793792153e-07, + "loss": 0.1203, + "num_input_tokens_seen": 204546560, + "step": 64990 + }, + { + "epoch": 4.160745150758594, + "grad_norm": 5.114505767822266, + "learning_rate": 4.170450218075181e-07, + "loss": 0.1002, + "num_input_tokens_seen": 204562432, + "step": 64995 + }, + { + "epoch": 4.161065232699571, + "grad_norm": 4.6217546463012695, + "learning_rate": 4.167361396890357e-07, + "loss": 0.1078, + "num_input_tokens_seen": 204577856, + "step": 65000 + }, + { + "epoch": 4.161385314640548, + "grad_norm": 4.974288463592529, + "learning_rate": 4.1642736159789974e-07, + "loss": 0.1839, + "num_input_tokens_seen": 204593984, + "step": 65005 + }, + { + "epoch": 4.1617053965815245, + "grad_norm": 8.560049057006836, + "learning_rate": 4.1611868754952824e-07, + "loss": 0.2009, + "num_input_tokens_seen": 204609152, + "step": 65010 + }, + { + "epoch": 4.1620254785225015, + "grad_norm": 6.036561012268066, + "learning_rate": 4.158101175593349e-07, + "loss": 0.0927, + "num_input_tokens_seen": 204624896, + "step": 65015 + }, + { + "epoch": 4.1623455604634785, + "grad_norm": 9.369812965393066, + "learning_rate": 4.155016516427285e-07, + "loss": 0.1504, + "num_input_tokens_seen": 204640576, + "step": 65020 + }, + { + "epoch": 4.1626656424044555, + "grad_norm": 9.265480041503906, + "learning_rate": 4.1519328981511094e-07, + "loss": 0.1276, + "num_input_tokens_seen": 204657024, + "step": 65025 + }, + { + "epoch": 4.1629857243454325, + "grad_norm": 4.390053749084473, + "learning_rate": 4.1488503209188233e-07, + "loss": 0.0998, + "num_input_tokens_seen": 204672576, + "step": 65030 + }, + { + "epoch": 4.1633058062864094, + "grad_norm": 8.05117416381836, + "learning_rate": 4.14576878488433e-07, + "loss": 0.146, + "num_input_tokens_seen": 204688128, + "step": 65035 + }, + { + "epoch": 4.163625888227386, + "grad_norm": 6.919520378112793, + "learning_rate": 4.1426882902015325e-07, + "loss": 0.1338, + "num_input_tokens_seen": 204705472, + "step": 65040 + }, + { + "epoch": 4.163945970168363, + "grad_norm": 3.4324560165405273, + "learning_rate": 4.1396088370242257e-07, + "loss": 0.1259, + "num_input_tokens_seen": 204721984, + "step": 65045 + }, + { + "epoch": 4.16426605210934, + "grad_norm": 5.224032402038574, + "learning_rate": 4.136530425506202e-07, + "loss": 0.146, + "num_input_tokens_seen": 204738816, + "step": 65050 + }, + { + "epoch": 4.1645861340503165, + "grad_norm": 5.979124546051025, + "learning_rate": 4.133453055801176e-07, + "loss": 0.119, + "num_input_tokens_seen": 204753856, + "step": 65055 + }, + { + "epoch": 4.1649062159912935, + "grad_norm": 7.344429016113281, + "learning_rate": 4.13037672806281e-07, + "loss": 0.1108, + "num_input_tokens_seen": 204768960, + "step": 65060 + }, + { + "epoch": 4.1652262979322705, + "grad_norm": 3.9745049476623535, + "learning_rate": 4.1273014424447336e-07, + "loss": 0.0814, + "num_input_tokens_seen": 204784384, + "step": 65065 + }, + { + "epoch": 4.1655463798732475, + "grad_norm": 9.023301124572754, + "learning_rate": 4.124227199100489e-07, + "loss": 0.0996, + "num_input_tokens_seen": 204800576, + "step": 65070 + }, + { + "epoch": 4.1658664618142245, + "grad_norm": 6.294254779815674, + "learning_rate": 4.121153998183608e-07, + "loss": 0.0984, + "num_input_tokens_seen": 204816896, + "step": 65075 + }, + { + "epoch": 4.1661865437552015, + "grad_norm": 5.817291259765625, + "learning_rate": 4.1180818398475405e-07, + "loss": 0.1195, + "num_input_tokens_seen": 204832512, + "step": 65080 + }, + { + "epoch": 4.1665066256961785, + "grad_norm": 7.0718817710876465, + "learning_rate": 4.1150107242456994e-07, + "loss": 0.1197, + "num_input_tokens_seen": 204849920, + "step": 65085 + }, + { + "epoch": 4.1668267076371555, + "grad_norm": 6.484981536865234, + "learning_rate": 4.111940651531432e-07, + "loss": 0.1707, + "num_input_tokens_seen": 204864960, + "step": 65090 + }, + { + "epoch": 4.167146789578132, + "grad_norm": 5.123349189758301, + "learning_rate": 4.1088716218580517e-07, + "loss": 0.1008, + "num_input_tokens_seen": 204880832, + "step": 65095 + }, + { + "epoch": 4.1674668715191086, + "grad_norm": 10.600704193115234, + "learning_rate": 4.1058036353787996e-07, + "loss": 0.1414, + "num_input_tokens_seen": 204896448, + "step": 65100 + }, + { + "epoch": 4.1677869534600855, + "grad_norm": 6.8735809326171875, + "learning_rate": 4.1027366922468836e-07, + "loss": 0.1297, + "num_input_tokens_seen": 204911936, + "step": 65105 + }, + { + "epoch": 4.1681070354010625, + "grad_norm": 5.020514965057373, + "learning_rate": 4.09967079261544e-07, + "loss": 0.1235, + "num_input_tokens_seen": 204928128, + "step": 65110 + }, + { + "epoch": 4.1684271173420395, + "grad_norm": 9.246545791625977, + "learning_rate": 4.096605936637582e-07, + "loss": 0.1563, + "num_input_tokens_seen": 204945984, + "step": 65115 + }, + { + "epoch": 4.1687471992830165, + "grad_norm": 6.357913970947266, + "learning_rate": 4.093542124466332e-07, + "loss": 0.1165, + "num_input_tokens_seen": 204961664, + "step": 65120 + }, + { + "epoch": 4.1690672812239935, + "grad_norm": 6.437119483947754, + "learning_rate": 4.090479356254695e-07, + "loss": 0.1322, + "num_input_tokens_seen": 204978816, + "step": 65125 + }, + { + "epoch": 4.1693873631649705, + "grad_norm": 4.161965370178223, + "learning_rate": 4.0874176321556076e-07, + "loss": 0.1032, + "num_input_tokens_seen": 204994048, + "step": 65130 + }, + { + "epoch": 4.1697074451059475, + "grad_norm": 4.251039028167725, + "learning_rate": 4.084356952321952e-07, + "loss": 0.0664, + "num_input_tokens_seen": 205010240, + "step": 65135 + }, + { + "epoch": 4.170027527046924, + "grad_norm": 5.469411373138428, + "learning_rate": 4.0812973169065655e-07, + "loss": 0.1436, + "num_input_tokens_seen": 205026112, + "step": 65140 + }, + { + "epoch": 4.170347608987901, + "grad_norm": 5.48233699798584, + "learning_rate": 4.0782387260622225e-07, + "loss": 0.1175, + "num_input_tokens_seen": 205041408, + "step": 65145 + }, + { + "epoch": 4.170667690928878, + "grad_norm": 6.895121097564697, + "learning_rate": 4.07518117994167e-07, + "loss": 0.1286, + "num_input_tokens_seen": 205057408, + "step": 65150 + }, + { + "epoch": 4.170987772869855, + "grad_norm": 5.660162925720215, + "learning_rate": 4.0721246786975673e-07, + "loss": 0.1448, + "num_input_tokens_seen": 205073152, + "step": 65155 + }, + { + "epoch": 4.1713078548108316, + "grad_norm": 7.723247528076172, + "learning_rate": 4.069069222482555e-07, + "loss": 0.1165, + "num_input_tokens_seen": 205088384, + "step": 65160 + }, + { + "epoch": 4.1716279367518085, + "grad_norm": 8.269698143005371, + "learning_rate": 4.066014811449198e-07, + "loss": 0.144, + "num_input_tokens_seen": 205104832, + "step": 65165 + }, + { + "epoch": 4.1719480186927855, + "grad_norm": 7.108753681182861, + "learning_rate": 4.0629614457500214e-07, + "loss": 0.1505, + "num_input_tokens_seen": 205120576, + "step": 65170 + }, + { + "epoch": 4.1722681006337625, + "grad_norm": 4.472288131713867, + "learning_rate": 4.059909125537495e-07, + "loss": 0.1021, + "num_input_tokens_seen": 205136128, + "step": 65175 + }, + { + "epoch": 4.1725881825747395, + "grad_norm": 3.230320453643799, + "learning_rate": 4.0568578509640245e-07, + "loss": 0.1506, + "num_input_tokens_seen": 205150912, + "step": 65180 + }, + { + "epoch": 4.172908264515716, + "grad_norm": 7.033930778503418, + "learning_rate": 4.053807622181999e-07, + "loss": 0.1284, + "num_input_tokens_seen": 205167040, + "step": 65185 + }, + { + "epoch": 4.173228346456693, + "grad_norm": 52.03535079956055, + "learning_rate": 4.0507584393437023e-07, + "loss": 0.1668, + "num_input_tokens_seen": 205184320, + "step": 65190 + }, + { + "epoch": 4.17354842839767, + "grad_norm": 9.029613494873047, + "learning_rate": 4.047710302601415e-07, + "loss": 0.136, + "num_input_tokens_seen": 205200192, + "step": 65195 + }, + { + "epoch": 4.173868510338647, + "grad_norm": 7.198744297027588, + "learning_rate": 4.0446632121073387e-07, + "loss": 0.1075, + "num_input_tokens_seen": 205216128, + "step": 65200 + }, + { + "epoch": 4.174188592279624, + "grad_norm": 54.365020751953125, + "learning_rate": 4.0416171680136285e-07, + "loss": 0.1893, + "num_input_tokens_seen": 205234624, + "step": 65205 + }, + { + "epoch": 4.174508674220601, + "grad_norm": 13.560495376586914, + "learning_rate": 4.0385721704723906e-07, + "loss": 0.117, + "num_input_tokens_seen": 205249984, + "step": 65210 + }, + { + "epoch": 4.174828756161578, + "grad_norm": 8.687776565551758, + "learning_rate": 4.035528219635673e-07, + "loss": 0.1858, + "num_input_tokens_seen": 205265152, + "step": 65215 + }, + { + "epoch": 4.1751488381025545, + "grad_norm": 6.296289920806885, + "learning_rate": 4.032485315655471e-07, + "loss": 0.1583, + "num_input_tokens_seen": 205282368, + "step": 65220 + }, + { + "epoch": 4.1754689200435315, + "grad_norm": 6.128037452697754, + "learning_rate": 4.029443458683746e-07, + "loss": 0.1332, + "num_input_tokens_seen": 205297664, + "step": 65225 + }, + { + "epoch": 4.175789001984508, + "grad_norm": 5.020445823669434, + "learning_rate": 4.026402648872374e-07, + "loss": 0.0907, + "num_input_tokens_seen": 205312960, + "step": 65230 + }, + { + "epoch": 4.176109083925485, + "grad_norm": 4.737738609313965, + "learning_rate": 4.023362886373211e-07, + "loss": 0.1385, + "num_input_tokens_seen": 205328448, + "step": 65235 + }, + { + "epoch": 4.176429165866462, + "grad_norm": 162.4182891845703, + "learning_rate": 4.0203241713380414e-07, + "loss": 0.1852, + "num_input_tokens_seen": 205346112, + "step": 65240 + }, + { + "epoch": 4.176749247807439, + "grad_norm": 6.609681606292725, + "learning_rate": 4.017286503918605e-07, + "loss": 0.1236, + "num_input_tokens_seen": 205361664, + "step": 65245 + }, + { + "epoch": 4.177069329748416, + "grad_norm": 4.7112321853637695, + "learning_rate": 4.0142498842665853e-07, + "loss": 0.1978, + "num_input_tokens_seen": 205377152, + "step": 65250 + }, + { + "epoch": 4.177389411689393, + "grad_norm": 4.517832279205322, + "learning_rate": 4.0112143125336093e-07, + "loss": 0.1269, + "num_input_tokens_seen": 205392320, + "step": 65255 + }, + { + "epoch": 4.17770949363037, + "grad_norm": 38.038631439208984, + "learning_rate": 4.0081797888712776e-07, + "loss": 0.212, + "num_input_tokens_seen": 205407296, + "step": 65260 + }, + { + "epoch": 4.178029575571347, + "grad_norm": 6.505274295806885, + "learning_rate": 4.0051463134310906e-07, + "loss": 0.1213, + "num_input_tokens_seen": 205422720, + "step": 65265 + }, + { + "epoch": 4.178349657512324, + "grad_norm": 9.112509727478027, + "learning_rate": 4.00211388636455e-07, + "loss": 0.1097, + "num_input_tokens_seen": 205438656, + "step": 65270 + }, + { + "epoch": 4.1786697394533, + "grad_norm": 4.017698764801025, + "learning_rate": 3.999082507823057e-07, + "loss": 0.1315, + "num_input_tokens_seen": 205453760, + "step": 65275 + }, + { + "epoch": 4.178989821394277, + "grad_norm": 7.87859582901001, + "learning_rate": 3.9960521779580014e-07, + "loss": 0.1704, + "num_input_tokens_seen": 205469056, + "step": 65280 + }, + { + "epoch": 4.179309903335254, + "grad_norm": 7.94537878036499, + "learning_rate": 3.9930228969206925e-07, + "loss": 0.1219, + "num_input_tokens_seen": 205484736, + "step": 65285 + }, + { + "epoch": 4.179629985276231, + "grad_norm": 5.9314117431640625, + "learning_rate": 3.9899946648623983e-07, + "loss": 0.1858, + "num_input_tokens_seen": 205499520, + "step": 65290 + }, + { + "epoch": 4.179950067217208, + "grad_norm": 14.074431419372559, + "learning_rate": 3.986967481934334e-07, + "loss": 0.1224, + "num_input_tokens_seen": 205514304, + "step": 65295 + }, + { + "epoch": 4.180270149158185, + "grad_norm": 6.082620620727539, + "learning_rate": 3.9839413482876565e-07, + "loss": 0.1172, + "num_input_tokens_seen": 205529984, + "step": 65300 + }, + { + "epoch": 4.180590231099162, + "grad_norm": 5.203332424163818, + "learning_rate": 3.9809162640734837e-07, + "loss": 0.1216, + "num_input_tokens_seen": 205545664, + "step": 65305 + }, + { + "epoch": 4.180910313040139, + "grad_norm": 4.808323860168457, + "learning_rate": 3.97789222944287e-07, + "loss": 0.1885, + "num_input_tokens_seen": 205561152, + "step": 65310 + }, + { + "epoch": 4.181230394981116, + "grad_norm": 6.950286865234375, + "learning_rate": 3.974869244546817e-07, + "loss": 0.1337, + "num_input_tokens_seen": 205576512, + "step": 65315 + }, + { + "epoch": 4.181550476922092, + "grad_norm": 7.8831562995910645, + "learning_rate": 3.971847309536275e-07, + "loss": 0.1512, + "num_input_tokens_seen": 205591104, + "step": 65320 + }, + { + "epoch": 4.181870558863069, + "grad_norm": 5.859790325164795, + "learning_rate": 3.96882642456215e-07, + "loss": 0.1086, + "num_input_tokens_seen": 205606464, + "step": 65325 + }, + { + "epoch": 4.182190640804046, + "grad_norm": 7.602869987487793, + "learning_rate": 3.9658065897752867e-07, + "loss": 0.1306, + "num_input_tokens_seen": 205621888, + "step": 65330 + }, + { + "epoch": 4.182510722745023, + "grad_norm": 5.964837551116943, + "learning_rate": 3.962787805326479e-07, + "loss": 0.1259, + "num_input_tokens_seen": 205637440, + "step": 65335 + }, + { + "epoch": 4.182830804686, + "grad_norm": 6.889533042907715, + "learning_rate": 3.959770071366462e-07, + "loss": 0.1316, + "num_input_tokens_seen": 205653440, + "step": 65340 + }, + { + "epoch": 4.183150886626977, + "grad_norm": 4.04403829574585, + "learning_rate": 3.9567533880459446e-07, + "loss": 0.1136, + "num_input_tokens_seen": 205668992, + "step": 65345 + }, + { + "epoch": 4.183470968567954, + "grad_norm": 7.044495582580566, + "learning_rate": 3.953737755515541e-07, + "loss": 0.1497, + "num_input_tokens_seen": 205683904, + "step": 65350 + }, + { + "epoch": 4.183791050508931, + "grad_norm": 5.713382244110107, + "learning_rate": 3.950723173925855e-07, + "loss": 0.1018, + "num_input_tokens_seen": 205699200, + "step": 65355 + }, + { + "epoch": 4.184111132449907, + "grad_norm": 6.2108941078186035, + "learning_rate": 3.9477096434274107e-07, + "loss": 0.098, + "num_input_tokens_seen": 205716096, + "step": 65360 + }, + { + "epoch": 4.184431214390884, + "grad_norm": 7.884764194488525, + "learning_rate": 3.944697164170691e-07, + "loss": 0.1628, + "num_input_tokens_seen": 205732416, + "step": 65365 + }, + { + "epoch": 4.184751296331861, + "grad_norm": 5.4932732582092285, + "learning_rate": 3.941685736306122e-07, + "loss": 0.1253, + "num_input_tokens_seen": 205747520, + "step": 65370 + }, + { + "epoch": 4.185071378272838, + "grad_norm": 6.287684917449951, + "learning_rate": 3.93867535998407e-07, + "loss": 0.1618, + "num_input_tokens_seen": 205764160, + "step": 65375 + }, + { + "epoch": 4.185391460213815, + "grad_norm": 12.012162208557129, + "learning_rate": 3.935666035354882e-07, + "loss": 0.1273, + "num_input_tokens_seen": 205780416, + "step": 65380 + }, + { + "epoch": 4.185711542154792, + "grad_norm": 7.213577747344971, + "learning_rate": 3.932657762568798e-07, + "loss": 0.1622, + "num_input_tokens_seen": 205796288, + "step": 65385 + }, + { + "epoch": 4.186031624095769, + "grad_norm": 11.973565101623535, + "learning_rate": 3.9296505417760535e-07, + "loss": 0.1556, + "num_input_tokens_seen": 205812672, + "step": 65390 + }, + { + "epoch": 4.186351706036746, + "grad_norm": 15.538015365600586, + "learning_rate": 3.926644373126812e-07, + "loss": 0.1712, + "num_input_tokens_seen": 205829952, + "step": 65395 + }, + { + "epoch": 4.186671787977723, + "grad_norm": 4.036314010620117, + "learning_rate": 3.9236392567711835e-07, + "loss": 0.1052, + "num_input_tokens_seen": 205845760, + "step": 65400 + }, + { + "epoch": 4.186991869918699, + "grad_norm": 6.301973819732666, + "learning_rate": 3.9206351928592263e-07, + "loss": 0.1405, + "num_input_tokens_seen": 205862784, + "step": 65405 + }, + { + "epoch": 4.187311951859676, + "grad_norm": 9.556912422180176, + "learning_rate": 3.917632181540945e-07, + "loss": 0.1094, + "num_input_tokens_seen": 205878976, + "step": 65410 + }, + { + "epoch": 4.187632033800653, + "grad_norm": 9.354650497436523, + "learning_rate": 3.9146302229663093e-07, + "loss": 0.1794, + "num_input_tokens_seen": 205894528, + "step": 65415 + }, + { + "epoch": 4.18795211574163, + "grad_norm": 6.394495010375977, + "learning_rate": 3.911629317285198e-07, + "loss": 0.1093, + "num_input_tokens_seen": 205910592, + "step": 65420 + }, + { + "epoch": 4.188272197682607, + "grad_norm": 13.79644775390625, + "learning_rate": 3.9086294646474817e-07, + "loss": 0.1483, + "num_input_tokens_seen": 205926656, + "step": 65425 + }, + { + "epoch": 4.188592279623584, + "grad_norm": 6.331536769866943, + "learning_rate": 3.9056306652029473e-07, + "loss": 0.1685, + "num_input_tokens_seen": 205941952, + "step": 65430 + }, + { + "epoch": 4.188912361564561, + "grad_norm": 9.545050621032715, + "learning_rate": 3.90263291910134e-07, + "loss": 0.1527, + "num_input_tokens_seen": 205958336, + "step": 65435 + }, + { + "epoch": 4.189232443505538, + "grad_norm": 5.203629493713379, + "learning_rate": 3.899636226492354e-07, + "loss": 0.1384, + "num_input_tokens_seen": 205973824, + "step": 65440 + }, + { + "epoch": 4.189552525446515, + "grad_norm": 7.507617950439453, + "learning_rate": 3.896640587525627e-07, + "loss": 0.111, + "num_input_tokens_seen": 205989056, + "step": 65445 + }, + { + "epoch": 4.189872607387491, + "grad_norm": 4.278966426849365, + "learning_rate": 3.893646002350737e-07, + "loss": 0.0883, + "num_input_tokens_seen": 206005824, + "step": 65450 + }, + { + "epoch": 4.190192689328468, + "grad_norm": 7.311602592468262, + "learning_rate": 3.890652471117243e-07, + "loss": 0.1226, + "num_input_tokens_seen": 206021376, + "step": 65455 + }, + { + "epoch": 4.190512771269445, + "grad_norm": 3.9228553771972656, + "learning_rate": 3.887659993974596e-07, + "loss": 0.1542, + "num_input_tokens_seen": 206036416, + "step": 65460 + }, + { + "epoch": 4.190832853210422, + "grad_norm": 4.218652725219727, + "learning_rate": 3.88466857107225e-07, + "loss": 0.0965, + "num_input_tokens_seen": 206055040, + "step": 65465 + }, + { + "epoch": 4.191152935151399, + "grad_norm": 5.507840156555176, + "learning_rate": 3.881678202559558e-07, + "loss": 0.1405, + "num_input_tokens_seen": 206070848, + "step": 65470 + }, + { + "epoch": 4.191473017092376, + "grad_norm": 7.66400671005249, + "learning_rate": 3.878688888585863e-07, + "loss": 0.1526, + "num_input_tokens_seen": 206086272, + "step": 65475 + }, + { + "epoch": 4.191793099033353, + "grad_norm": 6.178966522216797, + "learning_rate": 3.875700629300425e-07, + "loss": 0.1044, + "num_input_tokens_seen": 206103744, + "step": 65480 + }, + { + "epoch": 4.19211318097433, + "grad_norm": 8.339165687561035, + "learning_rate": 3.8727134248524617e-07, + "loss": 0.1167, + "num_input_tokens_seen": 206121152, + "step": 65485 + }, + { + "epoch": 4.192433262915307, + "grad_norm": 8.326913833618164, + "learning_rate": 3.8697272753911526e-07, + "loss": 0.1162, + "num_input_tokens_seen": 206137472, + "step": 65490 + }, + { + "epoch": 4.192753344856283, + "grad_norm": 7.031442165374756, + "learning_rate": 3.8667421810655887e-07, + "loss": 0.1418, + "num_input_tokens_seen": 206153280, + "step": 65495 + }, + { + "epoch": 4.19307342679726, + "grad_norm": 7.460990905761719, + "learning_rate": 3.863758142024854e-07, + "loss": 0.1267, + "num_input_tokens_seen": 206169984, + "step": 65500 + }, + { + "epoch": 4.193393508738237, + "grad_norm": 7.4375457763671875, + "learning_rate": 3.860775158417929e-07, + "loss": 0.1162, + "num_input_tokens_seen": 206185472, + "step": 65505 + }, + { + "epoch": 4.193713590679214, + "grad_norm": 7.043656826019287, + "learning_rate": 3.8577932303937886e-07, + "loss": 0.1482, + "num_input_tokens_seen": 206200704, + "step": 65510 + }, + { + "epoch": 4.194033672620191, + "grad_norm": 5.160536289215088, + "learning_rate": 3.8548123581013287e-07, + "loss": 0.1442, + "num_input_tokens_seen": 206215680, + "step": 65515 + }, + { + "epoch": 4.194353754561168, + "grad_norm": 4.828531265258789, + "learning_rate": 3.851832541689399e-07, + "loss": 0.1025, + "num_input_tokens_seen": 206230976, + "step": 65520 + }, + { + "epoch": 4.194673836502145, + "grad_norm": 7.864434242248535, + "learning_rate": 3.848853781306794e-07, + "loss": 0.1415, + "num_input_tokens_seen": 206246720, + "step": 65525 + }, + { + "epoch": 4.194993918443122, + "grad_norm": 9.393731117248535, + "learning_rate": 3.845876077102256e-07, + "loss": 0.1326, + "num_input_tokens_seen": 206263808, + "step": 65530 + }, + { + "epoch": 4.195314000384099, + "grad_norm": 6.299277305603027, + "learning_rate": 3.842899429224481e-07, + "loss": 0.1183, + "num_input_tokens_seen": 206280256, + "step": 65535 + }, + { + "epoch": 4.195634082325075, + "grad_norm": 9.672493934631348, + "learning_rate": 3.8399238378221073e-07, + "loss": 0.1151, + "num_input_tokens_seen": 206295744, + "step": 65540 + }, + { + "epoch": 4.195954164266052, + "grad_norm": 5.5722880363464355, + "learning_rate": 3.8369493030437187e-07, + "loss": 0.0828, + "num_input_tokens_seen": 206313664, + "step": 65545 + }, + { + "epoch": 4.196274246207029, + "grad_norm": 2.732051372528076, + "learning_rate": 3.833975825037847e-07, + "loss": 0.1232, + "num_input_tokens_seen": 206329216, + "step": 65550 + }, + { + "epoch": 4.196594328148006, + "grad_norm": 5.330535411834717, + "learning_rate": 3.8310034039529736e-07, + "loss": 0.1237, + "num_input_tokens_seen": 206344576, + "step": 65555 + }, + { + "epoch": 4.196914410088983, + "grad_norm": 3.9268798828125, + "learning_rate": 3.828032039937524e-07, + "loss": 0.0961, + "num_input_tokens_seen": 206360192, + "step": 65560 + }, + { + "epoch": 4.19723449202996, + "grad_norm": 7.441253662109375, + "learning_rate": 3.825061733139873e-07, + "loss": 0.1549, + "num_input_tokens_seen": 206375808, + "step": 65565 + }, + { + "epoch": 4.197554573970937, + "grad_norm": 6.82082986831665, + "learning_rate": 3.822092483708337e-07, + "loss": 0.1523, + "num_input_tokens_seen": 206390720, + "step": 65570 + }, + { + "epoch": 4.197874655911914, + "grad_norm": 6.5198655128479, + "learning_rate": 3.819124291791204e-07, + "loss": 0.13, + "num_input_tokens_seen": 206408320, + "step": 65575 + }, + { + "epoch": 4.198194737852891, + "grad_norm": 7.372783184051514, + "learning_rate": 3.8161571575366654e-07, + "loss": 0.1249, + "num_input_tokens_seen": 206424000, + "step": 65580 + }, + { + "epoch": 4.198514819793867, + "grad_norm": 11.699318885803223, + "learning_rate": 3.813191081092898e-07, + "loss": 0.1413, + "num_input_tokens_seen": 206439936, + "step": 65585 + }, + { + "epoch": 4.198834901734844, + "grad_norm": 5.931205749511719, + "learning_rate": 3.810226062608013e-07, + "loss": 0.1154, + "num_input_tokens_seen": 206455872, + "step": 65590 + }, + { + "epoch": 4.199154983675821, + "grad_norm": 3.075136661529541, + "learning_rate": 3.8072621022300584e-07, + "loss": 0.1157, + "num_input_tokens_seen": 206470656, + "step": 65595 + }, + { + "epoch": 4.199475065616798, + "grad_norm": 7.96856689453125, + "learning_rate": 3.804299200107056e-07, + "loss": 0.1487, + "num_input_tokens_seen": 206486656, + "step": 65600 + }, + { + "epoch": 4.199795147557775, + "grad_norm": 6.508393287658691, + "learning_rate": 3.801337356386936e-07, + "loss": 0.1272, + "num_input_tokens_seen": 206502400, + "step": 65605 + }, + { + "epoch": 4.200115229498752, + "grad_norm": 7.560256481170654, + "learning_rate": 3.798376571217621e-07, + "loss": 0.1387, + "num_input_tokens_seen": 206519232, + "step": 65610 + }, + { + "epoch": 4.200435311439729, + "grad_norm": 5.510799407958984, + "learning_rate": 3.795416844746935e-07, + "loss": 0.0857, + "num_input_tokens_seen": 206534016, + "step": 65615 + }, + { + "epoch": 4.200755393380706, + "grad_norm": 4.8158674240112305, + "learning_rate": 3.792458177122685e-07, + "loss": 0.093, + "num_input_tokens_seen": 206548352, + "step": 65620 + }, + { + "epoch": 4.201075475321682, + "grad_norm": 6.1917033195495605, + "learning_rate": 3.7895005684926073e-07, + "loss": 0.1267, + "num_input_tokens_seen": 206565312, + "step": 65625 + }, + { + "epoch": 4.201395557262659, + "grad_norm": 7.788633346557617, + "learning_rate": 3.78654401900439e-07, + "loss": 0.162, + "num_input_tokens_seen": 206579584, + "step": 65630 + }, + { + "epoch": 4.201715639203636, + "grad_norm": 9.555703163146973, + "learning_rate": 3.7835885288056653e-07, + "loss": 0.1325, + "num_input_tokens_seen": 206594112, + "step": 65635 + }, + { + "epoch": 4.202035721144613, + "grad_norm": 5.8175048828125, + "learning_rate": 3.78063409804402e-07, + "loss": 0.107, + "num_input_tokens_seen": 206609728, + "step": 65640 + }, + { + "epoch": 4.20235580308559, + "grad_norm": 5.175374984741211, + "learning_rate": 3.7776807268669805e-07, + "loss": 0.1051, + "num_input_tokens_seen": 206624896, + "step": 65645 + }, + { + "epoch": 4.202675885026567, + "grad_norm": 9.937192916870117, + "learning_rate": 3.774728415422016e-07, + "loss": 0.1849, + "num_input_tokens_seen": 206640064, + "step": 65650 + }, + { + "epoch": 4.202995966967544, + "grad_norm": 29.990325927734375, + "learning_rate": 3.77177716385656e-07, + "loss": 0.1867, + "num_input_tokens_seen": 206655296, + "step": 65655 + }, + { + "epoch": 4.203316048908521, + "grad_norm": 7.760743141174316, + "learning_rate": 3.768826972317982e-07, + "loss": 0.12, + "num_input_tokens_seen": 206672192, + "step": 65660 + }, + { + "epoch": 4.203636130849498, + "grad_norm": 7.011510848999023, + "learning_rate": 3.765877840953594e-07, + "loss": 0.1247, + "num_input_tokens_seen": 206688832, + "step": 65665 + }, + { + "epoch": 4.203956212790474, + "grad_norm": 9.891217231750488, + "learning_rate": 3.762929769910662e-07, + "loss": 0.1091, + "num_input_tokens_seen": 206708096, + "step": 65670 + }, + { + "epoch": 4.204276294731451, + "grad_norm": 6.9467244148254395, + "learning_rate": 3.7599827593363976e-07, + "loss": 0.1137, + "num_input_tokens_seen": 206724160, + "step": 65675 + }, + { + "epoch": 4.204596376672428, + "grad_norm": 6.598855495452881, + "learning_rate": 3.7570368093779535e-07, + "loss": 0.1201, + "num_input_tokens_seen": 206739648, + "step": 65680 + }, + { + "epoch": 4.204916458613405, + "grad_norm": 7.426547527313232, + "learning_rate": 3.754091920182451e-07, + "loss": 0.1037, + "num_input_tokens_seen": 206756480, + "step": 65685 + }, + { + "epoch": 4.205236540554382, + "grad_norm": 5.9256272315979, + "learning_rate": 3.7511480918969224e-07, + "loss": 0.1038, + "num_input_tokens_seen": 206772096, + "step": 65690 + }, + { + "epoch": 4.205556622495359, + "grad_norm": 6.257135391235352, + "learning_rate": 3.748205324668386e-07, + "loss": 0.1128, + "num_input_tokens_seen": 206787520, + "step": 65695 + }, + { + "epoch": 4.205876704436336, + "grad_norm": 9.12691879272461, + "learning_rate": 3.7452636186437683e-07, + "loss": 0.1101, + "num_input_tokens_seen": 206801856, + "step": 65700 + }, + { + "epoch": 4.206196786377313, + "grad_norm": 7.111203193664551, + "learning_rate": 3.7423229739699805e-07, + "loss": 0.1327, + "num_input_tokens_seen": 206817856, + "step": 65705 + }, + { + "epoch": 4.20651686831829, + "grad_norm": 2.447327136993408, + "learning_rate": 3.7393833907938576e-07, + "loss": 0.0846, + "num_input_tokens_seen": 206834432, + "step": 65710 + }, + { + "epoch": 4.206836950259266, + "grad_norm": 5.681812763214111, + "learning_rate": 3.7364448692621763e-07, + "loss": 0.1401, + "num_input_tokens_seen": 206850048, + "step": 65715 + }, + { + "epoch": 4.207157032200243, + "grad_norm": 7.582657337188721, + "learning_rate": 3.7335074095216946e-07, + "loss": 0.096, + "num_input_tokens_seen": 206866560, + "step": 65720 + }, + { + "epoch": 4.20747711414122, + "grad_norm": 4.185806751251221, + "learning_rate": 3.730571011719067e-07, + "loss": 0.1242, + "num_input_tokens_seen": 206882880, + "step": 65725 + }, + { + "epoch": 4.207797196082197, + "grad_norm": 7.055470943450928, + "learning_rate": 3.7276356760009455e-07, + "loss": 0.1136, + "num_input_tokens_seen": 206897920, + "step": 65730 + }, + { + "epoch": 4.208117278023174, + "grad_norm": 5.0167083740234375, + "learning_rate": 3.724701402513886e-07, + "loss": 0.091, + "num_input_tokens_seen": 206913408, + "step": 65735 + }, + { + "epoch": 4.208437359964151, + "grad_norm": 3.8594417572021484, + "learning_rate": 3.7217681914044234e-07, + "loss": 0.143, + "num_input_tokens_seen": 206930432, + "step": 65740 + }, + { + "epoch": 4.208757441905128, + "grad_norm": 11.658439636230469, + "learning_rate": 3.718836042819021e-07, + "loss": 0.1638, + "num_input_tokens_seen": 206947456, + "step": 65745 + }, + { + "epoch": 4.209077523846105, + "grad_norm": 23.682554244995117, + "learning_rate": 3.715904956904101e-07, + "loss": 0.1498, + "num_input_tokens_seen": 206963008, + "step": 65750 + }, + { + "epoch": 4.209397605787082, + "grad_norm": 6.623591899871826, + "learning_rate": 3.7129749338060214e-07, + "loss": 0.1051, + "num_input_tokens_seen": 206979008, + "step": 65755 + }, + { + "epoch": 4.209717687728058, + "grad_norm": 8.442981719970703, + "learning_rate": 3.7100459736710866e-07, + "loss": 0.1227, + "num_input_tokens_seen": 206995072, + "step": 65760 + }, + { + "epoch": 4.210037769669035, + "grad_norm": 7.328427791595459, + "learning_rate": 3.707118076645566e-07, + "loss": 0.1134, + "num_input_tokens_seen": 207010624, + "step": 65765 + }, + { + "epoch": 4.210357851610012, + "grad_norm": 7.7867841720581055, + "learning_rate": 3.70419124287566e-07, + "loss": 0.1118, + "num_input_tokens_seen": 207026688, + "step": 65770 + }, + { + "epoch": 4.210677933550989, + "grad_norm": 6.16900110244751, + "learning_rate": 3.7012654725075147e-07, + "loss": 0.1667, + "num_input_tokens_seen": 207043200, + "step": 65775 + }, + { + "epoch": 4.210998015491966, + "grad_norm": 6.537657737731934, + "learning_rate": 3.698340765687233e-07, + "loss": 0.1023, + "num_input_tokens_seen": 207059328, + "step": 65780 + }, + { + "epoch": 4.211318097432943, + "grad_norm": 3.319071054458618, + "learning_rate": 3.695417122560857e-07, + "loss": 0.085, + "num_input_tokens_seen": 207074560, + "step": 65785 + }, + { + "epoch": 4.21163817937392, + "grad_norm": 8.168157577514648, + "learning_rate": 3.6924945432743777e-07, + "loss": 0.1445, + "num_input_tokens_seen": 207089408, + "step": 65790 + }, + { + "epoch": 4.211958261314897, + "grad_norm": 10.709131240844727, + "learning_rate": 3.6895730279737336e-07, + "loss": 0.1427, + "num_input_tokens_seen": 207105728, + "step": 65795 + }, + { + "epoch": 4.212278343255874, + "grad_norm": 7.023452281951904, + "learning_rate": 3.6866525768048057e-07, + "loss": 0.1415, + "num_input_tokens_seen": 207121408, + "step": 65800 + }, + { + "epoch": 4.21259842519685, + "grad_norm": 3.972506284713745, + "learning_rate": 3.6837331899134436e-07, + "loss": 0.1295, + "num_input_tokens_seen": 207138560, + "step": 65805 + }, + { + "epoch": 4.212918507137827, + "grad_norm": 8.679563522338867, + "learning_rate": 3.680814867445401e-07, + "loss": 0.1691, + "num_input_tokens_seen": 207154048, + "step": 65810 + }, + { + "epoch": 4.213238589078804, + "grad_norm": 5.689355373382568, + "learning_rate": 3.6778976095464215e-07, + "loss": 0.1338, + "num_input_tokens_seen": 207169088, + "step": 65815 + }, + { + "epoch": 4.213558671019781, + "grad_norm": 6.051870346069336, + "learning_rate": 3.674981416362172e-07, + "loss": 0.1106, + "num_input_tokens_seen": 207184384, + "step": 65820 + }, + { + "epoch": 4.213878752960758, + "grad_norm": 7.842889308929443, + "learning_rate": 3.6720662880382703e-07, + "loss": 0.1202, + "num_input_tokens_seen": 207199552, + "step": 65825 + }, + { + "epoch": 4.214198834901735, + "grad_norm": 7.602449893951416, + "learning_rate": 3.669152224720293e-07, + "loss": 0.1289, + "num_input_tokens_seen": 207215232, + "step": 65830 + }, + { + "epoch": 4.214518916842712, + "grad_norm": 8.46787166595459, + "learning_rate": 3.6662392265537334e-07, + "loss": 0.1326, + "num_input_tokens_seen": 207230464, + "step": 65835 + }, + { + "epoch": 4.214838998783689, + "grad_norm": 5.908459186553955, + "learning_rate": 3.663327293684077e-07, + "loss": 0.1068, + "num_input_tokens_seen": 207246208, + "step": 65840 + }, + { + "epoch": 4.215159080724666, + "grad_norm": 4.8247175216674805, + "learning_rate": 3.660416426256705e-07, + "loss": 0.1396, + "num_input_tokens_seen": 207261824, + "step": 65845 + }, + { + "epoch": 4.215479162665642, + "grad_norm": 6.74932336807251, + "learning_rate": 3.657506624416987e-07, + "loss": 0.1417, + "num_input_tokens_seen": 207277952, + "step": 65850 + }, + { + "epoch": 4.215799244606619, + "grad_norm": 7.468928337097168, + "learning_rate": 3.6545978883102214e-07, + "loss": 0.1551, + "num_input_tokens_seen": 207294464, + "step": 65855 + }, + { + "epoch": 4.216119326547596, + "grad_norm": 8.04274845123291, + "learning_rate": 3.6516902180816523e-07, + "loss": 0.126, + "num_input_tokens_seen": 207309760, + "step": 65860 + }, + { + "epoch": 4.216439408488573, + "grad_norm": 9.891701698303223, + "learning_rate": 3.648783613876475e-07, + "loss": 0.1593, + "num_input_tokens_seen": 207324928, + "step": 65865 + }, + { + "epoch": 4.21675949042955, + "grad_norm": 8.308454513549805, + "learning_rate": 3.645878075839829e-07, + "loss": 0.1487, + "num_input_tokens_seen": 207341312, + "step": 65870 + }, + { + "epoch": 4.217079572370527, + "grad_norm": 9.344834327697754, + "learning_rate": 3.642973604116801e-07, + "loss": 0.1265, + "num_input_tokens_seen": 207357824, + "step": 65875 + }, + { + "epoch": 4.217399654311504, + "grad_norm": 8.768256187438965, + "learning_rate": 3.6400701988524215e-07, + "loss": 0.1482, + "num_input_tokens_seen": 207376448, + "step": 65880 + }, + { + "epoch": 4.217719736252481, + "grad_norm": 5.447292804718018, + "learning_rate": 3.6371678601916843e-07, + "loss": 0.1313, + "num_input_tokens_seen": 207393408, + "step": 65885 + }, + { + "epoch": 4.218039818193457, + "grad_norm": 6.515311241149902, + "learning_rate": 3.634266588279506e-07, + "loss": 0.1284, + "num_input_tokens_seen": 207409280, + "step": 65890 + }, + { + "epoch": 4.218359900134434, + "grad_norm": 3.303149461746216, + "learning_rate": 3.6313663832607676e-07, + "loss": 0.1631, + "num_input_tokens_seen": 207424192, + "step": 65895 + }, + { + "epoch": 4.218679982075411, + "grad_norm": 8.133296012878418, + "learning_rate": 3.6284672452802845e-07, + "loss": 0.107, + "num_input_tokens_seen": 207440512, + "step": 65900 + }, + { + "epoch": 4.219000064016388, + "grad_norm": 5.6143269538879395, + "learning_rate": 3.6255691744828263e-07, + "loss": 0.1197, + "num_input_tokens_seen": 207455872, + "step": 65905 + }, + { + "epoch": 4.219320145957365, + "grad_norm": 8.316951751708984, + "learning_rate": 3.622672171013106e-07, + "loss": 0.093, + "num_input_tokens_seen": 207473152, + "step": 65910 + }, + { + "epoch": 4.219640227898342, + "grad_norm": 6.001427173614502, + "learning_rate": 3.619776235015796e-07, + "loss": 0.1283, + "num_input_tokens_seen": 207488256, + "step": 65915 + }, + { + "epoch": 4.219960309839319, + "grad_norm": 5.123419761657715, + "learning_rate": 3.616881366635483e-07, + "loss": 0.1359, + "num_input_tokens_seen": 207502912, + "step": 65920 + }, + { + "epoch": 4.220280391780296, + "grad_norm": 6.644757270812988, + "learning_rate": 3.613987566016747e-07, + "loss": 0.1323, + "num_input_tokens_seen": 207518400, + "step": 65925 + }, + { + "epoch": 4.220600473721273, + "grad_norm": 7.539116859436035, + "learning_rate": 3.611094833304063e-07, + "loss": 0.1187, + "num_input_tokens_seen": 207533376, + "step": 65930 + }, + { + "epoch": 4.220920555662249, + "grad_norm": 9.179072380065918, + "learning_rate": 3.608203168641899e-07, + "loss": 0.1428, + "num_input_tokens_seen": 207549184, + "step": 65935 + }, + { + "epoch": 4.221240637603226, + "grad_norm": 7.066376686096191, + "learning_rate": 3.6053125721746435e-07, + "loss": 0.1186, + "num_input_tokens_seen": 207565440, + "step": 65940 + }, + { + "epoch": 4.221560719544203, + "grad_norm": 6.484012126922607, + "learning_rate": 3.602423044046632e-07, + "loss": 0.1489, + "num_input_tokens_seen": 207581568, + "step": 65945 + }, + { + "epoch": 4.22188080148518, + "grad_norm": 8.615757942199707, + "learning_rate": 3.599534584402167e-07, + "loss": 0.1264, + "num_input_tokens_seen": 207596992, + "step": 65950 + }, + { + "epoch": 4.222200883426157, + "grad_norm": 5.422926425933838, + "learning_rate": 3.596647193385466e-07, + "loss": 0.0979, + "num_input_tokens_seen": 207612352, + "step": 65955 + }, + { + "epoch": 4.222520965367134, + "grad_norm": 2.1734001636505127, + "learning_rate": 3.5937608711407223e-07, + "loss": 0.1098, + "num_input_tokens_seen": 207627840, + "step": 65960 + }, + { + "epoch": 4.222841047308111, + "grad_norm": 8.237195014953613, + "learning_rate": 3.590875617812059e-07, + "loss": 0.1227, + "num_input_tokens_seen": 207642368, + "step": 65965 + }, + { + "epoch": 4.223161129249088, + "grad_norm": 4.260740280151367, + "learning_rate": 3.58799143354355e-07, + "loss": 0.0972, + "num_input_tokens_seen": 207659776, + "step": 65970 + }, + { + "epoch": 4.223481211190065, + "grad_norm": 9.61481761932373, + "learning_rate": 3.585108318479219e-07, + "loss": 0.1464, + "num_input_tokens_seen": 207674432, + "step": 65975 + }, + { + "epoch": 4.223801293131041, + "grad_norm": 9.89612102508545, + "learning_rate": 3.582226272763034e-07, + "loss": 0.1076, + "num_input_tokens_seen": 207689792, + "step": 65980 + }, + { + "epoch": 4.224121375072018, + "grad_norm": 5.91574764251709, + "learning_rate": 3.5793452965389053e-07, + "loss": 0.1273, + "num_input_tokens_seen": 207707200, + "step": 65985 + }, + { + "epoch": 4.224441457012995, + "grad_norm": 9.768184661865234, + "learning_rate": 3.576465389950698e-07, + "loss": 0.1367, + "num_input_tokens_seen": 207721408, + "step": 65990 + }, + { + "epoch": 4.224761538953972, + "grad_norm": 8.011545181274414, + "learning_rate": 3.5735865531422107e-07, + "loss": 0.1176, + "num_input_tokens_seen": 207739584, + "step": 65995 + }, + { + "epoch": 4.225081620894949, + "grad_norm": 8.13282299041748, + "learning_rate": 3.5707087862572125e-07, + "loss": 0.1428, + "num_input_tokens_seen": 207754944, + "step": 66000 + }, + { + "epoch": 4.225401702835926, + "grad_norm": 7.644813537597656, + "learning_rate": 3.5678320894393964e-07, + "loss": 0.1425, + "num_input_tokens_seen": 207771968, + "step": 66005 + }, + { + "epoch": 4.225721784776903, + "grad_norm": 9.796463012695312, + "learning_rate": 3.5649564628324086e-07, + "loss": 0.1372, + "num_input_tokens_seen": 207790720, + "step": 66010 + }, + { + "epoch": 4.22604186671788, + "grad_norm": 11.510635375976562, + "learning_rate": 3.562081906579845e-07, + "loss": 0.1162, + "num_input_tokens_seen": 207806144, + "step": 66015 + }, + { + "epoch": 4.226361948658857, + "grad_norm": 3.7229855060577393, + "learning_rate": 3.559208420825244e-07, + "loss": 0.1124, + "num_input_tokens_seen": 207821504, + "step": 66020 + }, + { + "epoch": 4.226682030599833, + "grad_norm": 3.678274631500244, + "learning_rate": 3.5563360057120937e-07, + "loss": 0.0644, + "num_input_tokens_seen": 207837568, + "step": 66025 + }, + { + "epoch": 4.22700211254081, + "grad_norm": 4.751704216003418, + "learning_rate": 3.553464661383821e-07, + "loss": 0.1241, + "num_input_tokens_seen": 207852352, + "step": 66030 + }, + { + "epoch": 4.227322194481787, + "grad_norm": 6.900976657867432, + "learning_rate": 3.550594387983825e-07, + "loss": 0.1247, + "num_input_tokens_seen": 207867200, + "step": 66035 + }, + { + "epoch": 4.227642276422764, + "grad_norm": 5.516576290130615, + "learning_rate": 3.547725185655407e-07, + "loss": 0.1117, + "num_input_tokens_seen": 207883008, + "step": 66040 + }, + { + "epoch": 4.227962358363741, + "grad_norm": 6.677721977233887, + "learning_rate": 3.5448570545418587e-07, + "loss": 0.143, + "num_input_tokens_seen": 207898560, + "step": 66045 + }, + { + "epoch": 4.228282440304718, + "grad_norm": 4.564075469970703, + "learning_rate": 3.5419899947863905e-07, + "loss": 0.1124, + "num_input_tokens_seen": 207915200, + "step": 66050 + }, + { + "epoch": 4.228602522245695, + "grad_norm": 5.609903335571289, + "learning_rate": 3.539124006532169e-07, + "loss": 0.1827, + "num_input_tokens_seen": 207932224, + "step": 66055 + }, + { + "epoch": 4.228922604186672, + "grad_norm": 7.1914849281311035, + "learning_rate": 3.53625908992232e-07, + "loss": 0.1057, + "num_input_tokens_seen": 207947968, + "step": 66060 + }, + { + "epoch": 4.229242686127649, + "grad_norm": 5.721606731414795, + "learning_rate": 3.53339524509988e-07, + "loss": 0.1429, + "num_input_tokens_seen": 207963072, + "step": 66065 + }, + { + "epoch": 4.229562768068625, + "grad_norm": 7.213122367858887, + "learning_rate": 3.530532472207876e-07, + "loss": 0.1562, + "num_input_tokens_seen": 207978880, + "step": 66070 + }, + { + "epoch": 4.229882850009602, + "grad_norm": 7.199856281280518, + "learning_rate": 3.527670771389238e-07, + "loss": 0.1663, + "num_input_tokens_seen": 207993536, + "step": 66075 + }, + { + "epoch": 4.230202931950579, + "grad_norm": 4.300459861755371, + "learning_rate": 3.5248101427868857e-07, + "loss": 0.139, + "num_input_tokens_seen": 208010048, + "step": 66080 + }, + { + "epoch": 4.230523013891556, + "grad_norm": 4.629220008850098, + "learning_rate": 3.5219505865436515e-07, + "loss": 0.1293, + "num_input_tokens_seen": 208026368, + "step": 66085 + }, + { + "epoch": 4.230843095832533, + "grad_norm": 4.4167399406433105, + "learning_rate": 3.5190921028023325e-07, + "loss": 0.0796, + "num_input_tokens_seen": 208041792, + "step": 66090 + }, + { + "epoch": 4.23116317777351, + "grad_norm": 2.1350700855255127, + "learning_rate": 3.5162346917056613e-07, + "loss": 0.1153, + "num_input_tokens_seen": 208057152, + "step": 66095 + }, + { + "epoch": 4.231483259714487, + "grad_norm": 4.760867118835449, + "learning_rate": 3.513378353396327e-07, + "loss": 0.1223, + "num_input_tokens_seen": 208072512, + "step": 66100 + }, + { + "epoch": 4.231803341655464, + "grad_norm": 11.337530136108398, + "learning_rate": 3.5105230880169537e-07, + "loss": 0.1408, + "num_input_tokens_seen": 208087808, + "step": 66105 + }, + { + "epoch": 4.232123423596441, + "grad_norm": 1.987508773803711, + "learning_rate": 3.507668895710123e-07, + "loss": 0.1467, + "num_input_tokens_seen": 208102720, + "step": 66110 + }, + { + "epoch": 4.232443505537417, + "grad_norm": 5.0846333503723145, + "learning_rate": 3.5048157766183587e-07, + "loss": 0.1314, + "num_input_tokens_seen": 208118272, + "step": 66115 + }, + { + "epoch": 4.232763587478394, + "grad_norm": 6.5060014724731445, + "learning_rate": 3.501963730884134e-07, + "loss": 0.1444, + "num_input_tokens_seen": 208133184, + "step": 66120 + }, + { + "epoch": 4.233083669419371, + "grad_norm": 7.801139831542969, + "learning_rate": 3.4991127586498587e-07, + "loss": 0.1083, + "num_input_tokens_seen": 208148608, + "step": 66125 + }, + { + "epoch": 4.233403751360348, + "grad_norm": 5.040402889251709, + "learning_rate": 3.4962628600579e-07, + "loss": 0.1114, + "num_input_tokens_seen": 208165440, + "step": 66130 + }, + { + "epoch": 4.233723833301325, + "grad_norm": 6.590925693511963, + "learning_rate": 3.493414035250564e-07, + "loss": 0.1222, + "num_input_tokens_seen": 208181184, + "step": 66135 + }, + { + "epoch": 4.234043915242302, + "grad_norm": 6.264365196228027, + "learning_rate": 3.490566284370103e-07, + "loss": 0.1674, + "num_input_tokens_seen": 208196992, + "step": 66140 + }, + { + "epoch": 4.234363997183279, + "grad_norm": 7.159857749938965, + "learning_rate": 3.4877196075587344e-07, + "loss": 0.1058, + "num_input_tokens_seen": 208214272, + "step": 66145 + }, + { + "epoch": 4.234684079124256, + "grad_norm": 5.42577600479126, + "learning_rate": 3.484874004958583e-07, + "loss": 0.1147, + "num_input_tokens_seen": 208230656, + "step": 66150 + }, + { + "epoch": 4.235004161065232, + "grad_norm": 6.828686237335205, + "learning_rate": 3.482029476711768e-07, + "loss": 0.1606, + "num_input_tokens_seen": 208247936, + "step": 66155 + }, + { + "epoch": 4.235324243006209, + "grad_norm": 5.728832721710205, + "learning_rate": 3.479186022960304e-07, + "loss": 0.1125, + "num_input_tokens_seen": 208264576, + "step": 66160 + }, + { + "epoch": 4.235644324947186, + "grad_norm": 6.671734809875488, + "learning_rate": 3.4763436438462003e-07, + "loss": 0.0993, + "num_input_tokens_seen": 208281408, + "step": 66165 + }, + { + "epoch": 4.235964406888163, + "grad_norm": 6.709255695343018, + "learning_rate": 3.473502339511381e-07, + "loss": 0.0993, + "num_input_tokens_seen": 208297344, + "step": 66170 + }, + { + "epoch": 4.23628448882914, + "grad_norm": 6.916262626647949, + "learning_rate": 3.470662110097725e-07, + "loss": 0.1202, + "num_input_tokens_seen": 208314176, + "step": 66175 + }, + { + "epoch": 4.236604570770117, + "grad_norm": 3.0677590370178223, + "learning_rate": 3.467822955747069e-07, + "loss": 0.075, + "num_input_tokens_seen": 208329408, + "step": 66180 + }, + { + "epoch": 4.236924652711094, + "grad_norm": 8.874311447143555, + "learning_rate": 3.4649848766011655e-07, + "loss": 0.1311, + "num_input_tokens_seen": 208345280, + "step": 66185 + }, + { + "epoch": 4.237244734652071, + "grad_norm": 23.221654891967773, + "learning_rate": 3.462147872801755e-07, + "loss": 0.1266, + "num_input_tokens_seen": 208359488, + "step": 66190 + }, + { + "epoch": 4.237564816593048, + "grad_norm": 5.64043664932251, + "learning_rate": 3.45931194449049e-07, + "loss": 0.1105, + "num_input_tokens_seen": 208373888, + "step": 66195 + }, + { + "epoch": 4.237884898534024, + "grad_norm": 6.139119625091553, + "learning_rate": 3.456477091808985e-07, + "loss": 0.1334, + "num_input_tokens_seen": 208388800, + "step": 66200 + }, + { + "epoch": 4.238204980475001, + "grad_norm": 9.46316909790039, + "learning_rate": 3.4536433148988e-07, + "loss": 0.139, + "num_input_tokens_seen": 208405568, + "step": 66205 + }, + { + "epoch": 4.238525062415978, + "grad_norm": 5.559295654296875, + "learning_rate": 3.450810613901437e-07, + "loss": 0.1348, + "num_input_tokens_seen": 208421312, + "step": 66210 + }, + { + "epoch": 4.238845144356955, + "grad_norm": 7.270461559295654, + "learning_rate": 3.447978988958345e-07, + "loss": 0.0886, + "num_input_tokens_seen": 208436736, + "step": 66215 + }, + { + "epoch": 4.239165226297932, + "grad_norm": 11.192678451538086, + "learning_rate": 3.4451484402109213e-07, + "loss": 0.153, + "num_input_tokens_seen": 208452672, + "step": 66220 + }, + { + "epoch": 4.239485308238909, + "grad_norm": 5.939472675323486, + "learning_rate": 3.442318967800504e-07, + "loss": 0.1039, + "num_input_tokens_seen": 208468672, + "step": 66225 + }, + { + "epoch": 4.239805390179886, + "grad_norm": 9.23706340789795, + "learning_rate": 3.439490571868395e-07, + "loss": 0.111, + "num_input_tokens_seen": 208483584, + "step": 66230 + }, + { + "epoch": 4.240125472120863, + "grad_norm": 6.440616130828857, + "learning_rate": 3.436663252555822e-07, + "loss": 0.1269, + "num_input_tokens_seen": 208499840, + "step": 66235 + }, + { + "epoch": 4.24044555406184, + "grad_norm": 8.985855102539062, + "learning_rate": 3.4338370100039654e-07, + "loss": 0.1484, + "num_input_tokens_seen": 208515776, + "step": 66240 + }, + { + "epoch": 4.240765636002816, + "grad_norm": 66.59968566894531, + "learning_rate": 3.431011844353954e-07, + "loss": 0.1339, + "num_input_tokens_seen": 208531264, + "step": 66245 + }, + { + "epoch": 4.241085717943793, + "grad_norm": 6.262960910797119, + "learning_rate": 3.4281877557468644e-07, + "loss": 0.1122, + "num_input_tokens_seen": 208547008, + "step": 66250 + }, + { + "epoch": 4.24140579988477, + "grad_norm": 5.150328636169434, + "learning_rate": 3.425364744323714e-07, + "loss": 0.0951, + "num_input_tokens_seen": 208562112, + "step": 66255 + }, + { + "epoch": 4.241725881825747, + "grad_norm": 8.40455436706543, + "learning_rate": 3.4225428102254627e-07, + "loss": 0.1762, + "num_input_tokens_seen": 208577344, + "step": 66260 + }, + { + "epoch": 4.242045963766724, + "grad_norm": 8.049717903137207, + "learning_rate": 3.4197219535930445e-07, + "loss": 0.117, + "num_input_tokens_seen": 208593920, + "step": 66265 + }, + { + "epoch": 4.242366045707701, + "grad_norm": 6.759324073791504, + "learning_rate": 3.416902174567291e-07, + "loss": 0.1237, + "num_input_tokens_seen": 208609088, + "step": 66270 + }, + { + "epoch": 4.242686127648678, + "grad_norm": 8.482562065124512, + "learning_rate": 3.4140834732890286e-07, + "loss": 0.1098, + "num_input_tokens_seen": 208624192, + "step": 66275 + }, + { + "epoch": 4.243006209589655, + "grad_norm": 7.845933437347412, + "learning_rate": 3.411265849898998e-07, + "loss": 0.1449, + "num_input_tokens_seen": 208640960, + "step": 66280 + }, + { + "epoch": 4.243326291530632, + "grad_norm": 60.8792724609375, + "learning_rate": 3.4084493045379003e-07, + "loss": 0.1017, + "num_input_tokens_seen": 208656768, + "step": 66285 + }, + { + "epoch": 4.243646373471608, + "grad_norm": 12.23678970336914, + "learning_rate": 3.4056338373463783e-07, + "loss": 0.1363, + "num_input_tokens_seen": 208672192, + "step": 66290 + }, + { + "epoch": 4.243966455412585, + "grad_norm": 5.607940196990967, + "learning_rate": 3.402819448465017e-07, + "loss": 0.1305, + "num_input_tokens_seen": 208687680, + "step": 66295 + }, + { + "epoch": 4.244286537353562, + "grad_norm": 6.008354663848877, + "learning_rate": 3.4000061380343645e-07, + "loss": 0.1184, + "num_input_tokens_seen": 208702656, + "step": 66300 + }, + { + "epoch": 4.244606619294539, + "grad_norm": 12.245163917541504, + "learning_rate": 3.397193906194887e-07, + "loss": 0.1418, + "num_input_tokens_seen": 208718272, + "step": 66305 + }, + { + "epoch": 4.244926701235516, + "grad_norm": 6.729484558105469, + "learning_rate": 3.3943827530870276e-07, + "loss": 0.1569, + "num_input_tokens_seen": 208733248, + "step": 66310 + }, + { + "epoch": 4.245246783176493, + "grad_norm": 5.428530693054199, + "learning_rate": 3.3915726788511514e-07, + "loss": 0.1097, + "num_input_tokens_seen": 208748928, + "step": 66315 + }, + { + "epoch": 4.24556686511747, + "grad_norm": 6.136725425720215, + "learning_rate": 3.388763683627583e-07, + "loss": 0.1422, + "num_input_tokens_seen": 208763968, + "step": 66320 + }, + { + "epoch": 4.245886947058447, + "grad_norm": 10.855198860168457, + "learning_rate": 3.385955767556587e-07, + "loss": 0.1638, + "num_input_tokens_seen": 208778816, + "step": 66325 + }, + { + "epoch": 4.246207028999424, + "grad_norm": 7.105585098266602, + "learning_rate": 3.3831489307783765e-07, + "loss": 0.1093, + "num_input_tokens_seen": 208794624, + "step": 66330 + }, + { + "epoch": 4.2465271109404, + "grad_norm": 7.619117736816406, + "learning_rate": 3.3803431734331093e-07, + "loss": 0.1696, + "num_input_tokens_seen": 208810176, + "step": 66335 + }, + { + "epoch": 4.246847192881377, + "grad_norm": 8.111536026000977, + "learning_rate": 3.377538495660893e-07, + "loss": 0.1384, + "num_input_tokens_seen": 208824704, + "step": 66340 + }, + { + "epoch": 4.247167274822354, + "grad_norm": 9.279280662536621, + "learning_rate": 3.3747348976017694e-07, + "loss": 0.1278, + "num_input_tokens_seen": 208840704, + "step": 66345 + }, + { + "epoch": 4.247487356763331, + "grad_norm": 3.9286468029022217, + "learning_rate": 3.371932379395748e-07, + "loss": 0.0788, + "num_input_tokens_seen": 208856448, + "step": 66350 + }, + { + "epoch": 4.247807438704308, + "grad_norm": 8.37078857421875, + "learning_rate": 3.3691309411827716e-07, + "loss": 0.1371, + "num_input_tokens_seen": 208871616, + "step": 66355 + }, + { + "epoch": 4.248127520645285, + "grad_norm": 5.476138591766357, + "learning_rate": 3.3663305831027197e-07, + "loss": 0.0801, + "num_input_tokens_seen": 208886912, + "step": 66360 + }, + { + "epoch": 4.248447602586262, + "grad_norm": 113.49905395507812, + "learning_rate": 3.3635313052954365e-07, + "loss": 0.1693, + "num_input_tokens_seen": 208903232, + "step": 66365 + }, + { + "epoch": 4.248767684527239, + "grad_norm": 10.72178840637207, + "learning_rate": 3.3607331079006935e-07, + "loss": 0.1533, + "num_input_tokens_seen": 208917696, + "step": 66370 + }, + { + "epoch": 4.249087766468216, + "grad_norm": 2.8249120712280273, + "learning_rate": 3.3579359910582337e-07, + "loss": 0.2296, + "num_input_tokens_seen": 208933376, + "step": 66375 + }, + { + "epoch": 4.249407848409192, + "grad_norm": 5.390052795410156, + "learning_rate": 3.35513995490771e-07, + "loss": 0.1136, + "num_input_tokens_seen": 208948736, + "step": 66380 + }, + { + "epoch": 4.249727930350169, + "grad_norm": 5.360455513000488, + "learning_rate": 3.3523449995887664e-07, + "loss": 0.1421, + "num_input_tokens_seen": 208964800, + "step": 66385 + }, + { + "epoch": 4.250048012291146, + "grad_norm": 7.10435676574707, + "learning_rate": 3.3495511252409436e-07, + "loss": 0.1526, + "num_input_tokens_seen": 208979520, + "step": 66390 + }, + { + "epoch": 4.250368094232123, + "grad_norm": 2.422415018081665, + "learning_rate": 3.3467583320037687e-07, + "loss": 0.0669, + "num_input_tokens_seen": 208995328, + "step": 66395 + }, + { + "epoch": 4.2506881761731, + "grad_norm": 5.102203845977783, + "learning_rate": 3.343966620016695e-07, + "loss": 0.0922, + "num_input_tokens_seen": 209010624, + "step": 66400 + }, + { + "epoch": 4.250816208949491, + "eval_loss": 0.6279151439666748, + "eval_runtime": 51.0789, + "eval_samples_per_second": 271.854, + "eval_steps_per_second": 33.987, + "num_input_tokens_seen": 209017024, + "step": 66402 + }, + { + "epoch": 4.251008258114077, + "grad_norm": 10.389789581298828, + "learning_rate": 3.3411759894191207e-07, + "loss": 0.1591, + "num_input_tokens_seen": 209026368, + "step": 66405 + }, + { + "epoch": 4.251328340055054, + "grad_norm": 1.6738629341125488, + "learning_rate": 3.3383864403504136e-07, + "loss": 0.1292, + "num_input_tokens_seen": 209041344, + "step": 66410 + }, + { + "epoch": 4.251648421996031, + "grad_norm": 5.418693542480469, + "learning_rate": 3.335597972949842e-07, + "loss": 0.1414, + "num_input_tokens_seen": 209056320, + "step": 66415 + }, + { + "epoch": 4.251968503937007, + "grad_norm": 7.790010452270508, + "learning_rate": 3.332810587356669e-07, + "loss": 0.1266, + "num_input_tokens_seen": 209071488, + "step": 66420 + }, + { + "epoch": 4.252288585877984, + "grad_norm": 9.204582214355469, + "learning_rate": 3.3300242837100763e-07, + "loss": 0.1375, + "num_input_tokens_seen": 209086720, + "step": 66425 + }, + { + "epoch": 4.252608667818961, + "grad_norm": 5.091311931610107, + "learning_rate": 3.327239062149196e-07, + "loss": 0.1353, + "num_input_tokens_seen": 209102784, + "step": 66430 + }, + { + "epoch": 4.252928749759938, + "grad_norm": 6.259756088256836, + "learning_rate": 3.3244549228131054e-07, + "loss": 0.1139, + "num_input_tokens_seen": 209118592, + "step": 66435 + }, + { + "epoch": 4.253248831700915, + "grad_norm": 7.7930192947387695, + "learning_rate": 3.3216718658408337e-07, + "loss": 0.1902, + "num_input_tokens_seen": 209134912, + "step": 66440 + }, + { + "epoch": 4.253568913641892, + "grad_norm": 3.6433939933776855, + "learning_rate": 3.318889891371352e-07, + "loss": 0.1814, + "num_input_tokens_seen": 209153792, + "step": 66445 + }, + { + "epoch": 4.253888995582869, + "grad_norm": 6.569989204406738, + "learning_rate": 3.316108999543574e-07, + "loss": 0.0806, + "num_input_tokens_seen": 209169472, + "step": 66450 + }, + { + "epoch": 4.254209077523846, + "grad_norm": 8.628015518188477, + "learning_rate": 3.313329190496362e-07, + "loss": 0.1387, + "num_input_tokens_seen": 209185536, + "step": 66455 + }, + { + "epoch": 4.254529159464823, + "grad_norm": 7.450897693634033, + "learning_rate": 3.3105504643685367e-07, + "loss": 0.1188, + "num_input_tokens_seen": 209201280, + "step": 66460 + }, + { + "epoch": 4.2548492414058, + "grad_norm": 4.82994270324707, + "learning_rate": 3.3077728212988354e-07, + "loss": 0.129, + "num_input_tokens_seen": 209216640, + "step": 66465 + }, + { + "epoch": 4.2551693233467764, + "grad_norm": 8.238375663757324, + "learning_rate": 3.3049962614259744e-07, + "loss": 0.1634, + "num_input_tokens_seen": 209231232, + "step": 66470 + }, + { + "epoch": 4.255489405287753, + "grad_norm": 5.8409013748168945, + "learning_rate": 3.3022207848885963e-07, + "loss": 0.0657, + "num_input_tokens_seen": 209246080, + "step": 66475 + }, + { + "epoch": 4.25580948722873, + "grad_norm": 4.054143905639648, + "learning_rate": 3.2994463918252924e-07, + "loss": 0.1012, + "num_input_tokens_seen": 209262976, + "step": 66480 + }, + { + "epoch": 4.256129569169707, + "grad_norm": 6.683859348297119, + "learning_rate": 3.296673082374599e-07, + "loss": 0.082, + "num_input_tokens_seen": 209278784, + "step": 66485 + }, + { + "epoch": 4.256449651110684, + "grad_norm": 6.044507026672363, + "learning_rate": 3.2939008566750003e-07, + "loss": 0.0905, + "num_input_tokens_seen": 209299648, + "step": 66490 + }, + { + "epoch": 4.256769733051661, + "grad_norm": 7.417046070098877, + "learning_rate": 3.291129714864938e-07, + "loss": 0.1748, + "num_input_tokens_seen": 209315328, + "step": 66495 + }, + { + "epoch": 4.257089814992638, + "grad_norm": 9.058755874633789, + "learning_rate": 3.2883596570827736e-07, + "loss": 0.1073, + "num_input_tokens_seen": 209330752, + "step": 66500 + }, + { + "epoch": 4.257409896933615, + "grad_norm": 8.21192455291748, + "learning_rate": 3.2855906834668384e-07, + "loss": 0.1558, + "num_input_tokens_seen": 209345856, + "step": 66505 + }, + { + "epoch": 4.2577299788745915, + "grad_norm": 5.476708889007568, + "learning_rate": 3.282822794155399e-07, + "loss": 0.1235, + "num_input_tokens_seen": 209362560, + "step": 66510 + }, + { + "epoch": 4.2580500608155685, + "grad_norm": 4.975898265838623, + "learning_rate": 3.280055989286668e-07, + "loss": 0.1399, + "num_input_tokens_seen": 209377152, + "step": 66515 + }, + { + "epoch": 4.2583701427565455, + "grad_norm": 10.425461769104004, + "learning_rate": 3.277290268998806e-07, + "loss": 0.1494, + "num_input_tokens_seen": 209394496, + "step": 66520 + }, + { + "epoch": 4.2586902246975225, + "grad_norm": 10.288459777832031, + "learning_rate": 3.2745256334299145e-07, + "loss": 0.164, + "num_input_tokens_seen": 209409152, + "step": 66525 + }, + { + "epoch": 4.259010306638499, + "grad_norm": 41.545318603515625, + "learning_rate": 3.271762082718058e-07, + "loss": 0.1273, + "num_input_tokens_seen": 209425280, + "step": 66530 + }, + { + "epoch": 4.259330388579476, + "grad_norm": 5.711941719055176, + "learning_rate": 3.268999617001217e-07, + "loss": 0.1372, + "num_input_tokens_seen": 209441600, + "step": 66535 + }, + { + "epoch": 4.259650470520453, + "grad_norm": 8.310587882995605, + "learning_rate": 3.2662382364173434e-07, + "loss": 0.1437, + "num_input_tokens_seen": 209457664, + "step": 66540 + }, + { + "epoch": 4.25997055246143, + "grad_norm": 9.916123390197754, + "learning_rate": 3.2634779411043287e-07, + "loss": 0.1344, + "num_input_tokens_seen": 209472000, + "step": 66545 + }, + { + "epoch": 4.260290634402407, + "grad_norm": 6.677154064178467, + "learning_rate": 3.260718731200005e-07, + "loss": 0.084, + "num_input_tokens_seen": 209488704, + "step": 66550 + }, + { + "epoch": 4.2606107163433835, + "grad_norm": 7.0181450843811035, + "learning_rate": 3.2579606068421476e-07, + "loss": 0.1185, + "num_input_tokens_seen": 209503744, + "step": 66555 + }, + { + "epoch": 4.2609307982843605, + "grad_norm": 4.301034450531006, + "learning_rate": 3.2552035681684913e-07, + "loss": 0.1368, + "num_input_tokens_seen": 209520768, + "step": 66560 + }, + { + "epoch": 4.2612508802253375, + "grad_norm": 5.0702691078186035, + "learning_rate": 3.2524476153167033e-07, + "loss": 0.1241, + "num_input_tokens_seen": 209537728, + "step": 66565 + }, + { + "epoch": 4.2615709621663145, + "grad_norm": 8.289316177368164, + "learning_rate": 3.2496927484244017e-07, + "loss": 0.1202, + "num_input_tokens_seen": 209553984, + "step": 66570 + }, + { + "epoch": 4.2618910441072915, + "grad_norm": 3.999276638031006, + "learning_rate": 3.2469389676291456e-07, + "loss": 0.0852, + "num_input_tokens_seen": 209569600, + "step": 66575 + }, + { + "epoch": 4.2622111260482685, + "grad_norm": 5.073244571685791, + "learning_rate": 3.244186273068456e-07, + "loss": 0.1302, + "num_input_tokens_seen": 209584512, + "step": 66580 + }, + { + "epoch": 4.2625312079892455, + "grad_norm": 7.080600261688232, + "learning_rate": 3.2414346648797834e-07, + "loss": 0.1274, + "num_input_tokens_seen": 209599808, + "step": 66585 + }, + { + "epoch": 4.262851289930222, + "grad_norm": 6.196502208709717, + "learning_rate": 3.2386841432005237e-07, + "loss": 0.1476, + "num_input_tokens_seen": 209615552, + "step": 66590 + }, + { + "epoch": 4.263171371871199, + "grad_norm": 9.240386009216309, + "learning_rate": 3.235934708168031e-07, + "loss": 0.1607, + "num_input_tokens_seen": 209631360, + "step": 66595 + }, + { + "epoch": 4.2634914538121755, + "grad_norm": 7.365181922912598, + "learning_rate": 3.233186359919588e-07, + "loss": 0.1514, + "num_input_tokens_seen": 209646272, + "step": 66600 + }, + { + "epoch": 4.2638115357531525, + "grad_norm": 5.901136875152588, + "learning_rate": 3.2304390985924473e-07, + "loss": 0.0887, + "num_input_tokens_seen": 209661376, + "step": 66605 + }, + { + "epoch": 4.2641316176941295, + "grad_norm": 2.7148983478546143, + "learning_rate": 3.2276929243237776e-07, + "loss": 0.1717, + "num_input_tokens_seen": 209678720, + "step": 66610 + }, + { + "epoch": 4.2644516996351065, + "grad_norm": 7.582194805145264, + "learning_rate": 3.224947837250722e-07, + "loss": 0.1424, + "num_input_tokens_seen": 209694272, + "step": 66615 + }, + { + "epoch": 4.2647717815760835, + "grad_norm": 6.380526065826416, + "learning_rate": 3.2222038375103404e-07, + "loss": 0.1312, + "num_input_tokens_seen": 209710016, + "step": 66620 + }, + { + "epoch": 4.2650918635170605, + "grad_norm": 7.57634973526001, + "learning_rate": 3.21946092523967e-07, + "loss": 0.1292, + "num_input_tokens_seen": 209725376, + "step": 66625 + }, + { + "epoch": 4.2654119454580375, + "grad_norm": 4.737862586975098, + "learning_rate": 3.216719100575669e-07, + "loss": 0.1617, + "num_input_tokens_seen": 209740160, + "step": 66630 + }, + { + "epoch": 4.2657320273990145, + "grad_norm": 8.538338661193848, + "learning_rate": 3.2139783636552483e-07, + "loss": 0.2161, + "num_input_tokens_seen": 209757248, + "step": 66635 + }, + { + "epoch": 4.2660521093399915, + "grad_norm": 1.434650182723999, + "learning_rate": 3.2112387146152726e-07, + "loss": 0.1192, + "num_input_tokens_seen": 209772416, + "step": 66640 + }, + { + "epoch": 4.266372191280968, + "grad_norm": 8.964582443237305, + "learning_rate": 3.2085001535925364e-07, + "loss": 0.1435, + "num_input_tokens_seen": 209788352, + "step": 66645 + }, + { + "epoch": 4.2666922732219446, + "grad_norm": 6.167220592498779, + "learning_rate": 3.205762680723798e-07, + "loss": 0.1802, + "num_input_tokens_seen": 209805248, + "step": 66650 + }, + { + "epoch": 4.2670123551629215, + "grad_norm": 5.003530025482178, + "learning_rate": 3.20302629614575e-07, + "loss": 0.1442, + "num_input_tokens_seen": 209821440, + "step": 66655 + }, + { + "epoch": 4.2673324371038985, + "grad_norm": 7.9082136154174805, + "learning_rate": 3.2002909999950343e-07, + "loss": 0.1423, + "num_input_tokens_seen": 209837312, + "step": 66660 + }, + { + "epoch": 4.2676525190448755, + "grad_norm": 5.093303680419922, + "learning_rate": 3.197556792408232e-07, + "loss": 0.1184, + "num_input_tokens_seen": 209854336, + "step": 66665 + }, + { + "epoch": 4.2679726009858525, + "grad_norm": 7.498791694641113, + "learning_rate": 3.1948236735218826e-07, + "loss": 0.117, + "num_input_tokens_seen": 209869120, + "step": 66670 + }, + { + "epoch": 4.2682926829268295, + "grad_norm": 6.9221272468566895, + "learning_rate": 3.1920916434724586e-07, + "loss": 0.1468, + "num_input_tokens_seen": 209885952, + "step": 66675 + }, + { + "epoch": 4.2686127648678065, + "grad_norm": 24.96245574951172, + "learning_rate": 3.1893607023963823e-07, + "loss": 0.1449, + "num_input_tokens_seen": 209900736, + "step": 66680 + }, + { + "epoch": 4.268932846808783, + "grad_norm": 9.417037963867188, + "learning_rate": 3.1866308504300226e-07, + "loss": 0.1056, + "num_input_tokens_seen": 209916544, + "step": 66685 + }, + { + "epoch": 4.26925292874976, + "grad_norm": 6.494400978088379, + "learning_rate": 3.183902087709706e-07, + "loss": 0.108, + "num_input_tokens_seen": 209932160, + "step": 66690 + }, + { + "epoch": 4.269573010690737, + "grad_norm": 4.382632255554199, + "learning_rate": 3.181174414371674e-07, + "loss": 0.1569, + "num_input_tokens_seen": 209948928, + "step": 66695 + }, + { + "epoch": 4.269893092631714, + "grad_norm": 10.431427955627441, + "learning_rate": 3.178447830552145e-07, + "loss": 0.1881, + "num_input_tokens_seen": 209963776, + "step": 66700 + }, + { + "epoch": 4.270213174572691, + "grad_norm": 6.268565654754639, + "learning_rate": 3.175722336387266e-07, + "loss": 0.1396, + "num_input_tokens_seen": 209979904, + "step": 66705 + }, + { + "epoch": 4.2705332565136676, + "grad_norm": 6.045476913452148, + "learning_rate": 3.172997932013139e-07, + "loss": 0.1545, + "num_input_tokens_seen": 209995136, + "step": 66710 + }, + { + "epoch": 4.2708533384546445, + "grad_norm": 11.175773620605469, + "learning_rate": 3.1702746175657997e-07, + "loss": 0.1718, + "num_input_tokens_seen": 210011072, + "step": 66715 + }, + { + "epoch": 4.2711734203956215, + "grad_norm": 4.499311923980713, + "learning_rate": 3.1675523931812337e-07, + "loss": 0.1476, + "num_input_tokens_seen": 210025856, + "step": 66720 + }, + { + "epoch": 4.2714935023365985, + "grad_norm": 5.267547130584717, + "learning_rate": 3.164831258995391e-07, + "loss": 0.0998, + "num_input_tokens_seen": 210042240, + "step": 66725 + }, + { + "epoch": 4.2718135842775755, + "grad_norm": 5.8771891593933105, + "learning_rate": 3.1621112151441315e-07, + "loss": 0.1297, + "num_input_tokens_seen": 210057664, + "step": 66730 + }, + { + "epoch": 4.272133666218552, + "grad_norm": 6.990921497344971, + "learning_rate": 3.159392261763292e-07, + "loss": 0.1096, + "num_input_tokens_seen": 210072384, + "step": 66735 + }, + { + "epoch": 4.272453748159529, + "grad_norm": 4.574536323547363, + "learning_rate": 3.156674398988638e-07, + "loss": 0.1216, + "num_input_tokens_seen": 210087488, + "step": 66740 + }, + { + "epoch": 4.272773830100506, + "grad_norm": 8.104863166809082, + "learning_rate": 3.153957626955889e-07, + "loss": 0.1317, + "num_input_tokens_seen": 210104512, + "step": 66745 + }, + { + "epoch": 4.273093912041483, + "grad_norm": 7.822525978088379, + "learning_rate": 3.151241945800704e-07, + "loss": 0.1093, + "num_input_tokens_seen": 210123136, + "step": 66750 + }, + { + "epoch": 4.27341399398246, + "grad_norm": 108.5492172241211, + "learning_rate": 3.148527355658684e-07, + "loss": 0.1306, + "num_input_tokens_seen": 210138624, + "step": 66755 + }, + { + "epoch": 4.273734075923437, + "grad_norm": 4.954047203063965, + "learning_rate": 3.1458138566653975e-07, + "loss": 0.1056, + "num_input_tokens_seen": 210154304, + "step": 66760 + }, + { + "epoch": 4.274054157864414, + "grad_norm": 5.636037826538086, + "learning_rate": 3.143101448956323e-07, + "loss": 0.1623, + "num_input_tokens_seen": 210169728, + "step": 66765 + }, + { + "epoch": 4.2743742398053906, + "grad_norm": 8.131101608276367, + "learning_rate": 3.140390132666921e-07, + "loss": 0.1264, + "num_input_tokens_seen": 210184704, + "step": 66770 + }, + { + "epoch": 4.274694321746367, + "grad_norm": 5.758734703063965, + "learning_rate": 3.1376799079325726e-07, + "loss": 0.0836, + "num_input_tokens_seen": 210200448, + "step": 66775 + }, + { + "epoch": 4.275014403687344, + "grad_norm": 5.461369037628174, + "learning_rate": 3.1349707748886097e-07, + "loss": 0.1466, + "num_input_tokens_seen": 210215360, + "step": 66780 + }, + { + "epoch": 4.275334485628321, + "grad_norm": 8.821362495422363, + "learning_rate": 3.1322627336703186e-07, + "loss": 0.1295, + "num_input_tokens_seen": 210231616, + "step": 66785 + }, + { + "epoch": 4.275654567569298, + "grad_norm": 5.673820972442627, + "learning_rate": 3.1295557844129217e-07, + "loss": 0.103, + "num_input_tokens_seen": 210248448, + "step": 66790 + }, + { + "epoch": 4.275974649510275, + "grad_norm": 9.322299003601074, + "learning_rate": 3.126849927251588e-07, + "loss": 0.138, + "num_input_tokens_seen": 210263040, + "step": 66795 + }, + { + "epoch": 4.276294731451252, + "grad_norm": 3.724174976348877, + "learning_rate": 3.124145162321437e-07, + "loss": 0.1231, + "num_input_tokens_seen": 210278464, + "step": 66800 + }, + { + "epoch": 4.276614813392229, + "grad_norm": 7.717945575714111, + "learning_rate": 3.1214414897575236e-07, + "loss": 0.1155, + "num_input_tokens_seen": 210294016, + "step": 66805 + }, + { + "epoch": 4.276934895333206, + "grad_norm": 5.631811618804932, + "learning_rate": 3.118738909694871e-07, + "loss": 0.1451, + "num_input_tokens_seen": 210310080, + "step": 66810 + }, + { + "epoch": 4.277254977274183, + "grad_norm": 6.68052339553833, + "learning_rate": 3.1160374222684144e-07, + "loss": 0.1678, + "num_input_tokens_seen": 210326144, + "step": 66815 + }, + { + "epoch": 4.277575059215159, + "grad_norm": 6.119938850402832, + "learning_rate": 3.113337027613061e-07, + "loss": 0.1058, + "num_input_tokens_seen": 210340544, + "step": 66820 + }, + { + "epoch": 4.277895141156136, + "grad_norm": 6.579647064208984, + "learning_rate": 3.110637725863655e-07, + "loss": 0.095, + "num_input_tokens_seen": 210357696, + "step": 66825 + }, + { + "epoch": 4.278215223097113, + "grad_norm": 4.266699314117432, + "learning_rate": 3.1079395171549787e-07, + "loss": 0.107, + "num_input_tokens_seen": 210372800, + "step": 66830 + }, + { + "epoch": 4.27853530503809, + "grad_norm": 0.5049014091491699, + "learning_rate": 3.1052424016217804e-07, + "loss": 0.1143, + "num_input_tokens_seen": 210388672, + "step": 66835 + }, + { + "epoch": 4.278855386979067, + "grad_norm": 6.453945636749268, + "learning_rate": 3.1025463793987215e-07, + "loss": 0.1163, + "num_input_tokens_seen": 210403136, + "step": 66840 + }, + { + "epoch": 4.279175468920044, + "grad_norm": 6.009002208709717, + "learning_rate": 3.0998514506204474e-07, + "loss": 0.1007, + "num_input_tokens_seen": 210419136, + "step": 66845 + }, + { + "epoch": 4.279495550861021, + "grad_norm": 7.426761627197266, + "learning_rate": 3.097157615421506e-07, + "loss": 0.1155, + "num_input_tokens_seen": 210435392, + "step": 66850 + }, + { + "epoch": 4.279815632801998, + "grad_norm": 6.449512004852295, + "learning_rate": 3.0944648739364313e-07, + "loss": 0.1402, + "num_input_tokens_seen": 210450240, + "step": 66855 + }, + { + "epoch": 4.280135714742975, + "grad_norm": 5.07066011428833, + "learning_rate": 3.09177322629968e-07, + "loss": 0.1489, + "num_input_tokens_seen": 210467072, + "step": 66860 + }, + { + "epoch": 4.280455796683951, + "grad_norm": 6.220962047576904, + "learning_rate": 3.0890826726456576e-07, + "loss": 0.1579, + "num_input_tokens_seen": 210482368, + "step": 66865 + }, + { + "epoch": 4.280775878624928, + "grad_norm": 6.2117438316345215, + "learning_rate": 3.0863932131087154e-07, + "loss": 0.1348, + "num_input_tokens_seen": 210500480, + "step": 66870 + }, + { + "epoch": 4.281095960565905, + "grad_norm": 7.6115403175354, + "learning_rate": 3.0837048478231493e-07, + "loss": 0.1665, + "num_input_tokens_seen": 210516672, + "step": 66875 + }, + { + "epoch": 4.281416042506882, + "grad_norm": 8.027716636657715, + "learning_rate": 3.0810175769232093e-07, + "loss": 0.1429, + "num_input_tokens_seen": 210532992, + "step": 66880 + }, + { + "epoch": 4.281736124447859, + "grad_norm": 5.716476917266846, + "learning_rate": 3.07833140054308e-07, + "loss": 0.1077, + "num_input_tokens_seen": 210548544, + "step": 66885 + }, + { + "epoch": 4.282056206388836, + "grad_norm": 5.805980205535889, + "learning_rate": 3.075646318816894e-07, + "loss": 0.1233, + "num_input_tokens_seen": 210563456, + "step": 66890 + }, + { + "epoch": 4.282376288329813, + "grad_norm": 8.963545799255371, + "learning_rate": 3.0729623318787344e-07, + "loss": 0.1286, + "num_input_tokens_seen": 210579136, + "step": 66895 + }, + { + "epoch": 4.28269637027079, + "grad_norm": 7.198855400085449, + "learning_rate": 3.070279439862617e-07, + "loss": 0.1611, + "num_input_tokens_seen": 210593664, + "step": 66900 + }, + { + "epoch": 4.283016452211767, + "grad_norm": 8.868745803833008, + "learning_rate": 3.06759764290252e-07, + "loss": 0.151, + "num_input_tokens_seen": 210609728, + "step": 66905 + }, + { + "epoch": 4.283336534152743, + "grad_norm": 5.86210823059082, + "learning_rate": 3.064916941132354e-07, + "loss": 0.1032, + "num_input_tokens_seen": 210625792, + "step": 66910 + }, + { + "epoch": 4.28365661609372, + "grad_norm": 7.6525421142578125, + "learning_rate": 3.0622373346859746e-07, + "loss": 0.0661, + "num_input_tokens_seen": 210641216, + "step": 66915 + }, + { + "epoch": 4.283976698034697, + "grad_norm": 10.274970054626465, + "learning_rate": 3.0595588236972063e-07, + "loss": 0.1357, + "num_input_tokens_seen": 210657088, + "step": 66920 + }, + { + "epoch": 4.284296779975674, + "grad_norm": 7.410429954528809, + "learning_rate": 3.056881408299772e-07, + "loss": 0.1477, + "num_input_tokens_seen": 210672128, + "step": 66925 + }, + { + "epoch": 4.284616861916651, + "grad_norm": 8.211162567138672, + "learning_rate": 3.0542050886273904e-07, + "loss": 0.1822, + "num_input_tokens_seen": 210687808, + "step": 66930 + }, + { + "epoch": 4.284936943857628, + "grad_norm": 10.301297187805176, + "learning_rate": 3.051529864813696e-07, + "loss": 0.1298, + "num_input_tokens_seen": 210703040, + "step": 66935 + }, + { + "epoch": 4.285257025798605, + "grad_norm": 6.136139869689941, + "learning_rate": 3.0488557369922735e-07, + "loss": 0.0893, + "num_input_tokens_seen": 210717824, + "step": 66940 + }, + { + "epoch": 4.285577107739582, + "grad_norm": 3.1239404678344727, + "learning_rate": 3.046182705296655e-07, + "loss": 0.1294, + "num_input_tokens_seen": 210732928, + "step": 66945 + }, + { + "epoch": 4.285897189680558, + "grad_norm": 4.959163188934326, + "learning_rate": 3.0435107698603167e-07, + "loss": 0.0836, + "num_input_tokens_seen": 210748864, + "step": 66950 + }, + { + "epoch": 4.286217271621535, + "grad_norm": 6.524843215942383, + "learning_rate": 3.0408399308166916e-07, + "loss": 0.1691, + "num_input_tokens_seen": 210765888, + "step": 66955 + }, + { + "epoch": 4.286537353562512, + "grad_norm": 53.33771896362305, + "learning_rate": 3.038170188299128e-07, + "loss": 0.1109, + "num_input_tokens_seen": 210781504, + "step": 66960 + }, + { + "epoch": 4.286857435503489, + "grad_norm": 5.832334041595459, + "learning_rate": 3.035501542440958e-07, + "loss": 0.1941, + "num_input_tokens_seen": 210799424, + "step": 66965 + }, + { + "epoch": 4.287177517444466, + "grad_norm": 7.791653156280518, + "learning_rate": 3.032833993375431e-07, + "loss": 0.1214, + "num_input_tokens_seen": 210815424, + "step": 66970 + }, + { + "epoch": 4.287497599385443, + "grad_norm": 11.424722671508789, + "learning_rate": 3.030167541235751e-07, + "loss": 0.1333, + "num_input_tokens_seen": 210830784, + "step": 66975 + }, + { + "epoch": 4.28781768132642, + "grad_norm": 6.427248954772949, + "learning_rate": 3.027502186155068e-07, + "loss": 0.1684, + "num_input_tokens_seen": 210846656, + "step": 66980 + }, + { + "epoch": 4.288137763267397, + "grad_norm": 8.33221435546875, + "learning_rate": 3.024837928266475e-07, + "loss": 0.1467, + "num_input_tokens_seen": 210861760, + "step": 66985 + }, + { + "epoch": 4.288457845208374, + "grad_norm": 5.108793258666992, + "learning_rate": 3.0221747677030094e-07, + "loss": 0.1134, + "num_input_tokens_seen": 210878784, + "step": 66990 + }, + { + "epoch": 4.288777927149351, + "grad_norm": 5.095913887023926, + "learning_rate": 3.019512704597655e-07, + "loss": 0.1247, + "num_input_tokens_seen": 210894336, + "step": 66995 + }, + { + "epoch": 4.289098009090327, + "grad_norm": 6.850958347320557, + "learning_rate": 3.0168517390833485e-07, + "loss": 0.0949, + "num_input_tokens_seen": 210910912, + "step": 67000 + }, + { + "epoch": 4.289418091031304, + "grad_norm": 6.00884485244751, + "learning_rate": 3.0141918712929595e-07, + "loss": 0.1361, + "num_input_tokens_seen": 210926144, + "step": 67005 + }, + { + "epoch": 4.289738172972281, + "grad_norm": 6.745336055755615, + "learning_rate": 3.0115331013593096e-07, + "loss": 0.1263, + "num_input_tokens_seen": 210942080, + "step": 67010 + }, + { + "epoch": 4.290058254913258, + "grad_norm": 11.379066467285156, + "learning_rate": 3.0088754294151616e-07, + "loss": 0.1491, + "num_input_tokens_seen": 210958848, + "step": 67015 + }, + { + "epoch": 4.290378336854235, + "grad_norm": 5.999561309814453, + "learning_rate": 3.006218855593229e-07, + "loss": 0.1034, + "num_input_tokens_seen": 210974336, + "step": 67020 + }, + { + "epoch": 4.290698418795212, + "grad_norm": 11.639169692993164, + "learning_rate": 3.003563380026159e-07, + "loss": 0.1211, + "num_input_tokens_seen": 210992256, + "step": 67025 + }, + { + "epoch": 4.291018500736189, + "grad_norm": 8.939642906188965, + "learning_rate": 3.0009090028465693e-07, + "loss": 0.1435, + "num_input_tokens_seen": 211007424, + "step": 67030 + }, + { + "epoch": 4.291338582677166, + "grad_norm": 6.47451114654541, + "learning_rate": 2.9982557241869827e-07, + "loss": 0.117, + "num_input_tokens_seen": 211024640, + "step": 67035 + }, + { + "epoch": 4.291658664618142, + "grad_norm": 5.791005611419678, + "learning_rate": 2.9956035441799147e-07, + "loss": 0.099, + "num_input_tokens_seen": 211040512, + "step": 67040 + }, + { + "epoch": 4.291978746559119, + "grad_norm": 7.88689661026001, + "learning_rate": 2.992952462957779e-07, + "loss": 0.124, + "num_input_tokens_seen": 211056704, + "step": 67045 + }, + { + "epoch": 4.292298828500096, + "grad_norm": 5.582493305206299, + "learning_rate": 2.990302480652971e-07, + "loss": 0.1007, + "num_input_tokens_seen": 211072896, + "step": 67050 + }, + { + "epoch": 4.292618910441073, + "grad_norm": 17.961380004882812, + "learning_rate": 2.9876535973978134e-07, + "loss": 0.1816, + "num_input_tokens_seen": 211088128, + "step": 67055 + }, + { + "epoch": 4.29293899238205, + "grad_norm": 8.106684684753418, + "learning_rate": 2.9850058133245707e-07, + "loss": 0.128, + "num_input_tokens_seen": 211104256, + "step": 67060 + }, + { + "epoch": 4.293259074323027, + "grad_norm": 8.642922401428223, + "learning_rate": 2.982359128565476e-07, + "loss": 0.1425, + "num_input_tokens_seen": 211118912, + "step": 67065 + }, + { + "epoch": 4.293579156264004, + "grad_norm": 5.707774639129639, + "learning_rate": 2.979713543252671e-07, + "loss": 0.1409, + "num_input_tokens_seen": 211133888, + "step": 67070 + }, + { + "epoch": 4.293899238204981, + "grad_norm": 8.161005020141602, + "learning_rate": 2.977069057518284e-07, + "loss": 0.1371, + "num_input_tokens_seen": 211150464, + "step": 67075 + }, + { + "epoch": 4.294219320145958, + "grad_norm": 3.671234607696533, + "learning_rate": 2.9744256714943407e-07, + "loss": 0.1069, + "num_input_tokens_seen": 211165120, + "step": 67080 + }, + { + "epoch": 4.294539402086934, + "grad_norm": 6.142932891845703, + "learning_rate": 2.97178338531286e-07, + "loss": 0.1297, + "num_input_tokens_seen": 211181056, + "step": 67085 + }, + { + "epoch": 4.294859484027911, + "grad_norm": 5.813260078430176, + "learning_rate": 2.969142199105776e-07, + "loss": 0.1655, + "num_input_tokens_seen": 211197120, + "step": 67090 + }, + { + "epoch": 4.295179565968888, + "grad_norm": 12.69554615020752, + "learning_rate": 2.9665021130049767e-07, + "loss": 0.1848, + "num_input_tokens_seen": 211212032, + "step": 67095 + }, + { + "epoch": 4.295499647909865, + "grad_norm": 6.29742431640625, + "learning_rate": 2.963863127142294e-07, + "loss": 0.139, + "num_input_tokens_seen": 211226624, + "step": 67100 + }, + { + "epoch": 4.295819729850842, + "grad_norm": 5.86332893371582, + "learning_rate": 2.9612252416494985e-07, + "loss": 0.1133, + "num_input_tokens_seen": 211240896, + "step": 67105 + }, + { + "epoch": 4.296139811791819, + "grad_norm": 13.355570793151855, + "learning_rate": 2.958588456658323e-07, + "loss": 0.1605, + "num_input_tokens_seen": 211256192, + "step": 67110 + }, + { + "epoch": 4.296459893732796, + "grad_norm": 5.682133674621582, + "learning_rate": 2.955952772300433e-07, + "loss": 0.1421, + "num_input_tokens_seen": 211272512, + "step": 67115 + }, + { + "epoch": 4.296779975673773, + "grad_norm": 8.607494354248047, + "learning_rate": 2.9533181887074353e-07, + "loss": 0.1326, + "num_input_tokens_seen": 211287744, + "step": 67120 + }, + { + "epoch": 4.29710005761475, + "grad_norm": 6.788436412811279, + "learning_rate": 2.950684706010892e-07, + "loss": 0.1269, + "num_input_tokens_seen": 211302528, + "step": 67125 + }, + { + "epoch": 4.297420139555726, + "grad_norm": 7.449051856994629, + "learning_rate": 2.9480523243423043e-07, + "loss": 0.119, + "num_input_tokens_seen": 211318208, + "step": 67130 + }, + { + "epoch": 4.297740221496703, + "grad_norm": 5.326625347137451, + "learning_rate": 2.945421043833119e-07, + "loss": 0.0983, + "num_input_tokens_seen": 211333120, + "step": 67135 + }, + { + "epoch": 4.29806030343768, + "grad_norm": 4.422914505004883, + "learning_rate": 2.942790864614728e-07, + "loss": 0.1105, + "num_input_tokens_seen": 211348992, + "step": 67140 + }, + { + "epoch": 4.298380385378657, + "grad_norm": 9.872152328491211, + "learning_rate": 2.9401617868184626e-07, + "loss": 0.1588, + "num_input_tokens_seen": 211364992, + "step": 67145 + }, + { + "epoch": 4.298700467319634, + "grad_norm": 5.399128437042236, + "learning_rate": 2.9375338105756243e-07, + "loss": 0.103, + "num_input_tokens_seen": 211380224, + "step": 67150 + }, + { + "epoch": 4.299020549260611, + "grad_norm": 5.185306072235107, + "learning_rate": 2.934906936017418e-07, + "loss": 0.1047, + "num_input_tokens_seen": 211396416, + "step": 67155 + }, + { + "epoch": 4.299340631201588, + "grad_norm": 4.952142238616943, + "learning_rate": 2.932281163275033e-07, + "loss": 0.075, + "num_input_tokens_seen": 211411520, + "step": 67160 + }, + { + "epoch": 4.299660713142565, + "grad_norm": 7.231166362762451, + "learning_rate": 2.929656492479577e-07, + "loss": 0.1115, + "num_input_tokens_seen": 211427136, + "step": 67165 + }, + { + "epoch": 4.299980795083542, + "grad_norm": 6.0913238525390625, + "learning_rate": 2.927032923762116e-07, + "loss": 0.1317, + "num_input_tokens_seen": 211442304, + "step": 67170 + }, + { + "epoch": 4.300300877024518, + "grad_norm": 5.656469821929932, + "learning_rate": 2.9244104572536595e-07, + "loss": 0.127, + "num_input_tokens_seen": 211457344, + "step": 67175 + }, + { + "epoch": 4.300620958965495, + "grad_norm": 4.636542797088623, + "learning_rate": 2.9217890930851505e-07, + "loss": 0.1585, + "num_input_tokens_seen": 211473472, + "step": 67180 + }, + { + "epoch": 4.300941040906472, + "grad_norm": 3.5144150257110596, + "learning_rate": 2.9191688313875035e-07, + "loss": 0.1279, + "num_input_tokens_seen": 211489536, + "step": 67185 + }, + { + "epoch": 4.301261122847449, + "grad_norm": 6.6227707862854, + "learning_rate": 2.916549672291538e-07, + "loss": 0.1289, + "num_input_tokens_seen": 211505216, + "step": 67190 + }, + { + "epoch": 4.301581204788426, + "grad_norm": 9.606805801391602, + "learning_rate": 2.9139316159280617e-07, + "loss": 0.1253, + "num_input_tokens_seen": 211522816, + "step": 67195 + }, + { + "epoch": 4.301901286729403, + "grad_norm": 6.208489418029785, + "learning_rate": 2.911314662427797e-07, + "loss": 0.119, + "num_input_tokens_seen": 211537856, + "step": 67200 + }, + { + "epoch": 4.30222136867038, + "grad_norm": 5.672071933746338, + "learning_rate": 2.9086988119214207e-07, + "loss": 0.1219, + "num_input_tokens_seen": 211553664, + "step": 67205 + }, + { + "epoch": 4.302541450611357, + "grad_norm": 6.029555320739746, + "learning_rate": 2.906084064539558e-07, + "loss": 0.1333, + "num_input_tokens_seen": 211570880, + "step": 67210 + }, + { + "epoch": 4.302861532552333, + "grad_norm": 3.60514497756958, + "learning_rate": 2.903470420412774e-07, + "loss": 0.1191, + "num_input_tokens_seen": 211586880, + "step": 67215 + }, + { + "epoch": 4.30318161449331, + "grad_norm": 13.902756690979004, + "learning_rate": 2.9008578796715814e-07, + "loss": 0.1304, + "num_input_tokens_seen": 211602752, + "step": 67220 + }, + { + "epoch": 4.303501696434287, + "grad_norm": 7.556607723236084, + "learning_rate": 2.8982464424464286e-07, + "loss": 0.1196, + "num_input_tokens_seen": 211617984, + "step": 67225 + }, + { + "epoch": 4.303821778375264, + "grad_norm": 7.925574779510498, + "learning_rate": 2.895636108867733e-07, + "loss": 0.1474, + "num_input_tokens_seen": 211632960, + "step": 67230 + }, + { + "epoch": 4.304141860316241, + "grad_norm": 6.103649616241455, + "learning_rate": 2.89302687906583e-07, + "loss": 0.1015, + "num_input_tokens_seen": 211648704, + "step": 67235 + }, + { + "epoch": 4.304461942257218, + "grad_norm": 7.362614154815674, + "learning_rate": 2.8904187531710147e-07, + "loss": 0.1068, + "num_input_tokens_seen": 211663296, + "step": 67240 + }, + { + "epoch": 4.304782024198195, + "grad_norm": 5.716064453125, + "learning_rate": 2.8878117313135225e-07, + "loss": 0.1566, + "num_input_tokens_seen": 211679744, + "step": 67245 + }, + { + "epoch": 4.305102106139172, + "grad_norm": 9.922686576843262, + "learning_rate": 2.885205813623534e-07, + "loss": 0.1574, + "num_input_tokens_seen": 211695296, + "step": 67250 + }, + { + "epoch": 4.305422188080149, + "grad_norm": 0.9836015105247498, + "learning_rate": 2.8826010002311697e-07, + "loss": 0.1124, + "num_input_tokens_seen": 211711616, + "step": 67255 + }, + { + "epoch": 4.305742270021126, + "grad_norm": 4.177858829498291, + "learning_rate": 2.8799972912665176e-07, + "loss": 0.1695, + "num_input_tokens_seen": 211727232, + "step": 67260 + }, + { + "epoch": 4.306062351962102, + "grad_norm": 6.99788761138916, + "learning_rate": 2.877394686859569e-07, + "loss": 0.1305, + "num_input_tokens_seen": 211742016, + "step": 67265 + }, + { + "epoch": 4.306382433903079, + "grad_norm": 8.497699737548828, + "learning_rate": 2.87479318714031e-07, + "loss": 0.1308, + "num_input_tokens_seen": 211756544, + "step": 67270 + }, + { + "epoch": 4.306702515844056, + "grad_norm": 6.454923629760742, + "learning_rate": 2.872192792238623e-07, + "loss": 0.1009, + "num_input_tokens_seen": 211771968, + "step": 67275 + }, + { + "epoch": 4.307022597785033, + "grad_norm": 5.330953598022461, + "learning_rate": 2.8695935022843737e-07, + "loss": 0.1001, + "num_input_tokens_seen": 211787712, + "step": 67280 + }, + { + "epoch": 4.30734267972601, + "grad_norm": 4.262053489685059, + "learning_rate": 2.8669953174073506e-07, + "loss": 0.0968, + "num_input_tokens_seen": 211803712, + "step": 67285 + }, + { + "epoch": 4.307662761666987, + "grad_norm": 7.673128128051758, + "learning_rate": 2.86439823773729e-07, + "loss": 0.1407, + "num_input_tokens_seen": 211819200, + "step": 67290 + }, + { + "epoch": 4.307982843607964, + "grad_norm": 2.246711492538452, + "learning_rate": 2.86180226340389e-07, + "loss": 0.0711, + "num_input_tokens_seen": 211835840, + "step": 67295 + }, + { + "epoch": 4.308302925548941, + "grad_norm": 3.3215720653533936, + "learning_rate": 2.8592073945367645e-07, + "loss": 0.1331, + "num_input_tokens_seen": 211851264, + "step": 67300 + }, + { + "epoch": 4.308623007489917, + "grad_norm": 3.490577220916748, + "learning_rate": 2.8566136312655006e-07, + "loss": 0.0881, + "num_input_tokens_seen": 211866688, + "step": 67305 + }, + { + "epoch": 4.308943089430894, + "grad_norm": 4.398473739624023, + "learning_rate": 2.854020973719604e-07, + "loss": 0.0945, + "num_input_tokens_seen": 211881984, + "step": 67310 + }, + { + "epoch": 4.309263171371871, + "grad_norm": 6.55675745010376, + "learning_rate": 2.851429422028551e-07, + "loss": 0.1264, + "num_input_tokens_seen": 211897792, + "step": 67315 + }, + { + "epoch": 4.309583253312848, + "grad_norm": 8.676041603088379, + "learning_rate": 2.848838976321744e-07, + "loss": 0.1337, + "num_input_tokens_seen": 211913792, + "step": 67320 + }, + { + "epoch": 4.309903335253825, + "grad_norm": 9.850770950317383, + "learning_rate": 2.8462496367285377e-07, + "loss": 0.1773, + "num_input_tokens_seen": 211930880, + "step": 67325 + }, + { + "epoch": 4.310223417194802, + "grad_norm": 5.028923511505127, + "learning_rate": 2.8436614033782286e-07, + "loss": 0.1356, + "num_input_tokens_seen": 211946368, + "step": 67330 + }, + { + "epoch": 4.310543499135779, + "grad_norm": 6.654808521270752, + "learning_rate": 2.841074276400063e-07, + "loss": 0.1384, + "num_input_tokens_seen": 211962240, + "step": 67335 + }, + { + "epoch": 4.310863581076756, + "grad_norm": 5.870004653930664, + "learning_rate": 2.838488255923219e-07, + "loss": 0.1178, + "num_input_tokens_seen": 211978624, + "step": 67340 + }, + { + "epoch": 4.311183663017733, + "grad_norm": 4.362114906311035, + "learning_rate": 2.835903342076843e-07, + "loss": 0.0958, + "num_input_tokens_seen": 211993792, + "step": 67345 + }, + { + "epoch": 4.311503744958709, + "grad_norm": 6.917397499084473, + "learning_rate": 2.8333195349900064e-07, + "loss": 0.1116, + "num_input_tokens_seen": 212009856, + "step": 67350 + }, + { + "epoch": 4.311823826899686, + "grad_norm": 9.362693786621094, + "learning_rate": 2.830736834791728e-07, + "loss": 0.1179, + "num_input_tokens_seen": 212025216, + "step": 67355 + }, + { + "epoch": 4.312143908840663, + "grad_norm": 6.550846576690674, + "learning_rate": 2.8281552416109773e-07, + "loss": 0.0726, + "num_input_tokens_seen": 212041920, + "step": 67360 + }, + { + "epoch": 4.31246399078164, + "grad_norm": 5.951886177062988, + "learning_rate": 2.8255747555766654e-07, + "loss": 0.1373, + "num_input_tokens_seen": 212058432, + "step": 67365 + }, + { + "epoch": 4.312784072722617, + "grad_norm": 5.125927925109863, + "learning_rate": 2.822995376817647e-07, + "loss": 0.1017, + "num_input_tokens_seen": 212073216, + "step": 67370 + }, + { + "epoch": 4.313104154663594, + "grad_norm": 13.803727149963379, + "learning_rate": 2.8204171054627194e-07, + "loss": 0.1448, + "num_input_tokens_seen": 212089792, + "step": 67375 + }, + { + "epoch": 4.313424236604571, + "grad_norm": 6.15095853805542, + "learning_rate": 2.817839941640643e-07, + "loss": 0.1223, + "num_input_tokens_seen": 212105984, + "step": 67380 + }, + { + "epoch": 4.313744318545548, + "grad_norm": 5.450864791870117, + "learning_rate": 2.815263885480088e-07, + "loss": 0.1467, + "num_input_tokens_seen": 212120640, + "step": 67385 + }, + { + "epoch": 4.314064400486525, + "grad_norm": 4.077380657196045, + "learning_rate": 2.8126889371097006e-07, + "loss": 0.1213, + "num_input_tokens_seen": 212136064, + "step": 67390 + }, + { + "epoch": 4.314384482427501, + "grad_norm": 4.29656457901001, + "learning_rate": 2.810115096658059e-07, + "loss": 0.1132, + "num_input_tokens_seen": 212151488, + "step": 67395 + }, + { + "epoch": 4.314704564368478, + "grad_norm": 4.1266188621521, + "learning_rate": 2.8075423642536876e-07, + "loss": 0.1291, + "num_input_tokens_seen": 212166016, + "step": 67400 + }, + { + "epoch": 4.315024646309455, + "grad_norm": 8.97269058227539, + "learning_rate": 2.8049707400250534e-07, + "loss": 0.1273, + "num_input_tokens_seen": 212180928, + "step": 67405 + }, + { + "epoch": 4.315344728250432, + "grad_norm": 5.567686557769775, + "learning_rate": 2.8024002241005654e-07, + "loss": 0.1066, + "num_input_tokens_seen": 212196352, + "step": 67410 + }, + { + "epoch": 4.315664810191409, + "grad_norm": 6.652954578399658, + "learning_rate": 2.799830816608598e-07, + "loss": 0.213, + "num_input_tokens_seen": 212212480, + "step": 67415 + }, + { + "epoch": 4.315984892132386, + "grad_norm": 10.912616729736328, + "learning_rate": 2.797262517677435e-07, + "loss": 0.1763, + "num_input_tokens_seen": 212227072, + "step": 67420 + }, + { + "epoch": 4.316304974073363, + "grad_norm": 5.796988010406494, + "learning_rate": 2.7946953274353346e-07, + "loss": 0.1169, + "num_input_tokens_seen": 212242176, + "step": 67425 + }, + { + "epoch": 4.31662505601434, + "grad_norm": 8.975943565368652, + "learning_rate": 2.792129246010486e-07, + "loss": 0.1239, + "num_input_tokens_seen": 212257408, + "step": 67430 + }, + { + "epoch": 4.316945137955317, + "grad_norm": 7.312676429748535, + "learning_rate": 2.7895642735310285e-07, + "loss": 0.1388, + "num_input_tokens_seen": 212273856, + "step": 67435 + }, + { + "epoch": 4.317265219896293, + "grad_norm": 7.211702346801758, + "learning_rate": 2.78700041012504e-07, + "loss": 0.1176, + "num_input_tokens_seen": 212291456, + "step": 67440 + }, + { + "epoch": 4.31758530183727, + "grad_norm": 8.195271492004395, + "learning_rate": 2.7844376559205464e-07, + "loss": 0.11, + "num_input_tokens_seen": 212306688, + "step": 67445 + }, + { + "epoch": 4.317905383778247, + "grad_norm": 7.65985631942749, + "learning_rate": 2.78187601104552e-07, + "loss": 0.1237, + "num_input_tokens_seen": 212322944, + "step": 67450 + }, + { + "epoch": 4.318225465719224, + "grad_norm": 8.059859275817871, + "learning_rate": 2.779315475627872e-07, + "loss": 0.1884, + "num_input_tokens_seen": 212338688, + "step": 67455 + }, + { + "epoch": 4.318545547660201, + "grad_norm": 7.819667816162109, + "learning_rate": 2.7767560497954704e-07, + "loss": 0.1658, + "num_input_tokens_seen": 212355392, + "step": 67460 + }, + { + "epoch": 4.318865629601178, + "grad_norm": 9.632128715515137, + "learning_rate": 2.774197733676115e-07, + "loss": 0.1622, + "num_input_tokens_seen": 212372032, + "step": 67465 + }, + { + "epoch": 4.319185711542155, + "grad_norm": 5.777871608734131, + "learning_rate": 2.771640527397554e-07, + "loss": 0.1957, + "num_input_tokens_seen": 212388288, + "step": 67470 + }, + { + "epoch": 4.319505793483132, + "grad_norm": 7.3773884773254395, + "learning_rate": 2.7690844310874847e-07, + "loss": 0.1136, + "num_input_tokens_seen": 212402944, + "step": 67475 + }, + { + "epoch": 4.319825875424108, + "grad_norm": 22.04775047302246, + "learning_rate": 2.766529444873539e-07, + "loss": 0.2043, + "num_input_tokens_seen": 212419776, + "step": 67480 + }, + { + "epoch": 4.320145957365085, + "grad_norm": 7.659334659576416, + "learning_rate": 2.763975568883298e-07, + "loss": 0.1526, + "num_input_tokens_seen": 212434688, + "step": 67485 + }, + { + "epoch": 4.320466039306062, + "grad_norm": 5.140069484710693, + "learning_rate": 2.761422803244307e-07, + "loss": 0.0931, + "num_input_tokens_seen": 212451072, + "step": 67490 + }, + { + "epoch": 4.320786121247039, + "grad_norm": 7.423378944396973, + "learning_rate": 2.758871148084011e-07, + "loss": 0.1119, + "num_input_tokens_seen": 212466432, + "step": 67495 + }, + { + "epoch": 4.321106203188016, + "grad_norm": 8.482197761535645, + "learning_rate": 2.7563206035298525e-07, + "loss": 0.1451, + "num_input_tokens_seen": 212481472, + "step": 67500 + }, + { + "epoch": 4.321426285128993, + "grad_norm": 5.493631362915039, + "learning_rate": 2.7537711697091685e-07, + "loss": 0.0936, + "num_input_tokens_seen": 212496576, + "step": 67505 + }, + { + "epoch": 4.32174636706997, + "grad_norm": 6.344107151031494, + "learning_rate": 2.7512228467492826e-07, + "loss": 0.1878, + "num_input_tokens_seen": 212511808, + "step": 67510 + }, + { + "epoch": 4.322066449010947, + "grad_norm": 6.160593032836914, + "learning_rate": 2.748675634777434e-07, + "loss": 0.113, + "num_input_tokens_seen": 212526848, + "step": 67515 + }, + { + "epoch": 4.322386530951924, + "grad_norm": 2.4155402183532715, + "learning_rate": 2.7461295339208214e-07, + "loss": 0.0943, + "num_input_tokens_seen": 212542272, + "step": 67520 + }, + { + "epoch": 4.322706612892901, + "grad_norm": 6.078248500823975, + "learning_rate": 2.7435845443065906e-07, + "loss": 0.1284, + "num_input_tokens_seen": 212558400, + "step": 67525 + }, + { + "epoch": 4.323026694833877, + "grad_norm": 7.901853561401367, + "learning_rate": 2.741040666061809e-07, + "loss": 0.1714, + "num_input_tokens_seen": 212574144, + "step": 67530 + }, + { + "epoch": 4.323346776774854, + "grad_norm": 4.046666622161865, + "learning_rate": 2.738497899313525e-07, + "loss": 0.1202, + "num_input_tokens_seen": 212589888, + "step": 67535 + }, + { + "epoch": 4.323666858715831, + "grad_norm": 6.523605823516846, + "learning_rate": 2.7359562441886867e-07, + "loss": 0.1282, + "num_input_tokens_seen": 212605696, + "step": 67540 + }, + { + "epoch": 4.323986940656808, + "grad_norm": 4.778154373168945, + "learning_rate": 2.7334157008142323e-07, + "loss": 0.0927, + "num_input_tokens_seen": 212621248, + "step": 67545 + }, + { + "epoch": 4.324307022597785, + "grad_norm": 10.331209182739258, + "learning_rate": 2.730876269317015e-07, + "loss": 0.1703, + "num_input_tokens_seen": 212638784, + "step": 67550 + }, + { + "epoch": 4.324627104538762, + "grad_norm": 7.51694393157959, + "learning_rate": 2.728337949823842e-07, + "loss": 0.164, + "num_input_tokens_seen": 212653888, + "step": 67555 + }, + { + "epoch": 4.324947186479739, + "grad_norm": 6.6786346435546875, + "learning_rate": 2.7258007424614595e-07, + "loss": 0.0987, + "num_input_tokens_seen": 212669504, + "step": 67560 + }, + { + "epoch": 4.325267268420716, + "grad_norm": 3.85239315032959, + "learning_rate": 2.723264647356569e-07, + "loss": 0.118, + "num_input_tokens_seen": 212684864, + "step": 67565 + }, + { + "epoch": 4.325587350361692, + "grad_norm": 8.9933500289917, + "learning_rate": 2.7207296646358017e-07, + "loss": 0.117, + "num_input_tokens_seen": 212701632, + "step": 67570 + }, + { + "epoch": 4.325907432302669, + "grad_norm": 7.096656322479248, + "learning_rate": 2.7181957944257515e-07, + "loss": 0.1121, + "num_input_tokens_seen": 212718784, + "step": 67575 + }, + { + "epoch": 4.326227514243646, + "grad_norm": 8.534339904785156, + "learning_rate": 2.715663036852939e-07, + "loss": 0.1139, + "num_input_tokens_seen": 212735296, + "step": 67580 + }, + { + "epoch": 4.326547596184623, + "grad_norm": 21.554492950439453, + "learning_rate": 2.7131313920438414e-07, + "loss": 0.1374, + "num_input_tokens_seen": 212750848, + "step": 67585 + }, + { + "epoch": 4.3268676781256, + "grad_norm": 7.330188751220703, + "learning_rate": 2.7106008601248737e-07, + "loss": 0.1773, + "num_input_tokens_seen": 212766400, + "step": 67590 + }, + { + "epoch": 4.327187760066577, + "grad_norm": 7.5272955894470215, + "learning_rate": 2.7080714412223987e-07, + "loss": 0.1624, + "num_input_tokens_seen": 212781504, + "step": 67595 + }, + { + "epoch": 4.327507842007554, + "grad_norm": 4.433496952056885, + "learning_rate": 2.705543135462721e-07, + "loss": 0.1569, + "num_input_tokens_seen": 212798784, + "step": 67600 + }, + { + "epoch": 4.327827923948531, + "grad_norm": 5.415318012237549, + "learning_rate": 2.703015942972087e-07, + "loss": 0.1138, + "num_input_tokens_seen": 212816384, + "step": 67605 + }, + { + "epoch": 4.328148005889508, + "grad_norm": 6.442133903503418, + "learning_rate": 2.700489863876704e-07, + "loss": 0.1259, + "num_input_tokens_seen": 212831808, + "step": 67610 + }, + { + "epoch": 4.328468087830484, + "grad_norm": 20.394981384277344, + "learning_rate": 2.697964898302696e-07, + "loss": 0.1065, + "num_input_tokens_seen": 212847360, + "step": 67615 + }, + { + "epoch": 4.328788169771461, + "grad_norm": 7.262779712677002, + "learning_rate": 2.695441046376157e-07, + "loss": 0.1117, + "num_input_tokens_seen": 212862976, + "step": 67620 + }, + { + "epoch": 4.329108251712438, + "grad_norm": 5.231938362121582, + "learning_rate": 2.6929183082231136e-07, + "loss": 0.1428, + "num_input_tokens_seen": 212877504, + "step": 67625 + }, + { + "epoch": 4.329428333653415, + "grad_norm": 5.238612174987793, + "learning_rate": 2.6903966839695346e-07, + "loss": 0.1325, + "num_input_tokens_seen": 212892544, + "step": 67630 + }, + { + "epoch": 4.329748415594392, + "grad_norm": 7.0665411949157715, + "learning_rate": 2.6878761737413413e-07, + "loss": 0.119, + "num_input_tokens_seen": 212907968, + "step": 67635 + }, + { + "epoch": 4.330068497535369, + "grad_norm": 5.273152828216553, + "learning_rate": 2.685356777664386e-07, + "loss": 0.1283, + "num_input_tokens_seen": 212923840, + "step": 67640 + }, + { + "epoch": 4.330388579476346, + "grad_norm": 10.81301498413086, + "learning_rate": 2.68283849586449e-07, + "loss": 0.1442, + "num_input_tokens_seen": 212939200, + "step": 67645 + }, + { + "epoch": 4.330708661417323, + "grad_norm": 5.540247440338135, + "learning_rate": 2.6803213284673834e-07, + "loss": 0.1297, + "num_input_tokens_seen": 212955008, + "step": 67650 + }, + { + "epoch": 4.3310287433583, + "grad_norm": 7.0324273109436035, + "learning_rate": 2.6778052755987743e-07, + "loss": 0.127, + "num_input_tokens_seen": 212970752, + "step": 67655 + }, + { + "epoch": 4.331348825299276, + "grad_norm": 6.102474689483643, + "learning_rate": 2.675290337384301e-07, + "loss": 0.1288, + "num_input_tokens_seen": 212986048, + "step": 67660 + }, + { + "epoch": 4.331668907240253, + "grad_norm": 6.009413242340088, + "learning_rate": 2.6727765139495404e-07, + "loss": 0.1173, + "num_input_tokens_seen": 213001728, + "step": 67665 + }, + { + "epoch": 4.33198898918123, + "grad_norm": 6.38776969909668, + "learning_rate": 2.670263805420026e-07, + "loss": 0.1382, + "num_input_tokens_seen": 213018624, + "step": 67670 + }, + { + "epoch": 4.332309071122207, + "grad_norm": 5.682307720184326, + "learning_rate": 2.667752211921221e-07, + "loss": 0.1322, + "num_input_tokens_seen": 213034112, + "step": 67675 + }, + { + "epoch": 4.332629153063184, + "grad_norm": 5.733408451080322, + "learning_rate": 2.66524173357855e-07, + "loss": 0.1283, + "num_input_tokens_seen": 213049024, + "step": 67680 + }, + { + "epoch": 4.332949235004161, + "grad_norm": 6.547173976898193, + "learning_rate": 2.662732370517368e-07, + "loss": 0.1605, + "num_input_tokens_seen": 213064640, + "step": 67685 + }, + { + "epoch": 4.333269316945138, + "grad_norm": 8.823935508728027, + "learning_rate": 2.6602241228629757e-07, + "loss": 0.1374, + "num_input_tokens_seen": 213079040, + "step": 67690 + }, + { + "epoch": 4.333589398886115, + "grad_norm": 6.775853157043457, + "learning_rate": 2.65771699074063e-07, + "loss": 0.1308, + "num_input_tokens_seen": 213094336, + "step": 67695 + }, + { + "epoch": 4.333909480827092, + "grad_norm": 8.234018325805664, + "learning_rate": 2.655210974275524e-07, + "loss": 0.1318, + "num_input_tokens_seen": 213110592, + "step": 67700 + }, + { + "epoch": 4.334229562768068, + "grad_norm": 38.329833984375, + "learning_rate": 2.652706073592792e-07, + "loss": 0.1614, + "num_input_tokens_seen": 213126336, + "step": 67705 + }, + { + "epoch": 4.334549644709045, + "grad_norm": 7.013932228088379, + "learning_rate": 2.650202288817516e-07, + "loss": 0.1108, + "num_input_tokens_seen": 213141760, + "step": 67710 + }, + { + "epoch": 4.334869726650022, + "grad_norm": 7.217042446136475, + "learning_rate": 2.6476996200747144e-07, + "loss": 0.1333, + "num_input_tokens_seen": 213156736, + "step": 67715 + }, + { + "epoch": 4.335189808590999, + "grad_norm": 4.290158271789551, + "learning_rate": 2.6451980674893766e-07, + "loss": 0.1304, + "num_input_tokens_seen": 213171712, + "step": 67720 + }, + { + "epoch": 4.335509890531976, + "grad_norm": 4.44180965423584, + "learning_rate": 2.6426976311863947e-07, + "loss": 0.1565, + "num_input_tokens_seen": 213187200, + "step": 67725 + }, + { + "epoch": 4.335829972472953, + "grad_norm": 4.9849534034729, + "learning_rate": 2.640198311290648e-07, + "loss": 0.1332, + "num_input_tokens_seen": 213202112, + "step": 67730 + }, + { + "epoch": 4.33615005441393, + "grad_norm": 4.176053047180176, + "learning_rate": 2.6377001079269215e-07, + "loss": 0.1567, + "num_input_tokens_seen": 213217856, + "step": 67735 + }, + { + "epoch": 4.336470136354907, + "grad_norm": 8.501351356506348, + "learning_rate": 2.635203021219976e-07, + "loss": 0.1348, + "num_input_tokens_seen": 213232256, + "step": 67740 + }, + { + "epoch": 4.336790218295883, + "grad_norm": 8.839070320129395, + "learning_rate": 2.6327070512944947e-07, + "loss": 0.1511, + "num_input_tokens_seen": 213247488, + "step": 67745 + }, + { + "epoch": 4.33711030023686, + "grad_norm": 6.035251140594482, + "learning_rate": 2.6302121982751144e-07, + "loss": 0.1696, + "num_input_tokens_seen": 213262272, + "step": 67750 + }, + { + "epoch": 4.337430382177837, + "grad_norm": 3.7447519302368164, + "learning_rate": 2.6277184622864245e-07, + "loss": 0.1443, + "num_input_tokens_seen": 213278528, + "step": 67755 + }, + { + "epoch": 4.337750464118814, + "grad_norm": 5.01539945602417, + "learning_rate": 2.625225843452933e-07, + "loss": 0.1079, + "num_input_tokens_seen": 213294208, + "step": 67760 + }, + { + "epoch": 4.338070546059791, + "grad_norm": 7.325087547302246, + "learning_rate": 2.6227343418991246e-07, + "loss": 0.1091, + "num_input_tokens_seen": 213309440, + "step": 67765 + }, + { + "epoch": 4.338390628000768, + "grad_norm": 6.623265743255615, + "learning_rate": 2.6202439577493966e-07, + "loss": 0.1213, + "num_input_tokens_seen": 213326464, + "step": 67770 + }, + { + "epoch": 4.338710709941745, + "grad_norm": 8.35936450958252, + "learning_rate": 2.6177546911281185e-07, + "loss": 0.1333, + "num_input_tokens_seen": 213342336, + "step": 67775 + }, + { + "epoch": 4.339030791882722, + "grad_norm": 10.889266967773438, + "learning_rate": 2.6152665421595865e-07, + "loss": 0.2306, + "num_input_tokens_seen": 213358080, + "step": 67780 + }, + { + "epoch": 4.339350873823699, + "grad_norm": 6.596691131591797, + "learning_rate": 2.6127795109680443e-07, + "loss": 0.128, + "num_input_tokens_seen": 213372672, + "step": 67785 + }, + { + "epoch": 4.339670955764676, + "grad_norm": 7.890226364135742, + "learning_rate": 2.6102935976776825e-07, + "loss": 0.1503, + "num_input_tokens_seen": 213388160, + "step": 67790 + }, + { + "epoch": 4.339991037705652, + "grad_norm": 7.337114334106445, + "learning_rate": 2.607808802412634e-07, + "loss": 0.0951, + "num_input_tokens_seen": 213403456, + "step": 67795 + }, + { + "epoch": 4.340311119646629, + "grad_norm": 7.363361835479736, + "learning_rate": 2.605325125296976e-07, + "loss": 0.1334, + "num_input_tokens_seen": 213420608, + "step": 67800 + }, + { + "epoch": 4.340631201587606, + "grad_norm": 6.135413646697998, + "learning_rate": 2.602842566454733e-07, + "loss": 0.1515, + "num_input_tokens_seen": 213436096, + "step": 67805 + }, + { + "epoch": 4.340951283528583, + "grad_norm": 3.8889341354370117, + "learning_rate": 2.6003611260098705e-07, + "loss": 0.0939, + "num_input_tokens_seen": 213452096, + "step": 67810 + }, + { + "epoch": 4.34127136546956, + "grad_norm": 4.888471603393555, + "learning_rate": 2.5978808040862976e-07, + "loss": 0.1105, + "num_input_tokens_seen": 213467200, + "step": 67815 + }, + { + "epoch": 4.341591447410537, + "grad_norm": 11.1681489944458, + "learning_rate": 2.5954016008078704e-07, + "loss": 0.1204, + "num_input_tokens_seen": 213482816, + "step": 67820 + }, + { + "epoch": 4.341911529351514, + "grad_norm": 7.666003227233887, + "learning_rate": 2.5929235162983845e-07, + "loss": 0.1339, + "num_input_tokens_seen": 213497664, + "step": 67825 + }, + { + "epoch": 4.342231611292491, + "grad_norm": 3.0516467094421387, + "learning_rate": 2.590446550681586e-07, + "loss": 0.1269, + "num_input_tokens_seen": 213513024, + "step": 67830 + }, + { + "epoch": 4.342551693233467, + "grad_norm": 5.891164779663086, + "learning_rate": 2.5879707040811527e-07, + "loss": 0.1238, + "num_input_tokens_seen": 213527872, + "step": 67835 + }, + { + "epoch": 4.342871775174444, + "grad_norm": 7.049712181091309, + "learning_rate": 2.585495976620733e-07, + "loss": 0.1203, + "num_input_tokens_seen": 213543424, + "step": 67840 + }, + { + "epoch": 4.343191857115421, + "grad_norm": 6.610733985900879, + "learning_rate": 2.583022368423882e-07, + "loss": 0.1121, + "num_input_tokens_seen": 213558144, + "step": 67845 + }, + { + "epoch": 4.343511939056398, + "grad_norm": 4.559146404266357, + "learning_rate": 2.5805498796141304e-07, + "loss": 0.1569, + "num_input_tokens_seen": 213573184, + "step": 67850 + }, + { + "epoch": 4.343832020997375, + "grad_norm": 5.692841529846191, + "learning_rate": 2.578078510314941e-07, + "loss": 0.1484, + "num_input_tokens_seen": 213587712, + "step": 67855 + }, + { + "epoch": 4.344152102938352, + "grad_norm": 6.057157516479492, + "learning_rate": 2.57560826064972e-07, + "loss": 0.123, + "num_input_tokens_seen": 213603456, + "step": 67860 + }, + { + "epoch": 4.344472184879329, + "grad_norm": 27.30727767944336, + "learning_rate": 2.573139130741817e-07, + "loss": 0.1645, + "num_input_tokens_seen": 213619264, + "step": 67865 + }, + { + "epoch": 4.344792266820306, + "grad_norm": 16.97752571105957, + "learning_rate": 2.570671120714524e-07, + "loss": 0.1573, + "num_input_tokens_seen": 213635392, + "step": 67870 + }, + { + "epoch": 4.345112348761283, + "grad_norm": 7.297659397125244, + "learning_rate": 2.5682042306910957e-07, + "loss": 0.1601, + "num_input_tokens_seen": 213652224, + "step": 67875 + }, + { + "epoch": 4.345432430702259, + "grad_norm": 6.505072593688965, + "learning_rate": 2.565738460794695e-07, + "loss": 0.1461, + "num_input_tokens_seen": 213667712, + "step": 67880 + }, + { + "epoch": 4.345752512643236, + "grad_norm": 7.795975685119629, + "learning_rate": 2.5632738111484644e-07, + "loss": 0.1313, + "num_input_tokens_seen": 213681984, + "step": 67885 + }, + { + "epoch": 4.346072594584213, + "grad_norm": 11.805654525756836, + "learning_rate": 2.5608102818754725e-07, + "loss": 0.1267, + "num_input_tokens_seen": 213696768, + "step": 67890 + }, + { + "epoch": 4.34639267652519, + "grad_norm": 11.477078437805176, + "learning_rate": 2.558347873098732e-07, + "loss": 0.1883, + "num_input_tokens_seen": 213712256, + "step": 67895 + }, + { + "epoch": 4.346712758466167, + "grad_norm": 1.9607453346252441, + "learning_rate": 2.555886584941203e-07, + "loss": 0.1275, + "num_input_tokens_seen": 213727552, + "step": 67900 + }, + { + "epoch": 4.347032840407144, + "grad_norm": 10.33537769317627, + "learning_rate": 2.553426417525792e-07, + "loss": 0.1666, + "num_input_tokens_seen": 213743488, + "step": 67905 + }, + { + "epoch": 4.347352922348121, + "grad_norm": 5.940270900726318, + "learning_rate": 2.5509673709753465e-07, + "loss": 0.1325, + "num_input_tokens_seen": 213758592, + "step": 67910 + }, + { + "epoch": 4.347673004289098, + "grad_norm": 4.732336521148682, + "learning_rate": 2.548509445412659e-07, + "loss": 0.125, + "num_input_tokens_seen": 213773824, + "step": 67915 + }, + { + "epoch": 4.347993086230075, + "grad_norm": 6.606586933135986, + "learning_rate": 2.5460526409604586e-07, + "loss": 0.1177, + "num_input_tokens_seen": 213788416, + "step": 67920 + }, + { + "epoch": 4.348313168171051, + "grad_norm": 8.511772155761719, + "learning_rate": 2.5435969577414345e-07, + "loss": 0.1054, + "num_input_tokens_seen": 213803136, + "step": 67925 + }, + { + "epoch": 4.348633250112028, + "grad_norm": 5.129955768585205, + "learning_rate": 2.541142395878207e-07, + "loss": 0.1535, + "num_input_tokens_seen": 213819648, + "step": 67930 + }, + { + "epoch": 4.348953332053005, + "grad_norm": 6.720314979553223, + "learning_rate": 2.538688955493346e-07, + "loss": 0.1334, + "num_input_tokens_seen": 213835584, + "step": 67935 + }, + { + "epoch": 4.349273413993982, + "grad_norm": 5.513282299041748, + "learning_rate": 2.5362366367093603e-07, + "loss": 0.1374, + "num_input_tokens_seen": 213851968, + "step": 67940 + }, + { + "epoch": 4.349593495934959, + "grad_norm": 21.81499671936035, + "learning_rate": 2.5337854396487034e-07, + "loss": 0.1607, + "num_input_tokens_seen": 213868480, + "step": 67945 + }, + { + "epoch": 4.349913577875936, + "grad_norm": 5.5817413330078125, + "learning_rate": 2.5313353644337903e-07, + "loss": 0.1363, + "num_input_tokens_seen": 213885120, + "step": 67950 + }, + { + "epoch": 4.350233659816913, + "grad_norm": 5.809193134307861, + "learning_rate": 2.528886411186943e-07, + "loss": 0.1133, + "num_input_tokens_seen": 213902080, + "step": 67955 + }, + { + "epoch": 4.35055374175789, + "grad_norm": 5.952630996704102, + "learning_rate": 2.5264385800304687e-07, + "loss": 0.1242, + "num_input_tokens_seen": 213919232, + "step": 67960 + }, + { + "epoch": 4.350873823698867, + "grad_norm": 7.259697914123535, + "learning_rate": 2.523991871086584e-07, + "loss": 0.1588, + "num_input_tokens_seen": 213935360, + "step": 67965 + }, + { + "epoch": 4.351193905639843, + "grad_norm": 7.578164577484131, + "learning_rate": 2.5215462844774744e-07, + "loss": 0.1353, + "num_input_tokens_seen": 213950720, + "step": 67970 + }, + { + "epoch": 4.35151398758082, + "grad_norm": 4.048254489898682, + "learning_rate": 2.519101820325259e-07, + "loss": 0.1115, + "num_input_tokens_seen": 213967040, + "step": 67975 + }, + { + "epoch": 4.351834069521797, + "grad_norm": 5.549897193908691, + "learning_rate": 2.516658478751996e-07, + "loss": 0.143, + "num_input_tokens_seen": 213984384, + "step": 67980 + }, + { + "epoch": 4.352154151462774, + "grad_norm": 5.864795684814453, + "learning_rate": 2.5142162598797063e-07, + "loss": 0.1467, + "num_input_tokens_seen": 214000704, + "step": 67985 + }, + { + "epoch": 4.352474233403751, + "grad_norm": 8.367820739746094, + "learning_rate": 2.511775163830321e-07, + "loss": 0.1568, + "num_input_tokens_seen": 214019712, + "step": 67990 + }, + { + "epoch": 4.352794315344728, + "grad_norm": 10.074535369873047, + "learning_rate": 2.509335190725759e-07, + "loss": 0.0906, + "num_input_tokens_seen": 214035328, + "step": 67995 + }, + { + "epoch": 4.353114397285705, + "grad_norm": 6.758686542510986, + "learning_rate": 2.506896340687839e-07, + "loss": 0.1677, + "num_input_tokens_seen": 214050688, + "step": 68000 + }, + { + "epoch": 4.353434479226682, + "grad_norm": 13.189379692077637, + "learning_rate": 2.504458613838356e-07, + "loss": 0.1416, + "num_input_tokens_seen": 214065280, + "step": 68005 + }, + { + "epoch": 4.3537545611676585, + "grad_norm": 6.981067657470703, + "learning_rate": 2.502022010299035e-07, + "loss": 0.152, + "num_input_tokens_seen": 214081408, + "step": 68010 + }, + { + "epoch": 4.3540746431086355, + "grad_norm": 7.397358417510986, + "learning_rate": 2.4995865301915473e-07, + "loss": 0.2511, + "num_input_tokens_seen": 214097152, + "step": 68015 + }, + { + "epoch": 4.3543947250496124, + "grad_norm": 7.698946952819824, + "learning_rate": 2.49715217363751e-07, + "loss": 0.1267, + "num_input_tokens_seen": 214112832, + "step": 68020 + }, + { + "epoch": 4.354714806990589, + "grad_norm": 5.581218719482422, + "learning_rate": 2.4947189407584784e-07, + "loss": 0.1704, + "num_input_tokens_seen": 214128512, + "step": 68025 + }, + { + "epoch": 4.355034888931566, + "grad_norm": 6.188083171844482, + "learning_rate": 2.4922868316759527e-07, + "loss": 0.1271, + "num_input_tokens_seen": 214144512, + "step": 68030 + }, + { + "epoch": 4.355354970872543, + "grad_norm": 5.292924404144287, + "learning_rate": 2.4898558465113935e-07, + "loss": 0.1152, + "num_input_tokens_seen": 214160000, + "step": 68035 + }, + { + "epoch": 4.35567505281352, + "grad_norm": 8.121076583862305, + "learning_rate": 2.487425985386172e-07, + "loss": 0.1667, + "num_input_tokens_seen": 214176768, + "step": 68040 + }, + { + "epoch": 4.355995134754497, + "grad_norm": 44.1669807434082, + "learning_rate": 2.4849972484216387e-07, + "loss": 0.1652, + "num_input_tokens_seen": 214191552, + "step": 68045 + }, + { + "epoch": 4.356315216695474, + "grad_norm": 5.475996971130371, + "learning_rate": 2.4825696357390656e-07, + "loss": 0.1493, + "num_input_tokens_seen": 214206656, + "step": 68050 + }, + { + "epoch": 4.356635298636451, + "grad_norm": 6.277372360229492, + "learning_rate": 2.4801431474596766e-07, + "loss": 0.1189, + "num_input_tokens_seen": 214221952, + "step": 68055 + }, + { + "epoch": 4.3569553805774275, + "grad_norm": 5.7157416343688965, + "learning_rate": 2.477717783704636e-07, + "loss": 0.1283, + "num_input_tokens_seen": 214237376, + "step": 68060 + }, + { + "epoch": 4.3572754625184045, + "grad_norm": 7.608909606933594, + "learning_rate": 2.4752935445950535e-07, + "loss": 0.1263, + "num_input_tokens_seen": 214253632, + "step": 68065 + }, + { + "epoch": 4.3575955444593815, + "grad_norm": 7.348202228546143, + "learning_rate": 2.472870430251992e-07, + "loss": 0.1959, + "num_input_tokens_seen": 214269376, + "step": 68070 + }, + { + "epoch": 4.3579156264003585, + "grad_norm": 12.373557090759277, + "learning_rate": 2.4704484407964303e-07, + "loss": 0.1753, + "num_input_tokens_seen": 214284864, + "step": 68075 + }, + { + "epoch": 4.3582357083413354, + "grad_norm": 7.329690933227539, + "learning_rate": 2.4680275763493273e-07, + "loss": 0.1114, + "num_input_tokens_seen": 214300992, + "step": 68080 + }, + { + "epoch": 4.358555790282312, + "grad_norm": 6.078319072723389, + "learning_rate": 2.465607837031564e-07, + "loss": 0.1027, + "num_input_tokens_seen": 214316352, + "step": 68085 + }, + { + "epoch": 4.358875872223289, + "grad_norm": 7.259687423706055, + "learning_rate": 2.463189222963966e-07, + "loss": 0.1452, + "num_input_tokens_seen": 214333632, + "step": 68090 + }, + { + "epoch": 4.359195954164266, + "grad_norm": 6.003373146057129, + "learning_rate": 2.4607717342673086e-07, + "loss": 0.1413, + "num_input_tokens_seen": 214348480, + "step": 68095 + }, + { + "epoch": 4.3595160361052425, + "grad_norm": 6.046045303344727, + "learning_rate": 2.458355371062304e-07, + "loss": 0.2, + "num_input_tokens_seen": 214364288, + "step": 68100 + }, + { + "epoch": 4.3598361180462195, + "grad_norm": 5.74640417098999, + "learning_rate": 2.455940133469628e-07, + "loss": 0.1063, + "num_input_tokens_seen": 214379648, + "step": 68105 + }, + { + "epoch": 4.3601561999871965, + "grad_norm": 4.411043643951416, + "learning_rate": 2.4535260216098615e-07, + "loss": 0.1157, + "num_input_tokens_seen": 214394496, + "step": 68110 + }, + { + "epoch": 4.3604762819281735, + "grad_norm": 8.684246063232422, + "learning_rate": 2.451113035603572e-07, + "loss": 0.1066, + "num_input_tokens_seen": 214409664, + "step": 68115 + }, + { + "epoch": 4.3607963638691505, + "grad_norm": 6.491117000579834, + "learning_rate": 2.4487011755712443e-07, + "loss": 0.1429, + "num_input_tokens_seen": 214425600, + "step": 68120 + }, + { + "epoch": 4.3611164458101275, + "grad_norm": 6.152917385101318, + "learning_rate": 2.446290441633314e-07, + "loss": 0.1468, + "num_input_tokens_seen": 214439872, + "step": 68125 + }, + { + "epoch": 4.3614365277511045, + "grad_norm": 9.971014022827148, + "learning_rate": 2.4438808339101615e-07, + "loss": 0.1316, + "num_input_tokens_seen": 214454912, + "step": 68130 + }, + { + "epoch": 4.3617566096920815, + "grad_norm": 4.779713153839111, + "learning_rate": 2.441472352522109e-07, + "loss": 0.1236, + "num_input_tokens_seen": 214470208, + "step": 68135 + }, + { + "epoch": 4.3620766916330584, + "grad_norm": 6.744056701660156, + "learning_rate": 2.4390649975894244e-07, + "loss": 0.1019, + "num_input_tokens_seen": 214485888, + "step": 68140 + }, + { + "epoch": 4.3623967735740345, + "grad_norm": 5.064704418182373, + "learning_rate": 2.4366587692323196e-07, + "loss": 0.1106, + "num_input_tokens_seen": 214501696, + "step": 68145 + }, + { + "epoch": 4.3627168555150115, + "grad_norm": 17.110652923583984, + "learning_rate": 2.4342536675709406e-07, + "loss": 0.1187, + "num_input_tokens_seen": 214517184, + "step": 68150 + }, + { + "epoch": 4.3630369374559885, + "grad_norm": 6.722414970397949, + "learning_rate": 2.431849692725399e-07, + "loss": 0.1068, + "num_input_tokens_seen": 214533120, + "step": 68155 + }, + { + "epoch": 4.3633570193969655, + "grad_norm": 5.54644250869751, + "learning_rate": 2.4294468448157304e-07, + "loss": 0.0825, + "num_input_tokens_seen": 214547904, + "step": 68160 + }, + { + "epoch": 4.3636771013379425, + "grad_norm": 4.196951866149902, + "learning_rate": 2.4270451239619186e-07, + "loss": 0.0626, + "num_input_tokens_seen": 214564160, + "step": 68165 + }, + { + "epoch": 4.3639971832789195, + "grad_norm": 3.945397138595581, + "learning_rate": 2.424644530283898e-07, + "loss": 0.1123, + "num_input_tokens_seen": 214578368, + "step": 68170 + }, + { + "epoch": 4.3643172652198965, + "grad_norm": 7.8508219718933105, + "learning_rate": 2.422245063901532e-07, + "loss": 0.1215, + "num_input_tokens_seen": 214594496, + "step": 68175 + }, + { + "epoch": 4.3646373471608735, + "grad_norm": 7.833642482757568, + "learning_rate": 2.4198467249346547e-07, + "loss": 0.1454, + "num_input_tokens_seen": 214610880, + "step": 68180 + }, + { + "epoch": 4.3649574291018505, + "grad_norm": 6.5405073165893555, + "learning_rate": 2.417449513503009e-07, + "loss": 0.1073, + "num_input_tokens_seen": 214626816, + "step": 68185 + }, + { + "epoch": 4.365277511042827, + "grad_norm": 5.581280708312988, + "learning_rate": 2.4150534297263135e-07, + "loss": 0.1574, + "num_input_tokens_seen": 214642048, + "step": 68190 + }, + { + "epoch": 4.365597592983804, + "grad_norm": 4.0587029457092285, + "learning_rate": 2.4126584737241996e-07, + "loss": 0.1334, + "num_input_tokens_seen": 214657664, + "step": 68195 + }, + { + "epoch": 4.365917674924781, + "grad_norm": 5.464590549468994, + "learning_rate": 2.410264645616273e-07, + "loss": 0.1465, + "num_input_tokens_seen": 214673600, + "step": 68200 + }, + { + "epoch": 4.3662377568657575, + "grad_norm": 7.055979251861572, + "learning_rate": 2.407871945522064e-07, + "loss": 0.1223, + "num_input_tokens_seen": 214688576, + "step": 68205 + }, + { + "epoch": 4.3665578388067345, + "grad_norm": 6.174255847930908, + "learning_rate": 2.4054803735610535e-07, + "loss": 0.1337, + "num_input_tokens_seen": 214703232, + "step": 68210 + }, + { + "epoch": 4.3668779207477115, + "grad_norm": 5.4480462074279785, + "learning_rate": 2.403089929852659e-07, + "loss": 0.1444, + "num_input_tokens_seen": 214718528, + "step": 68215 + }, + { + "epoch": 4.3671980026886885, + "grad_norm": 8.793130874633789, + "learning_rate": 2.400700614516249e-07, + "loss": 0.1182, + "num_input_tokens_seen": 214734400, + "step": 68220 + }, + { + "epoch": 4.3675180846296655, + "grad_norm": 6.470193862915039, + "learning_rate": 2.39831242767114e-07, + "loss": 0.1145, + "num_input_tokens_seen": 214750720, + "step": 68225 + }, + { + "epoch": 4.3678381665706425, + "grad_norm": 7.875886917114258, + "learning_rate": 2.3959253694365773e-07, + "loss": 0.1521, + "num_input_tokens_seen": 214767168, + "step": 68230 + }, + { + "epoch": 4.368158248511619, + "grad_norm": 5.99756383895874, + "learning_rate": 2.3935394399317635e-07, + "loss": 0.1359, + "num_input_tokens_seen": 214782592, + "step": 68235 + }, + { + "epoch": 4.368478330452596, + "grad_norm": 6.438555717468262, + "learning_rate": 2.3911546392758387e-07, + "loss": 0.1161, + "num_input_tokens_seen": 214797632, + "step": 68240 + }, + { + "epoch": 4.368798412393573, + "grad_norm": 8.0603609085083, + "learning_rate": 2.3887709675878835e-07, + "loss": 0.1449, + "num_input_tokens_seen": 214813888, + "step": 68245 + }, + { + "epoch": 4.36911849433455, + "grad_norm": 5.070333003997803, + "learning_rate": 2.38638842498693e-07, + "loss": 0.1307, + "num_input_tokens_seen": 214828224, + "step": 68250 + }, + { + "epoch": 4.369438576275527, + "grad_norm": 4.997121334075928, + "learning_rate": 2.3840070115919462e-07, + "loss": 0.1223, + "num_input_tokens_seen": 214843200, + "step": 68255 + }, + { + "epoch": 4.3697586582165036, + "grad_norm": 9.310190200805664, + "learning_rate": 2.381626727521849e-07, + "loss": 0.1461, + "num_input_tokens_seen": 214859392, + "step": 68260 + }, + { + "epoch": 4.3700787401574805, + "grad_norm": 7.33721399307251, + "learning_rate": 2.3792475728955046e-07, + "loss": 0.1309, + "num_input_tokens_seen": 214875584, + "step": 68265 + }, + { + "epoch": 4.3703988220984575, + "grad_norm": 8.308321952819824, + "learning_rate": 2.3768695478317005e-07, + "loss": 0.1253, + "num_input_tokens_seen": 214890816, + "step": 68270 + }, + { + "epoch": 4.370718904039434, + "grad_norm": 7.7968058586120605, + "learning_rate": 2.374492652449198e-07, + "loss": 0.1477, + "num_input_tokens_seen": 214905728, + "step": 68275 + }, + { + "epoch": 4.371038985980411, + "grad_norm": 7.642432689666748, + "learning_rate": 2.3721168868666793e-07, + "loss": 0.1718, + "num_input_tokens_seen": 214921792, + "step": 68280 + }, + { + "epoch": 4.371359067921388, + "grad_norm": 8.021454811096191, + "learning_rate": 2.369742251202778e-07, + "loss": 0.1627, + "num_input_tokens_seen": 214936256, + "step": 68285 + }, + { + "epoch": 4.371679149862365, + "grad_norm": 3.4975903034210205, + "learning_rate": 2.3673687455760735e-07, + "loss": 0.0926, + "num_input_tokens_seen": 214951936, + "step": 68290 + }, + { + "epoch": 4.371999231803342, + "grad_norm": 6.8458476066589355, + "learning_rate": 2.3649963701050772e-07, + "loss": 0.125, + "num_input_tokens_seen": 214966464, + "step": 68295 + }, + { + "epoch": 4.372319313744319, + "grad_norm": 4.756906032562256, + "learning_rate": 2.3626251249082744e-07, + "loss": 0.1055, + "num_input_tokens_seen": 214983104, + "step": 68300 + }, + { + "epoch": 4.372639395685296, + "grad_norm": 6.418999195098877, + "learning_rate": 2.3602550101040454e-07, + "loss": 0.1297, + "num_input_tokens_seen": 214999360, + "step": 68305 + }, + { + "epoch": 4.372959477626273, + "grad_norm": 5.318027019500732, + "learning_rate": 2.3578860258107628e-07, + "loss": 0.1403, + "num_input_tokens_seen": 215016640, + "step": 68310 + }, + { + "epoch": 4.37327955956725, + "grad_norm": 6.598598480224609, + "learning_rate": 2.355518172146712e-07, + "loss": 0.1308, + "num_input_tokens_seen": 215031808, + "step": 68315 + }, + { + "epoch": 4.3735996415082266, + "grad_norm": 8.850607872009277, + "learning_rate": 2.3531514492301345e-07, + "loss": 0.1332, + "num_input_tokens_seen": 215049024, + "step": 68320 + }, + { + "epoch": 4.373919723449203, + "grad_norm": 10.077116966247559, + "learning_rate": 2.3507858571792081e-07, + "loss": 0.1569, + "num_input_tokens_seen": 215064512, + "step": 68325 + }, + { + "epoch": 4.37423980539018, + "grad_norm": 6.915741920471191, + "learning_rate": 2.3484213961120577e-07, + "loss": 0.1687, + "num_input_tokens_seen": 215079936, + "step": 68330 + }, + { + "epoch": 4.374559887331157, + "grad_norm": 7.291201591491699, + "learning_rate": 2.3460580661467642e-07, + "loss": 0.1534, + "num_input_tokens_seen": 215095552, + "step": 68335 + }, + { + "epoch": 4.374879969272134, + "grad_norm": 8.617464065551758, + "learning_rate": 2.3436958674013216e-07, + "loss": 0.1338, + "num_input_tokens_seen": 215111232, + "step": 68340 + }, + { + "epoch": 4.375200051213111, + "grad_norm": 14.986099243164062, + "learning_rate": 2.3413347999936998e-07, + "loss": 0.186, + "num_input_tokens_seen": 215126080, + "step": 68345 + }, + { + "epoch": 4.375520133154088, + "grad_norm": 2.871467351913452, + "learning_rate": 2.338974864041793e-07, + "loss": 0.0991, + "num_input_tokens_seen": 215141184, + "step": 68350 + }, + { + "epoch": 4.375840215095065, + "grad_norm": 8.409286499023438, + "learning_rate": 2.3366160596634436e-07, + "loss": 0.1234, + "num_input_tokens_seen": 215155968, + "step": 68355 + }, + { + "epoch": 4.376160297036042, + "grad_norm": 6.913079738616943, + "learning_rate": 2.33425838697644e-07, + "loss": 0.1445, + "num_input_tokens_seen": 215172160, + "step": 68360 + }, + { + "epoch": 4.376480378977018, + "grad_norm": 10.081184387207031, + "learning_rate": 2.331901846098511e-07, + "loss": 0.1426, + "num_input_tokens_seen": 215188160, + "step": 68365 + }, + { + "epoch": 4.376800460917995, + "grad_norm": 7.921389102935791, + "learning_rate": 2.3295464371473287e-07, + "loss": 0.1611, + "num_input_tokens_seen": 215204544, + "step": 68370 + }, + { + "epoch": 4.377120542858972, + "grad_norm": 6.580147743225098, + "learning_rate": 2.3271921602405129e-07, + "loss": 0.1618, + "num_input_tokens_seen": 215220864, + "step": 68375 + }, + { + "epoch": 4.377440624799949, + "grad_norm": 6.706888675689697, + "learning_rate": 2.324839015495617e-07, + "loss": 0.1636, + "num_input_tokens_seen": 215236352, + "step": 68380 + }, + { + "epoch": 4.377760706740926, + "grad_norm": 6.097093105316162, + "learning_rate": 2.3224870030301577e-07, + "loss": 0.1369, + "num_input_tokens_seen": 215252928, + "step": 68385 + }, + { + "epoch": 4.378080788681903, + "grad_norm": 11.13411808013916, + "learning_rate": 2.3201361229615694e-07, + "loss": 0.1762, + "num_input_tokens_seen": 215270080, + "step": 68390 + }, + { + "epoch": 4.37840087062288, + "grad_norm": 3.1120216846466064, + "learning_rate": 2.3177863754072493e-07, + "loss": 0.1265, + "num_input_tokens_seen": 215287808, + "step": 68395 + }, + { + "epoch": 4.378720952563857, + "grad_norm": 2.070814847946167, + "learning_rate": 2.3154377604845318e-07, + "loss": 0.0845, + "num_input_tokens_seen": 215301824, + "step": 68400 + }, + { + "epoch": 4.379041034504834, + "grad_norm": 4.399643898010254, + "learning_rate": 2.3130902783106867e-07, + "loss": 0.1127, + "num_input_tokens_seen": 215317440, + "step": 68405 + }, + { + "epoch": 4.37936111644581, + "grad_norm": 7.797565937042236, + "learning_rate": 2.310743929002951e-07, + "loss": 0.1349, + "num_input_tokens_seen": 215333952, + "step": 68410 + }, + { + "epoch": 4.379681198386787, + "grad_norm": 6.9666056632995605, + "learning_rate": 2.3083987126784723e-07, + "loss": 0.126, + "num_input_tokens_seen": 215350272, + "step": 68415 + }, + { + "epoch": 4.380001280327764, + "grad_norm": 11.914999008178711, + "learning_rate": 2.306054629454374e-07, + "loss": 0.1614, + "num_input_tokens_seen": 215365760, + "step": 68420 + }, + { + "epoch": 4.380321362268741, + "grad_norm": 10.611661911010742, + "learning_rate": 2.303711679447687e-07, + "loss": 0.1559, + "num_input_tokens_seen": 215382912, + "step": 68425 + }, + { + "epoch": 4.380641444209718, + "grad_norm": 8.174426078796387, + "learning_rate": 2.3013698627754267e-07, + "loss": 0.1158, + "num_input_tokens_seen": 215399744, + "step": 68430 + }, + { + "epoch": 4.380961526150695, + "grad_norm": 8.332947731018066, + "learning_rate": 2.299029179554521e-07, + "loss": 0.1577, + "num_input_tokens_seen": 215414912, + "step": 68435 + }, + { + "epoch": 4.381281608091672, + "grad_norm": 6.742274284362793, + "learning_rate": 2.296689629901852e-07, + "loss": 0.1296, + "num_input_tokens_seen": 215430464, + "step": 68440 + }, + { + "epoch": 4.381601690032649, + "grad_norm": 2.581009864807129, + "learning_rate": 2.2943512139342455e-07, + "loss": 0.0872, + "num_input_tokens_seen": 215446336, + "step": 68445 + }, + { + "epoch": 4.381921771973626, + "grad_norm": 10.15190315246582, + "learning_rate": 2.2920139317684665e-07, + "loss": 0.1934, + "num_input_tokens_seen": 215461312, + "step": 68450 + }, + { + "epoch": 4.382241853914602, + "grad_norm": 13.782634735107422, + "learning_rate": 2.2896777835212354e-07, + "loss": 0.1609, + "num_input_tokens_seen": 215477504, + "step": 68455 + }, + { + "epoch": 4.382561935855579, + "grad_norm": 10.32253360748291, + "learning_rate": 2.2873427693092003e-07, + "loss": 0.1693, + "num_input_tokens_seen": 215492416, + "step": 68460 + }, + { + "epoch": 4.382882017796556, + "grad_norm": 4.753645420074463, + "learning_rate": 2.285008889248963e-07, + "loss": 0.1326, + "num_input_tokens_seen": 215508800, + "step": 68465 + }, + { + "epoch": 4.383202099737533, + "grad_norm": 8.729572296142578, + "learning_rate": 2.2826761434570633e-07, + "loss": 0.1533, + "num_input_tokens_seen": 215524160, + "step": 68470 + }, + { + "epoch": 4.38352218167851, + "grad_norm": 7.274385929107666, + "learning_rate": 2.2803445320499856e-07, + "loss": 0.0927, + "num_input_tokens_seen": 215540416, + "step": 68475 + }, + { + "epoch": 4.383842263619487, + "grad_norm": 6.338737964630127, + "learning_rate": 2.2780140551441621e-07, + "loss": 0.1327, + "num_input_tokens_seen": 215557120, + "step": 68480 + }, + { + "epoch": 4.384162345560464, + "grad_norm": 8.792352676391602, + "learning_rate": 2.2756847128559606e-07, + "loss": 0.1328, + "num_input_tokens_seen": 215572992, + "step": 68485 + }, + { + "epoch": 4.384482427501441, + "grad_norm": 7.506782531738281, + "learning_rate": 2.2733565053016938e-07, + "loss": 0.1588, + "num_input_tokens_seen": 215587712, + "step": 68490 + }, + { + "epoch": 4.384802509442418, + "grad_norm": 22.68720054626465, + "learning_rate": 2.2710294325976324e-07, + "loss": 0.2038, + "num_input_tokens_seen": 215603456, + "step": 68495 + }, + { + "epoch": 4.385122591383394, + "grad_norm": 9.159598350524902, + "learning_rate": 2.2687034948599613e-07, + "loss": 0.1214, + "num_input_tokens_seen": 215619520, + "step": 68500 + }, + { + "epoch": 4.385442673324371, + "grad_norm": 12.357356071472168, + "learning_rate": 2.2663786922048402e-07, + "loss": 0.1287, + "num_input_tokens_seen": 215633856, + "step": 68505 + }, + { + "epoch": 4.385762755265348, + "grad_norm": 8.335542678833008, + "learning_rate": 2.2640550247483517e-07, + "loss": 0.1602, + "num_input_tokens_seen": 215649280, + "step": 68510 + }, + { + "epoch": 4.386082837206325, + "grad_norm": 6.97014856338501, + "learning_rate": 2.2617324926065276e-07, + "loss": 0.1408, + "num_input_tokens_seen": 215664192, + "step": 68515 + }, + { + "epoch": 4.386402919147302, + "grad_norm": 5.025457382202148, + "learning_rate": 2.2594110958953448e-07, + "loss": 0.1775, + "num_input_tokens_seen": 215681024, + "step": 68520 + }, + { + "epoch": 4.386723001088279, + "grad_norm": 7.134970664978027, + "learning_rate": 2.257090834730713e-07, + "loss": 0.1583, + "num_input_tokens_seen": 215697216, + "step": 68525 + }, + { + "epoch": 4.387043083029256, + "grad_norm": 8.533008575439453, + "learning_rate": 2.2547717092285148e-07, + "loss": 0.131, + "num_input_tokens_seen": 215713536, + "step": 68530 + }, + { + "epoch": 4.387363164970233, + "grad_norm": 6.851417541503906, + "learning_rate": 2.25245371950453e-07, + "loss": 0.1456, + "num_input_tokens_seen": 215729280, + "step": 68535 + }, + { + "epoch": 4.387683246911209, + "grad_norm": 14.544195175170898, + "learning_rate": 2.2501368656745236e-07, + "loss": 0.1976, + "num_input_tokens_seen": 215745664, + "step": 68540 + }, + { + "epoch": 4.388003328852186, + "grad_norm": 9.137605667114258, + "learning_rate": 2.2478211478541818e-07, + "loss": 0.1521, + "num_input_tokens_seen": 215760192, + "step": 68545 + }, + { + "epoch": 4.388323410793163, + "grad_norm": 7.801126956939697, + "learning_rate": 2.245506566159142e-07, + "loss": 0.1721, + "num_input_tokens_seen": 215776896, + "step": 68550 + }, + { + "epoch": 4.38864349273414, + "grad_norm": 5.1992387771606445, + "learning_rate": 2.2431931207049784e-07, + "loss": 0.1071, + "num_input_tokens_seen": 215793920, + "step": 68555 + }, + { + "epoch": 4.388963574675117, + "grad_norm": 4.158491134643555, + "learning_rate": 2.2408808116072155e-07, + "loss": 0.1205, + "num_input_tokens_seen": 215809856, + "step": 68560 + }, + { + "epoch": 4.389283656616094, + "grad_norm": 7.442516803741455, + "learning_rate": 2.2385696389813165e-07, + "loss": 0.1373, + "num_input_tokens_seen": 215824704, + "step": 68565 + }, + { + "epoch": 4.389603738557071, + "grad_norm": 7.179439067840576, + "learning_rate": 2.2362596029426865e-07, + "loss": 0.16, + "num_input_tokens_seen": 215840064, + "step": 68570 + }, + { + "epoch": 4.389923820498048, + "grad_norm": 10.298540115356445, + "learning_rate": 2.2339507036066826e-07, + "loss": 0.1306, + "num_input_tokens_seen": 215855296, + "step": 68575 + }, + { + "epoch": 4.390243902439025, + "grad_norm": 4.10420560836792, + "learning_rate": 2.2316429410885991e-07, + "loss": 0.0857, + "num_input_tokens_seen": 215870976, + "step": 68580 + }, + { + "epoch": 4.390563984380002, + "grad_norm": 6.147488117218018, + "learning_rate": 2.229336315503669e-07, + "loss": 0.117, + "num_input_tokens_seen": 215886848, + "step": 68585 + }, + { + "epoch": 4.390884066320978, + "grad_norm": 10.988699913024902, + "learning_rate": 2.2270308269670748e-07, + "loss": 0.1023, + "num_input_tokens_seen": 215902144, + "step": 68590 + }, + { + "epoch": 4.391204148261955, + "grad_norm": 7.528984069824219, + "learning_rate": 2.2247264755939414e-07, + "loss": 0.1323, + "num_input_tokens_seen": 215918208, + "step": 68595 + }, + { + "epoch": 4.391524230202932, + "grad_norm": 4.8275346755981445, + "learning_rate": 2.2224232614993373e-07, + "loss": 0.1176, + "num_input_tokens_seen": 215934848, + "step": 68600 + }, + { + "epoch": 4.391844312143909, + "grad_norm": 7.5467095375061035, + "learning_rate": 2.220121184798271e-07, + "loss": 0.1315, + "num_input_tokens_seen": 215950016, + "step": 68605 + }, + { + "epoch": 4.392164394084886, + "grad_norm": 19.940298080444336, + "learning_rate": 2.2178202456056924e-07, + "loss": 0.1965, + "num_input_tokens_seen": 215964864, + "step": 68610 + }, + { + "epoch": 4.392484476025863, + "grad_norm": 4.366911888122559, + "learning_rate": 2.215520444036509e-07, + "loss": 0.1214, + "num_input_tokens_seen": 215980224, + "step": 68615 + }, + { + "epoch": 4.39280455796684, + "grad_norm": 9.211626052856445, + "learning_rate": 2.2132217802055488e-07, + "loss": 0.1851, + "num_input_tokens_seen": 215995456, + "step": 68620 + }, + { + "epoch": 4.393124639907817, + "grad_norm": 10.387260437011719, + "learning_rate": 2.2109242542276032e-07, + "loss": 0.1647, + "num_input_tokens_seen": 216011264, + "step": 68625 + }, + { + "epoch": 4.393444721848793, + "grad_norm": 8.807304382324219, + "learning_rate": 2.208627866217397e-07, + "loss": 0.1623, + "num_input_tokens_seen": 216026624, + "step": 68630 + }, + { + "epoch": 4.39376480378977, + "grad_norm": 7.5952887535095215, + "learning_rate": 2.2063326162895972e-07, + "loss": 0.176, + "num_input_tokens_seen": 216041984, + "step": 68635 + }, + { + "epoch": 4.394084885730747, + "grad_norm": 8.463459968566895, + "learning_rate": 2.2040385045588253e-07, + "loss": 0.1405, + "num_input_tokens_seen": 216057024, + "step": 68640 + }, + { + "epoch": 4.394404967671724, + "grad_norm": 6.048543930053711, + "learning_rate": 2.2017455311396208e-07, + "loss": 0.1438, + "num_input_tokens_seen": 216074496, + "step": 68645 + }, + { + "epoch": 4.394725049612701, + "grad_norm": 10.105977058410645, + "learning_rate": 2.199453696146503e-07, + "loss": 0.1325, + "num_input_tokens_seen": 216090880, + "step": 68650 + }, + { + "epoch": 4.395045131553678, + "grad_norm": 8.639942169189453, + "learning_rate": 2.1971629996938943e-07, + "loss": 0.1628, + "num_input_tokens_seen": 216106624, + "step": 68655 + }, + { + "epoch": 4.395365213494655, + "grad_norm": 7.1931376457214355, + "learning_rate": 2.1948734418961948e-07, + "loss": 0.1105, + "num_input_tokens_seen": 216122752, + "step": 68660 + }, + { + "epoch": 4.395685295435632, + "grad_norm": 8.061334609985352, + "learning_rate": 2.192585022867727e-07, + "loss": 0.1333, + "num_input_tokens_seen": 216137536, + "step": 68665 + }, + { + "epoch": 4.396005377376609, + "grad_norm": 4.877218723297119, + "learning_rate": 2.1902977427227662e-07, + "loss": 0.1093, + "num_input_tokens_seen": 216152832, + "step": 68670 + }, + { + "epoch": 4.396325459317585, + "grad_norm": 5.792323589324951, + "learning_rate": 2.1880116015755236e-07, + "loss": 0.0979, + "num_input_tokens_seen": 216168128, + "step": 68675 + }, + { + "epoch": 4.396645541258562, + "grad_norm": 4.794584274291992, + "learning_rate": 2.185726599540153e-07, + "loss": 0.139, + "num_input_tokens_seen": 216183040, + "step": 68680 + }, + { + "epoch": 4.396965623199539, + "grad_norm": 4.163796424865723, + "learning_rate": 2.183442736730765e-07, + "loss": 0.1028, + "num_input_tokens_seen": 216198272, + "step": 68685 + }, + { + "epoch": 4.397285705140516, + "grad_norm": 4.216004848480225, + "learning_rate": 2.1811600132613992e-07, + "loss": 0.1174, + "num_input_tokens_seen": 216213440, + "step": 68690 + }, + { + "epoch": 4.397605787081493, + "grad_norm": 7.549151420593262, + "learning_rate": 2.1788784292460452e-07, + "loss": 0.1425, + "num_input_tokens_seen": 216230528, + "step": 68695 + }, + { + "epoch": 4.39792586902247, + "grad_norm": 6.313313007354736, + "learning_rate": 2.1765979847986313e-07, + "loss": 0.1405, + "num_input_tokens_seen": 216246400, + "step": 68700 + }, + { + "epoch": 4.398245950963447, + "grad_norm": 8.42365550994873, + "learning_rate": 2.1743186800330301e-07, + "loss": 0.16, + "num_input_tokens_seen": 216262016, + "step": 68705 + }, + { + "epoch": 4.398566032904424, + "grad_norm": 12.212480545043945, + "learning_rate": 2.1720405150630591e-07, + "loss": 0.172, + "num_input_tokens_seen": 216277376, + "step": 68710 + }, + { + "epoch": 4.398886114845401, + "grad_norm": 7.175004482269287, + "learning_rate": 2.16976349000248e-07, + "loss": 0.1409, + "num_input_tokens_seen": 216291968, + "step": 68715 + }, + { + "epoch": 4.399206196786377, + "grad_norm": 9.334477424621582, + "learning_rate": 2.167487604964988e-07, + "loss": 0.1377, + "num_input_tokens_seen": 216307712, + "step": 68720 + }, + { + "epoch": 4.399526278727354, + "grad_norm": 7.819352149963379, + "learning_rate": 2.1652128600642447e-07, + "loss": 0.1266, + "num_input_tokens_seen": 216322368, + "step": 68725 + }, + { + "epoch": 4.399846360668331, + "grad_norm": 5.0678629875183105, + "learning_rate": 2.1629392554138207e-07, + "loss": 0.1413, + "num_input_tokens_seen": 216337344, + "step": 68730 + }, + { + "epoch": 4.400166442609308, + "grad_norm": 4.789564609527588, + "learning_rate": 2.1606667911272637e-07, + "loss": 0.0947, + "num_input_tokens_seen": 216351680, + "step": 68735 + }, + { + "epoch": 4.400486524550285, + "grad_norm": 145.9189453125, + "learning_rate": 2.1583954673180335e-07, + "loss": 0.1737, + "num_input_tokens_seen": 216367936, + "step": 68740 + }, + { + "epoch": 4.400806606491262, + "grad_norm": 10.072610855102539, + "learning_rate": 2.1561252840995584e-07, + "loss": 0.111, + "num_input_tokens_seen": 216383936, + "step": 68745 + }, + { + "epoch": 4.401126688432239, + "grad_norm": 12.298395156860352, + "learning_rate": 2.1538562415852005e-07, + "loss": 0.1774, + "num_input_tokens_seen": 216399040, + "step": 68750 + }, + { + "epoch": 4.401446770373216, + "grad_norm": 5.9276862144470215, + "learning_rate": 2.1515883398882553e-07, + "loss": 0.1669, + "num_input_tokens_seen": 216414656, + "step": 68755 + }, + { + "epoch": 4.401766852314193, + "grad_norm": 10.912825584411621, + "learning_rate": 2.1493215791219823e-07, + "loss": 0.1553, + "num_input_tokens_seen": 216430336, + "step": 68760 + }, + { + "epoch": 4.402086934255169, + "grad_norm": 8.543196678161621, + "learning_rate": 2.1470559593995577e-07, + "loss": 0.1275, + "num_input_tokens_seen": 216447296, + "step": 68765 + }, + { + "epoch": 4.402407016196146, + "grad_norm": 7.7332682609558105, + "learning_rate": 2.1447914808341242e-07, + "loss": 0.1531, + "num_input_tokens_seen": 216462016, + "step": 68770 + }, + { + "epoch": 4.402727098137123, + "grad_norm": 8.019559860229492, + "learning_rate": 2.142528143538758e-07, + "loss": 0.1497, + "num_input_tokens_seen": 216478336, + "step": 68775 + }, + { + "epoch": 4.4030471800781, + "grad_norm": 6.87905216217041, + "learning_rate": 2.140265947626477e-07, + "loss": 0.1606, + "num_input_tokens_seen": 216494336, + "step": 68780 + }, + { + "epoch": 4.403367262019077, + "grad_norm": 7.830920696258545, + "learning_rate": 2.138004893210241e-07, + "loss": 0.1079, + "num_input_tokens_seen": 216509312, + "step": 68785 + }, + { + "epoch": 4.403687343960054, + "grad_norm": 6.696734428405762, + "learning_rate": 2.1357449804029567e-07, + "loss": 0.1321, + "num_input_tokens_seen": 216526848, + "step": 68790 + }, + { + "epoch": 4.404007425901031, + "grad_norm": 7.655062198638916, + "learning_rate": 2.133486209317473e-07, + "loss": 0.1595, + "num_input_tokens_seen": 216544448, + "step": 68795 + }, + { + "epoch": 4.404327507842008, + "grad_norm": 4.8630266189575195, + "learning_rate": 2.1312285800665745e-07, + "loss": 0.1079, + "num_input_tokens_seen": 216559872, + "step": 68800 + }, + { + "epoch": 4.404647589782984, + "grad_norm": 4.794332504272461, + "learning_rate": 2.1289720927630069e-07, + "loss": 0.0746, + "num_input_tokens_seen": 216574464, + "step": 68805 + }, + { + "epoch": 4.404967671723961, + "grad_norm": 4.631940841674805, + "learning_rate": 2.1267167475194444e-07, + "loss": 0.1298, + "num_input_tokens_seen": 216590080, + "step": 68810 + }, + { + "epoch": 4.405287753664938, + "grad_norm": 3.0557589530944824, + "learning_rate": 2.1244625444485023e-07, + "loss": 0.1356, + "num_input_tokens_seen": 216606528, + "step": 68815 + }, + { + "epoch": 4.405607835605915, + "grad_norm": 7.680446147918701, + "learning_rate": 2.122209483662749e-07, + "loss": 0.1249, + "num_input_tokens_seen": 216621248, + "step": 68820 + }, + { + "epoch": 4.405927917546892, + "grad_norm": 4.990911960601807, + "learning_rate": 2.1199575652746857e-07, + "loss": 0.1037, + "num_input_tokens_seen": 216636416, + "step": 68825 + }, + { + "epoch": 4.406247999487869, + "grad_norm": 8.618562698364258, + "learning_rate": 2.1177067893967646e-07, + "loss": 0.1393, + "num_input_tokens_seen": 216652544, + "step": 68830 + }, + { + "epoch": 4.406568081428846, + "grad_norm": 11.7448091506958, + "learning_rate": 2.1154571561413762e-07, + "loss": 0.1389, + "num_input_tokens_seen": 216668032, + "step": 68835 + }, + { + "epoch": 4.406888163369823, + "grad_norm": 8.376294136047363, + "learning_rate": 2.1132086656208527e-07, + "loss": 0.1304, + "num_input_tokens_seen": 216683136, + "step": 68840 + }, + { + "epoch": 4.4072082453108, + "grad_norm": 24.923599243164062, + "learning_rate": 2.1109613179474825e-07, + "loss": 0.1055, + "num_input_tokens_seen": 216699008, + "step": 68845 + }, + { + "epoch": 4.407528327251777, + "grad_norm": 9.384109497070312, + "learning_rate": 2.1087151132334727e-07, + "loss": 0.1204, + "num_input_tokens_seen": 216714368, + "step": 68850 + }, + { + "epoch": 4.407848409192753, + "grad_norm": 10.889680862426758, + "learning_rate": 2.1064700515909975e-07, + "loss": 0.1576, + "num_input_tokens_seen": 216730432, + "step": 68855 + }, + { + "epoch": 4.40816849113373, + "grad_norm": 7.38665246963501, + "learning_rate": 2.1042261331321616e-07, + "loss": 0.114, + "num_input_tokens_seen": 216747904, + "step": 68860 + }, + { + "epoch": 4.408488573074707, + "grad_norm": 10.472291946411133, + "learning_rate": 2.1019833579690063e-07, + "loss": 0.154, + "num_input_tokens_seen": 216763648, + "step": 68865 + }, + { + "epoch": 4.408808655015684, + "grad_norm": 7.639312267303467, + "learning_rate": 2.099741726213539e-07, + "loss": 0.1425, + "num_input_tokens_seen": 216779136, + "step": 68870 + }, + { + "epoch": 4.409128736956661, + "grad_norm": 8.536800384521484, + "learning_rate": 2.097501237977681e-07, + "loss": 0.1038, + "num_input_tokens_seen": 216794816, + "step": 68875 + }, + { + "epoch": 4.409448818897638, + "grad_norm": 7.08976936340332, + "learning_rate": 2.095261893373321e-07, + "loss": 0.1107, + "num_input_tokens_seen": 216809280, + "step": 68880 + }, + { + "epoch": 4.409768900838615, + "grad_norm": 4.388968467712402, + "learning_rate": 2.0930236925122692e-07, + "loss": 0.1353, + "num_input_tokens_seen": 216824128, + "step": 68885 + }, + { + "epoch": 4.410088982779592, + "grad_norm": 8.33321475982666, + "learning_rate": 2.0907866355063006e-07, + "loss": 0.1449, + "num_input_tokens_seen": 216840256, + "step": 68890 + }, + { + "epoch": 4.410409064720568, + "grad_norm": 10.51948070526123, + "learning_rate": 2.0885507224671197e-07, + "loss": 0.121, + "num_input_tokens_seen": 216857600, + "step": 68895 + }, + { + "epoch": 4.410729146661545, + "grad_norm": 6.836394309997559, + "learning_rate": 2.086315953506371e-07, + "loss": 0.1349, + "num_input_tokens_seen": 216873152, + "step": 68900 + }, + { + "epoch": 4.411049228602522, + "grad_norm": 6.857544898986816, + "learning_rate": 2.084082328735651e-07, + "loss": 0.1164, + "num_input_tokens_seen": 216889152, + "step": 68905 + }, + { + "epoch": 4.411369310543499, + "grad_norm": 6.936807155609131, + "learning_rate": 2.0818498482664955e-07, + "loss": 0.1164, + "num_input_tokens_seen": 216904192, + "step": 68910 + }, + { + "epoch": 4.411689392484476, + "grad_norm": 10.088497161865234, + "learning_rate": 2.0796185122103797e-07, + "loss": 0.1648, + "num_input_tokens_seen": 216919552, + "step": 68915 + }, + { + "epoch": 4.412009474425453, + "grad_norm": 4.785491466522217, + "learning_rate": 2.0773883206787275e-07, + "loss": 0.0874, + "num_input_tokens_seen": 216934720, + "step": 68920 + }, + { + "epoch": 4.41232955636643, + "grad_norm": 7.687224864959717, + "learning_rate": 2.0751592737829062e-07, + "loss": 0.1486, + "num_input_tokens_seen": 216950336, + "step": 68925 + }, + { + "epoch": 4.412649638307407, + "grad_norm": 10.325772285461426, + "learning_rate": 2.0729313716342209e-07, + "loss": 0.1377, + "num_input_tokens_seen": 216965504, + "step": 68930 + }, + { + "epoch": 4.412969720248384, + "grad_norm": 11.461034774780273, + "learning_rate": 2.0707046143439187e-07, + "loss": 0.151, + "num_input_tokens_seen": 216980672, + "step": 68935 + }, + { + "epoch": 4.41328980218936, + "grad_norm": 10.858847618103027, + "learning_rate": 2.068479002023191e-07, + "loss": 0.1334, + "num_input_tokens_seen": 216996608, + "step": 68940 + }, + { + "epoch": 4.413609884130337, + "grad_norm": 6.809417724609375, + "learning_rate": 2.0662545347831798e-07, + "loss": 0.1647, + "num_input_tokens_seen": 217013184, + "step": 68945 + }, + { + "epoch": 4.413929966071314, + "grad_norm": 29.58592414855957, + "learning_rate": 2.0640312127349544e-07, + "loss": 0.1388, + "num_input_tokens_seen": 217028544, + "step": 68950 + }, + { + "epoch": 4.414250048012291, + "grad_norm": 4.620422840118408, + "learning_rate": 2.0618090359895482e-07, + "loss": 0.121, + "num_input_tokens_seen": 217045120, + "step": 68955 + }, + { + "epoch": 4.414570129953268, + "grad_norm": 4.31859016418457, + "learning_rate": 2.0595880046579113e-07, + "loss": 0.1209, + "num_input_tokens_seen": 217060224, + "step": 68960 + }, + { + "epoch": 4.414890211894245, + "grad_norm": 6.50623893737793, + "learning_rate": 2.0573681188509637e-07, + "loss": 0.1759, + "num_input_tokens_seen": 217074880, + "step": 68965 + }, + { + "epoch": 4.415210293835222, + "grad_norm": 8.765506744384766, + "learning_rate": 2.055149378679541e-07, + "loss": 0.1506, + "num_input_tokens_seen": 217090624, + "step": 68970 + }, + { + "epoch": 4.415530375776199, + "grad_norm": 5.332673072814941, + "learning_rate": 2.052931784254447e-07, + "loss": 0.0857, + "num_input_tokens_seen": 217107136, + "step": 68975 + }, + { + "epoch": 4.415850457717176, + "grad_norm": 5.868852138519287, + "learning_rate": 2.050715335686415e-07, + "loss": 0.1358, + "num_input_tokens_seen": 217122368, + "step": 68980 + }, + { + "epoch": 4.416170539658152, + "grad_norm": 3.4141476154327393, + "learning_rate": 2.048500033086115e-07, + "loss": 0.1529, + "num_input_tokens_seen": 217138816, + "step": 68985 + }, + { + "epoch": 4.416490621599129, + "grad_norm": 5.7976765632629395, + "learning_rate": 2.04628587656418e-07, + "loss": 0.0889, + "num_input_tokens_seen": 217153536, + "step": 68990 + }, + { + "epoch": 4.416810703540106, + "grad_norm": 48.58253479003906, + "learning_rate": 2.044072866231159e-07, + "loss": 0.1377, + "num_input_tokens_seen": 217169984, + "step": 68995 + }, + { + "epoch": 4.417130785481083, + "grad_norm": 6.562442302703857, + "learning_rate": 2.0418610021975732e-07, + "loss": 0.1121, + "num_input_tokens_seen": 217186560, + "step": 69000 + }, + { + "epoch": 4.41745086742206, + "grad_norm": 7.7289276123046875, + "learning_rate": 2.0396502845738602e-07, + "loss": 0.1765, + "num_input_tokens_seen": 217203776, + "step": 69005 + }, + { + "epoch": 4.417770949363037, + "grad_norm": 5.455543518066406, + "learning_rate": 2.0374407134704178e-07, + "loss": 0.0829, + "num_input_tokens_seen": 217219264, + "step": 69010 + }, + { + "epoch": 4.418091031304014, + "grad_norm": 6.585197448730469, + "learning_rate": 2.0352322889975796e-07, + "loss": 0.0862, + "num_input_tokens_seen": 217235840, + "step": 69015 + }, + { + "epoch": 4.418411113244991, + "grad_norm": 8.940705299377441, + "learning_rate": 2.0330250112656215e-07, + "loss": 0.1079, + "num_input_tokens_seen": 217251456, + "step": 69020 + }, + { + "epoch": 4.418731195185968, + "grad_norm": 10.250747680664062, + "learning_rate": 2.0308188803847635e-07, + "loss": 0.1418, + "num_input_tokens_seen": 217266240, + "step": 69025 + }, + { + "epoch": 4.419051277126944, + "grad_norm": 6.66684627532959, + "learning_rate": 2.0286138964651676e-07, + "loss": 0.1094, + "num_input_tokens_seen": 217282944, + "step": 69030 + }, + { + "epoch": 4.419371359067921, + "grad_norm": 13.860973358154297, + "learning_rate": 2.0264100596169345e-07, + "loss": 0.1358, + "num_input_tokens_seen": 217298112, + "step": 69035 + }, + { + "epoch": 4.419691441008898, + "grad_norm": 6.7544264793396, + "learning_rate": 2.0242073699501236e-07, + "loss": 0.1167, + "num_input_tokens_seen": 217313664, + "step": 69040 + }, + { + "epoch": 4.420011522949875, + "grad_norm": 10.190360069274902, + "learning_rate": 2.0220058275747217e-07, + "loss": 0.1408, + "num_input_tokens_seen": 217328320, + "step": 69045 + }, + { + "epoch": 4.420331604890852, + "grad_norm": 4.576066017150879, + "learning_rate": 2.0198054326006606e-07, + "loss": 0.2011, + "num_input_tokens_seen": 217343552, + "step": 69050 + }, + { + "epoch": 4.420651686831829, + "grad_norm": 8.980774879455566, + "learning_rate": 2.017606185137816e-07, + "loss": 0.1643, + "num_input_tokens_seen": 217359616, + "step": 69055 + }, + { + "epoch": 4.420971768772806, + "grad_norm": 10.584094047546387, + "learning_rate": 2.0154080852960056e-07, + "loss": 0.1961, + "num_input_tokens_seen": 217375424, + "step": 69060 + }, + { + "epoch": 4.421291850713783, + "grad_norm": 9.12341594696045, + "learning_rate": 2.0132111331849947e-07, + "loss": 0.1649, + "num_input_tokens_seen": 217390848, + "step": 69065 + }, + { + "epoch": 4.421611932654759, + "grad_norm": 7.736807346343994, + "learning_rate": 2.0110153289144812e-07, + "loss": 0.1364, + "num_input_tokens_seen": 217407616, + "step": 69070 + }, + { + "epoch": 4.421932014595736, + "grad_norm": 6.148576259613037, + "learning_rate": 2.0088206725941278e-07, + "loss": 0.1023, + "num_input_tokens_seen": 217423296, + "step": 69075 + }, + { + "epoch": 4.422252096536713, + "grad_norm": 4.599947452545166, + "learning_rate": 2.006627164333502e-07, + "loss": 0.111, + "num_input_tokens_seen": 217438656, + "step": 69080 + }, + { + "epoch": 4.42257217847769, + "grad_norm": 5.831418991088867, + "learning_rate": 2.004434804242153e-07, + "loss": 0.1101, + "num_input_tokens_seen": 217455040, + "step": 69085 + }, + { + "epoch": 4.422892260418667, + "grad_norm": 9.871000289916992, + "learning_rate": 2.0022435924295508e-07, + "loss": 0.1491, + "num_input_tokens_seen": 217470848, + "step": 69090 + }, + { + "epoch": 4.423212342359644, + "grad_norm": 6.137930393218994, + "learning_rate": 2.0000535290051083e-07, + "loss": 0.1008, + "num_input_tokens_seen": 217488064, + "step": 69095 + }, + { + "epoch": 4.423532424300621, + "grad_norm": 12.063417434692383, + "learning_rate": 1.997864614078196e-07, + "loss": 0.1556, + "num_input_tokens_seen": 217503296, + "step": 69100 + }, + { + "epoch": 4.423852506241598, + "grad_norm": 4.950654029846191, + "learning_rate": 1.9956768477581044e-07, + "loss": 0.1135, + "num_input_tokens_seen": 217518464, + "step": 69105 + }, + { + "epoch": 4.424172588182575, + "grad_norm": 5.420118808746338, + "learning_rate": 1.9934902301540964e-07, + "loss": 0.1434, + "num_input_tokens_seen": 217533952, + "step": 69110 + }, + { + "epoch": 4.424492670123552, + "grad_norm": 7.600779056549072, + "learning_rate": 1.99130476137534e-07, + "loss": 0.1169, + "num_input_tokens_seen": 217548992, + "step": 69115 + }, + { + "epoch": 4.424812752064528, + "grad_norm": 8.205527305603027, + "learning_rate": 1.9891204415309785e-07, + "loss": 0.1238, + "num_input_tokens_seen": 217564160, + "step": 69120 + }, + { + "epoch": 4.425132834005505, + "grad_norm": 8.920653343200684, + "learning_rate": 1.9869372707300856e-07, + "loss": 0.1162, + "num_input_tokens_seen": 217580288, + "step": 69125 + }, + { + "epoch": 4.425452915946482, + "grad_norm": 5.49472188949585, + "learning_rate": 1.984755249081674e-07, + "loss": 0.1178, + "num_input_tokens_seen": 217595008, + "step": 69130 + }, + { + "epoch": 4.425772997887459, + "grad_norm": 8.601279258728027, + "learning_rate": 1.9825743766947014e-07, + "loss": 0.0866, + "num_input_tokens_seen": 217609984, + "step": 69135 + }, + { + "epoch": 4.426093079828436, + "grad_norm": 5.15601921081543, + "learning_rate": 1.980394653678072e-07, + "loss": 0.0968, + "num_input_tokens_seen": 217624640, + "step": 69140 + }, + { + "epoch": 4.426413161769413, + "grad_norm": 8.034141540527344, + "learning_rate": 1.978216080140624e-07, + "loss": 0.1389, + "num_input_tokens_seen": 217640896, + "step": 69145 + }, + { + "epoch": 4.42673324371039, + "grad_norm": 6.134253978729248, + "learning_rate": 1.9760386561911537e-07, + "loss": 0.1078, + "num_input_tokens_seen": 217656512, + "step": 69150 + }, + { + "epoch": 4.427053325651367, + "grad_norm": 9.516523361206055, + "learning_rate": 1.9738623819383852e-07, + "loss": 0.0876, + "num_input_tokens_seen": 217671616, + "step": 69155 + }, + { + "epoch": 4.427373407592343, + "grad_norm": 95.21343231201172, + "learning_rate": 1.9716872574909923e-07, + "loss": 0.1675, + "num_input_tokens_seen": 217686656, + "step": 69160 + }, + { + "epoch": 4.42769348953332, + "grad_norm": 5.2562642097473145, + "learning_rate": 1.9695132829575857e-07, + "loss": 0.1062, + "num_input_tokens_seen": 217703296, + "step": 69165 + }, + { + "epoch": 4.428013571474297, + "grad_norm": 8.514799118041992, + "learning_rate": 1.9673404584467258e-07, + "loss": 0.128, + "num_input_tokens_seen": 217718016, + "step": 69170 + }, + { + "epoch": 4.428333653415274, + "grad_norm": 6.717249393463135, + "learning_rate": 1.9651687840669092e-07, + "loss": 0.12, + "num_input_tokens_seen": 217732992, + "step": 69175 + }, + { + "epoch": 4.428653735356251, + "grad_norm": 4.839006423950195, + "learning_rate": 1.962998259926577e-07, + "loss": 0.1107, + "num_input_tokens_seen": 217748096, + "step": 69180 + }, + { + "epoch": 4.428973817297228, + "grad_norm": 3.3712942600250244, + "learning_rate": 1.9608288861341225e-07, + "loss": 0.1051, + "num_input_tokens_seen": 217764096, + "step": 69185 + }, + { + "epoch": 4.429293899238205, + "grad_norm": 17.13375473022461, + "learning_rate": 1.9586606627978593e-07, + "loss": 0.1685, + "num_input_tokens_seen": 217780608, + "step": 69190 + }, + { + "epoch": 4.429613981179182, + "grad_norm": 4.568472862243652, + "learning_rate": 1.9564935900260734e-07, + "loss": 0.0954, + "num_input_tokens_seen": 217796352, + "step": 69195 + }, + { + "epoch": 4.429934063120159, + "grad_norm": 10.054071426391602, + "learning_rate": 1.9543276679269613e-07, + "loss": 0.0962, + "num_input_tokens_seen": 217811136, + "step": 69200 + }, + { + "epoch": 4.430254145061135, + "grad_norm": 6.111292839050293, + "learning_rate": 1.9521628966086864e-07, + "loss": 0.1146, + "num_input_tokens_seen": 217827456, + "step": 69205 + }, + { + "epoch": 4.430574227002112, + "grad_norm": 9.559941291809082, + "learning_rate": 1.9499992761793458e-07, + "loss": 0.1101, + "num_input_tokens_seen": 217843264, + "step": 69210 + }, + { + "epoch": 4.430894308943089, + "grad_norm": 8.434992790222168, + "learning_rate": 1.9478368067469749e-07, + "loss": 0.1223, + "num_input_tokens_seen": 217858432, + "step": 69215 + }, + { + "epoch": 4.431214390884066, + "grad_norm": 3.650014638900757, + "learning_rate": 1.9456754884195655e-07, + "loss": 0.126, + "num_input_tokens_seen": 217874112, + "step": 69220 + }, + { + "epoch": 4.431534472825043, + "grad_norm": 5.704514026641846, + "learning_rate": 1.9435153213050312e-07, + "loss": 0.1397, + "num_input_tokens_seen": 217889344, + "step": 69225 + }, + { + "epoch": 4.43185455476602, + "grad_norm": 7.0404486656188965, + "learning_rate": 1.9413563055112465e-07, + "loss": 0.0912, + "num_input_tokens_seen": 217904576, + "step": 69230 + }, + { + "epoch": 4.432174636706997, + "grad_norm": 7.273706912994385, + "learning_rate": 1.939198441146023e-07, + "loss": 0.1608, + "num_input_tokens_seen": 217919232, + "step": 69235 + }, + { + "epoch": 4.432494718647974, + "grad_norm": 6.734958648681641, + "learning_rate": 1.9370417283171072e-07, + "loss": 0.1051, + "num_input_tokens_seen": 217934144, + "step": 69240 + }, + { + "epoch": 4.432814800588951, + "grad_norm": 7.1561360359191895, + "learning_rate": 1.9348861671321966e-07, + "loss": 0.1495, + "num_input_tokens_seen": 217949056, + "step": 69245 + }, + { + "epoch": 4.433134882529927, + "grad_norm": 10.494422912597656, + "learning_rate": 1.932731757698933e-07, + "loss": 0.1505, + "num_input_tokens_seen": 217966336, + "step": 69250 + }, + { + "epoch": 4.433454964470904, + "grad_norm": 5.911226272583008, + "learning_rate": 1.9305785001248883e-07, + "loss": 0.1432, + "num_input_tokens_seen": 217981824, + "step": 69255 + }, + { + "epoch": 4.433775046411881, + "grad_norm": 6.5869140625, + "learning_rate": 1.9284263945175908e-07, + "loss": 0.1343, + "num_input_tokens_seen": 217997632, + "step": 69260 + }, + { + "epoch": 4.434095128352858, + "grad_norm": 8.86147689819336, + "learning_rate": 1.9262754409844985e-07, + "loss": 0.1941, + "num_input_tokens_seen": 218012416, + "step": 69265 + }, + { + "epoch": 4.434415210293835, + "grad_norm": 8.387849807739258, + "learning_rate": 1.9241256396330288e-07, + "loss": 0.1536, + "num_input_tokens_seen": 218027904, + "step": 69270 + }, + { + "epoch": 4.434735292234812, + "grad_norm": 4.868916034698486, + "learning_rate": 1.921976990570529e-07, + "loss": 0.1174, + "num_input_tokens_seen": 218043840, + "step": 69275 + }, + { + "epoch": 4.435055374175789, + "grad_norm": 8.243587493896484, + "learning_rate": 1.919829493904285e-07, + "loss": 0.1416, + "num_input_tokens_seen": 218059520, + "step": 69280 + }, + { + "epoch": 4.435375456116766, + "grad_norm": 7.252748012542725, + "learning_rate": 1.9176831497415393e-07, + "loss": 0.1487, + "num_input_tokens_seen": 218074880, + "step": 69285 + }, + { + "epoch": 4.435695538057743, + "grad_norm": 10.919062614440918, + "learning_rate": 1.915537958189459e-07, + "loss": 0.1598, + "num_input_tokens_seen": 218091648, + "step": 69290 + }, + { + "epoch": 4.436015619998719, + "grad_norm": 5.00651741027832, + "learning_rate": 1.9133939193551804e-07, + "loss": 0.1524, + "num_input_tokens_seen": 218107456, + "step": 69295 + }, + { + "epoch": 4.436335701939696, + "grad_norm": 9.297605514526367, + "learning_rate": 1.9112510333457485e-07, + "loss": 0.1667, + "num_input_tokens_seen": 218124416, + "step": 69300 + }, + { + "epoch": 4.436655783880673, + "grad_norm": 6.637078762054443, + "learning_rate": 1.9091093002681803e-07, + "loss": 0.1479, + "num_input_tokens_seen": 218140224, + "step": 69305 + }, + { + "epoch": 4.43697586582165, + "grad_norm": 2.851811647415161, + "learning_rate": 1.906968720229413e-07, + "loss": 0.1211, + "num_input_tokens_seen": 218157376, + "step": 69310 + }, + { + "epoch": 4.437295947762627, + "grad_norm": 9.10893726348877, + "learning_rate": 1.904829293336341e-07, + "loss": 0.1096, + "num_input_tokens_seen": 218172288, + "step": 69315 + }, + { + "epoch": 4.437616029703604, + "grad_norm": 4.361540794372559, + "learning_rate": 1.9026910196957986e-07, + "loss": 0.1438, + "num_input_tokens_seen": 218187904, + "step": 69320 + }, + { + "epoch": 4.437936111644581, + "grad_norm": 7.299145221710205, + "learning_rate": 1.9005538994145507e-07, + "loss": 0.1146, + "num_input_tokens_seen": 218203968, + "step": 69325 + }, + { + "epoch": 4.438256193585558, + "grad_norm": 8.27573299407959, + "learning_rate": 1.8984179325993308e-07, + "loss": 0.1408, + "num_input_tokens_seen": 218219776, + "step": 69330 + }, + { + "epoch": 4.438576275526534, + "grad_norm": 5.944495677947998, + "learning_rate": 1.8962831193567788e-07, + "loss": 0.1296, + "num_input_tokens_seen": 218234880, + "step": 69335 + }, + { + "epoch": 4.438896357467511, + "grad_norm": 6.606401443481445, + "learning_rate": 1.8941494597935124e-07, + "loss": 0.1389, + "num_input_tokens_seen": 218251968, + "step": 69340 + }, + { + "epoch": 4.439216439408488, + "grad_norm": 7.861584186553955, + "learning_rate": 1.89201695401606e-07, + "loss": 0.1744, + "num_input_tokens_seen": 218266816, + "step": 69345 + }, + { + "epoch": 4.439536521349465, + "grad_norm": 6.072310447692871, + "learning_rate": 1.88988560213092e-07, + "loss": 0.1143, + "num_input_tokens_seen": 218284160, + "step": 69350 + }, + { + "epoch": 4.439856603290442, + "grad_norm": 10.44810962677002, + "learning_rate": 1.8877554042445178e-07, + "loss": 0.1526, + "num_input_tokens_seen": 218299712, + "step": 69355 + }, + { + "epoch": 4.440176685231419, + "grad_norm": 9.224567413330078, + "learning_rate": 1.885626360463222e-07, + "loss": 0.1798, + "num_input_tokens_seen": 218316544, + "step": 69360 + }, + { + "epoch": 4.440496767172396, + "grad_norm": 5.073801040649414, + "learning_rate": 1.8834984708933468e-07, + "loss": 0.1062, + "num_input_tokens_seen": 218331200, + "step": 69365 + }, + { + "epoch": 4.440816849113373, + "grad_norm": 10.611773490905762, + "learning_rate": 1.8813717356411492e-07, + "loss": 0.1513, + "num_input_tokens_seen": 218346368, + "step": 69370 + }, + { + "epoch": 4.44113693105435, + "grad_norm": 8.44467544555664, + "learning_rate": 1.879246154812825e-07, + "loss": 0.1303, + "num_input_tokens_seen": 218361664, + "step": 69375 + }, + { + "epoch": 4.441457012995327, + "grad_norm": 5.4720964431762695, + "learning_rate": 1.877121728514522e-07, + "loss": 0.1369, + "num_input_tokens_seen": 218377920, + "step": 69380 + }, + { + "epoch": 4.441777094936303, + "grad_norm": 5.7751054763793945, + "learning_rate": 1.8749984568523089e-07, + "loss": 0.1277, + "num_input_tokens_seen": 218393280, + "step": 69385 + }, + { + "epoch": 4.44209717687728, + "grad_norm": 3.555480480194092, + "learning_rate": 1.8728763399322252e-07, + "loss": 0.0977, + "num_input_tokens_seen": 218407936, + "step": 69390 + }, + { + "epoch": 4.442417258818257, + "grad_norm": 8.2736177444458, + "learning_rate": 1.8707553778602282e-07, + "loss": 0.1393, + "num_input_tokens_seen": 218422976, + "step": 69395 + }, + { + "epoch": 4.442737340759234, + "grad_norm": 6.049947738647461, + "learning_rate": 1.868635570742236e-07, + "loss": 0.1009, + "num_input_tokens_seen": 218438656, + "step": 69400 + }, + { + "epoch": 4.443057422700211, + "grad_norm": 8.118163108825684, + "learning_rate": 1.8665169186840943e-07, + "loss": 0.1584, + "num_input_tokens_seen": 218454080, + "step": 69405 + }, + { + "epoch": 4.443377504641188, + "grad_norm": 4.7971038818359375, + "learning_rate": 1.8643994217915961e-07, + "loss": 0.1521, + "num_input_tokens_seen": 218469056, + "step": 69410 + }, + { + "epoch": 4.443697586582165, + "grad_norm": 8.891928672790527, + "learning_rate": 1.8622830801704905e-07, + "loss": 0.1504, + "num_input_tokens_seen": 218483776, + "step": 69415 + }, + { + "epoch": 4.444017668523142, + "grad_norm": 4.592909336090088, + "learning_rate": 1.86016789392644e-07, + "loss": 0.1207, + "num_input_tokens_seen": 218499136, + "step": 69420 + }, + { + "epoch": 4.444337750464118, + "grad_norm": 8.732571601867676, + "learning_rate": 1.858053863165077e-07, + "loss": 0.128, + "num_input_tokens_seen": 218513088, + "step": 69425 + }, + { + "epoch": 4.444657832405095, + "grad_norm": 5.093923568725586, + "learning_rate": 1.8559409879919636e-07, + "loss": 0.1121, + "num_input_tokens_seen": 218528384, + "step": 69430 + }, + { + "epoch": 4.444977914346072, + "grad_norm": 6.032820701599121, + "learning_rate": 1.8538292685126026e-07, + "loss": 0.1311, + "num_input_tokens_seen": 218543040, + "step": 69435 + }, + { + "epoch": 4.445297996287049, + "grad_norm": 5.894008159637451, + "learning_rate": 1.8517187048324446e-07, + "loss": 0.1148, + "num_input_tokens_seen": 218558400, + "step": 69440 + }, + { + "epoch": 4.445618078228026, + "grad_norm": 6.978239059448242, + "learning_rate": 1.8496092970568757e-07, + "loss": 0.0942, + "num_input_tokens_seen": 218575232, + "step": 69445 + }, + { + "epoch": 4.445938160169003, + "grad_norm": 6.990809440612793, + "learning_rate": 1.847501045291239e-07, + "loss": 0.11, + "num_input_tokens_seen": 218590208, + "step": 69450 + }, + { + "epoch": 4.44625824210998, + "grad_norm": 6.901622295379639, + "learning_rate": 1.8453939496407975e-07, + "loss": 0.1019, + "num_input_tokens_seen": 218606272, + "step": 69455 + }, + { + "epoch": 4.446578324050957, + "grad_norm": 4.419789791107178, + "learning_rate": 1.8432880102107754e-07, + "loss": 0.1144, + "num_input_tokens_seen": 218621376, + "step": 69460 + }, + { + "epoch": 4.446898405991934, + "grad_norm": 6.990864276885986, + "learning_rate": 1.8411832271063328e-07, + "loss": 0.141, + "num_input_tokens_seen": 218636416, + "step": 69465 + }, + { + "epoch": 4.44721848793291, + "grad_norm": 6.569733142852783, + "learning_rate": 1.8390796004325696e-07, + "loss": 0.1628, + "num_input_tokens_seen": 218653440, + "step": 69470 + }, + { + "epoch": 4.447538569873887, + "grad_norm": 5.314136028289795, + "learning_rate": 1.8369771302945316e-07, + "loss": 0.1397, + "num_input_tokens_seen": 218668800, + "step": 69475 + }, + { + "epoch": 4.447858651814864, + "grad_norm": 6.88749885559082, + "learning_rate": 1.834875816797202e-07, + "loss": 0.1364, + "num_input_tokens_seen": 218683328, + "step": 69480 + }, + { + "epoch": 4.448178733755841, + "grad_norm": 5.461257457733154, + "learning_rate": 1.8327756600455132e-07, + "loss": 0.1205, + "num_input_tokens_seen": 218699328, + "step": 69485 + }, + { + "epoch": 4.448498815696818, + "grad_norm": 8.455431938171387, + "learning_rate": 1.8306766601443343e-07, + "loss": 0.1402, + "num_input_tokens_seen": 218714304, + "step": 69490 + }, + { + "epoch": 4.448818897637795, + "grad_norm": 8.882311820983887, + "learning_rate": 1.828578817198473e-07, + "loss": 0.1515, + "num_input_tokens_seen": 218729664, + "step": 69495 + }, + { + "epoch": 4.449138979578772, + "grad_norm": 6.778238773345947, + "learning_rate": 1.8264821313126958e-07, + "loss": 0.1253, + "num_input_tokens_seen": 218745600, + "step": 69500 + }, + { + "epoch": 4.449459061519749, + "grad_norm": 10.37492847442627, + "learning_rate": 1.8243866025916907e-07, + "loss": 0.1192, + "num_input_tokens_seen": 218762304, + "step": 69505 + }, + { + "epoch": 4.449779143460726, + "grad_norm": 4.668059825897217, + "learning_rate": 1.8222922311401047e-07, + "loss": 0.153, + "num_input_tokens_seen": 218776960, + "step": 69510 + }, + { + "epoch": 4.450099225401702, + "grad_norm": 7.461447715759277, + "learning_rate": 1.8201990170625157e-07, + "loss": 0.1357, + "num_input_tokens_seen": 218793280, + "step": 69515 + }, + { + "epoch": 4.450419307342679, + "grad_norm": 14.053898811340332, + "learning_rate": 1.818106960463445e-07, + "loss": 0.0949, + "num_input_tokens_seen": 218810240, + "step": 69520 + }, + { + "epoch": 4.450739389283656, + "grad_norm": 5.116372585296631, + "learning_rate": 1.8160160614473703e-07, + "loss": 0.1111, + "num_input_tokens_seen": 218825792, + "step": 69525 + }, + { + "epoch": 4.451059471224633, + "grad_norm": 6.0801005363464355, + "learning_rate": 1.8139263201186836e-07, + "loss": 0.1376, + "num_input_tokens_seen": 218842176, + "step": 69530 + }, + { + "epoch": 4.45137955316561, + "grad_norm": 6.953969478607178, + "learning_rate": 1.8118377365817534e-07, + "loss": 0.1484, + "num_input_tokens_seen": 218857344, + "step": 69535 + }, + { + "epoch": 4.451699635106587, + "grad_norm": 6.465044975280762, + "learning_rate": 1.8097503109408553e-07, + "loss": 0.1222, + "num_input_tokens_seen": 218872832, + "step": 69540 + }, + { + "epoch": 4.452019717047564, + "grad_norm": 5.9587178230285645, + "learning_rate": 1.807664043300239e-07, + "loss": 0.1965, + "num_input_tokens_seen": 218889536, + "step": 69545 + }, + { + "epoch": 4.452339798988541, + "grad_norm": 8.360187530517578, + "learning_rate": 1.805578933764074e-07, + "loss": 0.1365, + "num_input_tokens_seen": 218904640, + "step": 69550 + }, + { + "epoch": 4.452659880929518, + "grad_norm": 9.796744346618652, + "learning_rate": 1.8034949824364824e-07, + "loss": 0.1715, + "num_input_tokens_seen": 218919872, + "step": 69555 + }, + { + "epoch": 4.4529799628704945, + "grad_norm": 9.420080184936523, + "learning_rate": 1.8014121894215252e-07, + "loss": 0.1422, + "num_input_tokens_seen": 218935424, + "step": 69560 + }, + { + "epoch": 4.4533000448114715, + "grad_norm": 5.351378440856934, + "learning_rate": 1.7993305548232003e-07, + "loss": 0.0803, + "num_input_tokens_seen": 218951040, + "step": 69565 + }, + { + "epoch": 4.4536201267524484, + "grad_norm": 22.599849700927734, + "learning_rate": 1.7972500787454717e-07, + "loss": 0.2246, + "num_input_tokens_seen": 218967680, + "step": 69570 + }, + { + "epoch": 4.453940208693425, + "grad_norm": 4.657711982727051, + "learning_rate": 1.7951707612922032e-07, + "loss": 0.12, + "num_input_tokens_seen": 218983104, + "step": 69575 + }, + { + "epoch": 4.454260290634402, + "grad_norm": 7.82424783706665, + "learning_rate": 1.7930926025672451e-07, + "loss": 0.1104, + "num_input_tokens_seen": 218997248, + "step": 69580 + }, + { + "epoch": 4.454580372575379, + "grad_norm": 6.102978229522705, + "learning_rate": 1.791015602674359e-07, + "loss": 0.1288, + "num_input_tokens_seen": 219012608, + "step": 69585 + }, + { + "epoch": 4.454900454516356, + "grad_norm": 5.866600513458252, + "learning_rate": 1.7889397617172649e-07, + "loss": 0.1238, + "num_input_tokens_seen": 219027072, + "step": 69590 + }, + { + "epoch": 4.455220536457333, + "grad_norm": 7.575148105621338, + "learning_rate": 1.7868650797996156e-07, + "loss": 0.1223, + "num_input_tokens_seen": 219043968, + "step": 69595 + }, + { + "epoch": 4.4555406183983095, + "grad_norm": 5.729439735412598, + "learning_rate": 1.7847915570250117e-07, + "loss": 0.1511, + "num_input_tokens_seen": 219059200, + "step": 69600 + }, + { + "epoch": 4.4558607003392865, + "grad_norm": 7.315492153167725, + "learning_rate": 1.78271919349699e-07, + "loss": 0.1452, + "num_input_tokens_seen": 219074368, + "step": 69605 + }, + { + "epoch": 4.4561807822802635, + "grad_norm": 7.503133296966553, + "learning_rate": 1.7806479893190455e-07, + "loss": 0.1056, + "num_input_tokens_seen": 219090176, + "step": 69610 + }, + { + "epoch": 4.4565008642212405, + "grad_norm": 6.634546279907227, + "learning_rate": 1.7785779445945867e-07, + "loss": 0.0991, + "num_input_tokens_seen": 219106240, + "step": 69615 + }, + { + "epoch": 4.4568209461622175, + "grad_norm": 19.839189529418945, + "learning_rate": 1.7765090594269924e-07, + "loss": 0.1892, + "num_input_tokens_seen": 219122240, + "step": 69620 + }, + { + "epoch": 4.4571410281031945, + "grad_norm": 3.092836380004883, + "learning_rate": 1.7744413339195715e-07, + "loss": 0.1367, + "num_input_tokens_seen": 219137152, + "step": 69625 + }, + { + "epoch": 4.4574611100441714, + "grad_norm": 9.086727142333984, + "learning_rate": 1.772374768175572e-07, + "loss": 0.1387, + "num_input_tokens_seen": 219152128, + "step": 69630 + }, + { + "epoch": 4.457781191985148, + "grad_norm": 6.5993428230285645, + "learning_rate": 1.7703093622981865e-07, + "loss": 0.1311, + "num_input_tokens_seen": 219167552, + "step": 69635 + }, + { + "epoch": 4.458101273926125, + "grad_norm": 8.387181282043457, + "learning_rate": 1.7682451163905517e-07, + "loss": 0.1422, + "num_input_tokens_seen": 219184576, + "step": 69640 + }, + { + "epoch": 4.458421355867102, + "grad_norm": 15.320856094360352, + "learning_rate": 1.766182030555752e-07, + "loss": 0.1922, + "num_input_tokens_seen": 219201024, + "step": 69645 + }, + { + "epoch": 4.4587414378080785, + "grad_norm": 12.35460090637207, + "learning_rate": 1.764120104896791e-07, + "loss": 0.1521, + "num_input_tokens_seen": 219217216, + "step": 69650 + }, + { + "epoch": 4.4590615197490555, + "grad_norm": 17.66187858581543, + "learning_rate": 1.7620593395166474e-07, + "loss": 0.0957, + "num_input_tokens_seen": 219234240, + "step": 69655 + }, + { + "epoch": 4.4593816016900325, + "grad_norm": 3.708822727203369, + "learning_rate": 1.7599997345182195e-07, + "loss": 0.1359, + "num_input_tokens_seen": 219250112, + "step": 69660 + }, + { + "epoch": 4.4597016836310095, + "grad_norm": 11.596823692321777, + "learning_rate": 1.7579412900043501e-07, + "loss": 0.129, + "num_input_tokens_seen": 219264896, + "step": 69665 + }, + { + "epoch": 4.4600217655719865, + "grad_norm": 4.8799309730529785, + "learning_rate": 1.7558840060778292e-07, + "loss": 0.1221, + "num_input_tokens_seen": 219280384, + "step": 69670 + }, + { + "epoch": 4.4603418475129635, + "grad_norm": 5.9237284660339355, + "learning_rate": 1.753827882841383e-07, + "loss": 0.1147, + "num_input_tokens_seen": 219296576, + "step": 69675 + }, + { + "epoch": 4.4606619294539405, + "grad_norm": 5.918821334838867, + "learning_rate": 1.7517729203976958e-07, + "loss": 0.1177, + "num_input_tokens_seen": 219311232, + "step": 69680 + }, + { + "epoch": 4.4609820113949175, + "grad_norm": 3.398927688598633, + "learning_rate": 1.7497191188493662e-07, + "loss": 0.1277, + "num_input_tokens_seen": 219326592, + "step": 69685 + }, + { + "epoch": 4.461302093335894, + "grad_norm": 6.219391822814941, + "learning_rate": 1.7476664782989594e-07, + "loss": 0.124, + "num_input_tokens_seen": 219342272, + "step": 69690 + }, + { + "epoch": 4.4616221752768705, + "grad_norm": 4.091805458068848, + "learning_rate": 1.745614998848974e-07, + "loss": 0.1427, + "num_input_tokens_seen": 219357952, + "step": 69695 + }, + { + "epoch": 4.4619422572178475, + "grad_norm": 4.9849162101745605, + "learning_rate": 1.7435646806018448e-07, + "loss": 0.1702, + "num_input_tokens_seen": 219374720, + "step": 69700 + }, + { + "epoch": 4.4622623391588245, + "grad_norm": 6.838088512420654, + "learning_rate": 1.741515523659959e-07, + "loss": 0.1301, + "num_input_tokens_seen": 219390400, + "step": 69705 + }, + { + "epoch": 4.4625824210998015, + "grad_norm": 7.408144474029541, + "learning_rate": 1.7394675281256407e-07, + "loss": 0.1124, + "num_input_tokens_seen": 219406656, + "step": 69710 + }, + { + "epoch": 4.4629025030407785, + "grad_norm": 6.024177074432373, + "learning_rate": 1.7374206941011523e-07, + "loss": 0.1343, + "num_input_tokens_seen": 219421824, + "step": 69715 + }, + { + "epoch": 4.4632225849817555, + "grad_norm": 9.426107406616211, + "learning_rate": 1.7353750216887039e-07, + "loss": 0.119, + "num_input_tokens_seen": 219437440, + "step": 69720 + }, + { + "epoch": 4.4635426669227325, + "grad_norm": 9.105602264404297, + "learning_rate": 1.7333305109904413e-07, + "loss": 0.1894, + "num_input_tokens_seen": 219452224, + "step": 69725 + }, + { + "epoch": 4.4638627488637095, + "grad_norm": 8.98697280883789, + "learning_rate": 1.731287162108472e-07, + "loss": 0.1614, + "num_input_tokens_seen": 219466880, + "step": 69730 + }, + { + "epoch": 4.464182830804686, + "grad_norm": 6.9235334396362305, + "learning_rate": 1.7292449751448087e-07, + "loss": 0.1149, + "num_input_tokens_seen": 219482496, + "step": 69735 + }, + { + "epoch": 4.464502912745663, + "grad_norm": 4.44085168838501, + "learning_rate": 1.7272039502014449e-07, + "loss": 0.1285, + "num_input_tokens_seen": 219498368, + "step": 69740 + }, + { + "epoch": 4.46482299468664, + "grad_norm": 5.399537563323975, + "learning_rate": 1.7251640873802905e-07, + "loss": 0.1362, + "num_input_tokens_seen": 219513408, + "step": 69745 + }, + { + "epoch": 4.465143076627617, + "grad_norm": 10.66358757019043, + "learning_rate": 1.7231253867832032e-07, + "loss": 0.1121, + "num_input_tokens_seen": 219529088, + "step": 69750 + }, + { + "epoch": 4.4654631585685935, + "grad_norm": 8.606019020080566, + "learning_rate": 1.7210878485119985e-07, + "loss": 0.1074, + "num_input_tokens_seen": 219544576, + "step": 69755 + }, + { + "epoch": 4.4657832405095705, + "grad_norm": 6.901431560516357, + "learning_rate": 1.7190514726684037e-07, + "loss": 0.1674, + "num_input_tokens_seen": 219560064, + "step": 69760 + }, + { + "epoch": 4.4661033224505475, + "grad_norm": 9.288446426391602, + "learning_rate": 1.7170162593541178e-07, + "loss": 0.1022, + "num_input_tokens_seen": 219575424, + "step": 69765 + }, + { + "epoch": 4.4664234043915245, + "grad_norm": 8.680310249328613, + "learning_rate": 1.7149822086707567e-07, + "loss": 0.1462, + "num_input_tokens_seen": 219592256, + "step": 69770 + }, + { + "epoch": 4.4667434863325015, + "grad_norm": 8.9393310546875, + "learning_rate": 1.7129493207199006e-07, + "loss": 0.097, + "num_input_tokens_seen": 219608320, + "step": 69775 + }, + { + "epoch": 4.467063568273478, + "grad_norm": 3.873614549636841, + "learning_rate": 1.7109175956030595e-07, + "loss": 0.1632, + "num_input_tokens_seen": 219624256, + "step": 69780 + }, + { + "epoch": 4.467383650214455, + "grad_norm": 8.835458755493164, + "learning_rate": 1.7088870334216827e-07, + "loss": 0.1218, + "num_input_tokens_seen": 219638656, + "step": 69785 + }, + { + "epoch": 4.467703732155432, + "grad_norm": 9.733636856079102, + "learning_rate": 1.7068576342771703e-07, + "loss": 0.1249, + "num_input_tokens_seen": 219653632, + "step": 69790 + }, + { + "epoch": 4.468023814096409, + "grad_norm": 7.200865268707275, + "learning_rate": 1.7048293982708518e-07, + "loss": 0.1586, + "num_input_tokens_seen": 219669760, + "step": 69795 + }, + { + "epoch": 4.468343896037386, + "grad_norm": 5.496358394622803, + "learning_rate": 1.7028023255040244e-07, + "loss": 0.1466, + "num_input_tokens_seen": 219685184, + "step": 69800 + }, + { + "epoch": 4.468663977978363, + "grad_norm": 4.872005462646484, + "learning_rate": 1.7007764160778872e-07, + "loss": 0.1214, + "num_input_tokens_seen": 219702720, + "step": 69805 + }, + { + "epoch": 4.46898405991934, + "grad_norm": 7.111124515533447, + "learning_rate": 1.6987516700936207e-07, + "loss": 0.1596, + "num_input_tokens_seen": 219720256, + "step": 69810 + }, + { + "epoch": 4.4693041418603165, + "grad_norm": 6.4622907638549805, + "learning_rate": 1.6967280876523217e-07, + "loss": 0.1266, + "num_input_tokens_seen": 219735872, + "step": 69815 + }, + { + "epoch": 4.4696242238012935, + "grad_norm": 4.783539772033691, + "learning_rate": 1.694705668855043e-07, + "loss": 0.1267, + "num_input_tokens_seen": 219751680, + "step": 69820 + }, + { + "epoch": 4.46994430574227, + "grad_norm": 9.6181640625, + "learning_rate": 1.692684413802767e-07, + "loss": 0.144, + "num_input_tokens_seen": 219767040, + "step": 69825 + }, + { + "epoch": 4.470264387683247, + "grad_norm": 7.466552257537842, + "learning_rate": 1.6906643225964304e-07, + "loss": 0.1171, + "num_input_tokens_seen": 219784384, + "step": 69830 + }, + { + "epoch": 4.470584469624224, + "grad_norm": 7.578187942504883, + "learning_rate": 1.6886453953368964e-07, + "loss": 0.1216, + "num_input_tokens_seen": 219800192, + "step": 69835 + }, + { + "epoch": 4.470904551565201, + "grad_norm": 4.691976547241211, + "learning_rate": 1.6866276321249986e-07, + "loss": 0.114, + "num_input_tokens_seen": 219816448, + "step": 69840 + }, + { + "epoch": 4.471224633506178, + "grad_norm": 11.632445335388184, + "learning_rate": 1.684611033061473e-07, + "loss": 0.156, + "num_input_tokens_seen": 219831808, + "step": 69845 + }, + { + "epoch": 4.471544715447155, + "grad_norm": 7.17780876159668, + "learning_rate": 1.6825955982470306e-07, + "loss": 0.1274, + "num_input_tokens_seen": 219847168, + "step": 69850 + }, + { + "epoch": 4.471864797388132, + "grad_norm": 5.0293288230896, + "learning_rate": 1.6805813277823075e-07, + "loss": 0.1213, + "num_input_tokens_seen": 219862272, + "step": 69855 + }, + { + "epoch": 4.472184879329109, + "grad_norm": 2.9628899097442627, + "learning_rate": 1.6785682217678844e-07, + "loss": 0.1077, + "num_input_tokens_seen": 219878272, + "step": 69860 + }, + { + "epoch": 4.472504961270085, + "grad_norm": 7.133001327514648, + "learning_rate": 1.6765562803042924e-07, + "loss": 0.1285, + "num_input_tokens_seen": 219892992, + "step": 69865 + }, + { + "epoch": 4.472825043211062, + "grad_norm": 6.447132587432861, + "learning_rate": 1.6745455034919838e-07, + "loss": 0.1122, + "num_input_tokens_seen": 219908736, + "step": 69870 + }, + { + "epoch": 4.473145125152039, + "grad_norm": 7.223556041717529, + "learning_rate": 1.6725358914313844e-07, + "loss": 0.1247, + "num_input_tokens_seen": 219923840, + "step": 69875 + }, + { + "epoch": 4.473465207093016, + "grad_norm": 3.004561424255371, + "learning_rate": 1.6705274442228275e-07, + "loss": 0.1047, + "num_input_tokens_seen": 219939392, + "step": 69880 + }, + { + "epoch": 4.473785289033993, + "grad_norm": 4.7783589363098145, + "learning_rate": 1.6685201619666164e-07, + "loss": 0.1226, + "num_input_tokens_seen": 219954496, + "step": 69885 + }, + { + "epoch": 4.47410537097497, + "grad_norm": 6.215480327606201, + "learning_rate": 1.6665140447629762e-07, + "loss": 0.1456, + "num_input_tokens_seen": 219970752, + "step": 69890 + }, + { + "epoch": 4.474425452915947, + "grad_norm": 2.358074903488159, + "learning_rate": 1.6645090927120854e-07, + "loss": 0.1304, + "num_input_tokens_seen": 219985472, + "step": 69895 + }, + { + "epoch": 4.474745534856924, + "grad_norm": 7.455054759979248, + "learning_rate": 1.6625053059140612e-07, + "loss": 0.1338, + "num_input_tokens_seen": 219999616, + "step": 69900 + }, + { + "epoch": 4.475065616797901, + "grad_norm": 8.32168197631836, + "learning_rate": 1.660502684468962e-07, + "loss": 0.0955, + "num_input_tokens_seen": 220014144, + "step": 69905 + }, + { + "epoch": 4.475385698738878, + "grad_norm": 5.911342620849609, + "learning_rate": 1.6585012284767858e-07, + "loss": 0.1256, + "num_input_tokens_seen": 220029696, + "step": 69910 + }, + { + "epoch": 4.475705780679854, + "grad_norm": 7.325296401977539, + "learning_rate": 1.6565009380374752e-07, + "loss": 0.1082, + "num_input_tokens_seen": 220045760, + "step": 69915 + }, + { + "epoch": 4.476025862620831, + "grad_norm": 9.183334350585938, + "learning_rate": 1.6545018132509194e-07, + "loss": 0.1459, + "num_input_tokens_seen": 220061696, + "step": 69920 + }, + { + "epoch": 4.476345944561808, + "grad_norm": 18.155885696411133, + "learning_rate": 1.6525038542169386e-07, + "loss": 0.1237, + "num_input_tokens_seen": 220077184, + "step": 69925 + }, + { + "epoch": 4.476666026502785, + "grad_norm": 3.467287302017212, + "learning_rate": 1.6505070610353057e-07, + "loss": 0.1802, + "num_input_tokens_seen": 220093440, + "step": 69930 + }, + { + "epoch": 4.476986108443762, + "grad_norm": 8.396596908569336, + "learning_rate": 1.6485114338057245e-07, + "loss": 0.1719, + "num_input_tokens_seen": 220110080, + "step": 69935 + }, + { + "epoch": 4.477306190384739, + "grad_norm": 3.7914726734161377, + "learning_rate": 1.6465169726278485e-07, + "loss": 0.1265, + "num_input_tokens_seen": 220126080, + "step": 69940 + }, + { + "epoch": 4.477626272325716, + "grad_norm": 6.946190357208252, + "learning_rate": 1.644523677601273e-07, + "loss": 0.1821, + "num_input_tokens_seen": 220140864, + "step": 69945 + }, + { + "epoch": 4.477946354266693, + "grad_norm": 5.822764873504639, + "learning_rate": 1.6425315488255294e-07, + "loss": 0.1111, + "num_input_tokens_seen": 220156544, + "step": 69950 + }, + { + "epoch": 4.478266436207669, + "grad_norm": 6.541527271270752, + "learning_rate": 1.640540586400094e-07, + "loss": 0.1253, + "num_input_tokens_seen": 220171840, + "step": 69955 + }, + { + "epoch": 4.478586518148646, + "grad_norm": 4.568787574768066, + "learning_rate": 1.6385507904243924e-07, + "loss": 0.0854, + "num_input_tokens_seen": 220186752, + "step": 69960 + }, + { + "epoch": 4.478906600089623, + "grad_norm": 3.644805431365967, + "learning_rate": 1.6365621609977734e-07, + "loss": 0.0735, + "num_input_tokens_seen": 220201088, + "step": 69965 + }, + { + "epoch": 4.4792266820306, + "grad_norm": 8.855317115783691, + "learning_rate": 1.634574698219546e-07, + "loss": 0.124, + "num_input_tokens_seen": 220216768, + "step": 69970 + }, + { + "epoch": 4.479546763971577, + "grad_norm": 7.710419178009033, + "learning_rate": 1.6325884021889533e-07, + "loss": 0.1255, + "num_input_tokens_seen": 220231936, + "step": 69975 + }, + { + "epoch": 4.479866845912554, + "grad_norm": 5.5553297996521, + "learning_rate": 1.6306032730051742e-07, + "loss": 0.1017, + "num_input_tokens_seen": 220246592, + "step": 69980 + }, + { + "epoch": 4.480186927853531, + "grad_norm": 7.372467994689941, + "learning_rate": 1.6286193107673487e-07, + "loss": 0.1462, + "num_input_tokens_seen": 220261888, + "step": 69985 + }, + { + "epoch": 4.480507009794508, + "grad_norm": 6.158074378967285, + "learning_rate": 1.6266365155745312e-07, + "loss": 0.1161, + "num_input_tokens_seen": 220276800, + "step": 69990 + }, + { + "epoch": 4.480827091735485, + "grad_norm": 6.135972023010254, + "learning_rate": 1.624654887525745e-07, + "loss": 0.1647, + "num_input_tokens_seen": 220292160, + "step": 69995 + }, + { + "epoch": 4.481147173676461, + "grad_norm": 7.724602699279785, + "learning_rate": 1.6226744267199308e-07, + "loss": 0.1887, + "num_input_tokens_seen": 220307648, + "step": 70000 + }, + { + "epoch": 4.481467255617438, + "grad_norm": 8.848335266113281, + "learning_rate": 1.6206951332559894e-07, + "loss": 0.1344, + "num_input_tokens_seen": 220323648, + "step": 70005 + }, + { + "epoch": 4.481787337558415, + "grad_norm": 7.04402494430542, + "learning_rate": 1.6187170072327562e-07, + "loss": 0.1439, + "num_input_tokens_seen": 220339968, + "step": 70010 + }, + { + "epoch": 4.482107419499392, + "grad_norm": 8.11075210571289, + "learning_rate": 1.6167400487490047e-07, + "loss": 0.1211, + "num_input_tokens_seen": 220355136, + "step": 70015 + }, + { + "epoch": 4.482427501440369, + "grad_norm": 6.282297134399414, + "learning_rate": 1.614764257903459e-07, + "loss": 0.1041, + "num_input_tokens_seen": 220370304, + "step": 70020 + }, + { + "epoch": 4.482747583381346, + "grad_norm": 7.446840763092041, + "learning_rate": 1.6127896347947702e-07, + "loss": 0.1214, + "num_input_tokens_seen": 220386240, + "step": 70025 + }, + { + "epoch": 4.483067665322323, + "grad_norm": 5.497285842895508, + "learning_rate": 1.6108161795215572e-07, + "loss": 0.157, + "num_input_tokens_seen": 220401600, + "step": 70030 + }, + { + "epoch": 4.4833877472633, + "grad_norm": 5.122358798980713, + "learning_rate": 1.6088438921823463e-07, + "loss": 0.1292, + "num_input_tokens_seen": 220417728, + "step": 70035 + }, + { + "epoch": 4.483707829204277, + "grad_norm": 10.659148216247559, + "learning_rate": 1.6068727728756368e-07, + "loss": 0.1869, + "num_input_tokens_seen": 220435200, + "step": 70040 + }, + { + "epoch": 4.484027911145253, + "grad_norm": 6.004514694213867, + "learning_rate": 1.6049028216998468e-07, + "loss": 0.1442, + "num_input_tokens_seen": 220449600, + "step": 70045 + }, + { + "epoch": 4.48434799308623, + "grad_norm": 7.7512640953063965, + "learning_rate": 1.6029340387533539e-07, + "loss": 0.1654, + "num_input_tokens_seen": 220465280, + "step": 70050 + }, + { + "epoch": 4.484668075027207, + "grad_norm": 5.534257411956787, + "learning_rate": 1.6009664241344618e-07, + "loss": 0.1323, + "num_input_tokens_seen": 220481472, + "step": 70055 + }, + { + "epoch": 4.484988156968184, + "grad_norm": 3.608067750930786, + "learning_rate": 1.5989999779414234e-07, + "loss": 0.1493, + "num_input_tokens_seen": 220497344, + "step": 70060 + }, + { + "epoch": 4.485308238909161, + "grad_norm": 7.8584699630737305, + "learning_rate": 1.5970347002724346e-07, + "loss": 0.1714, + "num_input_tokens_seen": 220513984, + "step": 70065 + }, + { + "epoch": 4.485628320850138, + "grad_norm": 7.544483661651611, + "learning_rate": 1.5950705912256366e-07, + "loss": 0.1036, + "num_input_tokens_seen": 220529472, + "step": 70070 + }, + { + "epoch": 4.485948402791115, + "grad_norm": 5.718693733215332, + "learning_rate": 1.5931076508990951e-07, + "loss": 0.173, + "num_input_tokens_seen": 220545344, + "step": 70075 + }, + { + "epoch": 4.486268484732092, + "grad_norm": 8.057104110717773, + "learning_rate": 1.591145879390843e-07, + "loss": 0.1361, + "num_input_tokens_seen": 220561920, + "step": 70080 + }, + { + "epoch": 4.486588566673069, + "grad_norm": 5.399226188659668, + "learning_rate": 1.5891852767988243e-07, + "loss": 0.1346, + "num_input_tokens_seen": 220576960, + "step": 70085 + }, + { + "epoch": 4.486908648614045, + "grad_norm": 4.266441345214844, + "learning_rate": 1.5872258432209549e-07, + "loss": 0.1342, + "num_input_tokens_seen": 220592192, + "step": 70090 + }, + { + "epoch": 4.487228730555022, + "grad_norm": 5.093173980712891, + "learning_rate": 1.585267578755073e-07, + "loss": 0.1167, + "num_input_tokens_seen": 220608640, + "step": 70095 + }, + { + "epoch": 4.487548812495999, + "grad_norm": 2.862766981124878, + "learning_rate": 1.5833104834989648e-07, + "loss": 0.1202, + "num_input_tokens_seen": 220625024, + "step": 70100 + }, + { + "epoch": 4.487868894436976, + "grad_norm": 4.7775092124938965, + "learning_rate": 1.5813545575503632e-07, + "loss": 0.1104, + "num_input_tokens_seen": 220639744, + "step": 70105 + }, + { + "epoch": 4.488188976377953, + "grad_norm": 6.178213119506836, + "learning_rate": 1.5793998010069255e-07, + "loss": 0.0776, + "num_input_tokens_seen": 220655040, + "step": 70110 + }, + { + "epoch": 4.48850905831893, + "grad_norm": 3.6697299480438232, + "learning_rate": 1.5774462139662716e-07, + "loss": 0.0842, + "num_input_tokens_seen": 220672384, + "step": 70115 + }, + { + "epoch": 4.488829140259907, + "grad_norm": 8.939558982849121, + "learning_rate": 1.5754937965259482e-07, + "loss": 0.1939, + "num_input_tokens_seen": 220687744, + "step": 70120 + }, + { + "epoch": 4.489149222200884, + "grad_norm": 6.611408233642578, + "learning_rate": 1.573542548783452e-07, + "loss": 0.1449, + "num_input_tokens_seen": 220702848, + "step": 70125 + }, + { + "epoch": 4.48946930414186, + "grad_norm": 7.174863815307617, + "learning_rate": 1.5715924708362196e-07, + "loss": 0.143, + "num_input_tokens_seen": 220718720, + "step": 70130 + }, + { + "epoch": 4.489789386082837, + "grad_norm": 11.80747127532959, + "learning_rate": 1.5696435627816225e-07, + "loss": 0.1038, + "num_input_tokens_seen": 220735104, + "step": 70135 + }, + { + "epoch": 4.490109468023814, + "grad_norm": 6.233668327331543, + "learning_rate": 1.5676958247169805e-07, + "loss": 0.1334, + "num_input_tokens_seen": 220749888, + "step": 70140 + }, + { + "epoch": 4.490429549964791, + "grad_norm": 6.079896450042725, + "learning_rate": 1.5657492567395516e-07, + "loss": 0.1412, + "num_input_tokens_seen": 220764864, + "step": 70145 + }, + { + "epoch": 4.490749631905768, + "grad_norm": 8.584856986999512, + "learning_rate": 1.5638038589465443e-07, + "loss": 0.1492, + "num_input_tokens_seen": 220780672, + "step": 70150 + }, + { + "epoch": 4.491069713846745, + "grad_norm": 6.710099220275879, + "learning_rate": 1.5618596314351003e-07, + "loss": 0.1121, + "num_input_tokens_seen": 220795712, + "step": 70155 + }, + { + "epoch": 4.491389795787722, + "grad_norm": 5.067479133605957, + "learning_rate": 1.5599165743022975e-07, + "loss": 0.1244, + "num_input_tokens_seen": 220810624, + "step": 70160 + }, + { + "epoch": 4.491709877728699, + "grad_norm": 9.347183227539062, + "learning_rate": 1.5579746876451696e-07, + "loss": 0.1525, + "num_input_tokens_seen": 220826048, + "step": 70165 + }, + { + "epoch": 4.492029959669676, + "grad_norm": 6.739727973937988, + "learning_rate": 1.5560339715606776e-07, + "loss": 0.1572, + "num_input_tokens_seen": 220844352, + "step": 70170 + }, + { + "epoch": 4.492350041610653, + "grad_norm": 6.545708656311035, + "learning_rate": 1.5540944261457357e-07, + "loss": 0.1244, + "num_input_tokens_seen": 220859712, + "step": 70175 + }, + { + "epoch": 4.492670123551629, + "grad_norm": 5.494226932525635, + "learning_rate": 1.5521560514971916e-07, + "loss": 0.0878, + "num_input_tokens_seen": 220874880, + "step": 70180 + }, + { + "epoch": 4.492990205492606, + "grad_norm": 84.20800018310547, + "learning_rate": 1.5502188477118345e-07, + "loss": 0.2133, + "num_input_tokens_seen": 220891136, + "step": 70185 + }, + { + "epoch": 4.493310287433583, + "grad_norm": 7.946804046630859, + "learning_rate": 1.548282814886412e-07, + "loss": 0.1168, + "num_input_tokens_seen": 220906624, + "step": 70190 + }, + { + "epoch": 4.49363036937456, + "grad_norm": 8.297209739685059, + "learning_rate": 1.546347953117583e-07, + "loss": 0.1324, + "num_input_tokens_seen": 220923200, + "step": 70195 + }, + { + "epoch": 4.493950451315537, + "grad_norm": 5.999319076538086, + "learning_rate": 1.5444142625019753e-07, + "loss": 0.1657, + "num_input_tokens_seen": 220939776, + "step": 70200 + }, + { + "epoch": 4.494270533256514, + "grad_norm": 7.113226890563965, + "learning_rate": 1.5424817431361428e-07, + "loss": 0.151, + "num_input_tokens_seen": 220954240, + "step": 70205 + }, + { + "epoch": 4.494590615197491, + "grad_norm": 8.415470123291016, + "learning_rate": 1.540550395116583e-07, + "loss": 0.1397, + "num_input_tokens_seen": 220968640, + "step": 70210 + }, + { + "epoch": 4.494910697138468, + "grad_norm": 7.755077838897705, + "learning_rate": 1.5386202185397465e-07, + "loss": 0.1244, + "num_input_tokens_seen": 220985216, + "step": 70215 + }, + { + "epoch": 4.495230779079444, + "grad_norm": 10.028063774108887, + "learning_rate": 1.5366912135020062e-07, + "loss": 0.1985, + "num_input_tokens_seen": 221001408, + "step": 70220 + }, + { + "epoch": 4.495550861020421, + "grad_norm": 10.999134063720703, + "learning_rate": 1.5347633800996963e-07, + "loss": 0.1547, + "num_input_tokens_seen": 221016576, + "step": 70225 + }, + { + "epoch": 4.495870942961398, + "grad_norm": 124.63667297363281, + "learning_rate": 1.5328367184290677e-07, + "loss": 0.1679, + "num_input_tokens_seen": 221031680, + "step": 70230 + }, + { + "epoch": 4.496191024902375, + "grad_norm": 4.885156631469727, + "learning_rate": 1.530911228586346e-07, + "loss": 0.1326, + "num_input_tokens_seen": 221047424, + "step": 70235 + }, + { + "epoch": 4.496511106843352, + "grad_norm": 12.005194664001465, + "learning_rate": 1.528986910667668e-07, + "loss": 0.1601, + "num_input_tokens_seen": 221062784, + "step": 70240 + }, + { + "epoch": 4.496831188784329, + "grad_norm": 10.195160865783691, + "learning_rate": 1.5270637647691267e-07, + "loss": 0.1514, + "num_input_tokens_seen": 221077824, + "step": 70245 + }, + { + "epoch": 4.497151270725306, + "grad_norm": 6.961213111877441, + "learning_rate": 1.525141790986759e-07, + "loss": 0.1055, + "num_input_tokens_seen": 221093568, + "step": 70250 + }, + { + "epoch": 4.497471352666283, + "grad_norm": 6.5504937171936035, + "learning_rate": 1.5232209894165295e-07, + "loss": 0.1353, + "num_input_tokens_seen": 221110144, + "step": 70255 + }, + { + "epoch": 4.49779143460726, + "grad_norm": 7.937884330749512, + "learning_rate": 1.5213013601543619e-07, + "loss": 0.1503, + "num_input_tokens_seen": 221125632, + "step": 70260 + }, + { + "epoch": 4.498111516548236, + "grad_norm": 4.33675479888916, + "learning_rate": 1.5193829032961015e-07, + "loss": 0.0863, + "num_input_tokens_seen": 221142016, + "step": 70265 + }, + { + "epoch": 4.498431598489213, + "grad_norm": 6.736292362213135, + "learning_rate": 1.517465618937558e-07, + "loss": 0.1553, + "num_input_tokens_seen": 221158592, + "step": 70270 + }, + { + "epoch": 4.49875168043019, + "grad_norm": 7.432892799377441, + "learning_rate": 1.5155495071744658e-07, + "loss": 0.1032, + "num_input_tokens_seen": 221174016, + "step": 70275 + }, + { + "epoch": 4.499071762371167, + "grad_norm": 6.3432416915893555, + "learning_rate": 1.513634568102504e-07, + "loss": 0.1602, + "num_input_tokens_seen": 221188480, + "step": 70280 + }, + { + "epoch": 4.499391844312144, + "grad_norm": 7.977627754211426, + "learning_rate": 1.5117208018172957e-07, + "loss": 0.1548, + "num_input_tokens_seen": 221204480, + "step": 70285 + }, + { + "epoch": 4.499711926253121, + "grad_norm": 4.082662105560303, + "learning_rate": 1.5098082084144068e-07, + "loss": 0.1047, + "num_input_tokens_seen": 221222208, + "step": 70290 + }, + { + "epoch": 4.500032008194098, + "grad_norm": 6.691642761230469, + "learning_rate": 1.5078967879893353e-07, + "loss": 0.1387, + "num_input_tokens_seen": 221237504, + "step": 70295 + }, + { + "epoch": 4.500352090135075, + "grad_norm": 6.636939525604248, + "learning_rate": 1.5059865406375384e-07, + "loss": 0.1378, + "num_input_tokens_seen": 221253056, + "step": 70300 + }, + { + "epoch": 4.500672172076051, + "grad_norm": 7.091588973999023, + "learning_rate": 1.50407746645439e-07, + "loss": 0.1645, + "num_input_tokens_seen": 221269248, + "step": 70305 + }, + { + "epoch": 4.500864221240637, + "eval_loss": 0.6086835265159607, + "eval_runtime": 50.981, + "eval_samples_per_second": 272.376, + "eval_steps_per_second": 34.052, + "num_input_tokens_seen": 221278272, + "step": 70308 + }, + { + "epoch": 4.500992254017028, + "grad_norm": 2.6510870456695557, + "learning_rate": 1.502169565535236e-07, + "loss": 0.1307, + "num_input_tokens_seen": 221286720, + "step": 70310 + }, + { + "epoch": 4.501312335958005, + "grad_norm": 7.470419406890869, + "learning_rate": 1.5002628379753305e-07, + "loss": 0.1483, + "num_input_tokens_seen": 221301824, + "step": 70315 + }, + { + "epoch": 4.501632417898982, + "grad_norm": 10.529953956604004, + "learning_rate": 1.4983572838698972e-07, + "loss": 0.169, + "num_input_tokens_seen": 221316800, + "step": 70320 + }, + { + "epoch": 4.501952499839959, + "grad_norm": 5.7928147315979, + "learning_rate": 1.4964529033140856e-07, + "loss": 0.121, + "num_input_tokens_seen": 221334400, + "step": 70325 + }, + { + "epoch": 4.502272581780936, + "grad_norm": 7.99615478515625, + "learning_rate": 1.494549696402986e-07, + "loss": 0.1592, + "num_input_tokens_seen": 221349504, + "step": 70330 + }, + { + "epoch": 4.502592663721913, + "grad_norm": 8.313000679016113, + "learning_rate": 1.4926476632316473e-07, + "loss": 0.1331, + "num_input_tokens_seen": 221364800, + "step": 70335 + }, + { + "epoch": 4.50291274566289, + "grad_norm": 9.38417911529541, + "learning_rate": 1.4907468038950324e-07, + "loss": 0.1543, + "num_input_tokens_seen": 221380224, + "step": 70340 + }, + { + "epoch": 4.503232827603867, + "grad_norm": 6.5494489669799805, + "learning_rate": 1.4888471184880683e-07, + "loss": 0.1287, + "num_input_tokens_seen": 221396224, + "step": 70345 + }, + { + "epoch": 4.503552909544844, + "grad_norm": 9.285980224609375, + "learning_rate": 1.486948607105615e-07, + "loss": 0.1297, + "num_input_tokens_seen": 221411328, + "step": 70350 + }, + { + "epoch": 4.50387299148582, + "grad_norm": 4.444103240966797, + "learning_rate": 1.4850512698424747e-07, + "loss": 0.1418, + "num_input_tokens_seen": 221426816, + "step": 70355 + }, + { + "epoch": 4.504193073426797, + "grad_norm": 4.256240367889404, + "learning_rate": 1.483155106793388e-07, + "loss": 0.1001, + "num_input_tokens_seen": 221441024, + "step": 70360 + }, + { + "epoch": 4.504513155367774, + "grad_norm": 2.549241781234741, + "learning_rate": 1.4812601180530405e-07, + "loss": 0.0924, + "num_input_tokens_seen": 221456960, + "step": 70365 + }, + { + "epoch": 4.504833237308751, + "grad_norm": 5.026088714599609, + "learning_rate": 1.4793663037160565e-07, + "loss": 0.1594, + "num_input_tokens_seen": 221472384, + "step": 70370 + }, + { + "epoch": 4.505153319249728, + "grad_norm": 5.617691516876221, + "learning_rate": 1.477473663877002e-07, + "loss": 0.1194, + "num_input_tokens_seen": 221488320, + "step": 70375 + }, + { + "epoch": 4.505473401190705, + "grad_norm": 9.137266159057617, + "learning_rate": 1.475582198630393e-07, + "loss": 0.1285, + "num_input_tokens_seen": 221504576, + "step": 70380 + }, + { + "epoch": 4.505793483131682, + "grad_norm": 9.037117004394531, + "learning_rate": 1.4736919080706734e-07, + "loss": 0.1567, + "num_input_tokens_seen": 221519744, + "step": 70385 + }, + { + "epoch": 4.506113565072659, + "grad_norm": 6.594067096710205, + "learning_rate": 1.4718027922922374e-07, + "loss": 0.1245, + "num_input_tokens_seen": 221535872, + "step": 70390 + }, + { + "epoch": 4.506433647013635, + "grad_norm": 7.415459632873535, + "learning_rate": 1.4699148513894173e-07, + "loss": 0.1983, + "num_input_tokens_seen": 221551744, + "step": 70395 + }, + { + "epoch": 4.506753728954612, + "grad_norm": 3.2873477935791016, + "learning_rate": 1.4680280854564826e-07, + "loss": 0.1324, + "num_input_tokens_seen": 221567488, + "step": 70400 + }, + { + "epoch": 4.507073810895589, + "grad_norm": 9.445351600646973, + "learning_rate": 1.4661424945876524e-07, + "loss": 0.0993, + "num_input_tokens_seen": 221582528, + "step": 70405 + }, + { + "epoch": 4.507393892836566, + "grad_norm": 12.690184593200684, + "learning_rate": 1.4642580788770843e-07, + "loss": 0.1916, + "num_input_tokens_seen": 221598208, + "step": 70410 + }, + { + "epoch": 4.507713974777543, + "grad_norm": 5.431219100952148, + "learning_rate": 1.4623748384188703e-07, + "loss": 0.1744, + "num_input_tokens_seen": 221614848, + "step": 70415 + }, + { + "epoch": 4.50803405671852, + "grad_norm": 8.29196834564209, + "learning_rate": 1.4604927733070595e-07, + "loss": 0.1177, + "num_input_tokens_seen": 221630336, + "step": 70420 + }, + { + "epoch": 4.508354138659497, + "grad_norm": 6.206125259399414, + "learning_rate": 1.4586118836356216e-07, + "loss": 0.0852, + "num_input_tokens_seen": 221647808, + "step": 70425 + }, + { + "epoch": 4.508674220600474, + "grad_norm": 7.411611557006836, + "learning_rate": 1.4567321694984843e-07, + "loss": 0.1256, + "num_input_tokens_seen": 221663232, + "step": 70430 + }, + { + "epoch": 4.508994302541451, + "grad_norm": 6.432920455932617, + "learning_rate": 1.454853630989514e-07, + "loss": 0.1454, + "num_input_tokens_seen": 221678272, + "step": 70435 + }, + { + "epoch": 4.509314384482428, + "grad_norm": 4.189800262451172, + "learning_rate": 1.452976268202505e-07, + "loss": 0.1047, + "num_input_tokens_seen": 221695040, + "step": 70440 + }, + { + "epoch": 4.509634466423404, + "grad_norm": 7.000402927398682, + "learning_rate": 1.451100081231216e-07, + "loss": 0.1347, + "num_input_tokens_seen": 221712704, + "step": 70445 + }, + { + "epoch": 4.509954548364381, + "grad_norm": 7.418424129486084, + "learning_rate": 1.4492250701693218e-07, + "loss": 0.1447, + "num_input_tokens_seen": 221728448, + "step": 70450 + }, + { + "epoch": 4.510274630305358, + "grad_norm": 7.359106063842773, + "learning_rate": 1.447351235110464e-07, + "loss": 0.1322, + "num_input_tokens_seen": 221743936, + "step": 70455 + }, + { + "epoch": 4.510594712246335, + "grad_norm": 5.495746612548828, + "learning_rate": 1.4454785761481933e-07, + "loss": 0.0946, + "num_input_tokens_seen": 221758976, + "step": 70460 + }, + { + "epoch": 4.510914794187312, + "grad_norm": 6.044074535369873, + "learning_rate": 1.443607093376037e-07, + "loss": 0.0909, + "num_input_tokens_seen": 221776512, + "step": 70465 + }, + { + "epoch": 4.511234876128289, + "grad_norm": 13.714351654052734, + "learning_rate": 1.4417367868874433e-07, + "loss": 0.1663, + "num_input_tokens_seen": 221792512, + "step": 70470 + }, + { + "epoch": 4.511554958069266, + "grad_norm": 8.005231857299805, + "learning_rate": 1.4398676567758035e-07, + "loss": 0.1203, + "num_input_tokens_seen": 221807680, + "step": 70475 + }, + { + "epoch": 4.511875040010243, + "grad_norm": 7.770995140075684, + "learning_rate": 1.4379997031344518e-07, + "loss": 0.1465, + "num_input_tokens_seen": 221824000, + "step": 70480 + }, + { + "epoch": 4.512195121951219, + "grad_norm": 4.739990234375, + "learning_rate": 1.436132926056663e-07, + "loss": 0.1207, + "num_input_tokens_seen": 221839168, + "step": 70485 + }, + { + "epoch": 4.512515203892196, + "grad_norm": 8.999574661254883, + "learning_rate": 1.434267325635655e-07, + "loss": 0.0961, + "num_input_tokens_seen": 221857024, + "step": 70490 + }, + { + "epoch": 4.512835285833173, + "grad_norm": 3.6792044639587402, + "learning_rate": 1.4324029019645913e-07, + "loss": 0.141, + "num_input_tokens_seen": 221875392, + "step": 70495 + }, + { + "epoch": 4.51315536777415, + "grad_norm": 5.930098056793213, + "learning_rate": 1.4305396551365675e-07, + "loss": 0.1099, + "num_input_tokens_seen": 221891328, + "step": 70500 + }, + { + "epoch": 4.513475449715127, + "grad_norm": 8.914262771606445, + "learning_rate": 1.4286775852446226e-07, + "loss": 0.1362, + "num_input_tokens_seen": 221906560, + "step": 70505 + }, + { + "epoch": 4.513795531656104, + "grad_norm": 7.707979679107666, + "learning_rate": 1.4268166923817412e-07, + "loss": 0.1105, + "num_input_tokens_seen": 221921216, + "step": 70510 + }, + { + "epoch": 4.514115613597081, + "grad_norm": 4.549236297607422, + "learning_rate": 1.4249569766408484e-07, + "loss": 0.0972, + "num_input_tokens_seen": 221937472, + "step": 70515 + }, + { + "epoch": 4.514435695538058, + "grad_norm": 4.091418266296387, + "learning_rate": 1.4230984381148032e-07, + "loss": 0.1015, + "num_input_tokens_seen": 221953472, + "step": 70520 + }, + { + "epoch": 4.514755777479035, + "grad_norm": 5.8003010749816895, + "learning_rate": 1.4212410768964126e-07, + "loss": 0.1435, + "num_input_tokens_seen": 221969024, + "step": 70525 + }, + { + "epoch": 4.515075859420012, + "grad_norm": 8.535234451293945, + "learning_rate": 1.419384893078432e-07, + "loss": 0.2193, + "num_input_tokens_seen": 221984832, + "step": 70530 + }, + { + "epoch": 4.515395941360988, + "grad_norm": 12.591760635375977, + "learning_rate": 1.4175298867535353e-07, + "loss": 0.1949, + "num_input_tokens_seen": 222000192, + "step": 70535 + }, + { + "epoch": 4.515716023301965, + "grad_norm": 6.271299362182617, + "learning_rate": 1.4156760580143675e-07, + "loss": 0.153, + "num_input_tokens_seen": 222015168, + "step": 70540 + }, + { + "epoch": 4.516036105242942, + "grad_norm": 5.335093975067139, + "learning_rate": 1.4138234069534822e-07, + "loss": 0.1466, + "num_input_tokens_seen": 222030272, + "step": 70545 + }, + { + "epoch": 4.516356187183919, + "grad_norm": 6.4128594398498535, + "learning_rate": 1.411971933663406e-07, + "loss": 0.1251, + "num_input_tokens_seen": 222045120, + "step": 70550 + }, + { + "epoch": 4.516676269124896, + "grad_norm": 7.551212310791016, + "learning_rate": 1.4101216382365833e-07, + "loss": 0.1185, + "num_input_tokens_seen": 222061440, + "step": 70555 + }, + { + "epoch": 4.516996351065873, + "grad_norm": 8.72729206085205, + "learning_rate": 1.408272520765408e-07, + "loss": 0.0941, + "num_input_tokens_seen": 222076480, + "step": 70560 + }, + { + "epoch": 4.51731643300685, + "grad_norm": 2.489936590194702, + "learning_rate": 1.406424581342228e-07, + "loss": 0.1247, + "num_input_tokens_seen": 222091648, + "step": 70565 + }, + { + "epoch": 4.517636514947827, + "grad_norm": 6.059986114501953, + "learning_rate": 1.4045778200592997e-07, + "loss": 0.1387, + "num_input_tokens_seen": 222107712, + "step": 70570 + }, + { + "epoch": 4.517956596888803, + "grad_norm": 6.188740253448486, + "learning_rate": 1.4027322370088553e-07, + "loss": 0.1016, + "num_input_tokens_seen": 222124032, + "step": 70575 + }, + { + "epoch": 4.51827667882978, + "grad_norm": 6.255310535430908, + "learning_rate": 1.4008878322830488e-07, + "loss": 0.1439, + "num_input_tokens_seen": 222140672, + "step": 70580 + }, + { + "epoch": 4.518596760770757, + "grad_norm": 7.166032314300537, + "learning_rate": 1.3990446059739816e-07, + "loss": 0.1312, + "num_input_tokens_seen": 222155712, + "step": 70585 + }, + { + "epoch": 4.518916842711734, + "grad_norm": 9.548765182495117, + "learning_rate": 1.3972025581736936e-07, + "loss": 0.1327, + "num_input_tokens_seen": 222171968, + "step": 70590 + }, + { + "epoch": 4.519236924652711, + "grad_norm": 10.535009384155273, + "learning_rate": 1.3953616889741673e-07, + "loss": 0.136, + "num_input_tokens_seen": 222186816, + "step": 70595 + }, + { + "epoch": 4.519557006593688, + "grad_norm": 3.683959722518921, + "learning_rate": 1.393521998467326e-07, + "loss": 0.1054, + "num_input_tokens_seen": 222202944, + "step": 70600 + }, + { + "epoch": 4.519877088534665, + "grad_norm": 14.580625534057617, + "learning_rate": 1.3916834867450352e-07, + "loss": 0.1645, + "num_input_tokens_seen": 222218112, + "step": 70605 + }, + { + "epoch": 4.520197170475642, + "grad_norm": 8.669018745422363, + "learning_rate": 1.3898461538990965e-07, + "loss": 0.1397, + "num_input_tokens_seen": 222233344, + "step": 70610 + }, + { + "epoch": 4.520517252416619, + "grad_norm": 8.151637077331543, + "learning_rate": 1.3880100000212642e-07, + "loss": 0.1085, + "num_input_tokens_seen": 222247936, + "step": 70615 + }, + { + "epoch": 4.520837334357595, + "grad_norm": 13.349177360534668, + "learning_rate": 1.3861750252032207e-07, + "loss": 0.0946, + "num_input_tokens_seen": 222263168, + "step": 70620 + }, + { + "epoch": 4.521157416298572, + "grad_norm": 12.817773818969727, + "learning_rate": 1.3843412295365955e-07, + "loss": 0.1761, + "num_input_tokens_seen": 222281536, + "step": 70625 + }, + { + "epoch": 4.521477498239549, + "grad_norm": 8.50457763671875, + "learning_rate": 1.382508613112962e-07, + "loss": 0.1108, + "num_input_tokens_seen": 222297472, + "step": 70630 + }, + { + "epoch": 4.521797580180526, + "grad_norm": 9.057779312133789, + "learning_rate": 1.3806771760238285e-07, + "loss": 0.1148, + "num_input_tokens_seen": 222313088, + "step": 70635 + }, + { + "epoch": 4.522117662121503, + "grad_norm": 5.249372482299805, + "learning_rate": 1.3788469183606462e-07, + "loss": 0.1408, + "num_input_tokens_seen": 222329344, + "step": 70640 + }, + { + "epoch": 4.52243774406248, + "grad_norm": 1.067947268486023, + "learning_rate": 1.3770178402148116e-07, + "loss": 0.0952, + "num_input_tokens_seen": 222344128, + "step": 70645 + }, + { + "epoch": 4.522757826003457, + "grad_norm": 4.596525192260742, + "learning_rate": 1.3751899416776626e-07, + "loss": 0.1312, + "num_input_tokens_seen": 222359808, + "step": 70650 + }, + { + "epoch": 4.523077907944434, + "grad_norm": 2.6001265048980713, + "learning_rate": 1.3733632228404626e-07, + "loss": 0.0929, + "num_input_tokens_seen": 222375488, + "step": 70655 + }, + { + "epoch": 4.52339798988541, + "grad_norm": 6.9285759925842285, + "learning_rate": 1.371537683794441e-07, + "loss": 0.1294, + "num_input_tokens_seen": 222392000, + "step": 70660 + }, + { + "epoch": 4.523718071826387, + "grad_norm": 4.714211463928223, + "learning_rate": 1.3697133246307532e-07, + "loss": 0.1487, + "num_input_tokens_seen": 222407168, + "step": 70665 + }, + { + "epoch": 4.524038153767364, + "grad_norm": 10.139909744262695, + "learning_rate": 1.3678901454404925e-07, + "loss": 0.1413, + "num_input_tokens_seen": 222423808, + "step": 70670 + }, + { + "epoch": 4.524358235708341, + "grad_norm": 7.475371837615967, + "learning_rate": 1.3660681463147086e-07, + "loss": 0.1716, + "num_input_tokens_seen": 222438912, + "step": 70675 + }, + { + "epoch": 4.524678317649318, + "grad_norm": 8.600197792053223, + "learning_rate": 1.36424732734437e-07, + "loss": 0.1222, + "num_input_tokens_seen": 222453696, + "step": 70680 + }, + { + "epoch": 4.524998399590295, + "grad_norm": 6.383472442626953, + "learning_rate": 1.362427688620413e-07, + "loss": 0.112, + "num_input_tokens_seen": 222469120, + "step": 70685 + }, + { + "epoch": 4.525318481531272, + "grad_norm": 8.173453330993652, + "learning_rate": 1.3606092302336866e-07, + "loss": 0.1283, + "num_input_tokens_seen": 222484544, + "step": 70690 + }, + { + "epoch": 4.525638563472249, + "grad_norm": 6.999107360839844, + "learning_rate": 1.3587919522750044e-07, + "loss": 0.0812, + "num_input_tokens_seen": 222501184, + "step": 70695 + }, + { + "epoch": 4.525958645413226, + "grad_norm": 7.3357744216918945, + "learning_rate": 1.3569758548351103e-07, + "loss": 0.1355, + "num_input_tokens_seen": 222516608, + "step": 70700 + }, + { + "epoch": 4.526278727354203, + "grad_norm": 7.367413520812988, + "learning_rate": 1.3551609380046903e-07, + "loss": 0.1258, + "num_input_tokens_seen": 222532352, + "step": 70705 + }, + { + "epoch": 4.526598809295179, + "grad_norm": 5.007935523986816, + "learning_rate": 1.353347201874372e-07, + "loss": 0.0945, + "num_input_tokens_seen": 222548736, + "step": 70710 + }, + { + "epoch": 4.526918891236156, + "grad_norm": 6.867390155792236, + "learning_rate": 1.3515346465347245e-07, + "loss": 0.1458, + "num_input_tokens_seen": 222564736, + "step": 70715 + }, + { + "epoch": 4.527238973177133, + "grad_norm": 8.360433578491211, + "learning_rate": 1.3497232720762505e-07, + "loss": 0.1355, + "num_input_tokens_seen": 222579520, + "step": 70720 + }, + { + "epoch": 4.52755905511811, + "grad_norm": 6.0332512855529785, + "learning_rate": 1.3479130785894107e-07, + "loss": 0.1466, + "num_input_tokens_seen": 222594752, + "step": 70725 + }, + { + "epoch": 4.527879137059087, + "grad_norm": 55.297245025634766, + "learning_rate": 1.3461040661645912e-07, + "loss": 0.2823, + "num_input_tokens_seen": 222612672, + "step": 70730 + }, + { + "epoch": 4.528199219000064, + "grad_norm": 6.355478286743164, + "learning_rate": 1.3442962348921285e-07, + "loss": 0.1378, + "num_input_tokens_seen": 222627968, + "step": 70735 + }, + { + "epoch": 4.528519300941041, + "grad_norm": 6.860058784484863, + "learning_rate": 1.3424895848622942e-07, + "loss": 0.1256, + "num_input_tokens_seen": 222644480, + "step": 70740 + }, + { + "epoch": 4.528839382882018, + "grad_norm": 5.357797622680664, + "learning_rate": 1.3406841161653e-07, + "loss": 0.114, + "num_input_tokens_seen": 222659392, + "step": 70745 + }, + { + "epoch": 4.529159464822994, + "grad_norm": 7.876824855804443, + "learning_rate": 1.338879828891307e-07, + "loss": 0.1296, + "num_input_tokens_seen": 222675072, + "step": 70750 + }, + { + "epoch": 4.529479546763971, + "grad_norm": 12.400163650512695, + "learning_rate": 1.3370767231304037e-07, + "loss": 0.228, + "num_input_tokens_seen": 222690304, + "step": 70755 + }, + { + "epoch": 4.529799628704948, + "grad_norm": 14.819084167480469, + "learning_rate": 1.3352747989726385e-07, + "loss": 0.1558, + "num_input_tokens_seen": 222705344, + "step": 70760 + }, + { + "epoch": 4.530119710645925, + "grad_norm": 7.960252285003662, + "learning_rate": 1.3334740565079801e-07, + "loss": 0.1268, + "num_input_tokens_seen": 222720192, + "step": 70765 + }, + { + "epoch": 4.530439792586902, + "grad_norm": 6.212223529815674, + "learning_rate": 1.33167449582636e-07, + "loss": 0.1253, + "num_input_tokens_seen": 222735616, + "step": 70770 + }, + { + "epoch": 4.530759874527879, + "grad_norm": 6.2232513427734375, + "learning_rate": 1.3298761170176255e-07, + "loss": 0.0883, + "num_input_tokens_seen": 222751936, + "step": 70775 + }, + { + "epoch": 4.531079956468856, + "grad_norm": 5.199068546295166, + "learning_rate": 1.3280789201715854e-07, + "loss": 0.1138, + "num_input_tokens_seen": 222767872, + "step": 70780 + }, + { + "epoch": 4.531400038409833, + "grad_norm": 8.620165824890137, + "learning_rate": 1.326282905377982e-07, + "loss": 0.1461, + "num_input_tokens_seen": 222783360, + "step": 70785 + }, + { + "epoch": 4.53172012035081, + "grad_norm": 6.5252366065979, + "learning_rate": 1.324488072726496e-07, + "loss": 0.1418, + "num_input_tokens_seen": 222799936, + "step": 70790 + }, + { + "epoch": 4.532040202291787, + "grad_norm": 6.511880874633789, + "learning_rate": 1.322694422306761e-07, + "loss": 0.135, + "num_input_tokens_seen": 222817664, + "step": 70795 + }, + { + "epoch": 4.532360284232763, + "grad_norm": 8.849104881286621, + "learning_rate": 1.3209019542083313e-07, + "loss": 0.1051, + "num_input_tokens_seen": 222833664, + "step": 70800 + }, + { + "epoch": 4.53268036617374, + "grad_norm": 9.864873886108398, + "learning_rate": 1.3191106685207178e-07, + "loss": 0.2186, + "num_input_tokens_seen": 222849856, + "step": 70805 + }, + { + "epoch": 4.533000448114717, + "grad_norm": 5.284796237945557, + "learning_rate": 1.3173205653333715e-07, + "loss": 0.1267, + "num_input_tokens_seen": 222865280, + "step": 70810 + }, + { + "epoch": 4.533320530055694, + "grad_norm": 8.1801118850708, + "learning_rate": 1.3155316447356765e-07, + "loss": 0.1373, + "num_input_tokens_seen": 222880832, + "step": 70815 + }, + { + "epoch": 4.533640611996671, + "grad_norm": 45.03822326660156, + "learning_rate": 1.313743906816961e-07, + "loss": 0.1698, + "num_input_tokens_seen": 222895936, + "step": 70820 + }, + { + "epoch": 4.533960693937648, + "grad_norm": 4.834411144256592, + "learning_rate": 1.3119573516664984e-07, + "loss": 0.079, + "num_input_tokens_seen": 222911232, + "step": 70825 + }, + { + "epoch": 4.534280775878625, + "grad_norm": 7.172026634216309, + "learning_rate": 1.3101719793735007e-07, + "loss": 0.1053, + "num_input_tokens_seen": 222927232, + "step": 70830 + }, + { + "epoch": 4.534600857819602, + "grad_norm": 8.366809844970703, + "learning_rate": 1.3083877900271185e-07, + "loss": 0.1166, + "num_input_tokens_seen": 222942848, + "step": 70835 + }, + { + "epoch": 4.534920939760578, + "grad_norm": 6.809967994689941, + "learning_rate": 1.306604783716442e-07, + "loss": 0.1313, + "num_input_tokens_seen": 222959296, + "step": 70840 + }, + { + "epoch": 4.535241021701555, + "grad_norm": 11.978163719177246, + "learning_rate": 1.3048229605305108e-07, + "loss": 0.1726, + "num_input_tokens_seen": 222976320, + "step": 70845 + }, + { + "epoch": 4.535561103642532, + "grad_norm": 3.5247855186462402, + "learning_rate": 1.3030423205582955e-07, + "loss": 0.1038, + "num_input_tokens_seen": 222991360, + "step": 70850 + }, + { + "epoch": 4.535881185583509, + "grad_norm": 4.643628120422363, + "learning_rate": 1.3012628638887165e-07, + "loss": 0.133, + "num_input_tokens_seen": 223006080, + "step": 70855 + }, + { + "epoch": 4.536201267524486, + "grad_norm": 6.17526912689209, + "learning_rate": 1.2994845906106252e-07, + "loss": 0.1226, + "num_input_tokens_seen": 223021504, + "step": 70860 + }, + { + "epoch": 4.536521349465463, + "grad_norm": 7.8083906173706055, + "learning_rate": 1.2977075008128255e-07, + "loss": 0.1884, + "num_input_tokens_seen": 223038336, + "step": 70865 + }, + { + "epoch": 4.53684143140644, + "grad_norm": 5.703947067260742, + "learning_rate": 1.295931594584049e-07, + "loss": 0.1449, + "num_input_tokens_seen": 223056256, + "step": 70870 + }, + { + "epoch": 4.537161513347417, + "grad_norm": 6.0707688331604, + "learning_rate": 1.2941568720129778e-07, + "loss": 0.118, + "num_input_tokens_seen": 223072768, + "step": 70875 + }, + { + "epoch": 4.537481595288394, + "grad_norm": 5.913261413574219, + "learning_rate": 1.2923833331882378e-07, + "loss": 0.1638, + "num_input_tokens_seen": 223090432, + "step": 70880 + }, + { + "epoch": 4.53780167722937, + "grad_norm": 6.968750953674316, + "learning_rate": 1.290610978198381e-07, + "loss": 0.1098, + "num_input_tokens_seen": 223105280, + "step": 70885 + }, + { + "epoch": 4.538121759170347, + "grad_norm": 8.6599702835083, + "learning_rate": 1.288839807131917e-07, + "loss": 0.0926, + "num_input_tokens_seen": 223120448, + "step": 70890 + }, + { + "epoch": 4.538441841111324, + "grad_norm": 5.900869846343994, + "learning_rate": 1.2870698200772857e-07, + "loss": 0.137, + "num_input_tokens_seen": 223135872, + "step": 70895 + }, + { + "epoch": 4.538761923052301, + "grad_norm": 3.8829948902130127, + "learning_rate": 1.285301017122867e-07, + "loss": 0.0738, + "num_input_tokens_seen": 223150592, + "step": 70900 + }, + { + "epoch": 4.539082004993278, + "grad_norm": 10.382326126098633, + "learning_rate": 1.283533398356998e-07, + "loss": 0.1483, + "num_input_tokens_seen": 223167360, + "step": 70905 + }, + { + "epoch": 4.539402086934255, + "grad_norm": 4.252706050872803, + "learning_rate": 1.2817669638679307e-07, + "loss": 0.0748, + "num_input_tokens_seen": 223183040, + "step": 70910 + }, + { + "epoch": 4.539722168875232, + "grad_norm": 6.341135025024414, + "learning_rate": 1.2800017137438837e-07, + "loss": 0.1027, + "num_input_tokens_seen": 223198720, + "step": 70915 + }, + { + "epoch": 4.540042250816209, + "grad_norm": 8.04793643951416, + "learning_rate": 1.2782376480729915e-07, + "loss": 0.1212, + "num_input_tokens_seen": 223214656, + "step": 70920 + }, + { + "epoch": 4.540362332757185, + "grad_norm": 6.693320274353027, + "learning_rate": 1.2764747669433502e-07, + "loss": 0.1335, + "num_input_tokens_seen": 223229568, + "step": 70925 + }, + { + "epoch": 4.540682414698162, + "grad_norm": 5.406533241271973, + "learning_rate": 1.27471307044299e-07, + "loss": 0.1703, + "num_input_tokens_seen": 223245696, + "step": 70930 + }, + { + "epoch": 4.541002496639139, + "grad_norm": 9.94287109375, + "learning_rate": 1.2729525586598761e-07, + "loss": 0.0976, + "num_input_tokens_seen": 223263488, + "step": 70935 + }, + { + "epoch": 4.541322578580116, + "grad_norm": 5.666947841644287, + "learning_rate": 1.2711932316819247e-07, + "loss": 0.1428, + "num_input_tokens_seen": 223278976, + "step": 70940 + }, + { + "epoch": 4.541642660521093, + "grad_norm": 5.682990074157715, + "learning_rate": 1.2694350895969848e-07, + "loss": 0.1352, + "num_input_tokens_seen": 223294208, + "step": 70945 + }, + { + "epoch": 4.54196274246207, + "grad_norm": 7.727145195007324, + "learning_rate": 1.2676781324928418e-07, + "loss": 0.1224, + "num_input_tokens_seen": 223310208, + "step": 70950 + }, + { + "epoch": 4.542282824403047, + "grad_norm": 8.727681159973145, + "learning_rate": 1.2659223604572446e-07, + "loss": 0.1099, + "num_input_tokens_seen": 223328000, + "step": 70955 + }, + { + "epoch": 4.542602906344024, + "grad_norm": 6.303327560424805, + "learning_rate": 1.2641677735778513e-07, + "loss": 0.1044, + "num_input_tokens_seen": 223343104, + "step": 70960 + }, + { + "epoch": 4.542922988285001, + "grad_norm": 6.69219446182251, + "learning_rate": 1.2624143719422889e-07, + "loss": 0.1606, + "num_input_tokens_seen": 223357632, + "step": 70965 + }, + { + "epoch": 4.543243070225978, + "grad_norm": 7.73576545715332, + "learning_rate": 1.2606621556381064e-07, + "loss": 0.1414, + "num_input_tokens_seen": 223373056, + "step": 70970 + }, + { + "epoch": 4.543563152166954, + "grad_norm": 6.704898834228516, + "learning_rate": 1.2589111247528012e-07, + "loss": 0.1308, + "num_input_tokens_seen": 223389952, + "step": 70975 + }, + { + "epoch": 4.543883234107931, + "grad_norm": 8.902854919433594, + "learning_rate": 1.2571612793738142e-07, + "loss": 0.1504, + "num_input_tokens_seen": 223405440, + "step": 70980 + }, + { + "epoch": 4.544203316048908, + "grad_norm": 4.578578948974609, + "learning_rate": 1.255412619588517e-07, + "loss": 0.1406, + "num_input_tokens_seen": 223420928, + "step": 70985 + }, + { + "epoch": 4.544523397989885, + "grad_norm": 5.663704872131348, + "learning_rate": 1.2536651454842368e-07, + "loss": 0.1182, + "num_input_tokens_seen": 223436288, + "step": 70990 + }, + { + "epoch": 4.544843479930862, + "grad_norm": 6.564233779907227, + "learning_rate": 1.251918857148224e-07, + "loss": 0.117, + "num_input_tokens_seen": 223451968, + "step": 70995 + }, + { + "epoch": 4.545163561871839, + "grad_norm": 7.222389221191406, + "learning_rate": 1.2501737546676912e-07, + "loss": 0.1489, + "num_input_tokens_seen": 223468096, + "step": 71000 + }, + { + "epoch": 4.545483643812816, + "grad_norm": 10.684134483337402, + "learning_rate": 1.2484298381297666e-07, + "loss": 0.0986, + "num_input_tokens_seen": 223484032, + "step": 71005 + }, + { + "epoch": 4.545803725753793, + "grad_norm": 7.788516998291016, + "learning_rate": 1.2466871076215415e-07, + "loss": 0.1461, + "num_input_tokens_seen": 223499136, + "step": 71010 + }, + { + "epoch": 4.546123807694769, + "grad_norm": 6.616151332855225, + "learning_rate": 1.244945563230035e-07, + "loss": 0.1309, + "num_input_tokens_seen": 223515328, + "step": 71015 + }, + { + "epoch": 4.546443889635746, + "grad_norm": 3.4060442447662354, + "learning_rate": 1.2432052050422084e-07, + "loss": 0.0979, + "num_input_tokens_seen": 223530880, + "step": 71020 + }, + { + "epoch": 4.546763971576723, + "grad_norm": 6.119932174682617, + "learning_rate": 1.241466033144975e-07, + "loss": 0.1408, + "num_input_tokens_seen": 223545728, + "step": 71025 + }, + { + "epoch": 4.5470840535177, + "grad_norm": 8.858579635620117, + "learning_rate": 1.239728047625169e-07, + "loss": 0.2186, + "num_input_tokens_seen": 223561600, + "step": 71030 + }, + { + "epoch": 4.547404135458677, + "grad_norm": 5.5674028396606445, + "learning_rate": 1.2379912485695838e-07, + "loss": 0.1206, + "num_input_tokens_seen": 223576000, + "step": 71035 + }, + { + "epoch": 4.547724217399654, + "grad_norm": 7.265388011932373, + "learning_rate": 1.2362556360649452e-07, + "loss": 0.1115, + "num_input_tokens_seen": 223591168, + "step": 71040 + }, + { + "epoch": 4.548044299340631, + "grad_norm": 8.042901992797852, + "learning_rate": 1.2345212101979199e-07, + "loss": 0.1072, + "num_input_tokens_seen": 223607168, + "step": 71045 + }, + { + "epoch": 4.548364381281608, + "grad_norm": 7.017349720001221, + "learning_rate": 1.2327879710551133e-07, + "loss": 0.1287, + "num_input_tokens_seen": 223622400, + "step": 71050 + }, + { + "epoch": 4.548684463222585, + "grad_norm": 29.258291244506836, + "learning_rate": 1.2310559187230787e-07, + "loss": 0.1505, + "num_input_tokens_seen": 223637696, + "step": 71055 + }, + { + "epoch": 4.549004545163562, + "grad_norm": 7.76992654800415, + "learning_rate": 1.2293250532883022e-07, + "loss": 0.1425, + "num_input_tokens_seen": 223653248, + "step": 71060 + }, + { + "epoch": 4.549324627104538, + "grad_norm": 21.00111961364746, + "learning_rate": 1.227595374837215e-07, + "loss": 0.1391, + "num_input_tokens_seen": 223669248, + "step": 71065 + }, + { + "epoch": 4.549644709045515, + "grad_norm": 4.824155807495117, + "learning_rate": 1.2258668834561866e-07, + "loss": 0.12, + "num_input_tokens_seen": 223685568, + "step": 71070 + }, + { + "epoch": 4.549964790986492, + "grad_norm": 6.616867542266846, + "learning_rate": 1.2241395792315342e-07, + "loss": 0.1335, + "num_input_tokens_seen": 223701184, + "step": 71075 + }, + { + "epoch": 4.550284872927469, + "grad_norm": 4.962491512298584, + "learning_rate": 1.2224134622495083e-07, + "loss": 0.1077, + "num_input_tokens_seen": 223717632, + "step": 71080 + }, + { + "epoch": 4.550604954868446, + "grad_norm": 5.724390029907227, + "learning_rate": 1.2206885325963008e-07, + "loss": 0.1536, + "num_input_tokens_seen": 223732928, + "step": 71085 + }, + { + "epoch": 4.550925036809423, + "grad_norm": 10.609411239624023, + "learning_rate": 1.218964790358043e-07, + "loss": 0.1241, + "num_input_tokens_seen": 223749760, + "step": 71090 + }, + { + "epoch": 4.5512451187504, + "grad_norm": 7.569988250732422, + "learning_rate": 1.2172422356208164e-07, + "loss": 0.1209, + "num_input_tokens_seen": 223765568, + "step": 71095 + }, + { + "epoch": 4.551565200691377, + "grad_norm": 7.847097396850586, + "learning_rate": 1.215520868470632e-07, + "loss": 0.1231, + "num_input_tokens_seen": 223781568, + "step": 71100 + }, + { + "epoch": 4.5518852826323535, + "grad_norm": 21.78169822692871, + "learning_rate": 1.2138006889934411e-07, + "loss": 0.1464, + "num_input_tokens_seen": 223797184, + "step": 71105 + }, + { + "epoch": 4.5522053645733305, + "grad_norm": 5.307941436767578, + "learning_rate": 1.2120816972751553e-07, + "loss": 0.1143, + "num_input_tokens_seen": 223812672, + "step": 71110 + }, + { + "epoch": 4.5525254465143075, + "grad_norm": 3.840822219848633, + "learning_rate": 1.210363893401595e-07, + "loss": 0.1232, + "num_input_tokens_seen": 223828608, + "step": 71115 + }, + { + "epoch": 4.5528455284552845, + "grad_norm": 8.970829010009766, + "learning_rate": 1.2086472774585495e-07, + "loss": 0.1691, + "num_input_tokens_seen": 223843776, + "step": 71120 + }, + { + "epoch": 4.553165610396261, + "grad_norm": 17.508676528930664, + "learning_rate": 1.206931849531734e-07, + "loss": 0.1783, + "num_input_tokens_seen": 223859776, + "step": 71125 + }, + { + "epoch": 4.553485692337238, + "grad_norm": 4.404343605041504, + "learning_rate": 1.2052176097068075e-07, + "loss": 0.1281, + "num_input_tokens_seen": 223875264, + "step": 71130 + }, + { + "epoch": 4.553805774278215, + "grad_norm": 12.504973411560059, + "learning_rate": 1.2035045580693737e-07, + "loss": 0.1319, + "num_input_tokens_seen": 223891264, + "step": 71135 + }, + { + "epoch": 4.554125856219192, + "grad_norm": 7.703059196472168, + "learning_rate": 1.2017926947049647e-07, + "loss": 0.177, + "num_input_tokens_seen": 223906944, + "step": 71140 + }, + { + "epoch": 4.554445938160169, + "grad_norm": 7.3925862312316895, + "learning_rate": 1.2000820196990782e-07, + "loss": 0.0987, + "num_input_tokens_seen": 223921728, + "step": 71145 + }, + { + "epoch": 4.5547660201011455, + "grad_norm": 26.032955169677734, + "learning_rate": 1.1983725331371182e-07, + "loss": 0.1643, + "num_input_tokens_seen": 223937664, + "step": 71150 + }, + { + "epoch": 4.5550861020421225, + "grad_norm": 7.102910995483398, + "learning_rate": 1.1966642351044583e-07, + "loss": 0.1039, + "num_input_tokens_seen": 223954368, + "step": 71155 + }, + { + "epoch": 4.5554061839830995, + "grad_norm": 5.531511306762695, + "learning_rate": 1.1949571256863997e-07, + "loss": 0.1078, + "num_input_tokens_seen": 223969728, + "step": 71160 + }, + { + "epoch": 4.5557262659240765, + "grad_norm": 7.834264278411865, + "learning_rate": 1.193251204968185e-07, + "loss": 0.1415, + "num_input_tokens_seen": 223985216, + "step": 71165 + }, + { + "epoch": 4.5560463478650535, + "grad_norm": 5.659829616546631, + "learning_rate": 1.1915464730350018e-07, + "loss": 0.1583, + "num_input_tokens_seen": 224000192, + "step": 71170 + }, + { + "epoch": 4.5563664298060305, + "grad_norm": 5.177059650421143, + "learning_rate": 1.1898429299719739e-07, + "loss": 0.0869, + "num_input_tokens_seen": 224017088, + "step": 71175 + }, + { + "epoch": 4.5566865117470075, + "grad_norm": 6.428414821624756, + "learning_rate": 1.1881405758641662e-07, + "loss": 0.1612, + "num_input_tokens_seen": 224032320, + "step": 71180 + }, + { + "epoch": 4.557006593687984, + "grad_norm": 7.274055004119873, + "learning_rate": 1.1864394107965916e-07, + "loss": 0.1646, + "num_input_tokens_seen": 224047872, + "step": 71185 + }, + { + "epoch": 4.5573266756289605, + "grad_norm": 10.652641296386719, + "learning_rate": 1.1847394348541846e-07, + "loss": 0.1223, + "num_input_tokens_seen": 224064064, + "step": 71190 + }, + { + "epoch": 4.5576467575699375, + "grad_norm": 6.448033809661865, + "learning_rate": 1.1830406481218443e-07, + "loss": 0.1125, + "num_input_tokens_seen": 224078528, + "step": 71195 + }, + { + "epoch": 4.5579668395109145, + "grad_norm": 7.165369987487793, + "learning_rate": 1.181343050684397e-07, + "loss": 0.1145, + "num_input_tokens_seen": 224093696, + "step": 71200 + }, + { + "epoch": 4.5582869214518915, + "grad_norm": 7.74601936340332, + "learning_rate": 1.1796466426266112e-07, + "loss": 0.0946, + "num_input_tokens_seen": 224109184, + "step": 71205 + }, + { + "epoch": 4.5586070033928685, + "grad_norm": 5.739587783813477, + "learning_rate": 1.1779514240331969e-07, + "loss": 0.104, + "num_input_tokens_seen": 224124160, + "step": 71210 + }, + { + "epoch": 4.5589270853338455, + "grad_norm": 6.825385093688965, + "learning_rate": 1.1762573949888002e-07, + "loss": 0.1091, + "num_input_tokens_seen": 224141568, + "step": 71215 + }, + { + "epoch": 4.5592471672748225, + "grad_norm": 5.630831241607666, + "learning_rate": 1.1745645555780227e-07, + "loss": 0.0917, + "num_input_tokens_seen": 224156992, + "step": 71220 + }, + { + "epoch": 4.5595672492157995, + "grad_norm": 5.381971836090088, + "learning_rate": 1.1728729058853806e-07, + "loss": 0.1227, + "num_input_tokens_seen": 224171840, + "step": 71225 + }, + { + "epoch": 4.5598873311567765, + "grad_norm": 10.815786361694336, + "learning_rate": 1.1711824459953641e-07, + "loss": 0.1402, + "num_input_tokens_seen": 224188544, + "step": 71230 + }, + { + "epoch": 4.5602074130977535, + "grad_norm": 6.623733043670654, + "learning_rate": 1.1694931759923672e-07, + "loss": 0.14, + "num_input_tokens_seen": 224203648, + "step": 71235 + }, + { + "epoch": 4.56052749503873, + "grad_norm": 6.428081035614014, + "learning_rate": 1.1678050959607584e-07, + "loss": 0.1254, + "num_input_tokens_seen": 224219328, + "step": 71240 + }, + { + "epoch": 4.5608475769797066, + "grad_norm": 6.496181964874268, + "learning_rate": 1.1661182059848231e-07, + "loss": 0.1012, + "num_input_tokens_seen": 224235712, + "step": 71245 + }, + { + "epoch": 4.5611676589206835, + "grad_norm": 7.957906246185303, + "learning_rate": 1.1644325061487965e-07, + "loss": 0.113, + "num_input_tokens_seen": 224252480, + "step": 71250 + }, + { + "epoch": 4.5614877408616605, + "grad_norm": 5.487069606781006, + "learning_rate": 1.1627479965368615e-07, + "loss": 0.1115, + "num_input_tokens_seen": 224268800, + "step": 71255 + }, + { + "epoch": 4.5618078228026375, + "grad_norm": 6.834134101867676, + "learning_rate": 1.161064677233123e-07, + "loss": 0.0969, + "num_input_tokens_seen": 224285120, + "step": 71260 + }, + { + "epoch": 4.5621279047436145, + "grad_norm": 5.885236740112305, + "learning_rate": 1.1593825483216442e-07, + "loss": 0.1204, + "num_input_tokens_seen": 224299776, + "step": 71265 + }, + { + "epoch": 4.5624479866845915, + "grad_norm": 7.642640590667725, + "learning_rate": 1.1577016098864191e-07, + "loss": 0.1214, + "num_input_tokens_seen": 224315968, + "step": 71270 + }, + { + "epoch": 4.5627680686255685, + "grad_norm": 5.0720086097717285, + "learning_rate": 1.156021862011386e-07, + "loss": 0.1247, + "num_input_tokens_seen": 224333120, + "step": 71275 + }, + { + "epoch": 4.563088150566545, + "grad_norm": 7.981992721557617, + "learning_rate": 1.1543433047804225e-07, + "loss": 0.1869, + "num_input_tokens_seen": 224348352, + "step": 71280 + }, + { + "epoch": 4.563408232507522, + "grad_norm": 29.098562240600586, + "learning_rate": 1.1526659382773475e-07, + "loss": 0.1544, + "num_input_tokens_seen": 224364416, + "step": 71285 + }, + { + "epoch": 4.563728314448499, + "grad_norm": 7.901058197021484, + "learning_rate": 1.150989762585919e-07, + "loss": 0.0978, + "num_input_tokens_seen": 224379520, + "step": 71290 + }, + { + "epoch": 4.564048396389476, + "grad_norm": 7.1301960945129395, + "learning_rate": 1.1493147777898367e-07, + "loss": 0.1321, + "num_input_tokens_seen": 224394368, + "step": 71295 + }, + { + "epoch": 4.564368478330453, + "grad_norm": 9.148780822753906, + "learning_rate": 1.1476409839727365e-07, + "loss": 0.1334, + "num_input_tokens_seen": 224410304, + "step": 71300 + }, + { + "epoch": 4.5646885602714296, + "grad_norm": 10.482728958129883, + "learning_rate": 1.1459683812182099e-07, + "loss": 0.1488, + "num_input_tokens_seen": 224427328, + "step": 71305 + }, + { + "epoch": 4.5650086422124065, + "grad_norm": 7.591701030731201, + "learning_rate": 1.144296969609765e-07, + "loss": 0.1515, + "num_input_tokens_seen": 224443328, + "step": 71310 + }, + { + "epoch": 4.5653287241533835, + "grad_norm": 6.389355659484863, + "learning_rate": 1.142626749230874e-07, + "loss": 0.0841, + "num_input_tokens_seen": 224459776, + "step": 71315 + }, + { + "epoch": 4.5656488060943605, + "grad_norm": 6.0576701164245605, + "learning_rate": 1.1409577201649341e-07, + "loss": 0.1807, + "num_input_tokens_seen": 224474944, + "step": 71320 + }, + { + "epoch": 4.5659688880353375, + "grad_norm": 11.462362289428711, + "learning_rate": 1.1392898824952869e-07, + "loss": 0.1202, + "num_input_tokens_seen": 224490560, + "step": 71325 + }, + { + "epoch": 4.566288969976314, + "grad_norm": 5.113414764404297, + "learning_rate": 1.1376232363052187e-07, + "loss": 0.1203, + "num_input_tokens_seen": 224505280, + "step": 71330 + }, + { + "epoch": 4.566609051917291, + "grad_norm": 4.6174702644348145, + "learning_rate": 1.1359577816779488e-07, + "loss": 0.1206, + "num_input_tokens_seen": 224520768, + "step": 71335 + }, + { + "epoch": 4.566929133858268, + "grad_norm": 7.834562301635742, + "learning_rate": 1.1342935186966497e-07, + "loss": 0.1084, + "num_input_tokens_seen": 224536960, + "step": 71340 + }, + { + "epoch": 4.567249215799245, + "grad_norm": 4.9163618087768555, + "learning_rate": 1.1326304474444134e-07, + "loss": 0.1134, + "num_input_tokens_seen": 224553792, + "step": 71345 + }, + { + "epoch": 4.567569297740222, + "grad_norm": 6.224636554718018, + "learning_rate": 1.1309685680042953e-07, + "loss": 0.1283, + "num_input_tokens_seen": 224569280, + "step": 71350 + }, + { + "epoch": 4.567889379681199, + "grad_norm": 7.032807350158691, + "learning_rate": 1.1293078804592767e-07, + "loss": 0.1325, + "num_input_tokens_seen": 224583936, + "step": 71355 + }, + { + "epoch": 4.568209461622176, + "grad_norm": 8.995983123779297, + "learning_rate": 1.1276483848922853e-07, + "loss": 0.1016, + "num_input_tokens_seen": 224600192, + "step": 71360 + }, + { + "epoch": 4.5685295435631526, + "grad_norm": 8.734341621398926, + "learning_rate": 1.1259900813861885e-07, + "loss": 0.1563, + "num_input_tokens_seen": 224615424, + "step": 71365 + }, + { + "epoch": 4.568849625504129, + "grad_norm": 6.63704252243042, + "learning_rate": 1.1243329700237864e-07, + "loss": 0.1742, + "num_input_tokens_seen": 224631616, + "step": 71370 + }, + { + "epoch": 4.569169707445106, + "grad_norm": 9.61662769317627, + "learning_rate": 1.122677050887841e-07, + "loss": 0.1483, + "num_input_tokens_seen": 224647936, + "step": 71375 + }, + { + "epoch": 4.569489789386083, + "grad_norm": 11.066605567932129, + "learning_rate": 1.1210223240610218e-07, + "loss": 0.1854, + "num_input_tokens_seen": 224662208, + "step": 71380 + }, + { + "epoch": 4.56980987132706, + "grad_norm": 4.954483985900879, + "learning_rate": 1.1193687896259742e-07, + "loss": 0.1174, + "num_input_tokens_seen": 224678720, + "step": 71385 + }, + { + "epoch": 4.570129953268037, + "grad_norm": 6.601202487945557, + "learning_rate": 1.1177164476652569e-07, + "loss": 0.143, + "num_input_tokens_seen": 224693824, + "step": 71390 + }, + { + "epoch": 4.570450035209014, + "grad_norm": 7.2526068687438965, + "learning_rate": 1.1160652982613818e-07, + "loss": 0.1392, + "num_input_tokens_seen": 224709120, + "step": 71395 + }, + { + "epoch": 4.570770117149991, + "grad_norm": 11.176554679870605, + "learning_rate": 1.1144153414967967e-07, + "loss": 0.1682, + "num_input_tokens_seen": 224724288, + "step": 71400 + }, + { + "epoch": 4.571090199090968, + "grad_norm": 5.384622573852539, + "learning_rate": 1.112766577453897e-07, + "loss": 0.1275, + "num_input_tokens_seen": 224739584, + "step": 71405 + }, + { + "epoch": 4.571410281031945, + "grad_norm": 10.443768501281738, + "learning_rate": 1.1111190062150057e-07, + "loss": 0.1433, + "num_input_tokens_seen": 224755136, + "step": 71410 + }, + { + "epoch": 4.571730362972921, + "grad_norm": 9.18533992767334, + "learning_rate": 1.1094726278624068e-07, + "loss": 0.1184, + "num_input_tokens_seen": 224771008, + "step": 71415 + }, + { + "epoch": 4.572050444913898, + "grad_norm": 2.480837821960449, + "learning_rate": 1.1078274424782959e-07, + "loss": 0.1282, + "num_input_tokens_seen": 224786176, + "step": 71420 + }, + { + "epoch": 4.572370526854875, + "grad_norm": 7.883366584777832, + "learning_rate": 1.1061834501448376e-07, + "loss": 0.1075, + "num_input_tokens_seen": 224801600, + "step": 71425 + }, + { + "epoch": 4.572690608795852, + "grad_norm": 5.930212020874023, + "learning_rate": 1.1045406509441164e-07, + "loss": 0.0999, + "num_input_tokens_seen": 224816576, + "step": 71430 + }, + { + "epoch": 4.573010690736829, + "grad_norm": 7.102156639099121, + "learning_rate": 1.1028990449581694e-07, + "loss": 0.1159, + "num_input_tokens_seen": 224831104, + "step": 71435 + }, + { + "epoch": 4.573330772677806, + "grad_norm": 5.555959224700928, + "learning_rate": 1.10125863226897e-07, + "loss": 0.1193, + "num_input_tokens_seen": 224845376, + "step": 71440 + }, + { + "epoch": 4.573650854618783, + "grad_norm": 7.465349197387695, + "learning_rate": 1.0996194129584276e-07, + "loss": 0.1172, + "num_input_tokens_seen": 224860992, + "step": 71445 + }, + { + "epoch": 4.57397093655976, + "grad_norm": 11.378684043884277, + "learning_rate": 1.0979813871084044e-07, + "loss": 0.1335, + "num_input_tokens_seen": 224876672, + "step": 71450 + }, + { + "epoch": 4.574291018500736, + "grad_norm": 9.022961616516113, + "learning_rate": 1.0963445548006824e-07, + "loss": 0.1455, + "num_input_tokens_seen": 224892608, + "step": 71455 + }, + { + "epoch": 4.574611100441713, + "grad_norm": 5.659968852996826, + "learning_rate": 1.0947089161170099e-07, + "loss": 0.1401, + "num_input_tokens_seen": 224907968, + "step": 71460 + }, + { + "epoch": 4.57493118238269, + "grad_norm": 4.032375812530518, + "learning_rate": 1.0930744711390523e-07, + "loss": 0.0782, + "num_input_tokens_seen": 224922496, + "step": 71465 + }, + { + "epoch": 4.575251264323667, + "grad_norm": 4.224039077758789, + "learning_rate": 1.0914412199484303e-07, + "loss": 0.1133, + "num_input_tokens_seen": 224937856, + "step": 71470 + }, + { + "epoch": 4.575571346264644, + "grad_norm": 5.752786159515381, + "learning_rate": 1.0898091626266983e-07, + "loss": 0.1167, + "num_input_tokens_seen": 224953600, + "step": 71475 + }, + { + "epoch": 4.575891428205621, + "grad_norm": 5.29496431350708, + "learning_rate": 1.0881782992553546e-07, + "loss": 0.1403, + "num_input_tokens_seen": 224970432, + "step": 71480 + }, + { + "epoch": 4.576211510146598, + "grad_norm": 6.545330047607422, + "learning_rate": 1.0865486299158317e-07, + "loss": 0.1316, + "num_input_tokens_seen": 224986304, + "step": 71485 + }, + { + "epoch": 4.576531592087575, + "grad_norm": 6.485830307006836, + "learning_rate": 1.0849201546895089e-07, + "loss": 0.1415, + "num_input_tokens_seen": 225001536, + "step": 71490 + }, + { + "epoch": 4.576851674028552, + "grad_norm": 6.584132194519043, + "learning_rate": 1.083292873657707e-07, + "loss": 0.2262, + "num_input_tokens_seen": 225016384, + "step": 71495 + }, + { + "epoch": 4.577171755969529, + "grad_norm": 6.552221298217773, + "learning_rate": 1.0816667869016806e-07, + "loss": 0.1604, + "num_input_tokens_seen": 225031296, + "step": 71500 + }, + { + "epoch": 4.577491837910505, + "grad_norm": 7.097666263580322, + "learning_rate": 1.0800418945026314e-07, + "loss": 0.1623, + "num_input_tokens_seen": 225047168, + "step": 71505 + }, + { + "epoch": 4.577811919851482, + "grad_norm": 5.775707244873047, + "learning_rate": 1.0784181965416918e-07, + "loss": 0.14, + "num_input_tokens_seen": 225062720, + "step": 71510 + }, + { + "epoch": 4.578132001792459, + "grad_norm": 8.498727798461914, + "learning_rate": 1.0767956930999468e-07, + "loss": 0.1263, + "num_input_tokens_seen": 225078720, + "step": 71515 + }, + { + "epoch": 4.578452083733436, + "grad_norm": 6.494494915008545, + "learning_rate": 1.0751743842584123e-07, + "loss": 0.1385, + "num_input_tokens_seen": 225094848, + "step": 71520 + }, + { + "epoch": 4.578772165674413, + "grad_norm": 6.932327747344971, + "learning_rate": 1.0735542700980512e-07, + "loss": 0.1383, + "num_input_tokens_seen": 225110528, + "step": 71525 + }, + { + "epoch": 4.57909224761539, + "grad_norm": 4.938967227935791, + "learning_rate": 1.071935350699757e-07, + "loss": 0.1028, + "num_input_tokens_seen": 225125504, + "step": 71530 + }, + { + "epoch": 4.579412329556367, + "grad_norm": 6.0139594078063965, + "learning_rate": 1.070317626144382e-07, + "loss": 0.1912, + "num_input_tokens_seen": 225141312, + "step": 71535 + }, + { + "epoch": 4.579732411497344, + "grad_norm": 7.175844192504883, + "learning_rate": 1.0687010965126921e-07, + "loss": 0.1729, + "num_input_tokens_seen": 225156544, + "step": 71540 + }, + { + "epoch": 4.58005249343832, + "grad_norm": 12.532125473022461, + "learning_rate": 1.0670857618854197e-07, + "loss": 0.0871, + "num_input_tokens_seen": 225172800, + "step": 71545 + }, + { + "epoch": 4.580372575379297, + "grad_norm": 6.1944499015808105, + "learning_rate": 1.0654716223432199e-07, + "loss": 0.1186, + "num_input_tokens_seen": 225187648, + "step": 71550 + }, + { + "epoch": 4.580692657320274, + "grad_norm": 5.0415849685668945, + "learning_rate": 1.0638586779666976e-07, + "loss": 0.112, + "num_input_tokens_seen": 225202752, + "step": 71555 + }, + { + "epoch": 4.581012739261251, + "grad_norm": 3.8703994750976562, + "learning_rate": 1.0622469288363968e-07, + "loss": 0.1416, + "num_input_tokens_seen": 225218240, + "step": 71560 + }, + { + "epoch": 4.581332821202228, + "grad_norm": 8.292884826660156, + "learning_rate": 1.0606363750327947e-07, + "loss": 0.1091, + "num_input_tokens_seen": 225236992, + "step": 71565 + }, + { + "epoch": 4.581652903143205, + "grad_norm": 8.511720657348633, + "learning_rate": 1.0590270166363187e-07, + "loss": 0.1602, + "num_input_tokens_seen": 225252416, + "step": 71570 + }, + { + "epoch": 4.581972985084182, + "grad_norm": 6.415884971618652, + "learning_rate": 1.0574188537273267e-07, + "loss": 0.1033, + "num_input_tokens_seen": 225268160, + "step": 71575 + }, + { + "epoch": 4.582293067025159, + "grad_norm": 6.361555576324463, + "learning_rate": 1.0558118863861266e-07, + "loss": 0.1463, + "num_input_tokens_seen": 225282944, + "step": 71580 + }, + { + "epoch": 4.582613148966136, + "grad_norm": 4.879749774932861, + "learning_rate": 1.0542061146929599e-07, + "loss": 0.1086, + "num_input_tokens_seen": 225298432, + "step": 71585 + }, + { + "epoch": 4.582933230907113, + "grad_norm": 7.25694465637207, + "learning_rate": 1.0526015387280098e-07, + "loss": 0.1555, + "num_input_tokens_seen": 225313984, + "step": 71590 + }, + { + "epoch": 4.583253312848089, + "grad_norm": 4.698009967803955, + "learning_rate": 1.0509981585714035e-07, + "loss": 0.0699, + "num_input_tokens_seen": 225328768, + "step": 71595 + }, + { + "epoch": 4.583573394789066, + "grad_norm": 7.996856689453125, + "learning_rate": 1.0493959743031995e-07, + "loss": 0.1617, + "num_input_tokens_seen": 225343808, + "step": 71600 + }, + { + "epoch": 4.583893476730043, + "grad_norm": 7.945338726043701, + "learning_rate": 1.0477949860034115e-07, + "loss": 0.1486, + "num_input_tokens_seen": 225359360, + "step": 71605 + }, + { + "epoch": 4.58421355867102, + "grad_norm": 7.863069534301758, + "learning_rate": 1.0461951937519726e-07, + "loss": 0.1467, + "num_input_tokens_seen": 225374848, + "step": 71610 + }, + { + "epoch": 4.584533640611997, + "grad_norm": 7.342241287231445, + "learning_rate": 1.0445965976287803e-07, + "loss": 0.1638, + "num_input_tokens_seen": 225390144, + "step": 71615 + }, + { + "epoch": 4.584853722552974, + "grad_norm": 6.475534439086914, + "learning_rate": 1.0429991977136539e-07, + "loss": 0.1237, + "num_input_tokens_seen": 225405056, + "step": 71620 + }, + { + "epoch": 4.585173804493951, + "grad_norm": 7.562079429626465, + "learning_rate": 1.0414029940863601e-07, + "loss": 0.1357, + "num_input_tokens_seen": 225420864, + "step": 71625 + }, + { + "epoch": 4.585493886434928, + "grad_norm": 4.251443862915039, + "learning_rate": 1.0398079868266048e-07, + "loss": 0.1428, + "num_input_tokens_seen": 225439104, + "step": 71630 + }, + { + "epoch": 4.585813968375904, + "grad_norm": 7.148857116699219, + "learning_rate": 1.0382141760140352e-07, + "loss": 0.1331, + "num_input_tokens_seen": 225455104, + "step": 71635 + }, + { + "epoch": 4.586134050316881, + "grad_norm": 4.768857955932617, + "learning_rate": 1.036621561728235e-07, + "loss": 0.1329, + "num_input_tokens_seen": 225469440, + "step": 71640 + }, + { + "epoch": 4.586454132257858, + "grad_norm": 7.835071563720703, + "learning_rate": 1.0350301440487376e-07, + "loss": 0.1366, + "num_input_tokens_seen": 225484800, + "step": 71645 + }, + { + "epoch": 4.586774214198835, + "grad_norm": 6.409290313720703, + "learning_rate": 1.033439923054999e-07, + "loss": 0.1327, + "num_input_tokens_seen": 225499648, + "step": 71650 + }, + { + "epoch": 4.587094296139812, + "grad_norm": 4.580118656158447, + "learning_rate": 1.031850898826442e-07, + "loss": 0.111, + "num_input_tokens_seen": 225515008, + "step": 71655 + }, + { + "epoch": 4.587414378080789, + "grad_norm": 73.70586395263672, + "learning_rate": 1.0302630714423972e-07, + "loss": 0.2199, + "num_input_tokens_seen": 225530304, + "step": 71660 + }, + { + "epoch": 4.587734460021766, + "grad_norm": 7.0903472900390625, + "learning_rate": 1.0286764409821654e-07, + "loss": 0.0972, + "num_input_tokens_seen": 225545664, + "step": 71665 + }, + { + "epoch": 4.588054541962743, + "grad_norm": 20.730445861816406, + "learning_rate": 1.027091007524969e-07, + "loss": 0.204, + "num_input_tokens_seen": 225561088, + "step": 71670 + }, + { + "epoch": 4.58837462390372, + "grad_norm": 5.995267391204834, + "learning_rate": 1.0255067711499756e-07, + "loss": 0.1343, + "num_input_tokens_seen": 225577216, + "step": 71675 + }, + { + "epoch": 4.588694705844697, + "grad_norm": 7.8113322257995605, + "learning_rate": 1.0239237319363021e-07, + "loss": 0.1733, + "num_input_tokens_seen": 225592512, + "step": 71680 + }, + { + "epoch": 4.589014787785673, + "grad_norm": 7.890571594238281, + "learning_rate": 1.0223418899629828e-07, + "loss": 0.1738, + "num_input_tokens_seen": 225608320, + "step": 71685 + }, + { + "epoch": 4.58933486972665, + "grad_norm": 6.082173824310303, + "learning_rate": 1.0207612453090182e-07, + "loss": 0.143, + "num_input_tokens_seen": 225624128, + "step": 71690 + }, + { + "epoch": 4.589654951667627, + "grad_norm": 5.0581440925598145, + "learning_rate": 1.0191817980533315e-07, + "loss": 0.0987, + "num_input_tokens_seen": 225640320, + "step": 71695 + }, + { + "epoch": 4.589975033608604, + "grad_norm": 7.397561550140381, + "learning_rate": 1.0176035482747981e-07, + "loss": 0.1262, + "num_input_tokens_seen": 225655808, + "step": 71700 + }, + { + "epoch": 4.590295115549581, + "grad_norm": 7.273339748382568, + "learning_rate": 1.016026496052222e-07, + "loss": 0.1595, + "num_input_tokens_seen": 225670912, + "step": 71705 + }, + { + "epoch": 4.590615197490558, + "grad_norm": 10.695024490356445, + "learning_rate": 1.014450641464354e-07, + "loss": 0.1142, + "num_input_tokens_seen": 225687104, + "step": 71710 + }, + { + "epoch": 4.590935279431535, + "grad_norm": 5.577702522277832, + "learning_rate": 1.0128759845898838e-07, + "loss": 0.1299, + "num_input_tokens_seen": 225702080, + "step": 71715 + }, + { + "epoch": 4.591255361372511, + "grad_norm": 7.252420902252197, + "learning_rate": 1.0113025255074432e-07, + "loss": 0.0972, + "num_input_tokens_seen": 225717056, + "step": 71720 + }, + { + "epoch": 4.591575443313488, + "grad_norm": 10.463665962219238, + "learning_rate": 1.0097302642956025e-07, + "loss": 0.1374, + "num_input_tokens_seen": 225732224, + "step": 71725 + }, + { + "epoch": 4.591895525254465, + "grad_norm": 6.685267448425293, + "learning_rate": 1.0081592010328711e-07, + "loss": 0.1285, + "num_input_tokens_seen": 225748352, + "step": 71730 + }, + { + "epoch": 4.592215607195442, + "grad_norm": 3.370009422302246, + "learning_rate": 1.006589335797703e-07, + "loss": 0.118, + "num_input_tokens_seen": 225763840, + "step": 71735 + }, + { + "epoch": 4.592535689136419, + "grad_norm": 8.290106773376465, + "learning_rate": 1.0050206686684827e-07, + "loss": 0.1691, + "num_input_tokens_seen": 225779200, + "step": 71740 + }, + { + "epoch": 4.592855771077396, + "grad_norm": 11.623377799987793, + "learning_rate": 1.0034531997235475e-07, + "loss": 0.1638, + "num_input_tokens_seen": 225794496, + "step": 71745 + }, + { + "epoch": 4.593175853018373, + "grad_norm": 6.463217735290527, + "learning_rate": 1.0018869290411654e-07, + "loss": 0.1313, + "num_input_tokens_seen": 225811904, + "step": 71750 + }, + { + "epoch": 4.59349593495935, + "grad_norm": 8.300263404846191, + "learning_rate": 1.0003218566995487e-07, + "loss": 0.1251, + "num_input_tokens_seen": 225828288, + "step": 71755 + }, + { + "epoch": 4.593816016900327, + "grad_norm": 9.260213851928711, + "learning_rate": 9.987579827768462e-08, + "loss": 0.1297, + "num_input_tokens_seen": 225843904, + "step": 71760 + }, + { + "epoch": 4.594136098841304, + "grad_norm": 7.012050628662109, + "learning_rate": 9.971953073511565e-08, + "loss": 0.1375, + "num_input_tokens_seen": 225860032, + "step": 71765 + }, + { + "epoch": 4.59445618078228, + "grad_norm": 7.471791744232178, + "learning_rate": 9.95633830500503e-08, + "loss": 0.0914, + "num_input_tokens_seen": 225875072, + "step": 71770 + }, + { + "epoch": 4.594776262723257, + "grad_norm": 7.053227424621582, + "learning_rate": 9.940735523028628e-08, + "loss": 0.1657, + "num_input_tokens_seen": 225891392, + "step": 71775 + }, + { + "epoch": 4.595096344664234, + "grad_norm": 7.299990177154541, + "learning_rate": 9.92514472836148e-08, + "loss": 0.1509, + "num_input_tokens_seen": 225906176, + "step": 71780 + }, + { + "epoch": 4.595416426605211, + "grad_norm": 6.555915355682373, + "learning_rate": 9.909565921782077e-08, + "loss": 0.1462, + "num_input_tokens_seen": 225923520, + "step": 71785 + }, + { + "epoch": 4.595736508546188, + "grad_norm": 3.677220106124878, + "learning_rate": 9.893999104068408e-08, + "loss": 0.1051, + "num_input_tokens_seen": 225939968, + "step": 71790 + }, + { + "epoch": 4.596056590487165, + "grad_norm": 15.743525505065918, + "learning_rate": 9.878444275997712e-08, + "loss": 0.1622, + "num_input_tokens_seen": 225955136, + "step": 71795 + }, + { + "epoch": 4.596376672428142, + "grad_norm": 5.190000534057617, + "learning_rate": 9.862901438346839e-08, + "loss": 0.1444, + "num_input_tokens_seen": 225970432, + "step": 71800 + }, + { + "epoch": 4.596696754369119, + "grad_norm": 4.118879318237305, + "learning_rate": 9.847370591891752e-08, + "loss": 0.0974, + "num_input_tokens_seen": 225986816, + "step": 71805 + }, + { + "epoch": 4.597016836310095, + "grad_norm": 4.617920875549316, + "learning_rate": 9.83185173740811e-08, + "loss": 0.1092, + "num_input_tokens_seen": 226002688, + "step": 71810 + }, + { + "epoch": 4.597336918251072, + "grad_norm": 7.099570274353027, + "learning_rate": 9.816344875670791e-08, + "loss": 0.1662, + "num_input_tokens_seen": 226017728, + "step": 71815 + }, + { + "epoch": 4.597657000192049, + "grad_norm": 6.205284595489502, + "learning_rate": 9.80085000745415e-08, + "loss": 0.0916, + "num_input_tokens_seen": 226033280, + "step": 71820 + }, + { + "epoch": 4.597977082133026, + "grad_norm": 11.002988815307617, + "learning_rate": 9.785367133531898e-08, + "loss": 0.1357, + "num_input_tokens_seen": 226049472, + "step": 71825 + }, + { + "epoch": 4.598297164074003, + "grad_norm": 8.545486450195312, + "learning_rate": 9.76989625467717e-08, + "loss": 0.1376, + "num_input_tokens_seen": 226065792, + "step": 71830 + }, + { + "epoch": 4.59861724601498, + "grad_norm": 6.269343852996826, + "learning_rate": 9.75443737166254e-08, + "loss": 0.0953, + "num_input_tokens_seen": 226082944, + "step": 71835 + }, + { + "epoch": 4.598937327955957, + "grad_norm": 8.879371643066406, + "learning_rate": 9.738990485259864e-08, + "loss": 0.1213, + "num_input_tokens_seen": 226098368, + "step": 71840 + }, + { + "epoch": 4.599257409896934, + "grad_norm": 5.988637447357178, + "learning_rate": 9.723555596240553e-08, + "loss": 0.1013, + "num_input_tokens_seen": 226113280, + "step": 71845 + }, + { + "epoch": 4.599577491837911, + "grad_norm": 5.582190990447998, + "learning_rate": 9.708132705375351e-08, + "loss": 0.1224, + "num_input_tokens_seen": 226128512, + "step": 71850 + }, + { + "epoch": 4.599897573778888, + "grad_norm": 8.393719673156738, + "learning_rate": 9.692721813434336e-08, + "loss": 0.1327, + "num_input_tokens_seen": 226144640, + "step": 71855 + }, + { + "epoch": 4.600217655719864, + "grad_norm": 8.21787166595459, + "learning_rate": 9.677322921187088e-08, + "loss": 0.1267, + "num_input_tokens_seen": 226159488, + "step": 71860 + }, + { + "epoch": 4.600537737660841, + "grad_norm": 6.483309745788574, + "learning_rate": 9.661936029402546e-08, + "loss": 0.1427, + "num_input_tokens_seen": 226176448, + "step": 71865 + }, + { + "epoch": 4.600857819601818, + "grad_norm": 9.371317863464355, + "learning_rate": 9.646561138849014e-08, + "loss": 0.1368, + "num_input_tokens_seen": 226192576, + "step": 71870 + }, + { + "epoch": 4.601177901542795, + "grad_norm": 4.592071533203125, + "learning_rate": 9.631198250294294e-08, + "loss": 0.1371, + "num_input_tokens_seen": 226206912, + "step": 71875 + }, + { + "epoch": 4.601497983483772, + "grad_norm": 3.5782246589660645, + "learning_rate": 9.615847364505465e-08, + "loss": 0.1285, + "num_input_tokens_seen": 226222208, + "step": 71880 + }, + { + "epoch": 4.601818065424749, + "grad_norm": 5.67766809463501, + "learning_rate": 9.600508482249166e-08, + "loss": 0.129, + "num_input_tokens_seen": 226239168, + "step": 71885 + }, + { + "epoch": 4.602138147365726, + "grad_norm": 7.269615650177002, + "learning_rate": 9.585181604291228e-08, + "loss": 0.1246, + "num_input_tokens_seen": 226254656, + "step": 71890 + }, + { + "epoch": 4.602458229306703, + "grad_norm": 7.310482501983643, + "learning_rate": 9.569866731397038e-08, + "loss": 0.1405, + "num_input_tokens_seen": 226269376, + "step": 71895 + }, + { + "epoch": 4.602778311247679, + "grad_norm": 7.958652019500732, + "learning_rate": 9.554563864331373e-08, + "loss": 0.1312, + "num_input_tokens_seen": 226284416, + "step": 71900 + }, + { + "epoch": 4.603098393188656, + "grad_norm": 5.439431667327881, + "learning_rate": 9.539273003858318e-08, + "loss": 0.143, + "num_input_tokens_seen": 226300224, + "step": 71905 + }, + { + "epoch": 4.603418475129633, + "grad_norm": 5.701115131378174, + "learning_rate": 9.523994150741511e-08, + "loss": 0.1151, + "num_input_tokens_seen": 226315840, + "step": 71910 + }, + { + "epoch": 4.60373855707061, + "grad_norm": 5.7277655601501465, + "learning_rate": 9.508727305743815e-08, + "loss": 0.0849, + "num_input_tokens_seen": 226330752, + "step": 71915 + }, + { + "epoch": 4.604058639011587, + "grad_norm": 9.133159637451172, + "learning_rate": 9.49347246962759e-08, + "loss": 0.1479, + "num_input_tokens_seen": 226346560, + "step": 71920 + }, + { + "epoch": 4.604378720952564, + "grad_norm": 3.5703141689300537, + "learning_rate": 9.478229643154618e-08, + "loss": 0.1116, + "num_input_tokens_seen": 226361408, + "step": 71925 + }, + { + "epoch": 4.604698802893541, + "grad_norm": 5.479892730712891, + "learning_rate": 9.462998827086012e-08, + "loss": 0.1724, + "num_input_tokens_seen": 226378240, + "step": 71930 + }, + { + "epoch": 4.605018884834518, + "grad_norm": 8.102742195129395, + "learning_rate": 9.447780022182357e-08, + "loss": 0.109, + "num_input_tokens_seen": 226393856, + "step": 71935 + }, + { + "epoch": 4.605338966775495, + "grad_norm": 6.162160873413086, + "learning_rate": 9.432573229203573e-08, + "loss": 0.1899, + "num_input_tokens_seen": 226409600, + "step": 71940 + }, + { + "epoch": 4.605659048716472, + "grad_norm": 6.389029026031494, + "learning_rate": 9.417378448908998e-08, + "loss": 0.1598, + "num_input_tokens_seen": 226425792, + "step": 71945 + }, + { + "epoch": 4.605979130657448, + "grad_norm": 7.312409400939941, + "learning_rate": 9.402195682057385e-08, + "loss": 0.1339, + "num_input_tokens_seen": 226441216, + "step": 71950 + }, + { + "epoch": 4.606299212598425, + "grad_norm": 4.383991718292236, + "learning_rate": 9.387024929406879e-08, + "loss": 0.1129, + "num_input_tokens_seen": 226456512, + "step": 71955 + }, + { + "epoch": 4.606619294539402, + "grad_norm": 3.2392921447753906, + "learning_rate": 9.371866191715067e-08, + "loss": 0.1145, + "num_input_tokens_seen": 226474112, + "step": 71960 + }, + { + "epoch": 4.606939376480379, + "grad_norm": 6.679901599884033, + "learning_rate": 9.356719469738873e-08, + "loss": 0.1759, + "num_input_tokens_seen": 226489920, + "step": 71965 + }, + { + "epoch": 4.607259458421356, + "grad_norm": 10.198675155639648, + "learning_rate": 9.341584764234635e-08, + "loss": 0.1302, + "num_input_tokens_seen": 226504576, + "step": 71970 + }, + { + "epoch": 4.607579540362333, + "grad_norm": 6.093377113342285, + "learning_rate": 9.326462075958137e-08, + "loss": 0.1262, + "num_input_tokens_seen": 226519488, + "step": 71975 + }, + { + "epoch": 4.60789962230331, + "grad_norm": 4.935763835906982, + "learning_rate": 9.31135140566447e-08, + "loss": 0.1337, + "num_input_tokens_seen": 226534592, + "step": 71980 + }, + { + "epoch": 4.608219704244286, + "grad_norm": 15.684066772460938, + "learning_rate": 9.296252754108225e-08, + "loss": 0.1339, + "num_input_tokens_seen": 226550272, + "step": 71985 + }, + { + "epoch": 4.608539786185263, + "grad_norm": 11.454120635986328, + "learning_rate": 9.2811661220433e-08, + "loss": 0.1727, + "num_input_tokens_seen": 226567104, + "step": 71990 + }, + { + "epoch": 4.60885986812624, + "grad_norm": 15.82736873626709, + "learning_rate": 9.266091510223146e-08, + "loss": 0.1343, + "num_input_tokens_seen": 226582784, + "step": 71995 + }, + { + "epoch": 4.609179950067217, + "grad_norm": 7.381292343139648, + "learning_rate": 9.251028919400385e-08, + "loss": 0.1683, + "num_input_tokens_seen": 226599296, + "step": 72000 + }, + { + "epoch": 4.609500032008194, + "grad_norm": 7.728713512420654, + "learning_rate": 9.235978350327246e-08, + "loss": 0.1576, + "num_input_tokens_seen": 226615744, + "step": 72005 + }, + { + "epoch": 4.609820113949171, + "grad_norm": 11.11450481414795, + "learning_rate": 9.220939803755269e-08, + "loss": 0.1205, + "num_input_tokens_seen": 226630720, + "step": 72010 + }, + { + "epoch": 4.610140195890148, + "grad_norm": 28.2357177734375, + "learning_rate": 9.205913280435352e-08, + "loss": 0.1782, + "num_input_tokens_seen": 226647936, + "step": 72015 + }, + { + "epoch": 4.610460277831125, + "grad_norm": 3.2459938526153564, + "learning_rate": 9.190898781117925e-08, + "loss": 0.1098, + "num_input_tokens_seen": 226663680, + "step": 72020 + }, + { + "epoch": 4.610780359772102, + "grad_norm": 4.347908020019531, + "learning_rate": 9.175896306552634e-08, + "loss": 0.1331, + "num_input_tokens_seen": 226678976, + "step": 72025 + }, + { + "epoch": 4.611100441713079, + "grad_norm": 8.927854537963867, + "learning_rate": 9.160905857488717e-08, + "loss": 0.1393, + "num_input_tokens_seen": 226694144, + "step": 72030 + }, + { + "epoch": 4.611420523654055, + "grad_norm": 10.05022144317627, + "learning_rate": 9.145927434674629e-08, + "loss": 0.1737, + "num_input_tokens_seen": 226708800, + "step": 72035 + }, + { + "epoch": 4.611740605595032, + "grad_norm": 8.542195320129395, + "learning_rate": 9.13096103885841e-08, + "loss": 0.0947, + "num_input_tokens_seen": 226724480, + "step": 72040 + }, + { + "epoch": 4.612060687536009, + "grad_norm": 5.789675235748291, + "learning_rate": 9.116006670787325e-08, + "loss": 0.1318, + "num_input_tokens_seen": 226739456, + "step": 72045 + }, + { + "epoch": 4.612380769476986, + "grad_norm": 6.591165065765381, + "learning_rate": 9.101064331208165e-08, + "loss": 0.1285, + "num_input_tokens_seen": 226754944, + "step": 72050 + }, + { + "epoch": 4.612700851417963, + "grad_norm": 5.842239856719971, + "learning_rate": 9.086134020867055e-08, + "loss": 0.1325, + "num_input_tokens_seen": 226769920, + "step": 72055 + }, + { + "epoch": 4.61302093335894, + "grad_norm": 6.369499206542969, + "learning_rate": 9.071215740509537e-08, + "loss": 0.1794, + "num_input_tokens_seen": 226784960, + "step": 72060 + }, + { + "epoch": 4.613341015299917, + "grad_norm": 4.173990726470947, + "learning_rate": 9.056309490880544e-08, + "loss": 0.0837, + "num_input_tokens_seen": 226800768, + "step": 72065 + }, + { + "epoch": 4.613661097240894, + "grad_norm": 7.370219707489014, + "learning_rate": 9.041415272724396e-08, + "loss": 0.0679, + "num_input_tokens_seen": 226816896, + "step": 72070 + }, + { + "epoch": 4.61398117918187, + "grad_norm": 6.347251892089844, + "learning_rate": 9.026533086784889e-08, + "loss": 0.1183, + "num_input_tokens_seen": 226832896, + "step": 72075 + }, + { + "epoch": 4.614301261122847, + "grad_norm": 7.761768817901611, + "learning_rate": 9.011662933805149e-08, + "loss": 0.1624, + "num_input_tokens_seen": 226847936, + "step": 72080 + }, + { + "epoch": 4.614621343063824, + "grad_norm": 6.879167556762695, + "learning_rate": 8.996804814527693e-08, + "loss": 0.1095, + "num_input_tokens_seen": 226863360, + "step": 72085 + }, + { + "epoch": 4.614941425004801, + "grad_norm": 6.619941234588623, + "learning_rate": 8.981958729694457e-08, + "loss": 0.1501, + "num_input_tokens_seen": 226878784, + "step": 72090 + }, + { + "epoch": 4.615261506945778, + "grad_norm": 5.795214653015137, + "learning_rate": 8.967124680046819e-08, + "loss": 0.1179, + "num_input_tokens_seen": 226895616, + "step": 72095 + }, + { + "epoch": 4.615581588886755, + "grad_norm": 7.202408790588379, + "learning_rate": 8.952302666325408e-08, + "loss": 0.0891, + "num_input_tokens_seen": 226912000, + "step": 72100 + }, + { + "epoch": 4.615901670827732, + "grad_norm": 6.402564525604248, + "learning_rate": 8.937492689270522e-08, + "loss": 0.1361, + "num_input_tokens_seen": 226928192, + "step": 72105 + }, + { + "epoch": 4.616221752768709, + "grad_norm": 7.109971523284912, + "learning_rate": 8.922694749621513e-08, + "loss": 0.1639, + "num_input_tokens_seen": 226943936, + "step": 72110 + }, + { + "epoch": 4.616541834709686, + "grad_norm": 8.255278587341309, + "learning_rate": 8.907908848117485e-08, + "loss": 0.1455, + "num_input_tokens_seen": 226959872, + "step": 72115 + }, + { + "epoch": 4.616861916650663, + "grad_norm": 5.756924629211426, + "learning_rate": 8.893134985496599e-08, + "loss": 0.0895, + "num_input_tokens_seen": 226974912, + "step": 72120 + }, + { + "epoch": 4.617181998591639, + "grad_norm": 8.050064086914062, + "learning_rate": 8.878373162496734e-08, + "loss": 0.1536, + "num_input_tokens_seen": 226990656, + "step": 72125 + }, + { + "epoch": 4.617502080532616, + "grad_norm": 6.66571569442749, + "learning_rate": 8.863623379854942e-08, + "loss": 0.1079, + "num_input_tokens_seen": 227005632, + "step": 72130 + }, + { + "epoch": 4.617822162473593, + "grad_norm": 5.993621826171875, + "learning_rate": 8.848885638307741e-08, + "loss": 0.0973, + "num_input_tokens_seen": 227021440, + "step": 72135 + }, + { + "epoch": 4.61814224441457, + "grad_norm": 7.109934329986572, + "learning_rate": 8.834159938591131e-08, + "loss": 0.1035, + "num_input_tokens_seen": 227036672, + "step": 72140 + }, + { + "epoch": 4.618462326355547, + "grad_norm": 6.553922176361084, + "learning_rate": 8.819446281440325e-08, + "loss": 0.117, + "num_input_tokens_seen": 227052928, + "step": 72145 + }, + { + "epoch": 4.618782408296524, + "grad_norm": 7.935873985290527, + "learning_rate": 8.804744667590153e-08, + "loss": 0.1554, + "num_input_tokens_seen": 227068544, + "step": 72150 + }, + { + "epoch": 4.619102490237501, + "grad_norm": 6.338312149047852, + "learning_rate": 8.790055097774668e-08, + "loss": 0.117, + "num_input_tokens_seen": 227083520, + "step": 72155 + }, + { + "epoch": 4.619422572178478, + "grad_norm": 5.620743751525879, + "learning_rate": 8.775377572727423e-08, + "loss": 0.1018, + "num_input_tokens_seen": 227099264, + "step": 72160 + }, + { + "epoch": 4.619742654119454, + "grad_norm": 7.42328405380249, + "learning_rate": 8.760712093181356e-08, + "loss": 0.1249, + "num_input_tokens_seen": 227114496, + "step": 72165 + }, + { + "epoch": 4.620062736060431, + "grad_norm": 7.391551494598389, + "learning_rate": 8.746058659868717e-08, + "loss": 0.1567, + "num_input_tokens_seen": 227129536, + "step": 72170 + }, + { + "epoch": 4.620382818001408, + "grad_norm": 5.221895694732666, + "learning_rate": 8.731417273521308e-08, + "loss": 0.1458, + "num_input_tokens_seen": 227144256, + "step": 72175 + }, + { + "epoch": 4.620702899942385, + "grad_norm": 10.360139846801758, + "learning_rate": 8.716787934870158e-08, + "loss": 0.113, + "num_input_tokens_seen": 227159808, + "step": 72180 + }, + { + "epoch": 4.621022981883362, + "grad_norm": 9.451258659362793, + "learning_rate": 8.702170644645819e-08, + "loss": 0.1498, + "num_input_tokens_seen": 227176192, + "step": 72185 + }, + { + "epoch": 4.621343063824339, + "grad_norm": 4.77562141418457, + "learning_rate": 8.687565403578208e-08, + "loss": 0.2111, + "num_input_tokens_seen": 227191872, + "step": 72190 + }, + { + "epoch": 4.621663145765316, + "grad_norm": 7.578736782073975, + "learning_rate": 8.67297221239663e-08, + "loss": 0.1301, + "num_input_tokens_seen": 227208768, + "step": 72195 + }, + { + "epoch": 4.621983227706293, + "grad_norm": 3.908022880554199, + "learning_rate": 8.658391071829808e-08, + "loss": 0.1165, + "num_input_tokens_seen": 227223168, + "step": 72200 + }, + { + "epoch": 4.62230330964727, + "grad_norm": 5.283285617828369, + "learning_rate": 8.643821982605826e-08, + "loss": 0.1173, + "num_input_tokens_seen": 227238784, + "step": 72205 + }, + { + "epoch": 4.622623391588247, + "grad_norm": 6.983174800872803, + "learning_rate": 8.629264945452214e-08, + "loss": 0.1041, + "num_input_tokens_seen": 227255424, + "step": 72210 + }, + { + "epoch": 4.622943473529223, + "grad_norm": 8.933622360229492, + "learning_rate": 8.614719961095836e-08, + "loss": 0.1124, + "num_input_tokens_seen": 227270144, + "step": 72215 + }, + { + "epoch": 4.6232635554702, + "grad_norm": 7.486414909362793, + "learning_rate": 8.600187030262997e-08, + "loss": 0.1159, + "num_input_tokens_seen": 227286656, + "step": 72220 + }, + { + "epoch": 4.623583637411177, + "grad_norm": 6.545816421508789, + "learning_rate": 8.58566615367945e-08, + "loss": 0.0744, + "num_input_tokens_seen": 227302336, + "step": 72225 + }, + { + "epoch": 4.623903719352154, + "grad_norm": 6.900462627410889, + "learning_rate": 8.571157332070228e-08, + "loss": 0.1673, + "num_input_tokens_seen": 227318208, + "step": 72230 + }, + { + "epoch": 4.624223801293131, + "grad_norm": 4.348147869110107, + "learning_rate": 8.556660566159864e-08, + "loss": 0.1104, + "num_input_tokens_seen": 227334016, + "step": 72235 + }, + { + "epoch": 4.624543883234108, + "grad_norm": 7.4940667152404785, + "learning_rate": 8.54217585667222e-08, + "loss": 0.1376, + "num_input_tokens_seen": 227350720, + "step": 72240 + }, + { + "epoch": 4.624863965175085, + "grad_norm": 5.580598831176758, + "learning_rate": 8.527703204330607e-08, + "loss": 0.1202, + "num_input_tokens_seen": 227367040, + "step": 72245 + }, + { + "epoch": 4.625184047116061, + "grad_norm": 6.333499908447266, + "learning_rate": 8.513242609857753e-08, + "loss": 0.1063, + "num_input_tokens_seen": 227382144, + "step": 72250 + }, + { + "epoch": 4.625504129057038, + "grad_norm": 9.246003150939941, + "learning_rate": 8.498794073975636e-08, + "loss": 0.153, + "num_input_tokens_seen": 227397824, + "step": 72255 + }, + { + "epoch": 4.625824210998015, + "grad_norm": 5.523098468780518, + "learning_rate": 8.4843575974059e-08, + "loss": 0.1076, + "num_input_tokens_seen": 227412544, + "step": 72260 + }, + { + "epoch": 4.626144292938992, + "grad_norm": 9.130630493164062, + "learning_rate": 8.46993318086925e-08, + "loss": 0.1476, + "num_input_tokens_seen": 227428864, + "step": 72265 + }, + { + "epoch": 4.626464374879969, + "grad_norm": 4.818211555480957, + "learning_rate": 8.455520825086105e-08, + "loss": 0.1022, + "num_input_tokens_seen": 227444032, + "step": 72270 + }, + { + "epoch": 4.626784456820946, + "grad_norm": 6.870357990264893, + "learning_rate": 8.44112053077606e-08, + "loss": 0.1015, + "num_input_tokens_seen": 227460032, + "step": 72275 + }, + { + "epoch": 4.627104538761923, + "grad_norm": 4.969864845275879, + "learning_rate": 8.426732298658258e-08, + "loss": 0.1472, + "num_input_tokens_seen": 227476672, + "step": 72280 + }, + { + "epoch": 4.6274246207029, + "grad_norm": 9.565366744995117, + "learning_rate": 8.412356129451099e-08, + "loss": 0.1316, + "num_input_tokens_seen": 227491904, + "step": 72285 + }, + { + "epoch": 4.627744702643877, + "grad_norm": 6.719968318939209, + "learning_rate": 8.397992023872508e-08, + "loss": 0.1001, + "num_input_tokens_seen": 227507840, + "step": 72290 + }, + { + "epoch": 4.628064784584854, + "grad_norm": 9.707263946533203, + "learning_rate": 8.383639982639747e-08, + "loss": 0.1326, + "num_input_tokens_seen": 227523520, + "step": 72295 + }, + { + "epoch": 4.62838486652583, + "grad_norm": 6.985896110534668, + "learning_rate": 8.369300006469461e-08, + "loss": 0.1299, + "num_input_tokens_seen": 227539648, + "step": 72300 + }, + { + "epoch": 4.628704948466807, + "grad_norm": 5.564934730529785, + "learning_rate": 8.35497209607769e-08, + "loss": 0.1256, + "num_input_tokens_seen": 227555264, + "step": 72305 + }, + { + "epoch": 4.629025030407784, + "grad_norm": 6.496347904205322, + "learning_rate": 8.340656252179974e-08, + "loss": 0.1384, + "num_input_tokens_seen": 227571648, + "step": 72310 + }, + { + "epoch": 4.629345112348761, + "grad_norm": 8.406511306762695, + "learning_rate": 8.326352475491101e-08, + "loss": 0.1439, + "num_input_tokens_seen": 227586496, + "step": 72315 + }, + { + "epoch": 4.629665194289738, + "grad_norm": 9.050700187683105, + "learning_rate": 8.312060766725388e-08, + "loss": 0.1583, + "num_input_tokens_seen": 227601472, + "step": 72320 + }, + { + "epoch": 4.629985276230715, + "grad_norm": 5.713834285736084, + "learning_rate": 8.297781126596433e-08, + "loss": 0.1593, + "num_input_tokens_seen": 227617536, + "step": 72325 + }, + { + "epoch": 4.630305358171692, + "grad_norm": 3.0524563789367676, + "learning_rate": 8.283513555817274e-08, + "loss": 0.1117, + "num_input_tokens_seen": 227632704, + "step": 72330 + }, + { + "epoch": 4.630625440112669, + "grad_norm": 11.166820526123047, + "learning_rate": 8.269258055100454e-08, + "loss": 0.156, + "num_input_tokens_seen": 227648640, + "step": 72335 + }, + { + "epoch": 4.630945522053645, + "grad_norm": 6.962629795074463, + "learning_rate": 8.255014625157709e-08, + "loss": 0.1008, + "num_input_tokens_seen": 227664960, + "step": 72340 + }, + { + "epoch": 4.631265603994622, + "grad_norm": 9.4150390625, + "learning_rate": 8.240783266700387e-08, + "loss": 0.1675, + "num_input_tokens_seen": 227680576, + "step": 72345 + }, + { + "epoch": 4.631585685935599, + "grad_norm": 3.8682541847229004, + "learning_rate": 8.226563980439e-08, + "loss": 0.0895, + "num_input_tokens_seen": 227697280, + "step": 72350 + }, + { + "epoch": 4.631905767876576, + "grad_norm": 5.03187894821167, + "learning_rate": 8.212356767083706e-08, + "loss": 0.1265, + "num_input_tokens_seen": 227713152, + "step": 72355 + }, + { + "epoch": 4.632225849817553, + "grad_norm": 11.122124671936035, + "learning_rate": 8.198161627343881e-08, + "loss": 0.1322, + "num_input_tokens_seen": 227728576, + "step": 72360 + }, + { + "epoch": 4.63254593175853, + "grad_norm": 5.947368144989014, + "learning_rate": 8.183978561928319e-08, + "loss": 0.1192, + "num_input_tokens_seen": 227744448, + "step": 72365 + }, + { + "epoch": 4.632866013699507, + "grad_norm": 6.401241779327393, + "learning_rate": 8.169807571545369e-08, + "loss": 0.1155, + "num_input_tokens_seen": 227760000, + "step": 72370 + }, + { + "epoch": 4.633186095640484, + "grad_norm": 9.157021522521973, + "learning_rate": 8.15564865690252e-08, + "loss": 0.1282, + "num_input_tokens_seen": 227775488, + "step": 72375 + }, + { + "epoch": 4.633506177581461, + "grad_norm": 4.771386623382568, + "learning_rate": 8.141501818706877e-08, + "loss": 0.1357, + "num_input_tokens_seen": 227790272, + "step": 72380 + }, + { + "epoch": 4.633826259522438, + "grad_norm": 8.024582862854004, + "learning_rate": 8.127367057664842e-08, + "loss": 0.1583, + "num_input_tokens_seen": 227805824, + "step": 72385 + }, + { + "epoch": 4.634146341463414, + "grad_norm": 6.497335433959961, + "learning_rate": 8.113244374482243e-08, + "loss": 0.0813, + "num_input_tokens_seen": 227822912, + "step": 72390 + }, + { + "epoch": 4.634466423404391, + "grad_norm": 8.679835319519043, + "learning_rate": 8.099133769864265e-08, + "loss": 0.1038, + "num_input_tokens_seen": 227837504, + "step": 72395 + }, + { + "epoch": 4.634786505345368, + "grad_norm": 8.737686157226562, + "learning_rate": 8.08503524451551e-08, + "loss": 0.1134, + "num_input_tokens_seen": 227852992, + "step": 72400 + }, + { + "epoch": 4.635106587286345, + "grad_norm": 7.183636665344238, + "learning_rate": 8.070948799140027e-08, + "loss": 0.1408, + "num_input_tokens_seen": 227868480, + "step": 72405 + }, + { + "epoch": 4.635426669227322, + "grad_norm": 7.643718242645264, + "learning_rate": 8.056874434441197e-08, + "loss": 0.1485, + "num_input_tokens_seen": 227882880, + "step": 72410 + }, + { + "epoch": 4.635746751168299, + "grad_norm": 9.301488876342773, + "learning_rate": 8.042812151121793e-08, + "loss": 0.156, + "num_input_tokens_seen": 227899264, + "step": 72415 + }, + { + "epoch": 4.636066833109276, + "grad_norm": 9.032569885253906, + "learning_rate": 8.028761949884084e-08, + "loss": 0.1853, + "num_input_tokens_seen": 227914432, + "step": 72420 + }, + { + "epoch": 4.636386915050253, + "grad_norm": 4.503036022186279, + "learning_rate": 8.014723831429594e-08, + "loss": 0.1024, + "num_input_tokens_seen": 227930432, + "step": 72425 + }, + { + "epoch": 4.636706996991229, + "grad_norm": 14.646204948425293, + "learning_rate": 8.000697796459373e-08, + "loss": 0.2161, + "num_input_tokens_seen": 227946432, + "step": 72430 + }, + { + "epoch": 4.637027078932206, + "grad_norm": 6.146353721618652, + "learning_rate": 7.986683845673748e-08, + "loss": 0.1531, + "num_input_tokens_seen": 227960832, + "step": 72435 + }, + { + "epoch": 4.637347160873183, + "grad_norm": 5.7867021560668945, + "learning_rate": 7.972681979772551e-08, + "loss": 0.1102, + "num_input_tokens_seen": 227976192, + "step": 72440 + }, + { + "epoch": 4.63766724281416, + "grad_norm": 13.072103500366211, + "learning_rate": 7.958692199454915e-08, + "loss": 0.1153, + "num_input_tokens_seen": 227991680, + "step": 72445 + }, + { + "epoch": 4.637987324755137, + "grad_norm": 6.022846698760986, + "learning_rate": 7.944714505419448e-08, + "loss": 0.1571, + "num_input_tokens_seen": 228007232, + "step": 72450 + }, + { + "epoch": 4.638307406696114, + "grad_norm": 8.753857612609863, + "learning_rate": 7.930748898364149e-08, + "loss": 0.2076, + "num_input_tokens_seen": 228022720, + "step": 72455 + }, + { + "epoch": 4.638627488637091, + "grad_norm": 9.831282615661621, + "learning_rate": 7.91679537898632e-08, + "loss": 0.1597, + "num_input_tokens_seen": 228038016, + "step": 72460 + }, + { + "epoch": 4.638947570578068, + "grad_norm": 4.53375244140625, + "learning_rate": 7.902853947982764e-08, + "loss": 0.1065, + "num_input_tokens_seen": 228052864, + "step": 72465 + }, + { + "epoch": 4.639267652519045, + "grad_norm": 7.295158863067627, + "learning_rate": 7.888924606049675e-08, + "loss": 0.1465, + "num_input_tokens_seen": 228068352, + "step": 72470 + }, + { + "epoch": 4.639587734460022, + "grad_norm": 8.90295124053955, + "learning_rate": 7.875007353882552e-08, + "loss": 0.1201, + "num_input_tokens_seen": 228084672, + "step": 72475 + }, + { + "epoch": 4.639907816400998, + "grad_norm": 170.0874481201172, + "learning_rate": 7.861102192176395e-08, + "loss": 0.1088, + "num_input_tokens_seen": 228100864, + "step": 72480 + }, + { + "epoch": 4.640227898341975, + "grad_norm": 6.368956089019775, + "learning_rate": 7.84720912162551e-08, + "loss": 0.1338, + "num_input_tokens_seen": 228115968, + "step": 72485 + }, + { + "epoch": 4.640547980282952, + "grad_norm": 8.945205688476562, + "learning_rate": 7.83332814292373e-08, + "loss": 0.1203, + "num_input_tokens_seen": 228132736, + "step": 72490 + }, + { + "epoch": 4.640868062223929, + "grad_norm": 8.664642333984375, + "learning_rate": 7.819459256764083e-08, + "loss": 0.1164, + "num_input_tokens_seen": 228148800, + "step": 72495 + }, + { + "epoch": 4.641188144164906, + "grad_norm": 2.462362289428711, + "learning_rate": 7.805602463839212e-08, + "loss": 0.1113, + "num_input_tokens_seen": 228163712, + "step": 72500 + }, + { + "epoch": 4.641508226105883, + "grad_norm": 4.248269557952881, + "learning_rate": 7.79175776484098e-08, + "loss": 0.1175, + "num_input_tokens_seen": 228178432, + "step": 72505 + }, + { + "epoch": 4.64182830804686, + "grad_norm": 5.339663028717041, + "learning_rate": 7.777925160460776e-08, + "loss": 0.1329, + "num_input_tokens_seen": 228193856, + "step": 72510 + }, + { + "epoch": 4.642148389987836, + "grad_norm": 7.824469566345215, + "learning_rate": 7.764104651389298e-08, + "loss": 0.1489, + "num_input_tokens_seen": 228208768, + "step": 72515 + }, + { + "epoch": 4.642468471928813, + "grad_norm": 7.330857753753662, + "learning_rate": 7.750296238316663e-08, + "loss": 0.1213, + "num_input_tokens_seen": 228225344, + "step": 72520 + }, + { + "epoch": 4.64278855386979, + "grad_norm": 8.958391189575195, + "learning_rate": 7.7364999219324e-08, + "loss": 0.1263, + "num_input_tokens_seen": 228239872, + "step": 72525 + }, + { + "epoch": 4.643108635810767, + "grad_norm": 5.790865898132324, + "learning_rate": 7.722715702925432e-08, + "loss": 0.1204, + "num_input_tokens_seen": 228255040, + "step": 72530 + }, + { + "epoch": 4.643428717751744, + "grad_norm": 4.1683573722839355, + "learning_rate": 7.708943581984041e-08, + "loss": 0.109, + "num_input_tokens_seen": 228270592, + "step": 72535 + }, + { + "epoch": 4.643748799692721, + "grad_norm": 8.582944869995117, + "learning_rate": 7.695183559795982e-08, + "loss": 0.139, + "num_input_tokens_seen": 228285632, + "step": 72540 + }, + { + "epoch": 4.644068881633698, + "grad_norm": 6.96180534362793, + "learning_rate": 7.681435637048318e-08, + "loss": 0.1182, + "num_input_tokens_seen": 228303552, + "step": 72545 + }, + { + "epoch": 4.644388963574675, + "grad_norm": 15.435144424438477, + "learning_rate": 7.667699814427582e-08, + "loss": 0.127, + "num_input_tokens_seen": 228319936, + "step": 72550 + }, + { + "epoch": 4.644709045515652, + "grad_norm": 5.91757869720459, + "learning_rate": 7.65397609261967e-08, + "loss": 0.1141, + "num_input_tokens_seen": 228336896, + "step": 72555 + }, + { + "epoch": 4.645029127456629, + "grad_norm": 8.183950424194336, + "learning_rate": 7.640264472309811e-08, + "loss": 0.1452, + "num_input_tokens_seen": 228352128, + "step": 72560 + }, + { + "epoch": 4.645349209397605, + "grad_norm": 6.122823715209961, + "learning_rate": 7.626564954182792e-08, + "loss": 0.1243, + "num_input_tokens_seen": 228368448, + "step": 72565 + }, + { + "epoch": 4.645669291338582, + "grad_norm": 4.8033127784729, + "learning_rate": 7.61287753892262e-08, + "loss": 0.1149, + "num_input_tokens_seen": 228384512, + "step": 72570 + }, + { + "epoch": 4.645989373279559, + "grad_norm": 9.117191314697266, + "learning_rate": 7.599202227212805e-08, + "loss": 0.157, + "num_input_tokens_seen": 228398912, + "step": 72575 + }, + { + "epoch": 4.646309455220536, + "grad_norm": 6.251395225524902, + "learning_rate": 7.585539019736188e-08, + "loss": 0.1111, + "num_input_tokens_seen": 228413888, + "step": 72580 + }, + { + "epoch": 4.646629537161513, + "grad_norm": 6.375049114227295, + "learning_rate": 7.571887917175086e-08, + "loss": 0.1115, + "num_input_tokens_seen": 228429056, + "step": 72585 + }, + { + "epoch": 4.64694961910249, + "grad_norm": 4.568967342376709, + "learning_rate": 7.55824892021112e-08, + "loss": 0.1235, + "num_input_tokens_seen": 228444736, + "step": 72590 + }, + { + "epoch": 4.647269701043467, + "grad_norm": 7.638192653656006, + "learning_rate": 7.544622029525356e-08, + "loss": 0.139, + "num_input_tokens_seen": 228459840, + "step": 72595 + }, + { + "epoch": 4.647589782984444, + "grad_norm": 6.709547996520996, + "learning_rate": 7.531007245798305e-08, + "loss": 0.12, + "num_input_tokens_seen": 228475712, + "step": 72600 + }, + { + "epoch": 4.6479098649254205, + "grad_norm": 7.5320844650268555, + "learning_rate": 7.51740456970973e-08, + "loss": 0.1487, + "num_input_tokens_seen": 228490176, + "step": 72605 + }, + { + "epoch": 4.6482299468663975, + "grad_norm": 9.191444396972656, + "learning_rate": 7.503814001938975e-08, + "loss": 0.1478, + "num_input_tokens_seen": 228506176, + "step": 72610 + }, + { + "epoch": 4.6485500288073744, + "grad_norm": 7.75004243850708, + "learning_rate": 7.490235543164609e-08, + "loss": 0.1512, + "num_input_tokens_seen": 228522240, + "step": 72615 + }, + { + "epoch": 4.648870110748351, + "grad_norm": 7.633791446685791, + "learning_rate": 7.4766691940647e-08, + "loss": 0.1258, + "num_input_tokens_seen": 228537792, + "step": 72620 + }, + { + "epoch": 4.649190192689328, + "grad_norm": 9.884175300598145, + "learning_rate": 7.463114955316653e-08, + "loss": 0.179, + "num_input_tokens_seen": 228553600, + "step": 72625 + }, + { + "epoch": 4.649510274630305, + "grad_norm": 6.020257472991943, + "learning_rate": 7.44957282759734e-08, + "loss": 0.112, + "num_input_tokens_seen": 228569728, + "step": 72630 + }, + { + "epoch": 4.649830356571282, + "grad_norm": 11.71239185333252, + "learning_rate": 7.436042811582944e-08, + "loss": 0.2158, + "num_input_tokens_seen": 228585280, + "step": 72635 + }, + { + "epoch": 4.650150438512259, + "grad_norm": 3.7219982147216797, + "learning_rate": 7.422524907949119e-08, + "loss": 0.094, + "num_input_tokens_seen": 228600512, + "step": 72640 + }, + { + "epoch": 4.650470520453236, + "grad_norm": 7.209644317626953, + "learning_rate": 7.409019117370825e-08, + "loss": 0.1175, + "num_input_tokens_seen": 228615744, + "step": 72645 + }, + { + "epoch": 4.650790602394213, + "grad_norm": 7.768423557281494, + "learning_rate": 7.39552544052255e-08, + "loss": 0.1566, + "num_input_tokens_seen": 228632960, + "step": 72650 + }, + { + "epoch": 4.6511106843351895, + "grad_norm": 8.679803848266602, + "learning_rate": 7.382043878078033e-08, + "loss": 0.1128, + "num_input_tokens_seen": 228648512, + "step": 72655 + }, + { + "epoch": 4.6514307662761665, + "grad_norm": 4.997073650360107, + "learning_rate": 7.368574430710484e-08, + "loss": 0.1539, + "num_input_tokens_seen": 228664064, + "step": 72660 + }, + { + "epoch": 4.6517508482171435, + "grad_norm": 8.633726119995117, + "learning_rate": 7.355117099092535e-08, + "loss": 0.1434, + "num_input_tokens_seen": 228679552, + "step": 72665 + }, + { + "epoch": 4.6520709301581205, + "grad_norm": 36.221168518066406, + "learning_rate": 7.341671883896117e-08, + "loss": 0.1441, + "num_input_tokens_seen": 228696576, + "step": 72670 + }, + { + "epoch": 4.6523910120990974, + "grad_norm": 4.456834316253662, + "learning_rate": 7.328238785792669e-08, + "loss": 0.0874, + "num_input_tokens_seen": 228711872, + "step": 72675 + }, + { + "epoch": 4.652711094040074, + "grad_norm": 6.4044671058654785, + "learning_rate": 7.3148178054529e-08, + "loss": 0.1149, + "num_input_tokens_seen": 228726656, + "step": 72680 + }, + { + "epoch": 4.653031175981051, + "grad_norm": 7.406925678253174, + "learning_rate": 7.301408943547111e-08, + "loss": 0.1334, + "num_input_tokens_seen": 228741696, + "step": 72685 + }, + { + "epoch": 4.653351257922028, + "grad_norm": 7.160458087921143, + "learning_rate": 7.28801220074471e-08, + "loss": 0.1242, + "num_input_tokens_seen": 228757440, + "step": 72690 + }, + { + "epoch": 4.6536713398630045, + "grad_norm": 4.481289386749268, + "learning_rate": 7.274627577714771e-08, + "loss": 0.1562, + "num_input_tokens_seen": 228774080, + "step": 72695 + }, + { + "epoch": 4.6539914218039815, + "grad_norm": 6.045177936553955, + "learning_rate": 7.261255075125623e-08, + "loss": 0.1858, + "num_input_tokens_seen": 228788672, + "step": 72700 + }, + { + "epoch": 4.6543115037449585, + "grad_norm": 51.510711669921875, + "learning_rate": 7.247894693645007e-08, + "loss": 0.1244, + "num_input_tokens_seen": 228804160, + "step": 72705 + }, + { + "epoch": 4.6546315856859355, + "grad_norm": 8.531805992126465, + "learning_rate": 7.234546433940087e-08, + "loss": 0.1227, + "num_input_tokens_seen": 228819968, + "step": 72710 + }, + { + "epoch": 4.6549516676269125, + "grad_norm": 11.449283599853516, + "learning_rate": 7.221210296677384e-08, + "loss": 0.1291, + "num_input_tokens_seen": 228835584, + "step": 72715 + }, + { + "epoch": 4.6552717495678895, + "grad_norm": 4.859318256378174, + "learning_rate": 7.207886282522891e-08, + "loss": 0.1333, + "num_input_tokens_seen": 228851392, + "step": 72720 + }, + { + "epoch": 4.6555918315088665, + "grad_norm": 6.537769794464111, + "learning_rate": 7.194574392141829e-08, + "loss": 0.0877, + "num_input_tokens_seen": 228867712, + "step": 72725 + }, + { + "epoch": 4.6559119134498435, + "grad_norm": 5.407824516296387, + "learning_rate": 7.181274626199053e-08, + "loss": 0.0871, + "num_input_tokens_seen": 228882880, + "step": 72730 + }, + { + "epoch": 4.65623199539082, + "grad_norm": 9.809453964233398, + "learning_rate": 7.167986985358616e-08, + "loss": 0.1208, + "num_input_tokens_seen": 228899456, + "step": 72735 + }, + { + "epoch": 4.656552077331797, + "grad_norm": 8.260040283203125, + "learning_rate": 7.154711470284043e-08, + "loss": 0.1594, + "num_input_tokens_seen": 228915328, + "step": 72740 + }, + { + "epoch": 4.6568721592727735, + "grad_norm": 6.069849967956543, + "learning_rate": 7.141448081638219e-08, + "loss": 0.1059, + "num_input_tokens_seen": 228931328, + "step": 72745 + }, + { + "epoch": 4.6571922412137505, + "grad_norm": 8.256842613220215, + "learning_rate": 7.128196820083505e-08, + "loss": 0.1116, + "num_input_tokens_seen": 228948928, + "step": 72750 + }, + { + "epoch": 4.6575123231547275, + "grad_norm": 8.512104034423828, + "learning_rate": 7.114957686281538e-08, + "loss": 0.204, + "num_input_tokens_seen": 228964928, + "step": 72755 + }, + { + "epoch": 4.6578324050957045, + "grad_norm": 4.908413887023926, + "learning_rate": 7.101730680893481e-08, + "loss": 0.1163, + "num_input_tokens_seen": 228982080, + "step": 72760 + }, + { + "epoch": 4.6581524870366815, + "grad_norm": 7.831545829772949, + "learning_rate": 7.088515804579754e-08, + "loss": 0.1596, + "num_input_tokens_seen": 228998528, + "step": 72765 + }, + { + "epoch": 4.6584725689776585, + "grad_norm": 4.89417839050293, + "learning_rate": 7.075313058000272e-08, + "loss": 0.1141, + "num_input_tokens_seen": 229014272, + "step": 72770 + }, + { + "epoch": 4.6587926509186355, + "grad_norm": 9.026582717895508, + "learning_rate": 7.062122441814312e-08, + "loss": 0.133, + "num_input_tokens_seen": 229031168, + "step": 72775 + }, + { + "epoch": 4.659112732859612, + "grad_norm": 8.925113677978516, + "learning_rate": 7.048943956680543e-08, + "loss": 0.1359, + "num_input_tokens_seen": 229046976, + "step": 72780 + }, + { + "epoch": 4.659432814800589, + "grad_norm": 8.346519470214844, + "learning_rate": 7.035777603257021e-08, + "loss": 0.1714, + "num_input_tokens_seen": 229062912, + "step": 72785 + }, + { + "epoch": 4.659752896741566, + "grad_norm": 5.319995403289795, + "learning_rate": 7.022623382201193e-08, + "loss": 0.1059, + "num_input_tokens_seen": 229078464, + "step": 72790 + }, + { + "epoch": 4.6600729786825426, + "grad_norm": 8.619247436523438, + "learning_rate": 7.009481294169978e-08, + "loss": 0.1921, + "num_input_tokens_seen": 229093696, + "step": 72795 + }, + { + "epoch": 4.6603930606235195, + "grad_norm": 11.156533241271973, + "learning_rate": 6.996351339819518e-08, + "loss": 0.1094, + "num_input_tokens_seen": 229111296, + "step": 72800 + }, + { + "epoch": 4.6607131425644965, + "grad_norm": 13.988569259643555, + "learning_rate": 6.983233519805593e-08, + "loss": 0.1231, + "num_input_tokens_seen": 229126720, + "step": 72805 + }, + { + "epoch": 4.6610332245054735, + "grad_norm": 6.134225368499756, + "learning_rate": 6.970127834783069e-08, + "loss": 0.1054, + "num_input_tokens_seen": 229144064, + "step": 72810 + }, + { + "epoch": 4.6613533064464505, + "grad_norm": 7.773244380950928, + "learning_rate": 6.957034285406506e-08, + "loss": 0.1294, + "num_input_tokens_seen": 229159360, + "step": 72815 + }, + { + "epoch": 4.6616733883874275, + "grad_norm": 9.182275772094727, + "learning_rate": 6.943952872329684e-08, + "loss": 0.1443, + "num_input_tokens_seen": 229176000, + "step": 72820 + }, + { + "epoch": 4.6619934703284045, + "grad_norm": 9.403925895690918, + "learning_rate": 6.930883596205834e-08, + "loss": 0.1475, + "num_input_tokens_seen": 229191552, + "step": 72825 + }, + { + "epoch": 4.662313552269381, + "grad_norm": 6.038296222686768, + "learning_rate": 6.917826457687543e-08, + "loss": 0.1755, + "num_input_tokens_seen": 229207296, + "step": 72830 + }, + { + "epoch": 4.662633634210358, + "grad_norm": 10.0132417678833, + "learning_rate": 6.904781457426818e-08, + "loss": 0.1581, + "num_input_tokens_seen": 229222656, + "step": 72835 + }, + { + "epoch": 4.662953716151335, + "grad_norm": 15.565777778625488, + "learning_rate": 6.891748596075082e-08, + "loss": 0.1397, + "num_input_tokens_seen": 229239744, + "step": 72840 + }, + { + "epoch": 4.663273798092312, + "grad_norm": 7.394548416137695, + "learning_rate": 6.87872787428312e-08, + "loss": 0.1136, + "num_input_tokens_seen": 229254528, + "step": 72845 + }, + { + "epoch": 4.663593880033289, + "grad_norm": 8.415958404541016, + "learning_rate": 6.865719292701107e-08, + "loss": 0.1586, + "num_input_tokens_seen": 229269184, + "step": 72850 + }, + { + "epoch": 4.6639139619742656, + "grad_norm": 9.724515914916992, + "learning_rate": 6.852722851978661e-08, + "loss": 0.1257, + "num_input_tokens_seen": 229283776, + "step": 72855 + }, + { + "epoch": 4.6642340439152425, + "grad_norm": 6.06201696395874, + "learning_rate": 6.839738552764707e-08, + "loss": 0.1441, + "num_input_tokens_seen": 229298240, + "step": 72860 + }, + { + "epoch": 4.6645541258562195, + "grad_norm": 6.35894250869751, + "learning_rate": 6.826766395707618e-08, + "loss": 0.0929, + "num_input_tokens_seen": 229315584, + "step": 72865 + }, + { + "epoch": 4.664874207797196, + "grad_norm": 3.536227226257324, + "learning_rate": 6.813806381455207e-08, + "loss": 0.1165, + "num_input_tokens_seen": 229332480, + "step": 72870 + }, + { + "epoch": 4.665194289738173, + "grad_norm": 9.616220474243164, + "learning_rate": 6.80085851065454e-08, + "loss": 0.11, + "num_input_tokens_seen": 229348288, + "step": 72875 + }, + { + "epoch": 4.66551437167915, + "grad_norm": 7.800935745239258, + "learning_rate": 6.787922783952294e-08, + "loss": 0.1362, + "num_input_tokens_seen": 229365248, + "step": 72880 + }, + { + "epoch": 4.665834453620127, + "grad_norm": 2.715083360671997, + "learning_rate": 6.774999201994259e-08, + "loss": 0.129, + "num_input_tokens_seen": 229381760, + "step": 72885 + }, + { + "epoch": 4.666154535561104, + "grad_norm": 10.78232192993164, + "learning_rate": 6.762087765425918e-08, + "loss": 0.1462, + "num_input_tokens_seen": 229396160, + "step": 72890 + }, + { + "epoch": 4.666474617502081, + "grad_norm": 7.272708892822266, + "learning_rate": 6.749188474891893e-08, + "loss": 0.0713, + "num_input_tokens_seen": 229411712, + "step": 72895 + }, + { + "epoch": 4.666794699443058, + "grad_norm": 6.056488037109375, + "learning_rate": 6.736301331036366e-08, + "loss": 0.1271, + "num_input_tokens_seen": 229427968, + "step": 72900 + }, + { + "epoch": 4.667114781384035, + "grad_norm": 4.366558074951172, + "learning_rate": 6.723426334502847e-08, + "loss": 0.0927, + "num_input_tokens_seen": 229443200, + "step": 72905 + }, + { + "epoch": 4.667434863325012, + "grad_norm": 8.933533668518066, + "learning_rate": 6.710563485934185e-08, + "loss": 0.1143, + "num_input_tokens_seen": 229459200, + "step": 72910 + }, + { + "epoch": 4.6677549452659886, + "grad_norm": 8.396504402160645, + "learning_rate": 6.69771278597281e-08, + "loss": 0.1747, + "num_input_tokens_seen": 229475712, + "step": 72915 + }, + { + "epoch": 4.668075027206965, + "grad_norm": 4.247453689575195, + "learning_rate": 6.684874235260291e-08, + "loss": 0.1402, + "num_input_tokens_seen": 229491520, + "step": 72920 + }, + { + "epoch": 4.668395109147942, + "grad_norm": 7.012718200683594, + "learning_rate": 6.672047834437783e-08, + "loss": 0.17, + "num_input_tokens_seen": 229507392, + "step": 72925 + }, + { + "epoch": 4.668715191088919, + "grad_norm": 7.612191200256348, + "learning_rate": 6.659233584145746e-08, + "loss": 0.1274, + "num_input_tokens_seen": 229524352, + "step": 72930 + }, + { + "epoch": 4.669035273029896, + "grad_norm": 7.307647705078125, + "learning_rate": 6.646431485024085e-08, + "loss": 0.1653, + "num_input_tokens_seen": 229539968, + "step": 72935 + }, + { + "epoch": 4.669355354970873, + "grad_norm": 7.839741230010986, + "learning_rate": 6.633641537712066e-08, + "loss": 0.1243, + "num_input_tokens_seen": 229554944, + "step": 72940 + }, + { + "epoch": 4.66967543691185, + "grad_norm": 5.936976909637451, + "learning_rate": 6.620863742848288e-08, + "loss": 0.1262, + "num_input_tokens_seen": 229571200, + "step": 72945 + }, + { + "epoch": 4.669995518852827, + "grad_norm": 6.660998344421387, + "learning_rate": 6.608098101070908e-08, + "loss": 0.1765, + "num_input_tokens_seen": 229586880, + "step": 72950 + }, + { + "epoch": 4.670315600793804, + "grad_norm": 9.241201400756836, + "learning_rate": 6.595344613017307e-08, + "loss": 0.1208, + "num_input_tokens_seen": 229601792, + "step": 72955 + }, + { + "epoch": 4.67063568273478, + "grad_norm": 10.76977252960205, + "learning_rate": 6.582603279324362e-08, + "loss": 0.1383, + "num_input_tokens_seen": 229617024, + "step": 72960 + }, + { + "epoch": 4.670955764675757, + "grad_norm": 9.083772659301758, + "learning_rate": 6.569874100628288e-08, + "loss": 0.1161, + "num_input_tokens_seen": 229633728, + "step": 72965 + }, + { + "epoch": 4.671275846616734, + "grad_norm": 10.039247512817383, + "learning_rate": 6.557157077564713e-08, + "loss": 0.1409, + "num_input_tokens_seen": 229648960, + "step": 72970 + }, + { + "epoch": 4.671595928557711, + "grad_norm": 6.883363723754883, + "learning_rate": 6.54445221076866e-08, + "loss": 0.1149, + "num_input_tokens_seen": 229664704, + "step": 72975 + }, + { + "epoch": 4.671916010498688, + "grad_norm": 5.716353893280029, + "learning_rate": 6.531759500874563e-08, + "loss": 0.1062, + "num_input_tokens_seen": 229679232, + "step": 72980 + }, + { + "epoch": 4.672236092439665, + "grad_norm": 6.284977912902832, + "learning_rate": 6.519078948516194e-08, + "loss": 0.1484, + "num_input_tokens_seen": 229696128, + "step": 72985 + }, + { + "epoch": 4.672556174380642, + "grad_norm": 9.305854797363281, + "learning_rate": 6.506410554326825e-08, + "loss": 0.1004, + "num_input_tokens_seen": 229710656, + "step": 72990 + }, + { + "epoch": 4.672876256321619, + "grad_norm": 9.547969818115234, + "learning_rate": 6.493754318938949e-08, + "loss": 0.1867, + "num_input_tokens_seen": 229726592, + "step": 72995 + }, + { + "epoch": 4.673196338262596, + "grad_norm": 6.080901622772217, + "learning_rate": 6.481110242984645e-08, + "loss": 0.1095, + "num_input_tokens_seen": 229742336, + "step": 73000 + }, + { + "epoch": 4.673516420203573, + "grad_norm": 7.407078742980957, + "learning_rate": 6.468478327095212e-08, + "loss": 0.0999, + "num_input_tokens_seen": 229757312, + "step": 73005 + }, + { + "epoch": 4.673836502144549, + "grad_norm": 8.685391426086426, + "learning_rate": 6.455858571901508e-08, + "loss": 0.1799, + "num_input_tokens_seen": 229771520, + "step": 73010 + }, + { + "epoch": 4.674156584085526, + "grad_norm": 5.649151802062988, + "learning_rate": 6.44325097803361e-08, + "loss": 0.0701, + "num_input_tokens_seen": 229787648, + "step": 73015 + }, + { + "epoch": 4.674476666026503, + "grad_norm": 3.551208734512329, + "learning_rate": 6.430655546121129e-08, + "loss": 0.0831, + "num_input_tokens_seen": 229803072, + "step": 73020 + }, + { + "epoch": 4.67479674796748, + "grad_norm": 6.720343112945557, + "learning_rate": 6.418072276793031e-08, + "loss": 0.1144, + "num_input_tokens_seen": 229818880, + "step": 73025 + }, + { + "epoch": 4.675116829908457, + "grad_norm": 5.80397367477417, + "learning_rate": 6.405501170677591e-08, + "loss": 0.1126, + "num_input_tokens_seen": 229834944, + "step": 73030 + }, + { + "epoch": 4.675436911849434, + "grad_norm": 6.177711486816406, + "learning_rate": 6.392942228402616e-08, + "loss": 0.1023, + "num_input_tokens_seen": 229850624, + "step": 73035 + }, + { + "epoch": 4.675756993790411, + "grad_norm": 6.514820575714111, + "learning_rate": 6.380395450595184e-08, + "loss": 0.1349, + "num_input_tokens_seen": 229865920, + "step": 73040 + }, + { + "epoch": 4.676077075731387, + "grad_norm": 5.075316905975342, + "learning_rate": 6.367860837881851e-08, + "loss": 0.109, + "num_input_tokens_seen": 229880320, + "step": 73045 + }, + { + "epoch": 4.676397157672364, + "grad_norm": 4.933959484100342, + "learning_rate": 6.355338390888505e-08, + "loss": 0.149, + "num_input_tokens_seen": 229896640, + "step": 73050 + }, + { + "epoch": 4.676717239613341, + "grad_norm": 8.377143859863281, + "learning_rate": 6.342828110240451e-08, + "loss": 0.1463, + "num_input_tokens_seen": 229911936, + "step": 73055 + }, + { + "epoch": 4.677037321554318, + "grad_norm": 11.023083686828613, + "learning_rate": 6.330329996562412e-08, + "loss": 0.1401, + "num_input_tokens_seen": 229929088, + "step": 73060 + }, + { + "epoch": 4.677357403495295, + "grad_norm": 16.0779972076416, + "learning_rate": 6.317844050478445e-08, + "loss": 0.1218, + "num_input_tokens_seen": 229944512, + "step": 73065 + }, + { + "epoch": 4.677677485436272, + "grad_norm": 6.464205265045166, + "learning_rate": 6.305370272612078e-08, + "loss": 0.1105, + "num_input_tokens_seen": 229960064, + "step": 73070 + }, + { + "epoch": 4.677997567377249, + "grad_norm": 6.542064189910889, + "learning_rate": 6.292908663586144e-08, + "loss": 0.2026, + "num_input_tokens_seen": 229975232, + "step": 73075 + }, + { + "epoch": 4.678317649318226, + "grad_norm": 12.433767318725586, + "learning_rate": 6.280459224022928e-08, + "loss": 0.1484, + "num_input_tokens_seen": 229992256, + "step": 73080 + }, + { + "epoch": 4.678637731259203, + "grad_norm": 6.2869648933410645, + "learning_rate": 6.268021954544095e-08, + "loss": 0.1312, + "num_input_tokens_seen": 230006976, + "step": 73085 + }, + { + "epoch": 4.67895781320018, + "grad_norm": 7.632470607757568, + "learning_rate": 6.255596855770707e-08, + "loss": 0.136, + "num_input_tokens_seen": 230021568, + "step": 73090 + }, + { + "epoch": 4.679277895141156, + "grad_norm": 5.961106300354004, + "learning_rate": 6.243183928323181e-08, + "loss": 0.1165, + "num_input_tokens_seen": 230037632, + "step": 73095 + }, + { + "epoch": 4.679597977082133, + "grad_norm": 10.476335525512695, + "learning_rate": 6.230783172821359e-08, + "loss": 0.1218, + "num_input_tokens_seen": 230053184, + "step": 73100 + }, + { + "epoch": 4.67991805902311, + "grad_norm": 4.4454216957092285, + "learning_rate": 6.218394589884464e-08, + "loss": 0.1192, + "num_input_tokens_seen": 230068416, + "step": 73105 + }, + { + "epoch": 4.680238140964087, + "grad_norm": 8.861747741699219, + "learning_rate": 6.206018180131168e-08, + "loss": 0.1385, + "num_input_tokens_seen": 230084672, + "step": 73110 + }, + { + "epoch": 4.680558222905064, + "grad_norm": 5.68947172164917, + "learning_rate": 6.193653944179423e-08, + "loss": 0.0944, + "num_input_tokens_seen": 230101376, + "step": 73115 + }, + { + "epoch": 4.680878304846041, + "grad_norm": 7.941765308380127, + "learning_rate": 6.181301882646679e-08, + "loss": 0.1419, + "num_input_tokens_seen": 230116864, + "step": 73120 + }, + { + "epoch": 4.681198386787018, + "grad_norm": 6.944609642028809, + "learning_rate": 6.168961996149692e-08, + "loss": 0.1384, + "num_input_tokens_seen": 230133248, + "step": 73125 + }, + { + "epoch": 4.681518468727995, + "grad_norm": 8.674835205078125, + "learning_rate": 6.156634285304691e-08, + "loss": 0.1422, + "num_input_tokens_seen": 230148800, + "step": 73130 + }, + { + "epoch": 4.681838550668971, + "grad_norm": 5.483536243438721, + "learning_rate": 6.144318750727213e-08, + "loss": 0.1046, + "num_input_tokens_seen": 230164032, + "step": 73135 + }, + { + "epoch": 4.682158632609948, + "grad_norm": 12.054047584533691, + "learning_rate": 6.132015393032264e-08, + "loss": 0.1448, + "num_input_tokens_seen": 230180288, + "step": 73140 + }, + { + "epoch": 4.682478714550925, + "grad_norm": 7.596570014953613, + "learning_rate": 6.119724212834244e-08, + "loss": 0.1762, + "num_input_tokens_seen": 230195904, + "step": 73145 + }, + { + "epoch": 4.682798796491902, + "grad_norm": 6.761381149291992, + "learning_rate": 6.107445210746826e-08, + "loss": 0.1421, + "num_input_tokens_seen": 230211264, + "step": 73150 + }, + { + "epoch": 4.683118878432879, + "grad_norm": 5.988059997558594, + "learning_rate": 6.095178387383217e-08, + "loss": 0.121, + "num_input_tokens_seen": 230226816, + "step": 73155 + }, + { + "epoch": 4.683438960373856, + "grad_norm": 7.485236644744873, + "learning_rate": 6.082923743355951e-08, + "loss": 0.1636, + "num_input_tokens_seen": 230242816, + "step": 73160 + }, + { + "epoch": 4.683759042314833, + "grad_norm": 5.728535175323486, + "learning_rate": 6.070681279276958e-08, + "loss": 0.1264, + "num_input_tokens_seen": 230257792, + "step": 73165 + }, + { + "epoch": 4.68407912425581, + "grad_norm": 6.890857696533203, + "learning_rate": 6.05845099575758e-08, + "loss": 0.1512, + "num_input_tokens_seen": 230272448, + "step": 73170 + }, + { + "epoch": 4.684399206196787, + "grad_norm": 7.813992977142334, + "learning_rate": 6.046232893408499e-08, + "loss": 0.187, + "num_input_tokens_seen": 230287936, + "step": 73175 + }, + { + "epoch": 4.684719288137764, + "grad_norm": 7.6976141929626465, + "learning_rate": 6.034026972839807e-08, + "loss": 0.1061, + "num_input_tokens_seen": 230303424, + "step": 73180 + }, + { + "epoch": 4.68503937007874, + "grad_norm": 3.3818955421447754, + "learning_rate": 6.021833234661045e-08, + "loss": 0.0877, + "num_input_tokens_seen": 230319552, + "step": 73185 + }, + { + "epoch": 4.685359452019717, + "grad_norm": 7.909183979034424, + "learning_rate": 6.009651679481116e-08, + "loss": 0.1239, + "num_input_tokens_seen": 230334528, + "step": 73190 + }, + { + "epoch": 4.685679533960694, + "grad_norm": 1.5943865776062012, + "learning_rate": 5.997482307908281e-08, + "loss": 0.1369, + "num_input_tokens_seen": 230349568, + "step": 73195 + }, + { + "epoch": 4.685999615901671, + "grad_norm": 7.652099609375, + "learning_rate": 5.985325120550223e-08, + "loss": 0.1013, + "num_input_tokens_seen": 230365696, + "step": 73200 + }, + { + "epoch": 4.686319697842648, + "grad_norm": 8.432891845703125, + "learning_rate": 5.973180118013982e-08, + "loss": 0.1414, + "num_input_tokens_seen": 230382016, + "step": 73205 + }, + { + "epoch": 4.686639779783625, + "grad_norm": 5.593147277832031, + "learning_rate": 5.961047300906047e-08, + "loss": 0.1835, + "num_input_tokens_seen": 230397696, + "step": 73210 + }, + { + "epoch": 4.686959861724602, + "grad_norm": 7.44268274307251, + "learning_rate": 5.9489266698322387e-08, + "loss": 0.1048, + "num_input_tokens_seen": 230412864, + "step": 73215 + }, + { + "epoch": 4.687279943665579, + "grad_norm": 4.267625331878662, + "learning_rate": 5.936818225397878e-08, + "loss": 0.0997, + "num_input_tokens_seen": 230427968, + "step": 73220 + }, + { + "epoch": 4.687600025606555, + "grad_norm": 7.323783874511719, + "learning_rate": 5.924721968207453e-08, + "loss": 0.1102, + "num_input_tokens_seen": 230443776, + "step": 73225 + }, + { + "epoch": 4.687920107547532, + "grad_norm": 7.155543327331543, + "learning_rate": 5.912637898865148e-08, + "loss": 0.1537, + "num_input_tokens_seen": 230458880, + "step": 73230 + }, + { + "epoch": 4.688240189488509, + "grad_norm": 6.437474727630615, + "learning_rate": 5.900566017974257e-08, + "loss": 0.1252, + "num_input_tokens_seen": 230473920, + "step": 73235 + }, + { + "epoch": 4.688560271429486, + "grad_norm": 5.286518096923828, + "learning_rate": 5.888506326137633e-08, + "loss": 0.153, + "num_input_tokens_seen": 230489792, + "step": 73240 + }, + { + "epoch": 4.688880353370463, + "grad_norm": 6.878422737121582, + "learning_rate": 5.876458823957487e-08, + "loss": 0.1257, + "num_input_tokens_seen": 230504832, + "step": 73245 + }, + { + "epoch": 4.68920043531144, + "grad_norm": 6.758843421936035, + "learning_rate": 5.8644235120353665e-08, + "loss": 0.1214, + "num_input_tokens_seen": 230520192, + "step": 73250 + }, + { + "epoch": 4.689520517252417, + "grad_norm": 3.3181400299072266, + "learning_rate": 5.852400390972318e-08, + "loss": 0.1276, + "num_input_tokens_seen": 230535680, + "step": 73255 + }, + { + "epoch": 4.689840599193394, + "grad_norm": 6.465785503387451, + "learning_rate": 5.84038946136864e-08, + "loss": 0.1105, + "num_input_tokens_seen": 230550528, + "step": 73260 + }, + { + "epoch": 4.690160681134371, + "grad_norm": 5.6044535636901855, + "learning_rate": 5.828390723824184e-08, + "loss": 0.1535, + "num_input_tokens_seen": 230565440, + "step": 73265 + }, + { + "epoch": 4.690480763075348, + "grad_norm": 6.973963737487793, + "learning_rate": 5.816404178938001e-08, + "loss": 0.1366, + "num_input_tokens_seen": 230582208, + "step": 73270 + }, + { + "epoch": 4.690800845016324, + "grad_norm": 7.084126949310303, + "learning_rate": 5.804429827308694e-08, + "loss": 0.1885, + "num_input_tokens_seen": 230597696, + "step": 73275 + }, + { + "epoch": 4.691120926957301, + "grad_norm": 3.466931104660034, + "learning_rate": 5.792467669534202e-08, + "loss": 0.0957, + "num_input_tokens_seen": 230613888, + "step": 73280 + }, + { + "epoch": 4.691441008898278, + "grad_norm": 8.551719665527344, + "learning_rate": 5.7805177062118525e-08, + "loss": 0.2248, + "num_input_tokens_seen": 230629632, + "step": 73285 + }, + { + "epoch": 4.691761090839255, + "grad_norm": 10.709152221679688, + "learning_rate": 5.768579937938335e-08, + "loss": 0.163, + "num_input_tokens_seen": 230645440, + "step": 73290 + }, + { + "epoch": 4.692081172780232, + "grad_norm": 5.927586555480957, + "learning_rate": 5.756654365309783e-08, + "loss": 0.1181, + "num_input_tokens_seen": 230660544, + "step": 73295 + }, + { + "epoch": 4.692401254721209, + "grad_norm": 6.285358428955078, + "learning_rate": 5.744740988921721e-08, + "loss": 0.16, + "num_input_tokens_seen": 230677312, + "step": 73300 + }, + { + "epoch": 4.692721336662186, + "grad_norm": 6.358704566955566, + "learning_rate": 5.732839809368979e-08, + "loss": 0.1196, + "num_input_tokens_seen": 230693312, + "step": 73305 + }, + { + "epoch": 4.693041418603162, + "grad_norm": 3.4999442100524902, + "learning_rate": 5.7209508272459135e-08, + "loss": 0.157, + "num_input_tokens_seen": 230709632, + "step": 73310 + }, + { + "epoch": 4.693361500544139, + "grad_norm": 7.059776306152344, + "learning_rate": 5.7090740431461346e-08, + "loss": 0.15, + "num_input_tokens_seen": 230725632, + "step": 73315 + }, + { + "epoch": 4.693681582485116, + "grad_norm": 6.3608784675598145, + "learning_rate": 5.69720945766275e-08, + "loss": 0.1249, + "num_input_tokens_seen": 230741632, + "step": 73320 + }, + { + "epoch": 4.694001664426093, + "grad_norm": 6.357320308685303, + "learning_rate": 5.685357071388203e-08, + "loss": 0.1326, + "num_input_tokens_seen": 230757184, + "step": 73325 + }, + { + "epoch": 4.69432174636707, + "grad_norm": 9.379758834838867, + "learning_rate": 5.673516884914326e-08, + "loss": 0.1176, + "num_input_tokens_seen": 230771520, + "step": 73330 + }, + { + "epoch": 4.694641828308047, + "grad_norm": 5.756266117095947, + "learning_rate": 5.6616888988323404e-08, + "loss": 0.142, + "num_input_tokens_seen": 230787136, + "step": 73335 + }, + { + "epoch": 4.694961910249024, + "grad_norm": 44.01459884643555, + "learning_rate": 5.64987311373294e-08, + "loss": 0.2031, + "num_input_tokens_seen": 230805184, + "step": 73340 + }, + { + "epoch": 4.695281992190001, + "grad_norm": 5.501564025878906, + "learning_rate": 5.6380695302060696e-08, + "loss": 0.0862, + "num_input_tokens_seen": 230820288, + "step": 73345 + }, + { + "epoch": 4.695602074130978, + "grad_norm": 5.9130072593688965, + "learning_rate": 5.626278148841174e-08, + "loss": 0.1266, + "num_input_tokens_seen": 230835456, + "step": 73350 + }, + { + "epoch": 4.695922156071955, + "grad_norm": 5.73062801361084, + "learning_rate": 5.614498970227061e-08, + "loss": 0.1365, + "num_input_tokens_seen": 230852480, + "step": 73355 + }, + { + "epoch": 4.696242238012931, + "grad_norm": 4.40322208404541, + "learning_rate": 5.6027319949519264e-08, + "loss": 0.1164, + "num_input_tokens_seen": 230868992, + "step": 73360 + }, + { + "epoch": 4.696562319953908, + "grad_norm": 3.93477725982666, + "learning_rate": 5.590977223603328e-08, + "loss": 0.1405, + "num_input_tokens_seen": 230885760, + "step": 73365 + }, + { + "epoch": 4.696882401894885, + "grad_norm": 6.6357879638671875, + "learning_rate": 5.579234656768212e-08, + "loss": 0.0959, + "num_input_tokens_seen": 230903232, + "step": 73370 + }, + { + "epoch": 4.697202483835862, + "grad_norm": 4.607434272766113, + "learning_rate": 5.567504295033027e-08, + "loss": 0.106, + "num_input_tokens_seen": 230918272, + "step": 73375 + }, + { + "epoch": 4.697522565776839, + "grad_norm": 6.697998046875, + "learning_rate": 5.555786138983443e-08, + "loss": 0.1122, + "num_input_tokens_seen": 230934272, + "step": 73380 + }, + { + "epoch": 4.697842647717816, + "grad_norm": 6.772831916809082, + "learning_rate": 5.5440801892046594e-08, + "loss": 0.1117, + "num_input_tokens_seen": 230950144, + "step": 73385 + }, + { + "epoch": 4.698162729658793, + "grad_norm": 6.8201823234558105, + "learning_rate": 5.53238644628118e-08, + "loss": 0.1378, + "num_input_tokens_seen": 230965184, + "step": 73390 + }, + { + "epoch": 4.69848281159977, + "grad_norm": 8.88969612121582, + "learning_rate": 5.5207049107969257e-08, + "loss": 0.1683, + "num_input_tokens_seen": 230981312, + "step": 73395 + }, + { + "epoch": 4.698802893540746, + "grad_norm": 3.8962695598602295, + "learning_rate": 5.509035583335237e-08, + "loss": 0.1005, + "num_input_tokens_seen": 230996288, + "step": 73400 + }, + { + "epoch": 4.699122975481723, + "grad_norm": 6.58479118347168, + "learning_rate": 5.497378464478787e-08, + "loss": 0.1176, + "num_input_tokens_seen": 231011520, + "step": 73405 + }, + { + "epoch": 4.6994430574227, + "grad_norm": 4.842419147491455, + "learning_rate": 5.485733554809719e-08, + "loss": 0.1081, + "num_input_tokens_seen": 231026880, + "step": 73410 + }, + { + "epoch": 4.699763139363677, + "grad_norm": 6.7862043380737305, + "learning_rate": 5.474100854909431e-08, + "loss": 0.147, + "num_input_tokens_seen": 231043072, + "step": 73415 + }, + { + "epoch": 4.700083221304654, + "grad_norm": 10.326733589172363, + "learning_rate": 5.462480365358902e-08, + "loss": 0.1295, + "num_input_tokens_seen": 231058048, + "step": 73420 + }, + { + "epoch": 4.700403303245631, + "grad_norm": 7.8787994384765625, + "learning_rate": 5.450872086738335e-08, + "loss": 0.1301, + "num_input_tokens_seen": 231073344, + "step": 73425 + }, + { + "epoch": 4.700723385186608, + "grad_norm": 6.09564733505249, + "learning_rate": 5.439276019627432e-08, + "loss": 0.1716, + "num_input_tokens_seen": 231088704, + "step": 73430 + }, + { + "epoch": 4.701043467127585, + "grad_norm": 4.477666854858398, + "learning_rate": 5.427692164605203e-08, + "loss": 0.1269, + "num_input_tokens_seen": 231103744, + "step": 73435 + }, + { + "epoch": 4.701363549068562, + "grad_norm": 8.704917907714844, + "learning_rate": 5.416120522250101e-08, + "loss": 0.1007, + "num_input_tokens_seen": 231119360, + "step": 73440 + }, + { + "epoch": 4.701683631009539, + "grad_norm": 6.986968040466309, + "learning_rate": 5.404561093139915e-08, + "loss": 0.1522, + "num_input_tokens_seen": 231134848, + "step": 73445 + }, + { + "epoch": 4.702003712950515, + "grad_norm": 12.06301498413086, + "learning_rate": 5.393013877851932e-08, + "loss": 0.1559, + "num_input_tokens_seen": 231151040, + "step": 73450 + }, + { + "epoch": 4.702323794891492, + "grad_norm": 7.163887977600098, + "learning_rate": 5.381478876962692e-08, + "loss": 0.1347, + "num_input_tokens_seen": 231165824, + "step": 73455 + }, + { + "epoch": 4.702643876832469, + "grad_norm": 10.1632719039917, + "learning_rate": 5.36995609104829e-08, + "loss": 0.1306, + "num_input_tokens_seen": 231180608, + "step": 73460 + }, + { + "epoch": 4.702963958773446, + "grad_norm": 10.986671447753906, + "learning_rate": 5.3584455206839855e-08, + "loss": 0.1666, + "num_input_tokens_seen": 231196992, + "step": 73465 + }, + { + "epoch": 4.703284040714423, + "grad_norm": 10.840387344360352, + "learning_rate": 5.346947166444655e-08, + "loss": 0.1021, + "num_input_tokens_seen": 231211712, + "step": 73470 + }, + { + "epoch": 4.7036041226554, + "grad_norm": 7.2370524406433105, + "learning_rate": 5.335461028904421e-08, + "loss": 0.1717, + "num_input_tokens_seen": 231226688, + "step": 73475 + }, + { + "epoch": 4.703924204596377, + "grad_norm": 5.845836162567139, + "learning_rate": 5.3239871086368245e-08, + "loss": 0.1499, + "num_input_tokens_seen": 231241856, + "step": 73480 + }, + { + "epoch": 4.704244286537354, + "grad_norm": 5.888522148132324, + "learning_rate": 5.31252540621488e-08, + "loss": 0.0853, + "num_input_tokens_seen": 231258560, + "step": 73485 + }, + { + "epoch": 4.70456436847833, + "grad_norm": 6.795463562011719, + "learning_rate": 5.301075922210852e-08, + "loss": 0.1042, + "num_input_tokens_seen": 231274240, + "step": 73490 + }, + { + "epoch": 4.704884450419307, + "grad_norm": 7.572648525238037, + "learning_rate": 5.2896386571965605e-08, + "loss": 0.1418, + "num_input_tokens_seen": 231290048, + "step": 73495 + }, + { + "epoch": 4.705204532360284, + "grad_norm": 5.270069122314453, + "learning_rate": 5.278213611742994e-08, + "loss": 0.1013, + "num_input_tokens_seen": 231306048, + "step": 73500 + }, + { + "epoch": 4.705524614301261, + "grad_norm": 6.402685642242432, + "learning_rate": 5.266800786420778e-08, + "loss": 0.1174, + "num_input_tokens_seen": 231322816, + "step": 73505 + }, + { + "epoch": 4.705844696242238, + "grad_norm": 7.899251461029053, + "learning_rate": 5.2554001817997636e-08, + "loss": 0.137, + "num_input_tokens_seen": 231337280, + "step": 73510 + }, + { + "epoch": 4.706164778183215, + "grad_norm": 6.956145286560059, + "learning_rate": 5.2440117984492166e-08, + "loss": 0.1297, + "num_input_tokens_seen": 231352960, + "step": 73515 + }, + { + "epoch": 4.706484860124192, + "grad_norm": 9.155282974243164, + "learning_rate": 5.23263563693785e-08, + "loss": 0.1608, + "num_input_tokens_seen": 231368064, + "step": 73520 + }, + { + "epoch": 4.706804942065169, + "grad_norm": 6.510521411895752, + "learning_rate": 5.22127169783368e-08, + "loss": 0.1151, + "num_input_tokens_seen": 231384832, + "step": 73525 + }, + { + "epoch": 4.707125024006146, + "grad_norm": 11.417977333068848, + "learning_rate": 5.209919981704198e-08, + "loss": 0.1378, + "num_input_tokens_seen": 231401088, + "step": 73530 + }, + { + "epoch": 4.707445105947123, + "grad_norm": 5.759010314941406, + "learning_rate": 5.198580489116284e-08, + "loss": 0.105, + "num_input_tokens_seen": 231416896, + "step": 73535 + }, + { + "epoch": 4.707765187888099, + "grad_norm": 5.164749622344971, + "learning_rate": 5.1872532206361234e-08, + "loss": 0.0956, + "num_input_tokens_seen": 231432320, + "step": 73540 + }, + { + "epoch": 4.708085269829076, + "grad_norm": 8.400341987609863, + "learning_rate": 5.175938176829348e-08, + "loss": 0.1142, + "num_input_tokens_seen": 231449600, + "step": 73545 + }, + { + "epoch": 4.708405351770053, + "grad_norm": 5.254576206207275, + "learning_rate": 5.1646353582609777e-08, + "loss": 0.1118, + "num_input_tokens_seen": 231465536, + "step": 73550 + }, + { + "epoch": 4.70872543371103, + "grad_norm": 7.802916526794434, + "learning_rate": 5.153344765495421e-08, + "loss": 0.1166, + "num_input_tokens_seen": 231480704, + "step": 73555 + }, + { + "epoch": 4.709045515652007, + "grad_norm": 5.825875759124756, + "learning_rate": 5.1420663990964514e-08, + "loss": 0.1093, + "num_input_tokens_seen": 231496256, + "step": 73560 + }, + { + "epoch": 4.709365597592984, + "grad_norm": 6.132323741912842, + "learning_rate": 5.1308002596272555e-08, + "loss": 0.1134, + "num_input_tokens_seen": 231512128, + "step": 73565 + }, + { + "epoch": 4.709685679533961, + "grad_norm": 4.1384196281433105, + "learning_rate": 5.119546347650467e-08, + "loss": 0.147, + "num_input_tokens_seen": 231527872, + "step": 73570 + }, + { + "epoch": 4.710005761474937, + "grad_norm": 4.018065929412842, + "learning_rate": 5.10830466372797e-08, + "loss": 0.1331, + "num_input_tokens_seen": 231543424, + "step": 73575 + }, + { + "epoch": 4.710325843415914, + "grad_norm": 8.387165069580078, + "learning_rate": 5.097075208421148e-08, + "loss": 0.1523, + "num_input_tokens_seen": 231559040, + "step": 73580 + }, + { + "epoch": 4.710645925356891, + "grad_norm": 5.134759902954102, + "learning_rate": 5.0858579822907206e-08, + "loss": 0.1265, + "num_input_tokens_seen": 231576704, + "step": 73585 + }, + { + "epoch": 4.710966007297868, + "grad_norm": 6.233074188232422, + "learning_rate": 5.07465298589685e-08, + "loss": 0.1183, + "num_input_tokens_seen": 231592000, + "step": 73590 + }, + { + "epoch": 4.711286089238845, + "grad_norm": 9.995704650878906, + "learning_rate": 5.06346021979906e-08, + "loss": 0.1462, + "num_input_tokens_seen": 231607168, + "step": 73595 + }, + { + "epoch": 4.711606171179822, + "grad_norm": 6.891883850097656, + "learning_rate": 5.0522796845561826e-08, + "loss": 0.1004, + "num_input_tokens_seen": 231623424, + "step": 73600 + }, + { + "epoch": 4.711926253120799, + "grad_norm": 7.301811218261719, + "learning_rate": 5.041111380726632e-08, + "loss": 0.1133, + "num_input_tokens_seen": 231638592, + "step": 73605 + }, + { + "epoch": 4.712246335061776, + "grad_norm": 6.104639053344727, + "learning_rate": 5.0299553088679896e-08, + "loss": 0.1748, + "num_input_tokens_seen": 231653504, + "step": 73610 + }, + { + "epoch": 4.712566417002753, + "grad_norm": 7.087498188018799, + "learning_rate": 5.018811469537394e-08, + "loss": 0.1306, + "num_input_tokens_seen": 231668032, + "step": 73615 + }, + { + "epoch": 4.71288649894373, + "grad_norm": 8.0560302734375, + "learning_rate": 5.007679863291315e-08, + "loss": 0.1682, + "num_input_tokens_seen": 231683072, + "step": 73620 + }, + { + "epoch": 4.713206580884706, + "grad_norm": 5.989359378814697, + "learning_rate": 4.9965604906855594e-08, + "loss": 0.1183, + "num_input_tokens_seen": 231697792, + "step": 73625 + }, + { + "epoch": 4.713526662825683, + "grad_norm": 8.487401008605957, + "learning_rate": 4.9854533522754045e-08, + "loss": 0.1674, + "num_input_tokens_seen": 231713024, + "step": 73630 + }, + { + "epoch": 4.71384674476666, + "grad_norm": 6.996622562408447, + "learning_rate": 4.974358448615463e-08, + "loss": 0.1751, + "num_input_tokens_seen": 231728704, + "step": 73635 + }, + { + "epoch": 4.714166826707637, + "grad_norm": 9.448464393615723, + "learning_rate": 4.963275780259791e-08, + "loss": 0.1221, + "num_input_tokens_seen": 231743360, + "step": 73640 + }, + { + "epoch": 4.714486908648614, + "grad_norm": 9.584981918334961, + "learning_rate": 4.952205347761751e-08, + "loss": 0.1398, + "num_input_tokens_seen": 231758784, + "step": 73645 + }, + { + "epoch": 4.714806990589591, + "grad_norm": 7.916441440582275, + "learning_rate": 4.9411471516741806e-08, + "loss": 0.1405, + "num_input_tokens_seen": 231773888, + "step": 73650 + }, + { + "epoch": 4.715127072530568, + "grad_norm": 5.781665802001953, + "learning_rate": 4.930101192549275e-08, + "loss": 0.1416, + "num_input_tokens_seen": 231788800, + "step": 73655 + }, + { + "epoch": 4.715447154471545, + "grad_norm": 7.360741138458252, + "learning_rate": 4.9190674709385655e-08, + "loss": 0.1014, + "num_input_tokens_seen": 231804032, + "step": 73660 + }, + { + "epoch": 4.715767236412521, + "grad_norm": 8.567200660705566, + "learning_rate": 4.908045987393056e-08, + "loss": 0.1118, + "num_input_tokens_seen": 231818752, + "step": 73665 + }, + { + "epoch": 4.716087318353498, + "grad_norm": 5.966978073120117, + "learning_rate": 4.8970367424631125e-08, + "loss": 0.1301, + "num_input_tokens_seen": 231833664, + "step": 73670 + }, + { + "epoch": 4.716407400294475, + "grad_norm": 6.851259231567383, + "learning_rate": 4.886039736698406e-08, + "loss": 0.0802, + "num_input_tokens_seen": 231850496, + "step": 73675 + }, + { + "epoch": 4.716727482235452, + "grad_norm": 6.679396629333496, + "learning_rate": 4.875054970648191e-08, + "loss": 0.1247, + "num_input_tokens_seen": 231866176, + "step": 73680 + }, + { + "epoch": 4.717047564176429, + "grad_norm": 4.1370463371276855, + "learning_rate": 4.864082444860862e-08, + "loss": 0.1106, + "num_input_tokens_seen": 231882176, + "step": 73685 + }, + { + "epoch": 4.717367646117406, + "grad_norm": 4.419643402099609, + "learning_rate": 4.8531221598844545e-08, + "loss": 0.127, + "num_input_tokens_seen": 231897984, + "step": 73690 + }, + { + "epoch": 4.717687728058383, + "grad_norm": 8.591629981994629, + "learning_rate": 4.84217411626614e-08, + "loss": 0.1445, + "num_input_tokens_seen": 231914304, + "step": 73695 + }, + { + "epoch": 4.71800780999936, + "grad_norm": 7.649059295654297, + "learning_rate": 4.8312383145526774e-08, + "loss": 0.1687, + "num_input_tokens_seen": 231929600, + "step": 73700 + }, + { + "epoch": 4.718327891940337, + "grad_norm": 4.738097667694092, + "learning_rate": 4.8203147552901566e-08, + "loss": 0.1082, + "num_input_tokens_seen": 231944576, + "step": 73705 + }, + { + "epoch": 4.718647973881314, + "grad_norm": 8.741315841674805, + "learning_rate": 4.809403439024002e-08, + "loss": 0.1516, + "num_input_tokens_seen": 231961408, + "step": 73710 + }, + { + "epoch": 4.71896805582229, + "grad_norm": 10.13512897491455, + "learning_rate": 4.7985043662991395e-08, + "loss": 0.1671, + "num_input_tokens_seen": 231976448, + "step": 73715 + }, + { + "epoch": 4.719288137763267, + "grad_norm": 3.7972710132598877, + "learning_rate": 4.787617537659689e-08, + "loss": 0.1444, + "num_input_tokens_seen": 231990912, + "step": 73720 + }, + { + "epoch": 4.719608219704244, + "grad_norm": 3.8017518520355225, + "learning_rate": 4.776742953649438e-08, + "loss": 0.1042, + "num_input_tokens_seen": 232009024, + "step": 73725 + }, + { + "epoch": 4.719928301645221, + "grad_norm": 66.65164947509766, + "learning_rate": 4.765880614811258e-08, + "loss": 0.1367, + "num_input_tokens_seen": 232024192, + "step": 73730 + }, + { + "epoch": 4.720248383586198, + "grad_norm": 5.767770767211914, + "learning_rate": 4.7550305216876315e-08, + "loss": 0.1457, + "num_input_tokens_seen": 232039424, + "step": 73735 + }, + { + "epoch": 4.720568465527175, + "grad_norm": 11.635076522827148, + "learning_rate": 4.744192674820347e-08, + "loss": 0.1621, + "num_input_tokens_seen": 232054656, + "step": 73740 + }, + { + "epoch": 4.720888547468152, + "grad_norm": 10.24577808380127, + "learning_rate": 4.733367074750611e-08, + "loss": 0.1297, + "num_input_tokens_seen": 232071680, + "step": 73745 + }, + { + "epoch": 4.721208629409129, + "grad_norm": 6.505891799926758, + "learning_rate": 4.722553722018963e-08, + "loss": 0.1854, + "num_input_tokens_seen": 232087168, + "step": 73750 + }, + { + "epoch": 4.721528711350105, + "grad_norm": 6.710010051727295, + "learning_rate": 4.711752617165388e-08, + "loss": 0.1434, + "num_input_tokens_seen": 232103040, + "step": 73755 + }, + { + "epoch": 4.721848793291082, + "grad_norm": 4.250417709350586, + "learning_rate": 4.700963760729177e-08, + "loss": 0.1494, + "num_input_tokens_seen": 232117888, + "step": 73760 + }, + { + "epoch": 4.722168875232059, + "grad_norm": 8.827103614807129, + "learning_rate": 4.690187153249176e-08, + "loss": 0.1116, + "num_input_tokens_seen": 232133248, + "step": 73765 + }, + { + "epoch": 4.722488957173036, + "grad_norm": 6.486767292022705, + "learning_rate": 4.679422795263428e-08, + "loss": 0.1319, + "num_input_tokens_seen": 232147584, + "step": 73770 + }, + { + "epoch": 4.722809039114013, + "grad_norm": 7.16926908493042, + "learning_rate": 4.668670687309501e-08, + "loss": 0.1414, + "num_input_tokens_seen": 232163008, + "step": 73775 + }, + { + "epoch": 4.72312912105499, + "grad_norm": 6.1516289710998535, + "learning_rate": 4.657930829924273e-08, + "loss": 0.113, + "num_input_tokens_seen": 232179328, + "step": 73780 + }, + { + "epoch": 4.723449202995967, + "grad_norm": 9.595916748046875, + "learning_rate": 4.6472032236440364e-08, + "loss": 0.1288, + "num_input_tokens_seen": 232194496, + "step": 73785 + }, + { + "epoch": 4.723769284936944, + "grad_norm": 6.315212726593018, + "learning_rate": 4.636487869004475e-08, + "loss": 0.109, + "num_input_tokens_seen": 232209280, + "step": 73790 + }, + { + "epoch": 4.724089366877921, + "grad_norm": 6.746302604675293, + "learning_rate": 4.625784766540631e-08, + "loss": 0.1194, + "num_input_tokens_seen": 232225536, + "step": 73795 + }, + { + "epoch": 4.724409448818898, + "grad_norm": 3.721200466156006, + "learning_rate": 4.615093916787022e-08, + "loss": 0.1079, + "num_input_tokens_seen": 232242176, + "step": 73800 + }, + { + "epoch": 4.724729530759874, + "grad_norm": 14.545215606689453, + "learning_rate": 4.6044153202774166e-08, + "loss": 0.1189, + "num_input_tokens_seen": 232259712, + "step": 73805 + }, + { + "epoch": 4.725049612700851, + "grad_norm": 7.22305965423584, + "learning_rate": 4.5937489775451374e-08, + "loss": 0.1489, + "num_input_tokens_seen": 232276480, + "step": 73810 + }, + { + "epoch": 4.725369694641828, + "grad_norm": 3.489537000656128, + "learning_rate": 4.58309488912273e-08, + "loss": 0.1006, + "num_input_tokens_seen": 232292224, + "step": 73815 + }, + { + "epoch": 4.725689776582805, + "grad_norm": 5.31291389465332, + "learning_rate": 4.5724530555422416e-08, + "loss": 0.1326, + "num_input_tokens_seen": 232307392, + "step": 73820 + }, + { + "epoch": 4.726009858523782, + "grad_norm": 4.981882095336914, + "learning_rate": 4.561823477335081e-08, + "loss": 0.1721, + "num_input_tokens_seen": 232322752, + "step": 73825 + }, + { + "epoch": 4.726329940464759, + "grad_norm": 6.682344913482666, + "learning_rate": 4.55120615503199e-08, + "loss": 0.0902, + "num_input_tokens_seen": 232338304, + "step": 73830 + }, + { + "epoch": 4.726650022405736, + "grad_norm": 4.347426891326904, + "learning_rate": 4.5406010891632104e-08, + "loss": 0.1177, + "num_input_tokens_seen": 232354432, + "step": 73835 + }, + { + "epoch": 4.726970104346712, + "grad_norm": 9.364648818969727, + "learning_rate": 4.530008280258208e-08, + "loss": 0.141, + "num_input_tokens_seen": 232369920, + "step": 73840 + }, + { + "epoch": 4.727290186287689, + "grad_norm": 6.753251075744629, + "learning_rate": 4.519427728846004e-08, + "loss": 0.1157, + "num_input_tokens_seen": 232384896, + "step": 73845 + }, + { + "epoch": 4.727610268228666, + "grad_norm": 6.571170330047607, + "learning_rate": 4.508859435454926e-08, + "loss": 0.1335, + "num_input_tokens_seen": 232400064, + "step": 73850 + }, + { + "epoch": 4.727930350169643, + "grad_norm": 6.808867931365967, + "learning_rate": 4.498303400612691e-08, + "loss": 0.1583, + "num_input_tokens_seen": 232415808, + "step": 73855 + }, + { + "epoch": 4.72825043211062, + "grad_norm": 4.007649898529053, + "learning_rate": 4.487759624846405e-08, + "loss": 0.1177, + "num_input_tokens_seen": 232431168, + "step": 73860 + }, + { + "epoch": 4.728570514051597, + "grad_norm": 9.024085998535156, + "learning_rate": 4.4772281086825905e-08, + "loss": 0.1467, + "num_input_tokens_seen": 232447232, + "step": 73865 + }, + { + "epoch": 4.728890595992574, + "grad_norm": 5.406055450439453, + "learning_rate": 4.466708852647106e-08, + "loss": 0.1056, + "num_input_tokens_seen": 232462464, + "step": 73870 + }, + { + "epoch": 4.729210677933551, + "grad_norm": 4.328599452972412, + "learning_rate": 4.4562018572652524e-08, + "loss": 0.2092, + "num_input_tokens_seen": 232481600, + "step": 73875 + }, + { + "epoch": 4.729530759874528, + "grad_norm": 8.971298217773438, + "learning_rate": 4.4457071230616656e-08, + "loss": 0.1094, + "num_input_tokens_seen": 232498752, + "step": 73880 + }, + { + "epoch": 4.729850841815505, + "grad_norm": 7.969549179077148, + "learning_rate": 4.435224650560427e-08, + "loss": 0.1342, + "num_input_tokens_seen": 232514496, + "step": 73885 + }, + { + "epoch": 4.730170923756481, + "grad_norm": 5.758510112762451, + "learning_rate": 4.424754440285006e-08, + "loss": 0.122, + "num_input_tokens_seen": 232529664, + "step": 73890 + }, + { + "epoch": 4.730491005697458, + "grad_norm": 5.9425883293151855, + "learning_rate": 4.414296492758152e-08, + "loss": 0.1481, + "num_input_tokens_seen": 232546176, + "step": 73895 + }, + { + "epoch": 4.730811087638435, + "grad_norm": 6.382148265838623, + "learning_rate": 4.403850808502141e-08, + "loss": 0.1273, + "num_input_tokens_seen": 232561280, + "step": 73900 + }, + { + "epoch": 4.731131169579412, + "grad_norm": 6.128588676452637, + "learning_rate": 4.393417388038529e-08, + "loss": 0.1615, + "num_input_tokens_seen": 232577792, + "step": 73905 + }, + { + "epoch": 4.731451251520389, + "grad_norm": 6.157104015350342, + "learning_rate": 4.382996231888398e-08, + "loss": 0.0995, + "num_input_tokens_seen": 232593728, + "step": 73910 + }, + { + "epoch": 4.731771333461366, + "grad_norm": 24.75564193725586, + "learning_rate": 4.372587340571999e-08, + "loss": 0.1221, + "num_input_tokens_seen": 232609024, + "step": 73915 + }, + { + "epoch": 4.732091415402343, + "grad_norm": 6.88290548324585, + "learning_rate": 4.362190714609221e-08, + "loss": 0.1173, + "num_input_tokens_seen": 232624064, + "step": 73920 + }, + { + "epoch": 4.73241149734332, + "grad_norm": 7.27026891708374, + "learning_rate": 4.3518063545190947e-08, + "loss": 0.109, + "num_input_tokens_seen": 232641024, + "step": 73925 + }, + { + "epoch": 4.732731579284296, + "grad_norm": 11.624313354492188, + "learning_rate": 4.3414342608202595e-08, + "loss": 0.1378, + "num_input_tokens_seen": 232656768, + "step": 73930 + }, + { + "epoch": 4.733051661225273, + "grad_norm": 10.978426933288574, + "learning_rate": 4.3310744340306075e-08, + "loss": 0.1154, + "num_input_tokens_seen": 232672576, + "step": 73935 + }, + { + "epoch": 4.73337174316625, + "grad_norm": 7.035129547119141, + "learning_rate": 4.320726874667447e-08, + "loss": 0.1172, + "num_input_tokens_seen": 232687872, + "step": 73940 + }, + { + "epoch": 4.733691825107227, + "grad_norm": 7.3491716384887695, + "learning_rate": 4.310391583247503e-08, + "loss": 0.1152, + "num_input_tokens_seen": 232704000, + "step": 73945 + }, + { + "epoch": 4.734011907048204, + "grad_norm": 8.971781730651855, + "learning_rate": 4.300068560286835e-08, + "loss": 0.1181, + "num_input_tokens_seen": 232719552, + "step": 73950 + }, + { + "epoch": 4.734331988989181, + "grad_norm": 7.182229042053223, + "learning_rate": 4.289757806300948e-08, + "loss": 0.0953, + "num_input_tokens_seen": 232735040, + "step": 73955 + }, + { + "epoch": 4.734652070930158, + "grad_norm": 9.209964752197266, + "learning_rate": 4.27945932180468e-08, + "loss": 0.1417, + "num_input_tokens_seen": 232754560, + "step": 73960 + }, + { + "epoch": 4.734972152871135, + "grad_norm": 9.164322853088379, + "learning_rate": 4.269173107312341e-08, + "loss": 0.1613, + "num_input_tokens_seen": 232770688, + "step": 73965 + }, + { + "epoch": 4.735292234812112, + "grad_norm": 7.044551849365234, + "learning_rate": 4.258899163337521e-08, + "loss": 0.138, + "num_input_tokens_seen": 232786048, + "step": 73970 + }, + { + "epoch": 4.735612316753089, + "grad_norm": 69.92762756347656, + "learning_rate": 4.248637490393226e-08, + "loss": 0.1359, + "num_input_tokens_seen": 232801408, + "step": 73975 + }, + { + "epoch": 4.735932398694065, + "grad_norm": 3.8539679050445557, + "learning_rate": 4.238388088991935e-08, + "loss": 0.1005, + "num_input_tokens_seen": 232821056, + "step": 73980 + }, + { + "epoch": 4.736252480635042, + "grad_norm": 6.225468158721924, + "learning_rate": 4.228150959645405e-08, + "loss": 0.1143, + "num_input_tokens_seen": 232837504, + "step": 73985 + }, + { + "epoch": 4.736572562576019, + "grad_norm": 9.726847648620605, + "learning_rate": 4.2179261028648385e-08, + "loss": 0.1471, + "num_input_tokens_seen": 232852032, + "step": 73990 + }, + { + "epoch": 4.736892644516996, + "grad_norm": 4.3617472648620605, + "learning_rate": 4.2077135191608e-08, + "loss": 0.0952, + "num_input_tokens_seen": 232867904, + "step": 73995 + }, + { + "epoch": 4.737212726457973, + "grad_norm": 14.166886329650879, + "learning_rate": 4.197513209043269e-08, + "loss": 0.1332, + "num_input_tokens_seen": 232884288, + "step": 74000 + }, + { + "epoch": 4.73753280839895, + "grad_norm": 33.77763366699219, + "learning_rate": 4.187325173021617e-08, + "loss": 0.1478, + "num_input_tokens_seen": 232900416, + "step": 74005 + }, + { + "epoch": 4.737852890339927, + "grad_norm": 4.742879867553711, + "learning_rate": 4.17714941160452e-08, + "loss": 0.1663, + "num_input_tokens_seen": 232916928, + "step": 74010 + }, + { + "epoch": 4.738172972280904, + "grad_norm": 8.769509315490723, + "learning_rate": 4.166985925300154e-08, + "loss": 0.1142, + "num_input_tokens_seen": 232935744, + "step": 74015 + }, + { + "epoch": 4.73849305422188, + "grad_norm": 12.75314712524414, + "learning_rate": 4.1568347146160035e-08, + "loss": 0.1247, + "num_input_tokens_seen": 232950528, + "step": 74020 + }, + { + "epoch": 4.738813136162857, + "grad_norm": 2.9054694175720215, + "learning_rate": 4.146695780058968e-08, + "loss": 0.1167, + "num_input_tokens_seen": 232966848, + "step": 74025 + }, + { + "epoch": 4.739133218103834, + "grad_norm": 5.161970615386963, + "learning_rate": 4.136569122135392e-08, + "loss": 0.0911, + "num_input_tokens_seen": 232983040, + "step": 74030 + }, + { + "epoch": 4.739453300044811, + "grad_norm": 9.804821014404297, + "learning_rate": 4.126454741350844e-08, + "loss": 0.1299, + "num_input_tokens_seen": 232999808, + "step": 74035 + }, + { + "epoch": 4.739773381985788, + "grad_norm": 3.6160550117492676, + "learning_rate": 4.116352638210447e-08, + "loss": 0.1338, + "num_input_tokens_seen": 233016320, + "step": 74040 + }, + { + "epoch": 4.740093463926765, + "grad_norm": 11.919844627380371, + "learning_rate": 4.106262813218659e-08, + "loss": 0.1548, + "num_input_tokens_seen": 233033216, + "step": 74045 + }, + { + "epoch": 4.740413545867742, + "grad_norm": 5.020599842071533, + "learning_rate": 4.096185266879271e-08, + "loss": 0.1068, + "num_input_tokens_seen": 233049280, + "step": 74050 + }, + { + "epoch": 4.740733627808719, + "grad_norm": 5.531476974487305, + "learning_rate": 4.086119999695548e-08, + "loss": 0.1205, + "num_input_tokens_seen": 233064384, + "step": 74055 + }, + { + "epoch": 4.741053709749696, + "grad_norm": 7.196449279785156, + "learning_rate": 4.076067012170032e-08, + "loss": 0.1341, + "num_input_tokens_seen": 233079168, + "step": 74060 + }, + { + "epoch": 4.741373791690673, + "grad_norm": 6.097263336181641, + "learning_rate": 4.066026304804821e-08, + "loss": 0.1492, + "num_input_tokens_seen": 233095296, + "step": 74065 + }, + { + "epoch": 4.741693873631649, + "grad_norm": 5.489364147186279, + "learning_rate": 4.055997878101181e-08, + "loss": 0.0897, + "num_input_tokens_seen": 233111552, + "step": 74070 + }, + { + "epoch": 4.742013955572626, + "grad_norm": 5.199853897094727, + "learning_rate": 4.045981732559934e-08, + "loss": 0.1233, + "num_input_tokens_seen": 233127488, + "step": 74075 + }, + { + "epoch": 4.742334037513603, + "grad_norm": 7.467297077178955, + "learning_rate": 4.035977868681262e-08, + "loss": 0.1122, + "num_input_tokens_seen": 233143872, + "step": 74080 + }, + { + "epoch": 4.74265411945458, + "grad_norm": 8.140924453735352, + "learning_rate": 4.0259862869646557e-08, + "loss": 0.1337, + "num_input_tokens_seen": 233159360, + "step": 74085 + }, + { + "epoch": 4.742974201395557, + "grad_norm": 2.861382007598877, + "learning_rate": 4.016006987909077e-08, + "loss": 0.0919, + "num_input_tokens_seen": 233174848, + "step": 74090 + }, + { + "epoch": 4.743294283336534, + "grad_norm": 7.400623321533203, + "learning_rate": 4.006039972012793e-08, + "loss": 0.1226, + "num_input_tokens_seen": 233191296, + "step": 74095 + }, + { + "epoch": 4.743614365277511, + "grad_norm": 5.228528022766113, + "learning_rate": 3.996085239773573e-08, + "loss": 0.1095, + "num_input_tokens_seen": 233207360, + "step": 74100 + }, + { + "epoch": 4.7439344472184874, + "grad_norm": 6.05613374710083, + "learning_rate": 3.9861427916884633e-08, + "loss": 0.1206, + "num_input_tokens_seen": 233221888, + "step": 74105 + }, + { + "epoch": 4.744254529159464, + "grad_norm": 5.82201623916626, + "learning_rate": 3.9762126282539006e-08, + "loss": 0.0942, + "num_input_tokens_seen": 233236992, + "step": 74110 + }, + { + "epoch": 4.744574611100441, + "grad_norm": 3.4723596572875977, + "learning_rate": 3.966294749965821e-08, + "loss": 0.1296, + "num_input_tokens_seen": 233252928, + "step": 74115 + }, + { + "epoch": 4.744894693041418, + "grad_norm": 7.280732154846191, + "learning_rate": 3.95638915731944e-08, + "loss": 0.0977, + "num_input_tokens_seen": 233267712, + "step": 74120 + }, + { + "epoch": 4.745214774982395, + "grad_norm": 6.15798282623291, + "learning_rate": 3.946495850809418e-08, + "loss": 0.0949, + "num_input_tokens_seen": 233281984, + "step": 74125 + }, + { + "epoch": 4.745534856923372, + "grad_norm": 5.912557125091553, + "learning_rate": 3.9366148309297195e-08, + "loss": 0.0871, + "num_input_tokens_seen": 233297472, + "step": 74130 + }, + { + "epoch": 4.745854938864349, + "grad_norm": 14.844764709472656, + "learning_rate": 3.926746098173756e-08, + "loss": 0.1606, + "num_input_tokens_seen": 233313280, + "step": 74135 + }, + { + "epoch": 4.746175020805326, + "grad_norm": 4.378493309020996, + "learning_rate": 3.916889653034384e-08, + "loss": 0.1081, + "num_input_tokens_seen": 233328896, + "step": 74140 + }, + { + "epoch": 4.746495102746303, + "grad_norm": 5.872570514678955, + "learning_rate": 3.9070454960037086e-08, + "loss": 0.1058, + "num_input_tokens_seen": 233345088, + "step": 74145 + }, + { + "epoch": 4.74681518468728, + "grad_norm": 7.349909782409668, + "learning_rate": 3.897213627573365e-08, + "loss": 0.0974, + "num_input_tokens_seen": 233361216, + "step": 74150 + }, + { + "epoch": 4.7471352666282565, + "grad_norm": 4.327391624450684, + "learning_rate": 3.8873940482342384e-08, + "loss": 0.1023, + "num_input_tokens_seen": 233377792, + "step": 74155 + }, + { + "epoch": 4.7474553485692335, + "grad_norm": 8.254826545715332, + "learning_rate": 3.8775867584767145e-08, + "loss": 0.1428, + "num_input_tokens_seen": 233393792, + "step": 74160 + }, + { + "epoch": 4.7477754305102104, + "grad_norm": 8.957845687866211, + "learning_rate": 3.8677917587904844e-08, + "loss": 0.1314, + "num_input_tokens_seen": 233409408, + "step": 74165 + }, + { + "epoch": 4.748095512451187, + "grad_norm": 7.10029935836792, + "learning_rate": 3.858009049664685e-08, + "loss": 0.2069, + "num_input_tokens_seen": 233424832, + "step": 74170 + }, + { + "epoch": 4.748415594392164, + "grad_norm": 8.174123764038086, + "learning_rate": 3.8482386315878416e-08, + "loss": 0.1239, + "num_input_tokens_seen": 233440064, + "step": 74175 + }, + { + "epoch": 4.748735676333141, + "grad_norm": 9.42420482635498, + "learning_rate": 3.838480505047759e-08, + "loss": 0.1283, + "num_input_tokens_seen": 233455872, + "step": 74180 + }, + { + "epoch": 4.749055758274118, + "grad_norm": 7.973194122314453, + "learning_rate": 3.828734670531769e-08, + "loss": 0.1478, + "num_input_tokens_seen": 233471168, + "step": 74185 + }, + { + "epoch": 4.749375840215095, + "grad_norm": 5.132247447967529, + "learning_rate": 3.819001128526512e-08, + "loss": 0.0953, + "num_input_tokens_seen": 233485824, + "step": 74190 + }, + { + "epoch": 4.7496959221560715, + "grad_norm": 6.400381088256836, + "learning_rate": 3.809279879518013e-08, + "loss": 0.0894, + "num_input_tokens_seen": 233502784, + "step": 74195 + }, + { + "epoch": 4.7500160040970485, + "grad_norm": 6.072603225708008, + "learning_rate": 3.7995709239917465e-08, + "loss": 0.1182, + "num_input_tokens_seen": 233520512, + "step": 74200 + }, + { + "epoch": 4.7503360860380255, + "grad_norm": 10.057016372680664, + "learning_rate": 3.7898742624324915e-08, + "loss": 0.1649, + "num_input_tokens_seen": 233535744, + "step": 74205 + }, + { + "epoch": 4.7506561679790025, + "grad_norm": 7.6140031814575195, + "learning_rate": 3.780189895324443e-08, + "loss": 0.1252, + "num_input_tokens_seen": 233552064, + "step": 74210 + }, + { + "epoch": 4.750912233531784, + "eval_loss": 0.6057931184768677, + "eval_runtime": 51.0113, + "eval_samples_per_second": 272.214, + "eval_steps_per_second": 34.032, + "num_input_tokens_seen": 233564288, + "step": 74214 + }, + { + "epoch": 4.7509762499199795, + "grad_norm": 7.224413871765137, + "learning_rate": 3.7705178231511875e-08, + "loss": 0.1157, + "num_input_tokens_seen": 233567168, + "step": 74215 + }, + { + "epoch": 4.7512963318609565, + "grad_norm": 5.6669182777404785, + "learning_rate": 3.760858046395699e-08, + "loss": 0.1425, + "num_input_tokens_seen": 233583552, + "step": 74220 + }, + { + "epoch": 4.7516164138019334, + "grad_norm": 9.586512565612793, + "learning_rate": 3.7512105655403974e-08, + "loss": 0.1248, + "num_input_tokens_seen": 233599168, + "step": 74225 + }, + { + "epoch": 4.75193649574291, + "grad_norm": 6.465323448181152, + "learning_rate": 3.7415753810669253e-08, + "loss": 0.1434, + "num_input_tokens_seen": 233614464, + "step": 74230 + }, + { + "epoch": 4.752256577683887, + "grad_norm": 6.740175724029541, + "learning_rate": 3.731952493456509e-08, + "loss": 0.1861, + "num_input_tokens_seen": 233632128, + "step": 74235 + }, + { + "epoch": 4.752576659624864, + "grad_norm": 10.617633819580078, + "learning_rate": 3.722341903189597e-08, + "loss": 0.1573, + "num_input_tokens_seen": 233647232, + "step": 74240 + }, + { + "epoch": 4.7528967415658405, + "grad_norm": 9.87649917602539, + "learning_rate": 3.712743610746111e-08, + "loss": 0.1575, + "num_input_tokens_seen": 233662720, + "step": 74245 + }, + { + "epoch": 4.7532168235068175, + "grad_norm": 11.216869354248047, + "learning_rate": 3.703157616605363e-08, + "loss": 0.1193, + "num_input_tokens_seen": 233677824, + "step": 74250 + }, + { + "epoch": 4.7535369054477945, + "grad_norm": 9.057735443115234, + "learning_rate": 3.693583921245997e-08, + "loss": 0.1434, + "num_input_tokens_seen": 233693568, + "step": 74255 + }, + { + "epoch": 4.7538569873887715, + "grad_norm": 13.843781471252441, + "learning_rate": 3.684022525146103e-08, + "loss": 0.1683, + "num_input_tokens_seen": 233708864, + "step": 74260 + }, + { + "epoch": 4.7541770693297485, + "grad_norm": 10.361680030822754, + "learning_rate": 3.6744734287830766e-08, + "loss": 0.1419, + "num_input_tokens_seen": 233725504, + "step": 74265 + }, + { + "epoch": 4.7544971512707255, + "grad_norm": 12.352992057800293, + "learning_rate": 3.664936632633814e-08, + "loss": 0.1156, + "num_input_tokens_seen": 233740480, + "step": 74270 + }, + { + "epoch": 4.7548172332117025, + "grad_norm": 7.618231773376465, + "learning_rate": 3.655412137174519e-08, + "loss": 0.1669, + "num_input_tokens_seen": 233754944, + "step": 74275 + }, + { + "epoch": 4.7551373151526795, + "grad_norm": 8.6541109085083, + "learning_rate": 3.645899942880754e-08, + "loss": 0.1299, + "num_input_tokens_seen": 233769984, + "step": 74280 + }, + { + "epoch": 4.755457397093656, + "grad_norm": 8.555667877197266, + "learning_rate": 3.6364000502275285e-08, + "loss": 0.1703, + "num_input_tokens_seen": 233784640, + "step": 74285 + }, + { + "epoch": 4.7557774790346325, + "grad_norm": 8.959076881408691, + "learning_rate": 3.626912459689214e-08, + "loss": 0.1472, + "num_input_tokens_seen": 233802240, + "step": 74290 + }, + { + "epoch": 4.7560975609756095, + "grad_norm": 6.511390209197998, + "learning_rate": 3.617437171739624e-08, + "loss": 0.1296, + "num_input_tokens_seen": 233818304, + "step": 74295 + }, + { + "epoch": 4.7564176429165865, + "grad_norm": 6.457735061645508, + "learning_rate": 3.607974186851826e-08, + "loss": 0.1498, + "num_input_tokens_seen": 233834368, + "step": 74300 + }, + { + "epoch": 4.7567377248575635, + "grad_norm": 8.144827842712402, + "learning_rate": 3.598523505498414e-08, + "loss": 0.1469, + "num_input_tokens_seen": 233850240, + "step": 74305 + }, + { + "epoch": 4.7570578067985405, + "grad_norm": 7.469751358032227, + "learning_rate": 3.5890851281512594e-08, + "loss": 0.125, + "num_input_tokens_seen": 233864832, + "step": 74310 + }, + { + "epoch": 4.7573778887395175, + "grad_norm": 5.976531982421875, + "learning_rate": 3.5796590552816815e-08, + "loss": 0.1653, + "num_input_tokens_seen": 233879488, + "step": 74315 + }, + { + "epoch": 4.7576979706804945, + "grad_norm": 6.541297912597656, + "learning_rate": 3.570245287360386e-08, + "loss": 0.1048, + "num_input_tokens_seen": 233895168, + "step": 74320 + }, + { + "epoch": 4.7580180526214715, + "grad_norm": 6.2826247215271, + "learning_rate": 3.560843824857441e-08, + "loss": 0.1108, + "num_input_tokens_seen": 233910784, + "step": 74325 + }, + { + "epoch": 4.7583381345624485, + "grad_norm": 56.79745864868164, + "learning_rate": 3.5514546682423044e-08, + "loss": 0.163, + "num_input_tokens_seen": 233926656, + "step": 74330 + }, + { + "epoch": 4.758658216503425, + "grad_norm": 5.801333427429199, + "learning_rate": 3.5420778179837964e-08, + "loss": 0.192, + "num_input_tokens_seen": 233941952, + "step": 74335 + }, + { + "epoch": 4.758978298444402, + "grad_norm": 6.546515941619873, + "learning_rate": 3.5327132745501535e-08, + "loss": 0.1245, + "num_input_tokens_seen": 233957952, + "step": 74340 + }, + { + "epoch": 4.759298380385379, + "grad_norm": 10.174497604370117, + "learning_rate": 3.523361038409057e-08, + "loss": 0.1383, + "num_input_tokens_seen": 233974400, + "step": 74345 + }, + { + "epoch": 4.7596184623263555, + "grad_norm": 6.614574432373047, + "learning_rate": 3.514021110027438e-08, + "loss": 0.077, + "num_input_tokens_seen": 233989760, + "step": 74350 + }, + { + "epoch": 4.7599385442673325, + "grad_norm": 9.358530044555664, + "learning_rate": 3.504693489871702e-08, + "loss": 0.1296, + "num_input_tokens_seen": 234005760, + "step": 74355 + }, + { + "epoch": 4.7602586262083095, + "grad_norm": 6.158155918121338, + "learning_rate": 3.495378178407643e-08, + "loss": 0.1496, + "num_input_tokens_seen": 234022400, + "step": 74360 + }, + { + "epoch": 4.7605787081492865, + "grad_norm": 7.069812297821045, + "learning_rate": 3.486075176100362e-08, + "loss": 0.1553, + "num_input_tokens_seen": 234038720, + "step": 74365 + }, + { + "epoch": 4.760898790090263, + "grad_norm": 6.986353874206543, + "learning_rate": 3.476784483414486e-08, + "loss": 0.1218, + "num_input_tokens_seen": 234054912, + "step": 74370 + }, + { + "epoch": 4.76121887203124, + "grad_norm": 7.008198261260986, + "learning_rate": 3.4675061008138664e-08, + "loss": 0.1305, + "num_input_tokens_seen": 234071616, + "step": 74375 + }, + { + "epoch": 4.761538953972217, + "grad_norm": 7.252816200256348, + "learning_rate": 3.458240028761883e-08, + "loss": 0.1552, + "num_input_tokens_seen": 234087808, + "step": 74380 + }, + { + "epoch": 4.761859035913194, + "grad_norm": 6.4728779792785645, + "learning_rate": 3.4489862677211936e-08, + "loss": 0.1138, + "num_input_tokens_seen": 234102912, + "step": 74385 + }, + { + "epoch": 4.762179117854171, + "grad_norm": 4.059924125671387, + "learning_rate": 3.4397448181538726e-08, + "loss": 0.0945, + "num_input_tokens_seen": 234118400, + "step": 74390 + }, + { + "epoch": 4.762499199795148, + "grad_norm": 9.835928916931152, + "learning_rate": 3.43051568052144e-08, + "loss": 0.1406, + "num_input_tokens_seen": 234132864, + "step": 74395 + }, + { + "epoch": 4.762819281736125, + "grad_norm": 4.034813404083252, + "learning_rate": 3.4212988552846936e-08, + "loss": 0.094, + "num_input_tokens_seen": 234149248, + "step": 74400 + }, + { + "epoch": 4.763139363677102, + "grad_norm": 5.834192276000977, + "learning_rate": 3.4120943429039324e-08, + "loss": 0.118, + "num_input_tokens_seen": 234164224, + "step": 74405 + }, + { + "epoch": 4.7634594456180785, + "grad_norm": 7.273866653442383, + "learning_rate": 3.402902143838705e-08, + "loss": 0.1164, + "num_input_tokens_seen": 234180288, + "step": 74410 + }, + { + "epoch": 4.7637795275590555, + "grad_norm": 6.359996318817139, + "learning_rate": 3.393722258548088e-08, + "loss": 0.1564, + "num_input_tokens_seen": 234195840, + "step": 74415 + }, + { + "epoch": 4.764099609500032, + "grad_norm": 7.373340129852295, + "learning_rate": 3.384554687490466e-08, + "loss": 0.1233, + "num_input_tokens_seen": 234211456, + "step": 74420 + }, + { + "epoch": 4.764419691441009, + "grad_norm": 10.221199035644531, + "learning_rate": 3.3753994311236106e-08, + "loss": 0.1477, + "num_input_tokens_seen": 234226624, + "step": 74425 + }, + { + "epoch": 4.764739773381986, + "grad_norm": 7.671237945556641, + "learning_rate": 3.366256489904685e-08, + "loss": 0.0965, + "num_input_tokens_seen": 234242432, + "step": 74430 + }, + { + "epoch": 4.765059855322963, + "grad_norm": 20.684898376464844, + "learning_rate": 3.3571258642902394e-08, + "loss": 0.1509, + "num_input_tokens_seen": 234257920, + "step": 74435 + }, + { + "epoch": 4.76537993726394, + "grad_norm": 6.167635440826416, + "learning_rate": 3.3480075547362154e-08, + "loss": 0.1437, + "num_input_tokens_seen": 234274944, + "step": 74440 + }, + { + "epoch": 4.765700019204917, + "grad_norm": 10.094968795776367, + "learning_rate": 3.338901561697944e-08, + "loss": 0.0895, + "num_input_tokens_seen": 234289792, + "step": 74445 + }, + { + "epoch": 4.766020101145894, + "grad_norm": 3.7459986209869385, + "learning_rate": 3.3298078856300874e-08, + "loss": 0.0701, + "num_input_tokens_seen": 234304768, + "step": 74450 + }, + { + "epoch": 4.766340183086871, + "grad_norm": 4.4437971115112305, + "learning_rate": 3.3207265269867836e-08, + "loss": 0.1504, + "num_input_tokens_seen": 234319616, + "step": 74455 + }, + { + "epoch": 4.766660265027847, + "grad_norm": 8.144400596618652, + "learning_rate": 3.311657486221476e-08, + "loss": 0.135, + "num_input_tokens_seen": 234334272, + "step": 74460 + }, + { + "epoch": 4.766980346968824, + "grad_norm": 6.168826103210449, + "learning_rate": 3.30260076378705e-08, + "loss": 0.1336, + "num_input_tokens_seen": 234349696, + "step": 74465 + }, + { + "epoch": 4.767300428909801, + "grad_norm": 7.336982250213623, + "learning_rate": 3.293556360135758e-08, + "loss": 0.1244, + "num_input_tokens_seen": 234364864, + "step": 74470 + }, + { + "epoch": 4.767620510850778, + "grad_norm": 11.258648872375488, + "learning_rate": 3.2845242757192085e-08, + "loss": 0.1555, + "num_input_tokens_seen": 234380288, + "step": 74475 + }, + { + "epoch": 4.767940592791755, + "grad_norm": 7.2982611656188965, + "learning_rate": 3.275504510988403e-08, + "loss": 0.1255, + "num_input_tokens_seen": 234395008, + "step": 74480 + }, + { + "epoch": 4.768260674732732, + "grad_norm": 8.58685302734375, + "learning_rate": 3.266497066393759e-08, + "loss": 0.1423, + "num_input_tokens_seen": 234410176, + "step": 74485 + }, + { + "epoch": 4.768580756673709, + "grad_norm": 10.858810424804688, + "learning_rate": 3.2575019423851106e-08, + "loss": 0.1873, + "num_input_tokens_seen": 234425856, + "step": 74490 + }, + { + "epoch": 4.768900838614686, + "grad_norm": 7.386778354644775, + "learning_rate": 3.2485191394115153e-08, + "loss": 0.1291, + "num_input_tokens_seen": 234441472, + "step": 74495 + }, + { + "epoch": 4.769220920555663, + "grad_norm": 5.179378509521484, + "learning_rate": 3.239548657921643e-08, + "loss": 0.0958, + "num_input_tokens_seen": 234457408, + "step": 74500 + }, + { + "epoch": 4.76954100249664, + "grad_norm": 6.075483798980713, + "learning_rate": 3.230590498363356e-08, + "loss": 0.128, + "num_input_tokens_seen": 234472832, + "step": 74505 + }, + { + "epoch": 4.769861084437616, + "grad_norm": 3.922769546508789, + "learning_rate": 3.22164466118402e-08, + "loss": 0.1635, + "num_input_tokens_seen": 234488320, + "step": 74510 + }, + { + "epoch": 4.770181166378593, + "grad_norm": 6.108360767364502, + "learning_rate": 3.212711146830333e-08, + "loss": 0.1556, + "num_input_tokens_seen": 234506112, + "step": 74515 + }, + { + "epoch": 4.77050124831957, + "grad_norm": 8.4469633102417, + "learning_rate": 3.2037899557483545e-08, + "loss": 0.1281, + "num_input_tokens_seen": 234523648, + "step": 74520 + }, + { + "epoch": 4.770821330260547, + "grad_norm": 7.264898777008057, + "learning_rate": 3.194881088383617e-08, + "loss": 0.1576, + "num_input_tokens_seen": 234538560, + "step": 74525 + }, + { + "epoch": 4.771141412201524, + "grad_norm": 7.578794956207275, + "learning_rate": 3.1859845451809315e-08, + "loss": 0.1453, + "num_input_tokens_seen": 234554496, + "step": 74530 + }, + { + "epoch": 4.771461494142501, + "grad_norm": 8.41057014465332, + "learning_rate": 3.1771003265845815e-08, + "loss": 0.1317, + "num_input_tokens_seen": 234569600, + "step": 74535 + }, + { + "epoch": 4.771781576083478, + "grad_norm": 5.321902751922607, + "learning_rate": 3.168228433038212e-08, + "loss": 0.1402, + "num_input_tokens_seen": 234585152, + "step": 74540 + }, + { + "epoch": 4.772101658024455, + "grad_norm": 7.440902233123779, + "learning_rate": 3.159368864984802e-08, + "loss": 0.1452, + "num_input_tokens_seen": 234599744, + "step": 74545 + }, + { + "epoch": 4.772421739965431, + "grad_norm": 5.065062046051025, + "learning_rate": 3.150521622866748e-08, + "loss": 0.1187, + "num_input_tokens_seen": 234615040, + "step": 74550 + }, + { + "epoch": 4.772741821906408, + "grad_norm": 6.167762279510498, + "learning_rate": 3.141686707125863e-08, + "loss": 0.097, + "num_input_tokens_seen": 234632768, + "step": 74555 + }, + { + "epoch": 4.773061903847385, + "grad_norm": 5.444477558135986, + "learning_rate": 3.1328641182033214e-08, + "loss": 0.1359, + "num_input_tokens_seen": 234648832, + "step": 74560 + }, + { + "epoch": 4.773381985788362, + "grad_norm": 7.191439628601074, + "learning_rate": 3.1240538565396325e-08, + "loss": 0.1411, + "num_input_tokens_seen": 234663872, + "step": 74565 + }, + { + "epoch": 4.773702067729339, + "grad_norm": 5.7349042892456055, + "learning_rate": 3.115255922574778e-08, + "loss": 0.1816, + "num_input_tokens_seen": 234679296, + "step": 74570 + }, + { + "epoch": 4.774022149670316, + "grad_norm": 4.542884349822998, + "learning_rate": 3.106470316748072e-08, + "loss": 0.1338, + "num_input_tokens_seen": 234695360, + "step": 74575 + }, + { + "epoch": 4.774342231611293, + "grad_norm": 6.59098482131958, + "learning_rate": 3.097697039498221e-08, + "loss": 0.1087, + "num_input_tokens_seen": 234711616, + "step": 74580 + }, + { + "epoch": 4.77466231355227, + "grad_norm": 11.848445892333984, + "learning_rate": 3.088936091263289e-08, + "loss": 0.1395, + "num_input_tokens_seen": 234726784, + "step": 74585 + }, + { + "epoch": 4.774982395493247, + "grad_norm": 6.272937774658203, + "learning_rate": 3.080187472480789e-08, + "loss": 0.0694, + "num_input_tokens_seen": 234742400, + "step": 74590 + }, + { + "epoch": 4.775302477434224, + "grad_norm": 7.466609001159668, + "learning_rate": 3.071451183587565e-08, + "loss": 0.0987, + "num_input_tokens_seen": 234758528, + "step": 74595 + }, + { + "epoch": 4.7756225593752, + "grad_norm": 11.818137168884277, + "learning_rate": 3.062727225019879e-08, + "loss": 0.1424, + "num_input_tokens_seen": 234774656, + "step": 74600 + }, + { + "epoch": 4.775942641316177, + "grad_norm": 3.6176564693450928, + "learning_rate": 3.0540155972133275e-08, + "loss": 0.1352, + "num_input_tokens_seen": 234790528, + "step": 74605 + }, + { + "epoch": 4.776262723257154, + "grad_norm": 1.2369071245193481, + "learning_rate": 3.045316300602979e-08, + "loss": 0.105, + "num_input_tokens_seen": 234806464, + "step": 74610 + }, + { + "epoch": 4.776582805198131, + "grad_norm": 6.113955020904541, + "learning_rate": 3.036629335623154e-08, + "loss": 0.1414, + "num_input_tokens_seen": 234821632, + "step": 74615 + }, + { + "epoch": 4.776902887139108, + "grad_norm": 8.38001537322998, + "learning_rate": 3.027954702707697e-08, + "loss": 0.0939, + "num_input_tokens_seen": 234837376, + "step": 74620 + }, + { + "epoch": 4.777222969080085, + "grad_norm": 5.3620452880859375, + "learning_rate": 3.0192924022897365e-08, + "loss": 0.1217, + "num_input_tokens_seen": 234853376, + "step": 74625 + }, + { + "epoch": 4.777543051021062, + "grad_norm": 6.924496173858643, + "learning_rate": 3.0106424348018417e-08, + "loss": 0.1146, + "num_input_tokens_seen": 234868608, + "step": 74630 + }, + { + "epoch": 4.777863132962038, + "grad_norm": 8.13479995727539, + "learning_rate": 3.002004800675945e-08, + "loss": 0.1605, + "num_input_tokens_seen": 234885312, + "step": 74635 + }, + { + "epoch": 4.778183214903015, + "grad_norm": 6.777738094329834, + "learning_rate": 2.993379500343341e-08, + "loss": 0.1277, + "num_input_tokens_seen": 234904192, + "step": 74640 + }, + { + "epoch": 4.778503296843992, + "grad_norm": 7.9288716316223145, + "learning_rate": 2.984766534234795e-08, + "loss": 0.1633, + "num_input_tokens_seen": 234919360, + "step": 74645 + }, + { + "epoch": 4.778823378784969, + "grad_norm": 6.27785587310791, + "learning_rate": 2.9761659027803257e-08, + "loss": 0.1372, + "num_input_tokens_seen": 234934464, + "step": 74650 + }, + { + "epoch": 4.779143460725946, + "grad_norm": 7.391513824462891, + "learning_rate": 2.9675776064094495e-08, + "loss": 0.1541, + "num_input_tokens_seen": 234951744, + "step": 74655 + }, + { + "epoch": 4.779463542666923, + "grad_norm": 6.259893417358398, + "learning_rate": 2.9590016455509908e-08, + "loss": 0.0929, + "num_input_tokens_seen": 234966848, + "step": 74660 + }, + { + "epoch": 4.7797836246079, + "grad_norm": 8.390100479125977, + "learning_rate": 2.9504380206332173e-08, + "loss": 0.1831, + "num_input_tokens_seen": 234982016, + "step": 74665 + }, + { + "epoch": 4.780103706548877, + "grad_norm": 5.995839595794678, + "learning_rate": 2.941886732083704e-08, + "loss": 0.1148, + "num_input_tokens_seen": 234997568, + "step": 74670 + }, + { + "epoch": 4.780423788489854, + "grad_norm": 8.445080757141113, + "learning_rate": 2.933347780329526e-08, + "loss": 0.1312, + "num_input_tokens_seen": 235013568, + "step": 74675 + }, + { + "epoch": 4.780743870430831, + "grad_norm": 5.704761028289795, + "learning_rate": 2.9248211657969816e-08, + "loss": 0.092, + "num_input_tokens_seen": 235029504, + "step": 74680 + }, + { + "epoch": 4.781063952371807, + "grad_norm": 1.7518844604492188, + "learning_rate": 2.9163068889119516e-08, + "loss": 0.0907, + "num_input_tokens_seen": 235045312, + "step": 74685 + }, + { + "epoch": 4.781384034312784, + "grad_norm": 6.537142276763916, + "learning_rate": 2.9078049500995133e-08, + "loss": 0.1454, + "num_input_tokens_seen": 235061824, + "step": 74690 + }, + { + "epoch": 4.781704116253761, + "grad_norm": 1.9764777421951294, + "learning_rate": 2.8993153497842708e-08, + "loss": 0.1331, + "num_input_tokens_seen": 235077568, + "step": 74695 + }, + { + "epoch": 4.782024198194738, + "grad_norm": 6.027496814727783, + "learning_rate": 2.8908380883901077e-08, + "loss": 0.1477, + "num_input_tokens_seen": 235092864, + "step": 74700 + }, + { + "epoch": 4.782344280135715, + "grad_norm": 7.648016929626465, + "learning_rate": 2.8823731663403243e-08, + "loss": 0.0905, + "num_input_tokens_seen": 235108608, + "step": 74705 + }, + { + "epoch": 4.782664362076692, + "grad_norm": 4.259855270385742, + "learning_rate": 2.8739205840576656e-08, + "loss": 0.1283, + "num_input_tokens_seen": 235124608, + "step": 74710 + }, + { + "epoch": 4.782984444017669, + "grad_norm": 8.356532096862793, + "learning_rate": 2.8654803419641554e-08, + "loss": 0.1257, + "num_input_tokens_seen": 235140160, + "step": 74715 + }, + { + "epoch": 4.783304525958646, + "grad_norm": 48.77132797241211, + "learning_rate": 2.8570524404812893e-08, + "loss": 0.1582, + "num_input_tokens_seen": 235155584, + "step": 74720 + }, + { + "epoch": 4.783624607899622, + "grad_norm": 8.07671070098877, + "learning_rate": 2.848636880029898e-08, + "loss": 0.1286, + "num_input_tokens_seen": 235170304, + "step": 74725 + }, + { + "epoch": 4.783944689840599, + "grad_norm": 7.773960590362549, + "learning_rate": 2.8402336610302283e-08, + "loss": 0.1374, + "num_input_tokens_seen": 235186176, + "step": 74730 + }, + { + "epoch": 4.784264771781576, + "grad_norm": 8.23366928100586, + "learning_rate": 2.831842783901889e-08, + "loss": 0.1392, + "num_input_tokens_seen": 235201408, + "step": 74735 + }, + { + "epoch": 4.784584853722553, + "grad_norm": 7.598379611968994, + "learning_rate": 2.8234642490638507e-08, + "loss": 0.1631, + "num_input_tokens_seen": 235216896, + "step": 74740 + }, + { + "epoch": 4.78490493566353, + "grad_norm": 9.067571640014648, + "learning_rate": 2.815098056934501e-08, + "loss": 0.1293, + "num_input_tokens_seen": 235233408, + "step": 74745 + }, + { + "epoch": 4.785225017604507, + "grad_norm": 6.6039299964904785, + "learning_rate": 2.8067442079316444e-08, + "loss": 0.1907, + "num_input_tokens_seen": 235250432, + "step": 74750 + }, + { + "epoch": 4.785545099545484, + "grad_norm": 5.955656051635742, + "learning_rate": 2.7984027024723915e-08, + "loss": 0.0923, + "num_input_tokens_seen": 235265920, + "step": 74755 + }, + { + "epoch": 4.785865181486461, + "grad_norm": 5.992641925811768, + "learning_rate": 2.7900735409732704e-08, + "loss": 0.1538, + "num_input_tokens_seen": 235282496, + "step": 74760 + }, + { + "epoch": 4.786185263427438, + "grad_norm": 7.274853229522705, + "learning_rate": 2.7817567238501984e-08, + "loss": 0.0995, + "num_input_tokens_seen": 235297664, + "step": 74765 + }, + { + "epoch": 4.786505345368415, + "grad_norm": 6.061085224151611, + "learning_rate": 2.7734522515185102e-08, + "loss": 0.1395, + "num_input_tokens_seen": 235313536, + "step": 74770 + }, + { + "epoch": 4.786825427309391, + "grad_norm": 8.204859733581543, + "learning_rate": 2.7651601243928462e-08, + "loss": 0.1431, + "num_input_tokens_seen": 235328768, + "step": 74775 + }, + { + "epoch": 4.787145509250368, + "grad_norm": 6.583730697631836, + "learning_rate": 2.7568803428872915e-08, + "loss": 0.1117, + "num_input_tokens_seen": 235344384, + "step": 74780 + }, + { + "epoch": 4.787465591191345, + "grad_norm": 6.302046298980713, + "learning_rate": 2.7486129074152934e-08, + "loss": 0.1219, + "num_input_tokens_seen": 235361792, + "step": 74785 + }, + { + "epoch": 4.787785673132322, + "grad_norm": 10.153810501098633, + "learning_rate": 2.7403578183896884e-08, + "loss": 0.1024, + "num_input_tokens_seen": 235378752, + "step": 74790 + }, + { + "epoch": 4.788105755073299, + "grad_norm": 9.58544921875, + "learning_rate": 2.7321150762226743e-08, + "loss": 0.125, + "num_input_tokens_seen": 235394944, + "step": 74795 + }, + { + "epoch": 4.788425837014276, + "grad_norm": 6.308936595916748, + "learning_rate": 2.723884681325839e-08, + "loss": 0.1228, + "num_input_tokens_seen": 235410560, + "step": 74800 + }, + { + "epoch": 4.788745918955253, + "grad_norm": 8.085773468017578, + "learning_rate": 2.7156666341102145e-08, + "loss": 0.1286, + "num_input_tokens_seen": 235426688, + "step": 74805 + }, + { + "epoch": 4.78906600089623, + "grad_norm": 4.3153767585754395, + "learning_rate": 2.707460934986139e-08, + "loss": 0.0951, + "num_input_tokens_seen": 235442048, + "step": 74810 + }, + { + "epoch": 4.789386082837206, + "grad_norm": 7.3983473777771, + "learning_rate": 2.6992675843633408e-08, + "loss": 0.1333, + "num_input_tokens_seen": 235458240, + "step": 74815 + }, + { + "epoch": 4.789706164778183, + "grad_norm": 8.404217720031738, + "learning_rate": 2.69108658265102e-08, + "loss": 0.1369, + "num_input_tokens_seen": 235473856, + "step": 74820 + }, + { + "epoch": 4.79002624671916, + "grad_norm": 7.649463653564453, + "learning_rate": 2.6829179302576003e-08, + "loss": 0.1185, + "num_input_tokens_seen": 235489536, + "step": 74825 + }, + { + "epoch": 4.790346328660137, + "grad_norm": 17.698802947998047, + "learning_rate": 2.674761627591088e-08, + "loss": 0.1613, + "num_input_tokens_seen": 235505920, + "step": 74830 + }, + { + "epoch": 4.790666410601114, + "grad_norm": 5.588159084320068, + "learning_rate": 2.666617675058658e-08, + "loss": 0.1412, + "num_input_tokens_seen": 235521216, + "step": 74835 + }, + { + "epoch": 4.790986492542091, + "grad_norm": 17.557235717773438, + "learning_rate": 2.6584860730670682e-08, + "loss": 0.1291, + "num_input_tokens_seen": 235536704, + "step": 74840 + }, + { + "epoch": 4.791306574483068, + "grad_norm": 8.124228477478027, + "learning_rate": 2.6503668220222988e-08, + "loss": 0.1207, + "num_input_tokens_seen": 235551936, + "step": 74845 + }, + { + "epoch": 4.791626656424045, + "grad_norm": 6.17577600479126, + "learning_rate": 2.642259922329832e-08, + "loss": 0.151, + "num_input_tokens_seen": 235567232, + "step": 74850 + }, + { + "epoch": 4.791946738365022, + "grad_norm": 3.8282206058502197, + "learning_rate": 2.6341653743944538e-08, + "loss": 0.0967, + "num_input_tokens_seen": 235583040, + "step": 74855 + }, + { + "epoch": 4.792266820305999, + "grad_norm": 7.225045680999756, + "learning_rate": 2.6260831786203976e-08, + "loss": 0.1503, + "num_input_tokens_seen": 235597696, + "step": 74860 + }, + { + "epoch": 4.792586902246975, + "grad_norm": 5.6649489402771, + "learning_rate": 2.6180133354112013e-08, + "loss": 0.1339, + "num_input_tokens_seen": 235613888, + "step": 74865 + }, + { + "epoch": 4.792906984187952, + "grad_norm": 3.951551914215088, + "learning_rate": 2.609955845169876e-08, + "loss": 0.1132, + "num_input_tokens_seen": 235629632, + "step": 74870 + }, + { + "epoch": 4.793227066128929, + "grad_norm": 7.264484405517578, + "learning_rate": 2.6019107082987104e-08, + "loss": 0.1654, + "num_input_tokens_seen": 235644672, + "step": 74875 + }, + { + "epoch": 4.793547148069906, + "grad_norm": 2.7012438774108887, + "learning_rate": 2.593877925199495e-08, + "loss": 0.106, + "num_input_tokens_seen": 235660160, + "step": 74880 + }, + { + "epoch": 4.793867230010883, + "grad_norm": 14.311240196228027, + "learning_rate": 2.585857496273325e-08, + "loss": 0.1481, + "num_input_tokens_seen": 235675520, + "step": 74885 + }, + { + "epoch": 4.79418731195186, + "grad_norm": 6.711099147796631, + "learning_rate": 2.5778494219206862e-08, + "loss": 0.2174, + "num_input_tokens_seen": 235691328, + "step": 74890 + }, + { + "epoch": 4.794507393892837, + "grad_norm": 8.86414909362793, + "learning_rate": 2.5698537025415083e-08, + "loss": 0.1311, + "num_input_tokens_seen": 235706816, + "step": 74895 + }, + { + "epoch": 4.794827475833813, + "grad_norm": 7.569084167480469, + "learning_rate": 2.5618703385349997e-08, + "loss": 0.1343, + "num_input_tokens_seen": 235721920, + "step": 74900 + }, + { + "epoch": 4.79514755777479, + "grad_norm": 4.7655534744262695, + "learning_rate": 2.553899330299814e-08, + "loss": 0.1618, + "num_input_tokens_seen": 235736768, + "step": 74905 + }, + { + "epoch": 4.795467639715767, + "grad_norm": 7.187072277069092, + "learning_rate": 2.5459406782339936e-08, + "loss": 0.1474, + "num_input_tokens_seen": 235752512, + "step": 74910 + }, + { + "epoch": 4.795787721656744, + "grad_norm": 7.305126190185547, + "learning_rate": 2.5379943827349984e-08, + "loss": 0.1266, + "num_input_tokens_seen": 235768256, + "step": 74915 + }, + { + "epoch": 4.796107803597721, + "grad_norm": 7.316634654998779, + "learning_rate": 2.5300604441995115e-08, + "loss": 0.1488, + "num_input_tokens_seen": 235782784, + "step": 74920 + }, + { + "epoch": 4.796427885538698, + "grad_norm": 3.9083807468414307, + "learning_rate": 2.5221388630238543e-08, + "loss": 0.1571, + "num_input_tokens_seen": 235800256, + "step": 74925 + }, + { + "epoch": 4.796747967479675, + "grad_norm": 8.705842018127441, + "learning_rate": 2.5142296396034605e-08, + "loss": 0.1359, + "num_input_tokens_seen": 235815936, + "step": 74930 + }, + { + "epoch": 4.797068049420652, + "grad_norm": 5.90221643447876, + "learning_rate": 2.5063327743333753e-08, + "loss": 0.1299, + "num_input_tokens_seen": 235832384, + "step": 74935 + }, + { + "epoch": 4.797388131361629, + "grad_norm": 8.076054573059082, + "learning_rate": 2.498448267607867e-08, + "loss": 0.1401, + "num_input_tokens_seen": 235848128, + "step": 74940 + }, + { + "epoch": 4.797708213302606, + "grad_norm": 11.891465187072754, + "learning_rate": 2.490576119820648e-08, + "loss": 0.1738, + "num_input_tokens_seen": 235862976, + "step": 74945 + }, + { + "epoch": 4.798028295243582, + "grad_norm": 6.084743499755859, + "learning_rate": 2.4827163313648482e-08, + "loss": 0.1179, + "num_input_tokens_seen": 235878080, + "step": 74950 + }, + { + "epoch": 4.798348377184559, + "grad_norm": 9.720125198364258, + "learning_rate": 2.4748689026329042e-08, + "loss": 0.1961, + "num_input_tokens_seen": 235893952, + "step": 74955 + }, + { + "epoch": 4.798668459125536, + "grad_norm": 8.414511680603027, + "learning_rate": 2.4670338340166966e-08, + "loss": 0.1436, + "num_input_tokens_seen": 235911296, + "step": 74960 + }, + { + "epoch": 4.798988541066513, + "grad_norm": 6.597635269165039, + "learning_rate": 2.4592111259074958e-08, + "loss": 0.116, + "num_input_tokens_seen": 235927296, + "step": 74965 + }, + { + "epoch": 4.79930862300749, + "grad_norm": 7.951528072357178, + "learning_rate": 2.4514007786958782e-08, + "loss": 0.1287, + "num_input_tokens_seen": 235942656, + "step": 74970 + }, + { + "epoch": 4.799628704948467, + "grad_norm": 8.374171257019043, + "learning_rate": 2.4436027927718652e-08, + "loss": 0.1126, + "num_input_tokens_seen": 235957888, + "step": 74975 + }, + { + "epoch": 4.799948786889444, + "grad_norm": 7.486506938934326, + "learning_rate": 2.435817168524868e-08, + "loss": 0.1124, + "num_input_tokens_seen": 235973568, + "step": 74980 + }, + { + "epoch": 4.800268868830421, + "grad_norm": 1.8624166250228882, + "learning_rate": 2.4280439063436578e-08, + "loss": 0.1234, + "num_input_tokens_seen": 235987904, + "step": 74985 + }, + { + "epoch": 4.800588950771397, + "grad_norm": 7.667263507843018, + "learning_rate": 2.4202830066163418e-08, + "loss": 0.1344, + "num_input_tokens_seen": 236004352, + "step": 74990 + }, + { + "epoch": 4.800909032712374, + "grad_norm": 6.837486267089844, + "learning_rate": 2.4125344697305266e-08, + "loss": 0.133, + "num_input_tokens_seen": 236021696, + "step": 74995 + }, + { + "epoch": 4.801229114653351, + "grad_norm": 7.280733585357666, + "learning_rate": 2.4047982960730966e-08, + "loss": 0.1096, + "num_input_tokens_seen": 236037504, + "step": 75000 + }, + { + "epoch": 4.801549196594328, + "grad_norm": 4.170248985290527, + "learning_rate": 2.3970744860303817e-08, + "loss": 0.1214, + "num_input_tokens_seen": 236052608, + "step": 75005 + }, + { + "epoch": 4.801869278535305, + "grad_norm": 8.668259620666504, + "learning_rate": 2.3893630399880185e-08, + "loss": 0.1436, + "num_input_tokens_seen": 236068416, + "step": 75010 + }, + { + "epoch": 4.802189360476282, + "grad_norm": 9.196064949035645, + "learning_rate": 2.381663958331143e-08, + "loss": 0.1614, + "num_input_tokens_seen": 236084672, + "step": 75015 + }, + { + "epoch": 4.802509442417259, + "grad_norm": 4.956031322479248, + "learning_rate": 2.373977241444142e-08, + "loss": 0.1321, + "num_input_tokens_seen": 236099520, + "step": 75020 + }, + { + "epoch": 4.802829524358236, + "grad_norm": 18.35787582397461, + "learning_rate": 2.366302889710903e-08, + "loss": 0.1787, + "num_input_tokens_seen": 236115648, + "step": 75025 + }, + { + "epoch": 4.803149606299213, + "grad_norm": 7.466291427612305, + "learning_rate": 2.358640903514592e-08, + "loss": 0.0954, + "num_input_tokens_seen": 236131200, + "step": 75030 + }, + { + "epoch": 4.80346968824019, + "grad_norm": 8.115631103515625, + "learning_rate": 2.3509912832378746e-08, + "loss": 0.1687, + "num_input_tokens_seen": 236146368, + "step": 75035 + }, + { + "epoch": 4.803789770181166, + "grad_norm": 6.110239505767822, + "learning_rate": 2.3433540292626678e-08, + "loss": 0.1407, + "num_input_tokens_seen": 236162048, + "step": 75040 + }, + { + "epoch": 4.804109852122143, + "grad_norm": 4.725231647491455, + "learning_rate": 2.335729141970361e-08, + "loss": 0.1161, + "num_input_tokens_seen": 236177728, + "step": 75045 + }, + { + "epoch": 4.80442993406312, + "grad_norm": 4.204628944396973, + "learning_rate": 2.328116621741733e-08, + "loss": 0.0982, + "num_input_tokens_seen": 236193024, + "step": 75050 + }, + { + "epoch": 4.804750016004097, + "grad_norm": 9.434525489807129, + "learning_rate": 2.3205164689568405e-08, + "loss": 0.1279, + "num_input_tokens_seen": 236209408, + "step": 75055 + }, + { + "epoch": 4.805070097945074, + "grad_norm": 6.784162521362305, + "learning_rate": 2.312928683995297e-08, + "loss": 0.1167, + "num_input_tokens_seen": 236225216, + "step": 75060 + }, + { + "epoch": 4.805390179886051, + "grad_norm": 8.089348793029785, + "learning_rate": 2.3053532672358824e-08, + "loss": 0.1225, + "num_input_tokens_seen": 236239808, + "step": 75065 + }, + { + "epoch": 4.805710261827028, + "grad_norm": 8.069291114807129, + "learning_rate": 2.297790219056989e-08, + "loss": 0.1458, + "num_input_tokens_seen": 236254720, + "step": 75070 + }, + { + "epoch": 4.806030343768005, + "grad_norm": 6.190887928009033, + "learning_rate": 2.2902395398361755e-08, + "loss": 0.1154, + "num_input_tokens_seen": 236269632, + "step": 75075 + }, + { + "epoch": 4.806350425708981, + "grad_norm": 5.8906474113464355, + "learning_rate": 2.282701229950557e-08, + "loss": 0.0826, + "num_input_tokens_seen": 236284864, + "step": 75080 + }, + { + "epoch": 4.806670507649958, + "grad_norm": 7.1583638191223145, + "learning_rate": 2.275175289776499e-08, + "loss": 0.124, + "num_input_tokens_seen": 236300288, + "step": 75085 + }, + { + "epoch": 4.806990589590935, + "grad_norm": 17.474018096923828, + "learning_rate": 2.26766171968984e-08, + "loss": 0.1499, + "num_input_tokens_seen": 236315840, + "step": 75090 + }, + { + "epoch": 4.807310671531912, + "grad_norm": 3.541393518447876, + "learning_rate": 2.2601605200657793e-08, + "loss": 0.1085, + "num_input_tokens_seen": 236331264, + "step": 75095 + }, + { + "epoch": 4.807630753472889, + "grad_norm": 6.142131805419922, + "learning_rate": 2.2526716912788793e-08, + "loss": 0.146, + "num_input_tokens_seen": 236347264, + "step": 75100 + }, + { + "epoch": 4.807950835413866, + "grad_norm": 24.363927841186523, + "learning_rate": 2.2451952337030346e-08, + "loss": 0.1494, + "num_input_tokens_seen": 236362496, + "step": 75105 + }, + { + "epoch": 4.808270917354843, + "grad_norm": 3.6018571853637695, + "learning_rate": 2.237731147711669e-08, + "loss": 0.089, + "num_input_tokens_seen": 236379776, + "step": 75110 + }, + { + "epoch": 4.80859099929582, + "grad_norm": 6.05485200881958, + "learning_rate": 2.2302794336774846e-08, + "loss": 0.1351, + "num_input_tokens_seen": 236395072, + "step": 75115 + }, + { + "epoch": 4.808911081236797, + "grad_norm": 5.4175615310668945, + "learning_rate": 2.2228400919725445e-08, + "loss": 0.1269, + "num_input_tokens_seen": 236411200, + "step": 75120 + }, + { + "epoch": 4.809231163177774, + "grad_norm": 4.688850402832031, + "learning_rate": 2.2154131229683297e-08, + "loss": 0.0987, + "num_input_tokens_seen": 236425856, + "step": 75125 + }, + { + "epoch": 4.80955124511875, + "grad_norm": 8.946290969848633, + "learning_rate": 2.2079985270357374e-08, + "loss": 0.1418, + "num_input_tokens_seen": 236441408, + "step": 75130 + }, + { + "epoch": 4.809871327059727, + "grad_norm": 6.587691783905029, + "learning_rate": 2.2005963045449995e-08, + "loss": 0.1102, + "num_input_tokens_seen": 236457024, + "step": 75135 + }, + { + "epoch": 4.810191409000704, + "grad_norm": 39.74348449707031, + "learning_rate": 2.1932064558657094e-08, + "loss": 0.1223, + "num_input_tokens_seen": 236472448, + "step": 75140 + }, + { + "epoch": 4.810511490941681, + "grad_norm": 6.10310173034668, + "learning_rate": 2.185828981366961e-08, + "loss": 0.1348, + "num_input_tokens_seen": 236489472, + "step": 75145 + }, + { + "epoch": 4.810831572882658, + "grad_norm": 6.83845329284668, + "learning_rate": 2.178463881417042e-08, + "loss": 0.1235, + "num_input_tokens_seen": 236504576, + "step": 75150 + }, + { + "epoch": 4.811151654823635, + "grad_norm": 6.859157085418701, + "learning_rate": 2.1711111563838262e-08, + "loss": 0.155, + "num_input_tokens_seen": 236520192, + "step": 75155 + }, + { + "epoch": 4.811471736764612, + "grad_norm": 6.827095985412598, + "learning_rate": 2.1637708066344075e-08, + "loss": 0.1342, + "num_input_tokens_seen": 236537152, + "step": 75160 + }, + { + "epoch": 4.811791818705588, + "grad_norm": 5.6406474113464355, + "learning_rate": 2.156442832535327e-08, + "loss": 0.106, + "num_input_tokens_seen": 236552640, + "step": 75165 + }, + { + "epoch": 4.812111900646565, + "grad_norm": 8.822300910949707, + "learning_rate": 2.1491272344525415e-08, + "loss": 0.147, + "num_input_tokens_seen": 236568128, + "step": 75170 + }, + { + "epoch": 4.812431982587542, + "grad_norm": 6.8733391761779785, + "learning_rate": 2.1418240127513423e-08, + "loss": 0.1444, + "num_input_tokens_seen": 236583168, + "step": 75175 + }, + { + "epoch": 4.812752064528519, + "grad_norm": 6.12846565246582, + "learning_rate": 2.13453316779641e-08, + "loss": 0.1053, + "num_input_tokens_seen": 236598720, + "step": 75180 + }, + { + "epoch": 4.813072146469496, + "grad_norm": 6.4789814949035645, + "learning_rate": 2.1272546999517584e-08, + "loss": 0.153, + "num_input_tokens_seen": 236614976, + "step": 75185 + }, + { + "epoch": 4.813392228410473, + "grad_norm": 4.886899471282959, + "learning_rate": 2.1199886095809307e-08, + "loss": 0.1202, + "num_input_tokens_seen": 236630528, + "step": 75190 + }, + { + "epoch": 4.81371231035145, + "grad_norm": 4.845536231994629, + "learning_rate": 2.1127348970466922e-08, + "loss": 0.0844, + "num_input_tokens_seen": 236647552, + "step": 75195 + }, + { + "epoch": 4.814032392292427, + "grad_norm": 9.335631370544434, + "learning_rate": 2.1054935627112805e-08, + "loss": 0.1569, + "num_input_tokens_seen": 236663232, + "step": 75200 + }, + { + "epoch": 4.814352474233404, + "grad_norm": 8.477069854736328, + "learning_rate": 2.0982646069362955e-08, + "loss": 0.1148, + "num_input_tokens_seen": 236679168, + "step": 75205 + }, + { + "epoch": 4.814672556174381, + "grad_norm": 10.143085479736328, + "learning_rate": 2.0910480300826985e-08, + "loss": 0.163, + "num_input_tokens_seen": 236696128, + "step": 75210 + }, + { + "epoch": 4.814992638115357, + "grad_norm": 7.339085578918457, + "learning_rate": 2.0838438325108403e-08, + "loss": 0.1698, + "num_input_tokens_seen": 236712448, + "step": 75215 + }, + { + "epoch": 4.815312720056334, + "grad_norm": 6.690225601196289, + "learning_rate": 2.076652014580488e-08, + "loss": 0.1178, + "num_input_tokens_seen": 236728768, + "step": 75220 + }, + { + "epoch": 4.815632801997311, + "grad_norm": 9.895577430725098, + "learning_rate": 2.069472576650744e-08, + "loss": 0.1358, + "num_input_tokens_seen": 236745728, + "step": 75225 + }, + { + "epoch": 4.815952883938288, + "grad_norm": 6.843519687652588, + "learning_rate": 2.062305519080099e-08, + "loss": 0.146, + "num_input_tokens_seen": 236762432, + "step": 75230 + }, + { + "epoch": 4.816272965879265, + "grad_norm": 7.188801288604736, + "learning_rate": 2.0551508422264886e-08, + "loss": 0.1146, + "num_input_tokens_seen": 236779520, + "step": 75235 + }, + { + "epoch": 4.816593047820242, + "grad_norm": 5.961406230926514, + "learning_rate": 2.0480085464471277e-08, + "loss": 0.1183, + "num_input_tokens_seen": 236795200, + "step": 75240 + }, + { + "epoch": 4.816913129761219, + "grad_norm": 10.422233581542969, + "learning_rate": 2.040878632098675e-08, + "loss": 0.1313, + "num_input_tokens_seen": 236811072, + "step": 75245 + }, + { + "epoch": 4.817233211702196, + "grad_norm": 5.0417280197143555, + "learning_rate": 2.0337610995371794e-08, + "loss": 0.1623, + "num_input_tokens_seen": 236826752, + "step": 75250 + }, + { + "epoch": 4.817553293643172, + "grad_norm": 6.760603427886963, + "learning_rate": 2.026655949118078e-08, + "loss": 0.1166, + "num_input_tokens_seen": 236842752, + "step": 75255 + }, + { + "epoch": 4.817873375584149, + "grad_norm": 8.41942024230957, + "learning_rate": 2.0195631811960882e-08, + "loss": 0.1135, + "num_input_tokens_seen": 236857920, + "step": 75260 + }, + { + "epoch": 4.818193457525126, + "grad_norm": 1.7287815809249878, + "learning_rate": 2.0124827961254534e-08, + "loss": 0.1134, + "num_input_tokens_seen": 236872960, + "step": 75265 + }, + { + "epoch": 4.818513539466103, + "grad_norm": 6.812352180480957, + "learning_rate": 2.0054147942596967e-08, + "loss": 0.1411, + "num_input_tokens_seen": 236888704, + "step": 75270 + }, + { + "epoch": 4.81883362140708, + "grad_norm": 8.202141761779785, + "learning_rate": 1.998359175951786e-08, + "loss": 0.1475, + "num_input_tokens_seen": 236904128, + "step": 75275 + }, + { + "epoch": 4.819153703348057, + "grad_norm": 4.932657718658447, + "learning_rate": 1.991315941553995e-08, + "loss": 0.1007, + "num_input_tokens_seen": 236920256, + "step": 75280 + }, + { + "epoch": 4.819473785289034, + "grad_norm": 5.708725452423096, + "learning_rate": 1.98428509141807e-08, + "loss": 0.0977, + "num_input_tokens_seen": 236935040, + "step": 75285 + }, + { + "epoch": 4.819793867230011, + "grad_norm": 3.912609338760376, + "learning_rate": 1.9772666258950913e-08, + "loss": 0.1245, + "num_input_tokens_seen": 236950400, + "step": 75290 + }, + { + "epoch": 4.820113949170988, + "grad_norm": 5.719554901123047, + "learning_rate": 1.9702605453355007e-08, + "loss": 0.1496, + "num_input_tokens_seen": 236966144, + "step": 75295 + }, + { + "epoch": 4.820434031111965, + "grad_norm": 7.601113319396973, + "learning_rate": 1.963266850089185e-08, + "loss": 0.1386, + "num_input_tokens_seen": 236981824, + "step": 75300 + }, + { + "epoch": 4.820754113052941, + "grad_norm": 7.527652263641357, + "learning_rate": 1.9562855405053092e-08, + "loss": 0.1928, + "num_input_tokens_seen": 236999232, + "step": 75305 + }, + { + "epoch": 4.821074194993918, + "grad_norm": 5.760935306549072, + "learning_rate": 1.9493166169325395e-08, + "loss": 0.1102, + "num_input_tokens_seen": 237014784, + "step": 75310 + }, + { + "epoch": 4.821394276934895, + "grad_norm": 2.3399672508239746, + "learning_rate": 1.942360079718847e-08, + "loss": 0.0877, + "num_input_tokens_seen": 237031104, + "step": 75315 + }, + { + "epoch": 4.821714358875872, + "grad_norm": 5.44224214553833, + "learning_rate": 1.9354159292116203e-08, + "loss": 0.1016, + "num_input_tokens_seen": 237046080, + "step": 75320 + }, + { + "epoch": 4.822034440816849, + "grad_norm": 6.871076583862305, + "learning_rate": 1.9284841657575827e-08, + "loss": 0.1461, + "num_input_tokens_seen": 237061248, + "step": 75325 + }, + { + "epoch": 4.822354522757826, + "grad_norm": 9.004928588867188, + "learning_rate": 1.9215647897029012e-08, + "loss": 0.1599, + "num_input_tokens_seen": 237076928, + "step": 75330 + }, + { + "epoch": 4.822674604698803, + "grad_norm": 5.872319221496582, + "learning_rate": 1.9146578013930496e-08, + "loss": 0.1229, + "num_input_tokens_seen": 237093632, + "step": 75335 + }, + { + "epoch": 4.82299468663978, + "grad_norm": 7.272821426391602, + "learning_rate": 1.907763201172974e-08, + "loss": 0.1142, + "num_input_tokens_seen": 237110464, + "step": 75340 + }, + { + "epoch": 4.823314768580756, + "grad_norm": 11.533271789550781, + "learning_rate": 1.9008809893869552e-08, + "loss": 0.1102, + "num_input_tokens_seen": 237125312, + "step": 75345 + }, + { + "epoch": 4.823634850521733, + "grad_norm": 5.109034538269043, + "learning_rate": 1.894011166378634e-08, + "loss": 0.1336, + "num_input_tokens_seen": 237142464, + "step": 75350 + }, + { + "epoch": 4.82395493246271, + "grad_norm": 8.372750282287598, + "learning_rate": 1.8871537324910427e-08, + "loss": 0.1491, + "num_input_tokens_seen": 237157504, + "step": 75355 + }, + { + "epoch": 4.824275014403687, + "grad_norm": 8.022089004516602, + "learning_rate": 1.8803086880666565e-08, + "loss": 0.1177, + "num_input_tokens_seen": 237173120, + "step": 75360 + }, + { + "epoch": 4.824595096344664, + "grad_norm": 7.342555046081543, + "learning_rate": 1.8734760334472024e-08, + "loss": 0.1385, + "num_input_tokens_seen": 237187968, + "step": 75365 + }, + { + "epoch": 4.824915178285641, + "grad_norm": 7.523736000061035, + "learning_rate": 1.866655768973935e-08, + "loss": 0.1245, + "num_input_tokens_seen": 237207360, + "step": 75370 + }, + { + "epoch": 4.825235260226618, + "grad_norm": 4.480780124664307, + "learning_rate": 1.8598478949874155e-08, + "loss": 0.0867, + "num_input_tokens_seen": 237223744, + "step": 75375 + }, + { + "epoch": 4.825555342167595, + "grad_norm": 6.9375481605529785, + "learning_rate": 1.8530524118275382e-08, + "loss": 0.1123, + "num_input_tokens_seen": 237240448, + "step": 75380 + }, + { + "epoch": 4.825875424108572, + "grad_norm": 5.285628318786621, + "learning_rate": 1.846269319833699e-08, + "loss": 0.1312, + "num_input_tokens_seen": 237256192, + "step": 75385 + }, + { + "epoch": 4.826195506049549, + "grad_norm": 3.9752795696258545, + "learning_rate": 1.8394986193445708e-08, + "loss": 0.1082, + "num_input_tokens_seen": 237270912, + "step": 75390 + }, + { + "epoch": 4.826515587990525, + "grad_norm": 5.346744060516357, + "learning_rate": 1.832740310698272e-08, + "loss": 0.1427, + "num_input_tokens_seen": 237288128, + "step": 75395 + }, + { + "epoch": 4.826835669931502, + "grad_norm": 5.741377830505371, + "learning_rate": 1.8259943942322557e-08, + "loss": 0.109, + "num_input_tokens_seen": 237304064, + "step": 75400 + }, + { + "epoch": 4.827155751872479, + "grad_norm": 4.503917694091797, + "learning_rate": 1.8192608702833903e-08, + "loss": 0.1363, + "num_input_tokens_seen": 237320640, + "step": 75405 + }, + { + "epoch": 4.827475833813456, + "grad_norm": 6.549430847167969, + "learning_rate": 1.8125397391879352e-08, + "loss": 0.1808, + "num_input_tokens_seen": 237338304, + "step": 75410 + }, + { + "epoch": 4.827795915754433, + "grad_norm": 4.84158182144165, + "learning_rate": 1.8058310012814273e-08, + "loss": 0.1513, + "num_input_tokens_seen": 237353408, + "step": 75415 + }, + { + "epoch": 4.82811599769541, + "grad_norm": 6.026427268981934, + "learning_rate": 1.7991346568989598e-08, + "loss": 0.1071, + "num_input_tokens_seen": 237368960, + "step": 75420 + }, + { + "epoch": 4.828436079636387, + "grad_norm": 5.0624098777771, + "learning_rate": 1.7924507063748763e-08, + "loss": 0.1679, + "num_input_tokens_seen": 237384832, + "step": 75425 + }, + { + "epoch": 4.828756161577363, + "grad_norm": 7.032829761505127, + "learning_rate": 1.7857791500429378e-08, + "loss": 0.1145, + "num_input_tokens_seen": 237400512, + "step": 75430 + }, + { + "epoch": 4.82907624351834, + "grad_norm": 9.120243072509766, + "learning_rate": 1.779119988236294e-08, + "loss": 0.1578, + "num_input_tokens_seen": 237416256, + "step": 75435 + }, + { + "epoch": 4.829396325459317, + "grad_norm": 10.789117813110352, + "learning_rate": 1.7724732212874572e-08, + "loss": 0.1284, + "num_input_tokens_seen": 237431296, + "step": 75440 + }, + { + "epoch": 4.829716407400294, + "grad_norm": 6.376999378204346, + "learning_rate": 1.7658388495283563e-08, + "loss": 0.0989, + "num_input_tokens_seen": 237447040, + "step": 75445 + }, + { + "epoch": 4.830036489341271, + "grad_norm": 4.600183010101318, + "learning_rate": 1.759216873290226e-08, + "loss": 0.1114, + "num_input_tokens_seen": 237463168, + "step": 75450 + }, + { + "epoch": 4.830356571282248, + "grad_norm": 5.653549671173096, + "learning_rate": 1.752607292903774e-08, + "loss": 0.1209, + "num_input_tokens_seen": 237479232, + "step": 75455 + }, + { + "epoch": 4.830676653223225, + "grad_norm": 5.382125377655029, + "learning_rate": 1.746010108699042e-08, + "loss": 0.1228, + "num_input_tokens_seen": 237494976, + "step": 75460 + }, + { + "epoch": 4.830996735164202, + "grad_norm": 8.907022476196289, + "learning_rate": 1.7394253210054613e-08, + "loss": 0.137, + "num_input_tokens_seen": 237511232, + "step": 75465 + }, + { + "epoch": 4.831316817105179, + "grad_norm": 3.4470882415771484, + "learning_rate": 1.7328529301518515e-08, + "loss": 0.1509, + "num_input_tokens_seen": 237526720, + "step": 75470 + }, + { + "epoch": 4.831636899046156, + "grad_norm": 5.0161967277526855, + "learning_rate": 1.7262929364663673e-08, + "loss": 0.1434, + "num_input_tokens_seen": 237542528, + "step": 75475 + }, + { + "epoch": 4.831956980987132, + "grad_norm": 7.205667495727539, + "learning_rate": 1.7197453402766073e-08, + "loss": 0.1276, + "num_input_tokens_seen": 237558080, + "step": 75480 + }, + { + "epoch": 4.832277062928109, + "grad_norm": 7.333730220794678, + "learning_rate": 1.7132101419095326e-08, + "loss": 0.0911, + "num_input_tokens_seen": 237574592, + "step": 75485 + }, + { + "epoch": 4.832597144869086, + "grad_norm": 12.830851554870605, + "learning_rate": 1.7066873416914377e-08, + "loss": 0.1761, + "num_input_tokens_seen": 237592192, + "step": 75490 + }, + { + "epoch": 4.832917226810063, + "grad_norm": 7.172666072845459, + "learning_rate": 1.700176939948117e-08, + "loss": 0.1351, + "num_input_tokens_seen": 237608064, + "step": 75495 + }, + { + "epoch": 4.83323730875104, + "grad_norm": 4.046794891357422, + "learning_rate": 1.6936789370045613e-08, + "loss": 0.1362, + "num_input_tokens_seen": 237623872, + "step": 75500 + }, + { + "epoch": 4.833557390692017, + "grad_norm": 8.3873872756958, + "learning_rate": 1.6871933331853164e-08, + "loss": 0.1269, + "num_input_tokens_seen": 237639744, + "step": 75505 + }, + { + "epoch": 4.833877472632994, + "grad_norm": 6.852449893951416, + "learning_rate": 1.680720128814234e-08, + "loss": 0.1469, + "num_input_tokens_seen": 237655488, + "step": 75510 + }, + { + "epoch": 4.834197554573971, + "grad_norm": 7.4288835525512695, + "learning_rate": 1.6742593242145e-08, + "loss": 0.1226, + "num_input_tokens_seen": 237671488, + "step": 75515 + }, + { + "epoch": 4.834517636514947, + "grad_norm": 6.543638706207275, + "learning_rate": 1.667810919708829e-08, + "loss": 0.1412, + "num_input_tokens_seen": 237687040, + "step": 75520 + }, + { + "epoch": 4.834837718455924, + "grad_norm": 5.46401309967041, + "learning_rate": 1.6613749156191296e-08, + "loss": 0.1122, + "num_input_tokens_seen": 237702912, + "step": 75525 + }, + { + "epoch": 4.835157800396901, + "grad_norm": 5.987226486206055, + "learning_rate": 1.6549513122668393e-08, + "loss": 0.134, + "num_input_tokens_seen": 237717888, + "step": 75530 + }, + { + "epoch": 4.835477882337878, + "grad_norm": 6.664194583892822, + "learning_rate": 1.6485401099726738e-08, + "loss": 0.0941, + "num_input_tokens_seen": 237733696, + "step": 75535 + }, + { + "epoch": 4.835797964278855, + "grad_norm": 8.487370491027832, + "learning_rate": 1.6421413090568218e-08, + "loss": 0.1398, + "num_input_tokens_seen": 237748608, + "step": 75540 + }, + { + "epoch": 4.836118046219832, + "grad_norm": 5.566051006317139, + "learning_rate": 1.635754909838777e-08, + "loss": 0.1138, + "num_input_tokens_seen": 237764736, + "step": 75545 + }, + { + "epoch": 4.836438128160809, + "grad_norm": 10.288837432861328, + "learning_rate": 1.6293809126374515e-08, + "loss": 0.1803, + "num_input_tokens_seen": 237780160, + "step": 75550 + }, + { + "epoch": 4.836758210101786, + "grad_norm": 7.196048736572266, + "learning_rate": 1.6230193177711184e-08, + "loss": 0.1223, + "num_input_tokens_seen": 237795200, + "step": 75555 + }, + { + "epoch": 4.837078292042763, + "grad_norm": 6.587904930114746, + "learning_rate": 1.6166701255574678e-08, + "loss": 0.1103, + "num_input_tokens_seen": 237809920, + "step": 75560 + }, + { + "epoch": 4.83739837398374, + "grad_norm": 3.9712655544281006, + "learning_rate": 1.6103333363135243e-08, + "loss": 0.1294, + "num_input_tokens_seen": 237826368, + "step": 75565 + }, + { + "epoch": 4.837718455924716, + "grad_norm": 2.824751853942871, + "learning_rate": 1.6040089503557287e-08, + "loss": 0.1121, + "num_input_tokens_seen": 237841920, + "step": 75570 + }, + { + "epoch": 4.838038537865693, + "grad_norm": 5.170014381408691, + "learning_rate": 1.5976969679998568e-08, + "loss": 0.1326, + "num_input_tokens_seen": 237857408, + "step": 75575 + }, + { + "epoch": 4.83835861980667, + "grad_norm": 8.238429069519043, + "learning_rate": 1.591397389561128e-08, + "loss": 0.1293, + "num_input_tokens_seen": 237872704, + "step": 75580 + }, + { + "epoch": 4.838678701747647, + "grad_norm": 4.901496887207031, + "learning_rate": 1.5851102153540964e-08, + "loss": 0.1315, + "num_input_tokens_seen": 237888448, + "step": 75585 + }, + { + "epoch": 4.838998783688624, + "grad_norm": 3.247267484664917, + "learning_rate": 1.578835445692706e-08, + "loss": 0.1358, + "num_input_tokens_seen": 237903680, + "step": 75590 + }, + { + "epoch": 4.839318865629601, + "grad_norm": 9.294288635253906, + "learning_rate": 1.572573080890316e-08, + "loss": 0.1391, + "num_input_tokens_seen": 237919360, + "step": 75595 + }, + { + "epoch": 4.839638947570578, + "grad_norm": 7.265223503112793, + "learning_rate": 1.5663231212595943e-08, + "loss": 0.1116, + "num_input_tokens_seen": 237935360, + "step": 75600 + }, + { + "epoch": 4.839959029511555, + "grad_norm": 4.714715480804443, + "learning_rate": 1.560085567112679e-08, + "loss": 0.1546, + "num_input_tokens_seen": 237950336, + "step": 75605 + }, + { + "epoch": 4.840279111452531, + "grad_norm": 4.767847061157227, + "learning_rate": 1.5538604187609884e-08, + "loss": 0.1033, + "num_input_tokens_seen": 237966848, + "step": 75610 + }, + { + "epoch": 4.840599193393508, + "grad_norm": 10.37243938446045, + "learning_rate": 1.5476476765154126e-08, + "loss": 0.1219, + "num_input_tokens_seen": 237982080, + "step": 75615 + }, + { + "epoch": 4.840919275334485, + "grad_norm": 7.990004539489746, + "learning_rate": 1.541447340686175e-08, + "loss": 0.1846, + "num_input_tokens_seen": 237997888, + "step": 75620 + }, + { + "epoch": 4.841239357275462, + "grad_norm": 6.405463695526123, + "learning_rate": 1.5352594115828624e-08, + "loss": 0.182, + "num_input_tokens_seen": 238013376, + "step": 75625 + }, + { + "epoch": 4.841559439216439, + "grad_norm": 11.866926193237305, + "learning_rate": 1.5290838895145043e-08, + "loss": 0.1719, + "num_input_tokens_seen": 238030208, + "step": 75630 + }, + { + "epoch": 4.841879521157416, + "grad_norm": 6.741593837738037, + "learning_rate": 1.522920774789466e-08, + "loss": 0.121, + "num_input_tokens_seen": 238046272, + "step": 75635 + }, + { + "epoch": 4.842199603098393, + "grad_norm": 4.143682479858398, + "learning_rate": 1.516770067715473e-08, + "loss": 0.1466, + "num_input_tokens_seen": 238061568, + "step": 75640 + }, + { + "epoch": 4.84251968503937, + "grad_norm": 15.547056198120117, + "learning_rate": 1.5106317685996964e-08, + "loss": 0.1399, + "num_input_tokens_seen": 238076352, + "step": 75645 + }, + { + "epoch": 4.842839766980347, + "grad_norm": 6.439312934875488, + "learning_rate": 1.504505877748641e-08, + "loss": 0.1294, + "num_input_tokens_seen": 238094080, + "step": 75650 + }, + { + "epoch": 4.843159848921324, + "grad_norm": 3.224447011947632, + "learning_rate": 1.4983923954681735e-08, + "loss": 0.0934, + "num_input_tokens_seen": 238110080, + "step": 75655 + }, + { + "epoch": 4.8434799308623, + "grad_norm": 5.614113807678223, + "learning_rate": 1.4922913220636326e-08, + "loss": 0.1358, + "num_input_tokens_seen": 238126144, + "step": 75660 + }, + { + "epoch": 4.843800012803277, + "grad_norm": 8.288689613342285, + "learning_rate": 1.4862026578396083e-08, + "loss": 0.1138, + "num_input_tokens_seen": 238142592, + "step": 75665 + }, + { + "epoch": 4.844120094744254, + "grad_norm": 5.944485664367676, + "learning_rate": 1.4801264031001627e-08, + "loss": 0.1079, + "num_input_tokens_seen": 238157760, + "step": 75670 + }, + { + "epoch": 4.844440176685231, + "grad_norm": 5.930298805236816, + "learning_rate": 1.4740625581486923e-08, + "loss": 0.1317, + "num_input_tokens_seen": 238172608, + "step": 75675 + }, + { + "epoch": 4.844760258626208, + "grad_norm": 9.519388198852539, + "learning_rate": 1.4680111232880379e-08, + "loss": 0.1381, + "num_input_tokens_seen": 238187328, + "step": 75680 + }, + { + "epoch": 4.845080340567185, + "grad_norm": 7.748563289642334, + "learning_rate": 1.4619720988203468e-08, + "loss": 0.1558, + "num_input_tokens_seen": 238203584, + "step": 75685 + }, + { + "epoch": 4.845400422508162, + "grad_norm": 8.201735496520996, + "learning_rate": 1.4559454850471832e-08, + "loss": 0.1319, + "num_input_tokens_seen": 238219072, + "step": 75690 + }, + { + "epoch": 4.8457205044491385, + "grad_norm": 5.803487300872803, + "learning_rate": 1.4499312822694733e-08, + "loss": 0.1137, + "num_input_tokens_seen": 238234432, + "step": 75695 + }, + { + "epoch": 4.8460405863901155, + "grad_norm": 4.186499118804932, + "learning_rate": 1.44392949078756e-08, + "loss": 0.0912, + "num_input_tokens_seen": 238249024, + "step": 75700 + }, + { + "epoch": 4.8463606683310925, + "grad_norm": 8.400900840759277, + "learning_rate": 1.4379401109011204e-08, + "loss": 0.2068, + "num_input_tokens_seen": 238263552, + "step": 75705 + }, + { + "epoch": 4.8466807502720695, + "grad_norm": 8.422445297241211, + "learning_rate": 1.4319631429092207e-08, + "loss": 0.1415, + "num_input_tokens_seen": 238278848, + "step": 75710 + }, + { + "epoch": 4.8470008322130465, + "grad_norm": 13.922231674194336, + "learning_rate": 1.4259985871103721e-08, + "loss": 0.1501, + "num_input_tokens_seen": 238293632, + "step": 75715 + }, + { + "epoch": 4.847320914154023, + "grad_norm": 9.635915756225586, + "learning_rate": 1.4200464438023642e-08, + "loss": 0.1726, + "num_input_tokens_seen": 238308608, + "step": 75720 + }, + { + "epoch": 4.847640996095, + "grad_norm": 7.3957839012146, + "learning_rate": 1.4141067132824316e-08, + "loss": 0.1247, + "num_input_tokens_seen": 238323840, + "step": 75725 + }, + { + "epoch": 4.847961078035977, + "grad_norm": 7.161489486694336, + "learning_rate": 1.40817939584717e-08, + "loss": 0.1215, + "num_input_tokens_seen": 238339648, + "step": 75730 + }, + { + "epoch": 4.848281159976954, + "grad_norm": 6.5629353523254395, + "learning_rate": 1.4022644917925654e-08, + "loss": 0.146, + "num_input_tokens_seen": 238354496, + "step": 75735 + }, + { + "epoch": 4.848601241917931, + "grad_norm": 18.983810424804688, + "learning_rate": 1.3963620014139645e-08, + "loss": 0.1378, + "num_input_tokens_seen": 238369664, + "step": 75740 + }, + { + "epoch": 4.8489213238589075, + "grad_norm": 10.317915916442871, + "learning_rate": 1.3904719250061316e-08, + "loss": 0.1618, + "num_input_tokens_seen": 238386496, + "step": 75745 + }, + { + "epoch": 4.8492414057998845, + "grad_norm": 29.511722564697266, + "learning_rate": 1.3845942628631648e-08, + "loss": 0.1654, + "num_input_tokens_seen": 238403264, + "step": 75750 + }, + { + "epoch": 4.8495614877408615, + "grad_norm": 10.611448287963867, + "learning_rate": 1.3787290152785514e-08, + "loss": 0.1614, + "num_input_tokens_seen": 238418368, + "step": 75755 + }, + { + "epoch": 4.8498815696818385, + "grad_norm": 7.014817714691162, + "learning_rate": 1.3728761825452242e-08, + "loss": 0.0952, + "num_input_tokens_seen": 238434368, + "step": 75760 + }, + { + "epoch": 4.8502016516228155, + "grad_norm": 4.106075286865234, + "learning_rate": 1.3670357649553933e-08, + "loss": 0.0997, + "num_input_tokens_seen": 238448960, + "step": 75765 + }, + { + "epoch": 4.8505217335637925, + "grad_norm": 7.549690246582031, + "learning_rate": 1.3612077628007147e-08, + "loss": 0.1256, + "num_input_tokens_seen": 238463552, + "step": 75770 + }, + { + "epoch": 4.8508418155047694, + "grad_norm": 9.285969734191895, + "learning_rate": 1.3553921763722055e-08, + "loss": 0.1224, + "num_input_tokens_seen": 238478528, + "step": 75775 + }, + { + "epoch": 4.851161897445746, + "grad_norm": 8.570269584655762, + "learning_rate": 1.3495890059602723e-08, + "loss": 0.15, + "num_input_tokens_seen": 238496128, + "step": 75780 + }, + { + "epoch": 4.8514819793867225, + "grad_norm": 9.172342300415039, + "learning_rate": 1.3437982518546832e-08, + "loss": 0.1264, + "num_input_tokens_seen": 238511936, + "step": 75785 + }, + { + "epoch": 4.8518020613276995, + "grad_norm": 5.704357147216797, + "learning_rate": 1.3380199143446238e-08, + "loss": 0.1236, + "num_input_tokens_seen": 238527168, + "step": 75790 + }, + { + "epoch": 4.8521221432686765, + "grad_norm": 8.155374526977539, + "learning_rate": 1.3322539937185852e-08, + "loss": 0.1523, + "num_input_tokens_seen": 238542976, + "step": 75795 + }, + { + "epoch": 4.8524422252096535, + "grad_norm": 11.527777671813965, + "learning_rate": 1.3265004902645595e-08, + "loss": 0.1294, + "num_input_tokens_seen": 238557760, + "step": 75800 + }, + { + "epoch": 4.8527623071506305, + "grad_norm": 6.197359085083008, + "learning_rate": 1.3207594042697614e-08, + "loss": 0.1267, + "num_input_tokens_seen": 238573888, + "step": 75805 + }, + { + "epoch": 4.8530823890916075, + "grad_norm": 6.095122814178467, + "learning_rate": 1.3150307360209614e-08, + "loss": 0.1533, + "num_input_tokens_seen": 238590784, + "step": 75810 + }, + { + "epoch": 4.8534024710325845, + "grad_norm": 6.505041599273682, + "learning_rate": 1.3093144858041528e-08, + "loss": 0.11, + "num_input_tokens_seen": 238605952, + "step": 75815 + }, + { + "epoch": 4.8537225529735615, + "grad_norm": 9.378494262695312, + "learning_rate": 1.3036106539048021e-08, + "loss": 0.1783, + "num_input_tokens_seen": 238621120, + "step": 75820 + }, + { + "epoch": 4.8540426349145385, + "grad_norm": 5.662252426147461, + "learning_rate": 1.2979192406077091e-08, + "loss": 0.1111, + "num_input_tokens_seen": 238636928, + "step": 75825 + }, + { + "epoch": 4.8543627168555155, + "grad_norm": 4.842312335968018, + "learning_rate": 1.2922402461970906e-08, + "loss": 0.0834, + "num_input_tokens_seen": 238652480, + "step": 75830 + }, + { + "epoch": 4.854682798796492, + "grad_norm": 7.411164283752441, + "learning_rate": 1.2865736709565258e-08, + "loss": 0.1284, + "num_input_tokens_seen": 238670016, + "step": 75835 + }, + { + "epoch": 4.8550028807374686, + "grad_norm": 5.363612174987793, + "learning_rate": 1.2809195151689823e-08, + "loss": 0.1419, + "num_input_tokens_seen": 238686272, + "step": 75840 + }, + { + "epoch": 4.8553229626784455, + "grad_norm": 16.401020050048828, + "learning_rate": 1.2752777791167626e-08, + "loss": 0.2286, + "num_input_tokens_seen": 238701376, + "step": 75845 + }, + { + "epoch": 4.8556430446194225, + "grad_norm": 3.809973955154419, + "learning_rate": 1.2696484630816408e-08, + "loss": 0.1259, + "num_input_tokens_seen": 238718656, + "step": 75850 + }, + { + "epoch": 4.8559631265603995, + "grad_norm": 9.67296314239502, + "learning_rate": 1.26403156734467e-08, + "loss": 0.1536, + "num_input_tokens_seen": 238734528, + "step": 75855 + }, + { + "epoch": 4.8562832085013765, + "grad_norm": 8.710954666137695, + "learning_rate": 1.2584270921863484e-08, + "loss": 0.1468, + "num_input_tokens_seen": 238750400, + "step": 75860 + }, + { + "epoch": 4.8566032904423535, + "grad_norm": 7.8178229331970215, + "learning_rate": 1.2528350378865073e-08, + "loss": 0.1326, + "num_input_tokens_seen": 238764800, + "step": 75865 + }, + { + "epoch": 4.8569233723833305, + "grad_norm": 4.210721969604492, + "learning_rate": 1.2472554047244512e-08, + "loss": 0.1932, + "num_input_tokens_seen": 238779712, + "step": 75870 + }, + { + "epoch": 4.857243454324307, + "grad_norm": 5.241265773773193, + "learning_rate": 1.2416881929787072e-08, + "loss": 0.1359, + "num_input_tokens_seen": 238795840, + "step": 75875 + }, + { + "epoch": 4.857563536265284, + "grad_norm": 7.842426300048828, + "learning_rate": 1.2361334029273586e-08, + "loss": 0.1416, + "num_input_tokens_seen": 238812736, + "step": 75880 + }, + { + "epoch": 4.857883618206261, + "grad_norm": 8.308199882507324, + "learning_rate": 1.2305910348477112e-08, + "loss": 0.1357, + "num_input_tokens_seen": 238828928, + "step": 75885 + }, + { + "epoch": 4.858203700147238, + "grad_norm": 9.55829906463623, + "learning_rate": 1.2250610890165992e-08, + "loss": 0.1251, + "num_input_tokens_seen": 238844480, + "step": 75890 + }, + { + "epoch": 4.858523782088215, + "grad_norm": 6.6440935134887695, + "learning_rate": 1.2195435657100796e-08, + "loss": 0.1038, + "num_input_tokens_seen": 238860736, + "step": 75895 + }, + { + "epoch": 4.8588438640291916, + "grad_norm": 2.9313135147094727, + "learning_rate": 1.2140384652037095e-08, + "loss": 0.1103, + "num_input_tokens_seen": 238875776, + "step": 75900 + }, + { + "epoch": 4.8591639459701685, + "grad_norm": 8.579383850097656, + "learning_rate": 1.2085457877723805e-08, + "loss": 0.1327, + "num_input_tokens_seen": 238892224, + "step": 75905 + }, + { + "epoch": 4.8594840279111455, + "grad_norm": 7.052529335021973, + "learning_rate": 1.203065533690373e-08, + "loss": 0.1306, + "num_input_tokens_seen": 238909056, + "step": 75910 + }, + { + "epoch": 4.8598041098521225, + "grad_norm": 11.288861274719238, + "learning_rate": 1.1975977032313291e-08, + "loss": 0.1577, + "num_input_tokens_seen": 238924800, + "step": 75915 + }, + { + "epoch": 4.8601241917930995, + "grad_norm": 9.215072631835938, + "learning_rate": 1.1921422966683083e-08, + "loss": 0.1231, + "num_input_tokens_seen": 238939712, + "step": 75920 + }, + { + "epoch": 4.860444273734076, + "grad_norm": 7.021851539611816, + "learning_rate": 1.1866993142737038e-08, + "loss": 0.0955, + "num_input_tokens_seen": 238954944, + "step": 75925 + }, + { + "epoch": 4.860764355675053, + "grad_norm": 3.855649471282959, + "learning_rate": 1.1812687563192982e-08, + "loss": 0.1146, + "num_input_tokens_seen": 238971328, + "step": 75930 + }, + { + "epoch": 4.86108443761603, + "grad_norm": 29.713247299194336, + "learning_rate": 1.1758506230762911e-08, + "loss": 0.1, + "num_input_tokens_seen": 238986304, + "step": 75935 + }, + { + "epoch": 4.861404519557007, + "grad_norm": 5.50950288772583, + "learning_rate": 1.1704449148152163e-08, + "loss": 0.0854, + "num_input_tokens_seen": 239002048, + "step": 75940 + }, + { + "epoch": 4.861724601497984, + "grad_norm": 9.151079177856445, + "learning_rate": 1.1650516318060523e-08, + "loss": 0.1655, + "num_input_tokens_seen": 239018624, + "step": 75945 + }, + { + "epoch": 4.862044683438961, + "grad_norm": 13.437159538269043, + "learning_rate": 1.159670774318028e-08, + "loss": 0.1123, + "num_input_tokens_seen": 239034496, + "step": 75950 + }, + { + "epoch": 4.862364765379938, + "grad_norm": 6.422824859619141, + "learning_rate": 1.1543023426199285e-08, + "loss": 0.1426, + "num_input_tokens_seen": 239050112, + "step": 75955 + }, + { + "epoch": 4.862684847320914, + "grad_norm": 4.338107109069824, + "learning_rate": 1.1489463369797338e-08, + "loss": 0.0867, + "num_input_tokens_seen": 239064832, + "step": 75960 + }, + { + "epoch": 4.863004929261891, + "grad_norm": 2.8607821464538574, + "learning_rate": 1.14360275766498e-08, + "loss": 0.0697, + "num_input_tokens_seen": 239081280, + "step": 75965 + }, + { + "epoch": 4.863325011202868, + "grad_norm": 6.965506553649902, + "learning_rate": 1.1382716049424259e-08, + "loss": 0.1372, + "num_input_tokens_seen": 239096640, + "step": 75970 + }, + { + "epoch": 4.863645093143845, + "grad_norm": 8.242317199707031, + "learning_rate": 1.1329528790783307e-08, + "loss": 0.1582, + "num_input_tokens_seen": 239111552, + "step": 75975 + }, + { + "epoch": 4.863965175084822, + "grad_norm": 16.252296447753906, + "learning_rate": 1.1276465803382875e-08, + "loss": 0.1885, + "num_input_tokens_seen": 239127104, + "step": 75980 + }, + { + "epoch": 4.864285257025799, + "grad_norm": 4.625786781311035, + "learning_rate": 1.1223527089872232e-08, + "loss": 0.1205, + "num_input_tokens_seen": 239143168, + "step": 75985 + }, + { + "epoch": 4.864605338966776, + "grad_norm": 5.140679359436035, + "learning_rate": 1.1170712652895099e-08, + "loss": 0.1061, + "num_input_tokens_seen": 239159424, + "step": 75990 + }, + { + "epoch": 4.864925420907753, + "grad_norm": 6.063313961029053, + "learning_rate": 1.1118022495088532e-08, + "loss": 0.1172, + "num_input_tokens_seen": 239176384, + "step": 75995 + }, + { + "epoch": 4.86524550284873, + "grad_norm": 7.441099643707275, + "learning_rate": 1.1065456619084036e-08, + "loss": 0.1468, + "num_input_tokens_seen": 239192064, + "step": 76000 + }, + { + "epoch": 4.865565584789707, + "grad_norm": 5.283560276031494, + "learning_rate": 1.1013015027506458e-08, + "loss": 0.0856, + "num_input_tokens_seen": 239206720, + "step": 76005 + }, + { + "epoch": 4.865885666730683, + "grad_norm": 9.813076972961426, + "learning_rate": 1.0960697722973978e-08, + "loss": 0.1264, + "num_input_tokens_seen": 239223488, + "step": 76010 + }, + { + "epoch": 4.86620574867166, + "grad_norm": 9.501228332519531, + "learning_rate": 1.090850470809951e-08, + "loss": 0.132, + "num_input_tokens_seen": 239238720, + "step": 76015 + }, + { + "epoch": 4.866525830612637, + "grad_norm": 6.868983745574951, + "learning_rate": 1.0856435985488745e-08, + "loss": 0.1585, + "num_input_tokens_seen": 239253120, + "step": 76020 + }, + { + "epoch": 4.866845912553614, + "grad_norm": 3.195352792739868, + "learning_rate": 1.080449155774238e-08, + "loss": 0.0961, + "num_input_tokens_seen": 239268096, + "step": 76025 + }, + { + "epoch": 4.867165994494591, + "grad_norm": 6.896650791168213, + "learning_rate": 1.0752671427453898e-08, + "loss": 0.1236, + "num_input_tokens_seen": 239282752, + "step": 76030 + }, + { + "epoch": 4.867486076435568, + "grad_norm": 8.735917091369629, + "learning_rate": 1.070097559721095e-08, + "loss": 0.1387, + "num_input_tokens_seen": 239299072, + "step": 76035 + }, + { + "epoch": 4.867806158376545, + "grad_norm": 49.95860290527344, + "learning_rate": 1.0649404069595082e-08, + "loss": 0.1356, + "num_input_tokens_seen": 239314624, + "step": 76040 + }, + { + "epoch": 4.868126240317522, + "grad_norm": 8.785271644592285, + "learning_rate": 1.0597956847181457e-08, + "loss": 0.1301, + "num_input_tokens_seen": 239329536, + "step": 76045 + }, + { + "epoch": 4.868446322258498, + "grad_norm": 16.677261352539062, + "learning_rate": 1.0546633932538853e-08, + "loss": 0.1584, + "num_input_tokens_seen": 239344896, + "step": 76050 + }, + { + "epoch": 4.868766404199475, + "grad_norm": 38.806602478027344, + "learning_rate": 1.0495435328230497e-08, + "loss": 0.2035, + "num_input_tokens_seen": 239360768, + "step": 76055 + }, + { + "epoch": 4.869086486140452, + "grad_norm": 7.542311191558838, + "learning_rate": 1.0444361036812401e-08, + "loss": 0.135, + "num_input_tokens_seen": 239376000, + "step": 76060 + }, + { + "epoch": 4.869406568081429, + "grad_norm": 9.110735893249512, + "learning_rate": 1.0393411060835856e-08, + "loss": 0.1407, + "num_input_tokens_seen": 239391232, + "step": 76065 + }, + { + "epoch": 4.869726650022406, + "grad_norm": 6.912339687347412, + "learning_rate": 1.034258540284383e-08, + "loss": 0.1369, + "num_input_tokens_seen": 239407424, + "step": 76070 + }, + { + "epoch": 4.870046731963383, + "grad_norm": 4.250179290771484, + "learning_rate": 1.0291884065375402e-08, + "loss": 0.1577, + "num_input_tokens_seen": 239423360, + "step": 76075 + }, + { + "epoch": 4.87036681390436, + "grad_norm": 7.733861446380615, + "learning_rate": 1.0241307050961602e-08, + "loss": 0.1566, + "num_input_tokens_seen": 239439104, + "step": 76080 + }, + { + "epoch": 4.870686895845337, + "grad_norm": 9.211966514587402, + "learning_rate": 1.0190854362128465e-08, + "loss": 0.1454, + "num_input_tokens_seen": 239454592, + "step": 76085 + }, + { + "epoch": 4.871006977786314, + "grad_norm": 7.933476448059082, + "learning_rate": 1.014052600139509e-08, + "loss": 0.1571, + "num_input_tokens_seen": 239470272, + "step": 76090 + }, + { + "epoch": 4.871327059727291, + "grad_norm": 5.298399925231934, + "learning_rate": 1.0090321971274464e-08, + "loss": 0.1414, + "num_input_tokens_seen": 239485696, + "step": 76095 + }, + { + "epoch": 4.871647141668267, + "grad_norm": 7.434033393859863, + "learning_rate": 1.0040242274273749e-08, + "loss": 0.1537, + "num_input_tokens_seen": 239501824, + "step": 76100 + }, + { + "epoch": 4.871967223609244, + "grad_norm": 7.249874591827393, + "learning_rate": 9.99028691289372e-09, + "loss": 0.1263, + "num_input_tokens_seen": 239517568, + "step": 76105 + }, + { + "epoch": 4.872287305550221, + "grad_norm": 5.991157531738281, + "learning_rate": 9.940455889628498e-09, + "loss": 0.1416, + "num_input_tokens_seen": 239532800, + "step": 76110 + }, + { + "epoch": 4.872607387491198, + "grad_norm": 13.768902778625488, + "learning_rate": 9.89074920696692e-09, + "loss": 0.1386, + "num_input_tokens_seen": 239552192, + "step": 76115 + }, + { + "epoch": 4.872927469432175, + "grad_norm": 3.8860559463500977, + "learning_rate": 9.841166867390895e-09, + "loss": 0.0614, + "num_input_tokens_seen": 239566912, + "step": 76120 + }, + { + "epoch": 4.873247551373152, + "grad_norm": 8.422876358032227, + "learning_rate": 9.791708873375939e-09, + "loss": 0.125, + "num_input_tokens_seen": 239583040, + "step": 76125 + }, + { + "epoch": 4.873567633314129, + "grad_norm": 8.304262161254883, + "learning_rate": 9.742375227392297e-09, + "loss": 0.1232, + "num_input_tokens_seen": 239597440, + "step": 76130 + }, + { + "epoch": 4.873887715255106, + "grad_norm": 5.519636154174805, + "learning_rate": 9.693165931903004e-09, + "loss": 0.1181, + "num_input_tokens_seen": 239613056, + "step": 76135 + }, + { + "epoch": 4.874207797196082, + "grad_norm": 5.529992580413818, + "learning_rate": 9.644080989365534e-09, + "loss": 0.091, + "num_input_tokens_seen": 239628224, + "step": 76140 + }, + { + "epoch": 4.874527879137059, + "grad_norm": 5.9097442626953125, + "learning_rate": 9.595120402230707e-09, + "loss": 0.1661, + "num_input_tokens_seen": 239644160, + "step": 76145 + }, + { + "epoch": 4.874847961078036, + "grad_norm": 5.715277671813965, + "learning_rate": 9.546284172943787e-09, + "loss": 0.1528, + "num_input_tokens_seen": 239660352, + "step": 76150 + }, + { + "epoch": 4.875168043019013, + "grad_norm": 7.904169082641602, + "learning_rate": 9.497572303943103e-09, + "loss": 0.1373, + "num_input_tokens_seen": 239675840, + "step": 76155 + }, + { + "epoch": 4.87548812495999, + "grad_norm": 5.810630798339844, + "learning_rate": 9.448984797660876e-09, + "loss": 0.1207, + "num_input_tokens_seen": 239692928, + "step": 76160 + }, + { + "epoch": 4.875808206900967, + "grad_norm": 6.804194927215576, + "learning_rate": 9.400521656523775e-09, + "loss": 0.1428, + "num_input_tokens_seen": 239708288, + "step": 76165 + }, + { + "epoch": 4.876128288841944, + "grad_norm": 6.275364875793457, + "learning_rate": 9.352182882951255e-09, + "loss": 0.1252, + "num_input_tokens_seen": 239724416, + "step": 76170 + }, + { + "epoch": 4.876448370782921, + "grad_norm": 5.061872959136963, + "learning_rate": 9.303968479357772e-09, + "loss": 0.1028, + "num_input_tokens_seen": 239740800, + "step": 76175 + }, + { + "epoch": 4.876768452723898, + "grad_norm": 8.975785255432129, + "learning_rate": 9.25587844815029e-09, + "loss": 0.1897, + "num_input_tokens_seen": 239756096, + "step": 76180 + }, + { + "epoch": 4.877088534664875, + "grad_norm": 6.533121109008789, + "learning_rate": 9.207912791730777e-09, + "loss": 0.1047, + "num_input_tokens_seen": 239770752, + "step": 76185 + }, + { + "epoch": 4.877408616605851, + "grad_norm": 4.020623207092285, + "learning_rate": 9.160071512493706e-09, + "loss": 0.1175, + "num_input_tokens_seen": 239787072, + "step": 76190 + }, + { + "epoch": 4.877728698546828, + "grad_norm": 5.617883205413818, + "learning_rate": 9.112354612828277e-09, + "loss": 0.1365, + "num_input_tokens_seen": 239802752, + "step": 76195 + }, + { + "epoch": 4.878048780487805, + "grad_norm": 9.78384780883789, + "learning_rate": 9.064762095117585e-09, + "loss": 0.1338, + "num_input_tokens_seen": 239818048, + "step": 76200 + }, + { + "epoch": 4.878368862428782, + "grad_norm": 8.413976669311523, + "learning_rate": 9.017293961737783e-09, + "loss": 0.1664, + "num_input_tokens_seen": 239833984, + "step": 76205 + }, + { + "epoch": 4.878688944369759, + "grad_norm": 6.997419357299805, + "learning_rate": 8.969950215059198e-09, + "loss": 0.1539, + "num_input_tokens_seen": 239849344, + "step": 76210 + }, + { + "epoch": 4.879009026310736, + "grad_norm": 5.348503112792969, + "learning_rate": 8.922730857445771e-09, + "loss": 0.1103, + "num_input_tokens_seen": 239863936, + "step": 76215 + }, + { + "epoch": 4.879329108251713, + "grad_norm": 7.440668106079102, + "learning_rate": 8.875635891255896e-09, + "loss": 0.1669, + "num_input_tokens_seen": 239880192, + "step": 76220 + }, + { + "epoch": 4.879649190192689, + "grad_norm": 8.048218727111816, + "learning_rate": 8.828665318841024e-09, + "loss": 0.1698, + "num_input_tokens_seen": 239896768, + "step": 76225 + }, + { + "epoch": 4.879969272133666, + "grad_norm": 8.066920280456543, + "learning_rate": 8.781819142546499e-09, + "loss": 0.1383, + "num_input_tokens_seen": 239912576, + "step": 76230 + }, + { + "epoch": 4.880289354074643, + "grad_norm": 10.645369529724121, + "learning_rate": 8.735097364711565e-09, + "loss": 0.1702, + "num_input_tokens_seen": 239930176, + "step": 76235 + }, + { + "epoch": 4.88060943601562, + "grad_norm": 7.31517219543457, + "learning_rate": 8.688499987669351e-09, + "loss": 0.1631, + "num_input_tokens_seen": 239946752, + "step": 76240 + }, + { + "epoch": 4.880929517956597, + "grad_norm": 5.124349117279053, + "learning_rate": 8.642027013746889e-09, + "loss": 0.1363, + "num_input_tokens_seen": 239962496, + "step": 76245 + }, + { + "epoch": 4.881249599897574, + "grad_norm": 9.43193531036377, + "learning_rate": 8.595678445264544e-09, + "loss": 0.1232, + "num_input_tokens_seen": 239978496, + "step": 76250 + }, + { + "epoch": 4.881569681838551, + "grad_norm": 11.135734558105469, + "learning_rate": 8.549454284536573e-09, + "loss": 0.1478, + "num_input_tokens_seen": 239993536, + "step": 76255 + }, + { + "epoch": 4.881889763779528, + "grad_norm": 6.878116607666016, + "learning_rate": 8.50335453387141e-09, + "loss": 0.166, + "num_input_tokens_seen": 240009152, + "step": 76260 + }, + { + "epoch": 4.882209845720505, + "grad_norm": 7.551279067993164, + "learning_rate": 8.457379195571102e-09, + "loss": 0.1519, + "num_input_tokens_seen": 240024832, + "step": 76265 + }, + { + "epoch": 4.882529927661482, + "grad_norm": 9.284730911254883, + "learning_rate": 8.411528271931313e-09, + "loss": 0.1361, + "num_input_tokens_seen": 240040320, + "step": 76270 + }, + { + "epoch": 4.882850009602458, + "grad_norm": 65.11074829101562, + "learning_rate": 8.365801765241321e-09, + "loss": 0.1749, + "num_input_tokens_seen": 240055424, + "step": 76275 + }, + { + "epoch": 4.883170091543435, + "grad_norm": 20.165498733520508, + "learning_rate": 8.320199677784857e-09, + "loss": 0.2047, + "num_input_tokens_seen": 240071872, + "step": 76280 + }, + { + "epoch": 4.883490173484412, + "grad_norm": 4.670200824737549, + "learning_rate": 8.274722011838986e-09, + "loss": 0.0991, + "num_input_tokens_seen": 240087872, + "step": 76285 + }, + { + "epoch": 4.883810255425389, + "grad_norm": 3.9187071323394775, + "learning_rate": 8.229368769674673e-09, + "loss": 0.0973, + "num_input_tokens_seen": 240102912, + "step": 76290 + }, + { + "epoch": 4.884130337366366, + "grad_norm": 9.632052421569824, + "learning_rate": 8.184139953556491e-09, + "loss": 0.1305, + "num_input_tokens_seen": 240118592, + "step": 76295 + }, + { + "epoch": 4.884450419307343, + "grad_norm": 6.5244975090026855, + "learning_rate": 8.139035565742915e-09, + "loss": 0.1303, + "num_input_tokens_seen": 240134464, + "step": 76300 + }, + { + "epoch": 4.88477050124832, + "grad_norm": 10.38232707977295, + "learning_rate": 8.094055608486028e-09, + "loss": 0.1544, + "num_input_tokens_seen": 240150720, + "step": 76305 + }, + { + "epoch": 4.885090583189297, + "grad_norm": 7.28853178024292, + "learning_rate": 8.049200084032649e-09, + "loss": 0.1457, + "num_input_tokens_seen": 240166272, + "step": 76310 + }, + { + "epoch": 4.885410665130273, + "grad_norm": 6.321457862854004, + "learning_rate": 8.004468994621816e-09, + "loss": 0.1214, + "num_input_tokens_seen": 240181056, + "step": 76315 + }, + { + "epoch": 4.88573074707125, + "grad_norm": 4.902372360229492, + "learning_rate": 7.959862342487579e-09, + "loss": 0.1339, + "num_input_tokens_seen": 240196160, + "step": 76320 + }, + { + "epoch": 4.886050829012227, + "grad_norm": 5.481544494628906, + "learning_rate": 7.915380129857042e-09, + "loss": 0.1292, + "num_input_tokens_seen": 240211520, + "step": 76325 + }, + { + "epoch": 4.886370910953204, + "grad_norm": 5.928019046783447, + "learning_rate": 7.871022358951763e-09, + "loss": 0.1346, + "num_input_tokens_seen": 240227776, + "step": 76330 + }, + { + "epoch": 4.886690992894181, + "grad_norm": 6.191487789154053, + "learning_rate": 7.826789031986638e-09, + "loss": 0.1496, + "num_input_tokens_seen": 240243072, + "step": 76335 + }, + { + "epoch": 4.887011074835158, + "grad_norm": 8.255571365356445, + "learning_rate": 7.782680151170175e-09, + "loss": 0.1494, + "num_input_tokens_seen": 240258624, + "step": 76340 + }, + { + "epoch": 4.887331156776135, + "grad_norm": 5.653181076049805, + "learning_rate": 7.738695718705614e-09, + "loss": 0.1253, + "num_input_tokens_seen": 240274496, + "step": 76345 + }, + { + "epoch": 4.887651238717112, + "grad_norm": 5.969869136810303, + "learning_rate": 7.694835736788697e-09, + "loss": 0.137, + "num_input_tokens_seen": 240289536, + "step": 76350 + }, + { + "epoch": 4.887971320658089, + "grad_norm": 5.233370304107666, + "learning_rate": 7.651100207609619e-09, + "loss": 0.1308, + "num_input_tokens_seen": 240304704, + "step": 76355 + }, + { + "epoch": 4.888291402599066, + "grad_norm": 5.617459774017334, + "learning_rate": 7.607489133352464e-09, + "loss": 0.1412, + "num_input_tokens_seen": 240320256, + "step": 76360 + }, + { + "epoch": 4.888611484540042, + "grad_norm": 6.804502964019775, + "learning_rate": 7.564002516194936e-09, + "loss": 0.1534, + "num_input_tokens_seen": 240337536, + "step": 76365 + }, + { + "epoch": 4.888931566481019, + "grad_norm": 6.304695129394531, + "learning_rate": 7.520640358308351e-09, + "loss": 0.1367, + "num_input_tokens_seen": 240353664, + "step": 76370 + }, + { + "epoch": 4.889251648421996, + "grad_norm": 7.9192657470703125, + "learning_rate": 7.477402661858201e-09, + "loss": 0.1344, + "num_input_tokens_seen": 240368896, + "step": 76375 + }, + { + "epoch": 4.889571730362973, + "grad_norm": 0.4637592136859894, + "learning_rate": 7.434289429003872e-09, + "loss": 0.1102, + "num_input_tokens_seen": 240384704, + "step": 76380 + }, + { + "epoch": 4.88989181230395, + "grad_norm": 6.299748420715332, + "learning_rate": 7.391300661897249e-09, + "loss": 0.1189, + "num_input_tokens_seen": 240400128, + "step": 76385 + }, + { + "epoch": 4.890211894244927, + "grad_norm": 11.248077392578125, + "learning_rate": 7.348436362685785e-09, + "loss": 0.1221, + "num_input_tokens_seen": 240415552, + "step": 76390 + }, + { + "epoch": 4.890531976185904, + "grad_norm": 7.773911952972412, + "learning_rate": 7.30569653350971e-09, + "loss": 0.1167, + "num_input_tokens_seen": 240430848, + "step": 76395 + }, + { + "epoch": 4.890852058126881, + "grad_norm": 6.010772705078125, + "learning_rate": 7.26308117650315e-09, + "loss": 0.132, + "num_input_tokens_seen": 240447424, + "step": 76400 + }, + { + "epoch": 4.891172140067857, + "grad_norm": 6.466130256652832, + "learning_rate": 7.220590293794405e-09, + "loss": 0.1441, + "num_input_tokens_seen": 240463360, + "step": 76405 + }, + { + "epoch": 4.891492222008834, + "grad_norm": 12.487627983093262, + "learning_rate": 7.178223887504554e-09, + "loss": 0.1452, + "num_input_tokens_seen": 240478144, + "step": 76410 + }, + { + "epoch": 4.891812303949811, + "grad_norm": 5.888126850128174, + "learning_rate": 7.135981959749683e-09, + "loss": 0.1284, + "num_input_tokens_seen": 240494016, + "step": 76415 + }, + { + "epoch": 4.892132385890788, + "grad_norm": 8.59408950805664, + "learning_rate": 7.093864512638937e-09, + "loss": 0.1206, + "num_input_tokens_seen": 240509184, + "step": 76420 + }, + { + "epoch": 4.892452467831765, + "grad_norm": 7.7405195236206055, + "learning_rate": 7.051871548275635e-09, + "loss": 0.1024, + "num_input_tokens_seen": 240524224, + "step": 76425 + }, + { + "epoch": 4.892772549772742, + "grad_norm": 6.249320030212402, + "learning_rate": 7.01000306875671e-09, + "loss": 0.152, + "num_input_tokens_seen": 240539328, + "step": 76430 + }, + { + "epoch": 4.893092631713719, + "grad_norm": 6.575611114501953, + "learning_rate": 6.9682590761724344e-09, + "loss": 0.1137, + "num_input_tokens_seen": 240553920, + "step": 76435 + }, + { + "epoch": 4.893412713654696, + "grad_norm": 7.808593273162842, + "learning_rate": 6.926639572607807e-09, + "loss": 0.1477, + "num_input_tokens_seen": 240570688, + "step": 76440 + }, + { + "epoch": 4.893732795595673, + "grad_norm": 5.867847919464111, + "learning_rate": 6.8851445601408884e-09, + "loss": 0.1921, + "num_input_tokens_seen": 240587648, + "step": 76445 + }, + { + "epoch": 4.89405287753665, + "grad_norm": 11.365434646606445, + "learning_rate": 6.843774040843354e-09, + "loss": 0.1596, + "num_input_tokens_seen": 240604224, + "step": 76450 + }, + { + "epoch": 4.894372959477626, + "grad_norm": 6.575910568237305, + "learning_rate": 6.802528016781607e-09, + "loss": 0.1189, + "num_input_tokens_seen": 240621120, + "step": 76455 + }, + { + "epoch": 4.894693041418603, + "grad_norm": 7.5914177894592285, + "learning_rate": 6.76140649001511e-09, + "loss": 0.1566, + "num_input_tokens_seen": 240637888, + "step": 76460 + }, + { + "epoch": 4.89501312335958, + "grad_norm": 8.890203475952148, + "learning_rate": 6.720409462596944e-09, + "loss": 0.1291, + "num_input_tokens_seen": 240653248, + "step": 76465 + }, + { + "epoch": 4.895333205300557, + "grad_norm": 10.14054012298584, + "learning_rate": 6.6795369365746375e-09, + "loss": 0.1721, + "num_input_tokens_seen": 240669056, + "step": 76470 + }, + { + "epoch": 4.895653287241534, + "grad_norm": 7.583086013793945, + "learning_rate": 6.638788913989058e-09, + "loss": 0.1247, + "num_input_tokens_seen": 240685376, + "step": 76475 + }, + { + "epoch": 4.895973369182511, + "grad_norm": 3.8073930740356445, + "learning_rate": 6.5981653968749674e-09, + "loss": 0.1372, + "num_input_tokens_seen": 240700224, + "step": 76480 + }, + { + "epoch": 4.896293451123488, + "grad_norm": 6.669774532318115, + "learning_rate": 6.557666387260464e-09, + "loss": 0.1509, + "num_input_tokens_seen": 240715712, + "step": 76485 + }, + { + "epoch": 4.896613533064464, + "grad_norm": 10.453267097473145, + "learning_rate": 6.517291887168653e-09, + "loss": 0.1406, + "num_input_tokens_seen": 240732800, + "step": 76490 + }, + { + "epoch": 4.896933615005441, + "grad_norm": 8.908679962158203, + "learning_rate": 6.477041898614866e-09, + "loss": 0.1357, + "num_input_tokens_seen": 240748544, + "step": 76495 + }, + { + "epoch": 4.897253696946418, + "grad_norm": 6.126983165740967, + "learning_rate": 6.436916423609718e-09, + "loss": 0.0972, + "num_input_tokens_seen": 240763392, + "step": 76500 + }, + { + "epoch": 4.897573778887395, + "grad_norm": 6.810024738311768, + "learning_rate": 6.39691546415605e-09, + "loss": 0.1498, + "num_input_tokens_seen": 240781120, + "step": 76505 + }, + { + "epoch": 4.897893860828372, + "grad_norm": 8.70777416229248, + "learning_rate": 6.357039022251988e-09, + "loss": 0.1136, + "num_input_tokens_seen": 240796480, + "step": 76510 + }, + { + "epoch": 4.898213942769349, + "grad_norm": 7.036071300506592, + "learning_rate": 6.317287099888436e-09, + "loss": 0.1309, + "num_input_tokens_seen": 240811328, + "step": 76515 + }, + { + "epoch": 4.898534024710326, + "grad_norm": 8.40705394744873, + "learning_rate": 6.277659699050476e-09, + "loss": 0.1471, + "num_input_tokens_seen": 240826944, + "step": 76520 + }, + { + "epoch": 4.898854106651303, + "grad_norm": 7.483603000640869, + "learning_rate": 6.238156821716801e-09, + "loss": 0.1394, + "num_input_tokens_seen": 240841856, + "step": 76525 + }, + { + "epoch": 4.89917418859228, + "grad_norm": 10.708263397216797, + "learning_rate": 6.1987784698600005e-09, + "loss": 0.1103, + "num_input_tokens_seen": 240857472, + "step": 76530 + }, + { + "epoch": 4.899494270533257, + "grad_norm": 8.360173225402832, + "learning_rate": 6.159524645446558e-09, + "loss": 0.1552, + "num_input_tokens_seen": 240872768, + "step": 76535 + }, + { + "epoch": 4.899814352474233, + "grad_norm": 21.400293350219727, + "learning_rate": 6.12039535043657e-09, + "loss": 0.1359, + "num_input_tokens_seen": 240887424, + "step": 76540 + }, + { + "epoch": 4.90013443441521, + "grad_norm": 3.730990409851074, + "learning_rate": 6.081390586783753e-09, + "loss": 0.1248, + "num_input_tokens_seen": 240903936, + "step": 76545 + }, + { + "epoch": 4.900454516356187, + "grad_norm": 4.530307769775391, + "learning_rate": 6.042510356435993e-09, + "loss": 0.1086, + "num_input_tokens_seen": 240919680, + "step": 76550 + }, + { + "epoch": 4.900774598297164, + "grad_norm": 4.798343181610107, + "learning_rate": 6.003754661334793e-09, + "loss": 0.0986, + "num_input_tokens_seen": 240934784, + "step": 76555 + }, + { + "epoch": 4.901094680238141, + "grad_norm": 4.894050598144531, + "learning_rate": 5.965123503415271e-09, + "loss": 0.1642, + "num_input_tokens_seen": 240950656, + "step": 76560 + }, + { + "epoch": 4.901414762179118, + "grad_norm": 8.002725601196289, + "learning_rate": 5.9266168846064395e-09, + "loss": 0.1192, + "num_input_tokens_seen": 240966016, + "step": 76565 + }, + { + "epoch": 4.901734844120095, + "grad_norm": 4.535548686981201, + "learning_rate": 5.888234806831206e-09, + "loss": 0.1047, + "num_input_tokens_seen": 240981888, + "step": 76570 + }, + { + "epoch": 4.902054926061072, + "grad_norm": 6.699045658111572, + "learning_rate": 5.849977272006369e-09, + "loss": 0.097, + "num_input_tokens_seen": 240997248, + "step": 76575 + }, + { + "epoch": 4.902375008002048, + "grad_norm": 9.356484413146973, + "learning_rate": 5.811844282042067e-09, + "loss": 0.1327, + "num_input_tokens_seen": 241011904, + "step": 76580 + }, + { + "epoch": 4.902695089943025, + "grad_norm": 9.148504257202148, + "learning_rate": 5.773835838842334e-09, + "loss": 0.1364, + "num_input_tokens_seen": 241026944, + "step": 76585 + }, + { + "epoch": 4.903015171884002, + "grad_norm": 9.34339427947998, + "learning_rate": 5.735951944305373e-09, + "loss": 0.1266, + "num_input_tokens_seen": 241043200, + "step": 76590 + }, + { + "epoch": 4.903335253824979, + "grad_norm": 4.469866752624512, + "learning_rate": 5.698192600323005e-09, + "loss": 0.0909, + "num_input_tokens_seen": 241071744, + "step": 76595 + }, + { + "epoch": 4.903655335765956, + "grad_norm": 5.880859851837158, + "learning_rate": 5.660557808780387e-09, + "loss": 0.1238, + "num_input_tokens_seen": 241087296, + "step": 76600 + }, + { + "epoch": 4.903975417706933, + "grad_norm": 8.82970142364502, + "learning_rate": 5.62304757155685e-09, + "loss": 0.0983, + "num_input_tokens_seen": 241103104, + "step": 76605 + }, + { + "epoch": 4.90429549964791, + "grad_norm": 6.2620649337768555, + "learning_rate": 5.585661890525895e-09, + "loss": 0.1001, + "num_input_tokens_seen": 241117376, + "step": 76610 + }, + { + "epoch": 4.904615581588887, + "grad_norm": 7.215878009796143, + "learning_rate": 5.548400767553808e-09, + "loss": 0.1411, + "num_input_tokens_seen": 241132480, + "step": 76615 + }, + { + "epoch": 4.904935663529864, + "grad_norm": 8.314034461975098, + "learning_rate": 5.511264204501321e-09, + "loss": 0.1447, + "num_input_tokens_seen": 241148416, + "step": 76620 + }, + { + "epoch": 4.905255745470841, + "grad_norm": 5.281721115112305, + "learning_rate": 5.47425220322334e-09, + "loss": 0.1086, + "num_input_tokens_seen": 241165824, + "step": 76625 + }, + { + "epoch": 4.905575827411817, + "grad_norm": 7.0864787101745605, + "learning_rate": 5.437364765567555e-09, + "loss": 0.1246, + "num_input_tokens_seen": 241180416, + "step": 76630 + }, + { + "epoch": 4.905895909352794, + "grad_norm": 12.001233100891113, + "learning_rate": 5.400601893376101e-09, + "loss": 0.1738, + "num_input_tokens_seen": 241197248, + "step": 76635 + }, + { + "epoch": 4.906215991293771, + "grad_norm": 8.003586769104004, + "learning_rate": 5.363963588484456e-09, + "loss": 0.1298, + "num_input_tokens_seen": 241213056, + "step": 76640 + }, + { + "epoch": 4.906536073234748, + "grad_norm": 9.500752449035645, + "learning_rate": 5.327449852722821e-09, + "loss": 0.1165, + "num_input_tokens_seen": 241228672, + "step": 76645 + }, + { + "epoch": 4.906856155175725, + "grad_norm": 9.27523422241211, + "learning_rate": 5.291060687913629e-09, + "loss": 0.1659, + "num_input_tokens_seen": 241243712, + "step": 76650 + }, + { + "epoch": 4.907176237116702, + "grad_norm": 6.513916015625, + "learning_rate": 5.254796095874592e-09, + "loss": 0.1274, + "num_input_tokens_seen": 241259456, + "step": 76655 + }, + { + "epoch": 4.907496319057679, + "grad_norm": 27.025747299194336, + "learning_rate": 5.2186560784164845e-09, + "loss": 0.2301, + "num_input_tokens_seen": 241275264, + "step": 76660 + }, + { + "epoch": 4.907816400998656, + "grad_norm": 5.461848735809326, + "learning_rate": 5.1826406373436945e-09, + "loss": 0.1328, + "num_input_tokens_seen": 241291840, + "step": 76665 + }, + { + "epoch": 4.908136482939632, + "grad_norm": 6.3350419998168945, + "learning_rate": 5.146749774455062e-09, + "loss": 0.2093, + "num_input_tokens_seen": 241307520, + "step": 76670 + }, + { + "epoch": 4.908456564880609, + "grad_norm": 6.321559429168701, + "learning_rate": 5.11098349154221e-09, + "loss": 0.1466, + "num_input_tokens_seen": 241323264, + "step": 76675 + }, + { + "epoch": 4.908776646821586, + "grad_norm": 7.0284833908081055, + "learning_rate": 5.075341790391486e-09, + "loss": 0.1059, + "num_input_tokens_seen": 241338880, + "step": 76680 + }, + { + "epoch": 4.909096728762563, + "grad_norm": 6.469413757324219, + "learning_rate": 5.0398246727825785e-09, + "loss": 0.141, + "num_input_tokens_seen": 241355200, + "step": 76685 + }, + { + "epoch": 4.90941681070354, + "grad_norm": 8.047957420349121, + "learning_rate": 5.004432140489346e-09, + "loss": 0.1335, + "num_input_tokens_seen": 241370240, + "step": 76690 + }, + { + "epoch": 4.909736892644517, + "grad_norm": 58.040401458740234, + "learning_rate": 4.969164195278431e-09, + "loss": 0.1502, + "num_input_tokens_seen": 241385728, + "step": 76695 + }, + { + "epoch": 4.910056974585494, + "grad_norm": 2.7322096824645996, + "learning_rate": 4.934020838911757e-09, + "loss": 0.1209, + "num_input_tokens_seen": 241401472, + "step": 76700 + }, + { + "epoch": 4.910377056526471, + "grad_norm": 8.741145133972168, + "learning_rate": 4.8990020731434775e-09, + "loss": 0.1353, + "num_input_tokens_seen": 241417792, + "step": 76705 + }, + { + "epoch": 4.910697138467448, + "grad_norm": 8.763684272766113, + "learning_rate": 4.86410789972247e-09, + "loss": 0.1508, + "num_input_tokens_seen": 241435840, + "step": 76710 + }, + { + "epoch": 4.911017220408425, + "grad_norm": 3.2191288471221924, + "learning_rate": 4.8293383203912305e-09, + "loss": 0.1295, + "num_input_tokens_seen": 241451904, + "step": 76715 + }, + { + "epoch": 4.911337302349401, + "grad_norm": 6.655338764190674, + "learning_rate": 4.794693336886147e-09, + "loss": 0.1311, + "num_input_tokens_seen": 241467136, + "step": 76720 + }, + { + "epoch": 4.911657384290378, + "grad_norm": 7.313230037689209, + "learning_rate": 4.760172950936947e-09, + "loss": 0.1043, + "num_input_tokens_seen": 241482432, + "step": 76725 + }, + { + "epoch": 4.911977466231355, + "grad_norm": 9.160311698913574, + "learning_rate": 4.725777164267531e-09, + "loss": 0.189, + "num_input_tokens_seen": 241497728, + "step": 76730 + }, + { + "epoch": 4.912297548172332, + "grad_norm": 7.0247602462768555, + "learning_rate": 4.691505978595135e-09, + "loss": 0.125, + "num_input_tokens_seen": 241513024, + "step": 76735 + }, + { + "epoch": 4.912617630113309, + "grad_norm": 4.085752010345459, + "learning_rate": 4.657359395631722e-09, + "loss": 0.1549, + "num_input_tokens_seen": 241528384, + "step": 76740 + }, + { + "epoch": 4.912937712054286, + "grad_norm": 7.210933685302734, + "learning_rate": 4.623337417081764e-09, + "loss": 0.1177, + "num_input_tokens_seen": 241543360, + "step": 76745 + }, + { + "epoch": 4.913257793995263, + "grad_norm": 7.14440393447876, + "learning_rate": 4.589440044644456e-09, + "loss": 0.1099, + "num_input_tokens_seen": 241559744, + "step": 76750 + }, + { + "epoch": 4.913577875936239, + "grad_norm": 6.114537715911865, + "learning_rate": 4.555667280012332e-09, + "loss": 0.1189, + "num_input_tokens_seen": 241576448, + "step": 76755 + }, + { + "epoch": 4.913897957877216, + "grad_norm": 9.615031242370605, + "learning_rate": 4.522019124871824e-09, + "loss": 0.1318, + "num_input_tokens_seen": 241591744, + "step": 76760 + }, + { + "epoch": 4.914218039818193, + "grad_norm": 8.053332328796387, + "learning_rate": 4.488495580903529e-09, + "loss": 0.1418, + "num_input_tokens_seen": 241606976, + "step": 76765 + }, + { + "epoch": 4.91453812175917, + "grad_norm": 6.701362133026123, + "learning_rate": 4.455096649780832e-09, + "loss": 0.1476, + "num_input_tokens_seen": 241624064, + "step": 76770 + }, + { + "epoch": 4.914858203700147, + "grad_norm": 5.627013683319092, + "learning_rate": 4.421822333171844e-09, + "loss": 0.1563, + "num_input_tokens_seen": 241639552, + "step": 76775 + }, + { + "epoch": 4.915178285641124, + "grad_norm": 5.32188606262207, + "learning_rate": 4.388672632738289e-09, + "loss": 0.1066, + "num_input_tokens_seen": 241654848, + "step": 76780 + }, + { + "epoch": 4.915498367582101, + "grad_norm": 10.113202095031738, + "learning_rate": 4.355647550134956e-09, + "loss": 0.1624, + "num_input_tokens_seen": 241670080, + "step": 76785 + }, + { + "epoch": 4.915818449523078, + "grad_norm": 4.666036128997803, + "learning_rate": 4.322747087011358e-09, + "loss": 0.1416, + "num_input_tokens_seen": 241685312, + "step": 76790 + }, + { + "epoch": 4.916138531464055, + "grad_norm": 4.577199459075928, + "learning_rate": 4.2899712450103496e-09, + "loss": 0.1207, + "num_input_tokens_seen": 241699968, + "step": 76795 + }, + { + "epoch": 4.916458613405032, + "grad_norm": 7.8286356925964355, + "learning_rate": 4.2573200257683966e-09, + "loss": 0.1647, + "num_input_tokens_seen": 241715776, + "step": 76800 + }, + { + "epoch": 4.916778695346008, + "grad_norm": 7.2151689529418945, + "learning_rate": 4.224793430915863e-09, + "loss": 0.1806, + "num_input_tokens_seen": 241729792, + "step": 76805 + }, + { + "epoch": 4.917098777286985, + "grad_norm": 5.91048526763916, + "learning_rate": 4.192391462077283e-09, + "loss": 0.1019, + "num_input_tokens_seen": 241744704, + "step": 76810 + }, + { + "epoch": 4.917418859227962, + "grad_norm": 7.6244964599609375, + "learning_rate": 4.160114120870528e-09, + "loss": 0.125, + "num_input_tokens_seen": 241760512, + "step": 76815 + }, + { + "epoch": 4.917738941168939, + "grad_norm": 5.616939067840576, + "learning_rate": 4.127961408907366e-09, + "loss": 0.1156, + "num_input_tokens_seen": 241776512, + "step": 76820 + }, + { + "epoch": 4.918059023109916, + "grad_norm": 6.565927505493164, + "learning_rate": 4.095933327793178e-09, + "loss": 0.1726, + "num_input_tokens_seen": 241791360, + "step": 76825 + }, + { + "epoch": 4.918379105050893, + "grad_norm": 8.703240394592285, + "learning_rate": 4.064029879127518e-09, + "loss": 0.1392, + "num_input_tokens_seen": 241806272, + "step": 76830 + }, + { + "epoch": 4.91869918699187, + "grad_norm": 6.234724521636963, + "learning_rate": 4.0322510645032786e-09, + "loss": 0.1218, + "num_input_tokens_seen": 241823040, + "step": 76835 + }, + { + "epoch": 4.919019268932847, + "grad_norm": 7.224242687225342, + "learning_rate": 4.000596885507524e-09, + "loss": 0.098, + "num_input_tokens_seen": 241837824, + "step": 76840 + }, + { + "epoch": 4.919339350873823, + "grad_norm": 8.19570255279541, + "learning_rate": 3.969067343720379e-09, + "loss": 0.108, + "num_input_tokens_seen": 241853120, + "step": 76845 + }, + { + "epoch": 4.9196594328148, + "grad_norm": 4.190908432006836, + "learning_rate": 3.9376624407169716e-09, + "loss": 0.0886, + "num_input_tokens_seen": 241868928, + "step": 76850 + }, + { + "epoch": 4.919979514755777, + "grad_norm": 6.496078968048096, + "learning_rate": 3.9063821780652156e-09, + "loss": 0.1296, + "num_input_tokens_seen": 241884544, + "step": 76855 + }, + { + "epoch": 4.920299596696754, + "grad_norm": 6.535991668701172, + "learning_rate": 3.875226557326916e-09, + "loss": 0.1034, + "num_input_tokens_seen": 241899392, + "step": 76860 + }, + { + "epoch": 4.920619678637731, + "grad_norm": 6.682768821716309, + "learning_rate": 3.8441955800580525e-09, + "loss": 0.1351, + "num_input_tokens_seen": 241915072, + "step": 76865 + }, + { + "epoch": 4.920939760578708, + "grad_norm": 12.78032398223877, + "learning_rate": 3.813289247807939e-09, + "loss": 0.1709, + "num_input_tokens_seen": 241931840, + "step": 76870 + }, + { + "epoch": 4.921259842519685, + "grad_norm": 6.588595390319824, + "learning_rate": 3.782507562120341e-09, + "loss": 0.1152, + "num_input_tokens_seen": 241948224, + "step": 76875 + }, + { + "epoch": 4.921579924460662, + "grad_norm": 10.240798950195312, + "learning_rate": 3.751850524531531e-09, + "loss": 0.114, + "num_input_tokens_seen": 241963264, + "step": 76880 + }, + { + "epoch": 4.921900006401639, + "grad_norm": 6.473204612731934, + "learning_rate": 3.72131813657306e-09, + "loss": 0.1146, + "num_input_tokens_seen": 241979264, + "step": 76885 + }, + { + "epoch": 4.922220088342616, + "grad_norm": 13.968958854675293, + "learning_rate": 3.690910399769265e-09, + "loss": 0.1478, + "num_input_tokens_seen": 241994624, + "step": 76890 + }, + { + "epoch": 4.922540170283592, + "grad_norm": 6.826053142547607, + "learning_rate": 3.660627315638654e-09, + "loss": 0.1463, + "num_input_tokens_seen": 242010368, + "step": 76895 + }, + { + "epoch": 4.922860252224569, + "grad_norm": 4.532285690307617, + "learning_rate": 3.63046888569335e-09, + "loss": 0.1097, + "num_input_tokens_seen": 242024960, + "step": 76900 + }, + { + "epoch": 4.923180334165546, + "grad_norm": 11.163296699523926, + "learning_rate": 3.600435111439371e-09, + "loss": 0.1899, + "num_input_tokens_seen": 242041152, + "step": 76905 + }, + { + "epoch": 4.923500416106523, + "grad_norm": 6.800040245056152, + "learning_rate": 3.57052599437635e-09, + "loss": 0.1325, + "num_input_tokens_seen": 242057024, + "step": 76910 + }, + { + "epoch": 4.9238204980475, + "grad_norm": 7.298641204833984, + "learning_rate": 3.540741535997816e-09, + "loss": 0.1525, + "num_input_tokens_seen": 242071680, + "step": 76915 + }, + { + "epoch": 4.924140579988477, + "grad_norm": 7.648929595947266, + "learning_rate": 3.5110817377909113e-09, + "loss": 0.1029, + "num_input_tokens_seen": 242086400, + "step": 76920 + }, + { + "epoch": 4.924460661929454, + "grad_norm": 5.769326210021973, + "learning_rate": 3.481546601237229e-09, + "loss": 0.1437, + "num_input_tokens_seen": 242103360, + "step": 76925 + }, + { + "epoch": 4.924780743870431, + "grad_norm": 5.944430351257324, + "learning_rate": 3.4521361278108677e-09, + "loss": 0.1526, + "num_input_tokens_seen": 242118720, + "step": 76930 + }, + { + "epoch": 4.925100825811407, + "grad_norm": 7.977330684661865, + "learning_rate": 3.4228503189809302e-09, + "loss": 0.1564, + "num_input_tokens_seen": 242136320, + "step": 76935 + }, + { + "epoch": 4.925420907752384, + "grad_norm": 7.3111419677734375, + "learning_rate": 3.3936891762095803e-09, + "loss": 0.1275, + "num_input_tokens_seen": 242154048, + "step": 76940 + }, + { + "epoch": 4.925740989693361, + "grad_norm": 6.927733898162842, + "learning_rate": 3.3646527009531528e-09, + "loss": 0.1692, + "num_input_tokens_seen": 242170560, + "step": 76945 + }, + { + "epoch": 4.926061071634338, + "grad_norm": 6.998211860656738, + "learning_rate": 3.335740894661321e-09, + "loss": 0.1088, + "num_input_tokens_seen": 242185472, + "step": 76950 + }, + { + "epoch": 4.926381153575315, + "grad_norm": 8.139201164245605, + "learning_rate": 3.3069537587782086e-09, + "loss": 0.1553, + "num_input_tokens_seen": 242201792, + "step": 76955 + }, + { + "epoch": 4.926701235516292, + "grad_norm": 8.830289840698242, + "learning_rate": 3.278291294740721e-09, + "loss": 0.125, + "num_input_tokens_seen": 242215872, + "step": 76960 + }, + { + "epoch": 4.927021317457269, + "grad_norm": 6.052250862121582, + "learning_rate": 3.2497535039804905e-09, + "loss": 0.1315, + "num_input_tokens_seen": 242231232, + "step": 76965 + }, + { + "epoch": 4.927341399398246, + "grad_norm": 5.849827289581299, + "learning_rate": 3.221340387922489e-09, + "loss": 0.0974, + "num_input_tokens_seen": 242246528, + "step": 76970 + }, + { + "epoch": 4.927661481339223, + "grad_norm": 3.1556789875030518, + "learning_rate": 3.1930519479855816e-09, + "loss": 0.1575, + "num_input_tokens_seen": 242261824, + "step": 76975 + }, + { + "epoch": 4.9279815632802, + "grad_norm": 5.920756816864014, + "learning_rate": 3.164888185582249e-09, + "loss": 0.1637, + "num_input_tokens_seen": 242278592, + "step": 76980 + }, + { + "epoch": 4.928301645221176, + "grad_norm": 7.495852470397949, + "learning_rate": 3.1368491021188663e-09, + "loss": 0.1099, + "num_input_tokens_seen": 242294272, + "step": 76985 + }, + { + "epoch": 4.928621727162153, + "grad_norm": 9.401968002319336, + "learning_rate": 3.108934698995425e-09, + "loss": 0.1196, + "num_input_tokens_seen": 242309248, + "step": 76990 + }, + { + "epoch": 4.92894180910313, + "grad_norm": 6.884219169616699, + "learning_rate": 3.0811449776060874e-09, + "loss": 0.1361, + "num_input_tokens_seen": 242324608, + "step": 76995 + }, + { + "epoch": 4.929261891044107, + "grad_norm": 5.7066450119018555, + "learning_rate": 3.053479939338355e-09, + "loss": 0.0993, + "num_input_tokens_seen": 242340288, + "step": 77000 + }, + { + "epoch": 4.929581972985084, + "grad_norm": 8.051702499389648, + "learning_rate": 3.0259395855739005e-09, + "loss": 0.1388, + "num_input_tokens_seen": 242356288, + "step": 77005 + }, + { + "epoch": 4.929902054926061, + "grad_norm": 14.495574951171875, + "learning_rate": 2.998523917687457e-09, + "loss": 0.1588, + "num_input_tokens_seen": 242372160, + "step": 77010 + }, + { + "epoch": 4.930222136867038, + "grad_norm": 11.310275077819824, + "learning_rate": 2.971232937048485e-09, + "loss": 0.1585, + "num_input_tokens_seen": 242387712, + "step": 77015 + }, + { + "epoch": 4.930542218808014, + "grad_norm": 9.98112678527832, + "learning_rate": 2.9440666450195053e-09, + "loss": 0.128, + "num_input_tokens_seen": 242402624, + "step": 77020 + }, + { + "epoch": 4.930862300748991, + "grad_norm": 5.108853340148926, + "learning_rate": 2.9170250429572113e-09, + "loss": 0.1348, + "num_input_tokens_seen": 242419776, + "step": 77025 + }, + { + "epoch": 4.931182382689968, + "grad_norm": 6.126262664794922, + "learning_rate": 2.8901081322119105e-09, + "loss": 0.1029, + "num_input_tokens_seen": 242435840, + "step": 77030 + }, + { + "epoch": 4.931502464630945, + "grad_norm": 8.728769302368164, + "learning_rate": 2.8633159141275293e-09, + "loss": 0.1312, + "num_input_tokens_seen": 242451520, + "step": 77035 + }, + { + "epoch": 4.931822546571922, + "grad_norm": 6.099876880645752, + "learning_rate": 2.836648390042163e-09, + "loss": 0.1471, + "num_input_tokens_seen": 242467840, + "step": 77040 + }, + { + "epoch": 4.932142628512899, + "grad_norm": 5.729235649108887, + "learning_rate": 2.8101055612872465e-09, + "loss": 0.1564, + "num_input_tokens_seen": 242483840, + "step": 77045 + }, + { + "epoch": 4.932462710453876, + "grad_norm": 5.161963939666748, + "learning_rate": 2.783687429188109e-09, + "loss": 0.1553, + "num_input_tokens_seen": 242501184, + "step": 77050 + }, + { + "epoch": 4.932782792394853, + "grad_norm": 8.329567909240723, + "learning_rate": 2.7573939950642503e-09, + "loss": 0.1446, + "num_input_tokens_seen": 242516800, + "step": 77055 + }, + { + "epoch": 4.93310287433583, + "grad_norm": 6.870696067810059, + "learning_rate": 2.7312252602285093e-09, + "loss": 0.1238, + "num_input_tokens_seen": 242532096, + "step": 77060 + }, + { + "epoch": 4.933422956276807, + "grad_norm": 6.29523229598999, + "learning_rate": 2.705181225987619e-09, + "loss": 0.1247, + "num_input_tokens_seen": 242548416, + "step": 77065 + }, + { + "epoch": 4.933743038217783, + "grad_norm": 10.49675464630127, + "learning_rate": 2.679261893641927e-09, + "loss": 0.1417, + "num_input_tokens_seen": 242563776, + "step": 77070 + }, + { + "epoch": 4.93406312015876, + "grad_norm": 4.983008861541748, + "learning_rate": 2.6534672644859536e-09, + "loss": 0.1207, + "num_input_tokens_seen": 242579008, + "step": 77075 + }, + { + "epoch": 4.934383202099737, + "grad_norm": 5.709385395050049, + "learning_rate": 2.6277973398075586e-09, + "loss": 0.1284, + "num_input_tokens_seen": 242593856, + "step": 77080 + }, + { + "epoch": 4.934703284040714, + "grad_norm": 8.491381645202637, + "learning_rate": 2.602252120888493e-09, + "loss": 0.1056, + "num_input_tokens_seen": 242609664, + "step": 77085 + }, + { + "epoch": 4.935023365981691, + "grad_norm": 5.368650436401367, + "learning_rate": 2.5768316090046817e-09, + "loss": 0.1059, + "num_input_tokens_seen": 242625152, + "step": 77090 + }, + { + "epoch": 4.935343447922668, + "grad_norm": 6.511684417724609, + "learning_rate": 2.5515358054251095e-09, + "loss": 0.1323, + "num_input_tokens_seen": 242639872, + "step": 77095 + }, + { + "epoch": 4.935663529863645, + "grad_norm": 6.941057205200195, + "learning_rate": 2.5263647114132096e-09, + "loss": 0.097, + "num_input_tokens_seen": 242655936, + "step": 77100 + }, + { + "epoch": 4.935983611804622, + "grad_norm": 4.528468132019043, + "learning_rate": 2.501318328226032e-09, + "loss": 0.1782, + "num_input_tokens_seen": 242671424, + "step": 77105 + }, + { + "epoch": 4.936303693745598, + "grad_norm": 12.634703636169434, + "learning_rate": 2.4763966571136888e-09, + "loss": 0.1143, + "num_input_tokens_seen": 242686784, + "step": 77110 + }, + { + "epoch": 4.936623775686575, + "grad_norm": 5.698779582977295, + "learning_rate": 2.4515996993210167e-09, + "loss": 0.1169, + "num_input_tokens_seen": 242702208, + "step": 77115 + }, + { + "epoch": 4.936943857627552, + "grad_norm": 10.934432029724121, + "learning_rate": 2.4269274560861923e-09, + "loss": 0.1495, + "num_input_tokens_seen": 242717056, + "step": 77120 + }, + { + "epoch": 4.937263939568529, + "grad_norm": 5.815536022186279, + "learning_rate": 2.402379928641285e-09, + "loss": 0.0898, + "num_input_tokens_seen": 242732992, + "step": 77125 + }, + { + "epoch": 4.937584021509506, + "grad_norm": 41.47750473022461, + "learning_rate": 2.377957118211982e-09, + "loss": 0.1133, + "num_input_tokens_seen": 242748288, + "step": 77130 + }, + { + "epoch": 4.937904103450483, + "grad_norm": 9.638845443725586, + "learning_rate": 2.3536590260181402e-09, + "loss": 0.1387, + "num_input_tokens_seen": 242763264, + "step": 77135 + }, + { + "epoch": 4.93822418539146, + "grad_norm": 6.575776100158691, + "learning_rate": 2.3294856532724008e-09, + "loss": 0.13, + "num_input_tokens_seen": 242778560, + "step": 77140 + }, + { + "epoch": 4.938544267332437, + "grad_norm": 8.137701034545898, + "learning_rate": 2.305437001182409e-09, + "loss": 0.1385, + "num_input_tokens_seen": 242793920, + "step": 77145 + }, + { + "epoch": 4.938864349273414, + "grad_norm": 6.214886665344238, + "learning_rate": 2.281513070948871e-09, + "loss": 0.1293, + "num_input_tokens_seen": 242810176, + "step": 77150 + }, + { + "epoch": 4.939184431214391, + "grad_norm": 7.4593658447265625, + "learning_rate": 2.2577138637663865e-09, + "loss": 0.0911, + "num_input_tokens_seen": 242826432, + "step": 77155 + }, + { + "epoch": 4.939504513155367, + "grad_norm": 6.785914897918701, + "learning_rate": 2.2340393808234496e-09, + "loss": 0.1056, + "num_input_tokens_seen": 242841216, + "step": 77160 + }, + { + "epoch": 4.939824595096344, + "grad_norm": 4.388907432556152, + "learning_rate": 2.2104896233021698e-09, + "loss": 0.1069, + "num_input_tokens_seen": 242856000, + "step": 77165 + }, + { + "epoch": 4.940144677037321, + "grad_norm": 9.696898460388184, + "learning_rate": 2.1870645923788292e-09, + "loss": 0.1748, + "num_input_tokens_seen": 242871872, + "step": 77170 + }, + { + "epoch": 4.940464758978298, + "grad_norm": 9.857001304626465, + "learning_rate": 2.163764289222492e-09, + "loss": 0.1392, + "num_input_tokens_seen": 242890048, + "step": 77175 + }, + { + "epoch": 4.940784840919275, + "grad_norm": 7.38655424118042, + "learning_rate": 2.140588714997227e-09, + "loss": 0.1054, + "num_input_tokens_seen": 242905600, + "step": 77180 + }, + { + "epoch": 4.941104922860252, + "grad_norm": 8.298417091369629, + "learning_rate": 2.117537870859887e-09, + "loss": 0.2571, + "num_input_tokens_seen": 242921472, + "step": 77185 + }, + { + "epoch": 4.941425004801229, + "grad_norm": 6.074807643890381, + "learning_rate": 2.0946117579617733e-09, + "loss": 0.1923, + "num_input_tokens_seen": 242937088, + "step": 77190 + }, + { + "epoch": 4.941745086742206, + "grad_norm": 9.600857734680176, + "learning_rate": 2.071810377447525e-09, + "loss": 0.1124, + "num_input_tokens_seen": 242953728, + "step": 77195 + }, + { + "epoch": 4.9420651686831825, + "grad_norm": 6.924229621887207, + "learning_rate": 2.0491337304559546e-09, + "loss": 0.0859, + "num_input_tokens_seen": 242970048, + "step": 77200 + }, + { + "epoch": 4.9423852506241595, + "grad_norm": 7.009031772613525, + "learning_rate": 2.0265818181194885e-09, + "loss": 0.173, + "num_input_tokens_seen": 242986624, + "step": 77205 + }, + { + "epoch": 4.9427053325651364, + "grad_norm": 14.338005065917969, + "learning_rate": 2.004154641563616e-09, + "loss": 0.1756, + "num_input_tokens_seen": 243001536, + "step": 77210 + }, + { + "epoch": 4.943025414506113, + "grad_norm": 6.614535331726074, + "learning_rate": 1.981852201909107e-09, + "loss": 0.1873, + "num_input_tokens_seen": 243018432, + "step": 77215 + }, + { + "epoch": 4.94334549644709, + "grad_norm": 5.050029277801514, + "learning_rate": 1.9596745002689597e-09, + "loss": 0.1458, + "num_input_tokens_seen": 243033920, + "step": 77220 + }, + { + "epoch": 4.943665578388067, + "grad_norm": 8.336152076721191, + "learning_rate": 1.9376215377509e-09, + "loss": 0.1194, + "num_input_tokens_seen": 243048896, + "step": 77225 + }, + { + "epoch": 4.943985660329044, + "grad_norm": 4.455416679382324, + "learning_rate": 1.9156933154559908e-09, + "loss": 0.1452, + "num_input_tokens_seen": 243064576, + "step": 77230 + }, + { + "epoch": 4.944305742270021, + "grad_norm": 9.272031784057617, + "learning_rate": 1.893889834479468e-09, + "loss": 0.1342, + "num_input_tokens_seen": 243080320, + "step": 77235 + }, + { + "epoch": 4.944625824210998, + "grad_norm": 9.008463859558105, + "learning_rate": 1.8722110959099037e-09, + "loss": 0.1674, + "num_input_tokens_seen": 243095744, + "step": 77240 + }, + { + "epoch": 4.944945906151975, + "grad_norm": 3.998835325241089, + "learning_rate": 1.850657100829767e-09, + "loss": 0.1205, + "num_input_tokens_seen": 243112320, + "step": 77245 + }, + { + "epoch": 4.9452659880929515, + "grad_norm": 9.189290046691895, + "learning_rate": 1.8292278503156958e-09, + "loss": 0.1471, + "num_input_tokens_seen": 243127168, + "step": 77250 + }, + { + "epoch": 4.9455860700339285, + "grad_norm": 6.334533214569092, + "learning_rate": 1.8079233454371126e-09, + "loss": 0.1377, + "num_input_tokens_seen": 243142848, + "step": 77255 + }, + { + "epoch": 4.9459061519749055, + "grad_norm": 4.716793537139893, + "learning_rate": 1.7867435872584437e-09, + "loss": 0.0792, + "num_input_tokens_seen": 243159552, + "step": 77260 + }, + { + "epoch": 4.9462262339158825, + "grad_norm": 6.879601955413818, + "learning_rate": 1.7656885768368992e-09, + "loss": 0.1807, + "num_input_tokens_seen": 243175040, + "step": 77265 + }, + { + "epoch": 4.946546315856859, + "grad_norm": 6.152540683746338, + "learning_rate": 1.7447583152244153e-09, + "loss": 0.1333, + "num_input_tokens_seen": 243190912, + "step": 77270 + }, + { + "epoch": 4.946866397797836, + "grad_norm": 9.838340759277344, + "learning_rate": 1.723952803465434e-09, + "loss": 0.1126, + "num_input_tokens_seen": 243205952, + "step": 77275 + }, + { + "epoch": 4.947186479738813, + "grad_norm": 9.486599922180176, + "learning_rate": 1.703272042599402e-09, + "loss": 0.1722, + "num_input_tokens_seen": 243220928, + "step": 77280 + }, + { + "epoch": 4.9475065616797895, + "grad_norm": 9.004573822021484, + "learning_rate": 1.682716033658549e-09, + "loss": 0.1612, + "num_input_tokens_seen": 243237824, + "step": 77285 + }, + { + "epoch": 4.9478266436207665, + "grad_norm": 6.252070426940918, + "learning_rate": 1.6622847776698314e-09, + "loss": 0.1464, + "num_input_tokens_seen": 243252928, + "step": 77290 + }, + { + "epoch": 4.9481467255617435, + "grad_norm": 8.646941184997559, + "learning_rate": 1.641978275652989e-09, + "loss": 0.1613, + "num_input_tokens_seen": 243270144, + "step": 77295 + }, + { + "epoch": 4.9484668075027205, + "grad_norm": 3.379074811935425, + "learning_rate": 1.621796528622488e-09, + "loss": 0.1337, + "num_input_tokens_seen": 243286016, + "step": 77300 + }, + { + "epoch": 4.9487868894436975, + "grad_norm": 9.822723388671875, + "learning_rate": 1.6017395375858557e-09, + "loss": 0.1124, + "num_input_tokens_seen": 243301248, + "step": 77305 + }, + { + "epoch": 4.9491069713846745, + "grad_norm": 5.724215030670166, + "learning_rate": 1.5818073035447912e-09, + "loss": 0.1515, + "num_input_tokens_seen": 243316800, + "step": 77310 + }, + { + "epoch": 4.9494270533256515, + "grad_norm": 4.858299255371094, + "learning_rate": 1.5619998274943316e-09, + "loss": 0.1255, + "num_input_tokens_seen": 243332224, + "step": 77315 + }, + { + "epoch": 4.9497471352666285, + "grad_norm": 6.597044467926025, + "learning_rate": 1.5423171104236857e-09, + "loss": 0.1724, + "num_input_tokens_seen": 243348096, + "step": 77320 + }, + { + "epoch": 4.9500672172076055, + "grad_norm": 4.285383701324463, + "learning_rate": 1.522759153315956e-09, + "loss": 0.1574, + "num_input_tokens_seen": 243363392, + "step": 77325 + }, + { + "epoch": 4.950387299148582, + "grad_norm": 2.9106638431549072, + "learning_rate": 1.5033259571473058e-09, + "loss": 0.1445, + "num_input_tokens_seen": 243379520, + "step": 77330 + }, + { + "epoch": 4.9507073810895585, + "grad_norm": 19.39719009399414, + "learning_rate": 1.484017522888348e-09, + "loss": 0.1646, + "num_input_tokens_seen": 243394368, + "step": 77335 + }, + { + "epoch": 4.9510274630305355, + "grad_norm": 7.788175106048584, + "learning_rate": 1.4648338515033112e-09, + "loss": 0.1214, + "num_input_tokens_seen": 243409216, + "step": 77340 + }, + { + "epoch": 4.9513475449715125, + "grad_norm": 9.31411075592041, + "learning_rate": 1.4457749439503178e-09, + "loss": 0.1642, + "num_input_tokens_seen": 243424256, + "step": 77345 + }, + { + "epoch": 4.9516676269124895, + "grad_norm": 8.537033081054688, + "learning_rate": 1.4268408011805512e-09, + "loss": 0.1421, + "num_input_tokens_seen": 243439936, + "step": 77350 + }, + { + "epoch": 4.9519877088534665, + "grad_norm": 4.786579608917236, + "learning_rate": 1.4080314241399218e-09, + "loss": 0.1381, + "num_input_tokens_seen": 243454656, + "step": 77355 + }, + { + "epoch": 4.9523077907944435, + "grad_norm": 7.662133693695068, + "learning_rate": 1.3893468137674005e-09, + "loss": 0.1524, + "num_input_tokens_seen": 243469632, + "step": 77360 + }, + { + "epoch": 4.9526278727354205, + "grad_norm": 2.970719575881958, + "learning_rate": 1.3707869709961297e-09, + "loss": 0.1276, + "num_input_tokens_seen": 243485056, + "step": 77365 + }, + { + "epoch": 4.9529479546763975, + "grad_norm": 9.253515243530273, + "learning_rate": 1.3523518967528682e-09, + "loss": 0.152, + "num_input_tokens_seen": 243500480, + "step": 77370 + }, + { + "epoch": 4.953268036617374, + "grad_norm": 10.051335334777832, + "learning_rate": 1.3340415919579908e-09, + "loss": 0.1355, + "num_input_tokens_seen": 243515968, + "step": 77375 + }, + { + "epoch": 4.953588118558351, + "grad_norm": 6.8624267578125, + "learning_rate": 1.3158560575263212e-09, + "loss": 0.1437, + "num_input_tokens_seen": 243531136, + "step": 77380 + }, + { + "epoch": 4.953908200499328, + "grad_norm": 7.502879619598389, + "learning_rate": 1.297795294365467e-09, + "loss": 0.1236, + "num_input_tokens_seen": 243546176, + "step": 77385 + }, + { + "epoch": 4.9542282824403046, + "grad_norm": 13.300885200500488, + "learning_rate": 1.2798593033774843e-09, + "loss": 0.157, + "num_input_tokens_seen": 243561536, + "step": 77390 + }, + { + "epoch": 4.9545483643812815, + "grad_norm": 5.016808986663818, + "learning_rate": 1.2620480854580453e-09, + "loss": 0.0718, + "num_input_tokens_seen": 243576320, + "step": 77395 + }, + { + "epoch": 4.9548684463222585, + "grad_norm": 10.842459678649902, + "learning_rate": 1.2443616414964387e-09, + "loss": 0.0955, + "num_input_tokens_seen": 243591488, + "step": 77400 + }, + { + "epoch": 4.9551885282632355, + "grad_norm": 4.678999900817871, + "learning_rate": 1.2267999723758472e-09, + "loss": 0.1424, + "num_input_tokens_seen": 243608832, + "step": 77405 + }, + { + "epoch": 4.9555086102042125, + "grad_norm": 7.893259525299072, + "learning_rate": 1.2093630789730692e-09, + "loss": 0.175, + "num_input_tokens_seen": 243625536, + "step": 77410 + }, + { + "epoch": 4.9558286921451895, + "grad_norm": 9.132567405700684, + "learning_rate": 1.1920509621590748e-09, + "loss": 0.1116, + "num_input_tokens_seen": 243639808, + "step": 77415 + }, + { + "epoch": 4.9561487740861665, + "grad_norm": 8.721930503845215, + "learning_rate": 1.17486362279845e-09, + "loss": 0.1386, + "num_input_tokens_seen": 243655424, + "step": 77420 + }, + { + "epoch": 4.956468856027143, + "grad_norm": 8.110475540161133, + "learning_rate": 1.1578010617488422e-09, + "loss": 0.1339, + "num_input_tokens_seen": 243670848, + "step": 77425 + }, + { + "epoch": 4.95678893796812, + "grad_norm": 5.608476161956787, + "learning_rate": 1.1408632798629027e-09, + "loss": 0.0594, + "num_input_tokens_seen": 243685120, + "step": 77430 + }, + { + "epoch": 4.957109019909097, + "grad_norm": 16.302297592163086, + "learning_rate": 1.1240502779863438e-09, + "loss": 0.1308, + "num_input_tokens_seen": 243699776, + "step": 77435 + }, + { + "epoch": 4.957429101850074, + "grad_norm": 9.662297248840332, + "learning_rate": 1.1073620569582167e-09, + "loss": 0.124, + "num_input_tokens_seen": 243715904, + "step": 77440 + }, + { + "epoch": 4.957749183791051, + "grad_norm": 4.807847023010254, + "learning_rate": 1.0907986176122988e-09, + "loss": 0.1289, + "num_input_tokens_seen": 243731264, + "step": 77445 + }, + { + "epoch": 4.9580692657320276, + "grad_norm": 8.0792236328125, + "learning_rate": 1.0743599607754285e-09, + "loss": 0.127, + "num_input_tokens_seen": 243747392, + "step": 77450 + }, + { + "epoch": 4.9583893476730045, + "grad_norm": 6.266168594360352, + "learning_rate": 1.0580460872688937e-09, + "loss": 0.1257, + "num_input_tokens_seen": 243763456, + "step": 77455 + }, + { + "epoch": 4.9587094296139815, + "grad_norm": 7.881096363067627, + "learning_rate": 1.041856997906765e-09, + "loss": 0.1227, + "num_input_tokens_seen": 243779904, + "step": 77460 + }, + { + "epoch": 4.959029511554958, + "grad_norm": 7.8230085372924805, + "learning_rate": 1.0257926934978402e-09, + "loss": 0.1338, + "num_input_tokens_seen": 243795648, + "step": 77465 + }, + { + "epoch": 4.959349593495935, + "grad_norm": 5.054243087768555, + "learning_rate": 1.0098531748439778e-09, + "loss": 0.0907, + "num_input_tokens_seen": 243810624, + "step": 77470 + }, + { + "epoch": 4.959669675436912, + "grad_norm": 4.082937717437744, + "learning_rate": 9.94038442741485e-10, + "loss": 0.1494, + "num_input_tokens_seen": 243825408, + "step": 77475 + }, + { + "epoch": 4.959989757377889, + "grad_norm": 10.398564338684082, + "learning_rate": 9.783484979797309e-10, + "loss": 0.1262, + "num_input_tokens_seen": 243841472, + "step": 77480 + }, + { + "epoch": 4.960309839318866, + "grad_norm": 4.146139144897461, + "learning_rate": 9.627833413425324e-10, + "loss": 0.0931, + "num_input_tokens_seen": 243856448, + "step": 77485 + }, + { + "epoch": 4.960629921259843, + "grad_norm": 1.1739405393600464, + "learning_rate": 9.473429736067686e-10, + "loss": 0.1191, + "num_input_tokens_seen": 243872128, + "step": 77490 + }, + { + "epoch": 4.96095000320082, + "grad_norm": 5.535854816436768, + "learning_rate": 9.32027395543489e-10, + "loss": 0.1775, + "num_input_tokens_seen": 243889600, + "step": 77495 + }, + { + "epoch": 4.961270085141797, + "grad_norm": 4.668059349060059, + "learning_rate": 9.168366079176372e-10, + "loss": 0.1342, + "num_input_tokens_seen": 243905536, + "step": 77500 + }, + { + "epoch": 4.961590167082774, + "grad_norm": 10.475799560546875, + "learning_rate": 9.017706114877734e-10, + "loss": 0.1372, + "num_input_tokens_seen": 243920000, + "step": 77505 + }, + { + "epoch": 4.9619102490237506, + "grad_norm": 9.07540225982666, + "learning_rate": 8.86829407006351e-10, + "loss": 0.1475, + "num_input_tokens_seen": 243936960, + "step": 77510 + }, + { + "epoch": 4.962230330964727, + "grad_norm": 6.2348456382751465, + "learning_rate": 8.720129952188849e-10, + "loss": 0.1515, + "num_input_tokens_seen": 243952832, + "step": 77515 + }, + { + "epoch": 4.962550412905704, + "grad_norm": 6.448611259460449, + "learning_rate": 8.573213768658939e-10, + "loss": 0.1125, + "num_input_tokens_seen": 243968896, + "step": 77520 + }, + { + "epoch": 4.962870494846681, + "grad_norm": 5.735324859619141, + "learning_rate": 8.427545526806802e-10, + "loss": 0.1148, + "num_input_tokens_seen": 243983488, + "step": 77525 + }, + { + "epoch": 4.963190576787658, + "grad_norm": 5.723547458648682, + "learning_rate": 8.283125233904399e-10, + "loss": 0.1502, + "num_input_tokens_seen": 243999296, + "step": 77530 + }, + { + "epoch": 4.963510658728635, + "grad_norm": 5.943792343139648, + "learning_rate": 8.139952897168179e-10, + "loss": 0.118, + "num_input_tokens_seen": 244014528, + "step": 77535 + }, + { + "epoch": 4.963830740669612, + "grad_norm": 8.086183547973633, + "learning_rate": 7.998028523745205e-10, + "loss": 0.1087, + "num_input_tokens_seen": 244029952, + "step": 77540 + }, + { + "epoch": 4.964150822610589, + "grad_norm": 4.406962871551514, + "learning_rate": 7.857352120718698e-10, + "loss": 0.1206, + "num_input_tokens_seen": 244045824, + "step": 77545 + }, + { + "epoch": 4.964470904551566, + "grad_norm": 7.285211563110352, + "learning_rate": 7.717923695119145e-10, + "loss": 0.1345, + "num_input_tokens_seen": 244060928, + "step": 77550 + }, + { + "epoch": 4.964790986492542, + "grad_norm": 4.908071994781494, + "learning_rate": 7.579743253907645e-10, + "loss": 0.1289, + "num_input_tokens_seen": 244075776, + "step": 77555 + }, + { + "epoch": 4.965111068433519, + "grad_norm": 9.025750160217285, + "learning_rate": 7.442810803981459e-10, + "loss": 0.181, + "num_input_tokens_seen": 244092928, + "step": 77560 + }, + { + "epoch": 4.965431150374496, + "grad_norm": 4.976108551025391, + "learning_rate": 7.307126352179561e-10, + "loss": 0.1588, + "num_input_tokens_seen": 244108288, + "step": 77565 + }, + { + "epoch": 4.965751232315473, + "grad_norm": 4.687877178192139, + "learning_rate": 7.172689905277086e-10, + "loss": 0.1545, + "num_input_tokens_seen": 244122944, + "step": 77570 + }, + { + "epoch": 4.96607131425645, + "grad_norm": 11.355158805847168, + "learning_rate": 7.039501469988108e-10, + "loss": 0.1665, + "num_input_tokens_seen": 244139008, + "step": 77575 + }, + { + "epoch": 4.966391396197427, + "grad_norm": 7.202480792999268, + "learning_rate": 6.907561052962863e-10, + "loss": 0.127, + "num_input_tokens_seen": 244156096, + "step": 77580 + }, + { + "epoch": 4.966711478138404, + "grad_norm": 5.10023307800293, + "learning_rate": 6.776868660790525e-10, + "loss": 0.1055, + "num_input_tokens_seen": 244173376, + "step": 77585 + }, + { + "epoch": 4.967031560079381, + "grad_norm": 4.832103729248047, + "learning_rate": 6.64742429999643e-10, + "loss": 0.123, + "num_input_tokens_seen": 244188352, + "step": 77590 + }, + { + "epoch": 4.967351642020358, + "grad_norm": 5.290318965911865, + "learning_rate": 6.519227977044851e-10, + "loss": 0.1075, + "num_input_tokens_seen": 244205120, + "step": 77595 + }, + { + "epoch": 4.967671723961334, + "grad_norm": 6.52385950088501, + "learning_rate": 6.392279698333447e-10, + "loss": 0.1182, + "num_input_tokens_seen": 244220672, + "step": 77600 + }, + { + "epoch": 4.967991805902311, + "grad_norm": 3.4638426303863525, + "learning_rate": 6.266579470207146e-10, + "loss": 0.167, + "num_input_tokens_seen": 244236032, + "step": 77605 + }, + { + "epoch": 4.968311887843288, + "grad_norm": 8.986452102661133, + "learning_rate": 6.14212729894148e-10, + "loss": 0.1858, + "num_input_tokens_seen": 244252800, + "step": 77610 + }, + { + "epoch": 4.968631969784265, + "grad_norm": 9.516409873962402, + "learning_rate": 6.018923190748149e-10, + "loss": 0.1277, + "num_input_tokens_seen": 244268544, + "step": 77615 + }, + { + "epoch": 4.968952051725242, + "grad_norm": 4.160053730010986, + "learning_rate": 5.896967151780564e-10, + "loss": 0.121, + "num_input_tokens_seen": 244283264, + "step": 77620 + }, + { + "epoch": 4.969272133666219, + "grad_norm": 5.6827311515808105, + "learning_rate": 5.776259188131073e-10, + "loss": 0.1355, + "num_input_tokens_seen": 244299712, + "step": 77625 + }, + { + "epoch": 4.969592215607196, + "grad_norm": 3.0212180614471436, + "learning_rate": 5.656799305822636e-10, + "loss": 0.1311, + "num_input_tokens_seen": 244314816, + "step": 77630 + }, + { + "epoch": 4.969912297548173, + "grad_norm": 9.061809539794922, + "learning_rate": 5.538587510822702e-10, + "loss": 0.1564, + "num_input_tokens_seen": 244330496, + "step": 77635 + }, + { + "epoch": 4.970232379489149, + "grad_norm": 64.31742095947266, + "learning_rate": 5.421623809034882e-10, + "loss": 0.1397, + "num_input_tokens_seen": 244345728, + "step": 77640 + }, + { + "epoch": 4.970552461430126, + "grad_norm": 4.507245063781738, + "learning_rate": 5.305908206298949e-10, + "loss": 0.0867, + "num_input_tokens_seen": 244361728, + "step": 77645 + }, + { + "epoch": 4.970872543371103, + "grad_norm": 7.007059574127197, + "learning_rate": 5.191440708393613e-10, + "loss": 0.1088, + "num_input_tokens_seen": 244377728, + "step": 77650 + }, + { + "epoch": 4.97119262531208, + "grad_norm": 5.581309795379639, + "learning_rate": 5.078221321036525e-10, + "loss": 0.1322, + "num_input_tokens_seen": 244392640, + "step": 77655 + }, + { + "epoch": 4.971512707253057, + "grad_norm": 28.570146560668945, + "learning_rate": 4.966250049875942e-10, + "loss": 0.1037, + "num_input_tokens_seen": 244407616, + "step": 77660 + }, + { + "epoch": 4.971832789194034, + "grad_norm": 4.534883975982666, + "learning_rate": 4.855526900510166e-10, + "loss": 0.0899, + "num_input_tokens_seen": 244425216, + "step": 77665 + }, + { + "epoch": 4.972152871135011, + "grad_norm": 5.893171310424805, + "learning_rate": 4.746051878462554e-10, + "loss": 0.1292, + "num_input_tokens_seen": 244443136, + "step": 77670 + }, + { + "epoch": 4.972472953075988, + "grad_norm": 7.777210712432861, + "learning_rate": 4.6378249892009565e-10, + "loss": 0.1404, + "num_input_tokens_seen": 244459648, + "step": 77675 + }, + { + "epoch": 4.972793035016965, + "grad_norm": 3.476093292236328, + "learning_rate": 4.5308462381293826e-10, + "loss": 0.1061, + "num_input_tokens_seen": 244475840, + "step": 77680 + }, + { + "epoch": 4.973113116957942, + "grad_norm": 3.338660478591919, + "learning_rate": 4.425115630593557e-10, + "loss": 0.1305, + "num_input_tokens_seen": 244492224, + "step": 77685 + }, + { + "epoch": 4.973433198898918, + "grad_norm": 5.301723957061768, + "learning_rate": 4.320633171867039e-10, + "loss": 0.1381, + "num_input_tokens_seen": 244508608, + "step": 77690 + }, + { + "epoch": 4.973753280839895, + "grad_norm": 10.410428047180176, + "learning_rate": 4.217398867173428e-10, + "loss": 0.1032, + "num_input_tokens_seen": 244523328, + "step": 77695 + }, + { + "epoch": 4.974073362780872, + "grad_norm": 6.710970878601074, + "learning_rate": 4.1154127216613826e-10, + "loss": 0.1129, + "num_input_tokens_seen": 244540352, + "step": 77700 + }, + { + "epoch": 4.974393444721849, + "grad_norm": 4.084699630737305, + "learning_rate": 4.014674740429603e-10, + "loss": 0.1084, + "num_input_tokens_seen": 244555840, + "step": 77705 + }, + { + "epoch": 4.974713526662826, + "grad_norm": 6.0012311935424805, + "learning_rate": 3.915184928504623e-10, + "loss": 0.1037, + "num_input_tokens_seen": 244570752, + "step": 77710 + }, + { + "epoch": 4.975033608603803, + "grad_norm": 10.94933795928955, + "learning_rate": 3.816943290854691e-10, + "loss": 0.132, + "num_input_tokens_seen": 244585472, + "step": 77715 + }, + { + "epoch": 4.97535369054478, + "grad_norm": 7.800212860107422, + "learning_rate": 3.719949832386993e-10, + "loss": 0.1101, + "num_input_tokens_seen": 244600640, + "step": 77720 + }, + { + "epoch": 4.975673772485757, + "grad_norm": 13.990882873535156, + "learning_rate": 3.624204557944877e-10, + "loss": 0.0917, + "num_input_tokens_seen": 244616128, + "step": 77725 + }, + { + "epoch": 4.975993854426733, + "grad_norm": 12.781333923339844, + "learning_rate": 3.5297074723078527e-10, + "loss": 0.1886, + "num_input_tokens_seen": 244632128, + "step": 77730 + }, + { + "epoch": 4.97631393636771, + "grad_norm": 10.67518138885498, + "learning_rate": 3.4364585801943685e-10, + "loss": 0.1892, + "num_input_tokens_seen": 244647808, + "step": 77735 + }, + { + "epoch": 4.976634018308687, + "grad_norm": 5.821952819824219, + "learning_rate": 3.344457886264585e-10, + "loss": 0.1363, + "num_input_tokens_seen": 244663808, + "step": 77740 + }, + { + "epoch": 4.976954100249664, + "grad_norm": 8.862136840820312, + "learning_rate": 3.253705395106499e-10, + "loss": 0.1687, + "num_input_tokens_seen": 244679936, + "step": 77745 + }, + { + "epoch": 4.977274182190641, + "grad_norm": 6.281673908233643, + "learning_rate": 3.1642011112581474e-10, + "loss": 0.1167, + "num_input_tokens_seen": 244695680, + "step": 77750 + }, + { + "epoch": 4.977594264131618, + "grad_norm": 8.272185325622559, + "learning_rate": 3.075945039182626e-10, + "loss": 0.1144, + "num_input_tokens_seen": 244710784, + "step": 77755 + }, + { + "epoch": 4.977914346072595, + "grad_norm": 6.297723293304443, + "learning_rate": 2.9889371832930725e-10, + "loss": 0.1137, + "num_input_tokens_seen": 244727744, + "step": 77760 + }, + { + "epoch": 4.978234428013572, + "grad_norm": 6.462857246398926, + "learning_rate": 2.903177547930458e-10, + "loss": 0.1229, + "num_input_tokens_seen": 244743360, + "step": 77765 + }, + { + "epoch": 4.978554509954549, + "grad_norm": 8.549174308776855, + "learning_rate": 2.8186661373774683e-10, + "loss": 0.0964, + "num_input_tokens_seen": 244759232, + "step": 77770 + }, + { + "epoch": 4.978874591895526, + "grad_norm": 12.1586332321167, + "learning_rate": 2.735402955855726e-10, + "loss": 0.1706, + "num_input_tokens_seen": 244774016, + "step": 77775 + }, + { + "epoch": 4.979194673836502, + "grad_norm": 11.11378288269043, + "learning_rate": 2.6533880075202414e-10, + "loss": 0.1908, + "num_input_tokens_seen": 244789440, + "step": 77780 + }, + { + "epoch": 4.979514755777479, + "grad_norm": 5.261876583099365, + "learning_rate": 2.5726212964677364e-10, + "loss": 0.1037, + "num_input_tokens_seen": 244804096, + "step": 77785 + }, + { + "epoch": 4.979834837718456, + "grad_norm": 6.9109344482421875, + "learning_rate": 2.493102826733873e-10, + "loss": 0.1278, + "num_input_tokens_seen": 244819520, + "step": 77790 + }, + { + "epoch": 4.980154919659433, + "grad_norm": 10.656251907348633, + "learning_rate": 2.414832602287698e-10, + "loss": 0.1482, + "num_input_tokens_seen": 244835712, + "step": 77795 + }, + { + "epoch": 4.98047500160041, + "grad_norm": 6.124893665313721, + "learning_rate": 2.3378106270344204e-10, + "loss": 0.1263, + "num_input_tokens_seen": 244851520, + "step": 77800 + }, + { + "epoch": 4.980795083541387, + "grad_norm": 5.114864826202393, + "learning_rate": 2.262036904823739e-10, + "loss": 0.1208, + "num_input_tokens_seen": 244867712, + "step": 77805 + }, + { + "epoch": 4.981115165482364, + "grad_norm": 4.8491339683532715, + "learning_rate": 2.1875114394387387e-10, + "loss": 0.1306, + "num_input_tokens_seen": 244883136, + "step": 77810 + }, + { + "epoch": 4.981435247423341, + "grad_norm": 4.916806697845459, + "learning_rate": 2.114234234601442e-10, + "loss": 0.1069, + "num_input_tokens_seen": 244898112, + "step": 77815 + }, + { + "epoch": 4.981755329364317, + "grad_norm": 8.027032852172852, + "learning_rate": 2.0422052939700343e-10, + "loss": 0.1617, + "num_input_tokens_seen": 244913600, + "step": 77820 + }, + { + "epoch": 4.982075411305294, + "grad_norm": 6.44345760345459, + "learning_rate": 1.9714246211388622e-10, + "loss": 0.1076, + "num_input_tokens_seen": 244929088, + "step": 77825 + }, + { + "epoch": 4.982395493246271, + "grad_norm": 5.906368732452393, + "learning_rate": 1.9018922196467615e-10, + "loss": 0.1483, + "num_input_tokens_seen": 244944832, + "step": 77830 + }, + { + "epoch": 4.982715575187248, + "grad_norm": 4.846679210662842, + "learning_rate": 1.8336080929631793e-10, + "loss": 0.1109, + "num_input_tokens_seen": 244960960, + "step": 77835 + }, + { + "epoch": 4.983035657128225, + "grad_norm": 18.578542709350586, + "learning_rate": 1.7665722444992761e-10, + "loss": 0.1747, + "num_input_tokens_seen": 244977408, + "step": 77840 + }, + { + "epoch": 4.983355739069202, + "grad_norm": 4.724497318267822, + "learning_rate": 1.700784677602374e-10, + "loss": 0.1474, + "num_input_tokens_seen": 244993664, + "step": 77845 + }, + { + "epoch": 4.983675821010179, + "grad_norm": 4.007516384124756, + "learning_rate": 1.6362453955559575e-10, + "loss": 0.0894, + "num_input_tokens_seen": 245009152, + "step": 77850 + }, + { + "epoch": 4.983995902951156, + "grad_norm": 6.697225570678711, + "learning_rate": 1.5729544015824494e-10, + "loss": 0.1345, + "num_input_tokens_seen": 245025216, + "step": 77855 + }, + { + "epoch": 4.984315984892133, + "grad_norm": 5.347310543060303, + "learning_rate": 1.5109116988432092e-10, + "loss": 0.1322, + "num_input_tokens_seen": 245040896, + "step": 77860 + }, + { + "epoch": 4.984636066833109, + "grad_norm": 8.752092361450195, + "learning_rate": 1.4501172904385353e-10, + "loss": 0.1358, + "num_input_tokens_seen": 245055808, + "step": 77865 + }, + { + "epoch": 4.984956148774086, + "grad_norm": 6.082198143005371, + "learning_rate": 1.3905711794021114e-10, + "loss": 0.1047, + "num_input_tokens_seen": 245072448, + "step": 77870 + }, + { + "epoch": 4.985276230715063, + "grad_norm": 5.780874729156494, + "learning_rate": 1.3322733687065604e-10, + "loss": 0.1337, + "num_input_tokens_seen": 245088320, + "step": 77875 + }, + { + "epoch": 4.98559631265604, + "grad_norm": 7.8690948486328125, + "learning_rate": 1.2752238612662172e-10, + "loss": 0.1103, + "num_input_tokens_seen": 245104128, + "step": 77880 + }, + { + "epoch": 4.985916394597017, + "grad_norm": 7.348402976989746, + "learning_rate": 1.2194226599260283e-10, + "loss": 0.1379, + "num_input_tokens_seen": 245120512, + "step": 77885 + }, + { + "epoch": 4.986236476537994, + "grad_norm": 5.879912376403809, + "learning_rate": 1.164869767472654e-10, + "loss": 0.1084, + "num_input_tokens_seen": 245136896, + "step": 77890 + }, + { + "epoch": 4.986556558478971, + "grad_norm": 8.0476713180542, + "learning_rate": 1.1115651866344667e-10, + "loss": 0.1533, + "num_input_tokens_seen": 245154432, + "step": 77895 + }, + { + "epoch": 4.986876640419948, + "grad_norm": 10.149620056152344, + "learning_rate": 1.0595089200676756e-10, + "loss": 0.154, + "num_input_tokens_seen": 245170624, + "step": 77900 + }, + { + "epoch": 4.987196722360924, + "grad_norm": 8.130736351013184, + "learning_rate": 1.0087009703757533e-10, + "loss": 0.1123, + "num_input_tokens_seen": 245186112, + "step": 77905 + }, + { + "epoch": 4.987516804301901, + "grad_norm": 7.943731307983398, + "learning_rate": 9.59141340092784e-11, + "loss": 0.0976, + "num_input_tokens_seen": 245201472, + "step": 77910 + }, + { + "epoch": 4.987836886242878, + "grad_norm": 5.50078010559082, + "learning_rate": 9.108300316973407e-11, + "loss": 0.1485, + "num_input_tokens_seen": 245216960, + "step": 77915 + }, + { + "epoch": 4.988156968183855, + "grad_norm": 7.317728519439697, + "learning_rate": 8.637670475986071e-11, + "loss": 0.1192, + "num_input_tokens_seen": 245233280, + "step": 77920 + }, + { + "epoch": 4.988477050124832, + "grad_norm": 6.537759780883789, + "learning_rate": 8.179523901474806e-11, + "loss": 0.1237, + "num_input_tokens_seen": 245248896, + "step": 77925 + }, + { + "epoch": 4.988797132065809, + "grad_norm": 7.110579967498779, + "learning_rate": 7.733860616310208e-11, + "loss": 0.1457, + "num_input_tokens_seen": 245263744, + "step": 77930 + }, + { + "epoch": 4.989117214006786, + "grad_norm": 5.0523529052734375, + "learning_rate": 7.300680642752245e-11, + "loss": 0.0994, + "num_input_tokens_seen": 245278848, + "step": 77935 + }, + { + "epoch": 4.989437295947763, + "grad_norm": 12.868497848510742, + "learning_rate": 6.879984002450269e-11, + "loss": 0.1573, + "num_input_tokens_seen": 245293632, + "step": 77940 + }, + { + "epoch": 4.98975737788874, + "grad_norm": 2.4756877422332764, + "learning_rate": 6.471770716387493e-11, + "loss": 0.1218, + "num_input_tokens_seen": 245309952, + "step": 77945 + }, + { + "epoch": 4.990077459829717, + "grad_norm": 4.589564800262451, + "learning_rate": 6.076040804964267e-11, + "loss": 0.1241, + "num_input_tokens_seen": 245325888, + "step": 77950 + }, + { + "epoch": 4.990397541770693, + "grad_norm": 6.668095111846924, + "learning_rate": 5.6927942879148046e-11, + "loss": 0.1366, + "num_input_tokens_seen": 245341312, + "step": 77955 + }, + { + "epoch": 4.99071762371167, + "grad_norm": 6.62178897857666, + "learning_rate": 5.322031184390453e-11, + "loss": 0.1117, + "num_input_tokens_seen": 245357760, + "step": 77960 + }, + { + "epoch": 4.991037705652647, + "grad_norm": 8.708206176757812, + "learning_rate": 4.963751512931936e-11, + "loss": 0.1299, + "num_input_tokens_seen": 245371904, + "step": 77965 + }, + { + "epoch": 4.991357787593624, + "grad_norm": 7.154589653015137, + "learning_rate": 4.617955291386089e-11, + "loss": 0.1135, + "num_input_tokens_seen": 245387712, + "step": 77970 + }, + { + "epoch": 4.991677869534601, + "grad_norm": 5.723326683044434, + "learning_rate": 4.2846425370723924e-11, + "loss": 0.1689, + "num_input_tokens_seen": 245402624, + "step": 77975 + }, + { + "epoch": 4.991997951475578, + "grad_norm": 7.538029670715332, + "learning_rate": 3.9638132665886784e-11, + "loss": 0.1612, + "num_input_tokens_seen": 245418624, + "step": 77980 + }, + { + "epoch": 4.992318033416555, + "grad_norm": 4.3995513916015625, + "learning_rate": 3.655467495949916e-11, + "loss": 0.0896, + "num_input_tokens_seen": 245433984, + "step": 77985 + }, + { + "epoch": 4.992638115357532, + "grad_norm": 8.337804794311523, + "learning_rate": 3.359605240588204e-11, + "loss": 0.129, + "num_input_tokens_seen": 245449472, + "step": 77990 + }, + { + "epoch": 4.992958197298508, + "grad_norm": 7.295002460479736, + "learning_rate": 3.076226515269509e-11, + "loss": 0.099, + "num_input_tokens_seen": 245464512, + "step": 77995 + }, + { + "epoch": 4.993278279239485, + "grad_norm": 5.596002101898193, + "learning_rate": 2.8053313341214196e-11, + "loss": 0.1504, + "num_input_tokens_seen": 245480768, + "step": 78000 + }, + { + "epoch": 4.993598361180462, + "grad_norm": 5.875351905822754, + "learning_rate": 2.5469197107164113e-11, + "loss": 0.149, + "num_input_tokens_seen": 245496640, + "step": 78005 + }, + { + "epoch": 4.993918443121439, + "grad_norm": 7.194498062133789, + "learning_rate": 2.300991657905316e-11, + "loss": 0.0999, + "num_input_tokens_seen": 245512320, + "step": 78010 + }, + { + "epoch": 4.994238525062416, + "grad_norm": 17.286455154418945, + "learning_rate": 2.067547187983854e-11, + "loss": 0.1716, + "num_input_tokens_seen": 245527168, + "step": 78015 + }, + { + "epoch": 4.994558607003393, + "grad_norm": 7.806553363800049, + "learning_rate": 1.846586312637122e-11, + "loss": 0.1145, + "num_input_tokens_seen": 245542592, + "step": 78020 + }, + { + "epoch": 4.99487868894437, + "grad_norm": 6.517457962036133, + "learning_rate": 1.638109042884084e-11, + "loss": 0.1178, + "num_input_tokens_seen": 245558208, + "step": 78025 + }, + { + "epoch": 4.995198770885347, + "grad_norm": 4.202336311340332, + "learning_rate": 1.4421153891053252e-11, + "loss": 0.0969, + "num_input_tokens_seen": 245574976, + "step": 78030 + }, + { + "epoch": 4.995518852826324, + "grad_norm": 8.855648040771484, + "learning_rate": 1.2586053611263193e-11, + "loss": 0.1036, + "num_input_tokens_seen": 245589888, + "step": 78035 + }, + { + "epoch": 4.995838934767301, + "grad_norm": 6.161116123199463, + "learning_rate": 1.0875789681064063e-11, + "loss": 0.1257, + "num_input_tokens_seen": 245605376, + "step": 78040 + }, + { + "epoch": 4.996159016708277, + "grad_norm": 8.317530632019043, + "learning_rate": 9.290362185387924e-12, + "loss": 0.1021, + "num_input_tokens_seen": 245621056, + "step": 78045 + }, + { + "epoch": 4.996479098649254, + "grad_norm": 8.895816802978516, + "learning_rate": 7.829771204170833e-12, + "loss": 0.149, + "num_input_tokens_seen": 245636928, + "step": 78050 + }, + { + "epoch": 4.996799180590231, + "grad_norm": 4.920835018157959, + "learning_rate": 6.494016809854842e-12, + "loss": 0.1508, + "num_input_tokens_seen": 245651968, + "step": 78055 + }, + { + "epoch": 4.997119262531208, + "grad_norm": 8.876502990722656, + "learning_rate": 5.283099069053332e-12, + "loss": 0.1208, + "num_input_tokens_seen": 245666752, + "step": 78060 + }, + { + "epoch": 4.997439344472185, + "grad_norm": 7.693271636962891, + "learning_rate": 4.1970180425510155e-12, + "loss": 0.1275, + "num_input_tokens_seen": 245683776, + "step": 78065 + }, + { + "epoch": 4.997759426413162, + "grad_norm": 6.169247150421143, + "learning_rate": 3.2357737844712635e-12, + "loss": 0.1582, + "num_input_tokens_seen": 245699008, + "step": 78070 + }, + { + "epoch": 4.998079508354139, + "grad_norm": 6.748289585113525, + "learning_rate": 2.399366342831222e-12, + "loss": 0.1184, + "num_input_tokens_seen": 245715008, + "step": 78075 + }, + { + "epoch": 4.998399590295116, + "grad_norm": 5.908524990081787, + "learning_rate": 1.6877957592642547e-12, + "loss": 0.1467, + "num_input_tokens_seen": 245730240, + "step": 78080 + }, + { + "epoch": 4.998719672236092, + "grad_norm": 7.711742401123047, + "learning_rate": 1.101062069575054e-12, + "loss": 0.1206, + "num_input_tokens_seen": 245744896, + "step": 78085 + }, + { + "epoch": 4.999039754177069, + "grad_norm": 8.90974235534668, + "learning_rate": 6.391653026294187e-13, + "loss": 0.1843, + "num_input_tokens_seen": 245761728, + "step": 78090 + }, + { + "epoch": 4.999359836118046, + "grad_norm": 12.309709548950195, + "learning_rate": 3.0210548174203214e-13, + "loss": 0.1793, + "num_input_tokens_seen": 245777472, + "step": 78095 + }, + { + "epoch": 4.999679918059023, + "grad_norm": 6.770830154418945, + "learning_rate": 8.988262384379553e-14, + "loss": 0.1155, + "num_input_tokens_seen": 245792448, + "step": 78100 + }, + { + "epoch": 5.0, + "grad_norm": 4.477665901184082, + "learning_rate": 2.4967394818276035e-15, + "loss": 0.0874, + "num_input_tokens_seen": 245808128, + "step": 78105 + }, + { + "epoch": 5.0, + "num_input_tokens_seen": 245808128, + "step": 78105, + "total_flos": 1.43524334436719e+18, + "train_loss": 0.2808395623519733, + "train_runtime": 12345.7888, + "train_samples_per_second": 50.612, + "train_steps_per_second": 6.326 + } + ], + "logging_steps": 5, + "max_steps": 78105, + "num_input_tokens_seen": 245808128, + "num_train_epochs": 5, + "save_steps": 3906, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1.43524334436719e+18, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +} diff --git a/training_args.bin b/training_args.bin new file mode 100644 index 0000000..0177c43 --- /dev/null +++ b/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:989a294f6f42da4dc5b6e43a3e5204f9ea23ee3ae17ddfc438f23d7b940334e1 +size 6289 diff --git a/training_eval_loss.png b/training_eval_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..657e41077c8c54a82586c894fbe4a08dff4aed15 GIT binary patch literal 46483 zcmeFZWmJ`27dE;Fy2#DFKm^mJ*Qe4n?{|TDqmX>&)B7 z_xpLq`TifraEP$?zH6>IuUhy&mw9^aD(O`e3Uy8VnTR|Jh3<_)p-o=71mEH88J&fH z`0Pbh>|aCQ+L>5cvNJzmW#(bLV{C74ZO6yLV)1`o z!E9w~#PUtup$k3*)B2gJ9SVi3i~L2)5KcEip^DGNMII?Q$E}PxIfu<1)o)sTNLcWvzy7 zVuIrCtDliLm2O_S48%K~{A;NsPKh?c^rCVEZ?8YTa#!R{Qtl$^zb|(`UVMJ#?%V6h zuR{O-FaE#Xgv{sYJcb=NV>nFY=bHPdD=I1k_J;J|zJ2>yKGS(`*vO18m9#fi9Dl9$ zcr6AeMU6?d@M467=;Oz)J=E(wPyY<&X{TwWi-%FaCt?30->s@?-rY0db*|Z)6dov% zrZ~T-ck&ENhom!_-Iq!-lGSEf zB$7$J>0XF{eDB?US6^HTL->d-QMHiHL}3amE35Zn`qIAghL>^39;V3`*e!I$;H3S! z&1dhI^u+g9wkrGR=;)OzS3b)o3Y~1Vvl#GsHoUtgaP%jo*nAj|o0~g^)2vB9M6jtb zkhq3354mE5kd>gLyJ#XOXk}6JB{cc1_LADN2L!b?r(yYK^O96TwAA zcE~kmDdkYo()!)9sxJ8QB-1`!Hj$c!#Z!rf2 zJbxFPq^75{+y8m4R&4yOHH_wPqXGADcTjt{%8BLf-MeW``8waw9oNS@gSf|^si~|@S-6kd`9SMTBK1<-OM`?RTFZQM@ zAG!GS!_hlGQb}6d*wEQ(o0~JT)a>A0xQsJ97p2|Q+Umbvcb;(Lp4d>GS-bwX*BBpB zk&3pqY!)M>Ig6|!RCJ7tB3b=f%7tnDrVI}#8RBJ>7Z(@tz1_vvulPa<4)Aia{uRy$wRDV#y*VPNO!L zJ&yP08gPYY;ksEk-x;(=-XJFrR?GPU-zRjtW1uifY#^Q4;+Dat(Y?&e!h-AMw9mG_ zK0B*mfazo-MU77sm?j})#*j=uBS+u^H<8`6kCw}P==toK*?t^Z+CLLO!*ME?N(`&nv*Pj_;9_$kR2s_x`XdD=0L2gtNX{EFF85cd9`G4Wna0p zx?13DH_z+FjT^6I(o+<(l&W_J)M*2z^Cv1Dmj8@ZaYH3JJ3IgUs>pD5y6(ll!tQnE z{=3A?H#YWGZEdan>hFlWy0gl@n;*l%NVqM@zJ2>vUS9q)^94;Lv!-dC*Y92lt{ScGj%aq1@oHYa_+UT3D_YJ|7(1>W6^ZSIN2mJ}i=!3n%*@Qi z79)`n49dUP)eB!@KJmrDn2c1nED*LT&GoN4KZ`}XaOwK>>)u&e4AQY&b3IQfmUQrPmAAhYr_%?s}Ofdh( z-d>*w7cns4 z+uxAOI`PaUzhHbTq|{+q>5ELf1T5CIwY5>N^V8JSR30+*f&KM}ZuLYV4{JNSnThi= z?v$q?)OYTD9$Fh6v8s!QHOXr}lCOKgwI%_|tXJ0!R$hqk8PD0-nX`+_1E>u)Ha2>K zA#JhVA9I=Z#g zVx)XIw<=)NiD}Nma({h7cB0n9#?CGU+D*1f0lK&F*})oZK zq-@*uRWvoFho0Eb-j3zv<(1$vPjVFttKEL3UmsJ9!w~>)pxTcSZ ziR;!YB|=!-R^#SYcggu3vro6pT0c@fY+X;9)+?m5bhk1zq<-;5DhY-F)H*4r>5mM# zlxNb?Oq`cSHcJQX9zRBZ_39N0n#f3P9aH23{iaEHHvafDH+ih@BqSufC*#HQ7Ct`- zLt1#bjsEd|b2!}#T3T9bzq0ZNmh=$;@X4DqA;Q0EJ>2a1Zm<3>(eI4m96abGjQH@u zTOw^br)+$>W`F!1J^j^qndYemUo^kKK>d~vcMlE^r>3VL59Vqf9xfyX#x1CIXG)t) z92@vAz$%(_e!qn4wBe0?v!NSA4BXm}HJi37D$t3LCnA?3cCxH}j-FrFldBZzTvSwaxYH}^vhnRQiLj>tFE8)O zy4SgWy*Jv*Xhj&D-ptBM2Q2RHwg?8*V&l*TZ8E68dr~`}~eWsj= zUT3^zHF)xRlgOI3v+_!r59iF@OFTzkc)PcByO(2PRw|98A>RT>y)S zadKy|ckg0Fb^$5&rAyd1y|wZd;Gop^@4ww%=-!5V{V_V)zpRWKb$;0G`QdlNeYam3>5>AQKkx;1em?1lfySY$qphtTU$FA&KhVq!nS&0YH`f*+a5x(V17_>nWeQg34V|ty-Zyo1 zUKI&ZaNFa)e}7CZ^~qZ%y`j511O{MFmNLuthK6V%4?dVO?#HWT*rXhO2pEEssgvq9 zftwRuyDu5I0BdJ&Js+T@E?e{`UOU4I?F(G6BBf%vg1pX;Ck#jpZPI@H5b+ME5iTUMeM*)!zoCv6w&c-3N>C;c%scS+qVwAbcXm^V;(>x8u_6Y(^>$4u!>EX@MRi zP7@yKsJ1WCv1GyLQ@#`g4~Jm|IW+c^S!rWDANM@AK+s5@{o1H1=GCk6*q7cU(*_ip z^ipFy2Q)vwL0YE5C;=Ea9MA?WQD9@Ops=vE$?xJ!+YyVh;WCf2!-cD#4k{6dBAt*hLLv=m8#tT$e|l0{j$xe3lL)%nhb}k$_`&mo$DoZsG^K|& zIyxH1Mmwi`KX27Vz1%vfq1LcS)ub;YlnoYz?GK7rOd7qAOAMh-OxkA$HxPa0@ca*uKN`N@JX&Im3U&u1Q9Uc%wC zmU|&QCtU=ha^89r%@K9WTS8^pUZ*7<>KYnx=(MIKYQwc2RUTEVr>8L0w72;R3JR`% zcHe9yZWOxQ=z`Qpz2_LLXxhcaMFXKT322ou=(M-E%xU)vphqxlxsPpC+!6D;liJ5ZfM2NKb->F}5R3iwZ z4&%8}u7>JM?Gd+AwFk5kJ%e^gkquG^x)!6;0s>*N)wap2s<2z2r;m`}^*Ac_h+RFb z!TW4nqIT?53Y7`RBzPDt?B!WtL%h(J`Qqwlz*Katt*towyXzC$i$9azG`Mx@70?1y zQ~*w+JE0W0P@Fw+JQ!f~^Yf#Cz`(!;JG+AoU3f$UE}m`E6$=S3Vz?|!z1$5txqtoo z75@42ow@l~E9QMpLQ%QC8n^vYnm4(*xqIgev$G#!V%j>nC!_$;+1S|3bu;Jb?k@Ek zbC6QYCamP2YM&nw2PAI*3Pc+^cRh!;oUg*zn9$zb>hBc6QDII;0&uY$MGRODQP(qsmD1tGD+-v=jo};WCEys{rw1Ub$jzJl6{24F(gA zSEtwhm{YMy@4JB<^{|*2)4DO&nC3f4{XP#$=Mz*%LDeai3SqkG|)2=VVMnN z(>d*1sm_#aZfxY2bzLYn?Y{~m6M+f)Yh$Y;7Uy+^%kyo^H-p6B0UHBD0m_7vcrDEZ zp#r7^jUAVoq4ms+W@jwe9u2kmWr=mg5DUYOS_q zf0_jA1DGmS)J#mBLMth3LRf)rxDn03&SleO;>m=bTC}kx2G z>r;S>_D`mT;Uk=uz)ns54shgg_icX1AMNc~qhlXYZ!Y6H{?V0`l{Eq&02rp@Y+O=W znu>`j2wIgj5J`Z6xZ$h3OzZn+{hM%~;pVh|e)Pr|`iF-b%*dOpIu9!-)`9Z7oS*I^ zsIsETTb+?eESO9jKsO)+mVF1bKIPhrnb{il%@Ku|nOUJ$gGz$fp0#8F=x9^3) z(tv=+5eJi6F)f?c+|tdq6vj<-M$O7_7^u_J)3595gaNW;tCh4(xgc|QWAYnvTlWg| z7?g8=7<2I5_ihfpJp=Rv&WLYRt<)S-rI|mRrnC?qn@cDz-N>%Qt-L;H^p|kSJ648^ zU|CFp&@=pA(^` zq)cvc3~ZGDVy59Cqphut@-1+fb2*2LA?E+!(w8AeKtKSuLl`xn+FIqb&Md!Y-2YYa z;^oV|FlZduZBKzkdt>lPGkQr4E#&>(~I6J8xDTj-S) zb|_~N6dap68r(j$-d&~wnp2`?;Z>tBtCm)yO+QnVpSu5CHJ-<2x{|t8VRmMTy;q(v+;oxN2>a8{g z(eas5UAoeJ7uAH`EzfoB+p-c8S1~X!M77EuhT8lx*z92M{#j%s3y(`9cP7E}$ON>q zhV(>F30Np?9-5O|jFTN5Qb&8Mfl*P!d9_FMa&mIOhx3iP$zi}djxmW5&MhsqfyOsf zY=RGzRu7mQsAh5MRy7eIDcuedLkNstNXWvVwwD3?&>bq@9Y$Kmrw1vcFH*AZjRRi! z2z-at_~#vjOTB*m`lm5RM?8NNK8*}doE5-UMzvx;1eZQ~b7>Q5`xo42U+wZnJs_Ak zASLlPHoZKarjs=9e*_w$UgOnLtBEf7)bF6Pxa{;ibphB06P6!JLeX}N;k*rFC2tWJ zd4!5y5@kX7jzDB_4Wo8j+HLjm{q_ECA8A0#{=vcX?JRXKq1xOJ=b~W3A-yesVyQo? z0wfH2j_rJbg4)x)5zt}YL1FpB#m0lUG}0WvY1(%su<t;ltEiP9@Wc%%LT4igpp*zr@GwP3ENv?5fI;9$h2s>(07 zjQbIS(S?+GMj*)D-+U0;IqEoKfk=uLzd6*(EQ6}5_!qhpjo?`r9Ie$QbeCpD?C_pK z)kX|`AEP8|)|bhjKYIg>I5|1l*xIt?u>_S7d8E~w zO}7RW!-f)2mgCjYFreTK;=pKD_yMkc{Z3OC36OChPg_<-Cb+>jFo%DxZI~I@+S%!G zZ=ILdxXZ%r`HtuYNtT))P+Wfg{%G|;!GMgg117`1bOBdj<3HL2$Vifr@Hz=eE4;Np zg%QUpU|Nqh9|QrNk~NkQ78iaR?Bsf%GU!FyrD8H ziqqqLmR+n0#%=@Ep)>lbE!U?ZBpb=8T^|fPv9a_dGXf;6iXMya^1L7_$`&>U4?HfidO}}@b z-yCey&Qvz!)dj#}x-oEX>;7C&36!z@pP#P~ITjREgmVxvYqY@oWfT+$0+N3go4jyq zw-N{KN-O%n7x;Q^q1c9zp&rkPpX{XHNO zb8B~(iEZY$4l4oOm)nGdjo;o}&U48%YkWSsTtWGOg2&n$O0Lj;QC=*F)L_DM*DU)B zhuHvjzGH9X91gI`iPoh z5m#5&P<@LW&Qj)vtiT|^(!WbB0+VS6$@ICbCm-VB}^0A8Z$NRuRMbhMvE^NIuEAFP89+ZP}6lXUPKAgq(-5&$V zoHbf%Z{J>3zq6~0nN6q=DkY=e^muvp)n)RBDLt%idn=TAbfR)AiPyx%#i2W-KOU>= zKOD~JF%A$2KHOg5FD9=jFTakDKVw})^+(y!Wdv|m?ZGsbhNk9iwLoUA>SsVg8rJCn z0=>F$+4k(jMJZ29)0~@FWrAN>>)h2e)e~B%C^?G`45;}M^R_K$EDOpe$O;H~IcMnl?e4)}VRDOr@}N>| zELXtrOXKi=rNT*i?ZPGONqE%7G}b84H+6y6%(igTSk+yA==rJ(Px4o2=2QMaCt6XLVH$YrQW*%Vp7XZ{l)cHum1q1CT zEKAUQ5Uqm3^WZMH0m`Adfeg~1Y5=@?0HzxlG19QiRiFu}ms!q%f>||RKt(564UCu@ z5Y+71@%jREglS+#j2h)1KrJSU^~0tRih~D*Xk2{ue{?9abhd#nA{cJ#2fl1R5M$`{ zK7cZqnS&9*3^2|w_{-L)q{O6N`f{|~X61KD;N+x^(D4fU$B!Qm0Kl#u1HC2*5^Dgx zMO;aV2sD3WAcF!D18n9KBBTJg1+=eH?fd`$=A2~`CKlEVkf-~1@4kX*UI=%0Z*8od zCedAfX^8-uw5;%HI0)upAT~U3Tu}j0gcnu_%yy*vbx0cb7q_{eU%GV3w)wWx2CVC; znVA=}l7;}xcGkv5gx;;1UV8Hi+BLiR5bN~J%ptIw#jz@GkSwe!^v23C{vvYI9SkLwsdz+I83A33p z;c8eA%L)iQf*KGpj*xs%fd~lq`+$J&z%+fZh^H49{bBHqfrt0&)UyNz%6pmw=hx6- z?Lgruu$mD1ORA)r0X26ceIArPL>&Xq0Y>*We7DGY>M@_gl7b#~EVmW&-mbaiRuqf2 zOnW5L=Js|9s3L=8#0VHge5$pI1*Db1Lq?DnA!(hmqR`Qwd#?Ly(!MyiZ-QG_gzYEgq}-MoD+jHxdLp6n zeD_D-tt5bZ!A03wpGX9x*f#YE(fQOR+x6hQg0~~eEXTego@zi=Ru&!+(U)Q}B?hJA`C8Sw5EUb%RF7$i1kj(Y z$v3$4Fs)10i;R0H)4(4NsH>|>?p+Lr!kzQPKs;9j+6JAlwERNGrje6muYnpCv3&kn&QP@lc2p6zwA>C2}3{bJ+bpfa%Y z=TP#Q;Dht7s7I>ECJ1CAx}b!DLO5{4WUyZ7poaz)dY&Fp4xT|PivUE$0IFy}RaF%N z$>%cD6XaoO6N(l@Mv?MZ>lB1Uu!X;>7+)w&?Z$oRvXq&=B5d^X<&ba)qDmt29>_GJ zTEtX`b?3*zRMgaBpt>u?#l;Z->4X0^C0z^&%3*AY2NuwpZ$VazQn5Y3Uk6>_JyStC3P^ zAO%XrYB>$t?5;b;jbmdqL;5%rLfJol{GbJHA@T5`H=E~)vm0F$LmjRpJc&88p)c8a zs<+gKBYa7``=irtNK@m(;zVc_^z_0n4L7DMTb5N7A}foQs;?*iX4IOo~Us5shw>-c@Uzw;&1fg&X>V%lCkNNHOIbE(;$@u3BQ&VyVb9JT?N=izh zCE3+EnPjg+>7F*8rwn zK#y_uqV?PB4sjNYVMya2#AfzzGdIX*;Xf~3LaXaN^CFr)w<{H$`a z0{DVaKi?S(Z2JVJKA4Lsy^9AxwF2yGVx*`6QN9le!G)EA7&jo)u>;^Z7E*RX_DIqYx#X$iCA4!%>4-gP3;IJPMXn@PuY-7mN;ULZ0T7-+^qq^5!KVB65N4gO8 ziZ=SbcYnvrQibE3Y9xGt7$*?%XoDe%Xr6$6?|>#$+YSDq%S;eA55P>29vN+exhLGi zV4emb_7selkMZ%HfZZ7GBM694nFgiOqM9?1+@jxN$57D0cmcd>50G$jkzjUt#JZtmc#jR&x(;kj4odn}7uah%6MWVrbByVHv?$zV-DT*+)9GGt9FgP?eSkavF>|Y(ZlR zi;7yXMiOOqu*{BE>&{QDB7_Ehufc->(gi5q0I?OM00_a;_481N7=xt1Q-J3{-Ec6g zA@0G)2Un&&9FrlRc?A>G#`!iPTtE}Mfrs}Rq+HOn*o?a3=(cC@;ZbJS>xfxZj&2t% z*k9uHVUrRgP-IX;O@NBh6hhGrSs65ya_hVBRHlo%h4caRite3*-1H3eYY@2hR8ys6 zIBbTD0RG_!v#1un1#1OiFo<9?G6u~76Ek!6se%AaP>inTEyebd+*(TmJ0~YdxS@+0 zVq__Ry1(qJ3h4UY)8Ll?h(K3G0pzYdoWBJw-zvSoDBSg!_wHuPiRpp(mTK3;hh6n# zQ*ZQA-z-s!yhj+-?~L{Nn~n~+05Cy^$};;+K|MuC`Q9hQi3TXN0n+{rGBPPJ_`rm~ zF|=#FpjA{@=$*_ii?|f%=;+21wSqv#DZ%)S*vOEWPnA(}Pi*1DjGT)_H70KUyk)5(6_ITb`1;Y%P4wQTW7%D`}Kw1UHG zJPuSKBxMFUrstXTXT-)}rt*Mtj0Cc{$DQvX6#+ClW8)B7Y;9{tndAOZIqyuaU9uUz zFgzfe4R<14q1=NCGc%sYa_W7~=qfje0)3!7gg}-!9x4Q%g57GoS`WWIJ;5~qT#+r% zc6rSc!5}(4hwSDZ4vui333k&zFyTQ2#2LYPL0YJ4i{d%cHe_@ldEz`9`n09BH3@Eb zwq_NPw)O;DqCQBT_o=DbAcDm~dJU+Z4kTHOIbyChJFM62vw?97DDN!f>@$R-e5|<))^KO%0WC3z*IgUN@}r3 zLgEkps1z9@iUqB$mkpCQr2k$T7);I0&E;F^>fVJ3nhYQlCQkT~K%W!bJILm-0`Jvw zTP=yfS*UDTSGpp1E4qWIcEkfm^wn|I+nKK=XWul9Ikrc>1rpq3EWWyVhfh9W!^p^J zW_49_W#8$E+>F;H09wf@DZ}}?Z#-ff^>iWj1zJbSkUq4QMrb>$VHZ%r{)m=)M0kXmh$4-@tVvlRV0DXH4ZDPKGe3k!>HEbTZXXH-D~03tO9jU5SDKrh_< z`2@G$-WV_oivi(HIf9?OnX9c7-hoB$Vq*?3Tu=Pe-kAM5=BivDC1(N(T`Hb00^+wt zpalk#@qULoZEbHa>b^=2bP!f%vIl~CKPDt}L71rlCaP+QDIu8YQ@|Dq*SBGTZ$jo^ z3vk#Bj8#27N*F@~1S;AGG%|6MC4*Y}&>n#JZvgE@f}7CBUYP6Z%L#!b0Rdxc7?r<3 zn86|AsXy9XhPy5;?Mwak`mTIa+NM|Sn&_Yk?yh5+L4FDOc%RT!CN<9e*1XI#yVo$3hUfL1i~4 zOFo|-K-PC+l-)r+cK0?Ph~@By<#pA zXlHRUDR);kh^r9%A(@j6!`sVRe~GQbw1`fG0JlcE0NY0F$z;uXt}}AJ-@a}a@)_>N z3)Ez)P2-n@-1kOab2}=FTbGR?LveCW-WL7JPp&>JFI*e)r>*IuR zwV{e?%+FAhYn|DW4Yw;kJ+XDvO>3qxrTL49{rEb#Ji9g1A1$QP8LTIh9#-BUG|stv z(~a%gn^yU@8_7Z+E(Iz`CsP+ z`0fs0;wdrmE5~p8`2ITfN zN;eTrV{g+-x$5pOVq9XHtsO(h#IB6&pCU}um^z&D1U~L>GJjjmWp3I7{2+JVctq~8 zBZn+WfV+RCgM}d&O-L#n1R2COgUm=61XpeW+jd+T`i3tn@C|TH7tl6TOM5$#BRn`d zLWU<;xcopgyCE#MGKk1Xh@glKx=*Ix)9lqQn#$| zf!R`{SEmD2f@VO6(j+wsiXrbjk;|M^FeNwowZ*_7ASc->Z#|e)E%|NkmQ-o~wKq`8tf~p-lt;*&Z;}pt|ef?jlW|ge0o!0$8F(vS%m|vXL;(sZo#^ zq~C-gp@SmhwS5b38iq*$h*P&f1pqG%FjC;di1h8tgG?DP$t8lyKn&ZP#f357=eY{A z60nrXa3K{)AbTT7(jWT$%BI8m_}Ek&M9^q=E1|R8QenKRiV+p~P&*H2YW$#rU!PTV z$W#7ZL!Xg>84+H@_kPueDdo$3Ee`DxDLwg?V|)P1nt;D_L5R;~F)a$p>oTgVE0}^6 zwss6z5J~Q0Q`QcY*xA|j7;`|>W#q2-#f^=PjK~9MQuK2KF|6wK;wj&sIGt)QnwA8R z%b{iSI)?_T`jAPtB^l%!QHju--U1363hUEO*<4yaQ(>dE>x;&*FOp5Qul16w2!@I0 zM`4ZU4o+0kni4~U$3a#m)W!jbgFrvA5!yvXg#ZeLKkfiCk-ReuRXYed!}orG;MED6 zZ5~@&v!Z}73P8b8w@0zi2ni*?s4sF_*FqBFDDc>=9UKrl9zHBIB0}+t8H5r)!qY|2 z7p&aY%opiH6?QZbj4BB^j0Fk}Ii4vbmkrP1uLZ~c0q`+bkt~X?`RW~m9=4%3&#j21 zh9QBn|k)BUGU=+pOFy^{K=%Oo$W;N_JzEe z%nSu<^^va?V$vT}>wlrsf~?Uej&X_IM+}QT!U0=H(RgKDEDXg>fKeL%tid=^@$nMY zzY#@SRL^(T-P?#sBi-*2?-aDtpqYNR?B@?*Betv+zGPSN|32-}qDv`BeFQ(+tSKyY zZqDt?lg_rx3+z~5cU+TQpZ$B0YANQOHAU@Ys-bqDfusRfZ+@9Ca>k6>s?2i7|9j11 zDXF)m;t0BNZ)Pa2kYyJEDbtETgcj2vf#OrVdV1=x>}86>_^WqKV_MDcrvJRC;EDO^ zqj4X-tJqX-* zl0gCzT4JIsgva3rq65T4kNuWD$_34w=MGT!g<(Zw++X-J?6qe5%9ur&=48y_qOS{b4i_B z67OXHpl(cARqx6l5p&r)OwA{+iU{x!+8W=9U~&2K{nZSnS#CKW*xv@>0oZ(^%Ry?! zM<$0E7ayNCCwoP1W`)U{^8*T<$E**a`o(PaH&{W2zqYWE&u-`Z5R=txtn{q4C+uU& zWm}YW|JTjBaO_LYe>Us}r!{;yu|-?&qh((r4HEl!2_RKO(*U}PCjLW-nd$7iqsYP^P++u5QEFi3p(N?n} zZE3msL2U`wkE+ZtclxHL8G~per2j*DIIZQy=zdPOV_#0C%Hhv0KK_s%=%H?zIu`- zx&PFPQr3#H?59ZXxNCcFZ0>JMYGteYvR0+*Vd}=opRpMNF1r5tI(36n;2)ZN_Riu& z`&zj7gJ`A`N7hi;(zx0B!|Y$UwD*jHuHweyukq0cfLp!Xtr>fu_}k%!!N2$U38G~; z5!JiD@n1g^YEJixpEa*^K=(cRbuoDQBNy)K+_~FtJbq#t0eeyb`>yh34ri^S!G=c6 z_^|hzS(g7k{6&?(S_4 zU0@>QoA%Qq7!m+hG=~WulH-9%nv?4bzQE(~XbZB0kmX$(?@M7miF*k~n~5~MG2vJ$ zMl<6hba9v-F`8;i(tXOXLpVGJCm9Zo2!zNCLP6pbHbFTbEv7?2-!=3Uux$&ZmSD3` z!*Ioc;l{77=!ghWNDe^Sn)As!Y_vPzp~INGe(P4dH20W->FDtAFla(y5fMI7QOcqe zwFn8K4-}oM8Z&yJKDj27M5u*vzv^ep`6^Gtf;#@Q4(5yXPZoYW8N0<^s*L}*Mw5Yk zMNPc6&g>C!{{NknRaDWjFbbRI#8P^669GN+mfOtWC zT`b6hpWV88CoSY0l#1KwzYbrvi*q$mjg&c=Ej8nN#P;MFJdam+jt*%6 zQNsypDk{)I2rAOl|D-Fm6=tg=F4z}-t5ESS69P#1V~`m({z7MC8g5N1}hqmUyE?9i@G=c?XbtvYkA8p_@L}Sr!Erd2r;cEKkNJ z(L-+lUToklg0L?ELa{R_UkDe2{bH#1q?`dj7U#R-vFbr(LG}W>%tsp`<zD#7l+}vCvph9Ul@ivYNDu`9Z>>~!~#4N?Pn={~{(JaXU|J~c( z{oSKd3ZI{r{GeG%PzyzYuNfB}PGH#a=@Do$@%&C;ZgB32lY#vFD5rK*h2+MKZ@?v@ z*$l3NO>tj9AkHYkL^xZ7EAIo6kB_ zyo_U^0xjq3gYT81uFf*XQ2$Da|K=PKyE;K3mLv;W(V1V#&Tvl7g|wzbT0j#>pJa+u z)De+;xRBZ;t$s2pEh;y3-AEdBT!zW)NO3p!pKKCn5UzVNQ825~^@aGegS_1~C$;|- z-FyFODuD)(NvPtS<1(XgFaMmkk2S>q9k#uN!Q*7&-MxWb&$MIZ@c$jqa2b7%DG7rW zB%cI!64Z_r)10=*@V^l+|GyDGPQ{2;MkjWZ>+NB=lHJ?n-2Z-ELPi?f77QW4 z22_!b!`3Dz+V2Uxi01D6cRG>=TJ$K*Ul*^}w0003FHx|a$%e=OyH*2=^93#U{q^9m z#aWV$i>^z5uCHmvC;9Wq{CCQcZH)PyDCSy>p97~Obq*i0&+fK~{<|8sPmHrfG2YWO z-n0SVUFps#>7V`k3B1YiywZfToX0F+lp-;_;jjbUUdGgwJE~t;?h z(f=E%rX+!)cb9+Ne3+;wQE&cUQcUjilXC8FTFm{o6EZIUdrdT14Q3HWTpzuFgN*2! zw%5?WDMOA|vflrG+#!kaxn?X*E|YBen2@yd#D$U2`zFab&HpYdvSQ$%ZYa!z;AZDr z=Zc@q9a)u6@OR(-yOa9b=((TpSWEkbEiLdBQXQWA75;lT2IiXeLdqp5`hf3ElDp@6 zFHaK7asGoFCC`$2q+WePCtyPNIMCxFF1%`EqtweTp>%$okmg@84OMyxYQ|X)@OVmV zYn|T9ADWwbOA8tq(yP_(Znr_TMALqaySkcchE}%EW3#{QF(o62ctkmHgzLi|~zn7D^l;W{QsiX2VPMT-NjWzVzQFLbzI{b%y%1tB7k6C{aRdSh8*2>a+aD)laH97#4Nx^d%vfYM6K##3KTjLKps-JP@X zES;Os`z^7bmmwgNwl2RL$)eo_b43*lHpp=y@-HHnLEFbCP&l!70(BjnoK<0MLE1gY z0^tasgV&7g8HSd~Z|()DGuW_OeQG2t+r8|;gY=mSgo)Z>d`Mk+j1$-WIPD``z}#2D zzrPEi0TL=C;02=qe7yv5$Q?W+*jEeb6CfxMC80{#UOM(XTBNb8m?J{=(B+Ty_HM^4 z+<*`M`ow1D99nMtJ@+dcKl`6)Bzu0{WY&RzDx@SpYA>*Qk<|~gA0UzXv<+}_5iv15 z=OZrU#kTD@s=Nk(J_|>SC zmVmSq5)KhVmu`aCR^Qewr`h?fy`K z;6PwmS=lKOjQ6gmFy51^JBWOCpME?rS$A=ZuRdzhDFn7>;hpb*Mtrm5#6f@%sgAiU zLe2WQe#qXt?)=d`uYhz0vSACNoSVPR@(72|5z}*8{1`n)t%TmT4z_M_JpckB$0?IOXTwN0c(xtA<6e06y>k_x_Dn`4 zP1YyOJIkwMb+Di_+&RZFvinUTzhnre(VhiZQUGdrinlZaht21~`GKu6L=_-s9#cTd z5K;h{ecPbF5|^lHPIu@d*o6i_aPW&oR58{zC8+w|_lTD#4wiQWnyDrkiF~fvAV=Zj z97n%0kUIo=6zs%NMpR8m3_R7+O7KwHfMEO;TwKHo^e(YS(6r7WW_ekC7gr6IjXyl`QwvViAHGM&li!BX6;ze0AN=JDT#50f-}pYaDWoI&(Sj;6oc))2Y$+6 z`&%mX-W}lII@QE#`kOD}QpFH+q*iG@Ey3NleFg$?jmF0QPrcHI8hSF)Z&en}(=4iy zX{eyl!Z^9nXs$qCo~EdC?J3RDf5WHmDs>S&36_v!Gm3f570=c0*-Ttd!)#eUG4_+_ zMU+>fiY>O9uLXH*B2%C6FI^VN$zPD7jy@FAh=cbr$drdLfF|{+tvL2^Iqrc&N zwQFNH->si{Q;LZ4M8o99tXxBn|B=++S;^TXwAP(`eZ?SWUzx~scTTCkJdpPjlS9#G z(Rpwh$pO)t^+i>Asb6k~nI`df%q2Ii2*vACAgB9TLtM5OM^CT(6(H(YU{ zZPSY=@Ox7^!NW=~(f%;xS6qn%@kGQl`xBT=EB!97n+EAk&DRy(8qh!_-_TWOUbMc# zbq(sw3Rgfa_GW6r0UCk7CI6maKXX=nORnV18Q)vCD3eibo0!Y4pY7y9RhgD@^)z^= z!h`@1%Pw;sJ*M+t=>~VyXbxS86eYEEktxyNKXdlmSJ|EF62v`}?nbGjE&rNUG7Nm| z_-&5VdpYeSFhlGwD9BLq(r$(`E5_dYA)Z3XKGAa6@meCFa0#5^0@}%JxzJFnJIoI* zmt3_pp#s0p^}ly<(Y&zbjsEB>fl(6^gVD+_S0|374__=UdtlrViYEq4xL3wz>3C7~84iN0O_otp#w!hj4j{zGCJc*?=Y zSi6pyJ2qa8o3=Jz3KDOje9deaK7%%9nvhT|Z~-XQ=#U8V(f?sPMLp&3Q161tCs{de zy^+QVGTk)kKUJ6p8Cle^CPT4!Q9EXYZ;QlgKZ@>#DkH;N!ikM4>P2SM(y8JUJm7;$ zGt6z9M{=fCa1`{G+>6V$&JTW_GJJ&`8&nLvqG9bf-BIo5gviRRVzoj+Rfk%!Djx6V zH5!*9@~U8WeQTcphbiHynglxS*Qo#A7gP|?bgIgD)lhPH@bKDB-;LSKhGdoY>^|z! zO)nbxY#WE5oyL*Q3hF0#*FOCBmd@O__Jvqd5qxX1as&wnw%is**;939(`^E|d79;% zhXSE-l1V-8V>SvzBP+89sTwG~)Ptf7wEs$vfJWX{{8?vN-nP2x%-I0B#PoiHI80)p zarf8ijM@>ig-ynA?v*ffa4uKB4$$ZXQF)<|CXW#_j0-L z^4oX8LDiUB*>N^ru3^!} zmVlx$K@rnoCbgxe7f> z;2S6X?O)Qpr#mlbu?fw|Se`jWc%(I>+Yv8O#T`rgp0;weMm(V3V5&iPhR?~gFXwLg zR~g60GXj(QT{kQJD009X}5k*2mQbJN%0YO@nkPc~(?hq6Z5Ty~2 zQbI~vx}~KAN$KvEl6vR;JimS3bH+G(pE2}5k+tra-x=5SnW7GN6Kq|Q(F|HAHXi4O z*s58RO!s`3Nj*N;p?0#KSInPmt|{I5<8wmz^0y~-$ioMD>Z8>eb0j52(P@baCY|j9 zM6u=@=NrSUL`<4zk^@BU3(%*@Dx8)5s~OtA`iI*$h6ciD zQr26ln}ms1H@-t5n|y4_dB{1^I)$^si}$BNEtgT*c2QNU`y0Am7wfdKC5H?N6!!xn z|6ZnNPh4nG50b0ogqQx(cUX%izfn&!{hO56>6NIJH+#l;les$@y#t&OFwjW40|Z|9y2 z01N30S|Jn~>L9Q)E{gTO;p+GlM#ew92{iKF$d;EG?CqUhw|{=glI*Kkc>7$ZOUc1w zx{gr~=wGPH)no{!Xg1_vkl%}sl!0p)tZKEhbTi>-l=KO$42Gb9cIcn}!(5C^-OFTDpdw56n{NBg!@EV6Qn(X~O zqq56?eb)_I8G#(eMdx-T*qp{u(YlZ1%91J17WjgZ)5ap^1L|Y9Dd>7&ZqHCx$DJ#S zXgKHB<>xMHSTj!qAiA8~3`N^M_5I*hrK1G*fgzo=L*`|gHxt+GYYF9p)}G8< z+8#AaLA)YRvJsSwBKok=vKKc&J%$)W5ZoV06Oww5rK|2<3U?i*x}Et`YV0xR8z~K& z*Dz_S$Z#eSD8Fnl`P=Qo3-NBq%Ujvc5|UNUYn(K0de%1NLi{u9-wRg$F=K8-U>|!f z8uKwQ+y6!jL~-6-`-L};^o3-OezG=4Ek2;Gu*jHG93UWjc{~4E(Q6RyUH1h7Spev? zh{?&@z$fK~Ovk~0k%7qqH^!yt+0`cwFL0*!%Td&dKW5hCP4xBZs;_(``BA6C38%Z+ zb1jn9x$mv?EX~OZCS$?8o5>gCk~Zee&jkeNS*o`ka`m1Nv_(3wRHWul$B5`Anp4fn zLTscceTyQbYSa_5v7VJVvD;$xq?FJpz-96UVh`BkP=(-eenUVd2jho-OxK9x&$|v8 zSAh2+A};O`Okd-~eZ(e!+~hKaz#+j#7We|JX#m|h=|zMYz>gs~LB9iga$Doon0UGD`WG;WP4Mn?pem-Q|xwM-q@r?#MM(Z%qD%zmx79#5Bp# zbLuVBG`X=hx7elFeY)c~tiJM`Xx2mR9vVIY!Q%Ez6BJ5_6&eUisQbVfp-?<#ZR7-z zF0^fn!If3zy48cIqeVH%bccZ|H!Ysx89pL^8 z`c%!SgAxObWJ)MtIdp>46>$M%JRv3}#Xtak<`Y%x&Iu^Qj14kkTiZdPmoRZgBrS}LSfyIsa2ej#e$guc`ODZ0s0pvUPh~#9xiwsU zI;JtE?8tqa(8rZM%I1Y0+z8DvuC-meC2rjI9|GvnA5W>2drSt;L!eBO%6qLuJUKqe zFBK`SvZOQYlnw}=d#8!ObpzFAr*kMEM}RHa*cd853-Ah3a&nFJyHMOCa_uzG9fG8E z2gp`^AX5e$2oNBKFq^h2<_M5}2rzMefXwCyWWb1=LKeOO%v+$3LpRY03JK`@!yET( z-5GIJfULc9A5q2Bb+ii@@v_~sQa#dZJF4&U!r00$|KJ_(;VECkn!PTs>0T{FIEM>p2e<=MlAVf*&>=nlOf;DHo!$38!jkd}5zw`so}B?YiTLDj zs~~z>4&fPqD;#ljAbbi$vBJd_4vm%3LquAIcqBn2Yyd_jB$^`T)$RlVjiV|=#;vKT zc`ydTY_^#`cycC;*3N4ucYL<;#HH0=ML^b%zoo_FykniA4^59K&xfB)VX}PhLv$0I zR+VvTLDE-rDNgJ>#|&i8rIOTVF4+N$QcCLCR3YeeohsKaP|bXvNJhkDnS;cQd`&5 zv=1_PH#pv7A_?@X#&@aA%}k5iW3Qwli}zvrC?)fke<p*gfTZ0OpHxH76^${al6GK9G)NMPN`XZF5x^1PRTLaRKLtT) z9L&H$>r_tu`W$hw{*8u5z1Ga?4DZi?TGnJzhxN?9ex8ZG^r5vxNyBdyw|d<;@`Smk zj~;bmgQP80-k-4BPlREIbU#u~C3=>e`%XZg`8?>Fmf%fCa3mbMRj6%HMuAVs!uEY` zFAywu4h|UE*yLL=aA_(0>q9df7)4W{h_Rs2JN;cbTzrTBmBvdU|AGXbgp}$Gb;a~& zp3D*7bTHrvcCuOxtX3M@$SasXIT6xAB_$<=B(2{y`4PQ_`v^Yxh6yKXL+u>rmgcea zq^9dtUzYOHuWB7M_v_VP&0qa^dAs7g^nF0qM*doF0tDP`kc!&Fp@9P11*pWu7w5(JbM_oo5#;0EL-PXGBy>^dfh$l}&`*5rS}Q<2<3!vYGB821mby4v?bBIw zZtL0JLAh~=$+3Pj-9lqQ*0Gc(+kzV zr|-aB$Wr?egK~lB(Pc%qiQp^HkZY)Gy?@~~QEEbcA>isviB%Gipnb_so-O`5v($^H z`IqeA>`=S<2DwSA4rlenrROclkDhmbmKtDnJI*gNvAG8`{uGDXzM!?ebBKAqb#WdB zSs6Kyp&e+D3Blqh>HUI!y!-l^2MeD!-&>H;N&188KF8~ED=46qo;-P?rY{W-H(9$t zTsu4OXV$r+`YqR#IcR)BLL5i(tC)iBD#!?8n0()+s_x%LGzAg!FwXOuNbD2RJz=BQ zu$`^9;*9m);hgg)rj^={ve1P4B&V9ZITa>A1??ndQXc{7ee)+R5jeE@f&Vl*yvLQqefjhLVS9R zQ=x6Jz|V3*C#9>Bf9dD&aCw*F3E&CI{s{`*Y`h;qVTKNp{=VggxHhxv}MG;&S{YsJs7$s<^Q6f)x#60)A zi<5(`68PfK;`}A}J@X?SJZNv^!1(&?88}hxMiv)tW4wY%Onz}5>dBKc@sfu#og0~G zZs{`VU8iKJq1oGarop}g@+m10sepAI^zB09aPtNfp^@_rN=rG4;c`K-c*h%IE(FcG-H-I`=nS_w>Opn-kZ|>3N6lZw~ zOuKD6@6(-4@K1 zZwg)6tV&Jue>v&;3fiady>wTGS+lT^x8x0O7+NB?$=kDwyLaz`Ub)Eg(B`9ntrA3R zwtpi3AlorTXFbc?@5Rs7=EEW;==-uV{)GJd>j8!Z<4D5`L2mBI_D3rO@XlA=5ggM( z301_Q|2*k2v7h$32CCR5+jq=m&q?62Vkns6t+<08 zk&Fjz{)e;A*?8&TjgLgBkr3WaK^%;zw~fr%`M;=r|f|R4N-JUhza%ML~atJX977ov}Q?2Z22W9pEos z9>^v|{KbNTmCes#fK=hOlX~U?0=*M&uuUSaR}DS)0AN`bIxNV++`1B8P+)6=o5lHF zFBDt4`6FvN{sSWFn}Uc|ijV3oQR;_^n)8C^hXr3x*rd*$&dA>4hct8!64*uLFTjAn zndS$@f0oXY+ftG%!Wn^*2Gal%a^MU8Gx7yNI=SaNxePi6hV+pW&;|nRF{!DHQ2aC; zu~aN}-2lpn_0;Cdisr2QDqGa(_euF+eJ>+|JP8qd`wtn3QWN&PEZC)T5BF|`!P7feXj|1~pk6Xc^^*AOkRgS|_I^3WMs z>Up*u9#ZZ10CBV(rik~UL&8dV|No-nQvlEA5qNi(M~dj-V<6@ApY_ow_*{Jmqws|M z+BI&lry-^vxWDUTkSKIr5g7LcRy<5A=K#_?4>E**P!qJb5!ZTU+eo^R`A=EeTg-v33_z@EQc~RR+LMkT-%gnNWwvtFb_pd}d#2HR+Z0SA+YZjD z8mcImWL@uuL18-Cu`m!bp?q{SKVwsZ(h6!oup?geyZ~w8 zBiILU<^tAd?iLO0tJk}lb-yusBvBm0h}Zn4rMJ>7sUufBJKv3DB9lm614UEI(soPLFok{sm|`p{%l3UZ%TYg&xS|hn1+=Qp2>uOjn7vD#h+aB~4dF9qb}fO% z5UflH%I$<702BZugg$~yd=HQ{1i_C6k?Y_^+58q9E?@QQseODdp7bJ)4vG)Z1`plBF6RAFmAhDB< zPs1`?UDYy~I_e?V=~~A&c9$w&OibC){Bw1`e8(_@paE~Qa>T~u4!BP#8M9- z0DuJ96{z01cHI*@OTXQ;e@2Q{l9dBR~^J zIGSLxhVlyqh!OB~H@CJHKW`&IlrF%53V#0{3K15hri7zh{f|X_AHUOjv(ke!q!_DW6IP3v5{h6!5Z%XOB)nP1R0DMeBq)-`rz4~Vej z(OTw#TKF1fA}{NS9GL+B4n3rnHlLkc?8378wNC=lqKz1R*Fjf)?pVB8K(vn>>0lK` z46R6$`!h7F!P5r3Dx@WXAVU|#rr`8}67@;ZA?Tt;XO@6o77KsNn;rQq`qKv<%pjQd zv2t5Z*FEu`byjGR(lv-4`hqKha5$qfHt^i@8>x-tZu;sCp{(wY0=8(PRWPWTl zODU-Sq>f8w9&u+r*}J^&@>$kbL7kZ`Raz6ZnU63u53GgHSV9V1AEN=opaZDy?U#~vi9V#AT0NJ zQq?VG^522(j*uZ#ll0?k50Ghgs$b<4o$~ES1)1iYBg~7Ns_MCqC4)S&zx$bhngAih zK^99X;ua1LedG>X71XN7?*y+!C+fBbbV&@bF8^$=Zaz!owbTl5LniZO(St;#k$IQi zg=zPR8$*wkc=~gTx%!BU2wBKOy(SaGn-}EE*(IDijBDq(Di^wk<`#MC@(`AhjDwn* z+vRIn5&Q8qvU!#Cs*6D_EYDdOqJuR^x4sSL)l`1) zP|RP`inaXX!hRlh1I!Z>AOE7+ElZS%RuZ>7jqU`cSX9&bJ&%^E`lNsVLs`@s&;>_a zhkm2?b2pcCt+|#F>`0}(f!;u{=I#QECE?hScv2Jv`BJ2QZ^Zc~?-e^4m0Jf=6dqBP zRT5tvxp&_~nC_tgnCu1;?ZY5?KYTRRZn}og z-HLqieNVQRPfIz$jzr$;bnDt$&qqNYUGl&$>o}CPLbed5g$W}ULE7B$tDSnNzfaEU z3tSS53QpK!Q2tp{@R0(++Z358FqfnDi@V~K)N$Nd>44D30`qF=L-o2BnBm2P>3fGi zN6qTCl-di+Z{e~bo|eiQ^dRR6{k6Y~|GZs*+1Y>BXFW5WF;ovW$9pk_cRoxSFP3AF z%d#1s4U`$`fzF(^B>bI-WK0X2lqQ+Rr7GSRlC_r)`XL}UCx`cQX@FJ*gv@qNaiA{(ItwSc z8O2E@8aMs30g>7cL{1vbiPZ0Zf_7!r(iug2%{kw46qcXEVBvAD8uF>Hyn|1T%(^3_ ztcr)SBb19eDJ~BUVQfrG?}-GZB{xsRf;A@uD;vdne*Aiw_$eY96)8){Vc! ziospHveLfs!tZVzdrhH{c*#pK|32%Ytu>JEbhQcHJKe^FOV78odsJUsey+>%z4!gK z*w&`jWQB~M%o&ED_o#LH=fi_WU?0px%|#4r3T;ZnVlC=h7dt6?HI!5 z?|9uqre3H*=>DXgmq-0!YdtL`EGI}+e$-8`Y@4$hNGnyu(TmD=bLhYQ9UG1ZFkC_2 zQ}FfYYC04%;N+v=%X*At{=E0$HjrZXm>81$<_6#KsQ*WjZ8?EN*6}iuuIVVTI zw2PIO_H#|MLSwj zz=}V<;`pW-L@@Qx-GiI;DiUtih-=SpE%KSAxAlQNGj-s&dw1qVF6e>8iJgO=N-atR zXy!N8JCv38zko;$_x&}PPW!kyDpI~m76Rv796M3$h@G~gR5vqM+}*f4S*p{{NjSIe z=MpJ{wt-9A0H%!!8t*(`FoQ^0DSZ*id3Ybg6}4v(^yo3W`svAoHfb?}L(mf4*gIAt zDZv8~R^zNpm7e1jHI%J@nHY^Dec0g(tcKU!OxMeu$cgKQY!<+ka$Vud7T(hVN>C9g zlV{4YDfg$vi*xi?a__x|vrm&{=_?!L1pwB_Qrq6svtp}9PMIcp90Mw|lU>Ve)0Tf`bE+rob@^SBm3`OAS|85X*eJSNjgRAk-GNbR{qnK zf8LQe24CmsROa>Sfxp9aBcuX(QD=1m!_7OLb%@Idc|FQvafLLbPV8)7ejEs~Q9Ogb z*RjKi#Y^G1_P+Tyzi=xKtr_1M>0T@NtVEF|3$rj2>qd^S9%8YhD#0`XaB_2Bd6<16 z-;y!EeD@ruA0OZK*MIAU;JihD;>gM5y~B<3MT_OFkxx>L^JPhQ#E=Rn zp={o)k-A}R%6xRLKt!g1$;Pp?0< zv62P-5KK|t{k6tUId?DCd!GV2)N_$?+@0nYP03CJt77jvEaV{>=ORPDyZFxwxK}S? zzGy!c2X%@AuHt&-fQ0%K;>m@(AC7$QW%%CDq4&(ps|ET>I@X_l^NAADK4no|Ui3+H zUPsf79QQ184u)6LRet4V)6Ci0w8E6Vka_!W4!y#ei+=42wP=Fg{l$#CAMzygg7wqu zh2bRz9Z-+yz*hd&?^!U6sikB=ck@qB@rO5p5s?~OZv^IVDlnLk{k|xSKt96=dmNoZn?g=cPUj=erTzaci(i52Fy5(f2A7n5%%> zClc)DAqL8bh*hHY8Grsw_@}Xqfu}9`{(me(GK!OwI+>~){D)*;6p)(z5^2XM#eL!k z7T#4W=I?J{ir3Ea^&^Gva6j$%&3|Dof9YjGB@i%?`3s2^zbxDtmaaldGgxVe3f2r* zzh|;>j$~j&gTHhb^0n(4n~UVx>_92i%_dGg3j?%@d%4F`%lUjy+DkzVAw%l;sF&xb z;P=23rhU^>avvbUG)ab(^Q=v}INJoEg)Hoj)FVpFB+3hEhe1ok=PqF>wj(R}=WA6_ zqlY5(^?0yy*TT<$JAcb@Oc$a=;Y@z1zER&UFHSlCGr-z(;#dA7IyLRv=?f80%_TGx z)>0I64U|DrEEx4cIrehN*UwrHb1Wgx+jg>F+zkDkKEK={5C?zYEX%tP_}DR=P{{n) zpzyH)S9r3oc<`9Q2)8ZH;g{v9N z8|UobT!hI7%(1Z=to(kH{^GA-luUCDA_tv~?2jSvi8K9t+7?LY@)<^bdqb^z`=LAx z>5E0n5#}PgITs-{BKg*5RyP*|9C4y7V9W!~ciC<}I}yQQ!y95M?%W$}z5`tfYUE~D zh+o>=p42zzC08WiJWI6YdTPLh$g~iLHr|cnSiH;xc3h9Nc!?-Xv(mSWXq*0jD~Q0_ z?HgFu&*ZQn8r4z6V0E0TBKlEvwK9OnISQWRhI7ZEjQ0M$KQ*2#$DXSnvb6eYS3%us z;VaDaWcQvEZOF^K*RYyWWVAr=g+xupUt>!n>OH}Bd zlQ5;d;g03SIe4uaq+C-9%Z+ipt`6H?{F3B0&qpgZhppAJSxiH|f5scSF$8Hbm6kz| z*|qJ-sc{b*QpwQ-m>y z7$VZj8Jz_K7;#xEyCQN+u+m0fccg}YFuTRxi(IggN_K)uPc(4F@o23f_$?ghX?1I>N);s!3Kg4^71j3e4k#H&KhLZy5w27Wql3uR`|8Xt@7+I9*g2kkN)w3 z(Wem{SCO3p6IPQIQ9jvbb~aP7~6(v(40 z>+>9hX{JcWdl8m-l5ugmkJRgk(=lReA9j^mzvtmeDp*Xh}>aJ>54V0Fy z$-lmZ@Zz?Z4vhbp2?xt}8IGhr6Qdf8+3Y^G;+pM*c|jn@v$pfn>Qv#w3zuZWR%Pss z3|FXVYqrl`!ndh_j*QAHYNQ+EksJXh-D?qD%U#0(wz5rqjlAD=L(V3}?+ zm3=dtzX_@xomDM<(v)aPuO}V*EGK37Ux!8qVECyhcEu7Y&z)J5ILlHPZ)+&WmE+0N zY^Yl?Jvpkd0R8q1JjsEa#*V+#I&=Xn(@C?{c$DE&!b6)RBR19n0cYmGg_iH?jtR7| zVGa#4+#uXP4d!LrzY7A7=?-{NuFm)i^?t(KdrOjEN4!I2^)H&1p4+Bt?4dp$X}5_} zD|W?PDkM<@{up>Dfc^4SII{T-y(|NWFy^vUax030IU(%`wjkBXZS6R&vZ}0ncnhSF zmu>+C0t_qjl%gGi9^AY5#(HvCJiB=py@T3q)L-UTVR|_G*>P6S;*3;6=K)EB7Aq}Z zN`Oz|Jm{kJIF#X47&!n%N)gig1BzVcBr_}#^< zZJozgVTeI|m0Zd<{4in8TaiBn z{wlIr5wPE2@=5})^@oZrpkzTvB@h6K=lmm;e|*@04}^Rv= zETuONYCc`N@P$cYfmWXmfb|^^HW2Wa5oQ|r} zcYZQ;u?s6U3V}R^%*NOXYQBRo6TrBIqMD#qVt_3z#xN5F8e1z!Apwzo1cJ2O&BhOn z308CeSjla#Y^bISm-0h@N%4sbHz+PG$id!p#12AQ0c_XiDake{MUS>Vx;=5kukq5j zIWYK4sl}k!Za&qyOVuMk*@ErhsR1wP%2sP;T2+5)T)OsBx?vxEpUCM7!9ilje8k0u z@p%x3Z}MD6 zb8uAE8lmqJg<|p)Z~^s}+{2i68CN9+bXhaLeALTFhK@EhJJMpO+ma~2Is>B;fhZ%s z(e`r$y9?dl+dMoaBYP}wC7wOIe(`s>9$6NE0s=8o*x<<{HfgdLUy-HZ&NDf3L zOamA|;{$mYn%BhF)c?B0gZU9AcO5@|eDBQNH7%_>)~3X}xQAdNjt(36V_YUzi$Pj$ zQHccyeN5H#>BYHFi&Z@}mcipCu5;n^r=^Mxf^K3VwPrlWYi_Jo1dQl*fHzwdAITV) zZ30Qr8y^yR9U}q=_cweTpCyCQk@Cv*_gd!-SMDy=W&|hC+Xis1S9XNvT?u$7m3ni0^4%6(o^f*SX(aQy#pGZcGhpU zdrtPVLqiXmzu|nb&AlI}xr|6ka(3!+{(lkbM@($&C`2{-Un0~wprg(Lw5!>u2w4&a zoJ?#3@lCBI1qFpW{k5=_%s(YXKX@1d76;6I4z#7D2FcXkhyKYc(G7;S1*B!qx!DdD zGV@Iy$J$*B+kBf_@&+Ug+dpTijOTC2iEerzdC+<59A#zP`ytuxF-hT_-H8{S`-#{d zt4F&<9_t^Gcwz*_#onBBT z4DQ(isg04HQyOkkZbZS&&Q|BPUfoVGg&?AT=qJQb+BZGX!ihtAaRyGkj9pdbRH6yk z(G@l&AY+&vJjyivKNYC{{?U2TiLk<0x5^0_I-w5c@|_W+38FsTgoU^Nr~342K~YMA zvuR$3gqKlAi$#Hxkv;^(2EA-Wd@cZrK<${^*|ZRZV;Ow9t`W#Iog zfJ7k+tlE37@(pm~q{$aR#vC*RQ!@(%kt&Gn%BmwPotWoNnX#iBHEj|4}N>$==3qZM|6x=pP* z>Zzn){Y^xn`%eF_034~!pEza~s?LGiAyT()3uZu;_iHo)?7ue6b#D*ngn{iWnWN{Sr39l1dGVPl1?tc#8Zpl1%j9KYJkyY+ z`dmEb$^WZvUMsd$>>CdI*q#Zmw%hrW;Oj6Au<5LrRP1zGKdkQEnI*&qZ!(ml1`ZdC zAXEkVnLUW<`jsC)fP&^(;mRfP_0pl@1Q!`A#)FWY;cFSc zm0&fSKU!+Zt?hO;+;qBD?G2(fqu-bI7AiL9SMt^xT)xi!eXEEuzA`$VB0Q$eL|R+P z7z*>M{C=w;@_5YKVHYRI{pL$u3wiVTIX%q)D;X^JErOcttx=MQ3Pq__ z3!7^>ZNt+}pW)^bgFT#nhyAX~ddY|?Z#Y^R&!(u)MLys*{3_=+lT(m)xt;zTnQCY( zSfxU56tT0Jt0S#3P~~x6xDmm;@X(n{T~u~)Q)NEMb}QC*oTMB_E5$|hWy)Taff8w>b-hLDl$$5Z zobV}-vmuPS>XpyLI{qp-0IDFaaID!=a+~AHjr3h+gx5c z=a`ARgRA_qG;o9ISI+G>Rg5yW->r-C7uG&kj|+Zgep`ETNDXuv@X-RFCW{z4dKI+& z*o6eAoXUCiXd4pG4$K9hXdH;~ys1#nYdMG?5bb4crg1e`Qm7(v4x_=B{qnU+|%` zoV_+I8{x9*o>ZDxNM|y)#jE%*Szl!TdBcWKMoMZp5G{P%@^PHF&DPrGSMIk>PcuK| zG=(s=$t-V=K6f?xvHhsV-G}a+i?7i{d1O_H4VQHdg~FkVv+gIu;@eWUPaU`vBk@M9 zBr^PZMq5jjv`B@}&4+5vHrcUEmTTwYwEm7;QysyH_a&tUG_RvNHn6UrMvQY_M%fh) zysI@6qDD`9{q1NEgMNEYrB*vh*lTh`N80a}zR8l>kx=s`tIw+KkF6}eSBC6wKV{iI zUmxRQ?jp>P#-r9rde8mbPkVLI;`|V&-y6jA|V04b_g3gBxfhrXWdbA_-BG@ zc@fHKXhlvnF7NWd-*3OKg$4!L5MbPLPh2)&mnO9HZzzcwra{;CxG;L{EowSs-Wkb- z8B@wZYYR_>Se&DNQf_atnWou1?O8p?t)4}AvlVLjkapFhHG4a{~qtWM$M~m5>To%tR`J;w5 z*uRem@Dy?LOtHNQ(Dp!KeDp-|?ENsbETpBo|9;Hq^|&S4ZVwXQwaR}H_$c5cNW7N7FkuolCh zY-Br=y=^DzVl=or@U5+-`ZuPM-|JbW_atmO!@WCad&Q})p~Wj$9#+DIdk(L|+`~)e zT#Q4g00oI1H3@@F!M0kTvnwyRYpppom1$#TubbPA&5eU=s)oC7Mco#o{FpZ$gvbap`I=L9?pup$y?!Gt{P)d~$AEXDl$0nA#;pl% zd?CKp<^T!WSh#2gGJ1j(36ci#TT>{%6h|sS<#d%F#8l%o3<}r>=tHbz&;CR`7&-rQ zU4e=kht^m5Fr^9Z(@y=E@-4<53!cB{iz#FBBFto6}&Yn6=uUJp;$4i6BKKjWTb0B2b?U^~c`7Jr z5HR|7YnKfTh0%2SNyX#b#qdF$oc|GvA$dbbvDG2Eo{f~#Eiun^rPJeAGeP)|=O98S zKXXNi`v~9(J{;72I>RwUg2Lbx8}X{R&R^XyeL2YyPn3>s)eC-wJnq3$T0Sv+)_^(s zui^4RVHM7sy8Jm^uG9Ry;;L9W-qWIp>#y=6YXsDY6iEYR&5Z6}z)U$f*iC3G;_x z;mtzt`)S^mQl8WtHZ>l61-nQ0Q18dY%I?WUT~{rN)))B)@&)#e-4ImyvF_6etkA4t zSK?>oypW1tS&VhQF{B+9Mx)rM|4Sy-^%K_ZRxFXy^II>=c7@l)J&Q;!$Bz@W8!WuH z-S@URnS6+dokfxVMCGKNm~l&UPFQq$r9CIjnU>K^`@YH0H|M1=>z9bz+q0J;!9DgynKPvgcjN=af;U--__obMqE7jEXk<9A3vZMbXejE^uS-my@Pw9L+&h(*8t}flnU5ky z!?^M?E2ngJyhZ4lwF?~9VUFZf!XC`?Bob$;kk~X%7f6H`kl-G_zu0iGmAJ%+kJI*n z{oJeu<~R0-VkoIY^d&Ih)wV|T;6~BAiJUtb+{T$d&E(X9@sN% zu%FE1_DANdI#qL{cns5W^``x{4j;;3JiJibl8KxyRhtTKBX&dwgG|<*>zx2Pe|qMa z_&dxzT90jP?$-{6;+Bw+nda`fztW<)?!@ZJ)-j$CSm{jXWkU zn@9H`T)J3{ahMg4{2XDhJ9<5LF8)to zQ#fO!OV2UHM@JZ%VSGA(d;MeJJIOP(7fSgRKB~6XKPr7iGg-I4i#rym4z2ZH%Tw$&{CU^ft0}%AL!nQ&~sV7iDCy-j=IYy3UURL>UB)>9aTeC|Y z@b?UqzRws%M3&y-NNQ9xACmU$pkNM(>k%G`vBQ<)#*Gn+H`=2D9gkvzvZ~#?QYP5c zzus3+zy}RB^l*w?wjj$qaVpYf9zNqs#RQllzIH2ctWjO@9YE?8aeS9XeU(h=UKnBW{L?P*kd8_PL z?ECYU*rAK4PKHy9zg)}<+}7fHZxpdPoh-bq61EF*%7?8?X*S5B{ zp7actpO{LJIG4XNeEGPYL3NNnbzsD9)ZXXxZyN>*RbVajTCOCcbJgzi7qm8c1(dG^ zy{Gf2Gpu0rwjuOY&m{wcx{Q|A$GtcOMoBF#3fK_)+j|oFJ-D!|5J1{`dlVTN8AtGy zf8VLL9_0p8{`2M_VxUb6z)pwX-c%ti=xKe+ySJ)hvb^V)$ybl)mwxEpUkqCbtFZIY z`D)e7rcb$1EM(nNi%QVFrIXnj{zx}gH!vvZ_tF9CnTpD*FJBn_11fFMX<)k|tZL>@ z0ecfQ?6ac)f5+-iKCz=|48SWQi?BG_Qru>7jE#*ApsVZ#JBSe~;SB5`u{v~ywSjMa zeNjvFtgJbb!%VvjMKkB&44*2#G*;4ZqK?{4+~$^o!(|)Du{jOa`uaI4&xgAUJbOP8 zJt7J82@V=>+(6s*fSu^bdg5oWjY&d6sLW*qZbNu-GC`@ci_irug76cmwMI z=V0%{ci?{;7JU;v-G|0$>X$FA$IPs(RVxQX&H_HAgsJFMJa59TRUe=;{vl|n@uV!5 zJ++ymy?&EIc>g!#{{+|6Fi@L{m$wFn8R=3(9Z$HqfLj^fxxmcsyzzpWo&BMk+nPQR zxRIEdnHzvTxg=s*K8A+E4M2KO%TpKqW$MsCl-ARuhCMVs-BqwQpvko4^QTWJkgn(T zz%n|p<`9&>Z-3e%bCgz?LjY0t(fSL&g4KSF#-|p3+xs&lS5=1$p;CS(#QynLe)X{( z&N@2sj@cRC&#dJjCf21XKSZ&{$N^><7bYH#)FopZBS3utGpJTf7$C&~hY9Z2IWJ_n zICz;J0%sxDaC9(N<2f_|;hm~6)rF1Px1u^=ZQxUn)zQ~*O8m+;AiuAd-La7PTPD=x zS0Eaa**Ci$lO?%K^b#gwqwv?LP_@#d76tN9*|u$n`uaX+b^)TsBRRS6VATzEopi@8 zEiH|MjYOzv*hnH>pjV5`vtSE6K9$hG!m&p!HA;(0fa3iTtfSlpUPNh?sk-_#@O&d{ zy5VV}XJf-t7Tv#!n1e21Ip}h)9?#M{G4p+xQ;okNNl#^UsjMPRpQ}GAZAZn0R_}D3 zs7%DYm+z3g?!(^T{)p=V9RmtZe1}SEVq#}bh$gW1>b|?Xpx(=4lj!JZgQM*kWD&fh z(IZ%WW?Z~>VFH#$G}=pGJ{w(4l*^Eimd4U^Umyc9Klp&ik&gmfM4f(Z&{mO2t@8b} z@_wv4928~3h+k>+=pZ22e&l5hKE&JYgCUU`UW;u--L1~Weaw?fCv-w_o$SQKl~D{; z&qpt)f0*;qfq93IkMHM7MIz>ehf-|9&e;m9c{I0z(mLibSqQhSxZYct$tfrmybg}c z@x%J3wo4h$Oawx8xR5?UR~-))eFes4;z}GJAI}C+U#WSqyNz`}Sr;Jet31{mTRjb% z`_AR;#;}g=cK%D}*^7glRZ1@_Z^&?5r;TO6e~*9Pw{^)h7t$61sDIl-I*dt-nmAt& z4iEV@bRJ%LP*_0Xp22v^w>NM_*X(KN^CMS-I2tcC+~v|ll!Vm0667j+`94`RUh^dH z_W2VF@@<{P;Ox_2Bt%1f9kIuCzx_qsBwzaSE6$Hw7xK$wC|PtT%dm>GxS)0<+jAGp zkbsVWII5v&wybGFIq=~vb}{_-is-#(JmiW8p;JO$^RJ0B*({+a!F5jpTRa9GSFqkA zC}=!*>bsPGg&mbK$zz-1Pv6Hqm?$B`3e1+ADR?q%pu#uw((_AG!I^vL^NsrXx(APs z+dmKQo{Q7bxk6n_XCCeF$3pMp!U)+|Q|V>G$8+UTvuG&Wmr;S=e^go3b#vT`Liv!Hp-71kn?fnQ6RKxsC1lN!-V~iaZ7jcA{0o)MsNCw(Ech8Es+tMQi z#mjrIi!g|8E{6TkVtaY9pt7{H(boCc>V*VnEf#$mhROsbsuGj(-ycm?hwM%wBmDDF zzkh1TM!{jGYWp@M`hE)N{FL{?sM{Oq>S8VC5O&5zWZ>63d%MF&!Bu`s!-DVK|vx{;9i$@LRy)1pWPSgV)TZP!&dsr|F5)j4~KGV z|M)W$!j|))L}Sx|8e%6ZB~c_QIpo}d(j=lXIV&l{ZYz<>DRe@LT^Wr;8cMSpNm2

GQf|d6ozqgBZ`=7YH@tANPM%+jJQ%`ILRDyi#*pS0ie`2`xk}!A zl{9f@!^U$BSI?AG!Kb%4r{E8{R?KEe(|7-?g(d+rXvl$OAvoENb@(A*} zFeb05|6#te_(n>{_f9`AtjV&auo&Q+@_esm8=F5a2x;3X ze$^Oq!oEtw>-fFZnVfr%Gpx5CzZ7ay}^4RsuxOL z5-B|ych=(<66=*58lGTru~$WkA}13}!z5z+QC!r`dmD(Thly{-h2A(H#Gp8(03&z1bSEYDC>2Qew5h#j=CUh>N9D;U9ER%WMrCAw)-g$X>WW1DaD_Z7}fatN(*O1|OJ~rgxzbA6q?UH*YZZ^Y; zho%SSC4|7YaWE6CnMxe)B&9haKPl7kG-0E&^E-!3r>$F~Afh0Gvv!PB{=&@{quSed z^GR$GjbC41Pf}9={!jq?O$%h87|)-*PU&xG*%`KpnjFt!leA{q{F6~dEg}`moKIqO zCv2Qi!D@||i{rUZARwj(e%eDEX2uyxXqVcQ*6yd%%|H}@B;SdMpWp90Z@6YhUA_9l zH!(@cBN?k8C{gVD?gUPP&xSZ$$>-apg($p1Jo*uZH|Z@KvNQ7hV|DGtRp?7?7O{7T z-mMZzbvLC6|8NjzI6(*V{l_r^mdltH=Q(scN^tHs> zz$wqDYSBavX;Lzt^+jel+1p>q&7B1v1MTUP%F06^Hvbb{8yg$nAUq0upsJ;%g(lHzw=I%~h>aO=P^owJ7wyfiqG?m<5UqqpYgXjcB(O~8 zbb9YE?x7%^?D*5G<`eu9{)vr6W6UTqX2&Ccbf~|@@lkp4nUHjEdT5Iy)Y8z2M!gD!z!6(l8~vhp zzBlvo=)=GP{qLy#+yc2#OBl$KFiwhf1!gvlC2D%jjtS|?U5I>ndxA>m;p_TMn>OVS z1q1{b9qE=vWSf?NEpc+E9KyZp?CtGS8-Zz8)z;BLX%U66w6a=v^yqS^ISwqj8$J>A zwl4@lI_ujU?PT$l>AbFW=2DY%b@Q3@^tA?rEUuZLEaY*|M+UkgF(7b#fMPTW3fSuE z8V{X2f>4VP?1CVl1Ze0nE(O)lFm%tj)A*TE5&Xf|a1TT>F9O|z+dP+bLI>wgH1xyfVL0v>|lnR6^KR3J2tFC_Bnqf_KuQ<20eOX z*!tRw%Y-4}dkpGu+wAO4;DqlUunc)jYD`Sb*_xFt&WlsH=TcMSXRi?u4Gj$tZswE( zbbLjTnVNR;bLV~lA1X09&RxDNerfY5oQLkvjr?YPgO%{ib;d;QSw;V-O;iTMAvU&K zTa$Y75JSt)VMC4#g+jTRo7;9s4|Z$miSlxjk7}k6HPYzyfK|oI;}jewf*LOTA38c% z1;!CCUDB>1JK_yfh+jXHge}EtKLvIkza7&5?;p-N#F**lZ6-!U)%hGbY=b{=Gsh*u zpY=m+e1l}vipO616av!CAKc{d$JibV&d)+K`){9ewX96-7_j-XsgFE03U;cpbfZ$MZ~)48ZPP$Oh}lHheg2&oMzzQEZC3yLHhb78(OKg0P#K-w><}RF zuS>M9YHB@yNEXo-dHZm7|Fs)Jj-!mN4PsR5ffd!Xs;oXU*PSOp{W`?eh3c=+oHHW( z_wP3f9=d9MDKAeO^)@(~z3xuq9&qdRQeV)#QDhkaK>hjxN36cZQ3(=~&kh@^SqRN3 zlz?q8GZQlOZ~4&ESM~}djsU2~KW=SR7m21|R$2grZ++?1DUl^R3JIu)ejOfuGSXAO z&BdkH?s{RNG^iyf9P?uy@2mh9OCt5fC%Xj(26CWGorI7Z>K4CFP7JW}Y9U3BdNbks zE@wJBy#VQF)i;Gb09|4C+rnME7}xD~c3YCc%q-rk7A7lvV$8U9d}N^F;>C-OnG~x;64F4l z460l)6zd(t-7WD5>CqZQ`QR*KH`D}Z{FJ4x3n0bLL0?uOuT#}obKU~=Ru33OB;RA^CLv0Xp3r0;rIqSArh1A`lC5P{jKIvwZl%H*or5+-OSZ zU$#-m$W1}>K*U1GiE*$7XW#n?m#GBha0T*nuImmfp#liQQkDx>!0IhQX4i-jCG_{JO zC@=u4Vzt&WoxbK$CuH_w;Rl#df`1kKaACP9xHO7TBa^ABs^V=74u0Tt&m9Q%ygc*F zHQ^k@7#2Z6#YW-Z?~&E0RbYIjd53&w>~wa%#4VqKQrJuaDKROkdF_ydbp_b4d0P z#6OY61*?BW7R7h}G8z9Me9f+Bi2+KXI=i~s5S{V#Q#@~6+sr-6wzjePg_yr+kr+{k zVCFYNL-y;XG@2B~8*N}6=6hvjWtulosk3wHVli2bxNZgEf9Dz*i+?>|bN0f8HwOpS zA-#!lM0z)Z<>QcHF|G}5Vq9sUSAhc@yHPE8MxthL)xN0r)kJp}+%-|r75U{trYkQwDXC(o zUP+qMbT|{Zxc-i&+nxTNWr_8e|zDt2bD20h2*Sz&2CniN2_16-}P`fq|wpQ zn>TNoZr^_1wIv#ana8!Yv*E)*WO<5wAQ?v9@mWQXks#Vo0W0U4{<&vh9lE#$cwQv1 zXJPS54sfsCBrE$49z7Ba&IxPu6x z3@9mi2{Kv$+#~RCAG;NL&&vYZ%@HxMOSL=qexZy6wzahpFX`1z`}gM#yB#}b^!)jA zH(rykp}m8{gQWeRm($8OgnGO1825p2ny#7!MfPPW!=FEhnYp9D(ca04<;qIkpJ{s| zdPB}SOG~lNzz41^7JQo zlycLot@D>Gx%ISMG^PIYsPb<<(vWTLQka^A=TlyNtdLs-RYtyE$%=4Mz@HSSCdbPM zxt^1+g8!PkM))=MEOi@M{+`fi+%p~Q;gLgi1>qZoxO(;K6*hYg(&UT_B$V?@&?IU)X__~#0yxT>u7StN_;!h6ywg3C<;xZD2{T*U zKZegGC7m1@Il_+|9~u7`xc9CKB4Gq9U*jUDK)+$RjP*baue<)nY+^tl`cOs`dQt(U3tH!1xaz+(@vcVTvin z#K~too?__O7a6?sh_g@@o9%*1Pv&X05KGlHVLLDZ#0ngn=(A>^a-bz_*d_p{GK!83 zw);5?wE9IT<9=cD#wsOrV^Mw` z2hL$E8b64iu~W(c8?wHmsU?HEk0Mcbw>275#RXWN>9>KdDJ9u;>|4jtQ89 zv#6@v`9H=Zr&!R$mO~Ex>hN6;F5 zNY5@aZ{E_RP1j1W9aRPM+N$s(D5Ue;M$-L~jvkLdWBKw1mC2Lc?+*IcCQ}OGvq=wz z`DK=C9|wtWri#Jdl3T|c&o{Loe0U@cGpT@E0o;yRKJ{~e#t;3@fOuKh&(TI9)BK=W zZSyiIbQ@skSVv6ADE^%5*T19u|9coeM91i@h6Y))`!5>VepUaDlbO$S!XOE0X=xb+ zGXo*;SFc`?u(ar6582I+lM$f0U5n~3}Ps4$Vxn!xd zxP$~y=Hx6SOu<;3Gk5MXI8_*yonU9dhzXvkPe#p>TQGmc3MGtcqVQmoA@(vbIR#uh(xLHIoekP1EZ%92tK!xQ% z6lh|ZID^m%Qk%#(H+Pv4$h53 z+J?h1$FS9)QYoa%bu4g+L7Z0XI*FlbbhK9f(eC6oU_E=| z>eUKg!yI@`>!$VQNAA8w%FrakjrJ5F*dR7J+n`(a82e{~udH>ct5%i17=jf88m$sM z#n-)VK0ZE9PS`KzBE5Ic33<$%jS1BV+Xk^F$=Q;*264S`(k89TmoK|L$8j84Y>%FP zyfe!7#)0wWv(;9|e*Yfg^V=Mv0?nR{<{|h!y5AKQbBd>=6mnV$D^dGcwJN}s6P&%ich+A0R` z8yi6lj{Q`@=oW#YPOijY_d9<^ns*NVeaE&pNDlm0K$I(de#=J7k9{s+>A{UcWS)wp z43a8XLm_4qt^Qplnp}s$=stz&+D`pTFY}#r(f#ZZvFq2b?*>A?1kvNpq8)mNKW!$l zRQ(4Q$!rs~G81d--y|*5Oh=$_rni?4B4=H*0~P5))f@BwW8u3VnIUrZj7^>Cum7J_ b^rtCd#+j0m;_bVO2)wK;Y|U?*?L6^6F83X+ literal 0 HcmV?d00001 diff --git a/training_loss.png b/training_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..79eeefd63a6a98e14cb4b28bf7bb5787ba8e5cb1 GIT binary patch literal 39037 zcmdpecRbgB+x7<;WhWz*RZ+^$UP(zpQOVwsy=9X^WkyMqk*w@Z2w9;>_FmaLdq3w} zzw5g0`+2>d`;X`O@9Fg$#nPWUne?7_JwF;S3QG zz*qQN`hLUzMC`Ar+TXD7l*7 zwVenTm&Lz7!D(e{%vGu3&;(C8Y%Qy1hhgM;=wF;<$s`jDdoy+8s?=SlceDME-0qC+ z5iZ^eEx55h5T)Fp{lE$*-HU1Z-qnl>2jcmeBB7SlSnc&IE9@I~nzt$Mu7!~1yv%8s z{OFn}st~rk9sk4RR#<^aRA`9DK<0g&pJ8Ek(P5i&%VL8Ir>PD}VU+O8;kd1tpw~0_ zmBC5ibrpVTk>FisVPW~e>5IbzA5fyL!b0HN$^LqnEPT7Ih8sHr-=Ef}!;Yc{@Q`57 z;rpNI{(t)MT0@MEn%ZY`c|vC_QMIYYpVp>|RIsA8m5TQ*$xKIju47ml3qL|3mlH$Q%&}EHm&XKavk}43_CNlLh22> z^3C29{b;jO2p81*dT3>?x3GU*aJ5fqB9dDVmsLJOch0EmqJ)HF51x?KfY=#U){#KL zfr`RG4}WH9U)7Qu0#>87fi$Ec-`4sZdo3bF96seowXiaJ?(m3-i^sK_ruQH7*C=u1 zIC}Ib#nGd<3Chp2vbcBVtURlxT9Y)&GPN`7U~N1GwIomd{Hog9sol5NYGyLagQllV zb9)zSPgv>J1{^0MA<5fdym7zvF&<%-(~|zHSFgJ4Ptcb~H%E*6!=aq!=U@Gl|MKGX z)bF)tX`?gNW6^Wzq_|6-)&%dmJ3Tcu{#$vYKWqX5 zDt%UC4X@!StRLPtGgrNLkD81@>=~`L6Hc($pJ(||c0am`Y)(t;uC$u15y$f1yLZpp zI;*cyoR{|~`n|n?W7Yhr$g_-!6UfjI@r)4w=>GPU-69c^x{uxa%LtbjF6n6`a))@ z_bE}wpC^~GPl?jfQjxKf;SZ%pJD0n2jhCiVGxfhdJ^cQcf0@IKia%o*y&J4Z_SP*y=U&hK zZ5_AO_WMP11-)6imAJ|6rWbX-oIEWPP%-`^ipTlSZ-MuXed|+%jBZ|k{@B(RY2g+QPR8)LTS`!_V8!u+&kCt$k5cRO~AEl#~giKWkaCzqd_EO?{T&@wi+4$B!SG*x8TZUPVPhLZYHoot?CB2_+wU_aOLDAPDLdpRaI4ZyIZSOevBSniOY?KykdX8o!;2nUZA9< zC44Q1J~i8&dp&0mPLVmR-g<3TtDNszDT+gehK5716JI|+LNNTr`H}^>AY)Uq zSUqhmBO_L$rgw)01qC;@w@0UvVtf^o!us~#KN_e^R*D~*nISedHip-Y!WmjF+04L( zU&);nu;_W3oP4g&ZTW}H_Wk?!t00z=OKij%Zu^p+_*wGEDt=ws)|MCI)zI|xi;xhC z(zPBlibHW)7X1YlCmC>0eB*7%0&`uco- zoN@{`MBwcnwNV)M+WPoH|)25NC}afe1meofy_3=0c8v%yz1 z7OsI|6HV{=X51|1oJAaG>A*GcVbfQyUoWKlC}4DOPQ&2+HvJBrl{Gc^adB}@+jE71 z^Mk*PhvAZzzVkI>*oXM|nr5!5#*3=C9 z&}wt1l6tMImfDWV^?7X1VO}#CC5nB6Kf}?9+P5ln;Nb*dIXA*shkqs9{2}SgO6N-fAv_SDn1%_$ zpq`%I$Xt|w#n-NEgW>7vBaa?El8+J~HEw#xBcgaSI7P~AaCU&2=M+3)Ix3{+RXS=eE$^}o%$dWISOYV40PCg8a zp*32?#>dC?+me-%)6%L9g2f4RbacQ<1i^F(JoYw?m!u~exysRZe9L#|tWZE#($WgN zlcYHOGfuh)jsnn=_zDI3EOHFkuHX@3;%uuyiFcxp$es|ATNa>&SE8*94?jlfDJUpN z^^2P@Bw8=9AA^m>;iAG_h2$%6 ztZb83);`yTvU409HB`66^c;J?uX^sUd!D;+q3Wv-G1?Lw3I!FL;#GZp7F;~MrJ*O} zEEg^i_ZC>7IDesd0Wux8>zcWI?nwwsI&u>QKvO4V}IOs-oLcqKNkO2y`$hcsE)f!f1J`3gh47VhAFZq(4 zdI3L}??}%ukKnK2RQjjkLUMX~jag<{M`A&=upNP5zdhB~`a*E0 zwks(F{x_UjU-x#mYepwmUC?T_*XFA5$i=+0%UqZNF~F`QI5mrOU!~>SjLJY1JI&3V zRZ~}*QXp2~x;|e6>+$s=rp3j=IkjFyS$;m;EU|r2VtbbFqG}daQ&WT1Vbd(Wj<5s8 z!glK|$v8-|aIxHO?(Q}li+U*g7uk*-@qmz&+pD3hjGM0FkHE=dB_T2mNIpIGKL~_{ zg)vN0vfQ!1{46{c03JKKt?$v|CnWbaPBVCH=m`l64@bMso2>I#l`hHjxy+=axUmR1 zd$f2WTDsh0x4NP{zxgX%7BwxWX2t&Ax@0M&?~#1L`ST`Q3~;Qbt!|azM|$9x0H~_b zIY30w{Ni@Je1A`Lz-g2;x98`^su+MBv9&IPjmh}n#hvBm`1_gK7}FjxcH_p42ys^d z2De42KF|H#Xa4?p*hP&368Ei@%7Ej1LqkJegP!}Rh-of*X=l>v)Nwe2Uo*dHTv}

j-Jsht z>DpYcamOY9T3cINk+HJwcVNszeZ_V%Qu(sn%)GqH5rS6H8{6;%W{hHOzW;*dg$oy` zz!rVrSfr9#4|_8aflK86OuEH*$M2Z4i^~yWT26Bt#ygM^&%=cre#50hQ~2P)gTwRl z^IwZ>MyoB(pFfX_4NLBC-b$^zlvMWm^=p}4K%nO^ioJ#Xy<2?+4_jC&TUwOPv9lj7 zG`U1y+1PlqXd>FR+G2gcHIw4x$&1I19g_`mo_PQ0NOW}c$mVww4KtEpA3#;sV-3_c z+i}urXh)6CrGIXM;GJOOYV2Jru3<86;h7~3rxF)^{HBO@aM{aliggOlRFx;BC!%B~{_uWwni%{hhkW zO`eDI;)b6TymhzA#KFNT`}_MRoh^1G%u-0bb$49$09naL>rhO8d4fX}C1@q^`A6Ff zCo{9uWJh|GVK2PcXKA#~zx+f-A-JQrOgv?wC&XD?48mB8VTpoTw!RN`nU2pW;CD$} zTAKXgpFcO<@$q?`cCujpK#jW>{eq|DjTdm?S3zt}}yvjLVIuCyfN%@=}1%iX_~(= z_C}}LLZi^~+Kn5BjebPp$67MfMe>_*yKmVOd3$+#<1Vo=Gvk=%tBWNHTkv&$f6GTn zPtU$50dNInsuO~i=Br<{wY8IzlPe&e^a;u(CPRR`>OOwmqG=+qzJ3P#nTR&Rf*q*hp++VSD&@16Ms~W#8;GsoLOAF!80KzV7(v<*4Nd~~4;pQ_f@r1tQj6sltwEI-<-hHYN zC7{>zPB^q&UeLS?yLIbU{dQ-p&4*WU_dh2l67&{Yfh&-fWtQ)4T>uwparXMmkPCS% z4uDDM1Qa#T;Tp+q#YhCMtgJZxP&02d5Hxd-q&YVN?L_S)Jc;EU7)57B|e;*>QtHg%HXD@=E7Lc?Hv z&{Oi*5uc51xUJjD%H9C8Zw`ze@xiU~Bc~l49T!WS+Z?d!iH^S>%)-S*4#+U%cjQEK z4Ee2F%>p_LzWf@J%+4%CU)w_>UhC>9{RHg|>TEa(|;v5^F2F{oNv}m_Et##L8q#RoWjsNUxWJ z8N|6CG#q>CDK?cRF!>W#uYZYHHn`l)Exv_GM*df%7FEyDkBAJh<$yk3b@Y62A_zue^tsp!-FV z@uv6wfIa;e%AsC8te#E-P_NKXQhHpk_2R=~P0meS{OM?NEVR>C*aCq`nP7&%?fDXN zD1uS_xdc#8&bt!o{1Na-ndX?`cY!fgq$dQJ0B&iWeb@7+E7D~ThmMr=c!8h?Lu+d* zkLRA-#>U38W%0UTMsjSV7^<_7u&~pcCZ@l?y_A)et*9cG4B*fx7=mD2_2HU7ps3XQ znYB=%JOk8ypSKooX=H^H-Blj=)9ks9nT#Qxm-NO}$ z!Osq<8w`RIJQmIC9<6J15+|ZO)`dhw=-t=y6Z7(p`O|QG?7ojH3}4~qV-QfEF0bx)8RBID6^U%iJ_oO;yv z7(7jXt|!0iv%`hG<(U1U^umF!@QGjJ^#k|g%Yr4g&Z(rSK4yz{!aaRG=$QY_L4Xi4 z;-_+retd<(;4y?opcY1w&VSB80Q5$o zs5MbR6@mg>y&3ORjdG+>$<(R~ULqorI2?40c5hZA_I^7;2WW!Qm*$=PvZ1Y0=90fr zr`2i@F)}w7_?(Ju1ZoYz5-${FxF5}HLhbv*P+0~T$lvR*0ekhh{etMmck7X7C5RQu z{32Z~@{`u{`RAO)qfD0pVemklU778sS}0p*0Cyk${rhR)3Bi%w^+BtG^|I_BH?SR< zs?GAXkLe^XfteoF`7TwYBNOjgK&jLC(avO)CcJndQuxtG`-R@7#HU z(0r2O`>){~8X_y#uWGpO@2-b5jsXmJ%-xPyH3vS4<^1{jhugqi*YuKVYH9J9bzJ-& z$$ztS1J!+Cg-2WGpFDYz4TK{qTAZDo>BOA40qh4F;a!G^1Z>)+UV>uHfsFch<9X}A z;?$zDRxP%@y`7bfqB)YEIP=REJ2J|Oip#IU!$;sEUupUQ-D@xMUd?1^Tr7N?d*@z9 z7Zgfoq;)vlxdCzL0VfwB;V!b`B97QIIHOU35?6qM5$z+RrG2Av^LBN0^<|1fK%)4U zk#&CJH|w~Zpsd#z&H)t0$Qqn_-joo;p$GvB`Y-!?t`Ii}x7Qcy0QNI=Pe8f#P#{bt zv-7mDu-YFd?ILTO%hDZc8a?c98qsB-ni*Y2_(e5u#4(FLny>y{A4W+<^#sxC&nSq!TFh^HPJ75*T_r5bF+=Ib!sX4cNH0rW`$d_^?H^gwnlx4^(cEVJAiGi3Be|29VN@+HzE{Dey&35oc% zbm~dV4ALKE(Y>wXs7wON3JfCZBU@(K9o>r@tgHl=wd$m94%$FY3*Q(JtY)R<(mr;5 z#Ow2vCO}z^jvjM%aq&}79_9mh-Nlpq;T<)Kkfx-r21lbN$10$0a?XfKn zvy6VwBj$pcnfb@OaRN-XsB#{Jr?t^Nt=f zp+~Ld0TO!~24S9O*MQeWBqn4xl;#c*65h;_p@wIa%n78a<<|ZtE??qKMi>o%V949I z=l49&%0%pCj~=6wXp8co6R_~p&kFER zM85Gk-QFVWl;Vq>_WFNZ8x~I)HyM~dU)Y5v5~{RgJoki6O--xkF5J&BU;e}sEWSq9k)d^A zlc$t%yg4Q~->g&4H)B#-d-6=IA3eH$ zdW)@qL1_u+Xp;I z3t09sYU*LAq~NDYHm;c}_I__KbA1H0C;?(@F42=pYz*OTY;DQ0#?wnwFuKkHcsTQ{ zvKp|y--iz@h++kzRQBf0Dny?_lT1J~J5LyDmupw9cz+}z!E$|V^!4=@;f4%aK3tPk zP^f{hH9S8b^y<~IG_4XEa9rbI&>ZI^@qU zP>y%y8k3{vfBPnj9-G@NA+4$!5#eUL-v4M65tq{B;!n}p!0r5;vKR8)6Z6RjRXSdIZn=6073?L!}#G*08Y&FNfFia>({T_YHEiuSZuJW z&>Hj{!UN`?K7ERi4e}zFmC475;e@yiO)xGIhXfis31ZS19j?+v-1*XFMF3m+kY+e& zaMxB=s)1Yzg0w&f{aD~2Rm(lf>MM5QgC)KKvCQqXs4KQuf%hyZNCuvaa0Qh21zo=5 z+RdnSc_pp_5-ondI%$IP`Hpq7sci0mZ>YpM$Hi3#{{I}#-_Y2&aLQ(m1c(;kFpked zWFcV?qkA;qjQS`qqoacW&+5W~jxfH52!3jJ;1O_asK<(!Dnz3~*h3zIQkyf`5Lj=Z zT>J5XU|`Q)9@E-%;;z5qWzF&zf?H0~4xDHk2z-JcrJ8Co% z3cRN8%OeS3BKiu6)d$h!-cTjSDyKY!0{$>itZ!y>-8OCYAY6E7W@g6TW%r4Qpc{T` z98K9~n$JY!aDERWVgR>p1qM8XmbHTeH3117liOOiu~EV$MoAA%K-d(0eSL?Ztv395 zCWDIXq!3eg5{F55dj5TiX*n;{N2S==**OHIAEXy4Frm`=I$4TCsNbP~!w;O|t(cXi zWkwcMP0V6qnm{-ngVxehXh9r?3g=o>Pe){Ja1EkTpe0Q#m!M21>X^3JspENLEj>M* zngOB^0^!hO)4ve`kxLD=L7&1!AYKR*2o#8hWEK_eopmI-qIDk8v_}Tr*WXi|IC0_J zYdO82u~IT;DM-fDA}_GzKqAxwhR{1TH5K|z?10tor~-Kw^6J%@`POh=ZiQw(=(|?} z}s$*l9_twJRKN-EgL1h9$-s-LgWT-LwqUHz_LzAIb02%N(Q@+sN^(c#0CM$OS2 znnl)9wrP=(^uu8HN5P&?!|E%0do_Sfqebvv3X-u)nKoi<20XSc;sv5K2og^RTiC@g z`r7RSwK`G&9gHK0uR=E_DJeM&y+T@OI&#H#UbEA79-~GX&%3a&5Exn}@Gv4U+9d1g zw)64sr%#_g+iCL9*iJH0B1)d;l|IjEYo6iF7zy{MP*ER2rNmwK*A&N(pLc_FY4kp+ zn3p@Tq2!Xg;*Z^nJ3!fUJoWWG0u-NeP(s4`Z4Zwx*1Z;mhdDVp(fby`s9!8ZB{mNO zNb=FJEWWlw7U1vC$3E4PM_ONB|Km$Ez7)lmoj*6@fYb555*H#S0UYGlJd;-WfeENp z5S7mbBqlREyPVTZdf`*3pL&uHDcC*Gr7!ozuUs)!Gb4F$Kj@^e{+~4nXh$I4AhHX& z0$98r2r)u=b-i^53{0EWJ1x3t*EaTFZfH$Ry|_+=PJAX`RnGb+y1d(>UjW5f}dTs|87)KhuM;o46ey_+$my&zV&~*xn^2+x%}#Zh$W7-eGUAoE%=TW;%|ppZB=2S+BITG{;OS zc~C8NyngxeG4R%Af>rEvn4_28@ect30gma{&^Pbvn3$Z0Dy_FP?~}M5)`jD%r$bIo zexPUSjK~mkPc1X4pYD{jk3uN$8w;85)Y~bLBCF0_Y-ScTUoyXPTOZ z#KZ&xQ~;LIv0J~H<3NI?r2^eED5FqaIWnR@Hy+}Hga|Dyl)zCXr=&av>4#&OIhnLR zsI>lcNgH~C>LMVR>_1|zWPjCCCmvvXyAroiIdDsC>1lhfRCtXrQh;ey1L_F8&gh<} zunqu9XJ~S=>jnpl*}=;VCk{z@DJDIMiaG%X*kiB5$QYw&O07oa=VftQA)SGZPwuv| zn!#Km1?9`YUk@)i&1Tn807nMBX5a#b0M%h$U>(puKy(5`H0a$|e{tUq@YOamGh6C1 z2!{R#@s}@OoInf04I%jZJK;NhNmCTockdoU#T!ymKyL-?pn_J%joG9ZmGcbZE)~#b z2XRQ%O|G&-=_$`R03mw1UzC;0?EqB9kqgXInr0z05$Fgr2GabYnC7e7jDw0EL#(G5m4t&1V8)6 z4vX1wT6uA=$-ORwTj<>PnKvmwLj%i}1)YARNWm820`#E+ya75QZu`mGSOs)8eE~## z=^jO`MAf{9l$OO~G#Dycyn$mVaM??_lcZnijgL*cY$JuqY+o@IPPFhS=e{FyGT%pHH`V(IU-;ZMhXx>Lm&%qh?2H;P*0D#lao{4rbx>2fo|~A zL~O`t5VM|&XkkL}=g;ap5m-Qu_XFOybe9;!oG2j3RHkNrc>x_iCl?p}_D}bm0HKN| zaI{Ih!vpzY+p{NEEMLbIJbV89F|f;P3xiyit)}@xw!ev7*5@rckot|1j_w?55C&FS z1vlH<7M=)L#S4&c>Ec&nI$lE}AVFV-hj+vogpOqs9_)M+Bqt!Qp+i-no0}U2CFNtt zH7B4~R|JTB2{6c8hj-u+$Dl`q@OVqE@hx=vfMk6^Aoi#iJrV%ly;K#w$)9luenT16 zJ3p#4p^IXdJ*f*cK7n1ff!)Ld3Pv^q49?KpoW&Xs5Vpdt>(FDukdP3} z=Ss`G;}2+JA}I=k^KVL!wjc=xYTTm;JrB#TKO{ww*8j}x9t1F?`%_8f5Gi&r&=-(n zpP4?}&oA~>zwRsxOA3hkkX8%0NcPtaJwLKVfHH$d!Eli1d`0>Gu9cZ!lC-*urRdD| zw)$5Fkbi#QWar=@g%n%^)lpo_LsY6Uojr>WeEFOFD4F*GH<(bQc%{jCAR`3B1goKc z{`@(rzd$(fU@lRK&43&Pq)N2Z3=Ew0XA(1>i2q;cOO=w|f~XFdgGu%7M7ziGSw`oeu^g5W|zK#HQD%p2B&iy@IlDG>EY9Ya7$kKh$O zpV9Y@7wl*x+>eOlk*H4n>1(UiOHGnTsF{mWf?a!pn&lN zG;n*{gZqflL^RKH)bKfe{5aG@_?UTL5oK%At?Mc(G@#CbF5(NQFh({fVrZNBQ8)3F z$T6hd1A!3bsZ)Va-{}rjJeHX&cH88IaET3pvw4BPe7ct!0-*vFL(D+lL8&yd3tff) z2&;O4^pKn>RwXrCuZm!4O?07@3Oc_sK#tVcB~RJ)sv5g{>f97M1gOP6@Hn?F#;Oqc z6mj1`f*h2kfCrY=B;2>2RWf3S2nastlzWUU03?Bg3e7n@45c*aHBmf~#OTCbE&+-F z66ZFgGw8;mxc@7tCI0#-1pV}aLTDW7FA`#vm6apKAPR_Uo%$Jnog6yi&n1y)6aswB z8Y!cMrtNp=m=?Hbn=<%$f&Wbli?Um<V64hTK(!)WaZtMG?~p)NDxZfq8-J z7+$QK}1E5lB%9wV|d)o*cpZK&~WcVr0Y{nqvTP zh#)A`4DPK7B1$~Q?a%X~W+bgkigr1&zcV4(lBpw!UII+?3CLIrP`ARq)*vefLkhphtKTS3%M2mG9wbyGv(|<7zSB|-ZLdT6T|c#o zQxz9=Sw5I)+t?x`q{jj2pQT;5#U8D}X9{`RY~Mh`UOL=Ha*U ztee9+p6H|0=%cRdmMFk_?rpL`kA?t5R3QAuf$QfOXa>1#cX#*5Ep_#td!{^qdta_< zo+zGAe;9c<4}cY%`w(0M#H! zmiiwJf~3U&#Eqc&lFMCJ`)rRAbxq9T;$lMz3*tw zr7F7~`{N>?3~22zQUn}8h0Nf(HY=s7sv0#v1Hew|3BiHtb3XnqJ9MH&d*oSIawx%B zA-ke5ar-`n8f%zZ@x3-mZ>Ct#uK}_H`3SGNPQ)~v{W&6BM8+Wgvcz%jioLykR4_z` z5J;K;JsqzQKY#X3<#;F#sT5N;A*gm)T?#5VaVt=?1e>UUl!D}>fOs$r&SH3WmIR9s zeZ+Z1S~?pH;4i`jBZ>vR-MRDUD?Rs?89yc@pgfh^<3K^#(#{rE4>>jfs2_lb6Wr%AMcai0y-#$xz@ZZqI|IL_;MM$)N zE?`R~Hht{j6S-6v(f|u!$uanUe)4~TJ^$0f9zJQ{&)=}M;tfio)+jj-7HsH8hxNpk z(JsGKzWq2RSSQ=@4(#Xg2@a&!b=eMjphYr441z)uxih$RNP(h(q zy%GGc-(Je_EC2VKV@Y59alpaO{pP(;h^qftRMq{?u*EQDyP>FjH~but^Kg!5kB@0et@0i(2Ul6>}oA zv9s{`Z(37Kdoj`#=!8s2FpJyCZ*n(0$#8%Kc$t9CCXo0tA2C|{#XxvFduQ8=ed8Ta zj$aM0g>9vrq#oIK2ta7`@B8kv`N^a{{yR9;m;+Wi^(%o+#Qt1ge?KKO)Bs#T&o-rf z8-`tOO#&pj%TB$uI^zfBoVHI#fg&rOVX{AV9L-z3Oz8ffg5QsU36P*6O(p_XppwKz zzzPDX4ZyY3u0P|)Rq*uwE|it(&^9s8pXFt=0wjV4ZkTobEoe1BdkJkR7n@!fi{Z8& zx{P#nK=grx0rZErQ`;eSWy$v9#Hy!Cm)B^-wjb_V?{gqpcFgX|h8=(Y+_bg#CA{kz zx$Yg)e0mI|yREYlXsiZ!`?I|u@V8zZsx(@NZ+Qs7gMD+#e2q359c+HL7M2V!8-_Fx zYd#73rrhnZ-@ic;ayy5Eo0|gs_Q!xR9Npr|$}vx%KR7z(6Af1lDh~YT+pk$pOtZ+E zSrG(JP>8ARbV0A%8>A_oyu7>@J;?B+&p%{+4F2~rK{)SAWBw>ejVs}pvjO~pZYICT z>h^64TRXe6z3?B(W5?QN-C=Iwm0ZHLo*w$+jTHx;o9)kVS&Cxu+v#9k&|W}iRg&V= zss3qqnV^%;VPr@aa!~;(kfgI|Mg`B~NJy$Akqusgw1SrAu`x`D`lZnEqMcX*`6wFw zz)*7;NK<{-8RiAen3F&_*8vai4`VudfSiR~R?dCd7$Hv8x^AFP&U7D_8J(5XD zGdhq43}C(pI3*fSl}v4p`7i*Nz&9H z(NS*K1DE*&e@ynx_e$?2Lx(2`k9ZYF9cacRj23m^+(8G?NsS)j!dXE> z96l6jSdd%C(;t!I!-ZhO(g*=}w1@`a9Oc31&V=zCP>CYs=!e1|{~c|l0pFG3U%}K1 z|GlbnRm#=}+hnXPQmNqNz^JX;z|D# z5@dJ|qJ;=Cyste3-@N_bpGz$|io%Y=QS^kS0|Kt?1{}b-w`r^Ha8%EFo`(6RuS$<=C~1#O6PQ1&%e(xgoN}t(^#bX zjDRmZdX@~9Q%8All4u7F+=LRa@(?KQh?bpG*ro|D4Xra>o;O_p52^k%4s8B6Fj=u8S5i2HO>Ud1s&K-3F0B#?#2&=dyr_7;DK-O_F^0F5sRj8T}bIVLC{h61bpma$0$-Z!EH&u*N) z!33yua-d8U(*;5n%{D@#l^lii2$;0GXVF+6#zuMaq}#pb?vqs%+<&j^66^#DzH=8Z zf;k)lVb=>(kYLboxeZ#IHJgUU#;sx)v(Dh)3 z8W^8r+p{D(GXYI?MAQKNGBq_dG6!Rn8xxZI&%b|{hlXCP=l&kj4*aI;Fr>pBKlwKn zz*Z0ufC?}eqtpEn$-lU|ijSQXbtC~UL||KpidFuh&6hqPh|iWwZqMdAL7TO=eLOWz zk0^o?4PC2+x8q#~)tTv1Wi3y$0jUlA`A3j%mhH!~~-b#{wB$mr{{>}@uC>O*}fd*cS>#FE2}zX9rf z^|W*a94#80!q9TjXiXN_z)9%)$`eh!_&sz$O0@~64d6|Jlnr#HSOtiH(M&Xk!EJ~* zq-$w(hTwmv_{Rpsy$ostD(~D9_d$FD^u5qb6gC8B4NO(8?Mzzm5+mAYWVe&N2T_cXaK-!WH zx4@DQ9SfP7ICcnwDJ0kjIV?;+2350I?bxxXN{0 zPV@c8<>8D_ak>M<(Bje(W#Q)5*2oXR!D^_<)4O;{w@AJWpkKXi3= z76z?V+E12VCLl7me8eHv#-L9qWR64LvTodRJt$jVW@%iuD}487~Bo*%jN8ThD! z^}Yr^3&aCzaR!_eJ_?Fm_y+|7Agh-u2^m#OoI9!xYMSIQRd=#j(BLE8l;&r9uz^DhCm45F%0UM4{~PZBz$BZ zhlf5#Dd#uqv6CqAu-wd*!@7$6g5|^f)zIl(4S%qDhQjnXvDClNv(m*w3TV+jII)4D z2l4!$Y;{z3`UVZ&<*0j-hrS;a3>O2?L;jwa$YGSt z$#7==3vdXBOFXz3Lp1~=^oWW+D*N}-{z>menI-1}`XXdtukR!5sRceLCqjyjx<`w1 z389vH6A45!z1N?6LAi#G1=6@9p~~t$M@fHPrRW%P1frZ zD1kfr!HP{MaSZ>HKHKX&|DpRD|2f33S(2e`w)Q9#r{QMutkD$YGU$4LRhw2!cf36HY`M2%b z0Zg07io&cxZ;Qm5SFT(E_Bji$7Ki-(O@x)?w+D zfcjl%WY={c#K0$8T3S%+2M2@MgRjsaTm*kV&CDE^8);JT;fO`Q=RZjDyQK@(I}O^I z2r=gzLyap}a3N_uOrW`Oyx}t64uXO=bE6n43L5I`f%K0x)X>#s0s;pVoM>Vx1gVpWMgFDpvHp=#ZEGlydy48+^l*48^akmeTh?CSNt;TrwDwsuI@F#hp- zd|a+zY6=iN^ZLxo1)YtnE-oTop9e~S5`3{Ox$;vQrb%j1`x3Z~ zQTTrqXq=uw^Y0h^55cb@=+*&tuwr6RG#~;WQXCEz?J|8Hh8l>FfE;5I71baiz(L(0 zG}?(2&LI0iRN)2;?3z)X)%B8rO!K#Toi!qIg7ShRBg)M5bujQMLoguvzU$K(RbvJB zU!)Dkd)d{14%g`41uz29yr{}Q0N$yp1P2y^yt5`@2#h5;7uW%%BhvKTUVXnzFBBZq z^`!9avB{r{ZO87lsb-&reHdCH2OxBoCXa1U(yIwa(7FtYhz{wa9I_}DBFmYmYAQG9ldt?=%XSKde7eLwe?*vJq8WcZz%JWD3&x{Ol7 zYYhXPJQ0F+I5G$D20b}xOf|JCrrt-zQlaJG2%KnX)@Bxr`y zds_O>^?Vibbj8c*isWUOkZ_k{U-Ug?AFUgkA@yF}RV-=!Cr|r@`llVxJE=KyvLF2K zh_vB*0r18ug0E)|O&Hua_fXU|xMhCaxX_>^nOpG*eb%G}=^u@*?ZJG+YPy|+#n zHd(B6szSYtfy}nCZsr6P{3SCF+VhJtj4zi2w+>*Ta0wX_YgiAr-~(y5jC5*|Rn3CW zeed+X80Zxfh&E5eeavy6lfRI5S*m!BA?BnlS%LxTEpCN$WbNyOX>z&k@K8A{>SMeE^ zox5b38N@y8CrG)>dS2pasc7|PEW>NMFTe%K^B(GZIQByQy;o_D>E!Au;v<ywM`SgktWzAWh2Y&0A47ve|AZFc*65 zQ+2*ZC@jIUb@ve8XwAx3xmPM01v?7ik!MZe?C4RzBxx*5nK8pLD&P*DcTzvxc2E6` zQ31y}GvM{ee1-DTm$%50j_FNomeQ;jD{DT7JIdP2oOR6ST6#h6$~7+i@Tf&1&E0|- zCs^7V+8pMaunLzrn5Kg$(kO#i7efHXlmmZ6q9c0JfN@V zT<80oim(0KxldOVO}0Z7VNsut5O{SGr7LnzcjJ29xlfOKS&gvShO^|@HEMpLm)iP& z9IGO$vD>$`@f0koLhR(E@b`pb%sNDB@W<%Q#lxO_Rs7W0*>V}O$hQGh@R8Y$T4#Lx zRa$J5f=?Y9EoC=Fj~_9SbMvK6q6yEPqNNZ^kGBNErlg9-DN22EwF@15#pljtR<79< zhtA~$$z&OQGgv@q3li0Xb2y`;(`PJ8UfmxUap#=458P4J<3=ALhie>an;%q?>J~g7 z4BxhmtB<}4B9hGqq}wsFT$Kg6#a*_nN!`oT_vi>e3QwCJix7V0`ufvP*>}z7OSagc zwVeZh{g6dp#JtDIerNP|bBBG&FyfkhLC(l`DtDjfOsi`)WmZg|KF+#eW=f`Qak;SZB((mK=I}Fsv#@;rF=|8qR{e@@|XI# zNcB@Wa^H!YOw(H&Ao&|!hsY!(Ell_16aBK;E{;QjMo*B|l=Z9Snz*l9 zmGU`9_>>kWFsR=cc(76eF)?PDoqf-OGjHWbxF3owa0!>=IzI@%kgv}^RdrOJ!=zJL z_-*3-!M-m^eCprWW(`Xux?DI%BZN60YopnrePo5M0mKu1{EF+5CpJwS#uXy7(G-?tTT|EWS z04&@I%H&i{hUpwP~?^5Qqnv|1$A!YURvs15??jCkvYj=e#)eBTf zZ9nh1W0$qCNhYgvCVyJ9FZ{HIsCms0Lv@cfpR-2zxcyNUj zauGy<(Rc5UVOX)eZx8E@LmV7$WZGx~b#9*0;)j%H&WKlG*432CeX)z$x>yJ}G*dcE zTYAgKMb1Og;adNg{}qiZ*Tel&+oynjZ%O2jw0Idn49Y2o^CVa%v-=gXfP!7Oofp*V z`a0~UroHZir&DT#oQf$*LjP#=-Rfu}O3fcuEz{1DrQxRveE#e+4FrIYL~$A1)XM|bjhXtUH)bL+p4g_;$uJdc}GcvEYZT3~;BUaq04xh~WG`BvaT*c#%m z&>B0p=joJ3UR$$uVlH>1kT1DSHIY_UE8&`@NpC|C@&19KkLL;hqwCWVClKO)_J{8ac*mWm# zJxal7ZBMA=I~kiDbz~hI{_$f%fnho7iOI-*7_oe_S?d(O+q!Y2>Pk^j4gup_(L+9m zsUr>iU9TcI4hM0FKT^!3wLX(iRQKzJy8FPICb#&4HMpO-XjJbw)4;Ewk2J%O%+m${ z$MTN<+~PE}h;9l3pIM-Ome0m_wpY>c)zDR$!`M*ZTlb}}pIb*PZdy*Ub>G)YsDVI9 zM?LDVd&@X%Vs~7Z-SWgqUy4lM-txYvi&H$V9;RF;lXTxB$F ziD%KyF7)L3jQjbY#1-C$d4PDw!8E^fRK37U!Mt+26X*u37^9BnzPzIzsH148FZ=42 z9eHrg)Cw-x5Hp_jxniO+X)%hCu>i@Yi8vKD-MV72K7O$e3U!wX;}@DgeP>W=CHI69XJI9$4$B8ea7dKE zOqSMaD|PM^C$w)^*ImOZvh(zBSSMSx&Adv_sAUu78cy7+EDF)^1(X!vd-ILCn6dp% zv!fQxe!2Pj*=mBKo0fvV$=wQOa-1xs7mc&{&XcQaNozEG$`^{zd=~doV$sn;n0q>T zZo>BLjZ@AyHjB)h4w*XCirexY_o5M*hA4bGKKW>d-12~Fplf3rtBbD29p!H4no7PG z?AiOukJYvv$0=6YS&48vqLa4`W;!b4=9(s1S#r*v#Qh@Uoc~L-t!-^fez-j2)ic-! z#C`2cltLMjuLFsT$DCHzb1f{5^HqYY&ybSbHMp}K`Uc6=_4{Q2CkTWy6Bh(MHxgWV8*keoKvC+r+Z z{px0On6|3PB@zvYbfZ%>7SS<<>-sp~754gFv-O-lXYKTm5%A}`)Hk<>s44s0xi2U3 zDo*ig+tHFE7z~Bnxq9uI6bKPuwnYdOM!DNaEXDg){!hHJmT1*oMKW74k?fN0?R^^w za_UI+G>K3>`(9et(Cl@636D!Q8phkBZVVEG?!~L8PISAl5@Ob0B(i^R=08tv?{w$s zoL-e}IPU{_x))sRefo_q&ti(0xz85~#F8>z4W<8>$ezDiMX|N{?w3HJ`~9%jsw*xy zw6=}b=QB@&A~cfYhb`+(3*O1}Pc6*X`(3g=wi0()vzN5*8uIE6Cg8RK%2{?}ux)D> zdaLC9BgxKMqJ%_}vE-MjSIe)tc6D*cDm4YUwGW=r5B^Z5HVCzC$eO>(+VCl}Kh45! zPO+hG>iaJvzs={*L>er8z2L7y2t@25nr=e#^uU2)AOi;kG>>P>}VFmpZ30A^|>7jaA|x1&)lZBPwZ#W9wWA6 zVI83)lT}s{q7l^DakH`hq}+K^fekbBrG;PHW$p4Ews%T2)TZ#f&TN<|==s>(iV@C! z`B~1tj5cs$IDtlmMMKNy<2zoebyMw|`0oLb#C}S&OeyIg;!t>cC~H+d!%(*Tw9q-Z zt8F%plvqbw8&Y4Oa=l1_^Nqou-}OQ;jgz|$^Z3^d4bMO+Z3AM-(eKB5QZIuy*THet49ByA!9bodXHSkCktxGn7M*jY7HqG+yd*{<6xHpZk&a9!ptP z27E$#FrCBfdaV$C$TJattwUb$*VKX1M@HNUrZB$M*Yh~X{GXH>37h#2|I&FB#7<@P zeAMnZ&JZP@W|5#|UNio`DtqgwD!Z@mchg9NbhikS(j}pwNJ}W4(j6ioAl)Ek5Q2n+ zfOHEYDbgt+4bmtri0{09=RD_m$2sTy<8j>MR)?_JdtGbIHS_ZwB`hcPSeroex^k1j zoEt5_CXYa^lqgk%LCWPpe4ty5XT6C z?El!<@xLV;TBCWLzjtJL$(zoT5UXy|L^=nQ?R&d*@M0*=cuA*P&X~{=$0gX+2**ie zePSv5(rsIEb)$3XpF{t*ho`GPkGG_1+H?lhM{_0$nm$ORJsq#qBs|z52#~Tf(VU!M zu?63ks0kPK_u?X3lPfWur(fw?fBzj#&BRCD2n;A&J_IRKKR$T`EymECbejCa&8zBh z`IcQXX2$0gljo-={mbvS z!5oT%zAY%?RJQi!bPd%<;yF1CPeeuye~$KwT&lj9JjuQs=PJq(2}XJ5ak`o$R|=^4 z&7v0ky}N_2dkEbvC)dx(HVV1A!I8AHafh(4Io09YV55vzXY*}J!-7ftyZj# zz^C<+Ifd<>nUZ&MBX4si%d@nShI z-gJ>Xi;nzhU_*t*jFLVYdjNqL9R|+ z%KJXwS}NU9^NXYWahzqfzKe$#P|o{L1x_4>)>qcom!a!PNnM=?DWSpOe@qk$RACbP z=#-jcVq)B2LWb?yH5`~Xgh7ICXp};Rl^u)~-v9Q{Zyz*{LNhm661BM&W<^s^^}gos zbb1L34r&f#R=~klT-ZP+{0?3D>P-J2;oQY$F#kMfx0b}jcnbcBn|yu3{VKGDtI3wX zJ|Bmts%4P7*K<>FIgg!-6xpD>Ull&L*`K5kc}(bZyrw=n9^CrdehWwE_HCVRXMipF zth`Wu!|;0!2(K$AKr3MazQLugZK^evU-26T^x8Xd7z6PS-S6KT+dM2B$9L82ag`{i zcji{ctS=GBiMZu$T!>GC6Y9s3qrnU}-O#m;2RF>5i}aZTWYeB68{hcJI`hCtE9CH< z$|2FOpxk$>c2lL}`s}Rs{NM^S1%JxPa@Cbwa8-E$RN8R~t`*lpQ%Baa;bMlXepm}D zsbnd7c7hH1D5DH|0j~C$pO=9##NDxIIf=cSWVd9e0?@)JL|pZMImzh{7W}DHw5e2q zL+()JNk(dXzRikX&!g329a;32N7;*h#a%jm=xX`kHqMh&%b0UXiJry6L`9aDQ6994 zEz@c2ipQljM=yAbvYU@;-ooXKfg!h9NxwCk=io*Em~B+jV82Pkz_@!$l2U(c!98H| z>lo*|cuR=vEFNcDi;rEydbD)^)%1JlGkJVZH^za?aP&`W9?zHR!ykQm`Z+^*GQR2R z>kFFhq(YK8DTy-mZ)6&8PeLJmlKH7JH%W4j0Geru4a%0+aA3vkiMDK#dud(pUA59r z-}85cYkXgEJ+jM#8$EOVpkh_k`R`xP9hC`t+7Hzn-dg&vO+Pl3+--i>rO{osU%?{u z9DXrUc86)jeqOH zT&jh6!N~i1d$aMv#;p5070da{*Vw3sjT_B*k1cH=8@gO`WP>)=pKx&=c^BO5C(xyv z+^8M1^p*DZ**J%quC09qIDo#GjX5@nIG(<+31}-`^0umm*q5pks1U{%h<=E1^hu~m zYcYJ#`q<;~J&s^$-JTmo?YnttF|Vm~Y_rQo-fWltffxDxRrnRjdIAY2FX;=ck1zlx zDmp>z%#G85)sU7Q{^608V>Qy@5dtHV(gEX+g24WT)7w^ zCwq~q75sbCfL^1-VLfL>!SNOkFNrgCdky)Q(Gi3F_mlWNJWiLyq+h0o7mZu0KDl*x`>@Ly}C7GXF3oU<|jswznW&BJ&LdS>h4Fu z!(HP-;<8b^(ayyw)2)zO*=1D9JU=V`+{H#Q^9>#DZ$LsFRfsKL2AdCEkNiD4j(owkKN@vUvEDosSZMPcXq5vsY z|I%Tj;DB@H4aCmn$N6T9UUQvJHdSK}p9y1V7&GB$*eL5b&;^&?e3ZZM<`o>G5dJNR z8-mM?#!?o$I#e4pcjojWY5yY$D%R z^~$;27Ms_@XuHcqW#n9)(Aq?%^@&e>#8x)tdYKB=5?f2fx`KSnc_(JQxn@0;N{Pc_ zK}xNkRV+VtsGs@PcY2+1Pm^%xQ8SGPB{zKu(hqh%>&(gssR+^g@x3SbCzZi>>HJD# zGRe_PB+E1%7wqQu>J1_9J*TFjrOVc=Z^4Z!GZsPS@$iu^JcThJq@=g6M(>BDi>2d} zTiY#T`$pxmZ_GXaT?i2#n|xFD4;3@oe7gnr)QTwW?;DUBZrIj7#5)c$;jezPkp(%< z&!j&RUnjqd372?^T@c(+OHf|SQ`5s&s*r$sV)T_?O!Cw7p!if8?&8E_JW&R9{vyX7 z;ei`Pn`35fiWbKFF4l2GxlQL@(@!IRZbFNY@;iTHO`}I4sFTK=GF($CRZrZYmoaPs z5?S91EW>;x?xVMDEnO^TC)QD;zFN11GNSE!x^*xpw!@wm<#u?(pSP8r7^9dL*J~wI z&&@!>v>5X2r2!GU)H}QV{XgLz)ZhE5%zq3_K?p5IOkn5DINOjRlrf-lKYWK4Cvaz& z=5Hwj;U$5(@Xc~g zaoQ9uui_vn`EtND3(C;u#@UHQWu^Zt&$F* zR3?fJFRp4GKo*+xwQBfhwTU*u-#V{n>+%T}1zc=;XWK8$vzD669w~kS+xd{5$Z31P zoX~WN2v6c-Hs$V~+|vYDp9SrvJwA6`?pPtIbPs_B&5M80e~Wl`&lPbnK9#Lm(1ny4 z-+M|;Z`WK<^?C2;&Kp7xOdfmt{(vlA-PMBp1!9!@7_E;a-M<&3lxmZIFK;%Gd{_J0 z_wa$UoydSyks$k59LCZcoj$w*HwGs?2e_biN9juCwY3l~89bowP7vdK`i{_<(-w0x zTrJ}ES++)`@Ya;h@$QhV%k8tqHnr6t2(H=dYTl>Rq^tZnQ2}eaOUw9tV$-6ppPrL_ zCP`O&uE3R*TGO1I*{ik0dygd_1v$Qz@xM}6?{5@k?e{zW zeVw#&T}80Fn(?qpOHjy5{1?-oi_vs>RZl*Odb|vAkTy*g(dmUU3p=XoV3GEwh(>wt zH?>+#3GM}xTOZhyd)bo*31tE?+M7wJ#z-)I#l`6TCWc%Q53%71>fK)>LDA#s^k&AL z^=Q&L|I#^v7_c<2TU-qF?Cd~6fagZ4u_KYkRdsEAZV15nYBT%qxXVjGy3}>S-F)jw z=<4JIZ&{&WgZl0Pa^u2@Ayob%grFFeCtm)E>qvtsH12leK4W+?Q*mqPg zidlDc&e1b{prIPw35gbgK%Eq;^Kt4M^G{{xt!OA8)Q9TNr09*m(G`};|5#R6o~Mnk zc9Z6t+}!5%KkBy!-<#^gJHjEuhp}DkrGX%5h^g zY-dm8YoF^_*}nca_(g`Pv->F}N;;Fl(5UudUs}yjd5|`*n=Uq0Bw!}{+lF4L?dwvZR!4-_$$yPjsPa6_ z*W&miE|^QjZ`BMm%MmxcL?A^O(~XJZr@GcGSjF#c+|w<3wdTSkW8(gHuSiOoAY5Wk z0F#K<)%7RWr4WaH#p}6i0%r}L>FXajOuO|#cTM$YCAr9HI(3mwew0}+?_Xr@?V_^L zVr&hG$$#yJ}5ZfM={7@g7g%?&^qpmLHf>$WtSp zVGw$0-v9Q!-unn6&^pLVP@1{$(|AwyaoLCwYMLvlcHcIyXq4{t3)k`4JMgWJr=AHu znZ`qf~&@%yZ_N=ohM3BkGzZMbBC;(R^LA0%G-w3@4-x@J74+HtniLQolr|~TG z8f0CH*}9IlP|UI;bT_d>-0x1g^|zo%NIwlpK|k<-{VynG<$L{oz{j@ zeUm0=%XfmKdsaxC{F;yoiQ%ocZYuN4ZJ z4Qd>}U7OL{`RmXEoVfkAdS}fZ>sKP|=>L9b>R;8mhU;pkJ)jGfqbMj$6X2aV<95+{ zZHh2FzLlBZwec-{Twj&2bnnxv1n?%e#cgK4nyB2&rv!Te*eKB zCiC9ePloMy=K$wZa8L*s0yn}^T^~q7B?WoBx#zT(^*vl=_mAz0FjFDvnuYJh5o(cs zeahLar#*h(OR)Hny(A|`Pv?&4CtTDrN)Z%H&} zg)|emZs|9-8XFj$)qXZ#`gg5E#7bz8jwTB62_Zjw6DOS3n)bYjN3iB?^(1DY9}(Ff z%+HhK_eSn7Grx=}i9vZipLk~3B;v;aM|C+N&mTUy&?6qf91Rj!tj~~+B3&E({Zf0- zj95CI1SjE-T-8PmL4xS6*OeppF5@HXz)LSn4cx*l>nyOIa~DR#H>Zq^WC!GjT>%*$ z6m|MqviDOAmc?~WI1{%|>)Yh{lC8+zI#rJSxO0^w>rJsJsS!!wyng7u`HWAHky|vjoGbdi-H6 zO)ut%hp>jnN#@yYh5)wUbTML>no#Oa`f2}03Pe8bl z@4@K?-BO}yfjnIB^bBtT>6e0~NvKwySK62ME*_=D?wLz7Rh24rotobkGc7EsMru@L zYQCIZC;Kj=Pp3zcXHt}N8CCK_+6%o+mnrn^kI)|)mmYAKXt8zN=P3H}=%Go7V(%T? zL)%Llh&A72Tzxg1mu2I@M7qJLvO}$h_2*_o*)?8BAHX{07QVxV2tRpy#^;myeI`X< z%?4Lg1mGUIr(u4m@kPVpRKcOp9g}-c<;qe=@Hvb7Ugb9^+K{I>Xqp(uK&aNa6Y(IL z{kgk`;@0Y$=`}igWQfQj@rOZ$SEUO9>fV@C2L_aTB89ZQd;s^Y-@w%)}BkzT_x}Tc&;aB9Q)phh5WPrTqYGHd3N%Hm3je zvtvqqBkbNc9Xc${>sVgTweNS91j@X&c0d2YDRTLyZ-B4V`Pr_a{8Pf{P5rCAyrpR= zy>_J^y)7fOqh0%$4nhf z&&cieDHlxVzs*U@OQ=NNXvk**_mBUt0hQ_}*(!ddgajFwi`mpyg3OpMPO|)2TXn5M zFSKSgQ>Ha{yHZk=T9oypOZMOX2^vU&t|CE8IoyErJ|FRM9h6Z{QD;)zItspcD$ft$rnb{fq?rj_CtV}mBZ6!MPlKH#Cc`&ta&~v>s?c(#D%m_4QuSfC5R)sT^ z)CfA#T&ye*_pV=Pz^&9Usqj!mC?ogAk~tRbHTd~!I~tqm|7-f3lcLZ|U%PjydUbNT zpMP?}s8)EU#%X$A{{jp6qsfzbNuhR{B1oO+}iux4oK|{qD?`eE`72K5idw;z=Jlj zBP*oYBoZALe!l3R+xyqJSIRK?qmM6>gIMnUdTk@?$_3BmTOP&so{x#`CdM}1xbjJ~ zK9zAsJn`~HlS+Rin13A{lu{D`hXB_aJsilU+xB}un0A+mOVycxw=27XUghNMW~=zu|g6Y{HT1rDz=MmybRj7aGy=S#G@&Ipy` z4M)qybC!;hqaP@b@rT@4so&o1tw`98W&ipR4goZ|t}REt-q+%|sxbUM5lBmlzPV~R zUf6aFxsU7*pFXIUswY6kY)07W^T*)(N^TtX_D55JMW`me6_!>l)Km5vqdD%bnDJ8tOK+h+x{sZdMq+J2Q?a##KE$SJFh3~Ichlh5#VWS^ z4rigp%?g!Rj@2nULMcNwxi{mEUw_14-F4Psd|z2W38`FG!3R#l{53@}PrXj0UK)q5 zT;N%~!9^t>^mU@o8f2Of&~}RAdCA1sg{Agr(VZ~T`mPg#BYSbFFUq$nQY~R<92e|P zs8T{3{0Jub`hKZ?ka-bRbFFMaRnOSP3`&k`>Ih` zEHxxgI`ldb9!&u;Zr5I5jkMLqcAmiN@u^$iu0j{olm`owY?(S|ga`wLd$0C0Xqxy{ zzTH)}$G}y~_H_`e2x~3%E7F|-HS@?$rx59M;;-Oqy_wxD76xRc7qRP?kG4!dkI#8roI@U+?nj?*AAak1kq0w+YhuHCz_O+W5)WiKa@W-yDdAdrQ?vwrQ6%#azGjb?F zc8^96U+cKaI1uj>;PkG3@9+{h3m7l)6AcXzgPk*e<uV{xVVcMhvNeRdd%ou}`zxXQ0V~U=IeEj7(_whAB8(rO@BBUFkAi!}& z;6O>~b6=;HkXU7@Opvd<++#P2;1(T={;o7zuRo17Wg*glfeN{m^jp9~j_YDJ-PD>L zg0MTQB0p(v^lS6om_%}wucmI2%wgoo*Q@i(1(}T%bXf;LbR=JLwS%P>dRPBF5g>f= ziSO0e=%GV{1mqzT(Qtnyaq6Y$I^?+PP;cxt@AzZPpfvix(i*o~$niU6(XbC!&|E4kt*nm%};A*~BZdVp0C zistU*cWW>TM-fxN@vgV=6uxX({Lp(acdM2Td1PFs`UG*mm9(8-AI6m0pr9LTIfQ|V zy@jH#JHlKuRP5a|W4G(AM&HPs?fr=ZV@v)CQj=}OfyeCXt;3w7k!Tf|AA-(i8SD4V z#i@F;qfn_%N}|Vg{*~>8*PXa2lvq6%g?N%SrdTT5pKu>D&nLHf-JfEc@Tu3A23!~Z zB02PkLl?R=al{U5Q{wAz(=e466c>4p-!qpMk}2jkyY*!^il*v1RQPKMVJrKXTO`Zb z&qb+b^l*LtqqKGTsqQbj$=mN#k}W}yazvNWfoSdS^}@uIDQNd)_Z7iIpUmpH4bSk? zkGH{ezBth`gR4iYZV;{dxVsaJOI?Q)Gt#B>^U#+ddcSKo&;REp+lwD8jQg4rOKtg$ zB8~j3sca+N6wq&nlnmh;mwZ2WrW!O?iChU}Vq}q}1SjY1ZZmtW!>>bGSDhZDzIlsY zJtCpxDwO1jV6%<=JRGw1>OaO#_q|9jgOo2t+9${fLXs|-r=5BYu-HAG=+9 zaV69EV^4f)S{h6#T!m?CM#frWznC{jPxOevse`A*AnIPi_qEaQOBZjzm(Nb@Ti98` zY%t_}_f!VF2f1Yin8dkPdi3-c zmv8aiJBBz%C5ewQbIQ0`4Y=|sSRO1!IBxkeq@66G`7*vw6`t{-mbU$=N#U}k(tZ(#?kh)G{ zXK|VJ*(u};?R2TFjOKyxFpo`ic{Xb(JF)f5Pflx>`2$=3SoT`~+;>;zR>pFsA&-WD%lu)o*-gG4!l(JP zs=E{q>j=`Xv;@yVH#-5e#V)KcD{HIMU@Q6DJKiAu84n(k+fmf9Ku6-gW)`CH8cCl* z1EOXqPtVG-)7KH;g{hNddoJv2`1yMX=w1XaFC^TEP)ydxx&OtJ2q=%J*{zVIbZpGr zMx}_I_21cv7?!G5D99>xDEp>M$A=h&8?*!sJzo1RAvKQWMD+MjQ)IpClYHU$Rww^k za2)A&MyuC{Qsha0{9N1PJPs9M5W+-V!;vWP7O)(5&4>I`RrCn0IM>QWkbSPJ%euPp zVrX79Z;DuP(fAiFmaW5@*L}q`J={e~j@jw`8d?H(`HMrLGKzX~z>TR>H?pO=^JP%~ zoszUpJXj`+t~QcfGUm1Vr<%Cnyc%o^ZrKjLS->V(Yt6?A{{r-k+26Ec`QY;-kQe=(KuUa3Dyfws9p+k2p0MolhNaY;#i&ec0A( zDu9m)LPV&^EGnqe2-ir)MwL30n~?0=xU)dhzZoR$#Cxpw-QC)U$5`QB;OON8yTRNs z`Y7oaFD^QK3G-|>jS3D{<+v^w^0tp$OoBx>O7id^5F`1SL%#k_R`^jE zCknWQ$h=3<-(Z%Py$}P69-BHpoH^~ZYe9U$JW9RnN-eZSj=IATc;idZv0H1dC0h^W z!r~g=6g>w7lrn$$RRMB4YjLp(Fhv3BkVLddLeDRs?qi8-29}ZVQ(TIQ)1ULLvdwm5 zA%!#8K@wlsvfXvE>$PL5+wt5oS35_~7fv2L5>L~MMLpqoOhj-8y!LSAa|Ou52$$lf zZ{GhsD=mOIGq{?g@F0enn8GudnNmM%f}a|RZN49<&E1rwHQm!74GeVDzM)c5ttrt% zx{%ZEu$6ysQ@Q(;wQHX6--(Su%%HO0^%bGq;4wMl&gbqolRTU^nQruAyWAL3jEd-f z(!I&HAwJoaPlKcEgH(~B?SPtYE8@J%+hO#t;g>xZ>>!4ADv}pZX>HVJ(q&l~#Yf|_ zN}9rz@1k3J*sIklkzo4Q@MHC25sI*z!d)E5N%gK;B_>J+qmwrB31Li7RWiB|%@8!% z)cDGHbFAhn2hk}sXa4$K?saZVwVr^>q<`U+k2!iC!Ti3O7OHD-i{?OgsJAFp2y62< z1LfL^)h2$n(of#+atPFwAybw8Q6Xds^`z=}5P;s~@ayGOb0; zGtyWNTmC*zM>C7q$GUCBsCfNV0rnlAng;dP&qw6rXO_M|Un;@6$5(*{eY2`Q4H3fH zZ;;Gz@iLx+XTf)tK1(M~6NamuoO>?MWg0^1__^SBFt}&5I_S`sIR4 z7N`!nKHCd#P_(rC>w2CjP!W)mW9ANJvG6AjGWR&A2M?tO(s4vnK1Lmu&e&*%);$~@ zYSY~a8lHPbO0^PCcaD88jeJ}0)_d+*BmY4i z)Z&oz{D3-PLIt?F8Yc5!xK1%PD?jvK`Pqn5zj(~Dm?;`@6B@e`)MH!Lf^TK>QP*+q zafan|r~72>^rn20FWtNC)WuH1T8a1akJT%4NQ=OU8x{1F)9^)!eO9iX zi8%%(RDC0_RKkBf(O1FK6QhL1>-`b`+pBYv&FfChoGuY~3hWN~M7RgRFJrDkqi2%! znNccHJ+CZzCxro~udr&USgJ658tw_cLpQJX>VVHA%xxu~epndxMTVZ&gGs@HT?u^$ z3aU#{`0mO^iBjtZCLE;{G)yFGQ4oUe%m4lDUxDU~cnSnUV`DZ%8b22k?w_sp4^V7V zZAOj0wijl2caz{{VD&PqyGqF~M+N$3qm0C%Yq7F z6Evppd+Dl7csci0RI|EmlomX9fGMzXo9D8{K*(qio>4=a`IebilNWv95@u zf8!QeG`GFLr-)nd_^2npn0`Qq4iPM>Yhl#;TVahy6@TYaES|n{^rEPiL<9do$%Bj# z+&b3lbp(^0Sdn%5_V!x2qcPp0(8#3tp6XkfXf_oluEKfM5oAG6=NCOy;d#;hsh)lC zo(i){Z=+~@ODktMDsTbQugKLn;|y!3|MUN2$EcI;N(cm7}~3e>-HkQAtd zS<)LOqopvHHc{2^b8S5u+C`M=CV6ol#{c-t`1o1Qj0#FT-zD*t$jobJCNgZC22~@* z;cU}Fn;4Hn3yisrNquGA@t>O7x*=wRX_He0$6ITA++`6?*Wo||7@|?-Vs_J)Tj|M3 z=eOrMyvOJrU~sj|QQv6ojaH@&Bw^t1tJZUo4I4pJ4UkRj&~or#z-PAv_AoRUm?{0PtrQ z!(|C&@-BB(>Pzx&W)QTvL+)aUZQA=8wtQCCeRN5w>(m!sLSh^?C0^LF#kW0mRW~Uj zp-eHHn0np4;X}XmokC}h^rOdVNevYN7A-|WqR_rH%r?XTv7UH_Hha-`G2yRtuqKe*+zae!u<%bX>OxlMEh^r z?Usi@)Pefql0Bq5TX-$Z*?B5(+oitJeFg@umx%=WMxkQ?3YNF+ba^jkGr(jskHm1h z0oEC{@J?p|P{J1G7Q{9XPbGEnp_B@qESDKHOI>N z^{oo2?lJ`J*c^b@`qFB&QCy;}v->kwJqBw2k4SaI9ltO4a%^i)#G#ya zcy#ccp4_WfFdQ{>kA(^#o%L{!Krh8&nP(`p`-~f8u1u@-M|rquo59wxt@hn+(qV4k z?V6_mJ0qa=py5%nFBWv1FRSWZ$LljR1!i4AL&F@#PR## zFqQze7={4eL2W}miyI;~&PkLvWYfOQU~L?Amgf2!CmK9F`3D{f$x2as&Y#tZphj)J zaFg8&p*SrR^>)63cQWJMF9J`NH*leA5jw-6MD=-QTub#;^sgt1v9~mHite}vU|FCW zb#`BrEs{cy4-s)H1{5HXH%_Oq-ZnUXC16*DUab`WjY|wMR1KfI$y|I~S3sku5ISvy z*L!*IE8|m1sds^|uBjUrr9Yur+p$Q2hpgE)(@ENy>V>r!#(CC%vAn-&Np=^ zh;(;3wxS}_a3*nno&Y5k%Fhf_aqy+!zAtR8jX4tA=-c2*sA84+J~MtzZz^W}{zU2E zJpDhMJxUO+H^{{KK3+3sc>QG`N!v0OZ6B5};3?7Ccxb2H+zIMlORY zLni>91w7VN0Wg2{y~f*5UwN;=D>OE4KPFm$pCkxOYXraZp9umWF-e0IErM^guKowI zEt&v`I@$uQTBTU(=g@e*1jtyit&9vsIUw`yES}V59{wK&U9w@u2h_GIZ=rxUFWhts z2BcrKe2igF1U)_wsRL-QN-YjH_7Z@KIrl&%z|#F+M#kD?`IxQnkS%R-+sm{x(I$T6 zyGkp809RuD-#k1gn@nx^2&^7}2>c1^lAtF5s$9Lt@IOo}tk=yNnwpyF$2dX%8$7gs z_dYThU$V`L@^p7s03;?zRk1jaZXLmh4h~={?0(Av%`Rn|7zNx42k$@>h_M0xFYy9k z45Y>m+ZIIslc@yYyOwPg6UX<7KXKsPfS)&oBo}pUmR| z9wQsWaxD_fl+;uf(C*V_$>T96HpA&zbqCa-*QP|H!sm_upKmFvC^^2M@$CQj5{9A+ z0X~C}X2v=ZTn2DgkR1&?!kY9EyT*1A^k#Lx5sEU>1+VG!GS^JwMOxmt|6thx0s0@5 zS!5$MWH;cKF#*C*2B>@Ze}g=+pZ&iNmbru2y=q_~s=k#)u+-2!1dE!v;tfR-KpwM+ zi&GB_48Yiy5U_{2zP|^OFwpv+gSZ%g)DelTcg@X4m4iTl6#$uAIGUj1g9vIxi)9Jp!V z-x}AoZ#4fA;DL1tg|p}&rbdD(kE{ad5%A--Ui^*$73?PO(v@S78-B14a7*CRUAq8& za62N44eB&G#ls@p%i4?>SbO0t+Hj+YZJr4AFsr2`_T8t5>D`<6ttf9fZ3>fsTg{xEb;S z^-ONqGq?ahUXn%Bb4h^T59+&$fT1iDu;{=w2b%HHPEAdX!u+RaVE}W(zowo+0)lW{ zr)OtZ5Piv$d+O?xMn*=VFomfC0?;4!0mX(7Qk@`ER#mTI^NBIwd`Aohm5Dcw=VjV zT)4rSIy&qijE4s}97I=|9nkLhhP95Qe8!FJyOzXns{7y! z))E&UbUW}r!$4qV@%`HXG7vNg1>HBz*>1+H`-T8Z>O$145m~6yCiVAM0frO`<9P-Z zx&35Ww+h0O&L zLZP7IstW93N@3c#`aw`Gxz^Fy8Pz%u!h(pz5&I5}tbaYCkz{a4ir6U^S_mR3{Mh)*xW5cssK_0IaH%BR^X80~VZR|4Pj2nhsFz8z4=LC=kW;5a zkn3P!nwq*mhmH^t=Rm~cfG>?elS~87@_m|Q0`9(xf5D~~vXKQr#40kqe+5*B6fpCL6 z(s?lb<>0J5@Gm|hap!xyeFZc!kp%+zAWT?6HoNySGB0bpK2H-eS6crU%iLvN)x0tWQpQ#k5^AnddKqh)5gzw%XVEP?dp>GBj{u|Qa?CVkY7poJ4P1fWbrpB(`I zr(iq)67JZU_9zWjCXq!EVH2X-kO9>{hjs`t6J{c_QVl015XnIVBLD{ts-D8Vf;0=vf=-N{#xbhy&fFuN}FaPCl76I zmhMFi9jFE;ZCh?q#|;k8%BJ~e{1;RmQN92f#d;YKzk$sU>tuI)!O(yWOiaLMnx;;M zfJRN@lRZR%0vXE$<%Kp_VOz0DAK1X4yf%H)vbVd z2GSte%q{o_5GF_Fw3lQ9FMi>GPy|fU^XO#$}U;!YoDI2 zc?_Ijd>Zm_d-}J&Yq3w_+!zEMKz!R_#l_(J;P9~4w{$87`K6^L0TB@_IX^#%Yn_ew zVedplMWcJIk|0L8tD%vA5IeWM{dC0iZ-jj@-`LztDl6kdV+LK{6!^p0{$fbvh5bz3 z%+AQ{?2YGga)P?zX!y|*-c3qQww^3A{=L>uXXofh9@8mQ z>NIaF{1tSc^XuyLi%UxWRn-0ky^FqJEd1NLXfV4mfnaetxTup`#E7&2wf3M!}{rZ(G;tLB%2ac%5v9`wl z7Nm^>brd$RX{q7i*cA_NXq%cc(d35tCH9oZ8`jP6f}tKQvopUH6CK^L>V7Bwxt+Z| z@)F8G*Nz?jYBHcp5aglh!bCfCevb+@8UTLU?G^p4OZW&J zpaZSSnSAOmyRo$u^JW=7nfAknTzVzCj-u(Xx9^J6$J36^&G7;Qj|Ao|_+H*0$dZwA z;;nO;PXn%Y)H|-Ev@}k*r%sjaGm2zU+WPubG&e;>IdO>S6TxkX?zDIT2I5MXg4V&| z$;o|n_4rr)kKM%Swe|EkA#l^Oya;Fo-46ruJD_5bQuy!JjN>y#&=+K5Oi(7jL;8O7 zy6jtivde01SUee&ea-+xfRCDeD!8Iq;mzRT;k_H^8dZpEoofv=1qF=joXPh<|CSF# z!J`_>V9g?X;Qq#HtFDQ$wY(J(5wQwU>ymd9hpeoLwzhU&aWVI1J!R?ks*yFx*_VnL zBoN4ZdO?IBxn2wi{D)|{L4ff;{Og8+o0}kz509G!*ckAijxH98fQZNfY|qD;9grcC1KpE9=jUt(e*@foK)G5x zQ+>)&6xhfhn#(GRMMnA;tLq_uP^4`w_^;%YluO|LfIx+!IK7ggAsu2epFc+bzCgoI4PEG=}afm1ja<9oTqwZ*-P;dv_ff!du_fBBoEtrc(BrD#$ zdGj#s{_R61_$)8h;O9VqiUHy0zK=922%G{`(09kK(^GLuUITI(2ul)*imn?qc@tuX zprJJN^>Lc!h;(&dWM&egG|90h%S|IXE#OShs%J?@{r>HqA?by8u=WcJpS0h&S&{U` zS}FJ=vTuoDj2#hCDl=(`hU4&e<8*h|x!Lc82)x*car>}wdr%NY{}lKy{$y`nx!&QrvA%oM4H0FdnE*8iY$FU~gh|Ypfb(SUN7y@X-w$lYra<#~3E~f+ zr3CjsJ|C56YHAV`6~(UlPJps?ae+722{Pl4YbaUYwZ@bOMcqA+l$1n598Yg=3o|p6 znwr|_#o0|zMtu$!7b^*Fc6m9m=_mis;M9RU&XtHSNK&G@riO)@KtWd*QBVhY+1PJV zf`ZRm{7&Aze~&}VBzqgeK=2)V_WqF$XLMf+f}JaxP6#rvAQ3|r9ucwl%g1%G9Sbx> znAzAcii<(L5(7T({`p^o9Bg1{2qNasFE0E;znIq>@tyjAD!dj9t6o%81aTKzS9iD8 z!-tV@e7egtS>OFjlE2M95yQ;-C-4DCgyiI~%!0Jm#C=%gzSArawTZlILyh1tBR#|6 s6aVLTjsN@aXbAB5zyJAv^4B%f?$HU1=#zj*6#Tk(NBwr0f?4qY0#7e(761SM literal 0 HcmV?d00001